Assignment 18
#install.packages('AER')
library(AER)
## Warning: package 'AER' was built under R version 3.5.1
## Loading required package: car
## Warning: package 'car' was built under R version 3.5.1
## Loading required package: carData
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 3.5.1
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.5.1
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 3.5.1
## Loading required package: survival
data(Affairs,package="AER")
mydata <- Affairs
attach(mydata)
summary(mydata)
## affairs gender age yearsmarried children
## Min. : 0.000 female:315 Min. :17.50 Min. : 0.125 no :171
## 1st Qu.: 0.000 male :286 1st Qu.:27.00 1st Qu.: 4.000 yes:430
## Median : 0.000 Median :32.00 Median : 7.000
## Mean : 1.456 Mean :32.49 Mean : 8.178
## 3rd Qu.: 0.000 3rd Qu.:37.00 3rd Qu.:15.000
## Max. :12.000 Max. :57.00 Max. :15.000
## religiousness education occupation rating
## Min. :1.000 Min. : 9.00 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:14.00 1st Qu.:3.000 1st Qu.:3.000
## Median :3.000 Median :16.00 Median :5.000 Median :4.000
## Mean :3.116 Mean :16.17 Mean :4.195 Mean :3.932
## 3rd Qu.:4.000 3rd Qu.:18.00 3rd Qu.:6.000 3rd Qu.:5.000
## Max. :5.000 Max. :20.00 Max. :7.000 Max. :5.000
colnames(mydata)
## [1] "affairs" "gender" "age" "yearsmarried"
## [5] "children" "religiousness" "education" "occupation"
## [9] "rating"
CPaffairs <- NULL
CPaffairs <- ifelse(affairs>0,1,0)
mydata[,"CPaffairs"] <- CPaffairs
CPgender <- NULL
CPgender <- ifelse(gender=="male",1,0)
mydata[,"CPgender"] <- CPgender
CPchildren <- NULL
CPchildren <- ifelse(children=="yes",1,0)
mydata[,"CPchildren"] <- CPchildren
mydata <- mydata[,-c(1,2,5)]
plot(mydata)

cor(mydata)
## age yearsmarried religiousness education
## age 1.00000000 0.77754585 0.193776931 0.134596015
## yearsmarried 0.77754585 1.00000000 0.218260672 0.040002716
## religiousness 0.19377693 0.21826067 1.000000000 -0.042571079
## education 0.13459601 0.04000272 -0.042571079 1.000000000
## occupation 0.16641254 0.04459201 -0.039722324 0.533605242
## rating -0.19899990 -0.24311883 0.024295777 0.109303473
## CPaffairs 0.05732152 0.14030120 -0.130090072 0.019285840
## CPgender 0.19064108 0.03028252 0.007679445 0.397504680
## CPchildren 0.42193082 0.57285736 0.129351259 -0.006985882
## occupation rating CPaffairs CPgender
## age 0.16641254 -0.198999899 0.05732152 0.190641080
## yearsmarried 0.04459201 -0.243118827 0.14030120 0.030282521
## religiousness -0.03972232 0.024295777 -0.13009007 0.007679445
## education 0.53360524 0.109303473 0.01928584 0.397504680
## occupation 1.00000000 0.017422273 0.03764236 0.467923152
## rating 0.01742227 1.000000000 -0.25381444 -0.007523748
## CPaffairs 0.03764236 -0.253814441 1.00000000 0.050955678
## CPgender 0.46792315 -0.007523748 0.05095568 1.000000000
## CPchildren -0.09272712 -0.196275616 0.13360509 0.069222338
## CPchildren
## age 0.421930815
## yearsmarried 0.572857364
## religiousness 0.129351259
## education -0.006985882
## occupation -0.092727118
## rating -0.196275616
## CPaffairs 0.133605091
## CPgender 0.069222338
## CPchildren 1.000000000
library(corpcor)
cor2pcor(cor(mydata))
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.00000000 0.716325696 0.03443158 0.034786185 0.07179874
## [2,] 0.71632570 1.000000000 0.11742940 -0.001835041 0.03380241
## [3,] 0.03443158 0.117429399 1.00000000 -0.047130873 -0.03372505
## [4,] 0.03478618 -0.001835041 -0.04713087 1.000000000 0.41877583
## [5,] 0.07179874 0.033802414 -0.03372505 0.418775832 1.00000000
## [6,] -0.05300696 -0.073531639 0.05543208 0.138532776 -0.02880954
## [7,] -0.10071835 0.118777157 -0.14670237 0.014809502 0.02052325
## [8,] 0.19121521 -0.198155079 0.03450469 0.185846871 0.32457915
## [9,] -0.04354499 0.411627444 0.01016725 0.029326573 -0.18545642
## [,6] [,7] [,8] [,9]
## [1,] -0.05300696 -0.10071835 0.19121521 -0.04354499
## [2,] -0.07353164 0.11877716 -0.19815508 0.41162744
## [3,] 0.05543208 -0.14670237 0.03450469 0.01016725
## [4,] 0.13853278 0.01480950 0.18584687 0.02932657
## [5,] -0.02880954 0.02052325 0.32457915 -0.18545642
## [6,] 1.00000000 -0.21940016 -0.01726631 -0.05872978
## [7,] -0.21940016 1.00000000 0.04636879 0.04795731
## [8,] -0.01726631 0.04636879 1.00000000 0.14413766
## [9,] -0.05872978 0.04795731 0.14413766 1.00000000
model <- glm(CPaffairs ~ CPgender + age + yearsmarried + CPchildren + religiousness + education + occupation + rating, family = "binomial")
summary(model)
##
## Call:
## glm(formula = CPaffairs ~ CPgender + age + yearsmarried + CPchildren +
## religiousness + education + occupation + rating, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5713 -0.7499 -0.5690 -0.2539 2.5191
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.37726 0.88776 1.551 0.120807
## CPgender 0.28029 0.23909 1.172 0.241083
## age -0.04426 0.01825 -2.425 0.015301 *
## yearsmarried 0.09477 0.03221 2.942 0.003262 **
## CPchildren 0.39767 0.29151 1.364 0.172508
## religiousness -0.32472 0.08975 -3.618 0.000297 ***
## education 0.02105 0.05051 0.417 0.676851
## occupation 0.03092 0.07178 0.431 0.666630
## rating -0.46845 0.09091 -5.153 2.56e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 675.38 on 600 degrees of freedom
## Residual deviance: 609.51 on 592 degrees of freedom
## AIC: 627.51
##
## Number of Fisher Scoring iterations: 4
influenceIndexPlot(model)

influencePlot(model)

## StudRes Hat CookD
## 170 -0.7935995 0.051429098 0.002266777
## 405 -0.8358598 0.076371364 0.003930464
## 483 2.4447086 0.006704172 0.013364040
## 552 2.2931376 0.013542106 0.018224209
## 568 2.5610182 0.009231835 0.023903607
model2 <- glm(CPaffairs ~ CPgender + age + yearsmarried + CPchildren + religiousness + education + occupation + rating, family = "binomial", data = mydata[-c(170,405,552,568),])
summary(model2)
##
## Call:
## glm(formula = CPaffairs ~ CPgender + age + yearsmarried + CPchildren +
## religiousness + education + occupation + rating, family = "binomial",
## data = mydata[-c(170, 405, 552, 568), ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6370 -0.7386 -0.5496 -0.2402 2.4368
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.853750 0.912329 2.032 0.042165 *
## CPgender 0.283377 0.243250 1.165 0.244034
## age -0.053385 0.019154 -2.787 0.005318 **
## yearsmarried 0.104626 0.032791 3.191 0.001419 **
## CPchildren 0.432033 0.298408 1.448 0.147676
## religiousness -0.345729 0.091123 -3.794 0.000148 ***
## education 0.009618 0.052102 0.185 0.853541
## occupation 0.042177 0.072621 0.581 0.561381
## rating -0.497776 0.092329 -5.391 6.99e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 668.67 on 596 degrees of freedom
## Residual deviance: 596.30 on 588 degrees of freedom
## AIC: 614.3
##
## Number of Fisher Scoring iterations: 4
#Identification for Not performed Column
vif(model)
## CPgender age yearsmarried CPchildren religiousness
## 1.429705 2.648541 3.052111 1.411823 1.076183
## education occupation rating
## 1.535033 1.698276 1.079448
avPlots(model)

FinalModel <- glm(CPaffairs ~ age + yearsmarried + religiousness + rating, family = "binomial", data= mydata[-c(170,405,552,568),])
summary(FinalModel)
##
## Call:
## glm(formula = CPaffairs ~ age + yearsmarried + religiousness +
## rating, family = "binomial", data = mydata[-c(170, 405, 552,
## 568), ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6850 -0.7482 -0.5469 -0.2877 2.3187
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.28086 0.62989 3.621 0.000293 ***
## age -0.04341 0.01818 -2.388 0.016953 *
## yearsmarried 0.11070 0.02994 3.698 0.000218 ***
## religiousness -0.34786 0.09080 -3.831 0.000128 ***
## rating -0.49651 0.09053 -5.485 4.14e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 668.67 on 596 degrees of freedom
## Residual deviance: 602.21 on 592 degrees of freedom
## AIC: 612.21
##
## Number of Fisher Scoring iterations: 4
prob <- predict(FinalModel, mydata, type = "response")
prob <- as.data.frame(prob)
final <- cbind(mydata, prob)
confusion <- table(prob>0.5, CPaffairs)
confusion
## CPaffairs
## 0 1
## FALSE 430 125
## TRUE 21 25
accuracy <- sum(diag(confusion)/sum(confusion))
accuracy
## [1] 0.7570715
library(ROCR)
## Warning: package 'ROCR' was built under R version 3.5.1
## Loading required package: gplots
## Warning: package 'gplots' was built under R version 3.5.1
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
rocrpred <- prediction(prob, CPaffairs)
rocrperf <- performance(rocrpred, 'tpr', 'fpr')
str(rocrperf)
## Formal class 'performance' [package "ROCR"] with 6 slots
## ..@ x.name : chr "False positive rate"
## ..@ y.name : chr "True positive rate"
## ..@ alpha.name : chr "Cutoff"
## ..@ x.values :List of 1
## .. ..$ : num [1:277] 0 0 0.00443 0.00443 0.00443 ...
## ..@ y.values :List of 1
## .. ..$ : num [1:277] 0 0.00667 0.00667 0.02 0.02667 ...
## ..@ alpha.values:List of 1
## .. ..$ : num [1:277] Inf 0.771 0.758 0.733 0.67 ...
plot(rocrperf,colorize=T, text.adj=c(0.2,-1.7))
