library(car)
## 载入需要的程序包:carData
library(lmtest)
## 载入需要的程序包:zoo
##
## 载入程序包:'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## 载入程序包:'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
# Fit a model
ESS8UK<-readRDS("D:/LSE/ST411/seminar4/ESS8UK.rds")
ESS8UK$basicincome<-as.factor(ESS8UK$basicincome)
summary(cl4.m1 <- glm(basicincome~age+male+educ+leftright,data=ESS8UK,family=binomial))
##
## Call:
## glm(formula = basicincome ~ age + male + educ + leftright, family = binomial,
## data = ESS8UK)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.37709 0.24007 5.736 9.68e-09 ***
## age -0.01359 0.00290 -4.688 2.76e-06 ***
## male 0.28300 0.10144 2.790 0.00527 **
## educupper secondary -0.18591 0.12411 -1.498 0.13415
## eductertiary -0.18258 0.13352 -1.367 0.17147
## leftright -0.11238 0.02738 -4.104 4.06e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2255.4 on 1626 degrees of freedom
## Residual deviance: 2202.3 on 1621 degrees of freedom
## AIC: 2214.3
##
## Number of Fisher Scoring iterations: 4
# Examples of Wald tests of hypotheses about the coefficients
# Example hypothesis A
linearHypothesis(cl4.m1,"age") # This is also included in standard output
##
## Linear hypothesis test:
## age = 0
##
## Model 1: restricted model
## Model 2: basicincome ~ age + male + educ + leftright
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 1622
## 2 1621 1 21.979 2.756e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Example hypothesis B
linearHypothesis(cl4.m1,c("educupper secondary","eductertiary"))
##
## Linear hypothesis test:
## educupper secondary = 0
## eductertiary = 0
##
## Model 1: restricted model
## Model 2: basicincome ~ age + male + educ + leftright
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 1623
## 2 1621 2 2.6645 0.2639
# Example hypothesis C
linearHypothesis(cl4.m1,"educupper secondary=eductertiary")
##
## Linear hypothesis test:
## educupper secondary - eductertiary = 0
##
## Model 1: restricted model
## Model 2: basicincome ~ age + male + educ + leftright
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 1622
## 2 1621 1 7e-04 0.9783
b.tmp <- coef(cl4.m1)
names(b.tmp)
## [1] "(Intercept)" "age" "male"
## [4] "educupper secondary" "eductertiary" "leftright"
V.tmp <- summary(cl4.m1)$cov.scaled
R.tmp <- matrix(0,2,length(b.tmp))
R.tmp[1,4] <- R.tmp[2,5] <- 1
R.tmp
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0 0 0 1 0 0
## [2,] 0 0 0 0 1 0
c.tmp <- R.tmp%*%b.tmp
W.tmp <- t(c.tmp)%*%solve(R.tmp%*%V.tmp%*%t(R.tmp))%*%c.tmp
df.tmp <- nrow(R.tmp)
p.tmp <- 1-pchisq(W.tmp,df.tmp)
c(W.tmp, df.tmp, p.tmp)
## [1] 2.6645479 2.0000000 0.2638765
# Compare this with
linearHypothesis(cl4.m1,c("educupper secondary","eductertiary"))
##
## Linear hypothesis test:
## educupper secondary = 0
## eductertiary = 0
##
## Model 1: restricted model
## Model 2: basicincome ~ age + male + educ + leftright
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 1623
## 2 1621 2 2.6645 0.2639
## End of digression
# Confidence intervals for coefficients and their exponentials (odds ratios)
confint(cl4.m1)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.90949200 1.851083568
## age -0.01930399 -0.007932276
## male 0.08443900 0.482171916
## educupper secondary -0.42969562 0.056985538
## eductertiary -0.44481870 0.078752796
## leftright -0.16642330 -0.059006658
exp(confint(cl4.m1))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 2.4830608 6.3667146
## age 0.9808811 0.9920991
## male 1.0881065 1.6195882
## educupper secondary 0.6507071 1.0586405
## eductertiary 0.6409405 1.0819368
## leftright 0.8466878 0.9427005
# Examples of likelihood ratio tests, for the same hypotheses as above
# Hypothesis A
lrtest(cl4.m1,"age")
## Likelihood ratio test
##
## Model 1: basicincome ~ age + male + educ + leftright
## Model 2: basicincome ~ male + educ + leftright
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 6 -1101.1
## 2 5 -1112.3 -1 22.312 2.318e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Calculate this explicitly, for comparison
summary(cl4.m2 <- glm(basicincome~male+educ+leftright,data=ESS8UK,family=binomial))
##
## Call:
## glm(formula = basicincome ~ male + educ + leftright, family = binomial,
## data = ESS8UK)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.71157 0.19109 3.724 0.000196 ***
## male 0.27944 0.10074 2.774 0.005541 **
## educupper secondary -0.07193 0.12054 -0.597 0.550694
## eductertiary -0.06097 0.12995 -0.469 0.638955
## leftright -0.13137 0.02703 -4.860 1.18e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2255.4 on 1626 degrees of freedom
## Residual deviance: 2224.6 on 1622 degrees of freedom
## AIC: 2234.6
##
## Number of Fisher Scoring iterations: 4
logLik(cl4.m1)
## 'log Lik.' -1101.144 (df=6)
logLik(cl4.m2)
## 'log Lik.' -1112.3 (df=5)
g.tmp <- 2*(logLik(cl4.m1)-logLik(cl4.m2))
g.tmp
## 'log Lik.' 22.31171 (df=6)
1-pchisq(g.tmp,cl4.m2$df.residual-cl4.m1$df.residual)
## 'log Lik.' 2.317888e-06 (df=6)
# Hypothesis B
lrtest(cl4.m1,"educ")
## Likelihood ratio test
##
## Model 1: basicincome ~ age + male + educ + leftright
## Model 2: basicincome ~ age + male + leftright
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 6 -1101.1
## 2 4 -1102.5 -2 2.673 0.2628
# Hypothesis C
levels(ESS8UK$educ)
## [1] "lower secondary or less" "upper secondary"
## [3] "tertiary"
ESS8UK$educ2 <- ESS8UK$educ
levels(ESS8UK$educ2) <- c("lower","higher","higher") # Combine two levels of the variable
summary(cl4.m4 <- glm(basicincome~age+male+educ2+leftright,data=ESS8UK,family=binomial))
##
## Call:
## glm(formula = basicincome ~ age + male + educ2 + leftright, family = binomial,
## data = ESS8UK)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.377499 0.239608 5.749 8.98e-09 ***
## age -0.013595 0.002899 -4.689 2.75e-06 ***
## male 0.282985 0.101439 2.790 0.00528 **
## educ2higher -0.184517 0.113054 -1.632 0.10266
## leftright -0.112432 0.027307 -4.117 3.83e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2255.4 on 1626 degrees of freedom
## Residual deviance: 2202.3 on 1622 degrees of freedom
## AIC: 2212.3
##
## Number of Fisher Scoring iterations: 4
lrtest(cl4.m1,cl4.m4)
## Likelihood ratio test
##
## Model 1: basicincome ~ age + male + educ + leftright
## Model 2: basicincome ~ age + male + educ2 + leftright
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 6 -1101.1
## 2 5 -1101.1 -1 7e-04 0.9783
# with Probit link function
cl4.glm.probit <- glm(basicincome~age+male+educ+leftright,data=ESS8UK,family=binomial(link="probit"))
# with cloglog link function
cl4.glm.cloglog <- glm(basicincome~age+male+educ+leftright,data=ESS8UK,family=binomial(link="cloglog"))
# ratios of logit to probit
ratio<-coef(cl4.m1)/coef(cl4.glm.probit)
ratio
## (Intercept) age male educupper secondary
## 1.602138 1.602004 1.590388 1.584538
## eductertiary leftright
## 1.593776 1.604209
#####################################
binary.classification.table <- function (model.object,cutoff=0.5)
{
# ST411
# Function for calculating the classification table
# and the various probabilities for it for a fitted binary model,
# given a value of the classification cutoff.
#
# Classification table (frequencies)
cl.table <- table(fitted(model.object)>cutoff,model.object$y)
## Fix the table if all classifications are 0 or 1
if(nrow(cl.table)==1){
if(row.names(cl.table)=="FALSE")
cl.table <- rbind(cl.table,0)
else cl.table <- rbind(0,cl.table)
}
t2x2 <- cl.table
cl.table <- rbind(cl.table,margin.table(cl.table,2))
cl.table <- cbind(cl.table,margin.table(cl.table,1))
dimnames(cl.table) <- list(c("Classified 0","Classified 1","Total"),
c("Observed 0","Observed 1","Total"))
# The probabilities
probs <- c(diag(prop.table(t2x2,2)),
diag(prop.table(t2x2,1)))
probs <- c(probs,1-probs)
probs <- c(as.vector(prop.table(table(model.object$y))),probs)
## This turns it into a column matrix, for convenience of display
dim(probs) <- c(length(probs),1)
dimnames(probs) <- list(c("Observed 0s","Observed 1s",
"Specificity","Sensitivity",
"Negative predictive value","Positive predictive value",
"False positive rate for true negatives",
"False negative rate for true positives",
"False positive rate for classified negatives",
"False negative rate for classified positives"),"probability")
result <- list(classification.table=cl.table, rates=round(probs,4))
result
}
#####################################
summary(cl4.m5 <- glm(basicincome~age+male+leftright,data=ESS8UK,family=binomial))
##
## Call:
## glm(formula = basicincome ~ age + male + leftright, family = binomial,
## data = ESS8UK)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.189646 0.209251 5.685 1.31e-08 ***
## age -0.012551 0.002822 -4.448 8.68e-06 ***
## male 0.282903 0.101351 2.791 0.00525 **
## leftright -0.111553 0.027288 -4.088 4.35e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2255.4 on 1626 degrees of freedom
## Residual deviance: 2205.0 on 1623 degrees of freedom
## AIC: 2213
##
## Number of Fisher Scoring iterations: 4
# Classification table and error rates, for different cut-offs
binary.classification.table(cl4.m5,.5)
## $classification.table
## Observed 0 Observed 1 Total
## Classified 0 435 328 763
## Classified 1 373 491 864
## Total 808 819 1627
##
## $rates
## probability
## Observed 0s 0.4966
## Observed 1s 0.5034
## Specificity 0.5384
## Sensitivity 0.5995
## Negative predictive value 0.5701
## Positive predictive value 0.5683
## False positive rate for true negatives 0.4616
## False negative rate for true positives 0.4005
## False positive rate for classified negatives 0.4299
## False negative rate for classified positives 0.4317
binary.classification.table(cl4.m5,.3)
## $classification.table
## Observed 0 Observed 1 Total
## Classified 0 10 6 16
## Classified 1 798 813 1611
## Total 808 819 1627
##
## $rates
## probability
## Observed 0s 0.4966
## Observed 1s 0.5034
## Specificity 0.0124
## Sensitivity 0.9927
## Negative predictive value 0.6250
## Positive predictive value 0.5047
## False positive rate for true negatives 0.9876
## False negative rate for true positives 0.0073
## False positive rate for classified negatives 0.3750
## False negative rate for classified positives 0.4953
binary.classification.table(cl4.m5,.8)
## $classification.table
## Observed 0 Observed 1 Total
## Classified 0 808 819 1627
## Classified 1 0 0 0
## Total 808 819 1627
##
## $rates
## probability
## Observed 0s 0.4966
## Observed 1s 0.5034
## Specificity 1.0000
## Sensitivity 0.0000
## Negative predictive value 0.4966
## Positive predictive value NaN
## False positive rate for true negatives 0.0000
## False negative rate for true positives 1.0000
## False positive rate for classified negatives 0.5034
## False negative rate for classified positives NaN
# ROC curve and Area Under the Curve
roc(cl4.m5$y,fitted(cl4.m5),plot=T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
##
## Call:
## roc.default(response = cl4.m5$y, predictor = fitted(cl4.m5), plot = T)
##
## Data: fitted(cl4.m5) in 808 controls (cl4.m5$y 0) < 819 cases (cl4.m5$y 1).
## Area under the curve: 0.5979
setwd("D:/LSE/ST411/seminar5")
BngDHS<-readRDS("BngDHS.rds")
BngDHS$educ<-as.factor(BngDHS$educ)
BngDHS$urban<-as.factor(BngDHS$urban)
model1<-glm(usemod~urban+educ+age,data=BngDHS,family=binomial)
# hypothesis A
lrtest(model1,"urban")
## Likelihood ratio test
##
## Model 1: usemod ~ urban + educ + age
## Model 2: usemod ~ educ + age
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 5 -3412.8
## 2 4 -3417.7 -1 9.8086 0.001737 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# hypothesis B
lrtest(model1,"educ")
## Likelihood ratio test
##
## Model 1: usemod ~ urban + educ + age
## Model 2: usemod ~ urban + age
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 5 -3412.8
## 2 3 -3433.7 -2 41.902 7.962e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# hypothesis C
lrtest(model1,"age")
## Likelihood ratio test
##
## Model 1: usemod ~ urban + educ + age
## Model 2: usemod ~ urban + educ
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 5 -3412.8
## 2 4 -3426.1 -1 26.784 2.276e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
All of the three variables are significant in the model, at the 5% level.
model2<-glm(usemod~urban+educ+age+urban*educ,data=BngDHS,family=binomial)
summary(model2)
##
## Call:
## glm(formula = usemod ~ urban + educ + age + urban * educ, family = binomial,
## data = BngDHS)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.22733 0.10892 -11.268 < 2e-16 ***
## urbanurban -0.02773 0.12691 -0.218 0.82705
## educprimary 0.17643 0.07176 2.459 0.01395 *
## educsecondary+ 0.39106 0.09998 3.911 9.18e-05 ***
## age 0.01674 0.00331 5.058 4.23e-07 ***
## urbanurban:educprimary 0.42257 0.20713 2.040 0.04134 *
## urbanurban:educsecondary+ 0.57022 0.19430 2.935 0.00334 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6909.9 on 5295 degrees of freedom
## Residual deviance: 6815.9 on 5289 degrees of freedom
## AIC: 6829.9
##
## Number of Fisher Scoring iterations: 4
lrtest(model1,model2)
## Likelihood ratio test
##
## Model 1: usemod ~ urban + educ + age
## Model 2: usemod ~ urban + educ + age + urban * educ
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 5 -3412.8
## 2 7 -3407.9 2 9.6448 0.008048 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
It is significant at the 5% level.
#For living in a rural area + primary educ:
d.tmp <- BngDHS
d.tmp$age <- 25
d.tmp$urban <- "rural"
d.tmp$educ <- "primary"
mean(predict(model1, d.tmp, type = "response"))
## [1] 0.3506452
#For living in a rural area + secondary educ:
d.tmp <- BngDHS
d.tmp$age <- 25
d.tmp$urban <- "rural"
d.tmp$educ <- "secondary+"
mean(predict(model1, d.tmp, type = "response"))
## [1] 0.4226516
#For living in a rural area + none educ:
d.tmp <- BngDHS
d.tmp$age <- 25
d.tmp$urban <- "rural"
d.tmp$educ <- "none"
mean(predict(model1, d.tmp, type = "response"))
## [1] 0.3011179
#For living in a urban area + primary educ:
d.tmp <- BngDHS
d.tmp$age <- 25
d.tmp$urban <- "urban"
d.tmp$educ <- "primary"
mean(predict(model1, d.tmp, type = "response"))
## [1] 0.4112324
#For living in a urban area + secondary educ:
d.tmp <- BngDHS
d.tmp$age <- 25
d.tmp$urban <- "urban"
d.tmp$educ <- "secondary+"
mean(predict(model1, d.tmp, type = "response"))
## [1] 0.4863617
#For living in a urban area + none educ:
d.tmp <- BngDHS
d.tmp$age <- 25
d.tmp$urban <- "urban"
d.tmp$educ <- "none"
mean(predict(model1, d.tmp, type = "response"))
## [1] 0.3578636
1. Consider the likelihood ratio test of the association between education and use of modern
contraception in Exercise 2.
a. State the null hypothesis of this test.
b. Show, with numbers, how the value of the test statistic was calculated.
c. What do you conclude from the test here, and why?
lrtest(model1,"educ")
## Likelihood ratio test
##
## Model 1: usemod ~ urban + educ + age
## Model 2: usemod ~ urban + age
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 5 -3412.8
## 2 3 -3433.7 -2 41.902 7.962e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
null hypothesis: Levels of education is not significant in the model, i.e. \(\hat{\beta}_{educ} = 0\).
test statistic \(G^2 = 2*(-3412.8-(-3433.7)) = 41.902\).
Levels of education is significant in the model at the 5% significance, for the p value < 0.05.
2. From the coefficients of the model estimated in Exercise 3, calculate the following estimated odds ratios of using modern contraception, controlling for age:
a. the odds ratio comparing urban to rural women, separately at each level of education.
b. the odds ratio comparing each of the two other education levels to no education,
separately for rural and urban women.
summary(model2)
##
## Call:
## glm(formula = usemod ~ urban + educ + age + urban * educ, family = binomial,
## data = BngDHS)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.22733 0.10892 -11.268 < 2e-16 ***
## urbanurban -0.02773 0.12691 -0.218 0.82705
## educprimary 0.17643 0.07176 2.459 0.01395 *
## educsecondary+ 0.39106 0.09998 3.911 9.18e-05 ***
## age 0.01674 0.00331 5.058 4.23e-07 ***
## urbanurban:educprimary 0.42257 0.20713 2.040 0.04134 *
## urbanurban:educsecondary+ 0.57022 0.19430 2.935 0.00334 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6909.9 on 5295 degrees of freedom
## Residual deviance: 6815.9 on 5289 degrees of freedom
## AIC: 6829.9
##
## Number of Fisher Scoring iterations: 4
for educ=none, the odds ratio = \(e^{-0.02773}= 0.9726509\); for educ=primary, the odds ratio = \(e^{-0.02773+0.17643+0.42257}= 1.770514\); for educ=secondary+, the odds ratio = \(e^{-0.02773+0.39106+0.57022}= 2.543523\).
for rural women, the odds ratio of educ=primary to educ=none equals \(e^{0.17643}= 1.192951\), the odds ratio of educ=secondary+ to educ=none equals \(e^{0.39106}= 1.478547\), the odds ratio of educ=secondary+ to educ=primary equals \(e^{0.39106-0.17643}= 1.239403\); for urban women, the odds ratio of educ=primary to educ=none equals \(e^{0.17643+0.42257}= 1.820298\), the odds ratio of educ=secondary+ to educ=none equals \(e^{0.39106+0.57022}= 2.615042\), the odds ratio of educ=secondary+ to educ=primary equals \(e^{0.39106+0.57022-(0.17643+0.42257)}= 1.436601\).
3. Drawing on these odds ratios, and the fitted probabilities in Exercise 4, briefly describe, in your own words, the nature of the interaction between education and location in the model of Exercise 3.
Education and location are correlated when controlling for other variables, suggesting that these two variables will have an additional effect on the probability of use of modern contraception when considered as a combination rather than just included as the main effect. According to the calculation, women with higher education level and living in urban area, are more likely to use modern contraception.
binary.classification.table <- function (model.object,cutoff=0.5)
{
# Function for calculating the classification table
# and the various probabilities for it for a fitted binary model,
# given a value of the classification cutoff.
# Classification table (frequencies)
cl.table <- table(fitted(model.object)>cutoff,model.object$y)
## Fix the table if all classifications are 0 or 1
if(nrow(cl.table)==1){
if(row.names(cl.table)=="FALSE")
cl.table <- rbind(cl.table,0)
else cl.table <- rbind(0,cl.table)
}
t2x2 <- cl.table
cl.table <- rbind(cl.table,margin.table(cl.table,2))
cl.table <- cbind(cl.table,margin.table(cl.table,1))
dimnames(cl.table) <- list(c("Classified 0","Classified 1","Total"),
c("Observed 0","Observed 1","Total"))
# The probabilities
probs <- c(diag(prop.table(t2x2,2)),
diag(prop.table(t2x2,1)))
probs <- c(probs,1-probs)
probs <- c(as.vector(prop.table(table(model.object$y))),probs)
## This turns it into a column matrix, for convenience of display
dim(probs) <- c(length(probs),1)
dimnames(probs) <- list(c("Observed 0s","Observed 1s",
"Specificity","Sensitivity",
"Negative predictive value","Positive predictive value",
"False positive rate for true negatives",
"False negative rate for true positives",
"False positive rate for classified negatives",
"False negative rate for classified positives"),"probability")
result <- list(classification.table=cl.table, rates=round(probs,4))
result
}
# Classification table and error rates, for different cut-offs
binary.classification.table(model2,.5)
## $classification.table
## Observed 0 Observed 1 Total
## Classified 0 3254 1733 4987
## Classified 1 145 164 309
## Total 3399 1897 5296
##
## $rates
## probability
## Observed 0s 0.6418
## Observed 1s 0.3582
## Specificity 0.9573
## Sensitivity 0.0865
## Negative predictive value 0.6525
## Positive predictive value 0.5307
## False positive rate for true negatives 0.0427
## False negative rate for true positives 0.9135
## False positive rate for classified negatives 0.3475
## False negative rate for classified positives 0.4693
binary.classification.table(model2,.3)
## $classification.table
## Observed 0 Observed 1 Total
## Classified 0 567 145 712
## Classified 1 2832 1752 4584
## Total 3399 1897 5296
##
## $rates
## probability
## Observed 0s 0.6418
## Observed 1s 0.3582
## Specificity 0.1668
## Sensitivity 0.9236
## Negative predictive value 0.7963
## Positive predictive value 0.3822
## False positive rate for true negatives 0.8332
## False negative rate for true positives 0.0764
## False positive rate for classified negatives 0.2037
## False negative rate for classified positives 0.6178
binary.classification.table(model2,.8)
## $classification.table
## Observed 0 Observed 1 Total
## Classified 0 3399 1897 5296
## Classified 1 0 0 0
## Total 3399 1897 5296
##
## $rates
## probability
## Observed 0s 0.6418
## Observed 1s 0.3582
## Specificity 1.0000
## Sensitivity 0.0000
## Negative predictive value 0.6418
## Positive predictive value NaN
## False positive rate for true negatives 0.0000
## False negative rate for true positives 1.0000
## False positive rate for classified negatives 0.3582
## False negative rate for classified positives NaN
# ROC curve and Area Under the Curve
roc(model2$y,fitted(model2),plot= T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
##
## Call:
## roc.default(response = model2$y, predictor = fitted(model2), plot = T)
##
## Data: fitted(model2) in 3399 controls (model2$y 0) < 1897 cases (model2$y 1).
## Area under the curve: 0.5777
The ROC is close to the line at the slope of 1, and the AUC value is near 0.5, suggesting that this model including the interaction term is not a very good illustration of classification.