Question 1
students <- read.table("http://users.stat.ufl.edu/~aa/cat/data/Students.dat", header = TRUE)
library("car")
## Loading required package: carData
#do models with individual predictors first
ideology <- glm(abor ~ ideol, family = "binomial", data = students)
Anova(ideology) #significant
## Analysis of Deviance Table (Type II tests)
##
## Response: abor
## LR Chisq Df Pr(>Chisq)
## ideol 17.255 1 3.269e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
religion <- glm(abor ~ relig, family = "binomial", data = students)
Anova(religion) #significant
## Analysis of Deviance Table (Type II tests)
##
## Response: abor
## LR Chisq Df Pr(>Chisq)
## relig 14.457 1 0.0001434 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
news <- glm(abor ~ news, family = "binomial", data = students)
Anova(news) #significant
## Analysis of Deviance Table (Type II tests)
##
## Response: abor
## LR Chisq Df Pr(>Chisq)
## news 7.3299 1 0.006782 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
gpa <- glm(abor ~ hsgpa, family = "binomial", data = students)
Anova(gpa) #not significant
## Analysis of Deviance Table (Type II tests)
##
## Response: abor
## LR Chisq Df Pr(>Chisq)
## hsgpa 0.073479 1 0.7863
gender <- glm(abor ~ gender, family = "binomial", data = students)
Anova(gender) #not significant
## Analysis of Deviance Table (Type II tests)
##
## Response: abor
## LR Chisq Df Pr(>Chisq)
## gender 1.1648 1 0.2805
#just ideology and religion and news
step1 <- glm(abor~ ideol+ relig + news, family = "binomial", data = students )
Anova(step1)
## Analysis of Deviance Table (Type II tests)
##
## Response: abor
## LR Chisq Df Pr(>Chisq)
## ideol 11.9057 1 0.0005596 ***
## relig 2.2229 1 0.1359770
## news 12.7306 1 0.0003597 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#remove religion
step2 <-glm(abor ~ideol + news , family = "binomial", data = students)
Anova(step2)
## Analysis of Deviance Table (Type II tests)
##
## Response: abor
## LR Chisq Df Pr(>Chisq)
## ideol 23.375 1 1.333e-06 ***
## news 13.450 1 0.000245 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#add gpa
step3 <- glm(abor ~ ideol + news + hsgpa , family = "binomial", data = students)
Anova(step3)
## Analysis of Deviance Table (Type II tests)
##
## Response: abor
## LR Chisq Df Pr(>Chisq)
## ideol 26.7100 1 2.364e-07 ***
## news 16.7234 1 4.324e-05 ***
## hsgpa 4.0704 1 0.04364 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#add gender, not significant
step5 <- glm(abor ~ ideol + news + hsgpa + gender, family = "binomial", data = students)
Anova(step5)
## Analysis of Deviance Table (Type II tests)
##
## Response: abor
## LR Chisq Df Pr(>Chisq)
## ideol 23.5615 1 1.210e-06 ***
## news 16.4569 1 4.977e-05 ***
## hsgpa 4.1486 1 0.04167 *
## gender 0.0796 1 0.77784
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#check interactions, none of them are significant
step6 <- glm(abor ~ ideol + news + hsgpa + ideol*news*hsgpa, family = "binomial", data = students)
Anova(step6)
## Analysis of Deviance Table (Type II tests)
##
## Response: abor
## LR Chisq Df Pr(>Chisq)
## ideol 24.8834 1 6.091e-07 ***
## news 13.2913 1 0.0002666 ***
## hsgpa 4.0505 1 0.0441581 *
## ideol:news 0.2029 1 0.6523619
## ideol:hsgpa 1.6654 1 0.1968702
## news:hsgpa 0.3326 1 0.5641574
## ideol:news:hsgpa 0.5789 1 0.4467626
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#final model
final <- glm(abor ~ ideol + news + hsgpa , family = "binomial", data = students)
Anova(final)
## Analysis of Deviance Table (Type II tests)
##
## Response: abor
## LR Chisq Df Pr(>Chisq)
## ideol 26.7100 1 2.364e-07 ***
## news 16.7234 1 4.324e-05 ***
## hsgpa 4.0704 1 0.04364 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#step 5, diagnostics
important <- students[, c(1,4,10,14 )]
df <- cbind(important, res =round(rstandard(final, type ="pearson"),4))
df <- df[order(df$res),]
df
## subject hsgpa news ideol res
## 41 41 3.4 2 2 -4.0103
## 17 17 4.0 3 3 -1.7832
## 32 32 3.5 0 2 -1.0775
## 56 56 3.6 5 6 -0.9931
## 10 10 4.0 2 3 -0.9578
## 11 11 2.3 1 5 -0.9012
## 48 48 3.3 5 7 -0.6384
## 51 51 3.0 2 5 -0.6135
## 36 36 3.6 3 5 -0.5661
## 52 52 4.0 2 4 -0.4204
## 43 43 3.8 4 6 -0.3893
## 1 1 2.2 0 6 -0.1920
## 23 23 2.7 2 7 -0.1707
## 33 33 3.4 14 2 0.0001
## 13 13 3.3 12 1 0.0002
## 47 47 3.0 14 3 0.0002
## 25 25 3.2 7 1 0.0036
## 2 2 2.1 5 2 0.0080
## 6 6 3.5 7 2 0.0114
## 12 12 3.5 7 2 0.0114
## 54 54 3.5 7 2 0.0114
## 27 27 3.7 7 2 0.0144
## 37 37 3.8 7 2 0.0162
## 38 38 3.4 6 2 0.0193
## 22 22 4.0 7 2 0.0204
## 46 46 3.0 5 2 0.0231
## 34 34 3.2 5 2 0.0292
## 18 18 3.8 6 2 0.0308
## 9 9 3.0 3 1 0.0379
## 5 5 3.1 3 1 0.0426
## 60 60 3.4 7 4 0.0500
## 42 42 2.0 3 3 0.0582
## 8 8 3.0 3 2 0.0845
## 7 7 3.6 4 2 0.0893
## 4 4 3.5 6 4 0.1083
## 3 3 3.3 3 2 0.1203
## 30 30 3.3 3 2 0.1203
## 31 31 3.3 3 2 0.1203
## 15 15 3.0 1 1 0.1396
## 50 50 3.0 1 1 0.1396
## 28 28 3.7 7 5 0.1623
## 29 29 3.3 5 4 0.1646
## 58 58 3.0 3 3 0.1898
## 39 39 2.8 5 5 0.2074
## 49 49 3.8 3 2 0.2184
## 45 45 3.0 4 4 0.2227
## 55 55 3.4 2 2 0.2609
## 44 44 3.0 5 5 0.2637
## 20 20 3.0 0 1 0.2738
## 53 53 3.0 1 2 0.3168
## 26 26 3.0 2 3 0.3679
## 59 59 3.0 3 4 0.4312
## 35 35 3.5 0 1 0.5087
## 21 21 3.7 4 4 0.5127
## 19 19 4.0 2 2 0.5505
## 57 57 3.5 3 4 0.7625
## 16 16 3.8 1 2 0.8451
## 40 40 3.8 5 6 1.5931
## 24 24 3.7 2 4 1.8718
## 14 14 3.2 1 4 2.0165
#part b
library(MASS)
allvars <- glm(abor~ . - affil - life - subject, family = "binomial", data = students)
summary(allvars)
##
## Call:
## glm(formula = abor ~ . - affil - life - subject, family = "binomial",
## data = students)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.48649 0.00009 0.04055 0.20125 1.97028
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 10.1014276 10.8914049 0.927 0.3537
## gender 1.0021615 1.8655342 0.537 0.5911
## age -0.0783427 0.1274839 -0.615 0.5389
## hsgpa -3.7344482 2.8093160 -1.329 0.1837
## cogpa 2.5112750 3.7399060 0.671 0.5019
## dhome 0.0005574 0.0006789 0.821 0.4116
## dres -0.3388230 0.2953830 -1.147 0.2514
## tv 0.2659760 0.2531643 1.051 0.2934
## sport 0.0272104 0.2551460 0.107 0.9151
## news 1.3868778 0.6986772 1.985 0.0471 *
## aids 0.3966764 0.5663706 0.700 0.4837
## veg 4.3213535 3.8614639 1.119 0.2631
## ideol -1.6377902 0.7892505 -2.075 0.0380 *
## relig -0.7245665 0.7820724 -0.926 0.3542
## affirm -2.7481550 2.6898822 -1.022 0.3069
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 62.719 on 59 degrees of freedom
## Residual deviance: 21.368 on 45 degrees of freedom
## AIC: 51.368
##
## Number of Fisher Scoring iterations: 9
stepAIC(allvars)
## Start: AIC=51.37
## abor ~ (subject + gender + age + hsgpa + cogpa + dhome + dres +
## tv + sport + news + aids + veg + affil + ideol + relig +
## affirm + life) - affil - life - subject
##
## Df Deviance AIC
## - sport 1 21.380 49.380
## - gender 1 21.665 49.665
## - age 1 21.752 49.752
## - cogpa 1 22.028 50.028
## - aids 1 22.197 50.197
## - relig 1 22.355 50.355
## - dhome 1 22.466 50.466
## - affirm 1 22.664 50.664
## - dres 1 22.927 50.927
## - tv 1 23.147 51.147
## <none> 21.368 51.368
## - veg 1 23.389 51.389
## - hsgpa 1 24.924 52.924
## - ideol 1 32.261 60.261
## - news 1 34.371 62.371
##
## Step: AIC=49.38
## abor ~ gender + age + hsgpa + cogpa + dhome + dres + tv + news +
## aids + veg + ideol + relig + affirm
##
## Df Deviance AIC
## - gender 1 21.686 47.686
## - age 1 21.754 47.754
## - aids 1 22.199 48.199
## - cogpa 1 22.261 48.261
## - relig 1 22.397 48.397
## - dhome 1 22.497 48.497
## - affirm 1 22.689 48.689
## - dres 1 22.927 48.927
## - tv 1 23.172 49.172
## <none> 21.380 49.380
## - veg 1 23.778 49.778
## - hsgpa 1 24.990 50.990
## - ideol 1 32.418 58.418
## - news 1 35.239 61.239
##
## Step: AIC=47.69
## abor ~ age + hsgpa + cogpa + dhome + dres + tv + news + aids +
## veg + ideol + relig + affirm
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - age 1 22.094 46.094
## - relig 1 22.418 46.418
## - aids 1 22.680 46.680
## - dhome 1 22.713 46.713
## - affirm 1 22.787 46.787
## - dres 1 23.051 47.051
## - cogpa 1 23.200 47.200
## <none> 21.686 47.686
## - veg 1 24.103 48.103
## - tv 1 24.238 48.238
## - hsgpa 1 25.008 49.008
## - ideol 1 33.813 57.813
## - news 1 35.965 59.965
##
## Step: AIC=46.09
## abor ~ hsgpa + cogpa + dhome + dres + tv + news + aids + veg +
## ideol + relig + affirm
##
## Df Deviance AIC
## - relig 1 22.691 44.691
## - aids 1 22.701 44.701
## - dhome 1 22.740 44.740
## - affirm 1 22.790 44.790
## - dres 1 23.138 45.138
## - cogpa 1 23.553 45.553
## <none> 22.094 46.094
## - veg 1 24.106 46.106
## - tv 1 24.288 46.288
## - hsgpa 1 25.454 47.454
## - ideol 1 33.815 55.815
## - news 1 36.056 58.056
##
## Step: AIC=44.69
## abor ~ hsgpa + cogpa + dhome + dres + tv + news + aids + veg +
## ideol + affirm
##
## Df Deviance AIC
## - affirm 1 23.286 43.286
## - aids 1 23.371 43.371
## - dhome 1 23.773 43.773
## - veg 1 24.626 44.626
## - cogpa 1 24.653 44.653
## <none> 22.691 44.691
## - dres 1 24.784 44.784
## - tv 1 25.364 45.364
## - hsgpa 1 26.035 46.035
## - news 1 36.921 56.921
## - ideol 1 40.943 60.943
##
## Step: AIC=43.29
## abor ~ hsgpa + cogpa + dhome + dres + tv + news + aids + veg +
## ideol
##
## Df Deviance AIC
## - aids 1 23.754 41.754
## - dhome 1 23.901 41.901
## - veg 1 24.658 42.658
## - dres 1 24.785 42.785
## - cogpa 1 25.135 43.135
## <none> 23.286 43.286
## - tv 1 25.430 43.430
## - hsgpa 1 26.426 44.426
## - news 1 37.250 55.250
## - ideol 1 41.782 59.782
##
## Step: AIC=41.75
## abor ~ hsgpa + cogpa + dhome + dres + tv + news + veg + ideol
##
## Df Deviance AIC
## - dhome 1 24.266 40.266
## - veg 1 24.712 40.712
## - dres 1 24.790 40.790
## - cogpa 1 25.197 41.197
## - tv 1 25.450 41.450
## <none> 23.754 41.754
## - hsgpa 1 26.694 42.694
## - news 1 37.343 53.343
## - ideol 1 43.856 59.856
##
## Step: AIC=40.27
## abor ~ hsgpa + cogpa + dres + tv + news + veg + ideol
##
## Df Deviance AIC
## - veg 1 25.004 39.004
## - dres 1 25.279 39.279
## - tv 1 25.716 39.716
## - cogpa 1 25.790 39.790
## <none> 24.266 40.266
## - hsgpa 1 27.251 41.251
## - news 1 39.648 53.648
## - ideol 1 46.859 60.859
##
## Step: AIC=39
## abor ~ hsgpa + cogpa + dres + tv + news + ideol
##
## Df Deviance AIC
## - cogpa 1 25.912 37.912
## - tv 1 26.009 38.009
## - dres 1 26.151 38.151
## <none> 25.004 39.004
## - hsgpa 1 27.460 39.460
## - news 1 39.657 51.657
## - ideol 1 50.040 62.040
##
## Step: AIC=37.91
## abor ~ hsgpa + dres + tv + news + ideol
##
## Df Deviance AIC
## - dres 1 27.146 37.146
## - tv 1 27.160 37.160
## - hsgpa 1 27.839 37.839
## <none> 25.912 37.912
## - news 1 40.892 50.892
## - ideol 1 50.933 60.933
##
## Step: AIC=37.15
## abor ~ hsgpa + tv + news + ideol
##
## Df Deviance AIC
## - tv 1 27.944 35.944
## <none> 27.146 37.146
## - hsgpa 1 30.248 38.248
## - news 1 42.143 50.143
## - ideol 1 52.334 60.334
##
## Step: AIC=35.94
## abor ~ hsgpa + news + ideol
##
## Df Deviance AIC
## <none> 27.944 35.944
## - hsgpa 1 32.014 38.014
## - news 1 44.667 50.667
## - ideol 1 54.654 60.654
##
## Call: glm(formula = abor ~ hsgpa + news + ideol, family = "binomial",
## data = students)
##
## Coefficients:
## (Intercept) hsgpa news ideol
## 11.287 -2.338 1.291 -1.594
##
## Degrees of Freedom: 59 Total (i.e. Null); 56 Residual
## Null Deviance: 62.72
## Residual Deviance: 27.94 AIC: 35.94
Using the purposeful selection process, the variables included in the final model were high school GPA, ideology, and news. The R function stepAIC came to the same conclusion. The highest absolute value for the Pearson residuals is about 4, but almost all others are below 2 whic indicates that for almost all of the observations, the model is a good fit.
Question 2
#logit[pi(x)] = -12.351 + 0.497x
#mean = 26.3
#2.1
pi_bar <- exp(-12.351+0.497*26.3)/(1+exp(-12.351+0.497*26.3))
pi_two <- exp(-12.351+0.497*28.4)/(1+exp(-12.351+0.497*28.4))
theta <- (pi_bar/(1-pi_bar))/(pi_two/(1-pi_two))
lambda <- log(theta)
delta <- (1 + (1+(lambda)^2)*exp((5*lambda^2)/4))/(1+exp(-(lambda^2)/4))
z_alpha <- qnorm(0.95)
z_beta <- qnorm(0.90)
n = (z_alpha + z_beta*exp(-lambda^2/4))^2 *(1+2*pi_bar*delta)/(pi_bar*lambda^2)
crabbies <- read.table("http://users.stat.ufl.edu/~aa/cat/data/Crabs.dat", header = TRUE)
hist(crabbies$width, main = "Crab Width", xlab = "Width")
library("moments")
skewness(crabbies$width)
## [1] 0.3235141
kurtosis(crabbies$width)
## [1] 3.163346
Under the assumption that X is random and has a normal distribution, \(n =\) 74.9085563. Based on the histogram of width, it does appear to have approximately a normal distribution. The skewness of .3235 and kurtosis of 3.1611 support this as well.
Question 3
#part a
x <- c(5.1,10.1,20.2,30.3,40.4,50.5)
y <- c(16,18,34,47,47,48)
n <- c(49,48,48,49,50,48)
pct <- c(0.327,0.375,0.708,0.959,0.940,1)
insects <- data.frame(x,y,n, pct)
logit <- glm(y/n ~ x, family = "binomial", weights = n, data = insects)
summary(logit)
##
## Call:
## glm(formula = y/n ~ x, family = "binomial", data = insects, weights = n)
##
## Deviance Residuals:
## 1 2 3 4 5 6
## 0.6493 -0.7413 -0.3090 1.4266 -1.1867 0.8786
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.5694 0.2939 -5.339 9.35e-08 ***
## x 0.1265 0.0161 7.856 3.96e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 123.9631 on 5 degrees of freedom
## Residual deviance: 5.2819 on 4 degrees of freedom
## AIC: 27.427
##
## Number of Fisher Scoring iterations: 4
probit <- glm(y/n~x, family = "binomial"(link=probit), weights = n, data = insects)
summary(probit)
##
## Call:
## glm(formula = y/n ~ x, family = binomial(link = probit), data = insects,
## weights = n)
##
## Deviance Residuals:
## 1 2 3 4 5 6
## 0.4049 -0.8063 0.0173 1.6251 -1.4000 0.5839
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.886370 0.170177 -5.209 1.9e-07 ***
## x 0.070871 0.008394 8.443 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 123.9631 on 5 degrees of freedom
## Residual deviance: 5.7563 on 4 degrees of freedom
## AIC: 27.901
##
## Number of Fisher Scoring iterations: 5
data <- cbind(insects, round(rstandard(logit, type="pearson"),4), round(rstandard(probit, type = "pearson"),4) )
colnames(data) <- c("conc", "y", "n", "pct", "Logit Standard Res.", "Probit Standard Res.")
data
## conc y n pct Logit Standard Res. Probit Standard Res.
## 1 5.1 16 49 0.327 0.9595 0.5935
## 2 10.1 18 48 0.375 -0.9463 -1.0079
## 3 20.2 34 48 0.708 -0.3771 0.0202
## 4 30.3 47 49 0.959 1.6001 1.8214
## 5 40.4 47 50 0.940 -1.5860 -2.0255
## 6 50.5 48 48 1.000 0.6689 0.4447
logit$aic
## [1] 27.4265
probit$aic
## [1] 27.90091
Question 4
#plots
range_x <- range(insects$x)
range_y <- c(0,1)
curve(predict(logit, data.frame(x=x), type = "response"), col = "purple", lty=1, lwd = 4, xlim = range_x, ylim = range_y, xlab ="Concentration", ylab = "Percent Killed")
par(new=T)
curve(predict(probit, data.frame(x=x), type = "response"), col = "blue", lty = 2, lwd = 4, xlim = range_x, ylim = range_y,xlab = "", ylab = "")
legend("topleft", lwd = 3, lty = c(3,2), col = c("purple", "blue"), legend=c("Logit", "Probit"))
par(new=T)
plot(insects$x, insects$y/insects$n, xlab = "", ylab = "", main = "Logit and Probit Models", xlim = range_x, ylim = range_y, col = "dark green", pch = 4)
Question 5
The SAS output indicates that the log dose scale is more appropriate for this data. The hyperparameter theta is found to be closer to one than to zero, which, in the context of this problem, indicates that the log dose scale is better. In addition, the AIC is lower for the log dose scale and the plot shows that the data points follow the log dose model more closely than the dose scale. Looking at the residual plots shows lower residuals for the log dose scale, which is another indication that this is a better choice. (See SAS code and output attached)