Exercise 5.3


library(car)
## Loading required package: carData



crabs <- read.table("https://users.stat.ufl.edu/~aa/cat/data/Crabs2.dat", header=TRUE)
head(crabs)
##   y Year Fcolor Fsurf  FCW AMCW AMcolor AMsurf
## 1 0 1993      1     1 20.2 18.1       5      5
## 2 0 1993      1     3 23.0 13.5       5      5
## 3 0 1993      5     5 22.6 16.7       5      3
## 4 0 1993      5     5 20.8 16.0       5      4
## 5 0 1993      3     4 23.3 15.3       5      3
## 6 0 1993      5     3 22.9 16.2       5      3



One Variable


logit[P(Y=1)] = α + β1*Fcolor

fit <- glm(y ~ factor(Fcolor), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1600.993
## [1] 1342
## [1] 1606.993
1 - pchisq(32.8, 2)
## [1] 7.543458e-08
anova(fit)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: y
## 
## Terms added sequentially (first to last)
## 
## 
##                Df Deviance Resid. Df Resid. Dev
## NULL                            1344     1633.8
## factor(Fcolor)  2   32.805      1342     1601.0
Anova(fit)
## Analysis of Deviance Table (Type II tests)
## 
## Response: y
##                LR Chisq Df Pr(>Chisq)    
## factor(Fcolor)   32.805  2  7.526e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1



logit[P(Y=1)] = α + β1*Fsurf

fit <- glm(y ~ factor(Fsurf), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1612.567
## [1] 1340
## [1] 1622.567
Anova(fit)
## Analysis of Deviance Table (Type II tests)
## 
## Response: y
##               LR Chisq Df Pr(>Chisq)    
## factor(Fsurf)    21.23  4  0.0002851 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1



logit[P(Y=1)] = α + β1*FCW

fit <- glm(y ~ FCW, family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1631.36
## [1] 1343
## [1] 1635.36
Anova(fit)
## Analysis of Deviance Table (Type II tests)
## 
## Response: y
##     LR Chisq Df Pr(>Chisq)
## FCW   2.4369  1     0.1185



logit[P(Y=1)] = α + β1*AMCW

fit <- glm(y ~ AMCW, family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1632.779
## [1] 1343
## [1] 1636.779
Anova(fit)
## Analysis of Deviance Table (Type II tests)
## 
## Response: y
##      LR Chisq Df Pr(>Chisq)
## AMCW   1.0181  1      0.313



logit[P(Y=1)] = α + β1*AMcolor

fit <- glm(y ~ factor(AMcolor), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1631.451
## [1] 1342
## [1] 1637.451
Anova(fit)
## Analysis of Deviance Table (Type II tests)
## 
## Response: y
##                 LR Chisq Df Pr(>Chisq)
## factor(AMcolor)   2.3459  2     0.3095



logit[P(Y=1)] = α + β1*AMsurf

fit <- glm(y ~ factor(AMsurf), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1621.529
## [1] 1340
## [1] 1631.529
Anova(fit)
## Analysis of Deviance Table (Type II tests)
## 
## Response: y
##                LR Chisq Df Pr(>Chisq)  
## factor(AMsurf)   12.268  4    0.01546 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1



Significant Variables


logit[P(Y=1)] = α + β1Fcolor + β2Fsurf

fit <- glm(y ~ factor(Fcolor) + factor(Fsurf), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1585.927
## [1] 1338
## [1] 1599.927
1 - pchisq(15.1, 4); 1 - pchisq(26.7, 2); 1 - pchisq(35.6, 2)
## [1] 0.004498242
## [1] 1.592827e-06
## [1] 1.860194e-08



logit[P(Y=1)] = α + β1Fcolor + β2AMsurf

fit <- glm(y ~ factor(Fcolor) + factor(AMsurf), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1587.795
## [1] 1338
## [1] 1601.795
1 - pchisq(13.2, 4); 1 - pchisq(24.8, 2); 1 - pchisq(33.7, 2)
## [1] 0.0103388
## [1] 4.118589e-06
## [1] 4.809921e-08



logit[P(Y=1)] = α + β1Fsurf + β2AMsurf

fit <- glm(y ~ factor(Fsurf) + factor(AMsurf), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1599.78
## [1] 1336
## [1] 1617.78
1 - pchisq(1.2, 6); 1 - pchisq(12.8, 4); 1 - pchisq(21.7, 4)
## [1] 0.9768847
## [1] 0.01229552
## [1] 0.0002299446



logit[P(Y=1)] = α + β1Fcolor + β2Fsurf + β3*AMsurf

fit <- glm(y ~ factor(Fcolor) + factor(Fsurf) + factor(AMsurf), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1572.835
## [1] 1334
## [1] 1594.835
1 - pchisq(28.2, 8); 1 - pchisq(39.8, 10); 1 - pchisq(29.8, 10)
## [1] 0.0004376765
## [1] 1.837528e-05
## [1] 0.0009235903
1 - pchisq(13.1, 4); 1 - pchisq(15, 4)
## [1] 0.01079737
## [1] 0.004701217



Significant Variables + Unsignificant Variables


logit[P(Y=1)] = α + β1Fcolor + β2Fsurf + β3*FCW

fit <- glm(y ~ factor(Fcolor) + factor(Fsurf) + FCW, family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1584.341
## [1] 1337
## [1] 1600.341
1 - pchisq(1.6, 1); 1 - pchisq(3.5, 1)
## [1] 0.2059032
## [1] 0.06136883



logit[P(Y=1)] = α + β1Fcolor + β2AMsurf + β3*FCW

fit <- glm(y ~ factor(Fcolor) + factor(AMsurf) + FCW, family=binomial, data = crabs)
fit$deviance
## [1] 1586.565
fit$df.residual
## [1] 1337
fit$aic
## [1] 1602.565
1 - pchisq(0.7, 1); 1 - pchisq(1.2, 1)
## [1] 0.4027837
## [1] 0.2733217



logit[P(Y=1)] = α + β1Fcolor + β2Fsurf + β3*AMCW

fit <- glm(y ~ factor(Fcolor) + factor(Fsurf) + AMCW, family=binomial, data = crabs)
fit$deviance
## [1] 1584.816
fit$df.residual
## [1] 1337
fit$aic
## [1] 1600.816
1 - pchisq(1.1, 1); 1 - pchisq(3.0, 1)
## [1] 0.2942661
## [1] 0.08326452



logit[P(Y=1)] = α + β1Fcolor + β2AMsurf + β3*AMCW

fit <- glm(y ~ factor(Fcolor) + factor(AMsurf) + AMCW, family=binomial, data = crabs)
fit$deviance
## [1] 1586.609
fit$df.residual
## [1] 1337
fit$aic
## [1] 1602.609
1 - pchisq(72.6, 66)
## [1] 0.2696234
1 - pchisq(74.5, 66)
## [1] 0.2213234



logit[P(Y=1)] = α + β1Fcolor + β2Fsurf + β3AMCW + β4FCW

fit <- glm(y ~ factor(Fcolor) + factor(Fsurf) + factor(AMsurf) + FCW, family=binomial, data = crabs)
fit$deviance
## [1] 1571.778
fit$df.residual
## [1] 1333
fit$aic
## [1] 1595.778
1 - pchisq(1, 5)
## [1] 0.9625658



logit[P(Y=1)] = α + β1Fcolor + β2Fsurf + β3AMsurf + β4AMCW

fit <- glm(y ~ factor(Fcolor) + factor(Fsurf) + factor(AMsurf) + AMCW, family=binomial, data = crabs)
fit$deviance
## [1] 1571.405
fit$df.residual
## [1] 1333
fit$aic
## [1] 1595.405
1 - pchisq(1.4, 5)
## [1] 0.9243133



Interaction Variables


logit[P(Y=1)] = α + β1Fcolor + β2Fsurf + β3Fcolor + β4Fsurf

fit <- glm(y ~ factor(Fcolor) + factor(Fsurf) + factor(Fcolor) * factor(Fsurf), family=binomial, data = crabs)
fit$deviance
## [1] 1573.925
fit$df.residual
## [1] 1330
fit$aic
## [1] 1603.925
1 - pchisq(12, 8)
## [1] 0.1512039
1 - pchisq(13.9, 8)
## [1] 0.0844094



logit[P(Y=1)] = α + β1Fcolor + β2AMsurf + β3Fcolor + β4AMsurf

fit <- glm(y ~ factor(Fcolor) + factor(AMsurf) + factor(Fcolor) * factor(AMsurf), family=binomial, data = crabs)
fit$deviance
## [1] 1576.534
fit$df.residual
## [1] 1330
fit$aic
## [1] 1606.534
1 - pchisq(9.4, 8)
## [1] 0.3096836
1 - pchisq(11.3, 8)
## [1] 0.1852733



Final Model


logit[P(Y=1)] = α + β1Fcolor + β2Fsurf
logit[P(Y=1)] = α + β1Fcolor + β2AMsurf
logit[P(Y=1)] = α + β1Fcolor + β2Fsurf + β3*AMsurf



Exercise 5.9


Major <- c(1, 2, 3, 4, 5, 6)
MaleYes <- c(512, 353, 120, 138, 53, 22); MaleNo <- c(313, 207, 205, 279, 138, 351)
FemaleYes <- c(89, 17, 202, 131, 94, 24); FemaleNo <- c(19, 9, 391, 244, 299, 317)
admission <- data.frame(Major, MaleYes, MaleNo, FemaleYes, FemaleNo)
admission$Yes <- admission$MaleYes + admission$FemaleYes
admission$No <- admission$MaleNo + admission$FemaleNo
admission$N <- admission$Yes + admission$No
admission
##   Major MaleYes MaleNo FemaleYes FemaleNo Yes  No   N
## 1     1     512    313        89       19 601 332 933
## 2     2     353    207        17        9 370 216 586
## 3     3     120    205       202      391 322 596 918
## 4     4     138    279       131      244 269 523 792
## 5     5      53    138        94      299 147 437 584
## 6     6      22    351        24      317  46 668 714



fit <- glm(Yes/N ~ Major, family=binomial, data = admission, weights = N)
cbind(rstandard(fit, type = "pearson"), residuals(fit, type = "pearson"), 
    rstandard(fit, type = "deviance"), residuals(fit, type = "deviance"))
##        [,1]      [,2]      [,3]      [,4]
## 1 -3.261655 -2.033601 -3.236246 -2.017759
## 2  4.684244  4.108896  4.721311  4.141410
## 3 -4.337947 -3.764456 -4.376593 -3.797994
## 4  3.639266  3.144973  3.582178  3.095639
## 5  4.362239  3.743110  4.207921  3.610694
## 6 -5.904581 -4.595225 -6.409891 -4.988482



fit <- glm(Yes/N ~ Major, family=binomial, data = admission, weights = N)
fit
## 
## Call:  glm(formula = Yes/N ~ Major, family = binomial, data = admission, 
##     weights = N)
## 
## Coefficients:
## (Intercept)        Major  
##      1.2766      -0.5442  
## 
## Degrees of Freedom: 5 Total (i.e. Null);  4 Residual
## Null Deviance:       854.3 
## Residual Deviance: 83.15     AIC: 127.5



Major <- c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6)
Gender <- c("Male", "Female","Male", "Female", "Male", "Female", "Male", "Female",
            "Male", "Female", "Male", "Female")
Yes <- c(512, 89, 353, 17, 120, 202, 138, 131, 53, 94, 22, 24);
No <- c(313, 19, 207, 9, 205, 391, 279, 244, 138, 299, 351, 317)
admission <- data.frame(Major, Gender, Yes, No)
admission$N <- admission$Yes + admission$No
fit <- glm(Yes/N ~ Major + Gender, family=binomial, data = admission, weights = N)
fit
## 
## Call:  glm(formula = Yes/N ~ Major + Gender, family = binomial, data = admission, 
##     weights = N)
## 
## Coefficients:
## (Intercept)        Major   GenderMale  
##    1.282609    -0.544944    -0.006228  
## 
## Degrees of Freedom: 11 Total (i.e. Null);  9 Residual
## Null Deviance:       875.8 
## Residual Deviance: 104.7     AIC: 179.7



Reference

[1] Alan Agresti, 『범주형 자료분석 개론』, 박태성, 이승연 역, 자유아카데미, 2020
[2] Alan Agresti, 『Categorical Data Analysis (3rd Edition)』, Wiley, 2018
[3] https://users.stat.ufl.edu/~aa/cat/data/Crabs2.dat
[4] P.J.Bickel, E.A.Hammel, J.W.O’Connell, 「Sex Bias in Graduate Admissions: Data from Berkeley」, Science, New Series, Vol.187, No.4175 (Feb.7, 1975), 398-404