library(car)
## Loading required package: carData
crabs <- read.table("https://users.stat.ufl.edu/~aa/cat/data/Crabs2.dat", header=TRUE)
head(crabs)
## y Year Fcolor Fsurf FCW AMCW AMcolor AMsurf
## 1 0 1993 1 1 20.2 18.1 5 5
## 2 0 1993 1 3 23.0 13.5 5 5
## 3 0 1993 5 5 22.6 16.7 5 3
## 4 0 1993 5 5 20.8 16.0 5 4
## 5 0 1993 3 4 23.3 15.3 5 3
## 6 0 1993 5 3 22.9 16.2 5 3
logit[P(Y=1)] = α + β1*Fcolor
fit <- glm(y ~ factor(Fcolor), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1600.993
## [1] 1342
## [1] 1606.993
1 - pchisq(32.8, 2)
## [1] 7.543458e-08
anova(fit)
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: y
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev
## NULL 1344 1633.8
## factor(Fcolor) 2 32.805 1342 1601.0
Anova(fit)
## Analysis of Deviance Table (Type II tests)
##
## Response: y
## LR Chisq Df Pr(>Chisq)
## factor(Fcolor) 32.805 2 7.526e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
logit[P(Y=1)] = α + β1*Fsurf
fit <- glm(y ~ factor(Fsurf), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1612.567
## [1] 1340
## [1] 1622.567
Anova(fit)
## Analysis of Deviance Table (Type II tests)
##
## Response: y
## LR Chisq Df Pr(>Chisq)
## factor(Fsurf) 21.23 4 0.0002851 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
logit[P(Y=1)] = α + β1*FCW
fit <- glm(y ~ FCW, family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1631.36
## [1] 1343
## [1] 1635.36
Anova(fit)
## Analysis of Deviance Table (Type II tests)
##
## Response: y
## LR Chisq Df Pr(>Chisq)
## FCW 2.4369 1 0.1185
logit[P(Y=1)] = α + β1*AMCW
fit <- glm(y ~ AMCW, family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1632.779
## [1] 1343
## [1] 1636.779
Anova(fit)
## Analysis of Deviance Table (Type II tests)
##
## Response: y
## LR Chisq Df Pr(>Chisq)
## AMCW 1.0181 1 0.313
logit[P(Y=1)] = α + β1*AMcolor
fit <- glm(y ~ factor(AMcolor), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1631.451
## [1] 1342
## [1] 1637.451
Anova(fit)
## Analysis of Deviance Table (Type II tests)
##
## Response: y
## LR Chisq Df Pr(>Chisq)
## factor(AMcolor) 2.3459 2 0.3095
logit[P(Y=1)] = α + β1*AMsurf
fit <- glm(y ~ factor(AMsurf), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1621.529
## [1] 1340
## [1] 1631.529
Anova(fit)
## Analysis of Deviance Table (Type II tests)
##
## Response: y
## LR Chisq Df Pr(>Chisq)
## factor(AMsurf) 12.268 4 0.01546 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
logit[P(Y=1)] = α + β1Fcolor + β2Fsurf
fit <- glm(y ~ factor(Fcolor) + factor(Fsurf), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1585.927
## [1] 1338
## [1] 1599.927
1 - pchisq(15.1, 4); 1 - pchisq(26.7, 2); 1 - pchisq(35.6, 2)
## [1] 0.004498242
## [1] 1.592827e-06
## [1] 1.860194e-08
logit[P(Y=1)] = α + β1Fcolor + β2AMsurf
fit <- glm(y ~ factor(Fcolor) + factor(AMsurf), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1587.795
## [1] 1338
## [1] 1601.795
1 - pchisq(13.2, 4); 1 - pchisq(24.8, 2); 1 - pchisq(33.7, 2)
## [1] 0.0103388
## [1] 4.118589e-06
## [1] 4.809921e-08
logit[P(Y=1)] = α + β1Fsurf + β2AMsurf
fit <- glm(y ~ factor(Fsurf) + factor(AMsurf), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1599.78
## [1] 1336
## [1] 1617.78
1 - pchisq(1.2, 6); 1 - pchisq(12.8, 4); 1 - pchisq(21.7, 4)
## [1] 0.9768847
## [1] 0.01229552
## [1] 0.0002299446
logit[P(Y=1)] = α + β1Fcolor + β2Fsurf +
β3*AMsurf
fit <- glm(y ~ factor(Fcolor) + factor(Fsurf) + factor(AMsurf), family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1572.835
## [1] 1334
## [1] 1594.835
1 - pchisq(28.2, 8); 1 - pchisq(39.8, 10); 1 - pchisq(29.8, 10)
## [1] 0.0004376765
## [1] 1.837528e-05
## [1] 0.0009235903
1 - pchisq(13.1, 4); 1 - pchisq(15, 4)
## [1] 0.01079737
## [1] 0.004701217
logit[P(Y=1)] = α + β1Fcolor + β2Fsurf + β3*FCW
fit <- glm(y ~ factor(Fcolor) + factor(Fsurf) + FCW, family=binomial, data = crabs)
fit$deviance; fit$df.residual; fit$aic
## [1] 1584.341
## [1] 1337
## [1] 1600.341
1 - pchisq(1.6, 1); 1 - pchisq(3.5, 1)
## [1] 0.2059032
## [1] 0.06136883
logit[P(Y=1)] = α + β1Fcolor + β2AMsurf + β3*FCW
fit <- glm(y ~ factor(Fcolor) + factor(AMsurf) + FCW, family=binomial, data = crabs)
fit$deviance
## [1] 1586.565
fit$df.residual
## [1] 1337
fit$aic
## [1] 1602.565
1 - pchisq(0.7, 1); 1 - pchisq(1.2, 1)
## [1] 0.4027837
## [1] 0.2733217
logit[P(Y=1)] = α + β1Fcolor + β2Fsurf + β3*AMCW
fit <- glm(y ~ factor(Fcolor) + factor(Fsurf) + AMCW, family=binomial, data = crabs)
fit$deviance
## [1] 1584.816
fit$df.residual
## [1] 1337
fit$aic
## [1] 1600.816
1 - pchisq(1.1, 1); 1 - pchisq(3.0, 1)
## [1] 0.2942661
## [1] 0.08326452
logit[P(Y=1)] = α + β1Fcolor + β2AMsurf +
β3*AMCW
fit <- glm(y ~ factor(Fcolor) + factor(AMsurf) + AMCW, family=binomial, data = crabs)
fit$deviance
## [1] 1586.609
fit$df.residual
## [1] 1337
fit$aic
## [1] 1602.609
1 - pchisq(72.6, 66)
## [1] 0.2696234
1 - pchisq(74.5, 66)
## [1] 0.2213234
logit[P(Y=1)] = α + β1Fcolor + β2Fsurf +
β3AMCW + β4FCW
fit <- glm(y ~ factor(Fcolor) + factor(Fsurf) + factor(AMsurf) + FCW, family=binomial, data = crabs)
fit$deviance
## [1] 1571.778
fit$df.residual
## [1] 1333
fit$aic
## [1] 1595.778
1 - pchisq(1, 5)
## [1] 0.9625658
logit[P(Y=1)] = α + β1Fcolor + β2Fsurf +
β3AMsurf + β4AMCW
fit <- glm(y ~ factor(Fcolor) + factor(Fsurf) + factor(AMsurf) + AMCW, family=binomial, data = crabs)
fit$deviance
## [1] 1571.405
fit$df.residual
## [1] 1333
fit$aic
## [1] 1595.405
1 - pchisq(1.4, 5)
## [1] 0.9243133
logit[P(Y=1)] = α + β1Fcolor + β2Fsurf + β3Fcolor
+ β4Fsurf
fit <- glm(y ~ factor(Fcolor) + factor(Fsurf) + factor(Fcolor) * factor(Fsurf), family=binomial, data = crabs)
fit$deviance
## [1] 1573.925
fit$df.residual
## [1] 1330
fit$aic
## [1] 1603.925
1 - pchisq(12, 8)
## [1] 0.1512039
1 - pchisq(13.9, 8)
## [1] 0.0844094
logit[P(Y=1)] = α + β1Fcolor + β2AMsurf +
β3Fcolor + β4AMsurf
fit <- glm(y ~ factor(Fcolor) + factor(AMsurf) + factor(Fcolor) * factor(AMsurf), family=binomial, data = crabs)
fit$deviance
## [1] 1576.534
fit$df.residual
## [1] 1330
fit$aic
## [1] 1606.534
1 - pchisq(9.4, 8)
## [1] 0.3096836
1 - pchisq(11.3, 8)
## [1] 0.1852733
logit[P(Y=1)] = α + β1Fcolor + β2Fsurf
logit[P(Y=1)] = α + β1Fcolor + β2AMsurf
logit[P(Y=1)]
= α + β1Fcolor + β2Fsurf + β3*AMsurf
Major <- c(1, 2, 3, 4, 5, 6)
MaleYes <- c(512, 353, 120, 138, 53, 22); MaleNo <- c(313, 207, 205, 279, 138, 351)
FemaleYes <- c(89, 17, 202, 131, 94, 24); FemaleNo <- c(19, 9, 391, 244, 299, 317)
admission <- data.frame(Major, MaleYes, MaleNo, FemaleYes, FemaleNo)
admission$Yes <- admission$MaleYes + admission$FemaleYes
admission$No <- admission$MaleNo + admission$FemaleNo
admission$N <- admission$Yes + admission$No
admission
## Major MaleYes MaleNo FemaleYes FemaleNo Yes No N
## 1 1 512 313 89 19 601 332 933
## 2 2 353 207 17 9 370 216 586
## 3 3 120 205 202 391 322 596 918
## 4 4 138 279 131 244 269 523 792
## 5 5 53 138 94 299 147 437 584
## 6 6 22 351 24 317 46 668 714
fit <- glm(Yes/N ~ Major, family=binomial, data = admission, weights = N)
cbind(rstandard(fit, type = "pearson"), residuals(fit, type = "pearson"),
rstandard(fit, type = "deviance"), residuals(fit, type = "deviance"))
## [,1] [,2] [,3] [,4]
## 1 -3.261655 -2.033601 -3.236246 -2.017759
## 2 4.684244 4.108896 4.721311 4.141410
## 3 -4.337947 -3.764456 -4.376593 -3.797994
## 4 3.639266 3.144973 3.582178 3.095639
## 5 4.362239 3.743110 4.207921 3.610694
## 6 -5.904581 -4.595225 -6.409891 -4.988482
fit <- glm(Yes/N ~ Major, family=binomial, data = admission, weights = N)
fit
##
## Call: glm(formula = Yes/N ~ Major, family = binomial, data = admission,
## weights = N)
##
## Coefficients:
## (Intercept) Major
## 1.2766 -0.5442
##
## Degrees of Freedom: 5 Total (i.e. Null); 4 Residual
## Null Deviance: 854.3
## Residual Deviance: 83.15 AIC: 127.5
Major <- c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6)
Gender <- c("Male", "Female","Male", "Female", "Male", "Female", "Male", "Female",
"Male", "Female", "Male", "Female")
Yes <- c(512, 89, 353, 17, 120, 202, 138, 131, 53, 94, 22, 24);
No <- c(313, 19, 207, 9, 205, 391, 279, 244, 138, 299, 351, 317)
admission <- data.frame(Major, Gender, Yes, No)
admission$N <- admission$Yes + admission$No
fit <- glm(Yes/N ~ Major + Gender, family=binomial, data = admission, weights = N)
fit
##
## Call: glm(formula = Yes/N ~ Major + Gender, family = binomial, data = admission,
## weights = N)
##
## Coefficients:
## (Intercept) Major GenderMale
## 1.282609 -0.544944 -0.006228
##
## Degrees of Freedom: 11 Total (i.e. Null); 9 Residual
## Null Deviance: 875.8
## Residual Deviance: 104.7 AIC: 179.7
[1] Alan Agresti, 『범주형 자료분석 개론』, 박태성, 이승연 역,
자유아카데미, 2020
[2] Alan Agresti, 『Categorical Data Analysis
(3rd Edition)』, Wiley, 2018
[3] https://users.stat.ufl.edu/~aa/cat/data/Crabs2.dat
[4] P.J.Bickel, E.A.Hammel, J.W.O’Connell, 「Sex Bias in Graduate
Admissions: Data from Berkeley」, Science, New Series, Vol.187, No.4175
(Feb.7, 1975), 398-404