## 'data.frame': 26 obs. of 4 variables:
## $ Age : int 18 21 25 30 36 40 45 50 55 60 ...
## $ Married: int 0 0 0 0 0 0 0 0 0 0 ...
## $ No : int 164 126 83 85 51 38 47 63 53 41 ...
## $ Yes : int 11 153 179 134 128 148 138 220 228 198 ...
## Age Married No Yes
## 1 18 0 164 11
## 2 21 0 126 153
## 3 25 0 83 179
## 4 30 0 85 134
## 5 36 0 51 128
## 6 40 0 38 148
## Installing package into 'C:/Users/user/Documents/R/win-library/4.0'
## (as 'lib' is unspecified)
## Warning: package 'binomTools' is not available for this version of R
##
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.0:
## 無法開啟 URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.0/PACKAGES'
## Warning: 'BiocManager' not available. Could not check Bioconductor.
##
## Please use `install.packages('BiocManager')` and then retry.
## Warning in p_install(package, character.only = TRUE, ...):
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'binomTools'
## Warning in pacman::p_load(HSAUR3, tidyverse, binomTools): Failed to install/load:
## binomTools
## 'data.frame': 26 obs. of 5 variables:
## $ Age : int 18 21 25 30 36 40 45 50 55 60 ...
## $ Married: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ No : int 164 126 83 85 51 38 47 63 53 41 ...
## $ Yes : int 11 153 179 134 128 148 138 220 228 198 ...
## $ Total : int 175 279 262 219 179 186 185 283 281 239 ...
## Married No Yes
## 1 0 814 1861
## 2 1 476 2678
## Age No Yes
## 1 18 172 11
## 2 21 155 173
## 3 25 146 276
## 4 30 139 315
## 5 36 103 298
## 6 40 84 393
## 7 45 86 378
## 8 50 110 529
## 9 55 105 562
## 10 60 74 510
## 11 65 49 469
## 12 70 38 293
## 13 75 29 332
#
ggplot(dtahw3, aes(Age, Yes/Total, color = Married)) +
geom_jitter() +
stat_smooth(method = "glm", formula = y ~ x,
method.args = list(family = binomial),
aes(weight = Total), se = FALSE) +
labs(x = "Age", y = "Proportion of Yes") +
theme(legend.position = c(.1, .9))
#logistic regression models
# slopes and intercepts dependent on Married
summary(m0 <- glm(cbind(Yes, No) ~ Age*Married, data = dtahw3,
family = "binomial"))
##
## Call:
## glm(formula = cbind(Yes, No) ~ Age * Married, family = "binomial",
## data = dtahw3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.7977 -1.2562 0.0811 1.1050 4.7559
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.918787 0.116487 -7.887 3.08e-15 ***
## Age 0.042995 0.002813 15.285 < 2e-16 ***
## Married1 0.358598 0.205253 1.747 0.0806 .
## Age:Married1 0.005082 0.004599 1.105 0.2691
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 882.94 on 25 degrees of freedom
## Residual deviance: 229.51 on 22 degrees of freedom
## AIC: 368.35
##
## Number of Fisher Scoring iterations: 4
# different intercepts for Married
summary(m1 <- glm(cbind(Yes, No) ~ Age+Married, data = dtahw3,
family = "binomial"))
##
## Call:
## glm(formula = cbind(Yes, No) ~ Age + Married, family = "binomial",
## data = dtahw3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.566 -1.154 -0.051 1.142 4.959
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.992469 0.095874 -10.352 <2e-16 ***
## Age 0.044922 0.002229 20.154 <2e-16 ***
## Married1 0.572484 0.069292 8.262 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 882.94 on 25 degrees of freedom
## Residual deviance: 230.73 on 23 degrees of freedom
## AIC: 367.57
##
## Number of Fisher Scoring iterations: 4
##
## Call:
## glm(formula = cbind(Yes, No) ~ Age, family = "binomial", data = dtahw3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -12.5330 -2.1475 0.3402 1.5936 4.3390
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.911675 0.094900 -9.607 <2e-16 ***
## Age 0.049421 0.002183 22.637 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 882.94 on 25 degrees of freedom
## Residual deviance: 299.48 on 24 degrees of freedom
## AIC: 434.32
##
## Number of Fisher Scoring iterations: 4
## Analysis of Deviance Table
##
## Model 1: cbind(Yes, No) ~ Age * Married
## Model 2: cbind(Yes, No) ~ Age + Married
## Model 3: cbind(Yes, No) ~ Age
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 22 229.51
## 2 23 230.74 -1 -1.224 0.2685
## 3 24 299.48 -1 -68.746 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dta_m1 <- dtahw3 %>%
mutate(Prop = Yes/(Yes+No),
phat = predict(m1, newdata = dtahw3[, c(1,2)],
type = "response"))
# plot
ggplot(dta_m1, aes(Age, Prop, color = Married)) +
geom_jitter() +
geom_line(aes(Age, phat, color = Married)) +
geom_hline(yintercept = .5, col = "gray") +
labs(x = "Age", y = "Proportion of Yes") +
theme(legend.position = c(.9, .1))
# residual plot
plot(resid(m1, type = "pearson") ~ fitted(m1), ylim=c(-3.3, 3.3),
pch = 20, xlab = "Fitted values", ylab = "Pearson residuals")
abline(h=0)
grid()