This dataset is based on 2012 American National Election Survey.
The response variable of interest is whether the respondent had voted in 2008.
There are 2 covariates: age and the marital status of the respondent.
Source: nes{poliscidata}
Column 1: Age (in year) Column 2: Marital status, 1 = Married, 0 = Not married Column 3: Number of respondents who did not vote in 2008 Column 4: Number of respondents voted in 2008
pacman::p_load(HSAUR3, tidyverse, binomTools)
## Installing package into 'C:/Users/Ching-Fang Wu/Documents/R/win-library/4.0'
## (as 'lib' is unspecified)
## Warning: package 'binomTools' is not available for this version of R
##
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.0:
## 無法開啟 URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.0/PACKAGES'
## Warning: 'BiocManager' not available. Could not check Bioconductor.
##
## Please use `install.packages('BiocManager')` and then retry.
## Warning in p_install(package, character.only = TRUE, ...):
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'binomTools'
## Warning in pacman::p_load(HSAUR3, tidyverse, binomTools): Failed to install/load:
## binomTools
#input data
dta <- read.csv("C:/Users/Ching-Fang Wu/Documents/data/nes_2008.csv",h=T)
str(dta)
## 'data.frame': 26 obs. of 4 variables:
## $ Age : int 18 21 25 30 36 40 45 50 55 60 ...
## $ Married: int 0 0 0 0 0 0 0 0 0 0 ...
## $ No : int 164 126 83 85 51 38 47 63 53 41 ...
## $ Yes : int 11 153 179 134 128 148 138 220 228 198 ...
dta$Married<-factor(dta$Married) #把婚姻狀況改為類別變數
summary(dta)
## Age Married No Yes
## Min. :18.00 0:13 Min. : 8.00 Min. : 0.0
## 1st Qu.:30.00 1:13 1st Qu.: 26.00 1st Qu.:129.5
## Median :45.00 Median : 46.50 Median :174.5
## Mean :45.38 Mean : 49.62 Mean :174.6
## 3rd Qu.:60.00 3rd Qu.: 53.75 3rd Qu.:237.0
## Max. :75.00 Max. :164.00 Max. :334.0
dta <- GHQ %>% mutate(Total = cases + non.cases) %>% dplyr::rename(Gender=gender, Score=GHQ, Case=cases, Noncase=non.cases)
dta <- dta %>%
mutate(Total = No + Yes) #新增變數Total
# numerical summaries
# Married effect
aggregate(cbind(No, Yes) ~ Married, data = dta, sum)
## Married No Yes
## 1 0 814 1861
## 2 1 476 2678
#Age effect
aggregate(cbind(No, Yes) ~ Age, data = dta, sum)
## Age No Yes
## 1 18 172 11
## 2 21 155 173
## 3 25 146 276
## 4 30 139 315
## 5 36 103 298
## 6 40 84 393
## 7 45 86 378
## 8 50 110 529
## 9 55 105 562
## 10 60 74 510
## 11 65 49 469
## 12 70 38 293
## 13 75 29 332
# set black and white theme
ot <- theme_set(theme_bw())
ggplot(dta, aes(Age, Yes/Total, color = Married)) +
geom_jitter() +
stat_smooth(method = "glm", formula = y ~ x,
method.args = list(family = binomial),
aes(weight = Total), se = FALSE) +
labs(x = "Age", y = "Proportion of Yes") +
theme(legend.position = c(.1, .9))
# slopes and intercepts dependent on Married
summary(m0 <- glm(cbind(Yes, No) ~ Age*Married, data = dta,family = "binomial"))
##
## Call:
## glm(formula = cbind(Yes, No) ~ Age * Married, family = "binomial",
## data = dta)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.7977 -1.2562 0.0811 1.1050 4.7559
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.918787 0.116487 -7.887 3.08e-15 ***
## Age 0.042995 0.002813 15.285 < 2e-16 ***
## Married1 0.358598 0.205253 1.747 0.0806 .
## Age:Married1 0.005082 0.004599 1.105 0.2691
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 882.94 on 25 degrees of freedom
## Residual deviance: 229.51 on 22 degrees of freedom
## AIC: 368.35
##
## Number of Fisher Scoring iterations: 4
# different intercepts for Married
summary(m1 <- glm(cbind(Yes, No) ~ Age + Married, data = dta,family = "binomial"))
##
## Call:
## glm(formula = cbind(Yes, No) ~ Age + Married, family = "binomial",
## data = dta)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.566 -1.154 -0.051 1.142 4.959
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.992469 0.095874 -10.352 <2e-16 ***
## Age 0.044922 0.002229 20.154 <2e-16 ***
## Married1 0.572484 0.069292 8.262 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 882.94 on 25 degrees of freedom
## Residual deviance: 230.73 on 23 degrees of freedom
## AIC: 367.57
##
## Number of Fisher Scoring iterations: 4
# no Married effect
summary(m2 <- glm(cbind(Yes, No) ~ Age, data = dta,family = "binomial"))
##
## Call:
## glm(formula = cbind(Yes, No) ~ Age, family = "binomial", data = dta)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -12.5330 -2.1475 0.3402 1.5936 4.3390
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.911675 0.094900 -9.607 <2e-16 ***
## Age 0.049421 0.002183 22.637 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 882.94 on 25 degrees of freedom
## Residual deviance: 299.48 on 24 degrees of freedom
## AIC: 434.32
##
## Number of Fisher Scoring iterations: 4
anova(m0, m1, m2, test = "Chi")
## Analysis of Deviance Table
##
## Model 1: cbind(Yes, No) ~ Age * Married
## Model 2: cbind(Yes, No) ~ Age + Married
## Model 3: cbind(Yes, No) ~ Age
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 22 229.51
## 2 23 230.74 -1 -1.224 0.2685
## 3 24 299.48 -1 -68.746 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dta_m1 <- dta %>%
mutate(Prop = Yes/(Yes+No),
phat = predict(m1, newdata = dta[, c(1,2)],
type = "response"))
ggplot(dta_m1, aes(Age, Prop, color = Married)) +
geom_jitter() +
geom_line(aes(Age, phat, color = Married)) +
geom_hline(yintercept = .5, col = "gray") +
labs(x = "Age", y = "Proportion of Yes") +
theme(legend.position = c(.9, .1))
# residual plot
plot(resid(m1, type = "pearson") ~ fitted(m1), ylim=c(-3.3, 3.3),pch = 20, xlab = "Fitted values", ylab = "Pearson residuals")
abline(h=0)
grid()