Based on Example: modeling political preference given income Logistic regression, identifiability, and separation. See Chapters 13 and 14 in Regression and Other Stories.
# library("arm")
# library("foreign")
library("rstanarm")
library(ggplot2)
library(car)
library(effects)
library(vcd)
Set root for data files. I run this in the directory for ROS-Examples.
library(here)
root <- here
nes <- read.table(root("NES/data","nes.txt"), header=TRUE)
head(nes, 4)
year resid weight1 weight2 weight3 age gender race educ1 urban region
536 1952 1 1 1 1 25 2 1 2 2 1
537 1952 2 1 1 1 33 2 1 1 2 1
538 1952 3 1 1 1 26 2 1 2 2 1
539 1952 4 1 1 1 63 1 1 2 2 1
income occup1 union religion educ2 educ3 martial_status occup2 icpsr_cty
536 4 2 1 1 3 3 1 2 NA
537 4 6 1 1 1 1 1 6 NA
538 3 6 2 2 3 3 1 6 NA
539 3 3 1 1 2 2 1 3 NA
fips_cty partyid7 partyid3 partyid3_b str_partyid father_party mother_party
536 NA 6 3 3 3 3 3
537 NA 5 3 3 2 2 2
538 NA 4 2 2 1 1 1
539 NA 7 3 3 4 1 NA
dlikes rlikes dem_therm rep_therm regis vote regisvote presvote
536 0 1 NA NA 2 2 3 2
537 -1 3 NA NA 2 2 3 1
538 0 5 NA NA 2 2 3 2
539 -1 3 NA NA 1 2 3 2
presvote_2party presvote_intent ideo_feel ideo7 ideo cd state inter_pre
536 2 2 NA NA NA NA 13 50
537 1 2 NA NA NA NA 13 50
538 2 2 NA NA NA NA 13 50
539 2 2 NA NA NA NA 13 50
inter_post black female age_sq rep_presvote rep_pres_intent south real_ideo
536 NA 0 1 625 1 1 0 NA
537 NA 0 1 1089 0 1 0 NA
538 NA 0 1 676 1 1 0 NA
539 NA 0 0 3969 1 1 0 NA
presapprov perfin1 perfin2 perfin presadm age_10 age_sq_10 newfathe newmoth
536 NA NA NA NA -1 2.5 6.250000 1 1
537 NA NA NA NA -1 3.3 10.889999 0 0
538 NA NA NA NA -1 2.6 6.759999 -1 -1
539 NA NA NA NA -1 6.3 39.690002 -1 NA
parent_party white year_new income_new age_new vote.1 age_discrete
536 2 1 1 1 -2.052455 1 1
537 0 1 1 1 -1.252455 1 2
538 -2 1 1 0 -1.952455 1 1
539 NA 1 1 0 1.747545 1 3
race_adj dvote rvote
536 1 0 1
537 1 1 0
538 1 0 1
539 1 0 1
The two binary variables of interest are rvote and dvote
ok <- nes$year==1992 &
!is.na(nes$rvote) & !is.na(nes$dvote) &
(nes$rvote==1 | nes$dvote==1)
nes92 <- nes[ok,]
plot(rvote ~ income, data = nes92)
Jitter both x & y. Should really be more careful to keep the result in [0,1]
plot(jitter(rvote) ~ jitter(income), data = nes92,
xlab = "Income groups (jittered)",
ylab = "Republican votes (jittered)")
with (nes92,
lines(loess.smooth(income, rvote), lwd=3, col="red")
)
tab <- with(nes92, table(rvote, income))
tab
income
rvote 1 2 3 4 5
0 98 138 222 208 36
1 30 69 144 196 38
mosaicplot(t(tab), shade=TRUE,
xlab="Income group", ylab="Republican vote", main="nes92 table")
treat as a binomial glm
fit_glm <- glm(rvote ~ income, family=binomial(link="logit"), data=nes92)
brief(fit_glm)
(Intercept) income
Estimate -1.402 0.3260
Std. Error 0.189 0.0569
exp(Estimate) 0.246 1.3854
Residual deviance = 1557 on 1177 df
plot(allEffects(fit_glm),
xlab= "Income group",
ylab= "Republican vote")
ggplot2::ggplot(nes92, aes(x=income, y=rvote)) +
geom_point(position=position_jitter(height=0.1, width = 0.1)) +
stat_smooth(method = "glm", method.args=list(family="binomial")) +
labs(x = "Income group",
y = "Republican vote")
Could also use vcd::binreg_plot
#binreg_plot(fit_glm, type="link")
fit_1 <- stan_glm(rvote ~ income, family=binomial(link="logit"), data=nes92,
refresh=0)
print(fit_1)
stan_glm
family: binomial [logit]
formula: rvote ~ income
observations: 1179
predictors: 2
------
Median MAD_SD
(Intercept) -1.4 0.2
income 0.3 0.1
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
n <- nrow(nes92)
income_jitt <- nes92$income + runif(n, -.2, .2)
vote_jitt <- nes92$rvote +
ifelse(nes92$rvote==0,
runif(n, .005, .05),
runif(n, -.05, -.005))
par(mar=c(3,3,1,.1), tck=-.01, mgp=c(1.7, .3, 0))
curve(invlogit(fit_1$coef[1] + fit_1$coef[2]*x), 1, 5, ylim=c(0,1),
xlim=c(-2,8), xaxt="n", xaxs="i",
ylab="Pr (Republican vote)", xlab="Income",
lwd=5, col="red", yaxs="i")
curve(invlogit(fit_1$coef[1] + fit_1$coef[2]*x), -2, 8, lwd=.5, add=TRUE)
axis(1, 1:5)
mtext("(poor)", 1, 1.2, at=1, adj=.5)
mtext("(rich)", 1, 1.2, at=5, adj=.5)
points(income_jitt, vote_jitt, pch=20, cex=.1)
par(mar=c(3,3,1,.1), tck=-.01, mgp=c(1.7, .3, 0))
curve (invlogit(fit_1$coef[1] + fit_1$coef[2]*x), .5, 5.5, ylim=c(0,1),
xlim=c(.5, 5.5), xaxt="n", xaxs="i",
ylab="Pr (Republican vote)", xlab="Income", yaxs="i",
)
axis(1, 1:5)
mtext("(poor)", 1, 1.2, at=1, adj=.5)
mtext("(rich)", 1, 1.2, at=5, adj=.5)
sims_1 <- as.matrix(fit_1)
n_sims <- nrow(sims_1)
for (j in sample(n_sims, 20)){
curve(invlogit(sims_1[j,1] +sims_1[j,2]*x), .5, 5.5, lwd=.5, col="gray", add=TRUE)
}
curve(invlogit(fit_1$coef[1] + fit_1$coef[2]*x), .5, 5.5, add=TRUE,
lwd=3, col="red")
points(income_jitt, vote_jitt, pch=20, cex=.1)