This data set contains turnout and demogrpahic data from a sample of respondents to the 2000 Current Population Survey. The states that are represented are South Carolina and Arkansas. This is only a sample containing 7 variables (state, year, vote, income, education, age, and female-which takes on the values (1Female, 2 Male)). I want to look at which factors influence voting.

library(Zelig)
library(survival)
library(texreg)
library(dplyr)
library(knitr)

data("voteincome")
head(voteincome)

##   state year vote income education age female
## 1    AR 2000    1      9         2  73      0
## 2    AR 2000    1     11         2  24      0
## 3    AR 2000    0     12         2  24      1
## 4    AR 2000    1     16         4  40      0
## 5    AR 2000    1     10         4  85      1
## 6    AR 2000    1     12         3  78      1

summary(voteincome)

##  state          year           vote            income        education    
##  AR: 500   Min.   :2000   Min.   :0.0000   Min.   : 4.00   Min.   :1.000  
##  SC:1000   1st Qu.:2000   1st Qu.:1.0000   1st Qu.: 9.00   1st Qu.:2.000  
##            Median :2000   Median :1.0000   Median :13.00   Median :3.000  
##            Mean   :2000   Mean   :0.8553   Mean   :12.46   Mean   :2.651  
##            3rd Qu.:2000   3rd Qu.:1.0000   3rd Qu.:16.00   3rd Qu.:4.000  
##            Max.   :2000   Max.   :1.0000   Max.   :17.00   Max.   :4.000  
##       age            female      
##  Min.   :18.00   Min.   :0.0000  
##  1st Qu.:36.00   1st Qu.:0.0000  
##  Median :49.00   Median :1.0000  
##  Mean   :49.26   Mean   :0.5593  
##  3rd Qu.:62.00   3rd Qu.:1.0000  
##  Max.   :85.00   Max.   :1.0000

Step 1: Estimate the model

z.out <- zelig(vote ~  female + income + education, model = "logit", data = voteincome, cite = F)
summary(z.out)

## Model: 
## 
## Call:
## z5$zelig(formula = vote ~ female + income + education, data = voteincome)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3081   0.4135   0.4943   0.5995   0.9040  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.18086    0.25667   0.705   0.4810
## female       0.31119    0.15045   2.068   0.0386
## income       0.08774    0.02186   4.015 5.96e-05
## education    0.15196    0.08674   1.752   0.0798
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1240.0  on 1499  degrees of freedom
## Residual deviance: 1200.3  on 1496  degrees of freedom
## AIC: 1208.3
## 
## Number of Fisher Scoring iterations: 4
## 
## Next step: Use 'setx' method

Step 2: Setup Counterfactuals

x.out <- setx(z.out)

Step 3:Simulate Expected /Predicted Results

s.out <- sim(z.out, x = x.out)

Step 4:Presenting Results

summary(s.out)

## 
##  sim x :
##  -----
## ev
##           mean          sd       50%      2.5%     97.5%
## [1,] 0.8641638 0.009412557 0.8644084 0.8448773 0.8807292
## pv
##          0     1
## [1,] 0.154 0.846

plot(s.out)

Automating Using Zelig

Education Effect

z.out3 <- zelig(vote ~  female + income + education, model = "logit", data = voteincome, cite = F)
a.range = min(voteincome$education):max(voteincome$education)
x <- setx(z.out3, education = a.range)
s <- sim(z.out3, x = x)
ci.plot(s)

From this we can see that education does have an effect on the likelihood of voting.

The more education you have the more likely you will go out and vote and be concerned with political influences and issues.

Income Effect

z.out4 <- zelig(vote ~  female + income + education, model = "logit", data = voteincome, cite = F)
c.range = min(voteincome$income):max(voteincome$income)
x <- setx(z.out3, income = c.range)
s <- sim(z.out3, x = x)
ci.plot(s)

From this we can see that income also relates to whether or not someone will be more likely to vote. The higher the income the more likely they are more concerned how any different laws or changes may effect them.

x <- setx(z.out, female =1)
x1 <- setx(z.out, female =0)
s <- sim(z.out, x, x1)
summary(s)

## 
##  sim x :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.8787671 0.01124648 0.8792699 0.8547888 0.8988404
## pv
##          0     1
## [1,] 0.132 0.868
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.8423504 0.01432243 0.8428022 0.8133188 0.8690335
## pv
##          0     1
## [1,] 0.153 0.847
## fd
##             mean        sd         50%        2.5%        97.5%
## [1,] -0.03641676 0.0180593 -0.03610783 -0.07155933 -0.003633296

From this it shows there is an average of -.03 difference in the likelihood of voting based on sex.

Looking at First Difference

fd <- s$get_qi(xvalue="x1", qi="fd")
summary(fd)

##        V1          
##  Min.   :-0.09654  
##  1st Qu.:-0.04870  
##  Median :-0.03611  
##  Mean   :-0.03642  
##  3rd Qu.:-0.02437  
##  Max.   : 0.01896

plot(s)

How to Test Education Variations in Sex Differences

c1x <- setx(z.out, female = 1, education  = 1)
c1x1 <- setx(z.out, female = 0, education = 1)
c1s <- sim(z.out, x = c1x, x1 = c1x1)

c2x <- setx(z.out, female = 1, education = 4)
c2x1 <- setx(z.out, female= 0, education = 4)
c2s <- sim(z.out, x = c1x, x1 = c1x1)

plot(c1s)

plot(c2s)

####Seems like there are slight differences.

Putting what we just did into one area

d1 <- c1s$get_qi(xvalue="x1", qi="fd")
d2 <- c2s$get_qi(xvalue="x1", qi="fd")

dfd <- as.data.frame(cbind(d1, d2))
head(dfd)

##            V1          V2
## 1 -0.04328815 -0.03739653
## 2 -0.03360037 -0.04338909
## 3 -0.07884797 -0.06045992
## 4 -0.04911756 -0.02525513
## 5 -0.07768353 -0.07021157
## 6 -0.02599545 -0.01013836

library(tidyr)

tidd <- dfd %>% 
  gather(colour, simv, 1:2)
head(tidd)

##   colour        simv
## 1     V1 -0.04328815
## 2     V1 -0.03360037
## 3     V1 -0.07884797
## 4     V1 -0.04911756
## 5     V1 -0.07768353
## 6     V1 -0.02599545

library(dplyr)

tidd %>% 
  group_by(colour) %>% 
  summarise(mean = mean(simv), sd = sd(simv))

## # A tibble: 2 x 3
##   colour    mean     sd
##   <chr>    <dbl>  <dbl>
## 1 V1     -0.0441 0.0217
## 2 V2     -0.0451 0.0215

library(ggplot2)

ggplot(tidd, aes(simv)) + geom_histogram(bins = 50) + facet_grid(~colour)