Homework 5

We begin with data from a 1948 social science survey (the NES 1948 study). For the purposes of this assignment, I am not hiding any work, so that everything can be seen clearly.

library(memisc)
## Warning: package 'memisc' was built under R version 3.1.3
## Loading required package: lattice
## Loading required package: MASS
## 
## Attaching package: 'memisc'
## 
## The following objects are masked from 'package:stats':
## 
##     contr.sum, contr.treatment, contrasts
## 
## The following object is masked from 'package:base':
## 
##     as.array
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:memisc':
## 
##     collect, query, rename
## 
## The following object is masked from 'package:MASS':
## 
##     select
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(car)
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:memisc':
## 
##     recode
library(foreign)
library(tidyr)
library(pander)
library(gmodels)
library(stargazer)
## 
## Please cite as: 
## 
##  Hlavac, Marek (2014). stargazer: LaTeX code and ASCII text for well-formatted regression and summary statistics tables.
##  R package version 5.1. http://CRAN.R-project.org/package=stargazer
# Print numbers with three significant digits throughout the report.
options(digits = 3)

# Extract NES1948.POR from the ZIP archive looked up under package = "memisc",
# then open the extracted portable file with spss.portable.file().
nes1948.por <- UnZip("anes/NES1948.ZIP", "NES1948.POR", package = "memisc")
nes1948 <- spss.portable.file(nes1948.por)

Here we can see the dataset with names and descriptions.

# List the variable names available in nes1948.
names(nes1948)
##  [1] "vversion" "vdsetno"  "v480001"  "v480002"  "v480003"  "v480004" 
##  [7] "v480005"  "v480006"  "v480007"  "v480008"  "v480009"  "v480010" 
## [13] "v480011"  "v480012"  "v480013"  "v480014a" "v480014b" "v480015a"
## [19] "v480015b" "v480016a" "v480016b" "v480017a" "v480017b" "v480018" 
## [25] "v480019"  "v480020"  "v480021a" "v480021b" "v480022a" "v480022b"
## [31] "v480023"  "v480024"  "v480025a" "v480025b" "v480026"  "v480027" 
## [37] "v480028"  "v480029"  "v480030"  "v480031a" "v480031b" "v480031c"
## [43] "v480032a" "v480032b" "v480032c" "v480033a" "v480033b" "v480034a"
## [49] "v480034b" "v480035a" "v480035b" "v480036a" "v480036b" "v480037" 
## [55] "v480038"  "v480039"  "v480040"  "v480041"  "v480042"  "v480043" 
## [61] "v480044"  "v480045"  "v480046"  "v480047"  "v480048"  "v480049" 
## [67] "v480050"
# Print the descriptive label attached to each variable in nes1948.
description(nes1948)
## 
##  vversion 'NES VERSION NUMBER'        
##  vdsetno  'NES DATASET NUMBER'        
##  v480001  'ICPSR ARCHIVE NUMBER'      
##  v480002  'INTERVIEW NUMBER'          
##  v480003  'POP CLASSIFICATION'        
##  v480004  'CODER'                     
##  v480005  'NUMBER OF CALLS TO R'      
##  v480006  'R REMEMBER PREVIOUS INT'   
##  v480007  'INTR INTERVIEW THIS R'     
##  v480008  'PRVS PRE-ELCTN R REINT'    
##  v480009  'R INT IN PRE/POSTELCTN'    
##  v480010  'RENT CNTRL KEPT/DROPPED'   
##  v480011  'GOVT CONTROL PRICES'       
##  v480012  'WHAT TO DO W TFT-HT ACT'   
##  v480013  'PRESLELCTN OTCM SURPRISE'  
##  v480014a 'WHY PPL VTD FOR TRUMAN 1'  
##  v480014b 'WHY PPL VTD FOR TRUMAN 2'  
##  v480015a 'WHY PPL VTD AGNST TRUMAN 1'
##  v480015b 'WHY PPL VTD AGNST TRUMAN 2'
##  v480016a 'WHY PPL VTD FOR DEWEY 1'   
##  v480016b 'WHY PPL VTD FOR DEWEY 2'   
##  v480017a 'WHY PPL VTD AGNST DEWEY 1' 
##  v480017b 'WHY PPL VTD AGNST DEWEY 2' 
##  v480018  'DID R VOTE/FOR WHOM'       
##  v480019  'WN DECIDE FOR WHOM TO VT'  
##  v480020  'CNSD VT FOR SOMEONE ELSE'  
##  v480021a 'XWHY DID NOT VT FOR HIM 1' 
##  v480021b 'XWHY DID NOT VT FOR HIM 2' 
##  v480022a 'WHY VT THE WAY YOU DID 1'  
##  v480022b 'WHY VT THE WAY YOU DID 2'  
##  v480023  'VOTED STRAIGHT TICKET'     
##  v480024  'R NOT VT-IF VT,FOR WHOM'   
##  v480025a 'R NOT VT-WHY DID NOT VT 1' 
##  v480025b 'R NOT VT-WHY DID NOT VT 2' 
##  v480026  'R NOT VT-WAS R REG TO VT'  
##  v480027  'VTD IN PRVS PRESL ELCTN'   
##  v480028  'VTD FOR WHOM IN 1944'      
##  v480029  'OCCUPATION OF HEAD'        
##  v480030  'HEAD BELONG TO LBR UN'     
##  v480031a 'GRPS IDENTIFIED W TRUMAN 1'
##  v480031b 'GRPS IDENTIFIED W TRUMAN 2'
##  v480031c 'GRPS IDENTIFIED W TRUMAN 3'
##  v480032a 'GRPS IDENTIFIED W DEWEY 1' 
##  v480032b 'GRPS IDENTIFIED W DEWEY 2' 
##  v480032c 'GRPS IDENTIFIED W DEWEY 3' 
##  v480033a 'ISSUES CONNECTED W TRMN 1' 
##  v480033b 'ISSUES CONNECTED W TRMN 2' 
##  v480034a 'ISSUES CONNECTED W DEWEY 1'
##  v480034b 'ISSUES CONNECTED W DEWEY 2'
##  v480035a 'PERSONAL ATTRIBUTE TRMN 1' 
##  v480035b 'PERSONAL ATTRIBUTE TRMN 2' 
##  v480036a 'PERSONAL ATTRIBUTE DEWEY 1'
##  v480036b 'PERSONAL ATTRIBUTE DEWEY 2'
##  v480037  'CMPN INCIDENTS MENTIONED'  
##  v480038  '41-PRESLELCTN PLAN TO VT'  
##  v480039  '41-PLAN TO VT REP/DEM'     
##  v480040  '41-USA'S CNCRN W OTHERS'   
##  v480041  '41-SATISD USA TWRD RUSS'   
##  v480042  '41-INFORMATION LEVEL'      
##  v480043  '41-USA GV IN,AGRT RUSS'    
##  v480044  '41-USA-RUSS AGRT VIA U.N'  
##  v480045  'SEX OF RESPONDENT'         
##  v480046  'RACE OF RESPONDENT'        
##  v480047  'AGE OF RESPONDENT'         
##  v480048  'EDUCATION OF RESPONDENT'   
##  v480049  'TOTAL 1948 INCOME'         
##  v480050  'RELIGIOUS PREFERENCE'

We are only using a few of the variables collected, shown here:

library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## 
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following object is masked from 'package:memisc':
## 
##     rename
# Keep only the nine survey items used in the analysis below
# (vote, occupation and union membership of the household head, sex, race,
# age, education, income and religion -- see the descriptions printed above),
# then inspect the structure of the result.
vote.48 <- subset(
  nes1948,
  select = c(
    v480018, v480029, v480030,
    v480045, v480046, v480047,
    v480048, v480049, v480050
  )
)
str(vote.48)
## Data set with 662 obs. of 9 variables:
##  $ v480018: Nmnl. item w/ 7 labels for 1,2,3,... + ms.v.  num  1 2 1 2 1 2 2 1 2 1 ...
##  $ v480029: Nmnl. item w/ 12 labels for 10,20,30,... + ms.v.  num  70 30 40 10 10 20 80 80 40 40 ...
##  $ v480030: Nmnl. item w/ 4 labels for 1,2,8,... + ms.v.  num  1 2 2 2 2 2 2 2 1 1 ...
##  $ v480045: Nmnl. item w/ 3 labels for 1,2,9 + ms.v.  num  1 2 2 2 1 2 1 2 1 1 ...
##  $ v480046: Nmnl. item w/ 4 labels for 1,2,3,... + ms.v.  num  1 1 1 1 1 1 1 1 1 1 ...
##  $ v480047: Nmnl. item w/ 7 labels for 1,2,3,... + ms.v.  num  3 3 2 3 2 3 4 5 2 2 ...
##  $ v480048: Nmnl. item w/ 4 labels for 1,2,3,... + ms.v.  num  1 2 2 3 3 2 1 1 2 2 ...
##  $ v480049: Nmnl. item w/ 8 labels for 1,2,3,... + ms.v.  num  4 7 5 7 5 7 5 2 5 6 ...
##  $ v480050: Nmnl. item w/ 6 labels for 1,2,3,... + ms.v.  num  1 1 2 1 2 1 1 1 1 2 ...

To make them easier to understand, we relabel them using names that identify the data collected.

# Give the selected survey items descriptive names.
# rename() is exported by memisc, dplyr and plyr (see the masking notices
# printed when the packages were attached), so which one an unqualified call
# reaches depends on load order. The c(old = "new") mapping below is the plyr
# form, so plyr::rename() is called explicitly.
vote.48 <- plyr::rename(
  vote.48,
  c(
    v480018 = "vote",
    v480029 = "occupation.hh",
    v480030 = "unionized.hh",
    v480045 = "gender",
    v480046 = "race",
    v480047 = "age",
    v480048 = "education",
    v480049 = "total.income",
    v480050 = "religious.pref"
  )
)
str(vote.48)
## Data set with 662 obs. of 9 variables:
##  $ vote          : Nmnl. item w/ 7 labels for 1,2,3,... + ms.v.  num  1 2 1 2 1 2 2 1 2 1 ...
##  $ occupation.hh : Nmnl. item w/ 12 labels for 10,20,30,... + ms.v.  num  70 30 40 10 10 20 80 80 40 40 ...
##  $ unionized.hh  : Nmnl. item w/ 4 labels for 1,2,8,... + ms.v.  num  1 2 2 2 2 2 2 2 1 1 ...
##  $ gender        : Nmnl. item w/ 3 labels for 1,2,9 + ms.v.  num  1 2 2 2 1 2 1 2 1 1 ...
##  $ race          : Nmnl. item w/ 4 labels for 1,2,3,... + ms.v.  num  1 1 1 1 1 1 1 1 1 1 ...
##  $ age           : Nmnl. item w/ 7 labels for 1,2,3,... + ms.v.  num  3 3 2 3 2 3 4 5 2 2 ...
##  $ education     : Nmnl. item w/ 4 labels for 1,2,3,... + ms.v.  num  1 2 2 3 3 2 1 1 2 2 ...
##  $ total.income  : Nmnl. item w/ 8 labels for 1,2,3,... + ms.v.  num  4 7 5 7 5 7 5 2 5 6 ...
##  $ religious.pref: Nmnl. item w/ 6 labels for 1,2,3,... + ms.v.  num  1 1 2 1 2 1 1 1 1 2 ...

We are now going to recode certain variables to make them simpler.

# Collapse the vote item into Truman / Dewey / Other; every other response
# (non-voters, missing, ...) becomes NA.
# car::recode() is named explicitly: memisc also exports recode(), and the
# string specification used here is the car form.
vote.48$vote3 <- car::recode(
  vote.48$vote,
  paste0(
    "'VOTED - FOR TRUMAN'='Truman';",
    "'VOTED - FOR DEWEY'='Dewey';",
    "'VOTED - FOR WALLACE'='Other';",
    " 'VOTED - FOR OTHER'='Other';",
    "else=NA"
  )
)
## Warning in asMethod(object, strict = FALSE): NAs introduced by coercion
# Collapse the household head's occupation into four groups; every other
# response becomes NA.
# The column is referenced by its full name, occupation.hh, instead of the
# abbreviated `occupation`, so the lookup no longer depends on `$` resolving
# a partial name.
# car::recode() is named explicitly: memisc also exports recode(), and the
# string specification used here is the car form.
vote.48$occup4 <- car::recode(
  vote.48$occupation.hh,
  paste0(
    "'PROFESSIONAL, SEMI-PROFESSIONAL'='Upper white collar';",
    "'SELF-EMPLOYED, MANAGERIAL, SUPERVISORY'='Upper white collar';",
    "'OTHER WHITE-COLLAR (CLERICAL, SALES, ET'='Other white collar';",
    " 'SKILLED AND SEMI-SKILLED'='Blue Collar';",
    " 'PROTECTIVE SERVICE'='Blue Collar';",
    " 'UNSKILLED, INCLUDING FARM AND SERVICE W'='Blue Collar';",
    " 'FARM OPERATORS AND MANAGERS'='Farmer';",
    "else=NA"
  )
)
## Warning in asMethod(object, strict = FALSE): NAs introduced by coercion
# Collapse religious preference into Protestant / Catholic / Other, none;
# every other response becomes NA.
# car::recode() is named explicitly: memisc also exports recode(), and the
# string specification used here is the car form.
vote.48$relig3 <- car::recode(
  vote.48$religious.pref,
  paste0(
    "'PROTESTANT'='Protestant';",
    "'CATHOLIC'='Catholic';",
    "'JEWISH'='Other, none';",
    " 'OTHER'='Other, none';",
    " 'NONE'='Other, none' ;",
    "else=NA"
  )
)
## Warning in asMethod(object, strict = FALSE): NAs introduced by coercion
# Reduce race to White / Black; every other response becomes NA.
# car::recode() is named explicitly: memisc also exports recode(), and the
# string specification used here is the car form.
vote.48$race2 <- car::recode(
  vote.48$race,
  "'WHITE'='White';'NEGRO'='Black';else=NA"
)
## Warning in asMethod(object, strict = FALSE): NAs introduced by coercion

As shown in the text, we can compare some data for some interesting, yet predictable results:

# Cross-tabulate presidential vote (rows) against occupation group (columns).
crsst1 <- CrossTable(vote.48$vote3, vote.48$occup4)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  344 
## 
##  
##               | vote.48$occup4 
## vote.48$vote3 |        Blue Collar |             Farmer | Other white collar | Upper white collar |          Row Total | 
## --------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##         Dewey |                 36 |                 14 |                 31 |                 67 |                148 | 
##               |             13.816 |              1.095 |              0.862 |             24.324 |                    | 
##               |              0.243 |              0.095 |              0.209 |              0.453 |              0.430 | 
##               |              0.234 |              0.326 |              0.508 |              0.779 |                    | 
##               |              0.105 |              0.041 |              0.090 |              0.195 |                    | 
## --------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##         Other |                  4 |                  3 |                  0 |                  2 |                  9 | 
##               |              0.000 |              3.125 |              1.596 |              0.028 |                    | 
##               |              0.444 |              0.333 |              0.000 |              0.222 |              0.026 | 
##               |              0.026 |              0.070 |              0.000 |              0.023 |                    | 
##               |              0.012 |              0.009 |              0.000 |              0.006 |                    | 
## --------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##        Truman |                114 |                 26 |                 30 |                 17 |                187 | 
##               |             10.956 |              0.295 |              0.301 |             18.932 |                    | 
##               |              0.610 |              0.139 |              0.160 |              0.091 |              0.544 | 
##               |              0.740 |              0.605 |              0.492 |              0.198 |                    | 
##               |              0.331 |              0.076 |              0.087 |              0.049 |                    | 
## --------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##  Column Total |                154 |                 43 |                 61 |                 86 |                344 | 
##               |              0.448 |              0.125 |              0.177 |              0.250 |                    | 
## --------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## 
## 

Not surprisingly, and as explained by the text, we see that blue-collar workers mostly supported Truman, the Democrat (74% of that group), whereas upper white-collar workers mostly supported the Republican, Dewey (78% of that group).

# Cross-tabulate religious group (rows) against presidential vote (columns).
crsst2 <- CrossTable(vote.48$relig3, vote.48$vote3)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  402 
## 
##  
##                | vote.48$vote3 
## vote.48$relig3 |     Dewey |     Other |    Truman | Row Total | 
## ---------------|-----------|-----------|-----------|-----------|
##       Catholic |        35 |         0 |        68 |       103 | 
##                |     2.467 |     3.075 |     3.446 |           | 
##                |     0.340 |     0.000 |     0.660 |     0.256 | 
##                |     0.197 |     0.000 |     0.321 |           | 
##                |     0.087 |     0.000 |     0.169 |           | 
## ---------------|-----------|-----------|-----------|-----------|
##    Other, none |        13 |         1 |        30 |        44 | 
##                |     2.157 |     0.075 |     1.990 |           | 
##                |     0.295 |     0.023 |     0.682 |     0.109 | 
##                |     0.073 |     0.083 |     0.142 |           | 
##                |     0.032 |     0.002 |     0.075 |           | 
## ---------------|-----------|-----------|-----------|-----------|
##     Protestant |       130 |        11 |       114 |       255 | 
##                |     2.587 |     1.508 |     3.118 |           | 
##                |     0.510 |     0.043 |     0.447 |     0.634 | 
##                |     0.730 |     0.917 |     0.538 |           | 
##                |     0.323 |     0.027 |     0.284 |           | 
## ---------------|-----------|-----------|-----------|-----------|
##   Column Total |       178 |        12 |       212 |       402 | 
##                |     0.443 |     0.030 |     0.527 |           | 
## ---------------|-----------|-----------|-----------|-----------|
## 
## 

Here we again see predictable results. Catholics, as usually predicted, voted mostly for the Democrat, Truman (66%), while Protestants leaned toward the Republican, Dewey, though by a narrower margin (51% for Dewey versus 45% for Truman).

To analyse the data compiled in an original way, we look at the relationship between race and occupation:

# Cross-tabulate race (rows) against occupation group (columns).
crsst3 <- CrossTable(vote.48$race2, vote.48$occup4)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  541 
## 
##  
##               | vote.48$occup4 
## vote.48$race2 |        Blue Collar |             Farmer | Other white collar | Upper white collar |          Row Total | 
## --------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##         Black |                 30 |                 12 |                  3 |                  5 |                 50 | 
##               |              2.121 |              0.761 |              2.305 |              2.981 |                    | 
##               |              0.600 |              0.240 |              0.060 |              0.100 |              0.092 | 
##               |              0.120 |              0.119 |              0.039 |              0.043 |                    | 
##               |              0.055 |              0.022 |              0.006 |              0.009 |                    | 
## --------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##         White |                219 |                 89 |                 73 |                110 |                491 | 
##               |              0.216 |              0.078 |              0.235 |              0.304 |                    | 
##               |              0.446 |              0.181 |              0.149 |              0.224 |              0.908 | 
##               |              0.880 |              0.881 |              0.961 |              0.957 |                    | 
##               |              0.405 |              0.165 |              0.135 |              0.203 |                    | 
## --------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##  Column Total |                249 |                101 |                 76 |                115 |                541 | 
##               |              0.460 |              0.187 |              0.140 |              0.213 |                    | 
## --------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## 
## 

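Not surprisingly for 1948, the sample contains far more white respondents than Black respondents (491 versus 50), and a clearly larger share of white respondents came from households whose head held an upper white-collar occupation (22.4% versus 10.0%).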

Another interesting view is the breakdown of occupation by religion at the time, which perhaps explains some of the consistent voting patterns:

# Cross-tabulate religious group (rows) against occupation group (columns).
crsst4 <- CrossTable(vote.48$relig3, vote.48$occup4)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  552 
## 
##  
##                | vote.48$occup4 
## vote.48$relig3 |        Blue Collar |             Farmer | Other white collar | Upper white collar |          Row Total | 
## ---------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##       Catholic |                 78 |                 12 |                 17 |                 17 |                124 | 
##                |              7.686 |              5.198 |              0.031 |              3.278 |                    | 
##                |              0.629 |              0.097 |              0.137 |              0.137 |              0.225 | 
##                |              0.307 |              0.118 |              0.215 |              0.145 |                    | 
##                |              0.141 |              0.022 |              0.031 |              0.031 |                    | 
## ---------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##    Other, none |                 20 |                  0 |                 12 |                 12 |                 44 | 
##                |              0.003 |              8.130 |              5.165 |              0.767 |                    | 
##                |              0.455 |              0.000 |              0.273 |              0.273 |              0.080 | 
##                |              0.079 |              0.000 |              0.152 |              0.103 |                    | 
##                |              0.036 |              0.000 |              0.022 |              0.022 |                    | 
## ---------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##     Protestant |                156 |                 90 |                 50 |                 88 |                384 | 
##                |              2.424 |              5.111 |              0.447 |              0.537 |                    | 
##                |              0.406 |              0.234 |              0.130 |              0.229 |              0.696 | 
##                |              0.614 |              0.882 |              0.633 |              0.752 |                    | 
##                |              0.283 |              0.163 |              0.091 |              0.159 |                    | 
## ---------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##   Column Total |                254 |                102 |                 79 |                117 |                552 | 
##                |              0.460 |              0.185 |              0.143 |              0.212 |                    | 
## ---------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## 
## 

Not surprisingly, Protestants, who are more likely to vote Republican, also hold more upper-level occupations. People in upper-level occupations are also more likely to vote Republican. Here we see a hint of the already familiar profile of a person most likely to vote a certain way. For Truman to win the election, it’s assumed that the majority of people who supported him had lower-level occupations and were more likely to be Catholic.