1. Reading in a Portable SPSS data file

library(foreign)
library(Zelig)
library(memisc)
library(magrittr)
options(digits=3)
nes1948.por <- UnZip("anes/NES1948.ZIP","NES1948.POR", package="memisc")
nes1948 <- spss.portable.file(nes1948.por)
print(nes1948)
## 
## SPSS portable file 'C:/Users/Y2K/AppData/Local/Temp/RtmpMnZFEF/NES1948.POR' 
##  with 67 variables and 662 observations
dim(nes1948)
## [1] 662  67

2. Reading in a Subset of the Data and renaming

library(plyr)
vote.48 <- subset(nes1948, select=c(v480018, v480029, v480030, v480045, v480046, v480047, v480048, v480049, v480050))
str(vote.48)
## Data set with 662 obs. of 9 variables:
##  $ v480018: Nmnl. item w/ 7 labels for 1,2,3,... + ms.v.  num  1 2 1 2 1 2 2 1 2 1 ...
##  $ v480029: Nmnl. item w/ 12 labels for 10,20,30,... + ms.v.  num  70 30 40 10 10 20 80 80 40 40 ...
##  $ v480030: Nmnl. item w/ 4 labels for 1,2,8,... + ms.v.  num  1 2 2 2 2 2 2 2 1 1 ...
##  $ v480045: Nmnl. item w/ 3 labels for 1,2,9 + ms.v.  num  1 2 2 2 1 2 1 2 1 1 ...
##  $ v480046: Nmnl. item w/ 4 labels for 1,2,3,... + ms.v.  num  1 1 1 1 1 1 1 1 1 1 ...
##  $ v480047: Nmnl. item w/ 7 labels for 1,2,3,... + ms.v.  num  3 3 2 3 2 3 4 5 2 2 ...
##  $ v480048: Nmnl. item w/ 4 labels for 1,2,3,... + ms.v.  num  1 2 2 3 3 2 1 1 2 2 ...
##  $ v480049: Nmnl. item w/ 8 labels for 1,2,3,... + ms.v.  num  4 7 5 7 5 7 5 2 5 6 ...
##  $ v480050: Nmnl. item w/ 6 labels for 1,2,3,... + ms.v.  num  1 1 2 1 2 1 1 1 1 2 ...
vote.48 <- rename(vote.48, 
                  c(v480018="vote", 
                  v480029="occu", 
                  v480030="union", 
                  v480045="gender", 
                  v480046="race", 
                  v480047="age", 
                  v480048="edu", 
                  v480049="income", 
                  v480050="relig"
                  ))
str(vote.48)
## Data set with 662 obs. of 9 variables:
##  $ vote  : Nmnl. item w/ 7 labels for 1,2,3,... + ms.v.  num  1 2 1 2 1 2 2 1 2 1 ...
##  $ occu  : Nmnl. item w/ 12 labels for 10,20,30,... + ms.v.  num  70 30 40 10 10 20 80 80 40 40 ...
##  $ union : Nmnl. item w/ 4 labels for 1,2,8,... + ms.v.  num  1 2 2 2 2 2 2 2 1 1 ...
##  $ gender: Nmnl. item w/ 3 labels for 1,2,9 + ms.v.  num  1 2 2 2 1 2 1 2 1 1 ...
##  $ race  : Nmnl. item w/ 4 labels for 1,2,3,... + ms.v.  num  1 1 1 1 1 1 1 1 1 1 ...
##  $ age   : Nmnl. item w/ 7 labels for 1,2,3,... + ms.v.  num  3 3 2 3 2 3 4 5 2 2 ...
##  $ edu   : Nmnl. item w/ 4 labels for 1,2,3,... + ms.v.  num  1 2 2 3 3 2 1 1 2 2 ...
##  $ income: Nmnl. item w/ 8 labels for 1,2,3,... + ms.v.  num  4 7 5 7 5 7 5 2 5 6 ...
##  $ relig : Nmnl. item w/ 6 labels for 1,2,3,... + ms.v.  num  1 1 2 1 2 1 1 1 1 2 ...

3. Recoding the variables of interest into categories

attach(vote.48)
vote.48$voteGroup[vote==1] <- 1
vote.48$voteGroup[vote==2] <- 2
vote.48$voteGroup[(vote >= 3) & (vote <=4)] <- 3
detach(vote.48)
voteType <- c("Truman", "Dewey", "Other")
vote.48$voteGroup <-factor(vote.48$voteGroup, labels = voteType)
summary(vote.48$voteGroup)
## Truman  Dewey  Other   NA's 
##    212    178     12    260
attach(vote.48)
vote.48$occuGroup[occu==10] <- 1
vote.48$occuGroup[occu==20] <- 1
vote.48$occuGroup[occu==30] <- 2
vote.48$occuGroup[(occu >= 40) & (occu <= 70)] <- 3
vote.48$occuGroup[occu==80] <- 4
detach(vote.48)
occuType <- c(" Upper White collar ", " Other white collar ", " Blue collar ", " Farmer ")
vote.48$occuGroup <-factor(vote.48$occuGroup, labels = occuType)
summary(vote.48$occuGroup)
##  Upper White collar   Other white collar          Blue collar  
##                  117                   79                  255 
##              Farmer                  NA's 
##                  105                  106
attach(vote.48)
vote.48$reliGroup[relig==1] <- 1
vote.48$reliGroup[relig==2] <- 2
vote.48$reliGroup[(relig >=3) & (relig <=5)] <- 3
detach(vote.48)
reliType <- c(" Protestant ", " Catholic ",  " Other, none ")
vote.48$reliGroup <-factor(vote.48$reliGroup, labels = reliType)
summary(vote.48$reliGroup)
##   Protestant      Catholic   Other, none           NA's 
##           460           140            57             5
attach(vote.48)
vote.48$raceGroup[race==1] <- 1
vote.48$raceGroup[race==2] <- 2
detach(vote.48)
raceType <- c("White", "Black")
vote.48$raceGroup <-factor(vote.48$raceGroup, labels = raceType)

summary(vote.48$raceGroup)
## White Black  NA's 
##   585    60    17
attach(vote.48)
vote.48$incGroup[income ==1] <- 1
vote.48$incGroup[income ==2] <- 2
vote.48$incGroup[income ==3] <- 3
vote.48$incGroup[income ==4] <- 4
vote.48$incGroup[income ==5] <- 5
vote.48$incGroup[income ==6] <- 6
vote.48$incGroup[income ==7] <- 7
detach(vote.48)
incType <-c("Under$500","$500~$999","$1000~$1999","$2000~$2999","$3000~$3999","$4000~$4999","$5000 and more")
vote.48$incGroup <-factor(vote.48$incGroup, labels = incType)

summary(vote.48$incGroup)
##      Under$500      $500~$999    $1000~$1999    $2000~$2999    $3000~$3999 
##             25             43            110            185            142 
##    $4000~$4999 $5000 and more           NA's 
##             66             84              7

4. Replicating the tables between 2 variables

library(DescTools)
options(digits=2)
attach(vote.48)

table(voteGroup, occuGroup) 
##          occuGroup
## voteGroup  Upper White collar   Other white collar   Blue collar   Farmer 
##    Truman                   17                   30           114       26
##    Dewey                    67                   31            36       14
##    Other                     2                    0             4        3
table(occuGroup, voteGroup) %>%
prop.table( margin=1)*100
##                       voteGroup
## occuGroup              Truman Dewey Other
##    Upper White collar    19.8  77.9   2.3
##    Other white collar    49.2  50.8   0.0
##    Blue collar           74.0  23.4   2.6
##    Farmer                60.5  32.6   7.0
table(reliGroup, voteGroup) %>% 
prop.table( margin=1)*100 
##                voteGroup
## reliGroup       Truman Dewey Other
##    Protestant     44.7  51.0   4.3
##    Catholic       66.0  34.0   0.0
##    Other, none    68.2  29.5   2.3
table(raceGroup, voteGroup) %>%
prop.table( margin=1)*100
##          voteGroup
## raceGroup Truman Dewey Other
##     White   51.3  45.5   3.2
##     Black   64.7  35.3   0.0
table(incGroup, voteGroup) %>%
  prop.table( margin=1)*100
##                 voteGroup
## incGroup         Truman Dewey Other
##   Under$500        50.0  50.0   0.0
##   $500~$999        61.5  38.5   0.0
##   $1000~$1999      64.4  32.2   3.4
##   $2000~$2999      67.0  30.1   2.9
##   $3000~$3999      47.5  48.5   4.0
##   $4000~$4999      45.8  50.0   4.2
##   $5000 and more   31.8  68.2   0.0
detach(vote.48)

5. Some examples of more detailed contingency tables

library(gmodels)
CrossTable(vote.48$occuGroup, vote.48$voteGroup) 
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  344 
## 
##  
##                      | vote.48$voteGroup 
##    vote.48$occuGroup |    Truman |     Dewey |     Other | Row Total | 
## ---------------------|-----------|-----------|-----------|-----------|
##  Upper White collar  |        17 |        67 |         2 |        86 | 
##                      |    18.932 |    24.324 |     0.028 |           | 
##                      |     0.198 |     0.779 |     0.023 |     0.250 | 
##                      |     0.091 |     0.453 |     0.222 |           | 
##                      |     0.049 |     0.195 |     0.006 |           | 
## ---------------------|-----------|-----------|-----------|-----------|
##  Other white collar  |        30 |        31 |         0 |        61 | 
##                      |     0.301 |     0.862 |     1.596 |           | 
##                      |     0.492 |     0.508 |     0.000 |     0.177 | 
##                      |     0.160 |     0.209 |     0.000 |           | 
##                      |     0.087 |     0.090 |     0.000 |           | 
## ---------------------|-----------|-----------|-----------|-----------|
##         Blue collar  |       114 |        36 |         4 |       154 | 
##                      |    10.956 |    13.816 |     0.000 |           | 
##                      |     0.740 |     0.234 |     0.026 |     0.448 | 
##                      |     0.610 |     0.243 |     0.444 |           | 
##                      |     0.331 |     0.105 |     0.012 |           | 
## ---------------------|-----------|-----------|-----------|-----------|
##              Farmer  |        26 |        14 |         3 |        43 | 
##                      |     0.295 |     1.095 |     3.125 |           | 
##                      |     0.605 |     0.326 |     0.070 |     0.125 | 
##                      |     0.139 |     0.095 |     0.333 |           | 
##                      |     0.076 |     0.041 |     0.009 |           | 
## ---------------------|-----------|-----------|-----------|-----------|
##         Column Total |       187 |       148 |         9 |       344 | 
##                      |     0.544 |     0.430 |     0.026 |           | 
## ---------------------|-----------|-----------|-----------|-----------|
## 
## 
CrossTable(vote.48$incGroup, vote.48$voteGroup) 
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  398 
## 
##  
##                  | vote.48$voteGroup 
## vote.48$incGroup |    Truman |     Dewey |     Other | Row Total | 
## -----------------|-----------|-----------|-----------|-----------|
##        Under$500 |         4 |         4 |         0 |         8 | 
##                  |     0.012 |     0.055 |     0.221 |           | 
##                  |     0.500 |     0.500 |     0.000 |     0.020 | 
##                  |     0.019 |     0.023 |     0.000 |           | 
##                  |     0.010 |     0.010 |     0.000 |           | 
## -----------------|-----------|-----------|-----------|-----------|
##        $500~$999 |         8 |         5 |         0 |        13 | 
##                  |     0.190 |     0.106 |     0.359 |           | 
##                  |     0.615 |     0.385 |     0.000 |     0.033 | 
##                  |     0.038 |     0.028 |     0.000 |           | 
##                  |     0.020 |     0.013 |     0.000 |           | 
## -----------------|-----------|-----------|-----------|-----------|
##      $1000~$1999 |        38 |        19 |         2 |        59 | 
##                  |     1.516 |     1.997 |     0.084 |           | 
##                  |     0.644 |     0.322 |     0.034 |     0.148 | 
##                  |     0.181 |     0.107 |     0.182 |           | 
##                  |     0.095 |     0.048 |     0.005 |           | 
## -----------------|-----------|-----------|-----------|-----------|
##      $2000~$2999 |        69 |        31 |         3 |       103 | 
##                  |     3.951 |     4.786 |     0.008 |           | 
##                  |     0.670 |     0.301 |     0.029 |     0.259 | 
##                  |     0.329 |     0.175 |     0.273 |           | 
##                  |     0.173 |     0.078 |     0.008 |           | 
## -----------------|-----------|-----------|-----------|-----------|
##      $3000~$3999 |        48 |        49 |         4 |       101 | 
##                  |     0.525 |     0.371 |     0.523 |           | 
##                  |     0.475 |     0.485 |     0.040 |     0.254 | 
##                  |     0.229 |     0.277 |     0.364 |           | 
##                  |     0.121 |     0.123 |     0.010 |           | 
## -----------------|-----------|-----------|-----------|-----------|
##      $4000~$4999 |        22 |        24 |         2 |        48 | 
##                  |     0.437 |     0.330 |     0.342 |           | 
##                  |     0.458 |     0.500 |     0.042 |     0.121 | 
##                  |     0.105 |     0.136 |     0.182 |           | 
##                  |     0.055 |     0.060 |     0.005 |           | 
## -----------------|-----------|-----------|-----------|-----------|
##   $5000 and more |        21 |        45 |         0 |        66 | 
##                  |     5.488 |     8.343 |     1.824 |           | 
##                  |     0.318 |     0.682 |     0.000 |     0.166 | 
##                  |     0.100 |     0.254 |     0.000 |           | 
##                  |     0.053 |     0.113 |     0.000 |           | 
## -----------------|-----------|-----------|-----------|-----------|
##     Column Total |       210 |       177 |        11 |       398 | 
##                  |     0.528 |     0.445 |     0.028 |           | 
## -----------------|-----------|-----------|-----------|-----------|
## 
## 

6. Regression Analysis using Multinomial-Logit Model.

The dependent variable “vote” is an unordered factor response, having several discrete values(“Truman”, “Dewey” and others) which are arranged with no order. So Multinomial logistic regression can be considered to analize the relationship between the response variable “vote” and predictors.The function multinom can fit models in which the observations represent counts in the several response categories, as one would have in a contingency table.(Fox and Weisberg 2011) The following analysis sets “Truman” as the baseline category and then fits a mulitinomial-logit model with the predictors. So this models compares the ratio of probabilities of the possible outcomes of the baseline “Truman” and other categories in the dependent variable “vote”, given a set of independent variables. Therefore, the test hypothesis is as follows.
Ho(null hypothesis): b0=b1=b2 = …bk = 0 (The effect of all predictors on the ratio of other categories to the base line is not significant) and

Ha(alternative hypothesis): Not all coefficients of the predictors on the ratio are 0(The effect of predictors on the response variable is statistically significant).

library(car)
library(nnet)
library(stats)

vote.48$voteGroup <- factor(vote.48$voteGroup, levels=c("Truman", "Dewey", "others"))
mod.multinom <- multinom(voteGroup ~ income + occu+ relig +race+edu+age, data = vote.48)
## # weights:  31 (30 variable)
## initial  value 248.839838 
## iter  10 value 173.520811
## iter  20 value 172.460712
## iter  30 value 172.381567
## iter  40 value 172.367030
## final  value 172.366999 
## converged
Anova(mod.multinom)
## Analysis of Deviance Table (Type II tests)
## 
## Response: voteGroup
##        LR Chisq Df Pr(>Chisq)    
## income      6.6  6    0.35663    
## occu       43.1 10    4.8e-06 ***
## relig      31.6  4    2.3e-06 ***
## race        0.1  2    0.96334    
## edu        19.3  2    6.5e-05 ***
## age        21.5  5    0.00064 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

As a result, the variables “Occupation”, “Religion”, “Education”, and “Age” have statistically significant effects on the response variable “Vote”. However, “race” and “income” are not significant on the ressponse variable. The following summary shows the coefficents measures and s.d. of the coefficients of predictors.

mod.multinom.1 <- update(mod.multinom, .~. - income -race)
## # weights:  23 (22 variable)
## initial  value 257.157604 
## iter  10 value 182.624456
## iter  20 value 182.027836
## iter  30 value 181.960428
## final  value 181.959785 
## converged
summary(mod.multinom.1)
## Call:
## multinom(formula = voteGroup ~ occu + relig + edu + age, data = vote.48)
## 
## Coefficients:
##                                              Values Std. Err.
## (Intercept)                                  -2.211      0.91
## occuSELF-EMPLOYED, MANAGERIAL, SUPERVISORY    2.008      0.69
## occuOTHER WHITE-COLLAR (CLERICAL, SALES, ET  -0.162      0.55
## occuSKILLED AND SEMI-SKILLED                 -0.982      0.55
## occuPROTECTIVE SERVICE                        0.455      1.34
## occuUNSKILLED, INCLUDING FARM AND SERVICE W  -0.986      0.62
## occuFARM OPERATORS AND MANAGERS              -0.702      0.63
## occuSTUDENT                                   0.744      1.48
## occuUNEMPLOYED                                0.000       NaN
## occuRETIRED, TOO OLD OR UNABLE TO WORK        0.391      0.74
## occuHOUSEWIFE                                 0.370      0.89
## religCATHOLIC                                -0.493      0.31
## religJEWISH                                 -16.766    723.75
## religOTHER                                   -0.225      0.74
## religNONE                                    -0.082      0.80
## eduHIGH SCHOOL                                0.865      0.33
## eduCOLLEGE                                    2.237      0.48
## age25-34                                      1.326      0.76
## age35-44                                      1.314      0.74
## age45-54                                      2.288      0.77
## age55-64                                      2.770      0.81
## age65 AND OVER                                1.992      0.86
## 
## Residual Deviance: 364 
## AIC: 406

References

Analysing the American National Election Study of 1948 using the memisc package[http://cran.r-project.org/web/packages/memisc/vignettes/anes48.pdf]

Fox, John, and Harvey Sanford Weisberg. 2011. An R Companion to Applied Regression. 2nd ed. Thousand Oaks, CA: Sage Publications.