library(foreign)
library(Zelig)
library(memisc)
library(magrittr)
options(digits=3)
nes1948.por <- UnZip("anes/NES1948.ZIP","NES1948.POR", package="memisc")
nes1948 <- spss.portable.file(nes1948.por)
print(nes1948)
##
## SPSS portable file 'C:/Users/Y2K/AppData/Local/Temp/RtmpMnZFEF/NES1948.POR'
## with 67 variables and 662 observations
dim(nes1948)
## [1] 662 67
library(plyr)
vote.48 <- subset(nes1948, select=c(v480018, v480029, v480030, v480045, v480046, v480047, v480048, v480049, v480050))
str(vote.48)
## Data set with 662 obs. of 9 variables:
## $ v480018: Nmnl. item w/ 7 labels for 1,2,3,... + ms.v. num 1 2 1 2 1 2 2 1 2 1 ...
## $ v480029: Nmnl. item w/ 12 labels for 10,20,30,... + ms.v. num 70 30 40 10 10 20 80 80 40 40 ...
## $ v480030: Nmnl. item w/ 4 labels for 1,2,8,... + ms.v. num 1 2 2 2 2 2 2 2 1 1 ...
## $ v480045: Nmnl. item w/ 3 labels for 1,2,9 + ms.v. num 1 2 2 2 1 2 1 2 1 1 ...
## $ v480046: Nmnl. item w/ 4 labels for 1,2,3,... + ms.v. num 1 1 1 1 1 1 1 1 1 1 ...
## $ v480047: Nmnl. item w/ 7 labels for 1,2,3,... + ms.v. num 3 3 2 3 2 3 4 5 2 2 ...
## $ v480048: Nmnl. item w/ 4 labels for 1,2,3,... + ms.v. num 1 2 2 3 3 2 1 1 2 2 ...
## $ v480049: Nmnl. item w/ 8 labels for 1,2,3,... + ms.v. num 4 7 5 7 5 7 5 2 5 6 ...
## $ v480050: Nmnl. item w/ 6 labels for 1,2,3,... + ms.v. num 1 1 2 1 2 1 1 1 1 2 ...
vote.48 <- rename(vote.48,
c(v480018="vote",
v480029="occu",
v480030="union",
v480045="gender",
v480046="race",
v480047="age",
v480048="edu",
v480049="income",
v480050="relig"
))
str(vote.48)
## Data set with 662 obs. of 9 variables:
## $ vote : Nmnl. item w/ 7 labels for 1,2,3,... + ms.v. num 1 2 1 2 1 2 2 1 2 1 ...
## $ occu : Nmnl. item w/ 12 labels for 10,20,30,... + ms.v. num 70 30 40 10 10 20 80 80 40 40 ...
## $ union : Nmnl. item w/ 4 labels for 1,2,8,... + ms.v. num 1 2 2 2 2 2 2 2 1 1 ...
## $ gender: Nmnl. item w/ 3 labels for 1,2,9 + ms.v. num 1 2 2 2 1 2 1 2 1 1 ...
## $ race : Nmnl. item w/ 4 labels for 1,2,3,... + ms.v. num 1 1 1 1 1 1 1 1 1 1 ...
## $ age : Nmnl. item w/ 7 labels for 1,2,3,... + ms.v. num 3 3 2 3 2 3 4 5 2 2 ...
## $ edu : Nmnl. item w/ 4 labels for 1,2,3,... + ms.v. num 1 2 2 3 3 2 1 1 2 2 ...
## $ income: Nmnl. item w/ 8 labels for 1,2,3,... + ms.v. num 4 7 5 7 5 7 5 2 5 6 ...
## $ relig : Nmnl. item w/ 6 labels for 1,2,3,... + ms.v. num 1 1 2 1 2 1 1 1 1 2 ...
attach(vote.48)
vote.48$voteGroup[vote==1] <- 1
vote.48$voteGroup[vote==2] <- 2
vote.48$voteGroup[(vote >= 3) & (vote <=4)] <- 3
detach(vote.48)
voteType <- c("Truman", "Dewey", "Other")
vote.48$voteGroup <-factor(vote.48$voteGroup, labels = voteType)
summary(vote.48$voteGroup)
## Truman Dewey Other NA's
## 212 178 12 260
attach(vote.48)
vote.48$occuGroup[occu==10] <- 1
vote.48$occuGroup[occu==20] <- 1
vote.48$occuGroup[occu==30] <- 2
vote.48$occuGroup[(occu >= 40) & (occu <= 70)] <- 3
vote.48$occuGroup[occu==80] <- 4
detach(vote.48)
occuType <- c(" Upper White collar ", " Other white collar ", " Blue collar ", " Farmer ")
vote.48$occuGroup <-factor(vote.48$occuGroup, labels = occuType)
summary(vote.48$occuGroup)
## Upper White collar Other white collar Blue collar
## 117 79 255
## Farmer NA's
## 105 106
attach(vote.48)
vote.48$reliGroup[relig==1] <- 1
vote.48$reliGroup[relig==2] <- 2
vote.48$reliGroup[(relig >=3) & (relig <=5)] <- 3
detach(vote.48)
reliType <- c(" Protestant ", " Catholic ", " Other, none ")
vote.48$reliGroup <-factor(vote.48$reliGroup, labels = reliType)
summary(vote.48$reliGroup)
## Protestant Catholic Other, none NA's
## 460 140 57 5
attach(vote.48)
vote.48$raceGroup[race==1] <- 1
vote.48$raceGroup[race==2] <- 2
detach(vote.48)
raceType <- c("White", "Black")
vote.48$raceGroup <-factor(vote.48$raceGroup, labels = raceType)
summary(vote.48$raceGroup)
## White Black NA's
## 585 60 17
attach(vote.48)
vote.48$incGroup[income ==1] <- 1
vote.48$incGroup[income ==2] <- 2
vote.48$incGroup[income ==3] <- 3
vote.48$incGroup[income ==4] <- 4
vote.48$incGroup[income ==5] <- 5
vote.48$incGroup[income ==6] <- 6
vote.48$incGroup[income ==7] <- 7
detach(vote.48)
incType <-c("Under$500","$500~$999","$1000~$1999","$2000~$2999","$3000~$3999","$4000~$4999","$5000 and more")
vote.48$incGroup <-factor(vote.48$incGroup, labels = incType)
summary(vote.48$incGroup)
## Under$500 $500~$999 $1000~$1999 $2000~$2999 $3000~$3999
## 25 43 110 185 142
## $4000~$4999 $5000 and more NA's
## 66 84 7
library(DescTools)
options(digits=2)
attach(vote.48)
table(voteGroup, occuGroup)
## occuGroup
## voteGroup Upper White collar Other white collar Blue collar Farmer
## Truman 17 30 114 26
## Dewey 67 31 36 14
## Other 2 0 4 3
table(occuGroup, voteGroup) %>%
prop.table( margin=1)*100
## voteGroup
## occuGroup Truman Dewey Other
## Upper White collar 19.8 77.9 2.3
## Other white collar 49.2 50.8 0.0
## Blue collar 74.0 23.4 2.6
## Farmer 60.5 32.6 7.0
table(reliGroup, voteGroup) %>%
prop.table( margin=1)*100
## voteGroup
## reliGroup Truman Dewey Other
## Protestant 44.7 51.0 4.3
## Catholic 66.0 34.0 0.0
## Other, none 68.2 29.5 2.3
table(raceGroup, voteGroup) %>%
prop.table( margin=1)*100
## voteGroup
## raceGroup Truman Dewey Other
## White 51.3 45.5 3.2
## Black 64.7 35.3 0.0
table(incGroup, voteGroup) %>%
prop.table( margin=1)*100
## voteGroup
## incGroup Truman Dewey Other
## Under$500 50.0 50.0 0.0
## $500~$999 61.5 38.5 0.0
## $1000~$1999 64.4 32.2 3.4
## $2000~$2999 67.0 30.1 2.9
## $3000~$3999 47.5 48.5 4.0
## $4000~$4999 45.8 50.0 4.2
## $5000 and more 31.8 68.2 0.0
detach(vote.48)
library(gmodels)
CrossTable(vote.48$occuGroup, vote.48$voteGroup)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 344
##
##
## | vote.48$voteGroup
## vote.48$occuGroup | Truman | Dewey | Other | Row Total |
## ---------------------|-----------|-----------|-----------|-----------|
## Upper White collar | 17 | 67 | 2 | 86 |
## | 18.932 | 24.324 | 0.028 | |
## | 0.198 | 0.779 | 0.023 | 0.250 |
## | 0.091 | 0.453 | 0.222 | |
## | 0.049 | 0.195 | 0.006 | |
## ---------------------|-----------|-----------|-----------|-----------|
## Other white collar | 30 | 31 | 0 | 61 |
## | 0.301 | 0.862 | 1.596 | |
## | 0.492 | 0.508 | 0.000 | 0.177 |
## | 0.160 | 0.209 | 0.000 | |
## | 0.087 | 0.090 | 0.000 | |
## ---------------------|-----------|-----------|-----------|-----------|
## Blue collar | 114 | 36 | 4 | 154 |
## | 10.956 | 13.816 | 0.000 | |
## | 0.740 | 0.234 | 0.026 | 0.448 |
## | 0.610 | 0.243 | 0.444 | |
## | 0.331 | 0.105 | 0.012 | |
## ---------------------|-----------|-----------|-----------|-----------|
## Farmer | 26 | 14 | 3 | 43 |
## | 0.295 | 1.095 | 3.125 | |
## | 0.605 | 0.326 | 0.070 | 0.125 |
## | 0.139 | 0.095 | 0.333 | |
## | 0.076 | 0.041 | 0.009 | |
## ---------------------|-----------|-----------|-----------|-----------|
## Column Total | 187 | 148 | 9 | 344 |
## | 0.544 | 0.430 | 0.026 | |
## ---------------------|-----------|-----------|-----------|-----------|
##
##
CrossTable(vote.48$incGroup, vote.48$voteGroup)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 398
##
##
## | vote.48$voteGroup
## vote.48$incGroup | Truman | Dewey | Other | Row Total |
## -----------------|-----------|-----------|-----------|-----------|
## Under$500 | 4 | 4 | 0 | 8 |
## | 0.012 | 0.055 | 0.221 | |
## | 0.500 | 0.500 | 0.000 | 0.020 |
## | 0.019 | 0.023 | 0.000 | |
## | 0.010 | 0.010 | 0.000 | |
## -----------------|-----------|-----------|-----------|-----------|
## $500~$999 | 8 | 5 | 0 | 13 |
## | 0.190 | 0.106 | 0.359 | |
## | 0.615 | 0.385 | 0.000 | 0.033 |
## | 0.038 | 0.028 | 0.000 | |
## | 0.020 | 0.013 | 0.000 | |
## -----------------|-----------|-----------|-----------|-----------|
## $1000~$1999 | 38 | 19 | 2 | 59 |
## | 1.516 | 1.997 | 0.084 | |
## | 0.644 | 0.322 | 0.034 | 0.148 |
## | 0.181 | 0.107 | 0.182 | |
## | 0.095 | 0.048 | 0.005 | |
## -----------------|-----------|-----------|-----------|-----------|
## $2000~$2999 | 69 | 31 | 3 | 103 |
## | 3.951 | 4.786 | 0.008 | |
## | 0.670 | 0.301 | 0.029 | 0.259 |
## | 0.329 | 0.175 | 0.273 | |
## | 0.173 | 0.078 | 0.008 | |
## -----------------|-----------|-----------|-----------|-----------|
## $3000~$3999 | 48 | 49 | 4 | 101 |
## | 0.525 | 0.371 | 0.523 | |
## | 0.475 | 0.485 | 0.040 | 0.254 |
## | 0.229 | 0.277 | 0.364 | |
## | 0.121 | 0.123 | 0.010 | |
## -----------------|-----------|-----------|-----------|-----------|
## $4000~$4999 | 22 | 24 | 2 | 48 |
## | 0.437 | 0.330 | 0.342 | |
## | 0.458 | 0.500 | 0.042 | 0.121 |
## | 0.105 | 0.136 | 0.182 | |
## | 0.055 | 0.060 | 0.005 | |
## -----------------|-----------|-----------|-----------|-----------|
## $5000 and more | 21 | 45 | 0 | 66 |
## | 5.488 | 8.343 | 1.824 | |
## | 0.318 | 0.682 | 0.000 | 0.166 |
## | 0.100 | 0.254 | 0.000 | |
## | 0.053 | 0.113 | 0.000 | |
## -----------------|-----------|-----------|-----------|-----------|
## Column Total | 210 | 177 | 11 | 398 |
## | 0.528 | 0.445 | 0.028 | |
## -----------------|-----------|-----------|-----------|-----------|
##
##
The dependent variable “vote” is an unordered factor response, having several discrete values(“Truman”, “Dewey” and others) which are arranged with no order. So Multinomial logistic regression can be considered to analize the relationship between the response variable “vote” and predictors.The function multinom can fit models in which the observations represent counts in the several response categories, as one would have in a contingency table.(Fox and Weisberg 2011) The following analysis sets “Truman” as the baseline category and then fits a mulitinomial-logit model with the predictors. So this models compares the ratio of probabilities of the possible outcomes of the baseline “Truman” and other categories in the dependent variable “vote”, given a set of independent variables. Therefore, the test hypothesis is as follows.
Ho(null hypothesis): b0=b1=b2 = …bk = 0 (The effect of all predictors on the ratio of other categories to the base line is not significant) and
Ha(alternative hypothesis): Not all coefficients of the predictors on the ratio are 0(The effect of predictors on the response variable is statistically significant).
library(car)
library(nnet)
library(stats)
vote.48$voteGroup <- factor(vote.48$voteGroup, levels=c("Truman", "Dewey", "others"))
mod.multinom <- multinom(voteGroup ~ income + occu+ relig +race+edu+age, data = vote.48)
## # weights: 31 (30 variable)
## initial value 248.839838
## iter 10 value 173.520811
## iter 20 value 172.460712
## iter 30 value 172.381567
## iter 40 value 172.367030
## final value 172.366999
## converged
Anova(mod.multinom)
## Analysis of Deviance Table (Type II tests)
##
## Response: voteGroup
## LR Chisq Df Pr(>Chisq)
## income 6.6 6 0.35663
## occu 43.1 10 4.8e-06 ***
## relig 31.6 4 2.3e-06 ***
## race 0.1 2 0.96334
## edu 19.3 2 6.5e-05 ***
## age 21.5 5 0.00064 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
As a result, the variables “Occupation”, “Religion”, “Education”, and “Age” have statistically significant effects on the response variable “Vote”. However, “race” and “income” are not significant on the ressponse variable. The following summary shows the coefficents measures and s.d. of the coefficients of predictors.
mod.multinom.1 <- update(mod.multinom, .~. - income -race)
## # weights: 23 (22 variable)
## initial value 257.157604
## iter 10 value 182.624456
## iter 20 value 182.027836
## iter 30 value 181.960428
## final value 181.959785
## converged
summary(mod.multinom.1)
## Call:
## multinom(formula = voteGroup ~ occu + relig + edu + age, data = vote.48)
##
## Coefficients:
## Values Std. Err.
## (Intercept) -2.211 0.91
## occuSELF-EMPLOYED, MANAGERIAL, SUPERVISORY 2.008 0.69
## occuOTHER WHITE-COLLAR (CLERICAL, SALES, ET -0.162 0.55
## occuSKILLED AND SEMI-SKILLED -0.982 0.55
## occuPROTECTIVE SERVICE 0.455 1.34
## occuUNSKILLED, INCLUDING FARM AND SERVICE W -0.986 0.62
## occuFARM OPERATORS AND MANAGERS -0.702 0.63
## occuSTUDENT 0.744 1.48
## occuUNEMPLOYED 0.000 NaN
## occuRETIRED, TOO OLD OR UNABLE TO WORK 0.391 0.74
## occuHOUSEWIFE 0.370 0.89
## religCATHOLIC -0.493 0.31
## religJEWISH -16.766 723.75
## religOTHER -0.225 0.74
## religNONE -0.082 0.80
## eduHIGH SCHOOL 0.865 0.33
## eduCOLLEGE 2.237 0.48
## age25-34 1.326 0.76
## age35-44 1.314 0.74
## age45-54 2.288 0.77
## age55-64 2.770 0.81
## age65 AND OVER 1.992 0.86
##
## Residual Deviance: 364
## AIC: 406
Analysing the American National Election Study of 1948 using the memisc package[http://cran.r-project.org/web/packages/memisc/vignettes/anes48.pdf]
Fox, John, and Harvey Sanford Weisberg. 2011. An R Companion to Applied Regression. 2nd ed. Thousand Oaks, CA: Sage Publications.