library(car)
## Loading required package: carData
library(mice)
## Loading required package: lattice
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(haven)
# Read the GSS extract (SPSS .sav) with haven. The path is relative to the
# current working directory.
GSS <- read_sav("Desktop/GSS7216_R4.sav")

# Recode the raw items into labelled factors with car::Recode(). Every recode
# string ends in else=NA so that any code not listed becomes missing instead of
# surviving as an extra factor level. Each factor is then releveled so the
# chosen reference category is the first level.

#abortion attitudes
GSS$abort <- Recode(GSS$ABANY, recodes = "1='Pro-Choice'; 2='Pro-Life'; else=NA", as.factor = TRUE)
GSS$abort <- relevel(GSS$abort, ref = "Pro-Life")

GSS$race_eth <- Recode(GSS$RACE, recodes = "1='nhwhite'; 2='nh black'; 3='nh other';else=NA", as.factor = TRUE)
GSS$race_eth <- relevel(GSS$race_eth, ref = "nhwhite")

GSS$educ <- Recode(GSS$DEGREE, recodes = "0='Less than HS';1='High School';2='Junior College';3='Undergraduate';4='Graduate';else=NA", as.factor = TRUE)
GSS$educ <- relevel(GSS$educ, ref = "Graduate")

# NOTE(review): 'Seperated' is misspelled, but the label is left as-is because
# the level name appears in downstream tables and coefficient names.
GSS$marst <- Recode(GSS$MARITAL, recodes = "1='Married'; 2='Widowed';3='Divorced';4='Seperated';5='Never Married'; else=NA", as.factor = TRUE)
GSS$marst <- relevel(GSS$marst, ref = "Married")

GSS$party <- Recode(GSS$PARTYID, recodes = "0:2='Democrat'; 3='Independent'; 4:6='Republican';7='Other'; else=NA", as.factor = TRUE)
GSS$party <- relevel(GSS$party, ref = "Republican")

GSS$polit <- Recode(GSS$MEMPOLIT, recodes = "1='Politically Active'; 2='Not Politically Active';else=NA", as.factor = TRUE)
GSS$polit <- relevel(GSS$polit, ref = "Politically Active")

GSS$polview <- Recode(GSS$POLVIEWS, recodes = "1:3='Liberal'; 4='Moderate'; 5:7='Conservative';else=NA", as.factor = TRUE)
GSS$polview <- relevel(GSS$polview, ref = "Conservative")

# else=NA added here for consistency with the other recodes; without it any
# code other than 1 or 2 would be carried through unchanged.
GSS$discrim <- Recode(GSS$RACDIF1, recodes = "1='Discrimination'; 2='No Discrimination'; else=NA", as.factor = TRUE)
GSS$discrim <- relevel(GSS$discrim, ref = "No Discrimination")
#abortion attitudes indexed
# Principal components of the six abortion items, centered and scaled to unit
# variance. `scale.` is spelled out in full (the original `scale=` only worked
# through partial argument matching). Rows with a missing item are handled by
# the formula method's na.action (na.omit under default options -- TODO confirm
# options("na.action") has not been changed).
GSS.pc <- prcomp(
  ~ ABDEFECT + ABNOMORE + ABPOOR + ABRAPE + ABSINGLE + ABANY,
  data = GSS,
  center = TRUE,
  scale. = TRUE,
  retx = TRUE
)

# Attach the component scores back to the survey rows by row name.
scores <- data.frame(GSS.pc$x)
scores$name <- rownames(GSS.pc$x)
GSS$name <- rownames(GSS)

# all.x = FALSE (with the default all.y = FALSE) keeps only the rows that have
# component scores, so cases dropped by prcomp are dropped from GSS as well.
GSS <- merge(GSS, scores, by = "name", all.x = FALSE)
# List the last 20 column names of the merged data; the recoded factors and
# the PC score columns appear at the end.
tail(colnames(GSS), n = 20)
## [1] "RHLTHEND" "WTSS" "WTSSNR" "WTSSALL" "VSTRAT" "VPSU"
## [7] "abort" "race_eth" "educ" "marst" "party" "polit"
## [13] "polview" "discrim" "PC1" "PC2" "PC3" "PC4"
## [19] "PC5" "PC6"
# Spearman rank correlations among the six abortion items, to 3 decimals.
round(
  cor(
    GSS[, c("ABDEFECT", "ABNOMORE", "ABPOOR", "ABRAPE", "ABSINGLE", "ABANY")],
    method = "spearman"
  ),
  digits = 3
)
## ABDEFECT ABNOMORE ABPOOR ABRAPE ABSINGLE ABANY
## ABDEFECT 1.000 0.444 0.443 0.609 0.436 0.408
## ABNOMORE 0.444 1.000 0.782 0.424 0.800 0.777
## ABPOOR 0.443 0.782 1.000 0.441 0.790 0.735
## ABRAPE 0.609 0.424 0.441 1.000 0.432 0.401
## ABSINGLE 0.436 0.800 0.790 0.432 1.000 0.796
## ABANY 0.408 0.777 0.735 0.401 0.796 1.000
# Same Spearman correlation matrix, extended with the first two component
# scores so each item's association with PC1 and PC2 can be read off.
round(
  cor(
    GSS[, c(
      "ABDEFECT", "ABNOMORE", "ABPOOR", "ABRAPE", "ABSINGLE", "ABANY",
      "PC1", "PC2"
    )],
    method = "spearman"
  ),
  digits = 3
)
## ABDEFECT ABNOMORE ABPOOR ABRAPE ABSINGLE ABANY PC1 PC2
## ABDEFECT 1.000 0.444 0.443 0.609 0.436 0.408 0.662 0.521
## ABNOMORE 0.444 1.000 0.782 0.424 0.800 0.777 0.874 -0.310
## ABPOOR 0.443 0.782 1.000 0.441 0.790 0.735 0.867 -0.284
## ABRAPE 0.609 0.424 0.441 1.000 0.432 0.401 0.676 0.552
## ABSINGLE 0.436 0.800 0.790 0.432 1.000 0.796 0.880 -0.319
## ABANY 0.408 0.777 0.735 0.401 0.796 1.000 0.853 -0.364
## PC1 0.662 0.874 0.867 0.676 0.880 0.853 1.000 -0.081
## PC2 0.521 -0.310 -0.284 0.552 -0.319 -0.364 -0.081 1.000
# Per-variable summaries (level counts for factors, five-number summary for
# numerics), including NA counts. RACDIF1 is the raw item here, not the
# recoded `discrim` factor, so it is summarised numerically.
summary(
  GSS[, c(
    "race_eth", "educ", "marst", "party", "polit",
    "RACDIF1", "polview", "PC1", "PC2"
  )]
)
## race_eth educ marst
## nhwhite :25892 Graduate : 2478 Married :16672
## nh black: 4575 High School :16717 Divorced : 4335
## nh other: 1694 Junior College: 1842 Never Married: 7113
## Less than HS : 6252 Seperated : 1120
## Undergraduate : 4804 Widowed : 2906
## NA's : 68 NA's : 15
##
## party polit RACDIF1
## Republican :11137 Politically Active : 432 Min. :1.000
## Democrat :15880 Not Politically Active:10073 1st Qu.:1.000
## Independent: 4521 NA's :21656 Median :2.000
## Other : 467 Mean :1.591
## NA's : 156 3rd Qu.:2.000
## Max. :2.000
## NA's :19303
## polview PC1 PC2
## Conservative:10214 Min. :-2.2666 Min. :-1.4726
## Liberal : 8641 1st Qu.:-2.2666 1st Qu.:-1.0030
## Moderate :11447 Median : 0.4066 Median : 0.3656
## NA's : 1859 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 1.3045 3rd Qu.: 0.3656
## Max. : 2.9115 Max. : 3.4831
##
Of the selected variables, RACDIF1 and polit have a great many missing values (19,303 and 21,656 of 32,161 cases, respectively). The other variables have only a minimal amount of missingness relative to their number of reported values.
# Counts of the observed (non-missing) polit categories.
table(GSS$polit)
##
## Politically Active Not Politically Active
## 432 10073
#most common value
# The modal category, stored as a one-element factor that shares polit's levels.
mcv.polit <- factor(names(which.max(table(GSS$polit))), levels = levels(GSS$polit))
mcv.polit
## [1] Not Politically Active
## Levels: Politically Active Not Politically Active
#impute the cases
# Copy the factor and overwrite only its missing entries with the mode.
# Assigning into the factor keeps the level labels attached to the values;
# ifelse() on a factor returns bare integer codes, and relabelling those by
# position afterwards can mislabel them if a level happens to be absent.
GSS$polit.imp <- GSS$polit
GSS$polit.imp[is.na(GSS$polit.imp)] <- mcv.polit
# Category shares before imputation (NAs are excluded by table()).
prop.table(table(GSS$polit))
##
## Politically Active Not Politically Active
## 0.04112327 0.95887673
The majority of respondents recorded themselves as not politically active. A modal imputation is made so that missingness is accounted for.
# Category shares after modal imputation.
prop.table(table(GSS$polit.imp))
##
## Politically Active Not Politically Active
## 0.01343242 0.98656758
# Side-by-side comparison of the shares before and after imputation. The y
# axis runs to 1 because the larger category is above 0.95 in both tables;
# the previous upper limit of 0.6 let the bars run past the end of the axis.
barplot(prop.table(table(GSS$polit)), main = "Original Data", ylim = c(0, 1))
barplot(prop.table(table(GSS$polit.imp)), main = "Imputed Data", ylim = c(0, 1))
The share of politically active respondents decreased by about 3 percentage points (from 4.1 to 1.3 percent), because every imputed case was assigned to the not politically active category.
# Counts of the observed (non-missing) discrim categories.
table(GSS$discrim)
##
## No Discrimination Discrimination
## 7602 5256
#most common value
# The modal category, stored as a one-element factor that shares discrim's
# levels.
mcv.discrim <- factor(names(which.max(table(GSS$discrim))), levels = levels(GSS$discrim))
mcv.discrim
## [1] No Discrimination
## Levels: No Discrimination Discrimination
#impute the cases
# Copy the factor and overwrite only its missing entries with the mode.
# Assigning into the factor keeps the level labels attached to the values;
# ifelse() on a factor returns bare integer codes, and relabelling those by
# position afterwards can mislabel them if a level happens to be absent.
GSS$discrim.imp <- GSS$discrim
GSS$discrim.imp[is.na(GSS$discrim.imp)] <- mcv.discrim
# Category shares before imputation (NAs are excluded by table()).
prop.table(table(GSS$discrim))
##
## No Discrimination Discrimination
## 0.5912273 0.4087727
# Category shares after modal imputation.
prop.table(table(GSS$discrim.imp))
##
## No Discrimination Discrimination
## 0.8365722 0.1634278
# Side-by-side comparison of the shares before and after imputation. The y
# axis runs to 1 because the imputed "No Discrimination" share (about 0.84)
# is larger than the previous upper limit of 0.6. Both plots share the same
# axis so they can be compared directly.
barplot(prop.table(table(GSS$discrim)), main = "Original Data", ylim = c(0, 1))
barplot(prop.table(table(GSS$discrim.imp)), main = "Imputed Data", ylim = c(0, 1))
This is where things become dangerous. The modal imputation exaggerated the number of respondents who recorded “no discrimination” in reference to their opinion on whether members of disadvantaged racial groups actually experience differential treatment because of their race. This group went from accounting for about 59 percent of the initial sample to about 84 percent after imputation.
# Tabulate the joint missing-data patterns across the analysis variables
# (1 = observed, 0 = missing). The raw RACDIF1 item is used here rather than
# the recoded `discrim` factor.
md.pattern(
  GSS[, c(
    "race_eth", "educ", "marst", "party", "polit",
    "RACDIF1", "polview", "PC1", "PC2"
  )]
)
## race_eth PC1 PC2 marst educ party polview RACDIF1 polit
## 1460 1 1 1 1 1 1 1 1 1 0
## 10935 1 1 1 1 1 1 1 1 0 1
## 7971 1 1 1 1 1 1 1 0 1 1
## 9769 1 1 1 1 1 1 1 0 0 2
## 44 1 1 1 1 1 1 0 1 1 1
## 340 1 1 1 1 1 1 0 1 0 2
## 975 1 1 1 1 1 1 0 0 1 2
## 438 1 1 1 1 1 1 0 0 0 3
## 6 1 1 1 1 1 0 1 1 1 1
## 37 1 1 1 1 1 0 1 1 0 2
## 11 1 1 1 1 1 0 1 0 1 2
## 50 1 1 1 1 1 0 1 0 0 3
## 1 1 1 1 1 1 0 0 1 1 2
## 8 1 1 1 1 1 0 0 1 0 3
## 16 1 1 1 1 1 0 0 0 1 3
## 18 1 1 1 1 1 0 0 0 0 4
## 3 1 1 1 1 0 1 1 1 1 1
## 14 1 1 1 1 0 1 1 1 0 2
## 8 1 1 1 1 0 1 1 0 1 2
## 25 1 1 1 1 0 1 1 0 0 3
## 2 1 1 1 1 0 1 0 1 0 3
## 4 1 1 1 1 0 1 0 0 1 3
## 4 1 1 1 1 0 1 0 0 0 4
## 1 1 1 1 1 0 0 1 0 1 3
## 1 1 1 1 1 0 0 1 0 0 4
## 1 1 1 1 1 0 0 0 1 0 4
## 3 1 1 1 1 0 0 0 0 1 4
## 1 1 1 1 1 0 0 0 0 0 5
## 6 1 1 1 0 1 1 1 1 0 2
## 2 1 1 1 0 1 1 1 0 1 2
## 2 1 1 1 0 1 1 1 0 0 3
## 2 1 1 1 0 1 1 0 0 0 4
## 1 1 1 1 0 1 0 1 0 0 4
## 1 1 1 1 0 1 0 0 1 0 4
## 1 1 1 1 0 0 1 0 0 0 5
## 0 0 0 15 68 156 1859 19303 21656 43057
# Pairwise counts of observed/missing combinations for each pair of analysis
# variables: rr = both observed, rm = row observed and column missing,
# mr = row missing and column observed, mm = both missing.
md.pairs(
  GSS[, c(
    "race_eth", "educ", "marst", "party", "polit",
    "discrim", "polview", "PC1", "PC2"
  )]
)
## $rr
## race_eth educ marst party polit discrim polview PC1 PC2
## race_eth 32161 32093 32146 32005 10505 12858 30302 32161 32161
## educ 32093 32093 32079 31944 10486 12838 30250 32093 32093
## marst 32146 32079 32146 31992 10503 12851 30291 32146 32146
## party 32005 31944 31992 32005 10467 12804 30195 32005 32005
## polit 10505 10486 10503 10467 10505 1514 9462 10505 10505
## discrim 12858 12838 12851 12804 1514 12858 12461 12858 12858
## polview 30302 30250 30291 30195 9462 12461 30302 30302 30302
## PC1 32161 32093 32146 32005 10505 12858 30302 32161 32161
## PC2 32161 32093 32146 32005 10505 12858 30302 32161 32161
##
## $rm
## race_eth educ marst party polit discrim polview PC1 PC2
## race_eth 0 68 15 156 21656 19303 1859 0 0
## educ 0 0 14 149 21607 19255 1843 0 0
## marst 0 67 0 154 21643 19295 1855 0 0
## party 0 61 13 0 21538 19201 1810 0 0
## polit 0 19 2 38 0 8991 1043 0 0
## discrim 0 20 7 54 11344 0 397 0 0
## polview 0 52 11 107 20840 17841 0 0 0
## PC1 0 68 15 156 21656 19303 1859 0 0
## PC2 0 68 15 156 21656 19303 1859 0 0
##
## $mr
## race_eth educ marst party polit discrim polview PC1 PC2
## race_eth 0 0 0 0 0 0 0 0 0
## educ 68 0 67 61 19 20 52 68 68
## marst 15 14 0 13 2 7 11 15 15
## party 156 149 154 0 38 54 107 156 156
## polit 21656 21607 21643 21538 0 11344 20840 21656 21656
## discrim 19303 19255 19295 19201 8991 0 17841 19303 19303
## polview 1859 1843 1855 1810 1043 397 0 1859 1859
## PC1 0 0 0 0 0 0 0 0 0
## PC2 0 0 0 0 0 0 0 0 0
##
## $mm
## race_eth educ marst party polit discrim polview PC1 PC2
## race_eth 0 0 0 0 0 0 0 0 0
## educ 0 68 1 7 49 48 16 0 0
## marst 0 1 15 2 13 8 4 0 0
## party 0 7 2 156 118 102 49 0 0
## polit 0 49 13 118 21656 10312 816 0 0
## discrim 0 48 8 102 10312 19303 1462 0 0
## polview 0 16 4 49 816 1462 1859 0 0
## PC1 0 0 0 0 0 0 0 0 0
## PC2 0 0 0 0 0 0 0 0 0
# Multiple imputation by chained equations on the analysis variables:
# five imputed datasets, with a fixed seed so the draws are reproducible.
# The imputation method for each column is left to mice's defaults.
imp <- mice(
  data = GSS[, c("race_eth", "educ", "marst", "party", "polit", "discrim", "polview", "PC1", "PC2")],
  m = 5,
  seed = 22
)
##
## iter imp variable
## 1 1 educ marst party polit discrim polview
## 1 2 educ marst party polit discrim polview
## 1 3 educ marst party polit discrim polview
## 1 4 educ marst party polit discrim polview
## 1 5 educ marst party polit discrim polview
## 2 1 educ marst party polit discrim polview
## 2 2 educ marst party polit discrim polview
## 2 3 educ marst party polit discrim polview
## 2 4 educ marst party polit discrim polview
## 2 5 educ marst party polit discrim polview
## 3 1 educ marst party polit discrim polview
## 3 2 educ marst party polit discrim polview
## 3 3 educ marst party polit discrim polview
## 3 4 educ marst party polit discrim polview
## 3 5 educ marst party polit discrim polview
## 4 1 educ marst party polit discrim polview
## 4 2 educ marst party polit discrim polview
## 4 3 educ marst party polit discrim polview
## 4 4 educ marst party polit discrim polview
## 4 5 educ marst party polit discrim polview
## 5 1 educ marst party polit discrim polview
## 5 2 educ marst party polit discrim polview
## 5 3 educ marst party polit discrim polview
## 5 4 educ marst party polit discrim polview
## 5 5 educ marst party polit discrim polview
# Print the mids object: number of imputations, the imputation method chosen
# for each variable, and the predictor matrix.
print(imp)
## Class: mids
## Number of multiple imputations: 5
## Imputation methods:
## race_eth educ marst party polit discrim polview
## "" "polyreg" "polyreg" "polyreg" "logreg" "logreg" "polyreg"
## PC1 PC2
## "" ""
## PredictorMatrix:
## race_eth educ marst party polit discrim polview PC1 PC2
## race_eth 0 1 1 1 1 1 1 1 1
## educ 1 0 1 1 1 1 1 1 1
## marst 1 1 0 1 1 1 1 1 1
## party 1 1 1 0 1 1 1 1 1
## polit 1 1 1 1 0 1 1 1 1
## discrim 1 1 1 1 1 0 1 1 1
The majority of variables in this analysis are categorical, so the default imputation method is nearly consistent across the board (polyreg for the multi-category factors, logreg for the two binary factors). Let’s see what basic multiple imputation does for my discrim variable.
# First six imputed discrim values; one column per imputation.
head(imp$imp[["discrim"]], n = 6)
## 1 2 3 4
## 1 Discrimination Discrimination Discrimination No Discrimination
## 2 Discrimination Discrimination No Discrimination Discrimination
## 3 Discrimination Discrimination Discrimination No Discrimination
## 4 Discrimination No Discrimination Discrimination No Discrimination
## 5 Discrimination Discrimination No Discrimination No Discrimination
## 6 No Discrimination Discrimination Discrimination No Discrimination
## 5
## 1 Discrimination
## 2 Discrimination
## 3 Discrimination
## 4 No Discrimination
## 5 No Discrimination
## 6 Discrimination
# Level counts of the imputed discrim values within each of the imputations.
summary(imp$imp[["discrim"]])
## 1 2
## No Discrimination:11236 No Discrimination:11358
## Discrimination : 8067 Discrimination : 7945
## 3 4
## No Discrimination:11244 No Discrimination:11415
## Discrimination : 8059 Discrimination : 7888
## 5
## No Discrimination:11441
## Discrimination : 7862
Granted, there are only two choices to impute per case, but the five imputations are at least consistent with one another. This seems much closer to ideal than the modal imputation performed earlier. Now I will look at the imputations for my polit variable.
# First six imputed polit values; one column per imputation.
head(imp$imp[["polit"]], n = 6)
## 1 2 3
## 72 Not Politically Active Not Politically Active Not Politically Active
## 74 Not Politically Active Not Politically Active Not Politically Active
## 110 Not Politically Active Not Politically Active Not Politically Active
## 337 Not Politically Active Not Politically Active Not Politically Active
## 385 Not Politically Active Not Politically Active Not Politically Active
## 458 Not Politically Active Not Politically Active Not Politically Active
## 4 5
## 72 Not Politically Active Not Politically Active
## 74 Not Politically Active Not Politically Active
## 110 Not Politically Active Not Politically Active
## 337 Not Politically Active Not Politically Active
## 385 Not Politically Active Not Politically Active
## 458 Not Politically Active Not Politically Active
# Level counts of the imputed polit values within each of the imputations.
summary(imp$imp[["polit"]])
## 1 2
## Politically Active : 1062 Politically Active : 1098
## Not Politically Active:20594 Not Politically Active:20558
## 3 4
## Politically Active : 1240 Politically Active : 1056
## Not Politically Active:20416 Not Politically Active:20600
## 5
## Politically Active : 1109
## Not Politically Active:20547
Multiple imputation seems to give much more reasonable results. The variables in this analysis are dichotomous, which makes them more sensitive to modal imputation, since every imputed value is assigned to a single answer choice rather than being distributed across the choices. We see in the discrim variable that the modal imputation exaggerates one of the answer choices. Basic multiple imputation gives a more consistent representation across the 5 imputations.
library(lattice)

# Extract the first completed dataset (observed values plus the first set of
# imputations) and show its first ten rows.
dat.imp <- complete(imp, 1)
head(dat.imp, 10)
## race_eth educ marst party polit
## 1 nh black High School Seperated Democrat Not Politically Active
## 2 nh black High School Married Democrat Not Politically Active
## 3 nhwhite High School Never Married Democrat Not Politically Active
## 4 nhwhite High School Never Married Republican Not Politically Active
## 5 nhwhite High School Married Independent Not Politically Active
## 6 nh other Less than HS Never Married Republican Not Politically Active
## 7 nhwhite Undergraduate Married Republican Not Politically Active
## 8 nhwhite Less than HS Married Democrat Not Politically Active
## 9 nhwhite Undergraduate Married Democrat Not Politically Active
## 10 nh black Undergraduate Never Married Democrat Not Politically Active
## discrim polview PC1 PC2
## 1 Discrimination Conservative 1.304500 -1.47262821
## 2 Discrimination Moderate 1.304500 -1.47262821
## 3 Discrimination Moderate 1.304500 -1.47262821
## 4 Discrimination Conservative 2.911531 1.64484372
## 5 Discrimination Moderate 2.114428 0.12456554
## 6 No Discrimination Conservative -2.266603 0.36564993
## 7 No Discrimination Conservative 1.304500 -1.47262821
## 8 Discrimination Moderate 1.304500 -1.47262821
## 9 No Discrimination Conservative -2.266603 0.36564993
## 10 Discrimination Liberal 2.101603 0.04764997
# Same first ten rows from the un-imputed data, for comparison with the
# completed dataset.
head(
  GSS[, c("race_eth", "educ", "marst", "party", "polit")],
  n = 10
)
## race_eth educ marst party polit
## 1 nh black High School Seperated Democrat Not Politically Active
## 2 nh black High School Married Democrat Not Politically Active
## 3 nhwhite High School Never Married Democrat Not Politically Active
## 4 nhwhite High School Never Married Republican Not Politically Active
## 5 nhwhite High School Married Independent Not Politically Active
## 6 nh other Less than HS Never Married Republican Not Politically Active
## 7 nhwhite Undergraduate Married Republican Not Politically Active
## 8 nhwhite Less than HS Married Democrat Not Politically Active
## 9 nhwhite Undergraduate Married Democrat Not Politically Active
## 10 nh black Undergraduate Never Married Democrat Not Politically Active
# Fit the same linear model of PC1 on each of the imputed datasets; the
# result holds one lm fit per imputation.
fit.GSS <- with(
  data = imp,
  expr = lm(PC1 ~ polview + discrim + marst + party + polit + educ + race_eth)
)
# Print the per-imputation fits.
fit.GSS
## call :
## with.mids(data = imp, expr = lm(PC1 ~ polview + discrim + marst +
## party + polit + educ + race_eth))
##
## call1 :
## mice(data = GSS[, c("race_eth", "educ", "marst", "party", "polit",
## "discrim", "polview", "PC1", "PC2")], m = 5, seed = 22)
##
## nmis :
## race_eth educ marst party polit discrim polview PC1
## 0 68 15 156 21656 19303 1859 0
## PC2
## 0
##
## analyses :
## [[1]]
##
## Call:
## lm(formula = PC1 ~ polview + discrim + marst + party + polit +
## educ + race_eth)
##
## Coefficients:
## (Intercept) polviewLiberal
## -0.13916 -1.00068
## polviewModerate discrimDiscrimination
## -0.53495 -0.06467
## marstDivorced marstNever Married
## -0.35946 -0.34057
## marstSeperated marstWidowed
## -0.16167 0.07647
## partyDemocrat partyIndependent
## -0.27393 -0.08557
## partyOther politNot Politically Active
## -0.39251 0.08860
## educHigh School educJunior College
## 0.78913 0.59247
## educLess than HS educUndergraduate
## 1.36218 0.29599
## race_ethnh black race_ethnh other
## 0.48877 0.38950
##
##
## [[2]]
##
## Call:
## lm(formula = PC1 ~ polview + discrim + marst + party + polit +
## educ + race_eth)
##
## Coefficients:
## (Intercept) polviewLiberal
## -0.13839 -0.99763
## polviewModerate discrimDiscrimination
## -0.53501 -0.07131
## marstDivorced marstNever Married
## -0.36425 -0.34038
## marstSeperated marstWidowed
## -0.16776 0.07472
## partyDemocrat partyIndependent
## -0.27264 -0.08638
## partyOther politNot Politically Active
## -0.40464 0.08472
## educHigh School educJunior College
## 0.79545 0.59331
## educLess than HS educUndergraduate
## 1.37189 0.30149
## race_ethnh black race_ethnh other
## 0.48455 0.38931
##
##
## [[3]]
##
## Call:
## lm(formula = PC1 ~ polview + discrim + marst + party + polit +
## educ + race_eth)
##
## Coefficients:
## (Intercept) polviewLiberal
## -0.18734 -1.00163
## polviewModerate discrimDiscrimination
## -0.53786 -0.06645
## marstDivorced marstNever Married
## -0.35821 -0.34036
## marstSeperated marstWidowed
## -0.17045 0.07232
## partyDemocrat partyIndependent
## -0.27514 -0.09373
## partyOther politNot Politically Active
## -0.40156 0.15087
## educHigh School educJunior College
## 0.78103 0.58604
## educLess than HS educUndergraduate
## 1.35308 0.29002
## race_ethnh black race_ethnh other
## 0.48568 0.39291
##
##
## [[4]]
##
## Call:
## lm(formula = PC1 ~ polview + discrim + marst + party + polit +
## educ + race_eth)
##
## Coefficients:
## (Intercept) polviewLiberal
## -0.20487 -0.98558
## polviewModerate discrimDiscrimination
## -0.52598 -0.12657
## marstDivorced marstNever Married
## -0.35953 -0.33397
## marstSeperated marstWidowed
## -0.16685 0.07885
## partyDemocrat partyIndependent
## -0.27106 -0.08889
## partyOther politNot Politically Active
## -0.40598 0.18358
## educHigh School educJunior College
## 0.77366 0.58278
## educLess than HS educUndergraduate
## 1.35460 0.28723
## race_ethnh black race_ethnh other
## 0.50149 0.39250
##
##
## [[5]]
##
## Call:
## lm(formula = PC1 ~ polview + discrim + marst + party + polit +
## educ + race_eth)
##
## Coefficients:
## (Intercept) polviewLiberal
## -0.12331 -0.99040
## polviewModerate discrimDiscrimination
## -0.53307 -0.09525
## marstDivorced marstNever Married
## -0.36067 -0.33683
## marstSeperated marstWidowed
## -0.15883 0.08049
## partyDemocrat partyIndependent
## -0.27119 -0.08454
## partyOther politNot Politically Active
## -0.39354 0.07662
## educHigh School educJunior College
## 0.79192 0.58725
## educLess than HS educUndergraduate
## 1.36493 0.29643
## race_ethnh black race_ethnh other
## 0.48976 0.39243
# Descriptive statistics on the observed (un-imputed) data: the standard
# deviation of PC1 and the category shares of each factor. The argument is
# named `expr` in full; the previous `exp=` only worked through partial
# argument matching.
with(data = GSS, expr = sd(PC1))
## [1] 1.991193
with(data = GSS, expr = prop.table(table(discrim)))
## discrim
## No Discrimination Discrimination
## 0.5912273 0.4087727
with(data = GSS, expr = prop.table(table(race_eth)))
## race_eth
## nhwhite nh black nh other
## 0.80507447 0.14225304 0.05267249
with(data = GSS, expr = prop.table(table(educ)))
## educ
## Graduate High School Junior College Less than HS Undergraduate
## 0.07721310 0.52089241 0.05739569 0.19480884 0.14968996
with(data = GSS, expr = prop.table(table(marst)))
## marst
## Married Divorced Never Married Seperated Widowed
## 0.51863373 0.13485348 0.22127170 0.03484104 0.09040005
with(data = GSS, expr = prop.table(table(party)))
## party
## Republican Democrat Independent Other
## 0.34797688 0.49617247 0.14125918 0.01459147
with(data = GSS, expr = prop.table(table(polit)))
## polit
## Politically Active Not Politically Active
## 0.04112327 0.95887673
with(data = GSS, expr = prop.table(table(polview)))
## polview
## Conservative Liberal Moderate
## 0.3370735 0.2851627 0.3777638
# Pool the per-imputation fits into one set of estimates. The printed table
# gives, per coefficient, the within-imputation (ubar), between-imputation (b)
# and total (t) variance, plus the missing-information diagnostics.
est.p <- pool(object = fit.GSS)
print(est.p)
## Class: mipo m = 5
## estimate ubar b
## (Intercept) -0.15861353 0.0036710626 1.249603e-03
## polviewLiberal -0.99518272 0.0008341679 4.821072e-05
## polviewModerate -0.53337561 0.0006627537 2.000470e-05
## discrimDiscrimination -0.08484859 0.0004969940 6.943302e-04
## marstDivorced -0.36042599 0.0010382702 5.334849e-06
## marstNever Married -0.33842084 0.0007429840 8.644966e-06
## marstSeperated -0.16511212 0.0034450300 2.245312e-05
## marstWidowed 0.07656888 0.0014733623 1.050832e-05
## partyDemocrat -0.27279362 0.0006605151 3.101889e-06
## partyIndependent -0.08782050 0.0011731515 1.350811e-05
## partyOther -0.39964304 0.0079115439 3.923973e-05
## politNot Politically Active 0.11687641 0.0024844486 2.264747e-03
## educHigh School 0.78623899 0.0016923383 7.768795e-05
## educJunior College 0.58836985 0.0033878145 1.977292e-05
## educLess than HS 1.36133611 0.0020944043 5.964903e-05
## educUndergraduate 0.29423393 0.0021779112 3.183827e-05
## race_ethnh black 0.49005264 0.0010496338 4.546660e-05
## race_ethnh other 0.39132815 0.0022673254 3.129691e-06
## t dfcom df riv
## (Intercept) 0.0051705868 32143 47.46009 0.408471420
## polviewLiberal 0.0008920208 32143 921.79086 0.069353985
## polviewModerate 0.0006867594 32143 2961.19178 0.036221048
## discrimDiscrimination 0.0013301903 32143 10.18648 1.676471639
## marstDivorced 0.0010446720 32143 24574.22538 0.006165850
## marstNever Married 0.0007533580 32143 12665.83522 0.013962559
## marstSeperated 0.0034719737 32143 21546.14525 0.007821047
## marstWidowed 0.0014859723 32143 20250.16597 0.008558645
## partyDemocrat 0.0006642374 32143 25549.99972 0.005635400
## partyIndependent 0.0011893613 32143 12823.83836 0.013817250
## partyOther 0.0079586316 32143 24969.10777 0.005951768
## politNot Politically Active 0.0052021451 32143 14.64227 1.093883202
## educHigh School 0.0017855638 32143 1399.94044 0.055086821
## educJunior College 0.0034115420 32143 23028.71199 0.007003778
## educLess than HS 0.0021659831 32143 3276.54905 0.034176226
## educUndergraduate 0.0022161171 32143 9437.22580 0.017542460
## race_ethnh black 0.0011041938 32143 1554.95504 0.051979958
## race_ethnh other 0.0022710811 32143 31399.04282 0.001656414
## lambda fmi
## (Intercept) 0.290010443 0.318151081
## polviewLiberal 0.064855965 0.066878356
## polviewModerate 0.034954943 0.035606079
## discrimDiscrimination 0.626373773 0.683041832
## marstDivorced 0.006128065 0.006208943
## marstNever Married 0.013770291 0.013925985
## marstSeperated 0.007760353 0.007852444
## marstWidowed 0.008486016 0.008583928
## partyDemocrat 0.005603821 0.005681651
## partyIndependent 0.013628935 0.013782733
## partyOther 0.005916554 0.005996170
## politNot Politically Active 0.522418443 0.576559033
## educHigh School 0.052210699 0.053561846
## educJunior College 0.006955066 0.007041299
## educLess than HS 0.033046811 0.033636497
## educUndergraduate 0.017240028 0.017448234
## race_ethnh black 0.049411547 0.050631850
## race_ethnh other 0.001653675 0.001717260
# Pooled coefficient table: estimate, standard error, test statistic,
# degrees of freedom and p-value for each term.
summary(est.p)
## estimate std.error statistic df
## (Intercept) -0.15861353 0.07190679 -2.205821 47.46009
## polviewLiberal -0.99518272 0.02986672 -33.320794 921.79086
## polviewModerate -0.53337561 0.02620609 -20.353114 2961.19178
## discrimDiscrimination -0.08484859 0.03647177 -2.326418 10.18648
## marstDivorced -0.36042599 0.03232139 -11.151316 24574.22538
## marstNever Married -0.33842084 0.02744737 -12.329810 12665.83522
## marstSeperated -0.16511212 0.05892346 -2.802146 21546.14525
## marstWidowed 0.07656888 0.03854831 1.986309 20250.16597
## partyDemocrat -0.27279362 0.02577280 -10.584554 25549.99972
## partyIndependent -0.08782050 0.03448712 -2.546472 12823.83836
## partyOther -0.39964304 0.08921116 -4.479743 24969.10777
## politNot Politically Active 0.11687641 0.07212590 1.620450 14.64227
## educHigh School 0.78623899 0.04225593 18.606594 1399.94044
## educJunior College 0.58836985 0.05840841 10.073376 23028.71199
## educLess than HS 1.36133611 0.04654012 29.250805 3276.54905
## educUndergraduate 0.29423393 0.04707565 6.250236 9437.22580
## race_ethnh black 0.49005264 0.03322941 14.747557 1554.95504
## race_ethnh other 0.39132815 0.04765586 8.211543 31399.04282
## p.value
## (Intercept) 2.740374e-02
## polviewLiberal 0.000000e+00
## polviewModerate 0.000000e+00
## discrimDiscrimination 2.000260e-02
## marstDivorced 0.000000e+00
## marstNever Married 0.000000e+00
## marstSeperated 5.079499e-03
## marstWidowed 4.700765e-02
## partyDemocrat 0.000000e+00
## partyIndependent 1.088651e-02
## partyOther 7.499657e-06
## politNot Politically Active 1.051457e-01
## educHigh School 0.000000e+00
## educJunior College 0.000000e+00
## educLess than HS 0.000000e+00
## educUndergraduate 4.150948e-10
## race_ethnh black 0.000000e+00
## race_ethnh other 2.220446e-16
# Complete-case benchmark: the same model on the original data, where lm()
# drops every row with a missing value in any model variable.
fit1 <- lm(
  PC1 ~ polview + discrim + marst + party + polit + educ + race_eth,
  data = GSS
)
summary(fit1)
##
## Call:
## lm(formula = PC1 ~ polview + discrim + marst + party + polit +
## educ + race_eth, data = GSS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0140 -1.6860 -0.1405 1.5239 4.4708
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.58003 0.28804 -2.014 0.04423 *
## polviewLiberal -0.69761 0.13058 -5.342 1.06e-07 ***
## polviewModerate -0.36220 0.11526 -3.143 0.00171 **
## discrimDiscrimination -0.18671 0.09859 -1.894 0.05845 .
## marstDivorced -0.46891 0.17290 -2.712 0.00677 **
## marstNever Married -0.17538 0.13792 -1.272 0.20373
## marstSeperated -0.35544 0.31333 -1.134 0.25682
## marstWidowed -0.24115 0.16353 -1.475 0.14053
## partyDemocrat 0.20951 0.10717 1.955 0.05079 .
## partyIndependent 0.15964 0.17135 0.932 0.35165
## partyOther -0.86240 0.69064 -1.249 0.21198
## politNot Politically Active -0.12908 0.22115 -0.584 0.55954
## educHigh School 0.82649 0.20710 3.991 6.92e-05 ***
## educJunior College 0.61255 0.32511 1.884 0.05975 .
## educLess than HS 1.24695 0.21723 5.740 1.15e-08 ***
## educUndergraduate 0.49051 0.24252 2.023 0.04330 *
## race_ethnh black -0.07676 0.31016 -0.247 0.80457
## race_ethnh other -0.20086 0.32012 -0.627 0.53046
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.809 on 1442 degrees of freedom
## (30701 observations deleted due to missingness)
## Multiple R-squared: 0.06722, Adjusted R-squared: 0.05622
## F-statistic: 6.113 on 17 and 1442 DF, p-value: 6.154e-14
# The same model on the single completed dataset `dat.imp`.
# NOTE(review): this fit uses one imputation only, so its standard errors do
# not include between-imputation variance; compare with the pooled results.
fit.imp <- lm(
  PC1 ~ polview + discrim + marst + party + polit + educ + race_eth,
  data = dat.imp
)
summary(fit.imp)
##
## Call:
## lm(formula = PC1 ~ polview + discrim + marst + party + polit +
## educ + race_eth, data = dat.imp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0788 -1.7306 0.1225 1.5999 4.7305
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.13916 0.06119 -2.274 0.02296 *
## polviewLiberal -1.00068 0.02889 -34.634 < 2e-16 ***
## polviewModerate -0.53495 0.02575 -20.776 < 2e-16 ***
## discrimDiscrimination -0.06467 0.02226 -2.906 0.00367 **
## marstDivorced -0.35946 0.03224 -11.149 < 2e-16 ***
## marstNever Married -0.34057 0.02726 -12.493 < 2e-16 ***
## marstSeperated -0.16167 0.05871 -2.754 0.00589 **
## marstWidowed 0.07647 0.03840 1.991 0.04648 *
## partyDemocrat -0.27393 0.02573 -10.647 < 2e-16 ***
## partyIndependent -0.08557 0.03430 -2.495 0.01261 *
## partyOther -0.39251 0.08870 -4.425 9.66e-06 ***
## politNot Politically Active 0.08860 0.05058 1.752 0.07985 .
## educHigh School 0.78913 0.04117 19.166 < 2e-16 ***
## educJunior College 0.59247 0.05817 10.186 < 2e-16 ***
## educLess than HS 1.36218 0.04581 29.738 < 2e-16 ***
## educUndergraduate 0.29599 0.04667 6.342 2.30e-10 ***
## race_ethnh black 0.48877 0.03239 15.092 < 2e-16 ***
## race_ethnh other 0.38950 0.04762 8.179 2.97e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.879 on 32143 degrees of freedom
## Multiple R-squared: 0.1099, Adjusted R-squared: 0.1095
## F-statistic: 233.5 on 17 and 32143 DF, p-value: < 2.2e-16
The model has greatly improved with the missing data imputed, compared to the initial dataset. The variance explained is still low (R-squared of about 0.11), but it increased from the initial model (about 0.07).