library(haven)
library(foreign)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(broom)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(readr)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(car)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(alr3)
##
## Attaching package: 'alr3'
## The following object is masked from 'package:MASS':
##
## forbes
library(zoo)
library(nortest)
library(plotrix)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:plotrix':
##
## rescale
## The following object is masked from 'package:readr':
##
## col_factor
library(tableone)
library(Weighted.Desc.Stat)
library(mitools)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(VGAM)
## Loading required package: stats4
## Loading required package: splines
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:survey':
##
## calibrate
## The following object is masked from 'package:lmtest':
##
## lrtest
## The following object is masked from 'package:car':
##
## logit
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(sandwich)
library(mice)
## Warning: package 'mice' was built under R version 3.6.3
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
anes2016<-read_dta("C:\\Users\\Jaire\\OneDrive\\Desktop\\stats for dem data 2\\Homework 5\\ANES2016.dta")
# Self-reated health
anes2016$SelfratedHlth<-recode(anes2016$V161115, recodes = "1='5'; 2='4'; ; 4='2';5='1';-9=NA", as.factor = T)
anes2016$SelfratedHlth<-recode(anes2016$SelfratedHlth, recodes = "1:2='Negative'; 3:5='Positive'", as.factor = T)
table(anes2016$SelfratedHlth)
##
## Negative Positive
## 750 3513
# Income
anes2016$Income<-recode(anes2016$V161361X, recodes = "-9:-5=NA; 1:14='49.9k or less';15:22='50k to 99.9k';23:28='100k to 150k'", as.factor = T )
table(anes2016$Income)
##
## 100k to 150k 49.9k or less 50k to 99.9k
## 988 1809 1272
# Generation
anes2016$Generation<-recode(anes2016$V161267C, recodes = "1928:1945=NA;1946:1964='BabyBoomer';1965:1980='X';1981:1996='Millenial'; 1997:1998='Z'; 1926:1927=NA; -9:-8=NA", as.factor = T)
table(anes2016$Generation)
##
## BabyBoomer Millenial X Z
## 1473 1051 1046 67
# Working status
anes2016$Employed<-recode(anes2016$V161277, recodes = "-9=NA; 2:8='0'", as.factor = T)
table(anes2016$Employed)
##
## 0 1
## 1708 2547
# College education
anes2016$CollegeEd<-recode(anes2016$V161270, recodes = "90:95=NA;-9=NA;1:10='0'; 11:16='1'", as.factor = T)
table(anes2016$CollegeEd)
##
## 0 1
## 1991 2236
# Racial Minority
anes2016$MinorityRace<-recode(anes2016$V161310X, recodes = "-9=NA; 1='0'; 2:6='1'", as.factor = T)
table(anes2016$MinorityRace)
##
## 0 1
## 3038 1200
sub1<-dplyr::select(anes2016, SelfratedHlth, Income, Generation, Employed, CollegeEd, MinorityRace) %>%
filter(complete.cases(.))
options(survey.lonely.psu = "adjust")
des<-svydesign(nest = TRUE, ids=~anes2016$V160202, strata =~anes2016$V160201, weights =~anes2016$V160101, data=anes2016)
fit.logit1<-svyglm(SelfratedHlth~ Income+Generation+Employed+CollegeEd+MinorityRace,
design= des,
family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.logit1)
##
## Call:
## svyglm(formula = SelfratedHlth ~ Income + Generation + Employed +
## CollegeEd + MinorityRace, design = des, family = binomial)
##
## Survey design:
## svydesign(nest = TRUE, ids = ~anes2016$V160202, strata = ~anes2016$V160201,
## weights = ~anes2016$V160101, data = anes2016)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.41778 0.18900 7.501 1.02e-11 ***
## Income49.9k or less -1.22422 0.18287 -6.695 6.54e-10 ***
## Income50k to 99.9k -0.08791 0.20620 -0.426 0.670620
## GenerationMillenial 0.80325 0.15240 5.271 5.77e-07 ***
## GenerationX 0.10448 0.13100 0.798 0.426626
## GenerationZ 1.58992 0.46924 3.388 0.000941 ***
## Employed1 0.77979 0.12082 6.454 2.17e-09 ***
## CollegeEd1 0.49084 0.11576 4.240 4.31e-05 ***
## MinorityRace1 -0.10761 0.12884 -0.835 0.405166
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.082511)
##
## Number of Fisher Scoring iterations: 5
summary(anes2016[, c("SelfratedHlth", "Income", "Generation", "Employed", "CollegeEd", "MinorityRace")])
## SelfratedHlth Income Generation Employed CollegeEd
## Negative: 750 100k to 150k : 988 BabyBoomer:1473 0 :1708 0 :1991
## Positive:3513 49.9k or less:1809 Millenial :1051 1 :2547 1 :2236
## NA's : 8 50k to 99.9k :1272 X :1046 NA's: 16 NA's: 44
## NA's : 202 Z : 67
## NA's : 634
## MinorityRace
## 0 :3038
## 1 :1200
## NA's: 33
##
##
The pattern of missingness for each indicator is shown above, I tanslate these into percentages of the sample missing:
Self-rated health has the lowest number of missing cases among these indicators.
# Self-rated health
table(anes2016$SelfratedHlth)
##
## Negative Positive
## 750 3513
# most common value
mcv.SelfratedHlth<-factor(names(which.max(table(anes2016$SelfratedHlth))), levels=levels(anes2016$SelfratedHlth))
mcv.SelfratedHlth
## [1] Positive
## Levels: Negative Positive
# Imputed cases and proportion
anes2016$SelfratedHlth.imp<-as.factor(ifelse(is.na(anes2016$SelfratedHlth)==T, mcv.SelfratedHlth, anes2016$SelfratedHlth))
levels(anes2016$SelfratedHlth.imp)<-levels(anes2016$SelfratedHlth)
prop.table(table(anes2016$SelfratedHlth))
##
## Negative Positive
## 0.1759324 0.8240676
prop.table(table(anes2016$SelfratedHlth.imp))
##
## Negative Positive
## 0.1756029 0.8243971
barplot(prop.table(table(anes2016$SelfratedHlth)), main="Original Data", ylim=c(0, 1))
barplot(prop.table(table(anes2016$SelfratedHlth.imp)), main="Imputed Data", ylim=c(0, 1))
table(anes2016$Income)
##
## 100k to 150k 49.9k or less 50k to 99.9k
## 988 1809 1272
mcv.Income<-factor(names(which.max(table(anes2016$Income))), levels=levels(anes2016$Income))
mcv.Income
## [1] 49.9k or less
## Levels: 100k to 150k 49.9k or less 50k to 99.9k
anes2016$Income.imp<-as.factor(ifelse(is.na(anes2016$Income)==T, mcv.Income, anes2016$Income))
levels(anes2016$Income.imp)<-levels(anes2016$Income)
prop.table(table(anes2016$Income))
##
## 100k to 150k 49.9k or less 50k to 99.9k
## 0.2428115 0.4445810 0.3126075
prop.table(table(anes2016$Income))
##
## 100k to 150k 49.9k or less 50k to 99.9k
## 0.2428115 0.4445810 0.3126075
barplot(prop.table(table(anes2016$Income)), main="Original Data", ylim=c(0, 1))
barplot(prop.table(table(anes2016$Income.imp)), main="Imputed Data", ylim=c(0, 1))
table(anes2016$Generation)
##
## BabyBoomer Millenial X Z
## 1473 1051 1046 67
mcv.Generation<-factor(names(which.max(table(anes2016$Generation))), levels=levels(anes2016$Generation))
mcv.Generation
## [1] BabyBoomer
## Levels: BabyBoomer Millenial X Z
anes2016$Generation.imp<-as.factor(ifelse(is.na(anes2016$Generation)==T, mcv.Generation, anes2016$Generation))
levels(anes2016$Generation.imp)<-levels(anes2016$Generation)
prop.table(table(anes2016$Generation))
##
## BabyBoomer Millenial X Z
## 0.40500412 0.28897443 0.28759967 0.01842178
prop.table(table(anes2016$Generation))
##
## BabyBoomer Millenial X Z
## 0.40500412 0.28897443 0.28759967 0.01842178
barplot(prop.table(table(anes2016$Generation)), main="Original Data", ylim=c(0, 1))
barplot(prop.table(table(anes2016$Generation.imp)), main="Imputed Data", ylim=c(0, 1))
table(anes2016$Employed)
##
## 0 1
## 1708 2547
mcv.Employed<-factor(names(which.max(table(anes2016$Employed))), levels=levels(anes2016$Employed))
mcv.Employed
## [1] 1
## Levels: 0 1
anes2016$Employed.imp<-as.factor(ifelse(is.na(anes2016$Employed)==T, mcv.Employed, anes2016$Employed))
levels(anes2016$Employed.imp)<-levels(anes2016$Employed)
prop.table(table(anes2016$Employed))
##
## 0 1
## 0.4014101 0.5985899
prop.table(table(anes2016$Employed))
##
## 0 1
## 0.4014101 0.5985899
barplot(prop.table(table(anes2016$Employed)), main="Original Data", ylim=c(0, 1))
barplot(prop.table(table(anes2016$Employed.imp)), main="Imputed Data", ylim=c(0, 1))
table(anes2016$CollegeEd)
##
## 0 1
## 1991 2236
mcv.CollegeEd<-factor(names(which.max(table(anes2016$CollegeEd))), levels=levels(anes2016$CollegeEd))
mcv.CollegeEd
## [1] 1
## Levels: 0 1
anes2016$CollegeEd.imp<-as.factor(ifelse(is.na(anes2016$CollegeEd)==T, mcv.CollegeEd, anes2016$CollegeEd))
levels(anes2016$CollegeEd.imp)<-levels(anes2016$CollegeEd)
prop.table(table(anes2016$CollegeEd))
##
## 0 1
## 0.4710196 0.5289804
prop.table(table(anes2016$CollegeEd))
##
## 0 1
## 0.4710196 0.5289804
barplot(prop.table(table(anes2016$CollegeEd)), main="Original Data", ylim=c(0, 1))
barplot(prop.table(table(anes2016$CollegeEd.imp)), main="Imputed Data", ylim=c(0, 1))
table(anes2016$MinorityRace)
##
## 0 1
## 3038 1200
mcv.MinorityRace<-factor(names(which.max(table(anes2016$MinorityRace))), levels=levels(anes2016$MinorityRace))
mcv.MinorityRace
## [1] 0
## Levels: 0 1
anes2016$MinorityRace.imp<-as.factor(ifelse(is.na(anes2016$MinorityRace)==T, mcv.MinorityRace, anes2016$MinorityRace))
levels(anes2016$MinorityRace.imp)<-levels(anes2016$MinorityRace)
prop.table(table(anes2016$MinorityRace))
##
## 0 1
## 0.7168476 0.2831524
prop.table(table(anes2016$MinorityRace))
##
## 0 1
## 0.7168476 0.2831524
barplot(prop.table(table(anes2016$MinorityRace)), main="Original Data", ylim=c(0, 1))
barplot(prop.table(table(anes2016$MinorityRace.imp)), main="Imputed Data", ylim=c(0, 1))
sub2<-dplyr::select(anes2016, SelfratedHlth.imp, Income.imp, Generation.imp, Employed.imp, CollegeEd.imp, MinorityRace.imp) %>%
filter(complete.cases(.))
options(survey.lonely.psu = "adjust")
des<-svydesign(nest = TRUE, ids=~anes2016$V160202, strata =~anes2016$V160201, weights =~anes2016$V160101, data=anes2016)
fit.logit2<-svyglm(SelfratedHlth.imp~ Income.imp+Generation.imp+Employed.imp+CollegeEd.imp+MinorityRace.imp,
design= des,
family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.logit2)
##
## Call:
## svyglm(formula = SelfratedHlth.imp ~ Income.imp + Generation.imp +
## Employed.imp + CollegeEd.imp + MinorityRace.imp, design = des,
## family = binomial)
##
## Survey design:
## svydesign(nest = TRUE, ids = ~anes2016$V160202, strata = ~anes2016$V160201,
## weights = ~anes2016$V160101, data = anes2016)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.35505 0.16302 8.312 1.34e-13 ***
## Income.imp49.9k or less -1.01197 0.15751 -6.425 2.51e-09 ***
## Income.imp50k to 99.9k -0.02984 0.17608 -0.169 0.86569
## Generation.impMillenial 0.65585 0.13737 4.774 4.95e-06 ***
## Generation.impX -0.03743 0.11740 -0.319 0.75038
## Generation.impZ 1.49678 0.45707 3.275 0.00137 **
## Employed.imp1 0.88109 0.10986 8.020 6.50e-13 ***
## CollegeEd.imp1 0.48587 0.10060 4.830 3.92e-06 ***
## MinorityRace.imp1 -0.15367 0.10820 -1.420 0.15803
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.027584)
##
## Number of Fisher Scoring iterations: 5
The dispersion parameter for model 2 (fit.logit2) using the imputed data is lower than the dispersion parameter of model 1 - difference -0.054927. Therefore, modal imputation improved the model fit of this analysis.
md.pattern(anes2016[,c("SelfratedHlth", "Income", "Generation", "Employed", "CollegeEd", "MinorityRace")])
## SelfratedHlth Employed MinorityRace CollegeEd Income Generation
## 3494 1 1 1 1 1 1 0
## 522 1 1 1 1 1 0 1
## 103 1 1 1 1 0 1 1
## 75 1 1 1 1 0 0 2
## 20 1 1 1 0 1 1 1
## 9 1 1 1 0 1 0 2
## 2 1 1 1 0 0 1 2
## 2 1 1 1 0 0 0 3
## 12 1 1 0 1 1 1 1
## 3 1 1 0 1 1 0 2
## 7 1 1 0 1 0 0 3
## 1 1 1 0 0 0 0 4
## 1 1 0 1 1 1 0 2
## 1 1 0 1 1 0 1 2
## 1 1 0 1 1 0 0 3
## 1 1 0 1 0 1 1 2
## 3 1 0 1 0 0 0 4
## 2 1 0 0 1 1 0 3
## 1 1 0 0 0 0 1 4
## 3 1 0 0 0 0 0 5
## 2 0 1 1 1 1 1 1
## 1 0 1 1 1 1 0 2
## 1 0 1 1 1 0 0 3
## 1 0 1 0 1 1 1 2
## 1 0 0 0 1 1 0 4
## 2 0 0 0 0 0 0 6
## 8 16 33 44 202 634 937
md.pairs(anes2016[,c("SelfratedHlth", "Income", "Generation", "Employed", "CollegeEd", "MinorityRace")])
## $rr
## SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## SelfratedHlth 4263 4064 3634 4250 4221 4234
## Income 4064 4069 3530 4064 4039 4050
## Generation 3634 3530 3637 3634 3613 3623
## Employed 4250 4064 3634 4255 4221 4231
## CollegeEd 4221 4039 3613 4221 4227 4201
## MinorityRace 4234 4050 3623 4231 4201 4238
##
## $rm
## SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## SelfratedHlth 0 199 629 13 42 29
## Income 5 0 539 5 30 19
## Generation 3 107 0 3 24 14
## Employed 5 191 621 0 34 24
## CollegeEd 6 188 614 6 0 26
## MinorityRace 4 188 615 7 37 0
##
## $mr
## SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## SelfratedHlth 0 5 3 5 6 4
## Income 199 0 107 191 188 188
## Generation 629 539 0 621 614 615
## Employed 13 5 3 0 6 7
## CollegeEd 42 30 24 34 0 37
## MinorityRace 29 19 14 24 26 0
##
## $mm
## SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## SelfratedHlth 8 3 5 3 2 4
## Income 3 202 95 11 14 14
## Generation 5 95 634 13 20 19
## Employed 3 11 13 16 10 9
## CollegeEd 2 14 20 10 44 7
## MinorityRace 4 14 19 9 7 33
dat2<-anes2016
samp2<-sample(1:dim(dat2)[1], replace = F, size = 500)
imp<-mice(data = dat2[,c("SelfratedHlth", "Income", "Generation", "Employed", "CollegeEd", "MinorityRace")], seed = 22, m = 5)
##
## iter imp variable
## 1 1 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 1 2 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 1 3 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 1 4 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 1 5 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 2 1 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 2 2 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 2 3 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 2 4 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 2 5 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 3 1 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 3 2 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 3 3 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 3 4 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 3 5 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 4 1 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 4 2 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 4 3 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 4 4 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 4 5 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 5 1 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 5 2 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 5 3 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 5 4 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 5 5 SelfratedHlth Income Generation Employed CollegeEd MinorityRace
print(imp)
## Class: mids
## Number of multiple imputations: 5
## Imputation methods:
## SelfratedHlth Income Generation Employed CollegeEd
## "logreg" "polyreg" "polyreg" "logreg" "logreg"
## MinorityRace
## "logreg"
## PredictorMatrix:
## SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## SelfratedHlth 0 1 1 1 1 1
## Income 1 0 1 1 1 1
## Generation 1 1 0 1 1 1
## Employed 1 1 1 0 1 1
## CollegeEd 1 1 1 1 0 1
## MinorityRace 1 1 1 1 1 0
head(imp$imp$SelfratedHlth)
## 1 2 3 4 5
## 982 Positive Negative Positive Positive Positive
## 1573 Positive Negative Negative Positive Positive
## 2190 Negative Negative Positive Negative Negative
## 2459 Positive Positive Positive Positive Positive
## 2465 Positive Positive Positive Positive Positive
## 3151 Negative Positive Positive Positive Negative
summary(imp$imp$SelfratedHlth)
## 1 2 3 4 5
## Negative:3 Negative:3 Negative:2 Negative:1 Negative:2
## Positive:5 Positive:5 Positive:6 Positive:7 Positive:6
summary(anes2016$SelfratedHlth)
## Negative Positive NA's
## 750 3513 8
head(imp$imp$Income)
## 1 2 3 4 5
## 23 100k to 150k 49.9k or less 50k to 99.9k 100k to 150k 100k to 150k
## 27 50k to 99.9k 100k to 150k 50k to 99.9k 100k to 150k 50k to 99.9k
## 33 50k to 99.9k 49.9k or less 49.9k or less 50k to 99.9k 49.9k or less
## 41 49.9k or less 50k to 99.9k 49.9k or less 50k to 99.9k 49.9k or less
## 61 49.9k or less 49.9k or less 49.9k or less 49.9k or less 49.9k or less
## 93 49.9k or less 49.9k or less 50k to 99.9k 49.9k or less 100k to 150k
summary(imp$imp$Income)
## 1 2 3 4
## 100k to 150k :54 100k to 150k : 43 100k to 150k :55 100k to 150k :46
## 49.9k or less:96 49.9k or less:102 49.9k or less:92 49.9k or less:91
## 50k to 99.9k :52 50k to 99.9k : 57 50k to 99.9k :55 50k to 99.9k :65
## 5
## 100k to 150k :45
## 49.9k or less:87
## 50k to 99.9k :70
summary(anes2016$Income)
## 100k to 150k 49.9k or less 50k to 99.9k NA's
## 988 1809 1272 202
head(imp$imp$Generation)
## 1 2 3 4 5
## 12 Millenial BabyBoomer BabyBoomer Z BabyBoomer
## 29 BabyBoomer BabyBoomer BabyBoomer BabyBoomer X
## 33 BabyBoomer BabyBoomer Millenial BabyBoomer Z
## 41 Millenial BabyBoomer X BabyBoomer X
## 57 BabyBoomer X Millenial X Millenial
## 62 BabyBoomer BabyBoomer Z X BabyBoomer
summary(imp$imp$Generation)
## 1 2 3 4
## BabyBoomer:340 BabyBoomer:346 BabyBoomer:338 BabyBoomer:333
## Millenial :148 Millenial :147 Millenial :148 Millenial :137
## X :136 X :121 X :130 X :145
## Z : 10 Z : 20 Z : 18 Z : 19
## 5
## BabyBoomer:353
## Millenial :145
## X :116
## Z : 20
summary(anes2016$Generation)
## BabyBoomer Millenial X Z NA's
## 1473 1051 1046 67 634
head(imp$imp$Employed)
## 1 2 3 4 5
## 982 0 0 0 1 0
## 1224 0 1 1 1 1
## 1799 0 0 1 0 0
## 1844 1 1 0 1 1
## 1885 0 0 1 0 1
## 2076 1 0 1 1 1
summary(imp$imp$Employed)
## 1 2 3 4 5
## 0:7 0: 5 0: 4 0: 3 0:7
## 1:9 1:11 1:12 1:13 1:9
summary(anes2016$Employed)
## 0 1 NA's
## 1708 2547 16
head(imp$imp$CollegeEd)
## 1 2 3 4 5
## 49 0 0 1 1 1
## 132 1 1 0 1 1
## 259 1 0 1 0 0
## 429 1 1 0 0 0
## 691 0 0 1 1 1
## 816 1 0 0 1 1
summary(imp$imp$CollegeEd)
## 1 2 3 4 5
## 0:17 0:24 0:23 0:16 0:21
## 1:27 1:20 1:21 1:28 1:23
summary(anes2016$CollegeEd)
## 0 1 NA's
## 1991 2236 44
head(imp$imp$MinorityRace)
## 1 2 3 4 5
## 572 0 0 1 0 0
## 904 1 0 0 0 0
## 982 1 1 0 0 1
## 1059 0 0 0 0 0
## 1232 0 1 0 0 0
## 1411 0 1 1 0 1
summary(imp$imp$MinorityRace)
## 1 2 3 4 5
## 0:26 0:23 0:20 0:22 0:23
## 1: 7 1:10 1:13 1:11 1:10
summary(anes2016$MinorityRace)
## 0 1 NA's
## 3038 1200 33
library(lattice)
stripplot(imp,SelfratedHlth~Income|.imp, pch=20)
dat.imp <-complete(imp, action = 1)
head(dat.imp, n=10)
## SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 1 Positive 49.9k or less Millenial 1 0 0
## 2 Negative 50k to 99.9k Millenial 1 1 0
## 3 Positive 49.9k or less Millenial 1 0 0
## 4 Positive 50k to 99.9k BabyBoomer 1 0 0
## 5 Positive 49.9k or less X 1 0 0
## 6 Positive 49.9k or less BabyBoomer 1 1 0
## 7 Negative 50k to 99.9k BabyBoomer 1 0 1
## 8 Positive 100k to 150k BabyBoomer 1 0 0
## 9 Positive 49.9k or less X 1 0 1
## 10 Positive 50k to 99.9k Millenial 1 0 0
head(anes2016[,c("SelfratedHlth", "Income", "Generation", "Employed", "CollegeEd", "MinorityRace")], n=10)
## # A tibble: 10 x 6
## SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## <fct> <fct> <fct> <fct> <fct> <fct>
## 1 Positive 49.9k or less Millenial 1 0 0
## 2 Negative 50k to 99.9k Millenial 1 1 0
## 3 Positive 49.9k or less Millenial 1 0 0
## 4 Positive 50k to 99.9k BabyBoomer 1 0 0
## 5 Positive 49.9k or less X 1 0 0
## 6 Positive 49.9k or less BabyBoomer 1 1 0
## 7 Negative 50k to 99.9k BabyBoomer 1 0 1
## 8 Positive 100k to 150k BabyBoomer 1 0 0
## 9 Positive 49.9k or less X 1 0 1
## 10 Positive 50k to 99.9k Millenial 1 0 0
head(dat.imp[is.na(anes2016$SelfratedHlth)==T,], n=10)
## SelfratedHlth Income Generation Employed CollegeEd MinorityRace
## 982 Positive 49.9k or less BabyBoomer 0 1 1
## 1573 Positive 49.9k or less BabyBoomer 0 1 0
## 2190 Negative 49.9k or less BabyBoomer 0 1 0
## 2459 Positive 50k to 99.9k Millenial 1 1 0
## 2465 Positive 100k to 150k Millenial 1 1 0
## 3151 Negative 49.9k or less BabyBoomer 1 0 1
## 3788 Positive 49.9k or less X 1 0 1
## 3922 Negative 49.9k or less BabyBoomer 0 1 0
fit.SelfratedHlth<-with(data=imp ,expr=svyglm(SelfratedHlth~ Income+Generation+Employed+CollegeEd+MinorityRace,
design= des,
family=binomial))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.SelfratedHlth)
## # A tibble: 45 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 1.42 0.189 7.50 1.02e-11
## 2 Income49.9k or less -1.22 0.183 -6.69 6.54e-10
## 3 Income50k to 99.9k -0.0879 0.206 -0.426 6.71e- 1
## 4 GenerationMillenial 0.803 0.152 5.27 5.77e- 7
## 5 GenerationX 0.104 0.131 0.798 4.27e- 1
## 6 GenerationZ 1.59 0.469 3.39 9.41e- 4
## 7 Employed1 0.780 0.121 6.45 2.17e- 9
## 8 CollegeEd1 0.491 0.116 4.24 4.31e- 5
## 9 MinorityRace1 -0.108 0.129 -0.835 4.05e- 1
## 10 (Intercept) 1.42 0.189 7.50 1.02e-11
## # ... with 35 more rows
fit.imp<-svyglm(SelfratedHlth~ Income+Generation+Employed+CollegeEd+MinorityRace, design= des,
family=binomial, data=dat.imp)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.imp)
##
## Call:
## svyglm(formula = SelfratedHlth ~ Income + Generation + Employed +
## CollegeEd + MinorityRace, design = des, family = binomial,
## data = dat.imp)
##
## Survey design:
## svydesign(nest = TRUE, ids = ~anes2016$V160202, strata = ~anes2016$V160201,
## weights = ~anes2016$V160101, data = anes2016)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.41778 0.18900 7.501 1.02e-11 ***
## Income49.9k or less -1.22422 0.18287 -6.695 6.54e-10 ***
## Income50k to 99.9k -0.08791 0.20620 -0.426 0.670620
## GenerationMillenial 0.80325 0.15240 5.271 5.77e-07 ***
## GenerationX 0.10448 0.13100 0.798 0.426626
## GenerationZ 1.58992 0.46924 3.388 0.000941 ***
## Employed1 0.77979 0.12082 6.454 2.17e-09 ***
## CollegeEd1 0.49084 0.11576 4.240 4.31e-05 ***
## MinorityRace1 -0.10761 0.12884 -0.835 0.405166
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.082511)
##
## Number of Fisher Scoring iterations: 5
summary(fit.logit1)
##
## Call:
## svyglm(formula = SelfratedHlth ~ Income + Generation + Employed +
## CollegeEd + MinorityRace, design = des, family = binomial)
##
## Survey design:
## svydesign(nest = TRUE, ids = ~anes2016$V160202, strata = ~anes2016$V160201,
## weights = ~anes2016$V160101, data = anes2016)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.41778 0.18900 7.501 1.02e-11 ***
## Income49.9k or less -1.22422 0.18287 -6.695 6.54e-10 ***
## Income50k to 99.9k -0.08791 0.20620 -0.426 0.670620
## GenerationMillenial 0.80325 0.15240 5.271 5.77e-07 ***
## GenerationX 0.10448 0.13100 0.798 0.426626
## GenerationZ 1.58992 0.46924 3.388 0.000941 ***
## Employed1 0.77979 0.12082 6.454 2.17e-09 ***
## CollegeEd1 0.49084 0.11576 4.240 4.31e-05 ***
## MinorityRace1 -0.10761 0.12884 -0.835 0.405166
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.082511)
##
## Number of Fisher Scoring iterations: 5
summary(fit.logit2)
##
## Call:
## svyglm(formula = SelfratedHlth.imp ~ Income.imp + Generation.imp +
## Employed.imp + CollegeEd.imp + MinorityRace.imp, design = des,
## family = binomial)
##
## Survey design:
## svydesign(nest = TRUE, ids = ~anes2016$V160202, strata = ~anes2016$V160201,
## weights = ~anes2016$V160101, data = anes2016)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.35505 0.16302 8.312 1.34e-13 ***
## Income.imp49.9k or less -1.01197 0.15751 -6.425 2.51e-09 ***
## Income.imp50k to 99.9k -0.02984 0.17608 -0.169 0.86569
## Generation.impMillenial 0.65585 0.13737 4.774 4.95e-06 ***
## Generation.impX -0.03743 0.11740 -0.319 0.75038
## Generation.impZ 1.49678 0.45707 3.275 0.00137 **
## Employed.imp1 0.88109 0.10986 8.020 6.50e-13 ***
## CollegeEd.imp1 0.48587 0.10060 4.830 3.92e-06 ***
## MinorityRace.imp1 -0.15367 0.10820 -1.420 0.15803
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.027584)
##
## Number of Fisher Scoring iterations: 5
After attempting the multiple imputation I received an output that showed no difference from the original data (fit.logit1 & fit.imp). The modal imputation provided the best results for the data used in the analysis. Model 3 (fit.logit2) has a dispersion parameter closest to 1, and provides the best model fit to measure the variation in the outcome.