library(car)
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(questionr)
library(haven)

dat<-read_xpt("C:/Users/Monica/Documents/CSparks_StatsII/CNTY05.xpt")

a) My outcome variable will be as the self-reporting of number of poor mental health days in the past month. I will recode the variable as a binary variable in the following manner: a value of 88==0 (for NO) and a value of 1 - 30== 1 (for YES)

b) My research question is as follows: How do education level, employment status, marital status affect the mental health of an individual?

c) My predictor variables are 1) education level and 2) employment status and 3) marital status

Rename variables

nams<-names(dat)
head(nams, n=10)

##  [1] "_CNTYNAM" "_STATE"   "FMONTH"   "IDATE"    "IMONTH"   "IDAY"    
##  [7] "IYEAR"    "INTVID"   "DISPCODE" "SEQNO"

myvariables<-tolower(names(dat))
names(dat)<-myvariables

Recode for poor mental health (outcome variable)

dat$poormenhlth<-recode (dat$menthlth,recodes= "1:30=1; 88=0; else=NA")

Recode for education level

dat$educ<-recode(dat$educa, recodes="1:2='Primary'; 3='SomeHS'; 4='HSGrad'; 5='SomeCollege'; 6='CollegeGrad';9=NA", as.factor.result=T)
dat$educ<-relevel(dat$educ, ref='HSGrad')

Recode for employment status

dat$myemploy<-recode(dat$employ, recodes="1:2='Employed'; 3:4='Unemployed'; 5:8= 'NoLaborForce';  else=NA", as.factor.result=T)
dat$myemploy<-relevel(dat$myemploy, ref='Employed')

Recode for marital status

dat$marst<-recode(dat$marital, recodes="1='Married'; 2='Divorced'; 3='Widowed'; 4='Separated'; 5='NevrMarried';6='Cohab'; else=NA", as.factor.result=T)
dat$marst<-relevel(dat$marst, ref='Married')

Create a subset

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

dat$strata<-dat$`_ststr`
dat$weight<-dat$`_cntywt`
sub<-dat%>%
  select(poormenhlth, educ, myemploy, marst, weight, strata) %>%
  filter( complete.cases(.))

Create my survey design

options(survey.lonely.psu = "adjust")
mydesign<-svydesign(ids=~1, strata=~strata, weights=~weight, data =sub )

Examine percent of respondents with poor mental health by education level, perform survey-corrected chi-square test and plot it

library(ggplot2)
mysurvey1<-svyby(formula = ~poormenhlth, by = ~educ, design = mydesign, FUN = svymean, na.rm=T)
svychisq(~poormenhlth+educ, design = mydesign)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~poormenhlth + educ, design = mydesign)
## F = 20.848, ndf = 3.3926e+00, ddf = 5.6671e+05, p-value =
## 5.902e-15

qplot(x=mysurvey1$educ,y=mysurvey1$poormenhlth, data=mysurvey1, xlab="Education", ylab="%  Poor Mental Health" )+
geom_errorbar(aes(x=educ, ymin=poormenhlth-se,ymax= poormenhlth+se), width=.25)+
ggtitle(label = "Percent of Adults with Poor Mental Health by Education")

Examine percent of respondents with poor mental health by employment status, perform survey-corrected chi-square test and plot it

mysurvey2<-svyby(formula = ~poormenhlth, by = ~myemploy, design = mydesign, FUN = svymean, na.rm=T)
svychisq(~poormenhlth+myemploy, design = mydesign)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~poormenhlth + myemploy, design = mydesign)
## F = 66.783, ndf = 1.9909e+00, ddf = 3.3257e+05, p-value < 2.2e-16

qplot(x=mysurvey2$myemploy,y=mysurvey2$poormenhlth, data=mysurvey2 ,xlab="Employment Status", ylab="%  Poor Mental Health" )+
geom_errorbar(aes(x=myemploy, ymin=poormenhlth-se,ymax= poormenhlth+se), width=.25)+
ggtitle(label = "Percent of Adults with Poor Mental Health by Employment Status")

calculate employmenteducationhealth cross tabulation

bothsurveys<-svyby(formula = ~poormenhlth, by = ~myemploy+educ, design = mydesign, FUN = svymean, na.rm=T)
bothsurveys

##                              myemploy        educ poormenhlth          se
## Employed.HSGrad              Employed      HSGrad   0.3362184 0.007006330
## NoLaborForce.HSGrad      NoLaborForce      HSGrad   0.3231667 0.008036631
## Unemployed.HSGrad          Unemployed      HSGrad   0.4623141 0.023189999
## Employed.CollegeGrad         Employed CollegeGrad   0.3250395 0.004441163
## NoLaborForce.CollegeGrad NoLaborForce CollegeGrad   0.2869986 0.007457568
## Unemployed.CollegeGrad     Unemployed CollegeGrad   0.4652997 0.023385239
## Employed.Primary             Employed     Primary   0.3081067 0.029992250
## NoLaborForce.Primary     NoLaborForce     Primary   0.3040069 0.023214393
## Unemployed.Primary         Unemployed     Primary   0.3294651 0.060526409
## Employed.SomeCollege         Employed SomeCollege   0.3748453 0.006657592
## NoLaborForce.SomeCollege NoLaborForce SomeCollege   0.3717068 0.008475852
## Unemployed.SomeCollege     Unemployed SomeCollege   0.5297304 0.024756010
## Employed.SomeHS              Employed      SomeHS   0.3813339 0.019516172
## NoLaborForce.SomeHS      NoLaborForce      SomeHS   0.3922970 0.016722219
## Unemployed.SomeHS          Unemployed      SomeHS   0.4915616 0.036607150

Plot employmenteducationhealth cross tabulation

bothsurveys$myemploy<-rep(c("Employed","NoLaborForce", "Unemployed"),5) bothsurveys$educ<-factor(c(rep(“Primary Sch”, 5), rep(“LT HS”, 5), rep(“HS Grad”, 5), rep(“Some College”, 5), rep(“College Grad”, 5)), ordered = T) #fix the order of the education factor levels bothsurveys$educ<-factor(bothsurveys$educ, levels(bothsurveys$educ)[c(4,3,2,5,1)])

crosstabplot<-ggplot(bothsurveys, aes(educ, poormenhlth),xlab="Employment Status", ylab="Percent Poor Mental Health")
crosstabplot<-crosstabplot+geom_point(aes(colour=myemploy))
crosstabplot<-crosstabplot+geom_line(aes(colour=myemploy,group=myemploy))
crosstabplot<-crosstabplot+ylab("Percent Poor Mental Health")
crosstabplot<-crosstabplot+xlab("Education Level")
crosstabplot+ggtitle("Percent of Adults in 2005 with Poor Mental Health by Employment Status and Education")

Create Logit Mode on poor mental health by employment, education and marital status

fit.mylogit<-svyglm(poormenhlth~myemploy+educ+marst,design= mydesign, family=binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

Present model coefficients

stargazer(fit.mylogit, type= "text", style= "demography", covariate.labels=c("employed", "unemployed", "not in labor force", "some hs", "hs grad", "some college", "college grad", "married", "divorced", "widowed", "separated", "not married", "cohabitating"))

## 
## ---------------------------------
##                      poormenhlth 
## ---------------------------------
## employed                -0.003   
##                        (0.027)   
## unemployed             0.439***  
##                        (0.055)   
## not in labor force      -0.022   
##                        (0.030)   
## some hs                 -0.144   
##                        (0.090)   
## hs grad                0.193***  
##                        (0.032)   
## some college           0.168**   
##                        (0.056)   
## college grad           0.566***  
##                        (0.067)   
## married                0.351***  
##                        (0.035)   
## divorced               0.551***  
##                        (0.032)   
## widowed                0.698***  
##                        (0.077)   
## separated             -0.159***  
##                        (0.044)   
## not married           -0.893***  
##                        (0.028)   
## N                      167,573   
## Log Likelihood       -97,856.260 
## AIC                  195,736.500 
## ---------------------------------
## *p < .05; **p < .01; ***p < .001

Present model marginal effects in data frame—WHAT IS m.logit?

log.marg<-coef(fit.mylogit)*mean(dlogis(predict(fit.mylogit)), na.rm=T)
data.frame(m.logit=log.marg)

##                            m.logit
## (Intercept)          -0.1962785224
## myemployNoLaborForce -0.0006859134
## myemployUnemployed    0.0965905611
## educCollegeGrad      -0.0048834859
## educPrimary          -0.0316257423
## educSomeCollege       0.0424232451
## educSomeHS            0.0368815055
## marstCohab            0.1243023696
## marstDivorced         0.0771182531
## marstNevrMarried      0.1211819894
## marstSeparated        0.1533335353
## marstWidowed         -0.0348449824

Create a series of predicted probabilities for different “types” of people

mynewdat<-expand.grid(myemploy=levels(dat$myemploy), educ=levels(dat$educ), marst=levels(dat$marst))

Generate the fitted values, add values to fake data and print the first 20 probabilities

fit<-predict(fit.mylogit, newdat=mynewdat, type= "response")
mynewdat$fitted.prob.lrm<-round(fit,3)
head(mynewdat, n=20)

##        myemploy        educ   marst fitted.prob.lrm
## 1      Employed      HSGrad Married           0.290
## 2  NoLaborForce      HSGrad Married           0.290
## 3    Unemployed      HSGrad Married           0.389
## 4      Employed CollegeGrad Married           0.286
## 5  NoLaborForce CollegeGrad Married           0.285
## 6    Unemployed CollegeGrad Married           0.383
## 7      Employed     Primary Married           0.262
## 8  NoLaborForce     Primary Married           0.261
## 9    Unemployed     Primary Married           0.355
## 10     Employed SomeCollege Married           0.332
## 11 NoLaborForce SomeCollege Married           0.331
## 12   Unemployed SomeCollege Married           0.435
## 13     Employed      SomeHS Married           0.326
## 14 NoLaborForce      SomeHS Married           0.326
## 15   Unemployed      SomeHS Married           0.429
## 16     Employed      HSGrad   Cohab           0.419
## 17 NoLaborForce      HSGrad   Cohab           0.418
## 18   Unemployed      HSGrad   Cohab           0.528
## 19     Employed CollegeGrad   Cohab           0.413
## 20 NoLaborForce CollegeGrad   Cohab           0.413

Nested model comparisons

Fit logit 1 for employment

fit.logit1<-svyglm(poormenhlth~myemploy, design=mydesign, family=binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

summary(fit.logit1)

## 
## Call:
## svyglm(formula = poormenhlth ~ myemploy, design = mydesign, family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~weight, data = sub)
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -0.65200    0.01489 -43.787   <2e-16 ***
## myemployNoLaborForce -0.05107    0.02511  -2.033    0.042 *  
## myemployUnemployed    0.55521    0.05320  10.437   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.000006)
## 
## Number of Fisher Scoring iterations: 4

Fit logit 2 for employment and education

fit.logit2<-svyglm(poormenhlth~myemploy+educ, design=mydesign, family=binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

summary(fit.logit2)

## 
## Call:
## svyglm(formula = poormenhlth ~ myemploy + educ, design = mydesign, 
##     family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~weight, data = sub)
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -0.67629    0.02577 -26.243  < 2e-16 ***
## myemployNoLaborForce -0.07033    0.02580  -2.726  0.00641 ** 
## myemployUnemployed    0.53129    0.05380   9.875  < 2e-16 ***
## educCollegeGrad      -0.07614    0.02931  -2.598  0.00939 ** 
## educPrimary          -0.13767    0.08994  -1.531  0.12587    
## educSomeCollege       0.18822    0.03192   5.897 3.71e-09 ***
## educSomeHS            0.23232    0.05702   4.074 4.62e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9999852)
## 
## Number of Fisher Scoring iterations: 4

Fit logit 3 for employment, education and marital status

fit.logit3<-svyglm(poormenhlth~myemploy+educ+marst, design=mydesign, family= binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

summary(fit.logit3)

## 
## Call:
## svyglm(formula = poormenhlth ~ myemploy + educ + marst, design = mydesign, 
##     family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~weight, data = sub)
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -0.893092   0.027866 -32.050  < 2e-16 ***
## myemployNoLaborForce -0.003121   0.026807  -0.116 0.907318    
## myemployUnemployed    0.439499   0.054689   8.036 9.31e-16 ***
## educCollegeGrad      -0.022220   0.029605  -0.751 0.452920    
## educPrimary          -0.143901   0.090177  -1.596 0.110545    
## educSomeCollege       0.193031   0.032331   5.970 2.37e-09 ***
## educSomeHS            0.167816   0.056409   2.975 0.002930 ** 
## marstCohab            0.565592   0.066744   8.474  < 2e-16 ***
## marstDivorced         0.350898   0.034571  10.150  < 2e-16 ***
## marstNevrMarried      0.551394   0.032401  17.018  < 2e-16 ***
## marstSeparated        0.697687   0.077469   9.006  < 2e-16 ***
## marstWidowed         -0.158549   0.044178  -3.589 0.000332 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9999592)
## 
## Number of Fisher Scoring iterations: 4

Odds ratios and confidence values

stargazer(fit.logit1, fit.logit2, fit.logit3,type= "text", style= "demography", coviariate.labels=c("employed", "unemployed", "not in labor force", "some hs", "hs grad", "some college", "college grad", "married", "divorced", "widowed", "separated", "not married", "cohabitating"))

## 
## --------------------------------------------------------
##                                  poormenhlth            
##                        Model 1     Model 2     Model 3  
## --------------------------------------------------------
## myemployNoLaborForce   -0.051*    -0.070**     -0.003   
##                        (0.025)     (0.026)     (0.027)  
## myemployUnemployed    0.555***    0.531***    0.439***  
##                        (0.053)     (0.054)     (0.055)  
## educCollegeGrad                   -0.076**     -0.022   
##                                    (0.029)     (0.030)  
## educPrimary                        -0.138      -0.144   
##                                    (0.090)     (0.090)  
## educSomeCollege                   0.188***    0.193***  
##                                    (0.032)     (0.032)  
## educSomeHS                        0.232***     0.168**  
##                                    (0.057)     (0.056)  
## marstCohab                                    0.566***  
##                                                (0.067)  
## marstDivorced                                 0.351***  
##                                                (0.035)  
## marstNevrMarried                              0.551***  
##                                                (0.032)  
## marstSeparated                                0.698***  
##                                                (0.077)  
## marstWidowed                                  -0.159*** 
##                                                (0.044)  
## Constant              -0.652***   -0.676***   -0.893*** 
##                        (0.015)     (0.026)     (0.028)  
## N                      167,573     167,573     167,573  
## Log Likelihood       -99,277.760 -98,995.430 -97,856.260
## AIC                  198,561.500 198,004.900 195,736.500
## --------------------------------------------------------
## *p < .05; **p < .01; ***p < .001                        
## 
## --------------------------------------------------------------------------------------------------------------------------------------------
## employed unemployed not in labor force some hs hs grad some college college grad married divorced widowed separated not married cohabitating
## --------------------------------------------------------------------------------------------------------------------------------------------

Homework II

Monica Cruz

February 3, 2018