library(car)
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(questionr)
library(haven)

dat<-read_xpt("C:/Users/Monica/Documents/CSparks_StatsII/CNTY05.xpt")

a) My outcome variable will be the self-reported number of poor mental health days in the past month. I will recode the variable as a binary variable in the following manner: a value of 88==0 (for NO) and a value of 1 - 30== 1 (for YES)

b) My research questions are as follows: How does intimate partner threat affect one’s mental health? How does being hurt by your intimate partner affect one’s mental health? Does being threatened only affect one’s mental health as much as being threatened and hurt?

c) My predictor variables are 1) intimate partner threat and 2) intimate partner hurt

d) Covariates I will use are marital status, education, and employment

Rename variables

nams<-names(dat)
head(nams, n=10)

##  [1] "_CNTYNAM" "_STATE"   "FMONTH"   "IDATE"    "IMONTH"   "IDAY"    
##  [7] "IYEAR"    "INTVID"   "DISPCODE" "SEQNO"

myvariables<-tolower(names(dat))
names(dat)<-myvariables

Recode for poor mental health (outcome variable)

dat$poormenhlth<-recode (dat$menthlth,recodes= "1:30=1; 88=0; else=NA")

Recode for intimate partner threat (predictor variable)

dat$threat<-recode (dat$ipvthrat, recodes = "7:9=NA; 1='1YESthreat'; 2='0NOthreat'")

Recode for intimate partner hurt (predictor variable)

dat$hurt<-recode(dat$ipvphhrt, recodes = "7:9=NA; 1='1YEShurt'; 2='0NOhurt'")

Recode for marital status

dat$marst<-recode(dat$marital, recodes="1='Married'; 2='Divorced'; 3='Widowed'; 4='Separated'; 5='NevrMarried';6='Cohab'; else=NA", as.factor.result=T)
dat$marst<-relevel(dat$marst, ref='Married')

Recode for education level

dat$educ<-recode(dat$educa, recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor.result=T)
dat$educ<-relevel(dat$educ, ref='2hsgrad')

Recode for employment status

dat$myemploy<-recode(dat$employ, recodes="1:2='Employed'; 3:4='Not Employed'; 5:8= 'nilf'; else=NA", as.factor.result=T)
dat$myemploy<-relevel(dat$myemploy, ref= 'Employed')

Create a subset

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

dat$strata<-dat$`_ststr`
dat$weight<-dat$`_cntywt`
sub<-dat%>%
  select(poormenhlth, threat, hurt, marst, educ, myemploy, weight, strata) %>%
  filter( complete.cases(.))

Create my survey design

options(survey.lonely.psu = "adjust")
mydesign<-svydesign(ids=~1, strata=~strata, weights=~weight, data =sub )

Examine percent of respondents with poor mental health by intimate partner threat, perform survey-corrected chi-square test and plot it

library(ggplot2)
mysurvey1<-svyby(formula = ~poormenhlth, by = ~threat, design = mydesign, FUN = svymean, na.rm=T)
svychisq(~poormenhlth+threat, design = mydesign)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~poormenhlth + threat, design = mydesign)
## F = 217.46, ndf = 1, ddf = 29519, p-value < 2.2e-16

qplot(x=mysurvey1$threat,y=mysurvey1$poormenhlth, data=mysurvey1, xlab="Threat", ylab="%  Poor Mental Health" )+
geom_errorbar(aes(x=threat, ymin=poormenhlth-se,ymax= poormenhlth+se), width=.25)+
ggtitle(label = "Percent of Adults with Poor Mental Health who were Threatened")

Examine percent of respondents with poor mental health by intimate partner hurt, perform survey-corrected chi-square test and plot it

mysurvey2<-svyby(formula = ~poormenhlth, by = ~hurt, design = mydesign, FUN = svymean, na.rm=T)
svychisq(~poormenhlth+hurt, design = mydesign)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~poormenhlth + hurt, design = mydesign)
## F = 204.58, ndf = 1, ddf = 29519, p-value < 2.2e-16

qplot(x=mysurvey2$hurt,y=mysurvey2$poormenhlth, data=mysurvey2 ,xlab="Hurt", ylab="%  Poor Mental Health" )+
geom_errorbar(aes(x=hurt, ymin=poormenhlth-se,ymax= poormenhlth+se), width=.25)+
ggtitle(label = "Percent of Adults with Poor Mental Health who were Hurt")

calculate threat, hurt, mental health cross tabulation

bothsurveys<-svyby(formula = ~poormenhlth, by = ~threat+hurt, design = mydesign, FUN = svymean, na.rm=T)
bothsurveys

##                         threat     hurt poormenhlth          se
## 0NOthreat.0NOhurt    0NOthreat  0NOhurt   0.3243498 0.006441257
## 1YESthreat.0NOhurt  1YESthreat  0NOhurt   0.5004859 0.034404278
## 0NOthreat.1YEShurt   0NOthreat 1YEShurt   0.4579675 0.028064941
## 1YESthreat.1YEShurt 1YESthreat 1YEShurt   0.5534024 0.014820587

Based on the cross tabulation above, it appears that 50% of the individuals who are threatened report poor mental health compared to 55% of those who are threatened and hurt. These two are nearly the same.

??? Bill– not sure if next two chunks of code are correct.

Create Logit Mode on poor mental health by threat, hurt, employment, education and marital status

fit.mylogit<-svyglm(poormenhlth~threat+hurt+marst+educ+myemploy,design= mydesign, family=binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

??? Bill, I do not include labels for my reference groups (married and HS Grad). Is that correct? Not sure what to do about employment. I think employment is mislabeled.

Present model coefficients (Bill– not sure my labels are correct)

stargazer(fit.mylogit, type= "text", style= "demography", covariate.labels=c("Yes Threat", "Yes Hurt", "Divorced", "Widowed", "Separated", "Not married", "Cohabitating","PrimarySchool", "SomeHS","SomeColl", "CollGrad", "not employed"))

## 
## ---------------------------------
##                       poormenhlth
## ---------------------------------
## Yes Threat             0.517***  
##                         (0.095)  
## Yes Hurt               0.410***  
##                         (0.093)  
## Divorced               0.514***  
##                         (0.144)  
## Widowed                 0.211**  
##                         (0.075)  
## Separated              0.495***  
##                         (0.072)  
## Not married              0.330   
##                         (0.213)  
## Cohabitating            -0.189*  
##                         (0.093)  
## PrimarySchool            0.101   
##                         (0.206)  
## SomeHS                   0.169   
##                         (0.127)  
## SomeColl                 0.111   
##                         (0.069)  
## CollGrad                -0.005   
##                         (0.066)  
## not employed             0.024   
##                         (0.057)  
## myemployNot Employed     0.253   
##                         (0.140)  
## Constant               -0.928*** 
##                         (0.061)  
## N                       29,668   
## Log Likelihood        -17,361.900
## AIC                   34,751.800 
## ---------------------------------
## *p < .05; **p < .01; ***p < .001

For section above, those who were threatened and hurt were more likely to experience poor mental health than those who were not, my reference group for marital status is “married” so those who are divorced, widowed, separated are more likely to experience poor mental health than those who are married (my reference group). Those who are College graduates are less likely to experience poor mental health than those who have are HS graduates (my reference group). Regarding employment, I don’t understand the output here– not sure why it came out that way.

Present model marginal effects in data frame

log.marg<-coef(fit.mylogit)*mean(dlogis(predict(fit.mylogit)), na.rm=T)
data.frame(m.logit=log.marg)

##                           m.logit
## (Intercept)          -0.203956010
## threat1YESthreat      0.113690840
## hurt1YEShurt          0.090020010
## marstCohab            0.112997681
## marstDivorced         0.046432664
## marstNevrMarried      0.108729708
## marstSeparated        0.072596959
## marstWidowed         -0.041434007
## educ0Prim             0.022269143
## educ1somehs           0.037127863
## educ3somecol          0.024424546
## educ4colgrad         -0.001064305
## myemploynilf          0.005337515
## myemployNot Employed  0.055650649

Nested model comparisons

Fit logit 1 for threat

fit.logit1<-svyglm(poormenhlth~threat, design=mydesign, family=binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

summary(fit.logit1)

## 
## Call:
## svyglm(formula = poormenhlth ~ threat, design = mydesign, family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~weight, data = sub)
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -0.71095    0.02848  -24.96   <2e-16 ***
## threat1YESthreat  0.89334    0.06186   14.44   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.000034)
## 
## Number of Fisher Scoring iterations: 4

Fit logit 2 for threat and hurt

fit.logit2<-svyglm(poormenhlth~threat+hurt, design=mydesign, family=binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

summary(fit.logit2)

## 
## Call:
## svyglm(formula = poormenhlth ~ threat + hurt, design = mydesign, 
##     family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~weight, data = sub)
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -0.72846    0.02903 -25.090  < 2e-16 ***
## threat1YESthreat  0.53938    0.09592   5.623 1.89e-08 ***
## hurt1YEShurt      0.43754    0.09348   4.681 2.87e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9999191)
## 
## Number of Fisher Scoring iterations: 4

Fit logit 3 for threat, hurt, marital status, education, and employment

fit.logit3<-svyglm(poormenhlth~threat+hurt+marst+educ+myemploy, design=mydesign, family= binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

summary(fit.logit3)

## 
## Call:
## svyglm(formula = poormenhlth ~ threat + hurt + marst + educ + 
##     myemploy, design = mydesign, family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~weight, data = sub)
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -0.928287   0.060672 -15.300  < 2e-16 ***
## threat1YESthreat      0.517453   0.095337   5.428 5.76e-08 ***
## hurt1YEShurt          0.409718   0.093358   4.389 1.14e-05 ***
## marstCohab            0.514298   0.143642   3.580 0.000344 ***
## marstDivorced         0.211334   0.075202   2.810 0.004954 ** 
## marstNevrMarried      0.494873   0.072063   6.867 6.67e-12 ***
## marstSeparated        0.330418   0.213412   1.548 0.121570    
## marstWidowed         -0.188583   0.093077  -2.026 0.042764 *  
## educ0Prim             0.101356   0.206150   0.492 0.622963    
## educ1somehs           0.168984   0.126907   1.332 0.183018    
## educ3somecol          0.111166   0.068865   1.614 0.106483    
## educ4colgrad         -0.004844   0.065543  -0.074 0.941085    
## myemploynilf          0.024293   0.056634   0.429 0.667962    
## myemployNot Employed  0.253289   0.139882   1.811 0.070193 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.000048)
## 
## Number of Fisher Scoring iterations: 4

Odds ratios and confidence values

stargazer(fit.logit1, fit.logit2, fit.logit3,type= "text", style= "demography", coviariate.labels=c("1Yesthreat", "1YesHurt","Divorced", "Widowed", "Separated", "Not married", "Cohabitating","PrimarySchool", "SomeHS","SomeColl", "CollGrad", "not employed"  ))

## 
## --------------------------------------------------------
##                                  poormenhlth            
##                        Model 1     Model 2     Model 3  
## --------------------------------------------------------
## threat1YESthreat      0.893***    0.539***    0.517***  
##                        (0.062)     (0.096)     (0.095)  
## hurt1YEShurt                      0.438***    0.410***  
##                                    (0.093)     (0.093)  
## marstCohab                                    0.514***  
##                                                (0.144)  
## marstDivorced                                  0.211**  
##                                                (0.075)  
## marstNevrMarried                              0.495***  
##                                                (0.072)  
## marstSeparated                                  0.330   
##                                                (0.213)  
## marstWidowed                                   -0.189*  
##                                                (0.093)  
## educ0Prim                                       0.101   
##                                                (0.206)  
## educ1somehs                                     0.169   
##                                                (0.127)  
## educ3somecol                                    0.111   
##                                                (0.069)  
## educ4colgrad                                   -0.005   
##                                                (0.066)  
## myemploynilf                                    0.024   
##                                                (0.057)  
## myemployNot Employed                            0.253   
##                                                (0.140)  
## Constant              -0.711***   -0.728***   -0.928*** 
##                        (0.028)     (0.029)     (0.061)  
## N                      29,668      29,668      29,668   
## Log Likelihood       -17,582.100 -17,550.890 -17,361.900
## AIC                  35,168.190  35,107.790  34,751.800 
## --------------------------------------------------------
## *p < .05; **p < .01; ***p < .001                        
## 
## ---------------------------------------------------------------------------------------------------------------------------
## 1Yesthreat 1YesHurt Divorced Widowed Separated Not married Cohabitating PrimarySchool SomeHS SomeColl CollGrad not employed
## ---------------------------------------------------------------------------------------------------------------------------

Research Draft

Monica Cruz

March 26, 2018

a) My outcome variable will be the self-reported number of poor mental health days in the past month. I will recode the variable as a binary variable in the following manner: a value of 88==0 (for NO) and a value of 1 - 30== 1 (for YES)

b) My research questions are as follows: How does intimate partner threat affect one’s mental health? How does being hurt by your intimate partner affect one’s mental health? Does being threatened only affect one’s mental health as much as being threatened and hurt?

c) My predictor variables are 1) intimate partner threat and 2) intimate partner hurt

d) Covariates I will use are marital status, education, and employment

Rename variables

Recode for poor mental health (outcome variable)

Recode for intimate partner threat (predictor variable)

Recode for intimate partner hurt (predictor variable)

Recode for marital status

Recode for education level

Recode for employment status

Create a subset

Create my survey design

Examine percent of respondents with poor mental health by intimate partner threat, perform survey-corrected chi-square test and plot it

Examine percent of respondents with poor mental health by intimate partner hurt, perform survey-corrected chi-square test and plot it

calculate threat, hurt, mental health cross tabulation

Based on the cross tabulation above, it appears that 50% of the individuals who are threatened report poor mental health compared to 55% of those who are threatened and hurt. These two are nearly the same.

??? Bill– not sure if next two chunks of code are correct.

Create Logit Mode on poor mental health by threat, hurt, employment, education and marital status

??? Bill, I do not include labels for my reference groups (married and HS Grad). Is that correct? Not sure what to do about employment. I think employment is mislabeled.

Present model coefficients (Bill– not sure my labels are correct)

Present model marginal effects in data frame

Nested model comparisons

Fit logit 1 for threat

Fit logit 2 for threat and hurt

Fit logit 3 for threat, hurt, marital status, education, and employment

Odds ratios and confidence values

??? For Bill– not sure how to best interepret output above regarding model fit. Also not sure whether my labels are correct.