library(car)
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(questionr)
library(haven)
library(Hmisc)

## Loading required package: lattice

## Loading required package: Formula

## Loading required package: ggplot2

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:questionr':
## 
##     describe, wtd.mean, wtd.table, wtd.var

## The following object is masked from 'package:survey':
## 
##     deff

## The following objects are masked from 'package:base':
## 
##     format.pval, units

dat<-read_xpt("C:/Users/Monica/Documents/CSparks_StatsII/CNTY05.xpt")

a) My outcome variable will be the self-reported number of poor mental health days in the past month. I will recode the variable as a binary variable in the following manner: a value of 88==0 (for NO) and a value of 1 - 30== 1 (for YES)

b) My research questions are as follows: How does intimate partner threat affect one’s mental health? How does being hurt by your intimate partner affect one’s mental health? Does being threatened only affect one’s mental health as much as being threatened and hurt?

c) My predictor variables are 1) intimate partner threat and 2) intimate partner hurt

d) Covariates I will use are marital status, education, and employment

Rename variables

nams<-names(dat)
head(nams, n=10)

##  [1] "_CNTYNAM" "_STATE"   "FMONTH"   "IDATE"    "IMONTH"   "IDAY"    
##  [7] "IYEAR"    "INTVID"   "DISPCODE" "SEQNO"

myvariables<-tolower(names(dat))
names(dat)<-myvariables

Recode for poor mental health (outcome variable)

dat$poormenhlth<-recode (dat$menthlth,recodes= "1:30=1; 88=0; else=NA")

Recode for intimate partner threat (predictor variable)

dat$threat<-recode (dat$ipvthrat, recodes = "7:9=NA; 1='1YESthreat'; 2='0NOthreat'")

Recode for intimate partner hurt (predictor variable)

dat$hurt<-recode(dat$ipvphhrt, recodes = "7:9=NA; 1='1YEShurt'; 2='0NOhurt'")

Recode for sex

dat$sexes<-recode(dat$sex, recodes = "2= '1female'; 1= '0male'")

Recode for marital status

dat$marst<-recode(dat$marital, recodes="1='Married'; 2='Divorced'; 3='Widowed'; 4='Separated'; 5='NevrMarried';6='Cohab'; else=NA", as.factor.result=T)
dat$marst<-relevel(dat$marst, ref='Married')

Recode for education level

dat$educ<-recode(dat$educa, recodes="1:3='1Lesshs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor.result=T)
dat$educ<-relevel(dat$educ, ref='2hsgrad')

Recode for employment status

dat$myemploy<-recode(dat$employ, recodes="1:2='Employed'; 3:4='Not Employed'; 5:8= 'nilf'; else=NA", as.factor.result=T)
dat$myemploy<-relevel(dat$myemploy, ref= 'Employed')

Create a subset

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:Hmisc':
## 
##     src, summarize

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

dat$strata<-dat$`_ststr`
dat$weight<-dat$`_cntywt`
sub<-dat%>%
  select(poormenhlth, threat, hurt, sexes, marst, educ, myemploy, weight, strata) %>%
  filter( complete.cases(.))

Create my survey design

options(survey.lonely.psu = "adjust")
mydesign<-svydesign(ids=~1, strata=~strata, weights=~weight, data =sub )

Examine percent of respondents with poor mental health by intimate partner threat, perform survey-corrected chi-square test

mysurvey1<-svyby(formula = ~poormenhlth, by = ~threat, design = mydesign, FUN = svymean, na.rm=T)
svychisq(~poormenhlth+threat, design = mydesign)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~poormenhlth + threat, design = mydesign)
## F = 217.46, ndf = 1, ddf = 29519, p-value < 2.2e-16

mysurvey1

##                threat poormenhlth          se
## 0NOthreat   0NOthreat   0.3293885 0.006292014
## 1YESthreat 1YESthreat   0.5454714 0.013612122

Examine percent of respondents with poor mental health by intimate partner hurt, perform survey-corrected chi-square test and plot it

mysurvey2<-svyby(formula = ~poormenhlth, by = ~hurt, design = mydesign, FUN = svymean, na.rm=T)
svychisq(~poormenhlth+hurt, design = mydesign)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~poormenhlth + hurt, design = mydesign)
## F = 204.58, ndf = 1, ddf = 29519, p-value < 2.2e-16

mysurvey2

##              hurt poormenhlth          se
## 0NOhurt   0NOhurt   0.3286200 0.006346877
## 1YEShurt 1YEShurt   0.5326387 0.013148568

calculate threat, hurt, mental health cross tabulation

bothsurveys<-svyby(formula = ~poormenhlth, by = ~threat+hurt, design = mydesign, FUN = svymean, na.rm=T)
bothsurveys

##                         threat     hurt poormenhlth          se
## 0NOthreat.0NOhurt    0NOthreat  0NOhurt   0.3243498 0.006441257
## 1YESthreat.0NOhurt  1YESthreat  0NOhurt   0.5004859 0.034404278
## 0NOthreat.1YEShurt   0NOthreat 1YEShurt   0.4579675 0.028064941
## 1YESthreat.1YEShurt 1YESthreat 1YEShurt   0.5534024 0.014820587

Based on the cross tabulation above, it appears that 50% of the individuals who are threatened report poor mental health compared to 55% of those who are threatened and hurt. These two are nearly the same.

descriptive statistics?? (NOT sure this is correct)

describe(sub$poormenhlth)

## sub$poormenhlth : NUMBER OF DAYS MENTAL HEALTH NOT GOOD 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##    29668        0        2     0.68    10286   0.3467    0.453

describe(sub$threat)

## sub$threat : SEX PARTNER EVER THREATENED VIOLENCE 
##        n  missing distinct 
##    29668        0        2 
##                                 
## Value       0NOthreat 1YESthreat
## Frequency       24946       4722
## Proportion      0.841      0.159

describe(sub$hurt)

## sub$hurt : SEX PARTNER EVER VIOLENT 
##        n  missing distinct 
##    29668        0        2 
##                             
## Value       0NOhurt 1YEShurt
## Frequency     24623     5045
## Proportion     0.83     0.17

describe(sub$sexes)

## sub$sexes : SEX 
##        n  missing distinct 
##    29668        0        2 
##                           
## Value        0male 1female
## Frequency    11731   17937
## Proportion   0.395   0.605

describe(sub$marst)

## sub$marst 
##        n  missing distinct 
##    29668        0        6 
##                                                                       
## Value          Married       Cohab    Divorced NevrMarried   Separated
## Frequency        16402         873        4345        4570         596
## Proportion       0.553       0.029       0.146       0.154       0.020
##                       
## Value          Widowed
## Frequency         2882
## Proportion       0.097

describe(sub$educ)

## sub$educ 
##        n  missing distinct 
##    29668        0        4 
##                                               
## Value       2hsgrad  1Lesshs 3somecol 4colgrad
## Frequency      8176     2400     8250    10842
## Proportion    0.276    0.081    0.278    0.365

describe(sub$myemploy)

## sub$myemploy 
##        n  missing distinct 
##    29668        0        3 
##                                                  
## Value          Employed         nilf Not Employed
## Frequency         17874        10689         1105
## Proportion        0.602        0.360        0.037

mysurvey4<-svyby(formula = ~poormenhlth, by = ~marst, design = mydesign, FUN = svymean, na.rm=T)
svychisq(~poormenhlth+marst, design = mydesign)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~poormenhlth + marst, design = mydesign)
## F = 19.203, ndf = 4.3017e+00, ddf = 1.2698e+05, p-value < 2.2e-16

mysurvey4

##                   marst poormenhlth          se
## Married         Married   0.3185454 0.007007077
## Cohab             Cohab   0.4635775 0.035642445
## Divorced       Divorced   0.4142504 0.015624799
## NevrMarried NevrMarried   0.4442692 0.015744312
## Separated     Separated   0.4403304 0.052974375
## Widowed         Widowed   0.2848179 0.016730074

mysurvey5<-svyby(formula = ~poormenhlth, by = ~educ, design = mydesign, FUN = svymean, na.rm=T)
svychisq(~poormenhlth+educ, design = mydesign)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~poormenhlth + educ, design = mydesign)
## F = 4.9262, ndf = 2.897, ddf = 85516.000, p-value = 0.002299

mysurvey5

##              educ poormenhlth         se
## 2hsgrad   2hsgrad   0.3556348 0.01107765
## 1Lesshs   1Lesshs   0.4052100 0.02416723
## 3somecol 3somecol   0.3801277 0.01082601
## 4colgrad 4colgrad   0.3337087 0.00907718

mysurvey6<-svyby(formula = ~poormenhlth, by = ~myemploy, design = mydesign, FUN = svymean, na.rm=T)
svychisq(~poormenhlth+myemploy, design = mydesign)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~poormenhlth + myemploy, design = mydesign)
## F = 7.5407, ndf = 1.8596, ddf = 54893.0000, p-value = 0.0007461

mysurvey6

##                  myemploy poormenhlth          se
## Employed         Employed   0.3553374 0.007274321
## nilf                 nilf   0.3492284 0.009622810
## Not Employed Not Employed   0.4682325 0.034458155

Create Logit Mode on poor mental health by threat, hurt, employment, education and marital status

fit.mylogit<-svyglm(poormenhlth~threat+hurt+sexes+marst+educ+myemploy,design= mydesign, family=binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

fit.mylogit

## Stratified Independent Sampling design (with replacement)
## svydesign(ids = ~1, strata = ~strata, weights = ~weight, data = sub)
## 
## Call:  svyglm(formula = poormenhlth ~ threat + hurt + sexes + marst + 
##     educ + myemploy, design = mydesign, family = binomial)
## 
## Coefficients:
##          (Intercept)      threat1YESthreat          hurt1YEShurt  
##            -1.151437              0.425620              0.396171  
##         sexes1female            marstCohab         marstDivorced  
##             0.482205              0.487083              0.192912  
##     marstNevrMarried        marstSeparated          marstWidowed  
##             0.539102              0.345062             -0.277246  
##          educ1Lesshs          educ3somecol          educ4colgrad  
##             0.182382              0.117225              0.004405  
##         myemploynilf  myemployNot Employed  
##            -0.054640              0.255701  
## 
## Degrees of Freedom: 29667 Total (i.e. Null);  29506 Residual
## Null Deviance:       38740 
## Residual Deviance: 37220     AIC: 34410

Present model coefficients ( not sure my labels are correct)

stargazer(fit.mylogit, type= "text", style= "demography", covariate.labels=c("Intercept", "1YESthreat", "1YESHurt", "1female", "Divorced", "Widowed", "Separated", "Not married", "Cohabitating","Lesshs", "SomeHS","SomeColl", "CollGrad", "Not Employed", "nilf"))

## 
## ---------------------------------
##                     poormenhlth  
## ---------------------------------
## Intercept             0.426***   
##                       (0.097)    
## 1YESthreat            0.396***   
##                       (0.094)    
## 1YESHurt              0.482***   
##                       (0.053)    
## 1female               0.487**    
##                       (0.149)    
## Divorced               0.193*    
##                       (0.076)    
## Widowed               0.539***   
##                       (0.073)    
## Separated              0.345     
##                       (0.200)    
## Not married           -0.277**   
##                       (0.095)    
## Cohabitating           0.182     
##                       (0.113)    
## Lesshs                 0.117     
##                       (0.069)    
## SomeHS                 0.004     
##                       (0.066)    
## SomeColl               -0.055    
##                       (0.057)    
## CollGrad               0.256     
##                       (0.137)    
## Not Employed         -1.151***   
##                       (0.068)    
## N                      29,668    
## Log Likelihood      -17,192.370  
## AIC                  34,412.730  
## ---------------------------------
## *p < .05; **p < .01; ***p < .001

For section above, those who were threatened and hurt were more likely to experience poor mental health than those who were not, my reference group for marital status is “married” so those who are divorced, widowed, separated are more likely to experience poor mental health than those who are married (my reference group). Those who are College graduates are less likely to experience poor mental health than those who have are HS graduates (my reference group).

Present model marginal effects in data frame

log.marg<-coef(fit.mylogit)*mean(dlogis(predict(fit.mylogit)), na.rm=T)
data.frame(m.logit=log.marg)

##                            m.logit
## (Intercept)          -0.2522625055
## threat1YESthreat      0.0932469487
## hurt1YEShurt          0.0867951517
## sexes1female          0.1056438115
## marstCohab            0.1067126786
## marstDivorced         0.0422642077
## marstNevrMarried      0.1181090891
## marstSeparated        0.0755978621
## marstWidowed         -0.0607404311
## educ1Lesshs           0.0399570965
## educ3somecol          0.0256823200
## educ4colgrad          0.0009650096
## myemploynilf         -0.0119708877
## myemployNot Employed  0.0560202423

Nested model comparisons

Fit logit 1 for threat and hurt

fit.logit1<-svyglm(poormenhlth~threat+hurt, design=mydesign, family=binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

summary(fit.logit1)

## 
## Call:
## svyglm(formula = poormenhlth ~ threat + hurt, design = mydesign, 
##     family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~weight, data = sub)
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -0.72846    0.02903 -25.090  < 2e-16 ***
## threat1YESthreat  0.53938    0.09592   5.623 1.89e-08 ***
## hurt1YEShurt      0.43754    0.09348   4.681 2.87e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9999191)
## 
## Number of Fisher Scoring iterations: 4

Fit logit 2 for threat, hurt , sex, and marital status

fit.logit2<-svyglm(poormenhlth~threat+hurt+sexes+marst, design=mydesign, family=binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

summary(fit.logit2)

## 
## Call:
## svyglm(formula = poormenhlth ~ threat + hurt + sexes + marst, 
##     design = mydesign, family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~weight, data = sub)
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -1.11054    0.04604 -24.120  < 2e-16 ***
## threat1YESthreat  0.43351    0.09691   4.473 7.74e-06 ***
## hurt1YEShurt      0.40579    0.09412   4.312 1.63e-05 ***
## sexes1female      0.47074    0.05323   8.843  < 2e-16 ***
## marstCohab        0.52728    0.15069   3.499 0.000468 ***
## marstDivorced     0.21275    0.07459   2.852 0.004343 ** 
## marstNevrMarried  0.57209    0.07311   7.825 5.24e-15 ***
## marstSeparated    0.38697    0.19956   1.939 0.052490 .  
## marstWidowed     -0.27827    0.09175  -3.033 0.002423 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9993427)
## 
## Number of Fisher Scoring iterations: 4

Fit logit 3 for threat, hurt, sex, marital status, education, and employment

fit.logit3<-svyglm(poormenhlth~threat+hurt+sexes+marst+educ+myemploy, design=mydesign, family= binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

summary(fit.logit3)

## 
## Call:
## svyglm(formula = poormenhlth ~ threat + hurt + sexes + marst + 
##     educ + myemploy, design = mydesign, family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~weight, data = sub)
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -1.151437   0.068012 -16.930  < 2e-16 ***
## threat1YESthreat      0.425620   0.096926   4.391 1.13e-05 ***
## hurt1YEShurt          0.396171   0.094431   4.195 2.73e-05 ***
## sexes1female          0.482205   0.053356   9.038  < 2e-16 ***
## marstCohab            0.487083   0.149020   3.269  0.00108 ** 
## marstDivorced         0.192912   0.075915   2.541  0.01105 *  
## marstNevrMarried      0.539102   0.072704   7.415 1.25e-13 ***
## marstSeparated        0.345062   0.199670   1.728  0.08397 .  
## marstWidowed         -0.277246   0.094916  -2.921  0.00349 ** 
## educ1Lesshs           0.182382   0.112778   1.617  0.10585    
## educ3somecol          0.117225   0.069250   1.693  0.09051 .  
## educ4colgrad          0.004405   0.066366   0.066  0.94708    
## myemploynilf         -0.054640   0.057068  -0.957  0.33834    
## myemployNot Employed  0.255701   0.136938   1.867  0.06187 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9995211)
## 
## Number of Fisher Scoring iterations: 4

Odds ratios and confidence values

stargazer(fit.logit1, fit.logit2, fit.logit3,type= "text", style= "demography", coviariate.labels=c("Intercept", "1Yesthreat", "1YesHurt","1female", "Divorced", "Widowed", "Separated", "Not married", "Cohabitating","PrimarySchool", "SomeHS","SomeColl", "CollGrad", "not employed", "nilf"))

## 
## --------------------------------------------------------
##                                  poormenhlth            
##                        Model 1     Model 2     Model 3  
## --------------------------------------------------------
## threat1YESthreat      0.539***    0.434***    0.426***  
##                        (0.096)     (0.097)     (0.097)  
## hurt1YEShurt          0.438***    0.406***    0.396***  
##                        (0.093)     (0.094)     (0.094)  
## sexes1female                      0.471***    0.482***  
##                                    (0.053)     (0.053)  
## marstCohab                        0.527***     0.487**  
##                                    (0.151)     (0.149)  
## marstDivorced                      0.213**     0.193*   
##                                    (0.075)     (0.076)  
## marstNevrMarried                  0.572***    0.539***  
##                                    (0.073)     (0.073)  
## marstSeparated                      0.387       0.345   
##                                    (0.200)     (0.200)  
## marstWidowed                      -0.278**    -0.277**  
##                                    (0.092)     (0.095)  
## educ1Lesshs                                     0.182   
##                                                (0.113)  
## educ3somecol                                    0.117   
##                                                (0.069)  
## educ4colgrad                                    0.004   
##                                                (0.066)  
## myemploynilf                                   -0.055   
##                                                (0.057)  
## myemployNot Employed                            0.256   
##                                                (0.137)  
## Constant              -0.728***   -1.111***   -1.151*** 
##                        (0.029)     (0.046)     (0.068)  
## N                      29,668      29,668      29,668   
## Log Likelihood       -17,550.890 -17,217.070 -17,192.370
## AIC                  35,107.790  34,452.140  34,412.730 
## --------------------------------------------------------
## *p < .05; **p < .01; ***p < .001                        
## 
## --------------------------------------------------------------------------------------------------------------------------------------------------
## Intercept 1Yesthreat 1YesHurt 1female Divorced Widowed Separated Not married Cohabitating PrimarySchool SomeHS SomeColl CollGrad not employed nilf
## --------------------------------------------------------------------------------------------------------------------------------------------------

Research Draft

Monica Cruz

April, 2018

a) My outcome variable will be the self-reported number of poor mental health days in the past month. I will recode the variable as a binary variable in the following manner: a value of 88==0 (for NO) and a value of 1 - 30== 1 (for YES)

b) My research questions are as follows: How does intimate partner threat affect one’s mental health? How does being hurt by your intimate partner affect one’s mental health? Does being threatened only affect one’s mental health as much as being threatened and hurt?

c) My predictor variables are 1) intimate partner threat and 2) intimate partner hurt

d) Covariates I will use are marital status, education, and employment

Rename variables

Recode for poor mental health (outcome variable)

Recode for intimate partner threat (predictor variable)

Recode for intimate partner hurt (predictor variable)

Recode for sex

Recode for marital status

Recode for education level

Recode for employment status

Create a subset

Create my survey design

Examine percent of respondents with poor mental health by intimate partner threat, perform survey-corrected chi-square test

Examine percent of respondents with poor mental health by intimate partner hurt, perform survey-corrected chi-square test and plot it

calculate threat, hurt, mental health cross tabulation

Based on the cross tabulation above, it appears that 50% of the individuals who are threatened report poor mental health compared to 55% of those who are threatened and hurt. These two are nearly the same.

descriptive statistics?? (NOT sure this is correct)

Create Logit Mode on poor mental health by threat, hurt, employment, education and marital status

Present model coefficients ( not sure my labels are correct)

Present model marginal effects in data frame

Nested model comparisons

Fit logit 1 for threat and hurt

Fit logit 2 for threat, hurt , sex, and marital status

Fit logit 3 for threat, hurt, sex, marital status, education, and employment

Odds ratios and confidence values