library(car)
## Loading required package: carData
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(questionr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
nhis<-read.csv("~/Desktop/DataFolder/nhis_00015.csv")
#recode variables
nhis$maritalst<-Recode(nhis$MARSTAT,recodes="10='Bmarried';50='Asingle';20:40='W/D/S'; else=NA",as.factor=T)
nhis$sexor<-Recode(nhis$SEXORIEN,recodes="1='b) bgay';2='astraight';3:5='cbisexual/?'; else=NA",as.factor=T)
nhis$marXsex<-interaction(nhis$maritalst,nhis$sexor)
nhis$educ<-Recode(nhis$EDUC,recodes="100:204='a) Less HighSchool';300:302='b) HighSchool/GED';500:600='c) ColDegree';400:401='b) HighSchool/GED';402:403='c) ColDegree'; else=NA",as.factor=T)
nhis$health<-Recode(nhis$HEALTH,recodes="1:2=0;3:5=1;else=NA",as.factor=T)
nhis$earn<-Recode(nhis$EARNINGS,recodes="01:04='<20';05:06='20<35';07:08='35<55';09:10='55<75';11='75+';else=NA",as.factor=T)
nhis$bmi<-Recode(nhis$BMICALC,recodes="1:29.9='aokay';30:100='bobese';0=NA;996=NA",as.factor=T)
nhis$insurace<-Recode(nhis$HINOTCOVE,recodes="0=NA;1='Yes';2='No';else=NA",as.factor=T)
nhis$drink<-Recode(nhis$ALCAMT,recodes="001:050='ok';060:950='bad';else=NA",as.factor=T)
nhis$smoke<-Recode(nhis$SMOKESTATUS2,recodes="10:20='smokes';30='no smokes';40='smokes';else=NA",as.factor=T)
nhis$ownhome<-Recode(nhis$OWNERSHIP,recodes="10:12='Own';20:40='Rent';else=NA",as.factor=T)
nhis$delaycost<-Recode(nhis$DELAYCOST,recodes="1='No';2='Yes';else=NA",as.factor=T)
nhis$agec<-cut(nhis$AGE,breaks = c(0,25,35,45,55,65,75,99))
nhis$sex<-Recode(nhis$SEX,recodes="1='male';2='female';else=NA",as.factor=T)
nhis$race<-Recode(nhis$RACENEW,recodes="10='aWhite';20='black';30:61='other';97:99=NA",as.factor=T)
nhis$trust<-Recode(nhis$NBHDTRUST,recodes="1:2='Agree';3:4='Disagree';else=NA",as.factor=T)
table(nhis$NBHDTRUST)
## 
##      0      1      2      3      4      7      8      9 
## 115531  27625  20179   4970   3804    107   1219   1866
#design survey
nhis<-nhis%>%filter(is.na(PERWEIGHT)==F)
options(survey.lonly.psu="adjust")
des<-svydesign(ids=~1,strata = ~STRATA,weights=~PERWEIGHT,data = nhis)
#complete cases
nhis<-nhis%>%
  filter(complete.cases(.))
#gays only
nhis <- nhis %>%
subset(sexor=='b) bgay')
#health to marital status
fit<-svyglm(health~maritalst,design=des,family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(fit)
## 
## Call:
## svyglm(formula = health ~ maritalst, design = des, family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~STRATA, weights = ~PERWEIGHT, data = nhis)
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -0.86110    0.01204  -71.52   <2e-16 ***
## maritalstBmarried  0.32829    0.01486   22.09   <2e-16 ***
## maritalstW/D/S     1.01879    0.01858   54.84   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.003603)
## 
## Number of Fisher Scoring iterations: 4
#healh to marital status, sex, age, race (biological demographics)
fit1<-svyglm(health~maritalst+sex+agec+race,design=des,family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(fit1)
## 
## Call:
## svyglm(formula = health ~ maritalst + sex + agec + race, design = des, 
##     family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~STRATA, weights = ~PERWEIGHT, data = nhis)
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -1.391311   0.019454 -71.519  < 2e-16 ***
## maritalstBmarried -0.408927   0.020642 -19.810  < 2e-16 ***
## maritalstW/D/S     0.059652   0.024456   2.439   0.0147 *  
## sexmale           -0.009444   0.013083  -0.722   0.4704    
## agec(25,35]        0.545368   0.026637  20.474  < 2e-16 ***
## agec(35,45]        0.926010   0.028843  32.105  < 2e-16 ***
## agec(45,55]        1.259787   0.028800  43.742  < 2e-16 ***
## agec(55,65]        1.524260   0.029180  52.237  < 2e-16 ***
## agec(65,75]        1.635252   0.031098  52.585  < 2e-16 ***
## agec(75,99]        1.977724   0.034932  56.616  < 2e-16 ***
## raceblack          0.421733   0.020417  20.656  < 2e-16 ***
## raceother          0.178026   0.022280   7.990 1.35e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.005193)
## 
## Number of Fisher Scoring iterations: 4
regTermTest(fit1, test.terms = ~sex+agec+race, method="Wald", df = NULL)
## Wald test for sex agec race
##  in svyglm(formula = health ~ maritalst + sex + agec + race, design = des, 
##     family = binomial)
## F =  575.4059  on  9  and  142192  df: p= < 2.22e-16
#health to marital, sex, age, race, education, income (biological and social demographics)
fit2<-svyglm(health~maritalst+sex+agec+race+educ+earn,design=des,family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(fit2)
## 
## Call:
## svyglm(formula = health ~ maritalst + sex + agec + race + educ + 
##     earn, design = des, family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~STRATA, weights = ~PERWEIGHT, data = nhis)
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -0.89402    0.04581 -19.516  < 2e-16 ***
## maritalstBmarried     -0.17668    0.03023  -5.845 5.08e-09 ***
## maritalstW/D/S         0.04484    0.03710   1.209   0.2268    
## sexmale                0.05248    0.02154   2.437   0.0148 *  
## agec(25,35]            0.69681    0.04200  16.591  < 2e-16 ***
## agec(35,45]            1.11032    0.04524  24.544  < 2e-16 ***
## agec(45,55]            1.38380    0.04579  30.222  < 2e-16 ***
## agec(55,65]            1.50234    0.04703  31.942  < 2e-16 ***
## agec(65,75]            1.21971    0.05987  20.371  < 2e-16 ***
## agec(75,99]            1.22623    0.12178  10.069  < 2e-16 ***
## raceblack              0.23567    0.03304   7.134 9.88e-13 ***
## raceother              0.20181    0.03743   5.392 7.00e-08 ***
## educb) HighSchool/GED -0.36150    0.03513 -10.291  < 2e-16 ***
## educc) ColDegree      -0.75555    0.03729 -20.262  < 2e-16 ***
## earn20<35             -0.18936    0.02930  -6.463 1.03e-10 ***
## earn35<55             -0.43096    0.03122 -13.805  < 2e-16 ***
## earn55<75             -0.61232    0.03890 -15.740  < 2e-16 ***
## earn75+               -0.81798    0.03873 -21.122  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.009868)
## 
## Number of Fisher Scoring iterations: 4
regTermTest(fit2, test.terms = ~sex+age+race+educ+earn, method="Wald", df = NULL)
## Wald test for sex age race educ earn
##  in svyglm(formula = health ~ maritalst + sex + agec + race + educ + 
##     earn, design = des, family = binomial)
## F =  175.2235  on  9  and  59580  df: p= < 2.22e-16
#health to marital, sex, age, race, education, income, bmi, smoking, drinking (biological and social demographics and social health behaviors)
fit3<-svyglm(health~maritalst+sex+agec+race+educ+earn+bmi+smoke+drink,design=des,family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(fit3)
## 
## Call:
## svyglm(formula = health ~ maritalst + sex + agec + race + educ + 
##     earn + bmi + smoke + drink, design = des, family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~STRATA, weights = ~PERWEIGHT, data = nhis)
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -1.03776    0.11535  -8.996  < 2e-16 ***
## maritalstBmarried     -0.21143    0.05309  -3.982 6.85e-05 ***
## maritalstW/D/S        -0.02646    0.05984  -0.442   0.6584    
## sexmale                0.08035    0.04024   1.997   0.0459 *  
## agec(25,35]            0.44608    0.07664   5.821 5.96e-09 ***
## agec(35,45]            0.82957    0.08278  10.022  < 2e-16 ***
## agec(45,55]            1.14272    0.08361  13.667  < 2e-16 ***
## agec(55,65]            1.15931    0.08560  13.543  < 2e-16 ***
## agec(65,75]            0.84069    0.10711   7.849 4.42e-15 ***
## agec(75,99]            0.89636    0.21220   4.224 2.41e-05 ***
## raceblack              0.25086    0.06422   3.907 9.39e-05 ***
## raceother              0.35227    0.07548   4.667 3.08e-06 ***
## educb) HighSchool/GED -0.36792    0.07584  -4.851 1.24e-06 ***
## educc) ColDegree      -0.62051    0.07827  -7.927 2.36e-15 ***
## earn20<35             -0.24616    0.05556  -4.430 9.46e-06 ***
## earn35<55             -0.50528    0.05824  -8.676  < 2e-16 ***
## earn55<75             -0.66358    0.06979  -9.508  < 2e-16 ***
## earn75+               -0.84111    0.06846 -12.286  < 2e-16 ***
## bmibobese              0.81380    0.04033  20.176  < 2e-16 ***
## smokesmokes            0.38944    0.03909   9.963  < 2e-16 ***
## drinkok               -0.15482    0.07257  -2.133   0.0329 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.006669)
## 
## Number of Fisher Scoring iterations: 4
regTermTest(fit3, test.terms = ~sex+agec+race+educ+earn+bmi+smoke+drink, method="Wald", df = NULL)
## Wald test for sex agec race educ earn bmi smoke drink
##  in svyglm(formula = health ~ maritalst + sex + agec + race + educ + 
##     earn + bmi + smoke + drink, design = des, family = binomial)
## F =  64.32156  on  18  and  18985  df: p= < 2.22e-16
#health to marital, sex, age, race, education, income, bmi, smoking, drinking (biological and social demographics and social health behaviors and financial "behaviors")
fit4<-svyglm(health~maritalst+sex+agec+race+educ+earn+bmi+smoke+drink+delaycost+insurace+ownhome+trust,design=des,family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(fit4)
## 
## Call:
## svyglm(formula = health ~ maritalst + sex + agec + race + educ + 
##     earn + bmi + smoke + drink + delaycost + insurace + ownhome + 
##     trust, design = des, family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~STRATA, weights = ~PERWEIGHT, data = nhis)
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -1.306908   0.137750  -9.488  < 2e-16 ***
## maritalstBmarried     -0.131762   0.056246  -2.343 0.019160 *  
## maritalstW/D/S        -0.054785   0.062811  -0.872 0.383098    
## sexmale                0.099593   0.041750   2.385 0.017069 *  
## agec(25,35]            0.375533   0.079407   4.729 2.27e-06 ***
## agec(35,45]            0.779234   0.086539   9.004  < 2e-16 ***
## agec(45,55]            1.123716   0.087537  12.837  < 2e-16 ***
## agec(55,65]            1.145868   0.090586  12.650  < 2e-16 ***
## agec(65,75]            0.945413   0.111980   8.443  < 2e-16 ***
## agec(75,99]            1.096846   0.218561   5.018 5.26e-07 ***
## raceblack              0.224458   0.069231   3.242 0.001188 ** 
## raceother              0.343842   0.078954   4.355 1.34e-05 ***
## educb) HighSchool/GED -0.311298   0.080139  -3.884 0.000103 ***
## educc) ColDegree      -0.545093   0.083139  -6.556 5.66e-11 ***
## earn20<35             -0.236123   0.057665  -4.095 4.25e-05 ***
## earn35<55             -0.442901   0.061145  -7.243 4.55e-13 ***
## earn55<75             -0.536946   0.072698  -7.386 1.58e-13 ***
## earn75+               -0.708390   0.071984  -9.841  < 2e-16 ***
## bmibobese              0.807438   0.041604  19.408  < 2e-16 ***
## smokesmokes            0.342799   0.040559   8.452  < 2e-16 ***
## drinkok               -0.173280   0.074175  -2.336 0.019497 *  
## delaycostYes           0.824471   0.059815  13.784  < 2e-16 ***
## insuraceYes           -0.002429   0.067363  -0.036 0.971239    
## ownhomeRent            0.066021   0.047612   1.387 0.165568    
## trustDisagree          0.275133   0.055133   4.990 6.08e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.004191)
## 
## Number of Fisher Scoring iterations: 4
regTermTest(fit4, test.terms = ~sex+agec+race+educ+earn+bmi+smoke+drink+delaycost+insurace+ownhome+trust, method="Wald", df = NULL)
## Wald test for sex agec race educ earn bmi smoke drink delaycost insurace ownhome trust
##  in svyglm(formula = health ~ maritalst + sex + agec + race + educ + 
##     earn + bmi + smoke + drink + delaycost + insurace + ownhome + 
##     trust, design = des, family = binomial)
## F =  56.41177  on  22  and  18132  df: p= < 2.22e-16
car::vif(fit4)
##               GVIF Df GVIF^(1/(2*Df))
## maritalst 1.762922  2        1.152281
## sex       1.137547  1        1.066558
## agec      2.219854  6        1.068711
## race      1.124955  2        1.029873
## educ      1.178097  2        1.041826
## earn      1.535643  4        1.055082
## bmi       1.037920  1        1.018783
## smoke     1.077502  1        1.038028
## drink     1.071299  1        1.035036
## delaycost 1.102442  1        1.049972
## insurace  1.167079  1        1.080314
## ownhome   1.429663  1        1.195685
## trust     1.095746  1        1.046779
stargazer(fit,fit1,fit2,fit3,fit4,type = "html", style="demography", covariate.labels =c("married","W/D/S","SexM","25-34","35-44","45-54","55-64","65-74","75-99","black","Other","HS","ColDegree","earn20<35","earn35<55","earn55<75","75+","BMIobese","smokeY","drinksOK","delaycostY","insuranceY","ownwhomerent","trustdisagree"), ci = T )
health
Model 1 Model 2 Model 3 Model 4 Model 5
married 0.328*** -0.409*** -0.177*** -0.211*** -0.132*
(0.299, 0.357) (-0.449, -0.368) (-0.236, -0.117) (-0.315, -0.107) (-0.242, -0.022)
W/D/S 1.019*** 0.060* 0.045 -0.026 -0.055
(0.982, 1.055) (0.012, 0.108) (-0.028, 0.118) (-0.144, 0.091) (-0.178, 0.068)
SexM -0.009 0.052* 0.080* 0.100*
(-0.035, 0.016) (0.010, 0.095) (0.001, 0.159) (0.018, 0.181)
25-34 0.545*** 0.697*** 0.446*** 0.376***
(0.493, 0.598) (0.614, 0.779) (0.296, 0.596) (0.220, 0.531)
35-44 0.926*** 1.110*** 0.830*** 0.779***
(0.869, 0.983) (1.022, 1.199) (0.667, 0.992) (0.610, 0.949)
45-54 1.260*** 1.384*** 1.143*** 1.124***
(1.203, 1.316) (1.294, 1.474) (0.979, 1.307) (0.952, 1.295)
55-64 1.524*** 1.502*** 1.159*** 1.146***
(1.467, 1.581) (1.410, 1.595) (0.992, 1.327) (0.968, 1.323)
65-74 1.635*** 1.220*** 0.841*** 0.945***
(1.574, 1.696) (1.102, 1.337) (0.631, 1.051) (0.726, 1.165)
75-99 1.978*** 1.226*** 0.896*** 1.097***
(1.909, 2.046) (0.988, 1.465) (0.480, 1.312) (0.668, 1.525)
black 0.422*** 0.236*** 0.251*** 0.224**
(0.382, 0.462) (0.171, 0.300) (0.125, 0.377) (0.089, 0.360)
Other 0.178*** 0.202*** 0.352*** 0.344***
(0.134, 0.222) (0.128, 0.275) (0.204, 0.500) (0.189, 0.499)
HS -0.362*** -0.368*** -0.311***
(-0.430, -0.293) (-0.517, -0.219) (-0.468, -0.154)
ColDegree -0.756*** -0.621*** -0.545***
(-0.829, -0.682) (-0.774, -0.467) (-0.708, -0.382)
earn20<35 -0.189*** -0.246*** -0.236***
(-0.247, -0.132) (-0.355, -0.137) (-0.349, -0.123)
earn35<55 -0.431*** -0.505*** -0.443***
(-0.492, -0.370) (-0.619, -0.391) (-0.563, -0.323)
earn55<75 -0.612*** -0.664*** -0.537***
(-0.689, -0.536) (-0.800, -0.527) (-0.679, -0.394)
75+ -0.818*** -0.841*** -0.708***
(-0.894, -0.742) (-0.975, -0.707) (-0.849, -0.567)
BMIobese 0.814*** 0.807***
(0.735, 0.893) (0.726, 0.889)
smokeY 0.389*** 0.343***
(0.313, 0.466) (0.263, 0.422)
drinksOK -0.155* -0.173*
(-0.297, -0.013) (-0.319, -0.028)
delaycostY 0.824***
(0.707, 0.942)
insuranceY -0.002
(-0.134, 0.130)
ownwhomerent 0.066
(-0.027, 0.159)
trustdisagree 0.275***
(0.167, 0.383)
Constant -0.861*** -1.391*** -0.894*** -1.038*** -1.307***
(-0.885, -0.838) (-1.429, -1.353) (-0.984, -0.804) (-1.264, -0.812) (-1.577, -1.037)
N 142,912 142,912 60,074 19,057 18,208
Log Likelihood -91,834.950 -88,089.690 -35,590.300 -10,678.370 -9,995.897
AIC 183,675.900 176,203.400 71,216.590 21,398.740 20,041.790
p < .05; p < .01; p < .001