Homework 2

#load brfss
library(car)

## Loading required package: carData

library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

library(survey)

## Warning: package 'survey' was built under R version 3.5.2

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(questionr)
library(foreign)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

brfss2017=read.xport("~/Desktop/DataFolder/brfss.XPT ")

#recode variables of interest
brfss2017$marital<-Recode(brfss2017$MARITAL, recodes="1=1;5=0;else=NA",as.factor=T)
brfss2017$sxorient<-Recode(brfss2017$SXORIENT,recodes="1='a) straight';2='b) gay';else=NA")
brfss2017$genh<-Recode(brfss2017$GENHLTH,recodes="1='a) excellent';2='b) very good';3='c) good';4='d) fair';5='e) poor';else=NA")

#Create a survey design
sub<-brfss2017%>%
  select(genh,marital,sxorient,X_LLCPWT,X_STRWT) %>%
  filter( complete.cases(.))
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~X_STRWT, weights=~X_LLCPWT, data =sub )

library(ggplot2)
cat<-svyby(formula=~marital,by=~genh,design = des,FUN=svymean,na.rm=T)
svychisq(~marital+genh,design=des)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~marital + genh, design = des)
## F = 3.197, ndf = 3.9654e+00, ddf = 5.1303e+05, p-value = 0.01262

#qplot(x=cat$marital,y=cat$genh,data=cat,xlab="Marital Status",ylab="General Health")+
#  geom_errorbar(aes(x=marital,ymin=genh-se,ymax=genh+se),width=.25)+
#  ggtitle(label="US Adults, Marital Status and General Health")
#Error: Aesthetics must be either length 1 or the same as the data (5): x

dog<-svyby(formula=~marital,by=~sxorient,design = des,FUN=svymean,na.rm=T)
svychisq(~sxorient+genh,design=des)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~sxorient + genh, design = des)
## F = 0.53397, ndf = 3.8271e+00, ddf = 4.9514e+05, p-value = 0.7028

#qplot(x=dog$sxorient,y=dog$genh,data = dog,xlab = "Sexual Orientation",ylab="Marital Status")+
#  geom_errorbar(aes(x=sxorient,ymin=genh-se,ymax=cgenh+se),width=.25)+
#  ggtitle(label="US Adults, Sexual Orientation and General Health")
#Error: Aesthetics must be either length 1 or the same as the data (5): x

catdog<-svyby(formula=~marital,by=~sxorient+genh,design=des,FUN=svymean,na.rm=T)
catdog

##                             sxorient         genh  marital0  marital1
## a) straight.a) excellent a) straight a) excellent 0.3163681 0.6836319
## b) gay.a) excellent           b) gay a) excellent 0.6720690 0.3279310
## a) straight.b) very good a) straight b) very good 0.2921462 0.7078538
## b) gay.b) very good           b) gay b) very good 0.6136261 0.3863739
## a) straight.c) good      a) straight      c) good 0.3023001 0.6976999
## b) gay.c) good                b) gay      c) good 0.7593230 0.2406770
## a) straight.d) fair      a) straight      d) fair 0.3044410 0.6955590
## b) gay.d) fair                b) gay      d) fair 0.6695621 0.3304379
## a) straight.e) poor      a) straight      e) poor 0.2745024 0.7254976
## b) gay.e) poor                b) gay      e) poor 0.7903084 0.2096916
##                          se.marital0 se.marital1
## a) straight.a) excellent 0.006884737 0.006884737
## b) gay.a) excellent      0.040649086 0.040649086
## a) straight.b) very good 0.004816417 0.004816417
## b) gay.b) very good      0.034202589 0.034202589
## a) straight.c) good      0.005061033 0.005061033
## b) gay.c) good           0.026892752 0.026892752
## a) straight.d) fair      0.008274441 0.008274441
## b) gay.d) fair           0.052624312 0.052624312
## a) straight.e) poor      0.013423111 0.013423111
## b) gay.e) poor           0.069442960 0.069442960

catdog$sxorient<-rep(c("a) straight","b) gay"),5)
catdog$genh<-factor(c(rep("a-excellent", 2), rep("b-very good", 2), rep("c-good", 2),
rep("d-fair", 2), rep("e-poor", 2)), ordered = T)

table(catdog$genh)

## 
## a-excellent b-very good      c-good      d-fair      e-poor 
##           2           2           2           2           2

#p<-ggplot(catdog, aes(genh,marital,),xlab="Orientation", ylab="marital status")
#p<-p+geom_point(aes(colour=sxorient))
#p<-p+geom_line(aes(colour=sxorient,group=sxorient))
#p<-p+geom_errorbar(aes(x=genh, ymin=marital-se,ymax= marital+se,colour=sxorient), width=.25)
#p<-p+ylab("marital status")
#p<-p+xlab("general health")
#p+ggtitle("US AdultsMarital Status by Orientation and Health")
#Error in FUN(X[[i]], ...) : object 'marital' not found

#Logit model
fit.logit<-svyglm(marital~sxorient+genh,
                  design=des,
                  family=binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

#probit model
fit.probit<-svyglm(marital~sxorient+genh,
                  design=des,
                  family=binomial(link="probit"))

## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

#Present both model coefficients next to one another
stargazer(fit.logit,
          fit.probit,type="text",
          style="demography",
          covariate.labels=c("a) excellent","b) very good","c) good","d) fair","e) poor","a) straight","b) gay"),ci=T,column.sep.width="10pt")

## 
## ------------------------------------------------
##                             marital             
##                survey-weighted  survey-weighted 
##                    logistic          probit     
##                    Model 1          Model 2     
## ------------------------------------------------
## a) excellent      -1.613***        -0.999***    
##                (-1.778, -1.447) (-1.100, -0.898)
## b) very good       0.118**          0.071**     
##                 (0.041, 0.194)   (0.025, 0.118) 
## c) good             0.057            0.034      
##                (-0.021, 0.134)  (-0.013, 0.081) 
## d) fair             0.055            0.033      
##                (-0.043, 0.152)  (-0.026, 0.092) 
## e) poor             0.185*           0.112*     
##                 (0.042, 0.328)   (0.026, 0.198) 
## a) straight        0.773***         0.479***    
##                 (0.711, 0.835)   (0.442, 0.517) 
## N                  130,349          130,349     
## Log Likelihood   -74,927.600      -74,927.590   
## AIC              149,867.200      149,867.200   
## ------------------------------------------------
## *p < .05; **p < .01; ***p < .001

#Logit marginal effects
log.marg<-coef(fit.probit)*mean(dlogis(predict(fit.logit)),na.rm=T)

#Probit marginal effects
prob.marg<-coef(fit.probit)*mean(dlogis(predict(fit.probit)),na.rm=T)

effects<-data.frame(effect=c(log.marg,prob.marg),
                    term=rep(names(log.marg),2),
                    model=c(rep("logit",length(log.marg)),rep("probit",length(log.marg)))
                    )
effects%>%
  ggplot(aes(x=term,y=effect))+geom_point(aes(color=model,group=model,shape=model),
                                          position=position_jitterdodge(jitter.width = 1),
                                          size=2)+
  ylab("Marginal Effect")+
  xlab("Model Term")+
  geom_abline(intercept=0,slope=0)+
  theme(axis.text.x=element_text(angle=45,hjust=1))+
  ggtitle(label="Comparison of marginal effects in Logit and Probit models")

#get combinations of possible "people" outcomes
dat<-expand.grid(sxorient=levels(brfss2017$sxorient),genh=levels(brfss2017$genh))
#fit<-predict(fit.logit,newdat=dat,type="response")
#Error in eval(predvars, data, env) : object 'sxorient' not found
#fitp<-predict(fit.probit,newdat=dat,type="response")
#add values to fake data
#dat$fitted.prob.lrm<-round(fit,3)
#dat$fitted.prob.pro<-round(fitp,3)

oddsratio <- exp(cbind("Odds ratio" = coef(fit.logit), confint.default(fit.logit, level = 0.95)))
oddsratio

##                  Odds ratio     2.5 %    97.5 %
## (Intercept)       2.1664067 2.0365476 2.3045462
## sxorientb) gay    0.1993534 0.1689242 0.2352639
## genhb) very good  1.1249091 1.0419184 1.2145102
## genhc) good       1.0583675 0.9796119 1.1434546
## genhd) fair       1.0560890 0.9576527 1.1646435
## genhe) poor       1.2036757 1.0431960 1.3888426

Homework 2

Steven Weeks

2/10/2019