Homework 4

library(car)

## Loading required package: carData

library(VGAM)

## Warning: package 'VGAM' was built under R version 4.0.4

## Loading required package: stats4

## Loading required package: splines

## 
## Attaching package: 'VGAM'

## The following object is masked from 'package:car':
## 
##     logit

library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:VGAM':
## 
##     calibrate

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(haven)

#Cleaning data and recoding variables
BRFSS19<-read_xpt("C:/Users/adolp/Desktop/Statistics 2/BRFSS/LLCP2019.XPT")
nams<-names(BRFSS19)
head(nams, n=10)

##  [1] "_STATE"   "FMONTH"   "IDATE"    "IMONTH"   "IDAY"     "IYEAR"   
##  [7] "DISPCODE" "SEQNO"    "_PSU"     "CTELENM1"

newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(BRFSS19)<-newnames
BRFSS19$Alc<-Recode(BRFSS19$acedrink, recodes ="7:9=NA; 1=1;2=0")
BRFSS19$Dep<-Recode(BRFSS19$acedeprs, recodes ="7:9=NA; 1=1;2=0") 
BRFSS19$Gen<-Recode(BRFSS19$birthsex, recodes = "7:9=NA; 1=1;2=0")
#General ordinal coding, with 0 being the worst and 4 being the best health
BRFSS19$Hlth<-Recode(BRFSS19$genhlth, recodes = "7:9=NA; 1=4;2=3;3=2;4=1;5=0", as.factor=T)
BRFSS19$Hlth<-relevel(BRFSS19$Hlth, ref = 4)
BRFSS19$healthnum<-car::Recode(BRFSS19$genhlth,
                                recodes="7:9=NA; 1=4;2=3;3=2;4=1;5=0", as.factor = F)

subset<-BRFSS19%>%
  select(Hlth, healthnum,Alc,Dep,Gen,llcpwt, ststr) %>%
  filter( complete.cases(.))
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr, weights=~llcpwt, data = subset)

#ordinal logit regression
fit.solr1<-svyolr(Hlth~Alc+Dep+Gen,des)
summary(fit.solr1)

## Call:
## svyolr(Hlth ~ Alc + Dep + Gen, des)
## 
## Coefficients:
##           Value Std. Error    t value
## Alc  0.06192155 0.07125513  0.8690119
## Dep -0.16709724 0.07748658 -2.1564669
## Gen  0.04536078 0.06522602  0.6954400
## 
## Intercepts:
##     Value    Std. Error t value 
## 3|0  -0.6444   0.0565   -11.4007
## 0|1  -0.4626   0.0558    -8.2961
## 1|2   0.1172   0.0549     2.1352
## 2|4   1.7548   0.0655    26.8093

fit.solr1$deviance+2*length(fit.solr1$coefficients)

## [1] 15480.76

ex1<-svyglm(I(healthnum>0)~Alc+Dep+Gen,des, family="binomial")

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

ex2<-svyglm(I(healthnum>1)~Alc+Dep+Gen,des, family="binomial")

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

ex3<-svyglm(I(healthnum>2)~Alc+Dep+Gen,des, family="binomial")

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

ex4<-svyglm(I(healthnum>3)~Alc+Dep+Gen,des, family="binomial")

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

#Poportional Odds Assumption
coefs<-data.frame(parms =c( coefficients(ex1)[-1],coefficients(ex2)[-1]),
                  beta = rep(names(coefficients(ex1))[-1], 2 ), 
                  mod = c(rep("I(healthnum>0)",12 ),
                          rep("I(healthnum>1)",12 ), rep("I(healthnum>2)",12), rep("I(healthnum>3)",12)))

coefs%>%
  ggplot()+
  geom_point(aes(x=beta, y=parms, color=mod))+
  theme(axis.text.x = element_text(angle = 45))+
  labs(title = "Graphical Summary of Proportional Odds Assumption",
       x= "Beta",
       y= "Estimate")

round(exp(rbind(coef(ex1)[-1],
                coef(ex2)[-1],coef(ex3)[-1], coef(ex4)[-1])),3)

##        Alc   Dep   Gen
## [1,] 0.825 0.699 1.104
## [2,] 0.693 0.715 1.085
## [3,] 0.735 0.851 1.037
## [4,] 0.846 0.676 1.093

#Proportional odds model
fit.vgam<-vglm(as.ordered(Hlth)~Alc+Dep+Gen,
               BRFSS19, weights =llcpwt/mean(llcpwt, na.rm=T),
               family=cumulative(parallel = T, reverse = T))  
summary(fit.vgam)

## 
## Call:
## vglm(formula = as.ordered(Hlth) ~ Alc + Dep + Gen, family = cumulative(parallel = T, 
##     reverse = T), data = BRFSS19, weights = llcpwt/mean(llcpwt, 
##     na.rm = T))
## 
## Coefficients: 
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1  0.64439    0.02593  24.849  < 2e-16 ***
## (Intercept):2  0.46260    0.02563  18.048  < 2e-16 ***
## (Intercept):3 -0.11722    0.02534  -4.626 3.73e-06 ***
## (Intercept):4 -1.75474    0.03047 -57.588  < 2e-16 ***
## Alc            0.06192    0.03837   1.614    0.107    
## Dep           -0.16709    0.04169  -4.008 6.12e-05 ***
## Gen            0.04536    0.03100   1.463    0.143    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]), 
## logitlink(P[Y>=4]), logitlink(P[Y>=5])
## 
## Residual deviance: 39114.11 on 21705 degrees of freedom
## 
## Log-likelihood: -19557.06 on 21705 degrees of freedom
## 
## Number of Fisher scoring iterations: 4 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Exponentiated coefficients:
##      Alc      Dep      Gen 
## 1.063879 0.846121 1.046404

#Non-proportional odds model
fit.vgam2<-vglm(as.ordered(Hlth)~Alc+Dep+Gen,BRFSS19,
                weights =llcpwt/mean(llcpwt, na.rm=T),
                family=cumulative(parallel = F, reverse = T))
summary(fit.vgam2)

## 
## Call:
## vglm(formula = as.ordered(Hlth) ~ Alc + Dep + Gen, family = cumulative(parallel = F, 
##     reverse = T), data = BRFSS19, weights = llcpwt/mean(llcpwt, 
##     na.rm = T))
## 
## Coefficients: 
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1  0.58151    0.02806  20.722  < 2e-16 ***
## (Intercept):2  0.41852    0.02745  15.249  < 2e-16 ***
## (Intercept):3 -0.09141    0.02686  -3.403 0.000665 ***
## (Intercept):4 -1.69058    0.03761 -44.946  < 2e-16 ***
## Alc:1          0.25560    0.04570   5.592 2.24e-08 ***
## Alc:2          0.20454    0.04416   4.632 3.62e-06 ***
## Alc:3         -0.01526    0.04262  -0.358 0.720300    
## Alc:4         -0.16925    0.06271  -2.699 0.006956 ** 
## Dep:1         -0.01249    0.04878  -0.256 0.797951    
## Dep:2         -0.07814    0.04726  -1.653 0.098286 .  
## Dep:3         -0.24267    0.04646  -5.223 1.76e-07 ***
## Dep:4         -0.39286    0.07179  -5.472 4.45e-08 ***
## Gen:1          0.01513    0.03603   0.420 0.674623    
## Gen:2          0.02620    0.03518   0.745 0.456417    
## Gen:3          0.05726    0.03433   1.668 0.095358 .  
## Gen:4          0.09319    0.04824   1.932 0.053416 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]), 
## logitlink(P[Y>=4]), logitlink(P[Y>=5])
## 
## Residual deviance: 38940.94 on 21696 degrees of freedom
## 
## Log-likelihood: -19470.47 on 21696 degrees of freedom
## 
## Number of Fisher scoring iterations: 4 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Exponentiated coefficients:
##     Alc:1     Alc:2     Alc:3     Alc:4     Dep:1     Dep:2     Dep:3     Dep:4 
## 1.2912368 1.2269641 0.9848554 0.8442942 0.9875888 0.9248389 0.7845318 0.6751241 
##     Gen:1     Gen:2     Gen:3     Gen:4 
## 1.0152422 1.0265449 1.0589323 1.0976653

round(exp(coef(fit.vgam2)), 3)

## (Intercept):1 (Intercept):2 (Intercept):3 (Intercept):4         Alc:1 
##         1.789         1.520         0.913         0.184         1.291 
##         Alc:2         Alc:3         Alc:4         Dep:1         Dep:2 
##         1.227         0.985         0.844         0.988         0.925 
##         Dep:3         Dep:4         Gen:1         Gen:2         Gen:3 
##         0.785         0.675         1.015         1.027         1.059 
##         Gen:4 
##         1.098

round(exp(confint(fit.vgam2)), 3)

##               2.5 % 97.5 %
## (Intercept):1 1.693  1.890
## (Intercept):2 1.440  1.604
## (Intercept):3 0.866  0.962
## (Intercept):4 0.171  0.199
## Alc:1         1.181  1.412
## Alc:2         1.125  1.338
## Alc:3         0.906  1.071
## Alc:4         0.747  0.955
## Dep:1         0.898  1.087
## Dep:2         0.843  1.015
## Dep:3         0.716  0.859
## Dep:4         0.587  0.777
## Gen:1         0.946  1.090
## Gen:2         0.958  1.100
## Gen:3         0.990  1.133
## Gen:4         0.999  1.207

AIC(fit.vgam)

## [1] 39128.11

AIC(fit.vgam2)

## [1] 38972.94

#Multinomial Model
mfit<-vglm(Hlth~Alc+Dep+Gen,
           family=multinomial(refLevel = 4),
           data=BRFSS19,
           weights=llcpwt/mean(llcpwt, na.rm=T))

summary(mfit)

## 
## Call:
## vglm(formula = Hlth ~ Alc + Dep + Gen, family = multinomial(refLevel = 4), 
##     data = BRFSS19, weights = llcpwt/mean(llcpwt, na.rm = T))
## 
## Coefficients: 
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1  0.10901    0.03274   3.329 0.000871 ***
## (Intercept):2 -2.11543    0.07049 -30.010  < 2e-16 ***
## (Intercept):3 -0.92843    0.04350 -21.343  < 2e-16 ***
## (Intercept):4 -0.72145    0.04198 -17.187  < 2e-16 ***
## Alc:1         -0.22305    0.05280  -4.224 2.40e-05 ***
## Alc:2          0.13357    0.10443   1.279 0.200902    
## Alc:3          0.27173    0.06381   4.259 2.06e-05 ***
## Alc:4         -0.19724    0.06904  -2.857 0.004279 ** 
## Dep:1          0.05911    0.05696   1.038 0.299331    
## Dep:2          0.38907    0.10927   3.561 0.000370 ***
## Dep:3          0.29060    0.06901   4.211 2.54e-05 ***
## Dep:4         -0.29340    0.07878  -3.724 0.000196 ***
## Gen:1         -0.01532    0.04204  -0.364 0.715491    
## Gen:2         -0.10494    0.08936  -1.174 0.240287    
## Gen:3         -0.06820    0.05476  -1.245 0.212981    
## Gen:4          0.06627    0.05382   1.231 0.218182    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: log(mu[,1]/mu[,4]), log(mu[,2]/mu[,4]), 
## log(mu[,3]/mu[,4]), log(mu[,5]/mu[,4])
## 
## Residual deviance: 38943.31 on 21696 degrees of freedom
## 
## Log-likelihood: -19471.66 on 21696 degrees of freedom
## 
## Number of Fisher scoring iterations: 5 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Reference group is level  4  of the response

round(exp(coef(mfit)), 3)

## (Intercept):1 (Intercept):2 (Intercept):3 (Intercept):4         Alc:1 
##         1.115         0.121         0.395         0.486         0.800 
##         Alc:2         Alc:3         Alc:4         Dep:1         Dep:2 
##         1.143         1.312         0.821         1.061         1.476 
##         Dep:3         Dep:4         Gen:1         Gen:2         Gen:3 
##         1.337         0.746         0.985         0.900         0.934 
##         Gen:4 
##         1.069

round(exp(confint(mfit)), 3)

##               2.5 % 97.5 %
## (Intercept):1 1.046  1.189
## (Intercept):2 0.105  0.138
## (Intercept):3 0.363  0.430
## (Intercept):4 0.448  0.528
## Alc:1         0.721  0.887
## Alc:2         0.931  1.403
## Alc:3         1.158  1.487
## Alc:4         0.717  0.940
## Dep:1         0.949  1.186
## Dep:2         1.191  1.828
## Dep:3         1.168  1.531
## Dep:4         0.639  0.870
## Gen:1         0.907  1.069
## Gen:2         0.756  1.073
## Gen:3         0.839  1.040
## Gen:4         0.962  1.187

#Fitted Probabilities
dat<-expand.grid(Alc=levels(as.factor(BRFSS19$Alc)),Dep=levels(as.factor(BRFSS19$Dep)),Gen=levels(as.factor(BRFSS19$Gen)))

fitm<-predict(mfit, newdat=dat,type="response")
dat<-cbind(dat, round(fitm,3))
head(dat, n=20)

##   Alc Dep Gen     3     0     1     2     4
## 1   0   0   0 0.358 0.039 0.127 0.321 0.156
## 2   1   0   0 0.303 0.047 0.176 0.339 0.135
## 3   0   1   0 0.364 0.055 0.163 0.308 0.111
## 4   1   1   0 0.301 0.065 0.221 0.318 0.095
## 5   0   0   1 0.355 0.035 0.119 0.323 0.168
## 6   1   0   1 0.302 0.043 0.166 0.343 0.146
## 7   0   1   1 0.363 0.050 0.154 0.312 0.121
## 8   1   1   1 0.303 0.059 0.210 0.325 0.103

fitted.ord<-round(predict(fit.vgam, newdat=dat[,1:3], type="response"), 3)

dat<-cbind(dat, fitted.ord)

names(dat)<-c(names(dat)[1:3], "mp1", "mp2", "mp3", "op1", "op2", "op3")

head(dat, n=20)

##   Alc Dep Gen   mp1   mp2   mp3   op1   op2   op3    NA    NA    NA    NA
## 1   0   0   0 0.358 0.039 0.127 0.321 0.156 0.344 0.042 0.143 0.323 0.147
## 2   1   0   0 0.303 0.047 0.176 0.339 0.135 0.330 0.041 0.142 0.331 0.155
## 3   0   1   0 0.364 0.055 0.163 0.308 0.111 0.383 0.044 0.144 0.302 0.128
## 4   1   1   0 0.301 0.065 0.221 0.318 0.095 0.368 0.043 0.144 0.310 0.135
## 5   0   0   1 0.355 0.035 0.119 0.323 0.168 0.334 0.042 0.142 0.329 0.153
## 6   1   0   1 0.302 0.043 0.166 0.343 0.146 0.320 0.041 0.141 0.336 0.161
## 7   0   1   1 0.363 0.050 0.154 0.312 0.121 0.372 0.043 0.144 0.308 0.133
## 8   1   1   1 0.303 0.059 0.210 0.325 0.103 0.358 0.043 0.144 0.316 0.140

AIC(fit.vgam) #proportional odds

## [1] 39128.11

AIC(fit.vgam2) #cumulative logit, non proportional

## [1] 38972.94

AIC(mfit) #multinomial

## [1] 38975.31

General Health is the ordinal variable that I will be predicting. I recoded the original variable to reflect Poor = 0, Fair = 1, Good = 2, Very Good = 3, Excellent = 4, and Don’t know/Not Sure and Refused as missing. 2) My research question is “Do growing up with a problem drinker or alcoholic or growing up with anyone who was depressed, mentally ill, or suicidal, and gender predict general health status?” 3) After fitting the Ordinal Regression model, the assumptions of the proportional odds model were assessed. 3a) Results showed that the assumptions were not met. 3b) Plotting the coefficients of each of the four models to visually inspect if they were approximately the same values. 3c) In comparing which model, Proportional odds or Multinomial, fits the data better, the Nonproportional odds model fit slightly better that the Multinomial model based on the AIC. 4a) Results showed that respondents who grew up with a problem drinker or alcoholic are more likely to transition from poor to fair health, and from fair to good health, and less likely to transition from very good health to excellent health. Respondents who grew up with anyone who was depressed, mentally ill, or suicidal, are less likely to transition from good health to very good and very good health to excellent health. There were no marginal effects for gender.

Homework 4

Adolph Delgado

3/15/2021