library(car)
## Loading required package: carData
library(VGAM)
## Warning: package 'VGAM' was built under R version 4.0.4
## Loading required package: stats4
## Loading required package: splines
## 
## Attaching package: 'VGAM'
## The following object is masked from 'package:car':
## 
##     logit
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:VGAM':
## 
##     calibrate
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(haven)
#Cleaning data and recoding variables
BRFSS19<-read_xpt("C:/Users/adolp/Desktop/Statistics 2/BRFSS/LLCP2019.XPT")
nams<-names(BRFSS19)
head(nams, n=10)
##  [1] "_STATE"   "FMONTH"   "IDATE"    "IMONTH"   "IDAY"     "IYEAR"   
##  [7] "DISPCODE" "SEQNO"    "_PSU"     "CTELENM1"
newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(BRFSS19)<-newnames
BRFSS19$Alc<-Recode(BRFSS19$acedrink, recodes ="7:9=NA; 1=1;2=0")
BRFSS19$Dep<-Recode(BRFSS19$acedeprs, recodes ="7:9=NA; 1=1;2=0") 
BRFSS19$Gen<-Recode(BRFSS19$birthsex, recodes = "7:9=NA; 1=1;2=0")
#General ordinal coding, with 0 being the worst and 4 being the best health
BRFSS19$Hlth<-Recode(BRFSS19$genhlth, recodes = "7:9=NA; 1=4;2=3;3=2;4=1;5=0", as.factor=T)
BRFSS19$Hlth<-relevel(BRFSS19$Hlth, ref = 4)
BRFSS19$healthnum<-car::Recode(BRFSS19$genhlth,
                                recodes="7:9=NA; 1=4;2=3;3=2;4=1;5=0", as.factor = F)
subset<-BRFSS19%>%
  select(Hlth, healthnum,Alc,Dep,Gen,llcpwt, ststr) %>%
  filter( complete.cases(.))
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr, weights=~llcpwt, data = subset)
#ordinal logit regression
fit.solr1<-svyolr(Hlth~Alc+Dep+Gen,des)
summary(fit.solr1)
## Call:
## svyolr(Hlth ~ Alc + Dep + Gen, des)
## 
## Coefficients:
##           Value Std. Error    t value
## Alc  0.06192155 0.07125513  0.8690119
## Dep -0.16709724 0.07748658 -2.1564669
## Gen  0.04536078 0.06522602  0.6954400
## 
## Intercepts:
##     Value    Std. Error t value 
## 3|0  -0.6444   0.0565   -11.4007
## 0|1  -0.4626   0.0558    -8.2961
## 1|2   0.1172   0.0549     2.1352
## 2|4   1.7548   0.0655    26.8093
fit.solr1$deviance+2*length(fit.solr1$coefficients)
## [1] 15480.76
ex1<-svyglm(I(healthnum>0)~Alc+Dep+Gen,des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
ex2<-svyglm(I(healthnum>1)~Alc+Dep+Gen,des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
ex3<-svyglm(I(healthnum>2)~Alc+Dep+Gen,des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
ex4<-svyglm(I(healthnum>3)~Alc+Dep+Gen,des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#Poportional Odds Assumption
coefs<-data.frame(parms =c( coefficients(ex1)[-1],coefficients(ex2)[-1]),
                  beta = rep(names(coefficients(ex1))[-1], 2 ), 
                  mod = c(rep("I(healthnum>0)",12 ),
                          rep("I(healthnum>1)",12 ), rep("I(healthnum>2)",12), rep("I(healthnum>3)",12)))

coefs%>%
  ggplot()+
  geom_point(aes(x=beta, y=parms, color=mod))+
  theme(axis.text.x = element_text(angle = 45))+
  labs(title = "Graphical Summary of Proportional Odds Assumption",
       x= "Beta",
       y= "Estimate")

round(exp(rbind(coef(ex1)[-1],
                coef(ex2)[-1],coef(ex3)[-1], coef(ex4)[-1])),3)
##        Alc   Dep   Gen
## [1,] 0.825 0.699 1.104
## [2,] 0.693 0.715 1.085
## [3,] 0.735 0.851 1.037
## [4,] 0.846 0.676 1.093
#Proportional odds model
fit.vgam<-vglm(as.ordered(Hlth)~Alc+Dep+Gen,
               BRFSS19, weights =llcpwt/mean(llcpwt, na.rm=T),
               family=cumulative(parallel = T, reverse = T))  
summary(fit.vgam)
## 
## Call:
## vglm(formula = as.ordered(Hlth) ~ Alc + Dep + Gen, family = cumulative(parallel = T, 
##     reverse = T), data = BRFSS19, weights = llcpwt/mean(llcpwt, 
##     na.rm = T))
## 
## Coefficients: 
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1  0.64439    0.02593  24.849  < 2e-16 ***
## (Intercept):2  0.46260    0.02563  18.048  < 2e-16 ***
## (Intercept):3 -0.11722    0.02534  -4.626 3.73e-06 ***
## (Intercept):4 -1.75474    0.03047 -57.588  < 2e-16 ***
## Alc            0.06192    0.03837   1.614    0.107    
## Dep           -0.16709    0.04169  -4.008 6.12e-05 ***
## Gen            0.04536    0.03100   1.463    0.143    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]), 
## logitlink(P[Y>=4]), logitlink(P[Y>=5])
## 
## Residual deviance: 39114.11 on 21705 degrees of freedom
## 
## Log-likelihood: -19557.06 on 21705 degrees of freedom
## 
## Number of Fisher scoring iterations: 4 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Exponentiated coefficients:
##      Alc      Dep      Gen 
## 1.063879 0.846121 1.046404
#Non-proportional odds model
fit.vgam2<-vglm(as.ordered(Hlth)~Alc+Dep+Gen,BRFSS19,
                weights =llcpwt/mean(llcpwt, na.rm=T),
                family=cumulative(parallel = F, reverse = T))
summary(fit.vgam2)
## 
## Call:
## vglm(formula = as.ordered(Hlth) ~ Alc + Dep + Gen, family = cumulative(parallel = F, 
##     reverse = T), data = BRFSS19, weights = llcpwt/mean(llcpwt, 
##     na.rm = T))
## 
## Coefficients: 
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1  0.58151    0.02806  20.722  < 2e-16 ***
## (Intercept):2  0.41852    0.02745  15.249  < 2e-16 ***
## (Intercept):3 -0.09141    0.02686  -3.403 0.000665 ***
## (Intercept):4 -1.69058    0.03761 -44.946  < 2e-16 ***
## Alc:1          0.25560    0.04570   5.592 2.24e-08 ***
## Alc:2          0.20454    0.04416   4.632 3.62e-06 ***
## Alc:3         -0.01526    0.04262  -0.358 0.720300    
## Alc:4         -0.16925    0.06271  -2.699 0.006956 ** 
## Dep:1         -0.01249    0.04878  -0.256 0.797951    
## Dep:2         -0.07814    0.04726  -1.653 0.098286 .  
## Dep:3         -0.24267    0.04646  -5.223 1.76e-07 ***
## Dep:4         -0.39286    0.07179  -5.472 4.45e-08 ***
## Gen:1          0.01513    0.03603   0.420 0.674623    
## Gen:2          0.02620    0.03518   0.745 0.456417    
## Gen:3          0.05726    0.03433   1.668 0.095358 .  
## Gen:4          0.09319    0.04824   1.932 0.053416 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]), 
## logitlink(P[Y>=4]), logitlink(P[Y>=5])
## 
## Residual deviance: 38940.94 on 21696 degrees of freedom
## 
## Log-likelihood: -19470.47 on 21696 degrees of freedom
## 
## Number of Fisher scoring iterations: 4 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Exponentiated coefficients:
##     Alc:1     Alc:2     Alc:3     Alc:4     Dep:1     Dep:2     Dep:3     Dep:4 
## 1.2912368 1.2269641 0.9848554 0.8442942 0.9875888 0.9248389 0.7845318 0.6751241 
##     Gen:1     Gen:2     Gen:3     Gen:4 
## 1.0152422 1.0265449 1.0589323 1.0976653
round(exp(coef(fit.vgam2)), 3)
## (Intercept):1 (Intercept):2 (Intercept):3 (Intercept):4         Alc:1 
##         1.789         1.520         0.913         0.184         1.291 
##         Alc:2         Alc:3         Alc:4         Dep:1         Dep:2 
##         1.227         0.985         0.844         0.988         0.925 
##         Dep:3         Dep:4         Gen:1         Gen:2         Gen:3 
##         0.785         0.675         1.015         1.027         1.059 
##         Gen:4 
##         1.098
round(exp(confint(fit.vgam2)), 3)
##               2.5 % 97.5 %
## (Intercept):1 1.693  1.890
## (Intercept):2 1.440  1.604
## (Intercept):3 0.866  0.962
## (Intercept):4 0.171  0.199
## Alc:1         1.181  1.412
## Alc:2         1.125  1.338
## Alc:3         0.906  1.071
## Alc:4         0.747  0.955
## Dep:1         0.898  1.087
## Dep:2         0.843  1.015
## Dep:3         0.716  0.859
## Dep:4         0.587  0.777
## Gen:1         0.946  1.090
## Gen:2         0.958  1.100
## Gen:3         0.990  1.133
## Gen:4         0.999  1.207
AIC(fit.vgam)
## [1] 39128.11
AIC(fit.vgam2)
## [1] 38972.94
#Multinomial Model
mfit<-vglm(Hlth~Alc+Dep+Gen,
           family=multinomial(refLevel = 4),
           data=BRFSS19,
           weights=llcpwt/mean(llcpwt, na.rm=T))

summary(mfit)
## 
## Call:
## vglm(formula = Hlth ~ Alc + Dep + Gen, family = multinomial(refLevel = 4), 
##     data = BRFSS19, weights = llcpwt/mean(llcpwt, na.rm = T))
## 
## Coefficients: 
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1  0.10901    0.03274   3.329 0.000871 ***
## (Intercept):2 -2.11543    0.07049 -30.010  < 2e-16 ***
## (Intercept):3 -0.92843    0.04350 -21.343  < 2e-16 ***
## (Intercept):4 -0.72145    0.04198 -17.187  < 2e-16 ***
## Alc:1         -0.22305    0.05280  -4.224 2.40e-05 ***
## Alc:2          0.13357    0.10443   1.279 0.200902    
## Alc:3          0.27173    0.06381   4.259 2.06e-05 ***
## Alc:4         -0.19724    0.06904  -2.857 0.004279 ** 
## Dep:1          0.05911    0.05696   1.038 0.299331    
## Dep:2          0.38907    0.10927   3.561 0.000370 ***
## Dep:3          0.29060    0.06901   4.211 2.54e-05 ***
## Dep:4         -0.29340    0.07878  -3.724 0.000196 ***
## Gen:1         -0.01532    0.04204  -0.364 0.715491    
## Gen:2         -0.10494    0.08936  -1.174 0.240287    
## Gen:3         -0.06820    0.05476  -1.245 0.212981    
## Gen:4          0.06627    0.05382   1.231 0.218182    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: log(mu[,1]/mu[,4]), log(mu[,2]/mu[,4]), 
## log(mu[,3]/mu[,4]), log(mu[,5]/mu[,4])
## 
## Residual deviance: 38943.31 on 21696 degrees of freedom
## 
## Log-likelihood: -19471.66 on 21696 degrees of freedom
## 
## Number of Fisher scoring iterations: 5 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Reference group is level  4  of the response
round(exp(coef(mfit)), 3)
## (Intercept):1 (Intercept):2 (Intercept):3 (Intercept):4         Alc:1 
##         1.115         0.121         0.395         0.486         0.800 
##         Alc:2         Alc:3         Alc:4         Dep:1         Dep:2 
##         1.143         1.312         0.821         1.061         1.476 
##         Dep:3         Dep:4         Gen:1         Gen:2         Gen:3 
##         1.337         0.746         0.985         0.900         0.934 
##         Gen:4 
##         1.069
round(exp(confint(mfit)), 3)
##               2.5 % 97.5 %
## (Intercept):1 1.046  1.189
## (Intercept):2 0.105  0.138
## (Intercept):3 0.363  0.430
## (Intercept):4 0.448  0.528
## Alc:1         0.721  0.887
## Alc:2         0.931  1.403
## Alc:3         1.158  1.487
## Alc:4         0.717  0.940
## Dep:1         0.949  1.186
## Dep:2         1.191  1.828
## Dep:3         1.168  1.531
## Dep:4         0.639  0.870
## Gen:1         0.907  1.069
## Gen:2         0.756  1.073
## Gen:3         0.839  1.040
## Gen:4         0.962  1.187
#Fitted Probabilities
dat<-expand.grid(Alc=levels(as.factor(BRFSS19$Alc)),Dep=levels(as.factor(BRFSS19$Dep)),Gen=levels(as.factor(BRFSS19$Gen)))
fitm<-predict(mfit, newdat=dat,type="response")
dat<-cbind(dat, round(fitm,3))
head(dat, n=20)
##   Alc Dep Gen     3     0     1     2     4
## 1   0   0   0 0.358 0.039 0.127 0.321 0.156
## 2   1   0   0 0.303 0.047 0.176 0.339 0.135
## 3   0   1   0 0.364 0.055 0.163 0.308 0.111
## 4   1   1   0 0.301 0.065 0.221 0.318 0.095
## 5   0   0   1 0.355 0.035 0.119 0.323 0.168
## 6   1   0   1 0.302 0.043 0.166 0.343 0.146
## 7   0   1   1 0.363 0.050 0.154 0.312 0.121
## 8   1   1   1 0.303 0.059 0.210 0.325 0.103
fitted.ord<-round(predict(fit.vgam, newdat=dat[,1:3], type="response"), 3)

dat<-cbind(dat, fitted.ord)

names(dat)<-c(names(dat)[1:3], "mp1", "mp2", "mp3", "op1", "op2", "op3")

head(dat, n=20)
##   Alc Dep Gen   mp1   mp2   mp3   op1   op2   op3    NA    NA    NA    NA
## 1   0   0   0 0.358 0.039 0.127 0.321 0.156 0.344 0.042 0.143 0.323 0.147
## 2   1   0   0 0.303 0.047 0.176 0.339 0.135 0.330 0.041 0.142 0.331 0.155
## 3   0   1   0 0.364 0.055 0.163 0.308 0.111 0.383 0.044 0.144 0.302 0.128
## 4   1   1   0 0.301 0.065 0.221 0.318 0.095 0.368 0.043 0.144 0.310 0.135
## 5   0   0   1 0.355 0.035 0.119 0.323 0.168 0.334 0.042 0.142 0.329 0.153
## 6   1   0   1 0.302 0.043 0.166 0.343 0.146 0.320 0.041 0.141 0.336 0.161
## 7   0   1   1 0.363 0.050 0.154 0.312 0.121 0.372 0.043 0.144 0.308 0.133
## 8   1   1   1 0.303 0.059 0.210 0.325 0.103 0.358 0.043 0.144 0.316 0.140
AIC(fit.vgam) #proportional odds
## [1] 39128.11
AIC(fit.vgam2) #cumulative logit, non proportional
## [1] 38972.94
AIC(mfit) #multinomial
## [1] 38975.31
  1. General Health is the ordinal variable that I will be predicting. I recoded the original variable to reflect Poor = 0, Fair = 1, Good = 2, Very Good = 3, Excellent = 4, and Don’t know/Not Sure and Refused as missing. 2) My research question is “Do growing up with a problem drinker or alcoholic or growing up with anyone who was depressed, mentally ill, or suicidal, and gender predict general health status?” 3) After fitting the Ordinal Regression model, the assumptions of the proportional odds model were assessed. 3a) Results showed that the assumptions were not met. 3b) Plotting the coefficients of each of the four models to visually inspect if they were approximately the same values. 3c) In comparing which model, Proportional odds or Multinomial, fits the data better, the Nonproportional odds model fit slightly better that the Multinomial model based on the AIC. 4a) Results showed that respondents who grew up with a problem drinker or alcoholic are more likely to transition from poor to fair health, and from fair to good health, and less likely to transition from very good health to excellent health. Respondents who grew up with anyone who was depressed, mentally ill, or suicidal, are less likely to transition from good health to very good and very good health to excellent health. There were no marginal effects for gender.