For the following task I am interested in looking at Alcohol Consumption as an ordinal outcome with regards to Education, Race and Ethnicity, and Cohort.

Research Questions: I am interested in answering the following couple of questoins: 1. Does alcohol consumption vary with increased educational attainment? 2. Does alcohol consumption vary across racial and ethinic population? 3. Does alcohol consumption vary across agr groups?

Recoding Variables:

I have recoded the Alcohol consumption variable from the NHIS 2001-2006 dataset into 5 orders: Never Drinker (1), Former Drinker (2), Light Drinkers (3), Moderate Drinker (4), and Heavy Drinkers.

Education, as one of the moderators, will be categorized into three groups: High School or Less, Some College, and College. Race/Ethnicity, as another moderator, will be categorized into four groups: NHWhite, NHBlack, Hispanic, and NHother. Age is recoded to break into the following age groups: 18-24, 24-35, 35-45, 45-65, 65+.

Loading Libraries

library(readr)
library(car)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(survival)
library(cmprsk)
library(questionr)
library(haven)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:Matrix':
## 
##     expand

Reading data

mydata <- read_dta("C://Users/munta/Google Drive/NHIS/nhis_00005.dta")

Taking a Subset of the Population and Recoding Variables

mydata$yob<-mydata$year-mydata$age

mydata<-subset (mydata, yob%in%c(1940:1985) & year%in%c(2001:2006))

mydata$cohort<-car::recode(mydata$yob, recodes="1940:1963='early'; 1964:1985='late'; else=NA", as.factor.result = )

#Taking a subset of the total observations
sub<-subset(mydata, mydata$mortelig==1&is.na(mydata$racenew)==F)

samps<-sample(1:length(sub$psu), size = 20000, replace = F)
sub<-sub[samps,] #Using a subset of 20000 samples

#Age
sub$d.age<-ifelse(sub$mortstat==1, sub$mortdody-(sub$year-sub$age) ,
                  ifelse(sub$mortstat==2, 2006-(sub$year-sub$age), NA))
#Mortality Status
sub$d.event<-ifelse(sub$mortstat==1,1,0)
sub$timetodeath<-ifelse(sub$mortstat ==1, sub$mortdody-sub$year, 2006 - sub$year )
sub$d5yr<-ifelse(sub$timetodeath<=5&sub$mortstat==1, 1,0)

#Mortalist Status: Dead or Alive
sub$mortstat<-recode(sub$mortstat, recodes= "1='Dead'; 2='Alive'; 9=NA", as.factor.result=T)

#Marital Status
sub$married<-recode(sub$marstat, recodes="00=NA; 10:13='married'; 20:40='separated'; 50='nevermarried'; 99=NA", as.factor.result=T )
sub$sep<-ifelse(sub$married=='separated',1,0)
sub$nm<-ifelse(sub$married=='nevermarried',1,0)

#Smoking Status
sub$smoke<-recode(sub$smokestatus2, recodes="00=NA; 10:13='CurrentSmoker'; 20='FormerSmoker'; 30='NeverSmoked'; else=NA", as.factor.result=T)
sub$smoke<-relevel(sub$smoke, ref = "NeverSmoked")

#Drinking Status
sub$alcohol<-recode(sub$alcstat2, recodes= "00=NA; 10=1; 20:23=2; 31:32=3; 35=3; 33=4; 34=5; 97:99=NA", as.factor.result=T)
sub$alcohol<-relevel(sub$alcohol, ref = "5")

sub$male<-ifelse(sub$sex==1,1,0)

sub$mwt<-sub$mortwt/mean(sub$mortwt, na.rm=T)

#Age cut into intervals
sub$agere<-cut(sub$age, breaks=c(15,18,24,35,45,65,85))

#Education 
sub$moredu<-recode(sub$educrec2, recodes="00=NA; 10:42='High School or Less'; 50:53='Some College'; 54:60='College'; else=NA", as.factor.result=T)

sub$hs<-ifelse(sub$moredu=='hs or less',1,0)
sub$scol1<-ifelse(sub$moredu=='some coll',1,0)

#Race/Ethnicity
sub$race<-recode(sub$racenew, recodes ="10='wht'; 20 ='blk'; 30:61='other'; 97:99=NA", as.factor.result=T)
sub$black<-ifelse(sub$race=='blk',1,0)
sub$oth<-ifelse(sub$race=='other',1,0)

sub$hisp<-recode(sub$hispyn, recodes="1=0; 2=1; else=NA")

sub$race_eth[sub$hisp == 0 & sub$race=="wht"]<-"NHWhite"
## Warning: Unknown or uninitialised column: 'race_eth'.
sub$race_eth[sub$hisp == 0 & sub$race=="blk"]<-"NHBlack"
sub$race_eth[sub$hisp == 0 & sub$race=="other"]<-"NHOther"
sub$race_eth[sub$hisp == 1 ]<-"Hispanic"
sub$race_eth[is.na(sub$hisp) ==T | is.na(sub$race)==T]<-NA

Analysis

Survey Design

sub$alconsum<-recode(sub$alcohol, recodes= "10=1; 20:23=2; 31:32=3; 35=3; 33=4; 34=5; 97:99=NA", as.factor.result=T)

sub$alconsum<-relevel(sub$alconsum, ref = "5")

sub$alcstat<-car::recode(sub$alcohol,
                                recodes="10=1; 20:23=2; 31:32=3; 35=3; 33=4; 34=5; 97:99=NA", as.factor.result = F)

options(survey.lonely.psu="adjust")

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#Keeping only clean cases
sub2<-sub%>%
  select(alconsum, alcstat, cohort, mortstat, agere,race_eth, married, moredu, race, black, hisp, oth, smoke, mortwt, strata, psu) %>%
  filter( complete.cases(.))

#Survey Design
options(survey.lonely.psu="adjust")
des<-svydesign(ids=~psu, strata=~strata, weights = ~mortwt, data=sub[sub$mortwt>0,], nest=T)

Ordinal Regression example

Proportional Odds Model

#Fitting three nested models for level of alcohol consumption
fit.solr1<-svyolr(alconsum~race_eth+moredu+agere,des)
summary(fit.solr1)
## Call:
## svyolr(alconsum ~ race_eth + moredu + agere, des)
## 
## Coefficients:
##                                Value Std. Error    t value
## race_ethNHBlack           -0.2650577 0.07348928  -3.606753
## race_ethNHOther           -0.2922584 0.10433210  -2.801232
## race_ethNHWhite            0.3158499 0.05857141   5.392562
## moreduHigh School or Less -0.6201762 0.05627530 -11.020398
## moreduSome College        -0.2755631 0.05929932  -4.646985
## agere(18,24]               0.8434941 0.21219385   3.975111
## agere(24,35]               1.0863596 0.20908585   5.195759
## agere(35,45]               1.0168974 0.21293909   4.775532
## agere(45,65]               0.7751163 0.20869449   3.714120
## agere(65,85]               0.3497087 0.31736073   1.101928
## 
## Intercepts:
##     Value    Std. Error t value 
## 5|1  -2.0470   0.2144    -9.5486
## 1|2  -0.3100   0.2184    -1.4194
## 2|3   0.2931   0.2189     1.3387
## 3|4   2.5316   0.2181    11.6054
## (10881 observations deleted due to missingness)
#Calculating the AIC 
fit.solr1$deviance+2*length(fit.solr1$coefficients)
## [1] 24890.69

We can see the results from the proportional odds model above. In this case, we see that non-Hispanic Whites have higher odds being heavy drinkers, compared to Hispanic. We also see that as education increases, the odds of being a heavy drinker decreases. As age increases, the odds of being heavy drinker decreases, compared to those <35 years of age.

Examining the proportional odds assumption by fitting logits for each change

ex1<-svyglm(I(alcstat>1)~race_eth+moredu+agere,des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
ex2<-svyglm(I(alcstat>2)~race_eth+moredu+agere,des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

If the proportional odds is correct, and the assumption is ok, then all these should be “approximately” the same values.

plot(coef(ex1)[-1], ylim=c(-3, 3), type="l",xaxt="n",
     ylab="Beta", main=c("Comparison of betas for", " proportional odds assumption"))
lines(coef(ex2)[-1], col=1, lty=2) 
axis(side=1, at=1:12, labels=F)
text(x=1:12, y=-4,  srt = 90, pos = 1, xpd = TRUE,cex=.8,
     labels = c( "Hispanic", "Black","MultRace" ,"Other",
                 "PrimarySch","SomeHS", "SomeColl", "CollGrad",
                 "Age 24-39","Age 39-59" ,"Age 59-79", "Age 80+" ))
legend("bottomright",col=c(1,1),lty=c(1,2), legend=c(">1", ">2"))

lines(coef(fit.solr1)[c(-13:-16)], col=4, lwd=3)

Based on this, the effects appear to be pretty similar across transitions. However, the increasing education and increase age show variations between the two models. The older ages have non-proportional effect on being heavy drinkers.

Printing the odds ratios

round(exp(rbind(coef(ex1)[-1], coef(ex2)[-1])),3)
##      race_ethNHBlack race_ethNHOther race_ethNHWhite
## [1,]           0.849           0.868           2.458
## [2,]           0.809           0.837           1.930
##      moreduHigh School or Less moreduSome College agere(18,24]
## [1,]                     0.611              0.915        3.994
## [2,]                     0.460              0.712        3.081
##      agere(24,35] agere(35,45] agere(45,65] agere(65,85]
## [1,]        5.963        6.262        5.128        3.507
## [2,]        3.323        2.661        1.795        0.599

The non-Hispanic WHites are 2 times more likely to being heavy drinkers compared to the Hispanics. The oldest age category is only 30% likely to being heavy drinkers. The effects are pretty consistent across transitions, as we have seen in the plot.

Non proportional assumption

Proportional odds

library(VGAM)
## Loading required package: stats4
## Loading required package: splines
## 
## Attaching package: 'VGAM'
## The following object is masked from 'package:tidyr':
## 
##     fill
## The following object is masked from 'package:survey':
## 
##     calibrate
## The following object is masked from 'package:car':
## 
##     logit
fit.vgam<-vglm(as.ordered(alconsum)~race_eth+moredu+agere,
               sub, weights =mortwt/mean(mortwt, na.rm=T),
               family=cumulative(parallel = T, reverse = T))  #<-parallel = T == proportional odds
summary(fit.vgam)
## 
## Call:
## vglm(formula = as.ordered(alconsum) ~ race_eth + moredu + agere, 
##     family = cumulative(parallel = T, reverse = T), data = sub, 
##     weights = mortwt/mean(mortwt, na.rm = T))
## 
## 
## Pearson residuals:
##                    Min      1Q  Median      3Q   Max
## logit(P[Y>=2]) -10.802  0.1221  0.1551  0.2656 1.237
## logit(P[Y>=3])  -4.282 -0.6102  0.2834  0.3866 3.098
## logit(P[Y>=4])  -4.041 -0.5630  0.3199  0.7648 2.028
## logit(P[Y>=5])  -1.641 -0.5555 -0.2892 -0.1427 5.151
## 
## Coefficients: 
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1              2.04693    0.22956   8.917  < 2e-16 ***
## (Intercept):2              0.30998    0.22764   1.362 0.173286    
## (Intercept):3             -0.29318    0.22767  -1.288 0.197837    
## (Intercept):4             -2.53167    0.22886 -11.062  < 2e-16 ***
## race_ethNHBlack           -0.26510    0.07913  -3.350 0.000808 ***
## race_ethNHOther           -0.29230    0.09854  -2.966 0.003014 ** 
## race_ethNHWhite            0.31586    0.06367   4.961 7.01e-07 ***
## moreduHigh School or Less -0.62025    0.04954 -12.520  < 2e-16 ***
## moreduSome College        -0.27559    0.05114  -5.389 7.07e-08 ***
## agere(18,24]               0.84356    0.22167   3.805 0.000142 ***
## agere(24,35]               1.08645    0.21867   4.969 6.74e-07 ***
## agere(35,45]               1.01699    0.21856   4.653 3.27e-06 ***
## agere(45,65]               0.77520    0.21724   3.568 0.000359 ***
## agere(65,85]               0.34996    0.42395   0.825 0.409102    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  4 
## 
## Names of linear predictors: 
## logit(P[Y>=2]), logit(P[Y>=3]), logit(P[Y>=4]), logit(P[Y>=5])
## 
## Residual deviance: 25267.11 on 36058 degrees of freedom
## 
## Log-likelihood: -12633.56 on 36058 degrees of freedom
## 
## Number of iterations: 6 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## Exponentiated coefficients:
##           race_ethNHBlack           race_ethNHOther 
##                 0.7671302                 0.7465479 
##           race_ethNHWhite moreduHigh School or Less 
##                 1.3714364                 0.5378124 
##        moreduSome College              agere(18,24] 
##                 0.7591230                 2.3246168 
##              agere(24,35]              agere(35,45] 
##                 2.9637431                 2.7648694 
##              agere(45,65]              agere(65,85] 
##                 2.1710370                 1.4190071

As we can see above, the values do not seem to change much.

Nagelkerke R^2

Proportional odds model with nothing on the right side; just fitting the intercept.

fit.null<-vglm(as.ordered(alcstat)~1,
sub, weights =mortwt/mean(mortwt, na.rm=T),
 family=cumulative(parallel = T, reverse = T))
(1-exp((fit.vgam@criterion$deviance - fit.null@criterion$deviance)/485742))/(1-exp(-fit.null@criterion$deviance/485742))
## [1] 0.02226013

It shows that the model only has a 2% fit.

Non-proportional odds

fit.vgam2<-vglm(as.ordered(alcstat)~race_eth+moredu+agere, sub,
                weights =mortwt/mean(mortwt, na.rm=T),
                family=cumulative(parallel = F, reverse = T))  #<-parallel = F == Nonproportional odds
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1

## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1

## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in vglm.fitter(x = x, y = y, w = w, offset = offset, Xm2 = Xm2, :
## iterations terminated because half-step sizes are very small
## Warning in vglm.fitter(x = x, y = y, w = w, offset = offset, Xm2 =
## Xm2, : some quantities such as z, residuals, SEs may be inaccurate due to
## convergence at a half-step
## Warning in log(prob): NaNs produced
summary(fit.vgam2)
## Warning in matrix.power(wz, M = M, power = 0.5, fast = TRUE): Some weight
## matrices have negative eigenvalues. They will be assigned NAs
## Warning in matrix.power(wz, M = M, power = 0.5, fast = TRUE): Some weight
## matrices have negative eigenvalues. They will be assigned NAs
## 
## Call:
## vglm(formula = as.ordered(alcstat) ~ race_eth + moredu + agere, 
##     family = cumulative(parallel = F, reverse = T), data = sub, 
##     weights = mortwt/mean(mortwt, na.rm = T))
## 
## 
## Coefficients: 
##                               Estimate Std. Error  z value Pr(>|z|)    
## (Intercept):1               -1.3262655  0.0310092  -42.770  < 2e-16 ***
## (Intercept):2               -0.2846078  0.0316130   -9.003  < 2e-16 ***
## (Intercept):3               -1.8688656  0.3025314   -6.177 6.52e-10 ***
## (Intercept):4               -3.7824645  0.5133664   -7.368 1.73e-13 ***
## race_ethNHBlack:1           -0.2002879  0.0006522 -307.105  < 2e-16 ***
## race_ethNHBlack:2           -0.2257457  0.0002728 -827.394  < 2e-16 ***
## race_ethNHBlack:3           -0.2122981  0.1110372   -1.912 0.055882 .  
## race_ethNHBlack:4            0.1903691  0.2160688    0.881 0.378286    
## race_ethNHOther:1           -0.1200638  0.0007311 -164.223  < 2e-16 ***
## race_ethNHOther:2           -0.1574974  0.0004194 -375.544  < 2e-16 ***
## race_ethNHOther:3            0.0153573  0.1305229    0.118 0.906337    
## race_ethNHOther:4            0.5673469  0.2428958    2.336 0.019504 *  
## race_ethNHWhite:1            0.9566197  0.0038156  250.710  < 2e-16 ***
## race_ethNHWhite:2            0.6507342  0.0048272  134.806  < 2e-16 ***
## race_ethNHWhite:3            0.4854335  0.0843325    5.756 8.60e-09 ***
## race_ethNHWhite:4            0.8815734  0.1717272    5.134 2.84e-07 ***
## moreduHigh School or Less:1 -0.4588116  0.0370941  -12.369  < 2e-16 ***
## moreduHigh School or Less:2 -0.7226953  0.0354336  -20.396  < 2e-16 ***
## moreduHigh School or Less:3 -0.2270565  0.0614195   -3.697 0.000218 ***
## moreduHigh School or Less:4  0.3661674  0.1094777    3.345 0.000824 ***
## moreduSome College:1        -0.0587282  0.0354886   -1.655 0.097955 .  
## moreduSome College:2        -0.2879649  0.0347236   -8.293  < 2e-16 ***
## moreduSome College:3        -0.1382891  0.0626647   -2.207 0.027327 *  
## moreduSome College:4         0.2261806  0.1138322    1.987 0.046926 *  
## agere(18,24]:1               1.9487899  0.0432476   45.061  < 2e-16 ***
## agere(18,24]:2               1.1691696  0.0450602   25.947  < 2e-16 ***
## agere(18,24]:3               0.7911532  0.2932810    2.698 0.006984 ** 
## agere(18,24]:4               0.8986635  0.4825459    1.862 0.062556 .  
## agere(24,35]:1               2.3567034  0.0507426   46.444  < 2e-16 ***
## agere(24,35]:2               1.2321190  0.0469618   26.237  < 2e-16 ***
## agere(24,35]:3               0.4071873  0.2913973    1.397 0.162305    
## agere(24,35]:4               0.1127314  0.4833261    0.233 0.815574    
## agere(35,45]:1               2.3953900  0.0535844   44.703  < 2e-16 ***
## agere(35,45]:2               1.0350586  0.0459830   22.510  < 2e-16 ***
## agere(35,45]:3               0.3844393  0.2912771    1.320 0.186888    
## agere(35,45]:4               0.0407072  0.4832851    0.084 0.932873    
## agere(45,65]:1               2.2154875  0.0430381   51.477  < 2e-16 ***
## agere(45,65]:2               0.6557336  0.0363220   18.053  < 2e-16 ***
## agere(45,65]:3               0.2444691  0.2899511    0.843 0.399151    
## agere(45,65]:4               0.0568766  0.4800968    0.118 0.905696    
## agere(65,85]:1               1.8398814  0.4721473    3.897 9.75e-05 ***
## agere(65,85]:2              -0.4822111  0.4256389   -1.133 0.257251    
## agere(65,85]:3              -0.7333592  0.7552446   -0.971 0.331537    
## agere(65,85]:4              -1.0088530  1.4145006   -0.713 0.475708    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  4 
## 
## Names of linear predictors: 
## logit(P[Y>=2]), logit(P[Y>=3]), logit(P[Y>=4]), logit(P[Y>=5])
## 
## Residual deviance: 27444.26 on 36028 degrees of freedom
## 
## Log-likelihood: NA on 36028 degrees of freedom
## 
## Number of iterations: 2 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):2', '(Intercept):3', '(Intercept):4', 'race_ethNHBlack:2', 'race_ethNHOther:2', 'race_ethNHWhite:1', 'moreduHigh School or Less:2', 'moreduSome College:2', 'agere(18,24]:1'
## 
## Exponentiated coefficients:
##           race_ethNHBlack:1           race_ethNHBlack:2 
##                   0.8184951                   0.7979210 
##           race_ethNHBlack:3           race_ethNHBlack:4 
##                   0.8087235                   1.2096960 
##           race_ethNHOther:1           race_ethNHOther:2 
##                   0.8868639                   0.8542791 
##           race_ethNHOther:3           race_ethNHOther:4 
##                   1.0154759                   1.7635818 
##           race_ethNHWhite:1           race_ethNHWhite:2 
##                   2.6028830                   1.9169477 
##           race_ethNHWhite:3           race_ethNHWhite:4 
##                   1.6248793                   2.4146960 
## moreduHigh School or Less:1 moreduHigh School or Less:2 
##                   0.6320343                   0.4854421 
## moreduHigh School or Less:3 moreduHigh School or Less:4 
##                   0.7968757                   1.4421966 
##        moreduSome College:1        moreduSome College:2 
##                   0.9429630                   0.7497879 
##        moreduSome College:3        moreduSome College:4 
##                   0.8708469                   1.2538020 
##              agere(18,24]:1              agere(18,24]:2 
##                   7.0201876                   3.2193182 
##              agere(18,24]:3              agere(18,24]:4 
##                   2.2059388                   2.4563180 
##              agere(24,35]:1              agere(24,35]:2 
##                  10.5560948                   3.4284867 
##              agere(24,35]:3              agere(24,35]:4 
##                   1.5025855                   1.1193312 
##              agere(35,45]:1              agere(35,45]:2 
##                  10.9724767                   2.8152711 
##              agere(35,45]:3              agere(35,45]:4 
##                   1.4687906                   1.0415470 
##              agere(45,65]:1              agere(45,65]:2 
##                   9.1658765                   1.9265554 
##              agere(45,65]:3              agere(45,65]:4 
##                   1.2769433                   1.0585252 
##              agere(65,85]:1              agere(65,85]:2 
##                   6.2957914                   0.6174167 
##              agere(65,85]:3              agere(65,85]:4 
##                   0.4802929                   0.3646370
fit.null2<-vglm(as.ordered(alcstat)~1, sub, weights =mortwt/mean(mortwt, na.rm=T),
 family=cumulative(parallel = F, reverse = T))

(1-exp((fit.vgam2@criterion$deviance - fit.null2@criterion$deviance)/485742))/(1-exp(-fit.null2@criterion$deviance/485742))
## [1] -0.0643912

It shows a bette fit of 33 % because of the extra cofficients, even though it shows negative direction

AIC(fit.vgam)
## [1] 25295.11
AIC(fit.vgam2)
## [1] NaN

Based on the AIC, the proportional and nonproportional odds model cannot be compared.

Adjacent categories

fit.acat<-vglm(as.ordered(alcstat)~race_eth+moredu+agere,sub, 
               weights = mortwt/mean(mortwt, na.rm=T),
               family=acat(parallel = F, reverse = T))
summary(fit.acat)
## 
## Call:
## vglm(formula = as.ordered(alcstat) ~ race_eth + moredu + agere, 
##     family = acat(parallel = F, reverse = T), data = sub, weights = mortwt/mean(mortwt, 
##         na.rm = T))
## 
## 
## Pearson residuals:
##                         Min       1Q   Median      3Q   Max
## loge(P[Y=1]/P[Y=2])  -5.080 -0.32662 -0.21174 -0.1152 4.861
## loge(P[Y=2]/P[Y=3])  -1.891 -0.69518 -0.39380  0.6197 5.428
## loge(P[Y=3]/P[Y=4])  -4.808  0.09087  0.29916  0.6736 1.687
## loge(P[Y=4]/P[Y=5]) -10.174  0.04843  0.08892  0.1490 2.318
## 
## Coefficients: 
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1                17.80382  444.37700   0.040 0.968042    
## (Intercept):2               -17.00864  444.37703  -0.038 0.969468    
## (Intercept):3                 1.15384    0.51441   2.243 0.024894 *  
## (Intercept):4                 1.73003    0.71253   2.428 0.015181 *  
## race_ethNHBlack:1            -0.01186    0.14573  -0.081 0.935150    
## race_ethNHBlack:2             0.16864    0.14312   1.178 0.238680    
## race_ethNHBlack:3             0.39988    0.15069   2.654 0.007965 ** 
## race_ethNHBlack:4            -0.75004    0.28087  -2.670 0.007575 ** 
## race_ethNHOther:1             0.01838    0.18961   0.097 0.922791    
## race_ethNHOther:2             0.19050    0.18534   1.028 0.304010    
## race_ethNHOther:3             0.13888    0.17627   0.788 0.430755    
## race_ethNHOther:4            -1.00217    0.30983  -3.235 0.001218 ** 
## race_ethNHWhite:1            -0.75598    0.12131  -6.232 4.61e-10 ***
## race_ethNHWhite:2            -0.08041    0.11574  -0.695 0.487233    
## race_ethNHWhite:3            -0.05888    0.10927  -0.539 0.590007    
## race_ethNHWhite:4            -0.85488    0.21987  -3.888 0.000101 ***
## moreduHigh School or Less:1  -0.33231    0.10437  -3.184 0.001452 ** 
## moreduHigh School or Less:2   1.00473    0.09176  10.950  < 2e-16 ***
## moreduHigh School or Less:3   0.12030    0.07780   1.546 0.122018    
## moreduHigh School or Less:4  -0.73494    0.12972  -5.666 1.46e-08 ***
## moreduSome College:1         -0.43090    0.11270  -3.824 0.000132 ***
## moreduSome College:2          0.59515    0.09706   6.132 8.69e-10 ***
## moreduSome College:3          0.13533    0.07725   1.752 0.079814 .  
## moreduSome College:4         -0.43969    0.13343  -3.295 0.000984 ***
## agere(18,24]:1              -15.08964  444.37700  -0.034 0.972912    
## agere(18,24]:2               13.89921  444.37704   0.031 0.975048    
## agere(18,24]:3               -0.34297    0.50580  -0.678 0.497720    
## agere(18,24]:4                0.03157    0.67809   0.047 0.962864    
## agere(24,35]:1              -16.20905  444.37698  -0.036 0.970903    
## agere(24,35]:2               14.54524  444.37701   0.033 0.973889    
## agere(24,35]:3               -0.06329    0.50264  -0.126 0.899801    
## agere(24,35]:4                0.49208    0.67656   0.727 0.467023    
## agere(35,45]:1              -16.74126  444.37698  -0.038 0.969948    
## agere(35,45]:2               15.12575  444.37701   0.034 0.972847    
## agere(35,45]:3               -0.14329    0.50254  -0.285 0.775547    
## agere(35,45]:4                0.55887    0.67636   0.826 0.408634    
## agere(45,65]:1              -16.92591  444.37698  -0.038 0.969617    
## agere(45,65]:2               15.62092  444.37701   0.035 0.971958    
## agere(45,65]:3               -0.10948    0.50170  -0.218 0.827254    
## agere(45,65]:4                0.36369    0.67362   0.540 0.589266    
## agere(65,85]:1              -17.33779  444.37728  -0.039 0.968878    
## agere(65,85]:2               16.61018  444.37727   0.037 0.970183    
## agere(65,85]:3                0.91542    1.15909   0.790 0.429658    
## agere(65,85]:4               13.24344  582.88558   0.023 0.981873    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  4 
## 
## Names of linear predictors: 
## loge(P[Y=1]/P[Y=2]), loge(P[Y=2]/P[Y=3]), loge(P[Y=3]/P[Y=4]), loge(P[Y=4]/P[Y=5])
## 
## Residual deviance: 24600.34 on 36028 degrees of freedom
## 
## Log-likelihood: -12300.17 on 36028 degrees of freedom
## 
## Number of iterations: 15

The above output shows that, education has the most notable changes in the results compared to results above. The results do not change much for race and ethnicity as well as for the increasing age groups. It means, the drinking habit do not change much between the adjacent categories when it comes to increase in drinking habit between different race and ethnic groups.

Multinomial Model

mfit<-vglm(alcstat~race_eth+moredu+agere,
           multinomial(refLevel = 1),sub,
           weights=mortwt/mean(mortwt, na.rm=T))

summary(mfit)
## 
## Call:
## vglm(formula = alcstat ~ race_eth + moredu + agere, family = multinomial(refLevel = 1), 
##     data = sub, weights = mortwt/mean(mortwt, na.rm = T))
## 
## 
## Pearson residuals:
##                       Min      1Q  Median       3Q    Max
## log(mu[,2]/mu[,1]) -2.342 -0.3755 -0.1915 -0.10077  7.381
## log(mu[,3]/mu[,1]) -3.661 -0.7389 -0.4190  0.97312  2.631
## log(mu[,4]/mu[,1]) -2.683 -0.4018 -0.2310 -0.13174  5.400
## log(mu[,5]/mu[,1]) -2.081 -0.2310 -0.1291 -0.08046 10.618
## 
## Coefficients: 
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1               -17.80382  444.37701  -0.040 0.968042    
## (Intercept):2                -0.79518    0.30453  -2.611 0.009023 ** 
## (Intercept):3                -1.94902    0.49191  -3.962 7.43e-05 ***
## (Intercept):4                -3.67905    0.58169  -6.325 2.54e-10 ***
## race_ethNHBlack:1             0.01186    0.14573   0.081 0.935150    
## race_ethNHBlack:2            -0.15678    0.10339  -1.516 0.129425    
## race_ethNHBlack:3            -0.55666    0.15458  -3.601 0.000317 ***
## race_ethNHBlack:4             0.19338    0.25783   0.750 0.453236    
## race_ethNHOther:1            -0.01838    0.18961  -0.097 0.922791    
## race_ethNHOther:2            -0.20888    0.13028  -1.603 0.108877    
## race_ethNHOther:3            -0.34776    0.18319  -1.898 0.057645 .  
## race_ethNHOther:4             0.65441    0.28497   2.296 0.021651 *  
## race_ethNHWhite:1             0.75598    0.12131   6.232 4.61e-10 ***
## race_ethNHWhite:2             0.83639    0.08638   9.683  < 2e-16 ***
## race_ethNHWhite:3             0.89526    0.11622   7.703 1.33e-14 ***
## race_ethNHWhite:4             1.75014    0.20891   8.378  < 2e-16 ***
## moreduHigh School or Less:1   0.33231    0.10437   3.184 0.001452 ** 
## moreduHigh School or Less:2  -0.67243    0.07489  -8.979  < 2e-16 ***
## moreduHigh School or Less:3  -0.79273    0.09308  -8.517  < 2e-16 ***
## moreduHigh School or Less:4  -0.05778    0.12847  -0.450 0.652864    
## moreduSome College:1          0.43090    0.11270   3.824 0.000132 ***
## moreduSome College:2         -0.16425    0.07991  -2.056 0.039830 *  
## moreduSome College:3         -0.29958    0.09697  -3.089 0.002006 ** 
## moreduSome College:4          0.14011    0.13539   1.035 0.300711    
## agere(18,24]:1               15.08964  444.37701   0.034 0.972912    
## agere(18,24]:2                1.19043    0.29476   4.039 5.37e-05 ***
## agere(18,24]:3                1.53340    0.48082   3.189 0.001427 ** 
## agere(18,24]:4                1.50183    0.54162   2.773 0.005557 ** 
## agere(24,35]:1               16.20905  444.37699   0.036 0.970903    
## agere(24,35]:2                1.66381    0.29091   5.719 1.07e-08 ***
## agere(24,35]:3                1.72710    0.47745   3.617 0.000298 ***
## agere(24,35]:4                1.23502    0.54059   2.285 0.022338 *  
## agere(35,45]:1               16.74126  444.37699   0.038 0.969948    
## agere(35,45]:2                1.61551    0.29117   5.548 2.88e-08 ***
## agere(35,45]:3                1.75879    0.47740   3.684 0.000230 ***
## agere(35,45]:4                1.19992    0.54053   2.220 0.026426 *  
## agere(45,65]:1               16.92592  444.37699   0.038 0.969617    
## agere(45,65]:2                1.30500    0.28885   4.518 6.25e-06 ***
## agere(45,65]:3                1.41448    0.47556   2.974 0.002936 ** 
## agere(45,65]:4                1.05080    0.53657   1.958 0.050186 .  
## agere(65,85]:1               17.33779  444.37729   0.039 0.968878    
## agere(65,85]:2                0.72761    0.61499   1.183 0.236764    
## agere(65,85]:3               -0.18782    1.17406  -0.160 0.872903    
## agere(65,85]:4              -13.43125  582.88475  -0.023 0.981616    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  4 
## 
## Names of linear predictors: 
## log(mu[,2]/mu[,1]), log(mu[,3]/mu[,1]), log(mu[,4]/mu[,1]), log(mu[,5]/mu[,1])
## 
## Residual deviance: 24600.34 on 36028 degrees of freedom
## 
## Log-likelihood: -12300.17 on 36028 degrees of freedom
## 
## Number of iterations: 15 
## 
## Reference group is level  1  of the response

The odds ratios show better results than the ordinal regression example. Also, the beta’s show that non-Hispanic Whites are significantly more like to be heavy drinkers than the Hispanics. The impact of increasing education on drinking habit is also significant, as well as the impact of lower age groups.

Calculating the odds ratios and confidence intervals

round(exp(coef(mfit)), 3)
##               (Intercept):1               (Intercept):2 
##                       0.000                       0.452 
##               (Intercept):3               (Intercept):4 
##                       0.142                       0.025 
##           race_ethNHBlack:1           race_ethNHBlack:2 
##                       1.012                       0.855 
##           race_ethNHBlack:3           race_ethNHBlack:4 
##                       0.573                       1.213 
##           race_ethNHOther:1           race_ethNHOther:2 
##                       0.982                       0.811 
##           race_ethNHOther:3           race_ethNHOther:4 
##                       0.706                       1.924 
##           race_ethNHWhite:1           race_ethNHWhite:2 
##                       2.130                       2.308 
##           race_ethNHWhite:3           race_ethNHWhite:4 
##                       2.448                       5.755 
## moreduHigh School or Less:1 moreduHigh School or Less:2 
##                       1.394                       0.510 
## moreduHigh School or Less:3 moreduHigh School or Less:4 
##                       0.453                       0.944 
##        moreduSome College:1        moreduSome College:2 
##                       1.539                       0.849 
##        moreduSome College:3        moreduSome College:4 
##                       0.741                       1.150 
##              agere(18,24]:1              agere(18,24]:2 
##                 3575585.493                       3.289 
##              agere(18,24]:3              agere(18,24]:4 
##                       4.634                       4.490 
##              agere(24,35]:1              agere(24,35]:2 
##                10952221.572                       5.279 
##              agere(24,35]:3              agere(24,35]:4 
##                       5.624                       3.438 
##              agere(35,45]:1              agere(35,45]:2 
##                18648183.902                       5.030 
##              agere(35,45]:3              agere(35,45]:4 
##                       5.805                       3.320 
##              agere(45,65]:1              agere(45,65]:2 
##                22430115.179                       3.688 
##              agere(45,65]:3              agere(45,65]:4 
##                       4.114                       2.860 
##              agere(65,85]:1              agere(65,85]:2 
##                33861496.763                       2.070 
##              agere(65,85]:3              agere(65,85]:4 
##                       0.829                       0.000
round(exp(confint(mfit)), 3)
##                             2.5 % 97.5 %
## (Intercept):1               0.000    Inf
## (Intercept):2               0.249  0.820
## (Intercept):3               0.054  0.373
## (Intercept):4               0.008  0.079
## race_ethNHBlack:1           0.761  1.346
## race_ethNHBlack:2           0.698  1.047
## race_ethNHBlack:3           0.423  0.776
## race_ethNHBlack:4           0.732  2.011
## race_ethNHOther:1           0.677  1.424
## race_ethNHOther:2           0.629  1.048
## race_ethNHOther:3           0.493  1.011
## race_ethNHOther:4           1.101  3.363
## race_ethNHWhite:1           1.679  2.701
## race_ethNHWhite:2           1.949  2.734
## race_ethNHWhite:3           1.949  3.074
## race_ethNHWhite:4           3.822  8.668
## moreduHigh School or Less:1 1.136  1.711
## moreduHigh School or Less:2 0.441  0.591
## moreduHigh School or Less:3 0.377  0.543
## moreduHigh School or Less:4 0.734  1.214
## moreduSome College:1        1.234  1.919
## moreduSome College:2        0.726  0.992
## moreduSome College:3        0.613  0.896
## moreduSome College:4        0.882  1.500
## agere(18,24]:1              0.000    Inf
## agere(18,24]:2              1.845  5.860
## agere(18,24]:3              1.806 11.891
## agere(18,24]:4              1.553 12.980
## agere(24,35]:1              0.000    Inf
## agere(24,35]:2              2.985  9.337
## agere(24,35]:3              2.206 14.338
## agere(24,35]:4              1.192  9.920
## agere(35,45]:1              0.000    Inf
## agere(35,45]:2              2.843  8.901
## agere(35,45]:3              2.278 14.798
## agere(35,45]:4              1.151  9.577
## agere(45,65]:1              0.000    Inf
## agere(45,65]:2              2.094  6.496
## agere(45,65]:3              1.620 10.450
## agere(45,65]:4              0.999  8.186
## agere(65,85]:1              0.000    Inf
## agere(65,85]:2              0.620  6.910
## agere(65,85]:3              0.083  8.276
## agere(65,85]:4              0.000    Inf

In this case, non-hispanic Whites are more likely to be heavy drinkers compared to Hispanics. Non-Hispanics Blakcs are also comparatively heavy drinkers than the Hispanics. In terms of education, those with college education are less likely to be heavy drinkers. As for the increasing age, people tend to show less drinking habit compared to the younger age groups.