library(car)
## Loading required package: carData
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(ggplot2)
library(haven)
NSDUH_2019 <- read_sav("NSDUH_2019.SAV")
View(NSDUH_2019)
## marital status
NSDUH_2019$marst<-Recode(NSDUH_2019$irmarit, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; else=NA", as.factor=T)
NSDUH_2019$marst<-relevel(NSDUH_2019$marst, ref='married')

## education recodes
NSDUH_2019$educ<-Recode(NSDUH_2019$ireduhighst2, recodes="1:7='LssThnHgh'; 8='highschool'; 9='someCollege'; 10='associates'; 11='colgrad';else=NA", as.factor=T)
NSDUH_2019$educ<-relevel(NSDUH_2019$educ, ref='colgrad')

## sexuality recodes
NSDUH_2019$sexuality<-Recode(NSDUH_2019$sexident, recodes="1='Heterosexual'; 2='Les/Gay'; 3='Bisexual';else=NA", as.factor=T)
NSDUH_2019$sexuality<-relevel(NSDUH_2019$sexuality, ref='Heterosexual')

## gender recodes
NSDUH_2019$male<-as.factor(ifelse(NSDUH_2019$irsex==1, "Male", "Female"))

## Race recoded items
NSDUH_2019$black<-Recode(NSDUH_2019$newrace2, recodes="2=1; 9=NA; else=0")
NSDUH_2019$white<-Recode(NSDUH_2019$newrace2, recodes="1=1; 9=NA; else=0")
NSDUH_2019$other<-Recode(NSDUH_2019$newrace2, recodes="3:4=1; 9=NA; else=0")
NSDUH_2019$mult_race<-Recode(NSDUH_2019$newrace2, recodes="6=1; 9=NA; else=0")
NSDUH_2019$asian<-Recode(NSDUH_2019$newrace2, recodes="5=1; 9=NA; else=0")
NSDUH_2019$hispanic<-Recode(NSDUH_2019$newrace2, recodes="7=1; 9=NA; else=0")
NSDUH_2019$race_eth<-Recode(NSDUH_2019$newrace2,
                          recodes="1='white'; 2='black'; 3='other'; 4='asian'; 5='mult_race'; 6='hispanic'; else=NA",
                          as.factor = T)
NSDUH_2019$race_eth<-relevel(NSDUH_2019$race_eth, ref='white')
NSDUH_2019$lst_alc_use2<-Recode(NSDUH_2019$iralcrc, recodes="1='last 30days'; 2='12>1month'; 3='>12months'; else=NA", as.factor=T)
NSDUH_2019$dep_year2<-Recode(NSDUH_2019$amdeyr, recodes="1=1; 2=0;else=NA", as.factor=T)
NSDUH_2019$age_cat<-Recode(NSDUH_2019$age2, recodes="7:8='18-19'; 9:10='20-21'; 11='22-23'; 12='24-25'; 13='26-29'; 14='30-34'; 15='35-49'; 16='50-64'; 17='65+'; else=NA", as.factor=T)

Histogram

NSDUH_2019$alcyrtot2<-Recode(NSDUH_2019$alcyrtot, recodes = "985:998=NA")
hist(NSDUH_2019$alcyrtot2)

summary(NSDUH_2019$alcyrtot2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00   12.00   48.00   80.83  120.00  365.00   23006
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
sub<-NSDUH_2019%>%
  select(alcyrtot2, age_cat, race_eth, 
         marst, educ, lst_alc_use2, dep_year2, white, black, hispanic,
         other, mult_race, asian, hispanic, income, male, sexuality, analwtc, vestr) %>%
  filter( complete.cases(.))

options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~vestr, weights=~analwtc, data =sub )

options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~vestr, 
               weights=~analwtc,
               data = NSDUH_2019[is.na(NSDUH_2019$analwtc)==F,])
svyhist(~alcyrtot2, des)

svyby(~alcyrtot2, ~race_eth+educ, des, svymean, na.rm=T)
##                        race_eth        educ alcyrtot2        se
## white.colgrad             white     colgrad 110.01825  1.733805
## asian.colgrad             asian     colgrad  40.86604 13.174518
## black.colgrad             black     colgrad  74.13335  4.002535
## hispanic.colgrad       hispanic     colgrad  88.63893  9.033293
## mult_race.colgrad     mult_race     colgrad  58.56157  4.176777
## other.colgrad             other     colgrad  57.23540  8.537946
## white.associates          white  associates  99.52782  3.499248
## asian.associates          asian  associates  71.75474 23.619244
## black.associates          black  associates  74.84763  5.405869
## hispanic.associates    hispanic  associates  96.85974 15.445061
## mult_race.associates  mult_race  associates  75.74198 12.793567
## other.associates          other  associates  56.40716 18.401269
## white.highschool          white  highschool  94.33584  2.588339
## asian.highschool          asian  highschool  74.41367 16.201048
## black.highschool          black  highschool  84.50323  3.988172
## hispanic.highschool    hispanic  highschool 114.50072 16.537347
## mult_race.highschool  mult_race  highschool  91.27327 17.205912
## other.highschool          other  highschool 101.23943 12.643581
## white.LssThnHgh           white   LssThnHgh  69.69736  3.493612
## asian.LssThnHgh           asian   LssThnHgh  38.90550 10.571440
## black.LssThnHgh           black   LssThnHgh  82.48821  6.010704
## hispanic.LssThnHgh     hispanic   LssThnHgh  59.63737 10.219330
## mult_race.LssThnHgh   mult_race   LssThnHgh  57.93165 24.880795
## other.LssThnHgh           other   LssThnHgh  62.47405 16.481318
## white.someCollege         white someCollege  94.20759  2.281043
## asian.someCollege         asian someCollege  68.95634 12.631771
## black.someCollege         black someCollege  83.41700  4.333830
## hispanic.someCollege   hispanic someCollege  88.36257 10.059974
## mult_race.someCollege mult_race someCollege  67.49612  6.942847
## other.someCollege         other someCollege  87.10928 12.856316

##1) Define a count outcome for the dataset of your choosing ## A count outcome variable known in the codebook as alcyrtot, or total number of days alcohol was used in the past year. ## a. State a research question about your outcome ##Disadvantaged groups by demographic type and socioeconomic status are more at risk of more days of alcohol usage compared to those that are not disadvantaged. This is expected to be similarly found by racial category, at baseline whites should be at lower risk of using alcohol throughout the year than other racial categories. ##b. Is an offset term necessary? why or why not? ##An offset term is not neccessary, as every adult person in the study was asked about their alcohol usage using the same time period of 1 year or 365 days. As time went on as indicated by the histogram, less people used alcohol, a much larger concentration of people used it during the early period of the year. This is clearly not a normally distrbibuted variable.

##2) Consider a Poisson regression model for the outcome ## Poisson Regression Model

svyhist(~alcyrtot2, des)

fit1<-svyglm(alcyrtot2~factor(race_eth)+factor(educ)+factor(age_cat)+factor(marst)+factor(sexuality)+factor(male)+scale(income)+factor(lst_alc_use2)+factor(dep_year2), design=des, family=poisson)
summary(fit1)
## 
## Call:
## svyglm(formula = alcyrtot2 ~ factor(race_eth) + factor(educ) + 
##     factor(age_cat) + factor(marst) + factor(sexuality) + factor(male) + 
##     scale(income) + factor(lst_alc_use2) + factor(dep_year2), 
##     design = des, family = poisson)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~vestr, weights = ~analwtc, data = NSDUH_2019[is.na(NSDUH_2019$analwtc) == 
##     F, ])
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      2.454456   0.068345  35.913  < 2e-16 ***
## factor(race_eth)asian           -0.488750   0.116292  -4.203 2.65e-05 ***
## factor(race_eth)black           -0.142283   0.027443  -5.185 2.18e-07 ***
## factor(race_eth)hispanic        -0.011118   0.057021  -0.195 0.845412    
## factor(race_eth)mult_race       -0.371465   0.054244  -6.848 7.66e-12 ***
## factor(race_eth)other            0.008578   0.075423   0.114 0.909452    
## factor(educ)associates          -0.022904   0.032328  -0.708 0.478649    
## factor(educ)highschool           0.012922   0.027595   0.468 0.639585    
## factor(educ)LssThnHgh            0.030781   0.043268   0.711 0.476834    
## factor(educ)someCollege         -0.019943   0.025453  -0.784 0.433318    
## factor(age_cat)20-21             0.282204   0.055705   5.066 4.09e-07 ***
## factor(age_cat)22-23             0.478663   0.054510   8.781  < 2e-16 ***
## factor(age_cat)24-25             0.527403   0.053895   9.786  < 2e-16 ***
## factor(age_cat)26-29             0.562635   0.053455  10.525  < 2e-16 ***
## factor(age_cat)30-34             0.577600   0.054626  10.574  < 2e-16 ***
## factor(age_cat)35-49             0.630430   0.053651  11.751  < 2e-16 ***
## factor(age_cat)50-64             0.715680   0.056638  12.636  < 2e-16 ***
## factor(age_cat)65+               0.808953   0.059187  13.668  < 2e-16 ***
## factor(marst)divorced            0.085370   0.059462   1.436 0.151100    
## factor(marst)separated           0.088914   0.025062   3.548 0.000389 ***
## factor(marst)widowed             0.088877   0.030004   2.962 0.003058 ** 
## factor(sexuality)Bisexual        0.095272   0.037729   2.525 0.011570 *  
## factor(sexuality)Les/Gay         0.147879   0.053489   2.765 0.005702 ** 
## factor(male)Male                 0.232782   0.019057  12.215  < 2e-16 ***
## scale(income)                    0.022603   0.011618   1.945 0.051730 .  
## factor(lst_alc_use2)last 30days  1.484232   0.040291  36.838  < 2e-16 ***
## factor(dep_year2)1              -0.003427   0.032058  -0.107 0.914874    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 98.65696)
## 
## Number of Fisher Scoring iterations: 6
round(exp(summary(fit1)$coef[-1,1]), 3)
##           factor(race_eth)asian           factor(race_eth)black 
##                           0.613                           0.867 
##        factor(race_eth)hispanic       factor(race_eth)mult_race 
##                           0.989                           0.690 
##           factor(race_eth)other          factor(educ)associates 
##                           1.009                           0.977 
##          factor(educ)highschool           factor(educ)LssThnHgh 
##                           1.013                           1.031 
##         factor(educ)someCollege            factor(age_cat)20-21 
##                           0.980                           1.326 
##            factor(age_cat)22-23            factor(age_cat)24-25 
##                           1.614                           1.695 
##            factor(age_cat)26-29            factor(age_cat)30-34 
##                           1.755                           1.782 
##            factor(age_cat)35-49            factor(age_cat)50-64 
##                           1.878                           2.046 
##              factor(age_cat)65+           factor(marst)divorced 
##                           2.246                           1.089 
##          factor(marst)separated            factor(marst)widowed 
##                           1.093                           1.093 
##       factor(sexuality)Bisexual        factor(sexuality)Les/Gay 
##                           1.100                           1.159 
##                factor(male)Male                   scale(income) 
##                           1.262                           1.023 
## factor(lst_alc_use2)last 30days              factor(dep_year2)1 
##                           4.412                           0.997

##a. Evaluate the level of dispersion in the outcome b. Is the Poisson model a good choice? ## According to the models level of dispersion, the poisson regression model is not a good fit for this particular model. This models dispersion being 184.60, which is severely above the 1.0 threshold, and indicates a high level of dispersion. From here another model is recommended in that allows for the more variability seen in this model.

Scaling for over dispersion work

fit2<-glm(alcyrtot2~factor(race_eth)+factor(educ)+factor(age_cat)+factor(marst)+factor(sexuality)+factor(male)+scale(income)+factor(lst_alc_use2)+factor(dep_year2),  data=NSDUH_2019, family=poisson)
summary(fit2)
## 
## Call:
## glm(formula = alcyrtot2 ~ factor(race_eth) + factor(educ) + factor(age_cat) + 
##     factor(marst) + factor(sexuality) + factor(male) + scale(income) + 
##     factor(lst_alc_use2) + factor(dep_year2), family = poisson, 
##     data = NSDUH_2019)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -17.855   -7.187   -3.309    3.709   33.049  
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      2.5546061  0.0052850 483.366  < 2e-16 ***
## factor(race_eth)asian           -0.1992896  0.0105016 -18.977  < 2e-16 ***
## factor(race_eth)black           -0.1079351  0.0021691 -49.761  < 2e-16 ***
## factor(race_eth)hispanic        -0.0376994  0.0035315 -10.675  < 2e-16 ***
## factor(race_eth)mult_race       -0.3242750  0.0037758 -85.884  < 2e-16 ***
## factor(race_eth)other            0.0191324  0.0060730   3.150  0.00163 ** 
## factor(educ)associates          -0.0627613  0.0024031 -26.117  < 2e-16 ***
## factor(educ)highschool          -0.0104790  0.0019387  -5.405 6.47e-08 ***
## factor(educ)LssThnHgh            0.0152747  0.0030463   5.014 5.33e-07 ***
## factor(educ)someCollege         -0.0378893  0.0018746 -20.212  < 2e-16 ***
## factor(age_cat)20-21             0.2975109  0.0048148  61.791  < 2e-16 ***
## factor(age_cat)22-23             0.4784333  0.0045802 104.457  < 2e-16 ***
## factor(age_cat)24-25             0.5047068  0.0045248 111.541  < 2e-16 ***
## factor(age_cat)26-29             0.5407936  0.0045282 119.428  < 2e-16 ***
## factor(age_cat)30-34             0.5698645  0.0045375 125.590  < 2e-16 ***
## factor(age_cat)35-49             0.6266393  0.0043965 142.533  < 2e-16 ***
## factor(age_cat)50-64             0.7000581  0.0046555 150.372  < 2e-16 ***
## factor(age_cat)65+               0.8050754  0.0048619 165.589  < 2e-16 ***
## factor(marst)divorced            0.0703371  0.0043426  16.197  < 2e-16 ***
## factor(marst)separated           0.1108477  0.0019284  57.480  < 2e-16 ***
## factor(marst)widowed             0.0957282  0.0022998  41.625  < 2e-16 ***
## factor(sexuality)Bisexual        0.1083087  0.0028224  38.375  < 2e-16 ***
## factor(sexuality)Les/Gay         0.0888386  0.0042132  21.086  < 2e-16 ***
## factor(male)Male                 0.2412059  0.0013840 174.286  < 2e-16 ***
## scale(income)                    0.0223243  0.0008098  27.567  < 2e-16 ***
## factor(lst_alc_use2)last 30days  1.3826601  0.0028727 481.312  < 2e-16 ***
## factor(dep_year2)1               0.0275527  0.0022154  12.437  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 2260301  on 24982  degrees of freedom
## Residual deviance: 1770061  on 24956  degrees of freedom
##   (31153 observations deleted due to missingness)
## AIC: 1909340
## 
## Number of Fisher Scoring iterations: 6
scale<-sqrt(fit2$deviance/fit2$df.residual)
scale
## [1] 8.421833
1-pchisq(fit2$deviance, df = fit2$df.residual)
## [1] 0

##Quasi Model Work

fit3<-glm(alcyrtot2~factor(race_eth)+factor(educ)+factor(age_cat)+factor(marst)+factor(sexuality)+factor(male)+scale(income)+factor(lst_alc_use2)+factor(dep_year2), data=NSDUH_2019, family=quasipoisson)
summary(fit3)
## 
## Call:
## glm(formula = alcyrtot2 ~ factor(race_eth) + factor(educ) + factor(age_cat) + 
##     factor(marst) + factor(sexuality) + factor(male) + scale(income) + 
##     factor(lst_alc_use2) + factor(dep_year2), family = quasipoisson, 
##     data = NSDUH_2019)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -17.855   -7.187   -3.309    3.709   33.049  
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      2.554606   0.047478  53.806  < 2e-16 ***
## factor(race_eth)asian           -0.199290   0.094341  -2.112  0.03466 *  
## factor(race_eth)black           -0.107935   0.019486  -5.539 3.07e-08 ***
## factor(race_eth)hispanic        -0.037699   0.031725  -1.188  0.23472    
## factor(race_eth)mult_race       -0.324275   0.033919  -9.560  < 2e-16 ***
## factor(race_eth)other            0.019132   0.054556   0.351  0.72582    
## factor(educ)associates          -0.062761   0.021588  -2.907  0.00365 ** 
## factor(educ)highschool          -0.010479   0.017416  -0.602  0.54739    
## factor(educ)LssThnHgh            0.015275   0.027367   0.558  0.57675    
## factor(educ)someCollege         -0.037889   0.016840  -2.250  0.02446 *  
## factor(age_cat)20-21             0.297511   0.043254   6.878 6.20e-12 ***
## factor(age_cat)22-23             0.478433   0.041146  11.628  < 2e-16 ***
## factor(age_cat)24-25             0.504707   0.040649  12.416  < 2e-16 ***
## factor(age_cat)26-29             0.540794   0.040679  13.294  < 2e-16 ***
## factor(age_cat)30-34             0.569865   0.040763  13.980  < 2e-16 ***
## factor(age_cat)35-49             0.626639   0.039495  15.866  < 2e-16 ***
## factor(age_cat)50-64             0.700058   0.041822  16.739  < 2e-16 ***
## factor(age_cat)65+               0.805075   0.043677  18.433  < 2e-16 ***
## factor(marst)divorced            0.070337   0.039012   1.803  0.07141 .  
## factor(marst)separated           0.110848   0.017324   6.398 1.60e-10 ***
## factor(marst)widowed             0.095728   0.020660   4.633 3.61e-06 ***
## factor(sexuality)Bisexual        0.108309   0.025355   4.272 1.95e-05 ***
## factor(sexuality)Les/Gay         0.088839   0.037849   2.347  0.01892 *  
## factor(male)Male                 0.241206   0.012433  19.401  < 2e-16 ***
## scale(income)                    0.022324   0.007275   3.069  0.00215 ** 
## factor(lst_alc_use2)last 30days  1.382660   0.025807  53.578  < 2e-16 ***
## factor(dep_year2)1               0.027553   0.019902   1.384  0.16624    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 80.70269)
## 
##     Null deviance: 2260301  on 24982  degrees of freedom
## Residual deviance: 1770061  on 24956  degrees of freedom
##   (31153 observations deleted due to missingness)
## AIC: NA
## 
## Number of Fisher Scoring iterations: 6

3) Consider a Negative binomial model

Negative Binomial Model

library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)
coeftest(fit2, vcov=vcovHC(fit2, type="HC1",cluster="vestr"))
## 
## z test of coefficients:
## 
##                                   Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)                      2.5546061  0.0471625 54.1660 < 2.2e-16 ***
## factor(race_eth)asian           -0.1992896  0.0912469 -2.1841  0.028957 *  
## factor(race_eth)black           -0.1079351  0.0196561 -5.4912 3.993e-08 ***
## factor(race_eth)hispanic        -0.0376994  0.0327476 -1.1512  0.249646    
## factor(race_eth)mult_race       -0.3242750  0.0351083 -9.2364 < 2.2e-16 ***
## factor(race_eth)other            0.0191324  0.0530902  0.3604  0.718566    
## factor(educ)associates          -0.0627613  0.0218199 -2.8763  0.004023 ** 
## factor(educ)highschool          -0.0104790  0.0177666 -0.5898  0.555314    
## factor(educ)LssThnHgh            0.0152747  0.0288400  0.5296  0.596364    
## factor(educ)someCollege         -0.0378893  0.0167849 -2.2573  0.023986 *  
## factor(age_cat)20-21             0.2975109  0.0418530  7.1085 1.173e-12 ***
## factor(age_cat)22-23             0.4784333  0.0401811 11.9069 < 2.2e-16 ***
## factor(age_cat)24-25             0.5047068  0.0402217 12.5481 < 2.2e-16 ***
## factor(age_cat)26-29             0.5407936  0.0399452 13.5384 < 2.2e-16 ***
## factor(age_cat)30-34             0.5698645  0.0403261 14.1314 < 2.2e-16 ***
## factor(age_cat)35-49             0.6266393  0.0392568 15.9626 < 2.2e-16 ***
## factor(age_cat)50-64             0.7000581  0.0418884 16.7124 < 2.2e-16 ***
## factor(age_cat)65+               0.8050754  0.0441789 18.2231 < 2.2e-16 ***
## factor(marst)divorced            0.0703371  0.0442580  1.5893  0.112003    
## factor(marst)separated           0.1108477  0.0171592  6.4600 1.047e-10 ***
## factor(marst)widowed             0.0957282  0.0214408  4.4648 8.015e-06 ***
## factor(sexuality)Bisexual        0.1083087  0.0255481  4.2394 2.241e-05 ***
## factor(sexuality)Les/Gay         0.0888386  0.0367351  2.4184  0.015591 *  
## factor(male)Male                 0.2412059  0.0124550 19.3662 < 2.2e-16 ***
## scale(income)                    0.0223243  0.0073309  3.0452  0.002325 ** 
## factor(lst_alc_use2)last 30days  1.3826601  0.0259688 53.2432 < 2.2e-16 ***
## factor(dep_year2)1               0.0275527  0.0204328  1.3485  0.177512    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
fit.nb1<-glm.nb(alcyrtot2~factor(race_eth),
              data=sub,
              weights=analwtc/mean(analwtc, na.rm=T))

fit.nb2<-glm.nb(alcyrtot2~factor(race_eth)+factor(educ)+factor(age_cat)+factor(marst)+factor(sexuality)+factor(male)+scale(income)+factor(lst_alc_use2)+factor(dep_year2),
              data=sub,
              weights=analwtc/mean(analwtc, na.rm=T))
#clx2(fit.nb2,cluster =sub$ststr)
tests1<-coeftest(fit.nb1, vcov=vcovHC(fit.nb2, type="HC1",cluster="vestr"))
tests<-coeftest(fit.nb2, vcov=vcovHC(fit.nb2, type="HC1",cluster="vestr"))
library(stargazer)

stargazer(fit.nb1, fit.nb2,style="demography", type = "text", t.auto=F,p.auto=F,coef=list(tests1[, 1],tests[,1]),  se =list(tests1[, 2], tests[, 2]), p=list(tests1[,4],tests[, 4])   )
## 
## -----------------------------------------------------------------
##                                             alcyrtot2            
##                                     Model 1          Model 2     
## -----------------------------------------------------------------
## factor(race_eth)asian              -0.505***         -0.404**    
##                                     (0.145)          (0.145)     
## factor(race_eth)black              -0.208***          -0.062     
##                                     (0.034)          (0.034)     
## factor(race_eth)hispanic             -0.069           -0.053     
##                                     (0.053)          (0.053)     
## factor(race_eth)mult_race          -0.462***        -0.396***    
##                                     (0.052)          (0.052)     
## factor(race_eth)other                -0.189           0.044      
##                                     (0.103)          (0.103)     
## factor(educ)associates                                -0.031     
##                                                      (0.036)     
## factor(educ)highschool                                0.040      
##                                                      (0.032)     
## factor(educ)LssThnHgh                                 0.045      
##                                                      (0.051)     
## factor(educ)someCollege                               -0.023     
##                                                      (0.029)     
## factor(age_cat)20-21                                 0.247***    
##                                                      (0.069)     
## factor(age_cat)22-23                                 0.463***    
##                                                      (0.071)     
## factor(age_cat)24-25                                 0.500***    
##                                                      (0.069)     
## factor(age_cat)26-29                                 0.525***    
##                                                      (0.069)     
## factor(age_cat)30-34                                 0.549***    
##                                                      (0.069)     
## factor(age_cat)35-49                                 0.556***    
##                                                      (0.068)     
## factor(age_cat)50-64                                 0.625***    
##                                                      (0.072)     
## factor(age_cat)65+                                   0.727***    
##                                                      (0.077)     
## factor(marst)divorced                                 0.032      
##                                                      (0.065)     
## factor(marst)separated                               0.090**     
##                                                      (0.032)     
## factor(marst)widowed                                  0.080*     
##                                                      (0.035)     
## factor(sexuality)Bisexual                            0.103**     
##                                                      (0.038)     
## factor(sexuality)Les/Gay                              0.110*     
##                                                      (0.056)     
## factor(male)Male                                     0.233***    
##                                                      (0.021)     
## scale(income)                                         0.004      
##                                                      (0.013)     
## factor(lst_alc_use2)last 30days                      1.483***    
##                                                      (0.037)     
## factor(dep_year2)1                                    0.004      
##                                                      (0.037)     
## Constant                            4.616***         2.520***    
##                                     (0.080)          (0.080)     
## N                                    24,983           24,983     
## Log Likelihood                    -138,637.200     -135,506.600  
## theta                           0.792*** (0.006) 0.966*** (0.008)
## AIC                               277,286.300      271,067.200   
## -----------------------------------------------------------------
## *p < .05; **p < .01; ***p < .001

##4) Compare the model fits of the alternative models using AIC

AIC(fit1)
##        eff.p          AIC     deltabar 
##    4942.1145 2216200.6628     190.0813
AIC(fit2)
## [1] 1909340
AIC(fit3)
## [1] NA
AIC(fit.nb1)
## [1] 277286.3
AIC(fit.nb2)
## [1] 271067.2

Following usage of the poisson regression, several other models were utilized in respective order a scaling model used for over dispersion, and two negative binomial models. Of those model fits utilizing aic, the scaled model fit the model the best, with the comprehensive model proving so for the negative binomial models. Of which the second negative binomial model results will be reported. On average asians and multi raced individuals used alcohol fewer days out of the year than their caucasian counterparts 33% and 7% respectively. Interestingly, and this is most likely due to alcohol avaialability but all age categories used alcohol more days out of the year than 18-19 year olds. Separated and widowed inviduals reported using more alcohol on average than married people 9% and 8% more days of alcohol usage, with bisexuals and lesbians also at risk of using alcohol within the year more days than heterosexuals with 10% and 11% more days of alcohol consumption. Males on average used alcohol more days in the last year than females 26% more days on average, and prior alcohol usage in the last month was also a good indicator of on average alcohol use within the year with 4.4 times the amount of days.