BRFSS_HW5

#Data Prep
nams<-names(brfss_17)
head(nams, n=10)

##  [1] "dispcode" "statere1" "safetime" "hhadult"  "genhlth"  "physhlth"
##  [7] "menthlth" "poorhlth" "hlthpln1" "persdoc2"

newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(brfss_17)<-newnames

##Get TX Cities 
brfss_17$tx<-NA
brfss_17$tx[grep(pattern = "TX", brfss_17$mmsaname)]<-1

##Remove Non-Responses 
brfss_17<-brfss_17%>%
  filter(tx==1, is.na(mmsawt)==F, is.na(hlthpln1)==F)

##Recodes
brfss_17$hlthpln1<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")

brfss_17$educ<-Recode(brfss_17$educa, recodes="1:3='0hs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)

brfss_17$ins<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")

brfss_17$marst<-Recode(brfss_17$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor=T)
brfss_17$marst<-relevel(brfss_17$marst, ref='married')

brfss_17$badhealth<-Recode(brfss_17$genhlth, recodes="4:5=1; 1:3=0; else=NA")

brfss_17$employ<-Recode(brfss_17$employ1, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor=T)
brfss_17$employ<-relevel(brfss_17$employ, ref='Employed')

brfss_17$agec<-cut(brfss_17$age80, breaks=c(0,24,39,59,79,99))
                   
brfss_17$smoke<-Recode(brfss_17$smoker3, recodes="1:2='Current'; 3='Former';4='NeverSmoked'; else=NA", as.factor=T)
brfss_17$smoke<-relevel(brfss_17$smoke, ref = "NeverSmoked")

brfss_17$sex<-as.factor(ifelse(brfss_17$sex==1, "Male", "Female"))

#recoding employ to make it binomial 
brfss_17$employa <- Recode(brfss_17$employ1, recodes="1:2=1; 2:6=0; else=NA", as.factor=T)
#Do I need to relevel with a binomial? Should the reference be employed?
# brfss_17$employa<-relevel(brfss_17$employa, ref='1')

#1. Define an ordinal or multinomial outcome variable of your choosing and define how you will recode the original variable
brfss_17$educord<-Recode(brfss_17$educa,
                               recodes="1:3=1;4=2;5:6=3; else=NA", as.factor = T)
brfss_17$educord<-relevel(brfss_17$educord, ref = "1")

options(survey.lonely.psu = "adjust")

describe(brfss_17$educord)

## [8633 obs.] 
## nominal factor: "3" "3" "3" "2" "3" "2" "3" "3" "3" "3" ...
## 3 levels: 1 | 2 | 3
## NAs: 40 (0.5%)
## 
##          n     %  val%
## 1      852   9.9   9.9
## 2     1920  22.2  22.3
## 3     5821  67.4  67.7
## NA      40   0.5    NA
## Total 8633 100.0 100.0

brfss_17$educordf<-Recode(brfss_17$educa,
                               recodes="1:3=1;4=2;5:6=3; else=NA", as.factor = F)

#"1:3='0hs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA"

#2. State a research question about what factors will affect your outcome
#Does education

names(brfss_17)

##   [1] "dispcode"  "statere1"  "safetime"  "hhadult"   "genhlth"   "physhlth" 
##   [7] "menthlth"  "poorhlth"  "hlthpln1"  "persdoc2"  "medcost"   "checkup1" 
##  [13] "bphigh4"   "bpmeds"    "cholchk1"  "toldhi2"   "cholmed1"  "cvdinfr4" 
##  [19] "cvdcrhd4"  "cvdstrk3"  "asthma3"   "asthnow"   "chcscncr"  "chcocncr" 
##  [25] "chccopd1"  "havarth3"  "addepev2"  "chckidny"  "diabete3"  "diabage2" 
##  [31] "lmtjoin3"  "arthdis2"  "arthsocl"  "joinpai1"  "sex"       "marital"  
##  [37] "educa"     "renthom1"  "numhhol2"  "numphon2"  "cpdemo1a"  "veteran3" 
##  [43] "employ1"   "children"  "income2"   "internet"  "weight2"   "height3"  
##  [49] "pregnant"  "deaf"      "blind"     "decide"    "diffwalk"  "diffdres" 
##  [55] "diffalon"  "smoke100"  "smokday2"  "stopsmk2"  "lastsmk2"  "usenow3"  
##  [61] "ecigaret"  "ecignow"   "alcday5"   "avedrnk2"  "drnk3ge5"  "maxdrnks" 
##  [67] "fruit2"    "fruitju2"  "fvgreen1"  "frenchf1"  "potatoe1"  "vegetab2" 
##  [73] "exerany2"  "exract11"  "exeroft1"  "exerhmm1"  "exract21"  "exeroft2" 
##  [79] "exerhmm2"  "strength"  "seatbelt"  "flushot6"  "flshtmy2"  "pneuvac3" 
##  [85] "shingle2"  "hivtst6"   "hivtstd3"  "hivrisk5"  "casthdx2"  "casthno2" 
##  [91] "callbckz"  "wdusenow"  "wdinftrk"  "wdhowoft"  "wdshare"   "namtribe" 
##  [97] "namothr"   "urbnrrl"   "ststr"     "impsex"    "rfhlth"    "phys14d"  
## [103] "ment14d"   "hcvu651"   "rfhype5"   "cholch1"   "rfchol1"   "michd"    
## [109] "ltasth1"   "casthm1"   "asthms1"   "drdxar1"   "lmtact1"   "lmtwrk1"  
## [115] "lmtscl1"   "prace1"    "mrace1"    "hispanc"   "race"      "raceg21"  
## [121] "racegr3"   "ageg5yr"   "age65yr"   "age80"     "ageg"      "wtkg3"    
## [127] "bmi5"      "bmi5cat"   "rfbmi5"    "educag"    "incomg"    "smoker3"  
## [133] "rfsmok3"   "ecigsts"   "curecig"   "drnkany5"  "rfbing5"   "drnkwek"  
## [139] "rfdrhv5"   "ftjuda2"   "frutda2"   "grenda1"   "frnchda"   "potada1"  
## [145] "vegeda2"   "misfrt1"   "misveg1"   "frtres1"   "vegres1"   "frutsu1"  
## [151] "vegesu1"   "frtlt1a"   "veglt1a"   "frt16a"    "veg23a"    "fruite1"  
## [157] "vegete1"   "totinda"   "minac11"   "minac21"   "pacat1"    "paindx1"  
## [163] "pa150r2"   "pa300r2"   "pa30021"   "pastrng"   "parec1"    "pastae1"  
## [169] "rfseat2"   "rfseat3"   "flshot6"   "pneumo2"   "aidtst3"   "mmsa"     
## [175] "mmsawt"    "seqno"     "mmsaname"  "tx"        "educ"      "ins"      
## [181] "marst"     "badhealth" "employ"    "agec"      "smoke"     "employa"  
## [187] "educord"   "educordf"

#3. Fit both the ordinal or the multinomial logistic regression models to your outcome.Are the assumptions of the proportional odds model met? How did you evaluate the proportional odds assumption? Which model (Proportional odds or Multinomial) fits the data better? how did you decide upon this?

#Questions with Answers
#Are the assumptions of the proportional model met?
##No. 
#How did you evaluate the proportional odds assumption?
##Two ways: I plotted the coefficients for the proportional odds assumption accounting for change. Then, visually, I determined that the gaps between both models (educordf>1 & edocordf>2) were significant to call it nonproportional. Additionally, I used the AIC after I fitted both the proportional and nonproportional models using the vglm and based on the results, the AIC for the nonproportional model was smaller and thus preferable. 
#Which model fits the data better? How did you decide upon this? 
##I ran three models: proportional, nonproportional, and multinomial. According to the AIC numbers, the nonproportional model is best 



#Assuming Proportional Odds Model
#some needed specs

options(survey.lonely.psu = "adjust")

pum1<-brfss_17%>%
  select(educord,educordf,employ,sex, smoke, agec,hlthpln1,
        marst, mmsawt, ststr) %>%
  filter( complete.cases(.))

#Survey design
des1<-svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data =pum1 )

##Fit nested model for education outcome 
fit.solra<-svyolr(educord~sex+smoke+marst+agec,des1)
summary(fit.solra)

## Call:
## svyolr(educord ~ sex + smoke + marst + agec, des1)
## 
## Coefficients:
##                     Value Std. Error    t value
## sexMale        -0.1763910  0.1051843 -1.6769711
## smokeCurrent   -0.6002756  0.1386558 -4.3292502
## smokeFormer     0.2637998  0.1293565  2.0393249
## marstcohab     -1.0321010  0.2728474 -3.7827039
## marstdivorced  -0.1608924  0.1602647 -1.0039168
## marstnm         0.0934110  0.1617973  0.5773334
## marstseparated -1.0891644  0.2605889 -4.1796271
## marstwidowed   -0.6609646  0.2062592 -3.2045334
## agec(24,39]     0.5163036  0.1984450  2.6017462
## agec(39,59]     0.1688668  0.2138820  0.7895324
## agec(59,79]     0.5584114  0.2296093  2.4320069
## agec(79,99]     0.0990137  0.3993428  0.2479416
## 
## Intercepts:
##     Value   Std. Error t value
## 1|2 -1.6505  0.2380    -6.9340
## 2|3 -0.3203  0.2275    -1.4079

#Interpretation
#males are less likely than females to have LOWER? levels of education
#Someone who is working is more likely to have LOWER? levels of education
#people who migrated within the state are less likely than someone who did not (stayed in same house) to have _____levels of education
#people who migrated 
#hispanic people are less likely to have ____ levels of education?
#someone living at 501% poverty level or more are more likely to have less education than those living at 1% or less of the poverty level


#Calculate the AIC ourself
fit.solra$deviance+2*length(fit.solra$coefficients)

## [1] 14829.78

#"examine" the proportional odds assumption by fitting logits for each change
ex_a<-svyglm(I(educordf>1)~sex+smoke+marst+agec, des1, family="binomial")

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

ex_b<-svyglm(I(educordf>2)~sex+smoke+marst+agec, des1, family="binomial")

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

#plot the coefficients of each model. If the proportional odds is correct, and the assumption is ok, then all these should be "approximately" the same values.
coefs1<-data.frame(parms =c( coefficients(ex_a)[-1],coefficients(ex_b)[-1]),
                  beta = rep(names(coefficients(ex_a))[-1], 2 ), 
                  mod = c(rep("I(educordf>1)", 12),
                          rep("I(educordf>2)", 12)))

coefs1%>%
  ggplot()+
  geom_point(aes(x=beta, y=parms, color=mod))+
  theme(axis.text.x = element_text(angle = 45))+
  labs(title = "Graphical Summary of Proportional Odds Assumption",
       x= "Beta",
       y= "Estimate")

#Interpreting the plot
#Nonproportionality based solely on the ggplot

#print the odds ratios, 
round(exp(rbind(coef(ex_a)[-1],
                coef(ex_b)[-1])),3)

##      sexMale smokeCurrent smokeFormer marstcohab marstdivorced marstnm
## [1,]   0.886        0.623       1.754      0.283         1.094   1.570
## [2,]   0.825        0.506       1.216      0.444         0.819   1.035
##      marstseparated marstwidowed agec(24,39] agec(39,59] agec(59,79]
## [1,]          0.419        0.777       0.899       0.601       0.994
## [2,]          0.316        0.453       2.055       1.479       2.132
##      agec(79,99]
## [1,]       0.544
## [2,]       1.364

#So, there is a break in the poverty501% parameter and others, therefore nonproportionality must be declared. 

# coefficients(ex1)
# coefficients(ex1)[-1]
# coefficients(ex2)

#Cumulative Logit Model Without Proportional Odds (Nonproportional)
#Ordinal regression with cumulative probabilities = in R search

#Proportional odds
fit.vgama<-vglm(as.ordered(educord)~sex+smoke+marst+agec,
               brfss_17, weights =mmsawt/mean(mmsawt, na.rm=T),
               family=cumulative(parallel = T, reverse = T))  #<-parallel = T == proportional odds
summary(fit.vgama)

## 
## Call:
## vglm(formula = as.ordered(educord) ~ sex + smoke + marst + agec, 
##     family = cumulative(parallel = T, reverse = T), data = brfss_17, 
##     weights = mmsawt/mean(mmsawt, na.rm = T))
## 
## Coefficients: 
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1   1.66166    0.08489  19.575  < 2e-16 ***
## (Intercept):2   0.32495    0.08233   3.947 7.92e-05 ***
## sexMale        -0.17908    0.04575  -3.914 9.06e-05 ***
## smokeCurrent   -0.60641    0.06263  -9.682  < 2e-16 ***
## smokeFormer     0.23884    0.06106   3.911 9.18e-05 ***
## marstcohab     -1.01151    0.10033 -10.081  < 2e-16 ***
## marstdivorced  -0.16303    0.07579  -2.151   0.0315 *  
## marstnm         0.07862    0.06674   1.178   0.2388    
## marstseparated -1.10803    0.12066  -9.183  < 2e-16 ***
## marstwidowed   -0.66616    0.10378  -6.419 1.37e-10 ***
## agec(24,39]     0.51111    0.07909   6.462 1.03e-10 ***
## agec(39,59]     0.16014    0.08257   1.939   0.0524 .  
## agec(59,79]     0.56466    0.09438   5.983 2.20e-09 ***
## agec(79,99]     0.09764    0.15794   0.618   0.5364    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
## 
## Residual deviance: 14912.18 on 16306 degrees of freedom
## 
## Log-likelihood: -7456.092 on 16306 degrees of freedom
## 
## Number of Fisher scoring iterations: 5 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Exponentiated coefficients:
##        sexMale   smokeCurrent    smokeFormer     marstcohab  marstdivorced 
##      0.8360356      0.5453073      1.2697749      0.3636702      0.8495665 
##        marstnm marstseparated   marstwidowed    agec(24,39]    agec(39,59] 
##      1.0817899      0.3302095      0.5136787      1.6671365      1.1736729 
##    agec(59,79]    agec(79,99] 
##      1.7588412      1.1025702

#Non-proportional odds
  fit.vgamb<-vglm(as.ordered(educord)~sex+smoke+marst+agec,
                  brfss_17, weights =mmsawt/mean(mmsawt, na.rm=T),
                  family=cumulative(parallel = F, reverse = T))

summary(fit.vgamb)

## 
## Call:
## vglm(formula = as.ordered(educord) ~ sex + smoke + marst + agec, 
##     family = cumulative(parallel = F, reverse = T), data = brfss_17, 
##     weights = mmsawt/mean(mmsawt, na.rm = T))
## 
## Coefficients: 
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1     1.96819    0.12258  16.057  < 2e-16 ***
## (Intercept):2     0.19243    0.08569   2.246  0.02473 *  
## sexMale:1        -0.12954    0.06233  -2.078  0.03768 *  
## sexMale:2        -0.20500    0.04754  -4.312 1.62e-05 ***
## smokeCurrent:1   -0.49764    0.08060  -6.174 6.66e-10 ***
## smokeCurrent:2   -0.67766    0.06711 -10.097  < 2e-16 ***
## smokeFormer:1     0.55127    0.09223   5.977 2.27e-09 ***
## smokeFormer:2     0.18448    0.06254   2.950  0.00318 ** 
## marstcohab:1     -1.17938    0.11629 -10.142  < 2e-16 ***
## marstcohab:2     -0.76519    0.10932  -7.000 2.57e-12 ***
## marstdivorced:1   0.05611    0.10437   0.538  0.59086    
## marstdivorced:2  -0.20857    0.07869  -2.651  0.00803 ** 
## marstnm:1         0.51177    0.09993   5.121 3.03e-07 ***
## marstnm:2         0.02041    0.06886   0.296  0.76697    
## marstseparated:1 -0.88617    0.14241  -6.223 4.88e-10 ***
## marstseparated:2 -1.15775    0.13554  -8.542  < 2e-16 ***
## marstwidowed:1   -0.17462    0.14435  -1.210  0.22641    
## marstwidowed:2   -0.82011    0.10994  -7.460 8.67e-14 ***
## agec(24,39]:1    -0.08692    0.11988  -0.725  0.46841    
## agec(24,39]:2     0.71394    0.08255   8.648  < 2e-16 ***
## agec(39,59]:1    -0.50237    0.12232  -4.107 4.01e-05 ***
## agec(39,59]:2     0.38860    0.08651   4.492 7.05e-06 ***
## agec(59,79]:1     0.01187    0.14110   0.084  0.93297    
## agec(59,79]:2     0.75759    0.09805   7.726 1.11e-14 ***
## agec(79,99]:1    -0.57226    0.21579  -2.652  0.00800 ** 
## agec(79,99]:2     0.33454    0.16613   2.014  0.04403 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
## 
## Residual deviance: 14666.8 on 16294 degrees of freedom
## 
## Log-likelihood: -7333.4 on 16294 degrees of freedom
## 
## Number of Fisher scoring iterations: 7 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Exponentiated coefficients:
##        sexMale:1        sexMale:2   smokeCurrent:1   smokeCurrent:2 
##        0.8785032        0.8146466        0.6079623        0.5078064 
##    smokeFormer:1    smokeFormer:2     marstcohab:1     marstcohab:2 
##        1.7354604        1.2025887        0.3074687        0.4652467 
##  marstdivorced:1  marstdivorced:2        marstnm:1        marstnm:2 
##        1.0577092        0.8117408        1.6682340        1.0206157 
## marstseparated:1 marstseparated:2   marstwidowed:1   marstwidowed:2 
##        0.4122312        0.3141935        0.8397786        0.4403817 
##    agec(24,39]:1    agec(24,39]:2    agec(39,59]:1    agec(39,59]:2 
##        0.9167488        2.0420301        0.6050935        1.4749179 
##    agec(59,79]:1    agec(59,79]:2    agec(79,99]:1    agec(79,99]:2 
##        1.0119385        2.1331209        0.5642464        1.3973042

AIC(fit.vgama)

## [1] 14940.18

AIC(fit.vgamb)

## [1] 14718.8

#Based off AIC, we would want to use the nonproportional model because it's a lower number 

#Multinomial Model can be used when the proportional model fails. 
mfita<-vglm(educordf~sex+smoke+marst+agec,
           family=multinomial(refLevel = 1),
           data=brfss_17,
           weights=mmsawt/mean(mmsawt, na.rm=T))

summary(mfita)

## 
## Call:
## vglm(formula = educordf ~ sex + smoke + marst + agec, family = multinomial(refLevel = 1), 
##     data = brfss_17, weights = mmsawt/mean(mmsawt, na.rm = T))
## 
## Coefficients: 
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1     0.96196    0.13876   6.933 4.13e-12 ***
## (Intercept):2     1.51624    0.12967  11.693  < 2e-16 ***
## sexMale:1         0.03199    0.07481   0.428 0.668877    
## sexMale:2        -0.17223    0.06600  -2.610 0.009065 ** 
## smokeCurrent:1   -0.06919    0.09493  -0.729 0.466073    
## smokeCurrent:2   -0.72618    0.08758  -8.291  < 2e-16 ***
## smokeFormer:1     0.47846    0.10600   4.514 6.37e-06 ***
## smokeFormer:2     0.48131    0.09414   5.113 3.17e-07 ***
## marstcohab:1     -1.01895    0.15207  -6.700 2.08e-11 ***
## marstcohab:2     -1.30935    0.12717 -10.296  < 2e-16 ***
## marstdivorced:1   0.28008    0.12326   2.272 0.023074 *  
## marstdivorced:2  -0.02817    0.10926  -0.258 0.796504    
## marstnm:1         0.58399    0.11400   5.123 3.01e-07 ***
## marstnm:2         0.40669    0.10260   3.964 7.37e-05 ***
## marstseparated:1 -0.20594    0.16694  -1.234 0.217354    
## marstseparated:2 -1.26995    0.16043  -7.916 2.46e-15 ***
## marstwidowed:1    0.42531    0.16232   2.620 0.008789 ** 
## marstwidowed:2   -0.54474    0.15248  -3.573 0.000354 ***
## agec(24,39]:1    -0.74548    0.13520  -5.514 3.51e-08 ***
## agec(24,39]:2     0.19788    0.12703   1.558 0.119301    
## agec(39,59]:1    -0.98052    0.13882  -7.063 1.63e-12 ***
## agec(39,59]:2    -0.27005    0.12999  -2.077 0.037756 *  
## agec(59,79]:1    -0.64531    0.16090  -4.011 6.05e-05 ***
## agec(59,79]:2     0.30888    0.14850   2.080 0.037521 *  
## agec(79,99]:1    -1.00520    0.25033  -4.015 5.93e-05 ***
## agec(79,99]:2    -0.37748    0.22820  -1.654 0.098094 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: log(mu[,2]/mu[,1]), log(mu[,3]/mu[,1])
## 
## Residual deviance: 14673.96 on 16294 degrees of freedom
## 
## Log-likelihood: -7336.979 on 16294 degrees of freedom
## 
## Number of Fisher scoring iterations: 5 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Reference group is level  1  of the response

#Interpreting the multinomial model summary


#Calculate the odds ratios for each covariate's effect on each model
round(exp(coef(mfita)), 3)

##    (Intercept):1    (Intercept):2        sexMale:1        sexMale:2 
##            2.617            4.555            1.033            0.842 
##   smokeCurrent:1   smokeCurrent:2    smokeFormer:1    smokeFormer:2 
##            0.933            0.484            1.614            1.618 
##     marstcohab:1     marstcohab:2  marstdivorced:1  marstdivorced:2 
##            0.361            0.270            1.323            0.972 
##        marstnm:1        marstnm:2 marstseparated:1 marstseparated:2 
##            1.793            1.502            0.814            0.281 
##   marstwidowed:1   marstwidowed:2    agec(24,39]:1    agec(24,39]:2 
##            1.530            0.580            0.475            1.219 
##    agec(39,59]:1    agec(39,59]:2    agec(59,79]:1    agec(59,79]:2 
##            0.375            0.763            0.524            1.362 
##    agec(79,99]:1    agec(79,99]:2 
##            0.366            0.686

round(exp(confint(mfita)), 3)

##                  2.5 % 97.5 %
## (Intercept):1    1.994  3.435
## (Intercept):2    3.533  5.873
## sexMale:1        0.892  1.196
## sexMale:2        0.740  0.958
## smokeCurrent:1   0.775  1.124
## smokeCurrent:2   0.407  0.574
## smokeFormer:1    1.311  1.986
## smokeFormer:2    1.346  1.946
## marstcohab:1     0.268  0.486
## marstcohab:2     0.210  0.346
## marstdivorced:1  1.039  1.685
## marstdivorced:2  0.785  1.204
## marstnm:1        1.434  2.242
## marstnm:2        1.228  1.836
## marstseparated:1 0.587  1.129
## marstseparated:2 0.205  0.385
## marstwidowed:1   1.113  2.103
## marstwidowed:2   0.430  0.782
## agec(24,39]:1    0.364  0.618
## agec(24,39]:2    0.950  1.563
## agec(39,59]:1    0.286  0.492
## agec(39,59]:2    0.592  0.985
## agec(59,79]:1    0.383  0.719
## agec(59,79]:2    1.018  1.822
## agec(79,99]:1    0.224  0.598
## agec(79,99]:2    0.438  1.072

#4. For the best fitting model from part c, Describe the results of your model, and present output from the model in terms of odds ratios and confidence intervals for all model parameters in a table.

#Interpretation of the nonproportional model (my best fitting model)
##Sex: males are less likely than females to have be a HS grad in both scenarios(log(mu[,2]/mu[,1]), log(mu[,3]/mu[,1])
##Smoke: people who are current smokers are less likely to have lower education compared to never smokers. Former smokers are more likely than never smokers to be a hs grad or college grad. 
##Marital Status: Married people cohabiting are less likely than married people to have lower education (<2 or3) and both are statistically significant
##age: people ages 59-79 are more likely to have less education compared to younger people 


#odds ratios for each covariate's effect on each model
round(exp(coef(mfita)), 3)

##    (Intercept):1    (Intercept):2        sexMale:1        sexMale:2 
##            2.617            4.555            1.033            0.842 
##   smokeCurrent:1   smokeCurrent:2    smokeFormer:1    smokeFormer:2 
##            0.933            0.484            1.614            1.618 
##     marstcohab:1     marstcohab:2  marstdivorced:1  marstdivorced:2 
##            0.361            0.270            1.323            0.972 
##        marstnm:1        marstnm:2 marstseparated:1 marstseparated:2 
##            1.793            1.502            0.814            0.281 
##   marstwidowed:1   marstwidowed:2    agec(24,39]:1    agec(24,39]:2 
##            1.530            0.580            0.475            1.219 
##    agec(39,59]:1    agec(39,59]:2    agec(59,79]:1    agec(59,79]:2 
##            0.375            0.763            0.524            1.362 
##    agec(79,99]:1    agec(79,99]:2 
##            0.366            0.686

#confidence intervals 
round(exp(confint(mfita)), 3)

##                  2.5 % 97.5 %
## (Intercept):1    1.994  3.435
## (Intercept):2    3.533  5.873
## sexMale:1        0.892  1.196
## sexMale:2        0.740  0.958
## smokeCurrent:1   0.775  1.124
## smokeCurrent:2   0.407  0.574
## smokeFormer:1    1.311  1.986
## smokeFormer:2    1.346  1.946
## marstcohab:1     0.268  0.486
## marstcohab:2     0.210  0.346
## marstdivorced:1  1.039  1.685
## marstdivorced:2  0.785  1.204
## marstnm:1        1.434  2.242
## marstnm:2        1.228  1.836
## marstseparated:1 0.587  1.129
## marstseparated:2 0.205  0.385
## marstwidowed:1   1.113  2.103
## marstwidowed:2   0.430  0.782
## agec(24,39]:1    0.364  0.618
## agec(24,39]:2    0.950  1.563
## agec(39,59]:1    0.286  0.492
## agec(39,59]:2    0.592  0.985
## agec(59,79]:1    0.383  0.719
## agec(59,79]:2    1.018  1.822
## agec(79,99]:1    0.224  0.598
## agec(79,99]:2    0.438  1.072

#Get fitted probabilities model 
dat_a<-expand.grid(sex=levels(brfss_17$sex),
                 smoke=levels(brfss_17$smoke),
                 marst=levels(brfss_17$marst),
                 agec =levels (brfss_17$agec))


#generate the fitted values
#Unfortunately, the survey proportional odds model won't generate fitted values
#but here I use a weighted multinomial model, which fits the data better anyway

fitm<-predict(mfita, newdat=dat_a,type="response")
#add the values to the fake data
dat45<-cbind(dat_a, round(fitm,3))

#Print the fitted probabilities
head(dat45, n=20)

##       sex       smoke    marst   agec     1     2     3
## 1  Female NeverSmoked  married (0,24] 0.122 0.320 0.557
## 2    Male NeverSmoked  married (0,24] 0.133 0.359 0.509
## 3  Female     Current  married (0,24] 0.177 0.433 0.390
## 4    Male     Current  married (0,24] 0.186 0.469 0.345
## 5  Female      Former  married (0,24] 0.079 0.335 0.585
## 6    Male      Former  married (0,24] 0.086 0.377 0.537
## 7  Female NeverSmoked    cohab (0,24] 0.315 0.298 0.387
## 8    Male NeverSmoked    cohab (0,24] 0.332 0.324 0.344
## 9  Female     Current    cohab (0,24] 0.404 0.356 0.240
## 10   Male     Current    cohab (0,24] 0.415 0.377 0.208
## 11 Female      Former    cohab (0,24] 0.222 0.338 0.441
## 12   Male      Former    cohab (0,24] 0.235 0.370 0.394
## 13 Female NeverSmoked divorced (0,24] 0.112 0.389 0.498
## 14   Male NeverSmoked divorced (0,24] 0.120 0.431 0.449
## 15 Female     Current divorced (0,24] 0.157 0.507 0.336
## 16   Male     Current divorced (0,24] 0.163 0.543 0.294
## 17 Female      Former divorced (0,24] 0.073 0.406 0.521
## 18   Male      Former divorced (0,24] 0.078 0.451 0.471
## 19 Female NeverSmoked       nm (0,24] 0.080 0.374 0.546
## 20   Male NeverSmoked       nm (0,24] 0.086 0.418 0.496

#Fitted probabilities from the proportional odds model
fitted.ord<-round(predict(fit.vgama, newdat=dat_a[,1:4], type="response"), 3)

# dat<-cbind(dat_a, fitted.ord)
# 
# names(dat)<-c(names(dat)[1:4], "mp1", "mp2", "mp3", "op1", "op2", "op3")
# 
# head(dat, n=20)

AIC(fit.vgama) #proportional odds

## [1] 14940.18

AIC(fit.vgamb) #cumulative logit, non proportional

## [1] 14718.8

AIC(mfita) #multinomial

## [1] 14725.96

BRFSS_HW5

Coda Rayo-Garza

3/1/2021