#Data Prep
nams<-names(brfss_17)
head(nams, n=10)
## [1] "dispcode" "statere1" "safetime" "hhadult" "genhlth" "physhlth"
## [7] "menthlth" "poorhlth" "hlthpln1" "persdoc2"
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(brfss_17)<-newnames
##Get TX Cities
brfss_17$tx<-NA
brfss_17$tx[grep(pattern = "TX", brfss_17$mmsaname)]<-1
##Remove Non-Responses
brfss_17<-brfss_17%>%
filter(tx==1, is.na(mmsawt)==F, is.na(hlthpln1)==F)
##Recodes
brfss_17$hlthpln1<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")
brfss_17$educ<-Recode(brfss_17$educa, recodes="1:3='0hs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)
brfss_17$ins<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")
brfss_17$marst<-Recode(brfss_17$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor=T)
brfss_17$marst<-relevel(brfss_17$marst, ref='married')
brfss_17$badhealth<-Recode(brfss_17$genhlth, recodes="4:5=1; 1:3=0; else=NA")
brfss_17$employ<-Recode(brfss_17$employ1, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor=T)
brfss_17$employ<-relevel(brfss_17$employ, ref='Employed')
brfss_17$agec<-cut(brfss_17$age80, breaks=c(0,24,39,59,79,99))
brfss_17$smoke<-Recode(brfss_17$smoker3, recodes="1:2='Current'; 3='Former';4='NeverSmoked'; else=NA", as.factor=T)
brfss_17$smoke<-relevel(brfss_17$smoke, ref = "NeverSmoked")
brfss_17$sex<-as.factor(ifelse(brfss_17$sex==1, "Male", "Female"))
#recoding employ to make it binomial
brfss_17$employa <- Recode(brfss_17$employ1, recodes="1:2=1; 2:6=0; else=NA", as.factor=T)
#Do I need to relevel with a binomial? Should the reference be employed?
# brfss_17$employa<-relevel(brfss_17$employa, ref='1')
#1. Define an ordinal or multinomial outcome variable of your choosing and define how you will recode the original variable
brfss_17$educord<-Recode(brfss_17$educa,
recodes="1:3=1;4=2;5:6=3; else=NA", as.factor = T)
brfss_17$educord<-relevel(brfss_17$educord, ref = "1")
options(survey.lonely.psu = "adjust")
describe(brfss_17$educord)
## [8633 obs.]
## nominal factor: "3" "3" "3" "2" "3" "2" "3" "3" "3" "3" ...
## 3 levels: 1 | 2 | 3
## NAs: 40 (0.5%)
##
## n % val%
## 1 852 9.9 9.9
## 2 1920 22.2 22.3
## 3 5821 67.4 67.7
## NA 40 0.5 NA
## Total 8633 100.0 100.0
brfss_17$educordf<-Recode(brfss_17$educa,
recodes="1:3=1;4=2;5:6=3; else=NA", as.factor = F)
#"1:3='0hs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA"
#2. State a research question about what factors will affect your outcome
#Does education
names(brfss_17)
## [1] "dispcode" "statere1" "safetime" "hhadult" "genhlth" "physhlth"
## [7] "menthlth" "poorhlth" "hlthpln1" "persdoc2" "medcost" "checkup1"
## [13] "bphigh4" "bpmeds" "cholchk1" "toldhi2" "cholmed1" "cvdinfr4"
## [19] "cvdcrhd4" "cvdstrk3" "asthma3" "asthnow" "chcscncr" "chcocncr"
## [25] "chccopd1" "havarth3" "addepev2" "chckidny" "diabete3" "diabage2"
## [31] "lmtjoin3" "arthdis2" "arthsocl" "joinpai1" "sex" "marital"
## [37] "educa" "renthom1" "numhhol2" "numphon2" "cpdemo1a" "veteran3"
## [43] "employ1" "children" "income2" "internet" "weight2" "height3"
## [49] "pregnant" "deaf" "blind" "decide" "diffwalk" "diffdres"
## [55] "diffalon" "smoke100" "smokday2" "stopsmk2" "lastsmk2" "usenow3"
## [61] "ecigaret" "ecignow" "alcday5" "avedrnk2" "drnk3ge5" "maxdrnks"
## [67] "fruit2" "fruitju2" "fvgreen1" "frenchf1" "potatoe1" "vegetab2"
## [73] "exerany2" "exract11" "exeroft1" "exerhmm1" "exract21" "exeroft2"
## [79] "exerhmm2" "strength" "seatbelt" "flushot6" "flshtmy2" "pneuvac3"
## [85] "shingle2" "hivtst6" "hivtstd3" "hivrisk5" "casthdx2" "casthno2"
## [91] "callbckz" "wdusenow" "wdinftrk" "wdhowoft" "wdshare" "namtribe"
## [97] "namothr" "urbnrrl" "ststr" "impsex" "rfhlth" "phys14d"
## [103] "ment14d" "hcvu651" "rfhype5" "cholch1" "rfchol1" "michd"
## [109] "ltasth1" "casthm1" "asthms1" "drdxar1" "lmtact1" "lmtwrk1"
## [115] "lmtscl1" "prace1" "mrace1" "hispanc" "race" "raceg21"
## [121] "racegr3" "ageg5yr" "age65yr" "age80" "ageg" "wtkg3"
## [127] "bmi5" "bmi5cat" "rfbmi5" "educag" "incomg" "smoker3"
## [133] "rfsmok3" "ecigsts" "curecig" "drnkany5" "rfbing5" "drnkwek"
## [139] "rfdrhv5" "ftjuda2" "frutda2" "grenda1" "frnchda" "potada1"
## [145] "vegeda2" "misfrt1" "misveg1" "frtres1" "vegres1" "frutsu1"
## [151] "vegesu1" "frtlt1a" "veglt1a" "frt16a" "veg23a" "fruite1"
## [157] "vegete1" "totinda" "minac11" "minac21" "pacat1" "paindx1"
## [163] "pa150r2" "pa300r2" "pa30021" "pastrng" "parec1" "pastae1"
## [169] "rfseat2" "rfseat3" "flshot6" "pneumo2" "aidtst3" "mmsa"
## [175] "mmsawt" "seqno" "mmsaname" "tx" "educ" "ins"
## [181] "marst" "badhealth" "employ" "agec" "smoke" "employa"
## [187] "educord" "educordf"
#3. Fit both the ordinal or the multinomial logistic regression models to your outcome.Are the assumptions of the proportional odds model met? How did you evaluate the proportional odds assumption? Which model (Proportional odds or Multinomial) fits the data better? how did you decide upon this?
#Questions with Answers
#Are the assumptions of the proportional model met?
##No.
#How did you evaluate the proportional odds assumption?
##Two ways: I plotted the coefficients for the proportional odds assumption accounting for change. Then, visually, I determined that the gaps between both models (educordf>1 & edocordf>2) were significant to call it nonproportional. Additionally, I used the AIC after I fitted both the proportional and nonproportional models using the vglm and based on the results, the AIC for the nonproportional model was smaller and thus preferable.
#Which model fits the data better? How did you decide upon this?
##I ran three models: proportional, nonproportional, and multinomial. According to the AIC numbers, the nonproportional model is best
#Assuming Proportional Odds Model
#some needed specs
options(survey.lonely.psu = "adjust")
pum1<-brfss_17%>%
select(educord,educordf,employ,sex, smoke, agec,hlthpln1,
marst, mmsawt, ststr) %>%
filter( complete.cases(.))
#Survey design
des1<-svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data =pum1 )
##Fit nested model for education outcome
fit.solra<-svyolr(educord~sex+smoke+marst+agec,des1)
summary(fit.solra)
## Call:
## svyolr(educord ~ sex + smoke + marst + agec, des1)
##
## Coefficients:
## Value Std. Error t value
## sexMale -0.1763910 0.1051843 -1.6769711
## smokeCurrent -0.6002756 0.1386558 -4.3292502
## smokeFormer 0.2637998 0.1293565 2.0393249
## marstcohab -1.0321010 0.2728474 -3.7827039
## marstdivorced -0.1608924 0.1602647 -1.0039168
## marstnm 0.0934110 0.1617973 0.5773334
## marstseparated -1.0891644 0.2605889 -4.1796271
## marstwidowed -0.6609646 0.2062592 -3.2045334
## agec(24,39] 0.5163036 0.1984450 2.6017462
## agec(39,59] 0.1688668 0.2138820 0.7895324
## agec(59,79] 0.5584114 0.2296093 2.4320069
## agec(79,99] 0.0990137 0.3993428 0.2479416
##
## Intercepts:
## Value Std. Error t value
## 1|2 -1.6505 0.2380 -6.9340
## 2|3 -0.3203 0.2275 -1.4079
#Interpretation
#males are less likely than females to have LOWER? levels of education
#Someone who is working is more likely to have LOWER? levels of education
#people who migrated within the state are less likely than someone who did not (stayed in same house) to have _____levels of education
#people who migrated
#hispanic people are less likely to have ____ levels of education?
#someone living at 501% poverty level or more are more likely to have less education than those living at 1% or less of the poverty level
#Calculate the AIC ourself
fit.solra$deviance+2*length(fit.solra$coefficients)
## [1] 14829.78
#"examine" the proportional odds assumption by fitting logits for each change
ex_a<-svyglm(I(educordf>1)~sex+smoke+marst+agec, des1, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
ex_b<-svyglm(I(educordf>2)~sex+smoke+marst+agec, des1, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#plot the coefficients of each model. If the proportional odds is correct, and the assumption is ok, then all these should be "approximately" the same values.
coefs1<-data.frame(parms =c( coefficients(ex_a)[-1],coefficients(ex_b)[-1]),
beta = rep(names(coefficients(ex_a))[-1], 2 ),
mod = c(rep("I(educordf>1)", 12),
rep("I(educordf>2)", 12)))
coefs1%>%
ggplot()+
geom_point(aes(x=beta, y=parms, color=mod))+
theme(axis.text.x = element_text(angle = 45))+
labs(title = "Graphical Summary of Proportional Odds Assumption",
x= "Beta",
y= "Estimate")

#Interpreting the plot
#Nonproportionality based solely on the ggplot
#print the odds ratios,
round(exp(rbind(coef(ex_a)[-1],
coef(ex_b)[-1])),3)
## sexMale smokeCurrent smokeFormer marstcohab marstdivorced marstnm
## [1,] 0.886 0.623 1.754 0.283 1.094 1.570
## [2,] 0.825 0.506 1.216 0.444 0.819 1.035
## marstseparated marstwidowed agec(24,39] agec(39,59] agec(59,79]
## [1,] 0.419 0.777 0.899 0.601 0.994
## [2,] 0.316 0.453 2.055 1.479 2.132
## agec(79,99]
## [1,] 0.544
## [2,] 1.364
#So, there is a break in the poverty501% parameter and others, therefore nonproportionality must be declared.
# coefficients(ex1)
# coefficients(ex1)[-1]
# coefficients(ex2)
#Cumulative Logit Model Without Proportional Odds (Nonproportional)
#Ordinal regression with cumulative probabilities = in R search
#Proportional odds
fit.vgama<-vglm(as.ordered(educord)~sex+smoke+marst+agec,
brfss_17, weights =mmsawt/mean(mmsawt, na.rm=T),
family=cumulative(parallel = T, reverse = T)) #<-parallel = T == proportional odds
summary(fit.vgama)
##
## Call:
## vglm(formula = as.ordered(educord) ~ sex + smoke + marst + agec,
## family = cumulative(parallel = T, reverse = T), data = brfss_17,
## weights = mmsawt/mean(mmsawt, na.rm = T))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 1.66166 0.08489 19.575 < 2e-16 ***
## (Intercept):2 0.32495 0.08233 3.947 7.92e-05 ***
## sexMale -0.17908 0.04575 -3.914 9.06e-05 ***
## smokeCurrent -0.60641 0.06263 -9.682 < 2e-16 ***
## smokeFormer 0.23884 0.06106 3.911 9.18e-05 ***
## marstcohab -1.01151 0.10033 -10.081 < 2e-16 ***
## marstdivorced -0.16303 0.07579 -2.151 0.0315 *
## marstnm 0.07862 0.06674 1.178 0.2388
## marstseparated -1.10803 0.12066 -9.183 < 2e-16 ***
## marstwidowed -0.66616 0.10378 -6.419 1.37e-10 ***
## agec(24,39] 0.51111 0.07909 6.462 1.03e-10 ***
## agec(39,59] 0.16014 0.08257 1.939 0.0524 .
## agec(59,79] 0.56466 0.09438 5.983 2.20e-09 ***
## agec(79,99] 0.09764 0.15794 0.618 0.5364
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 14912.18 on 16306 degrees of freedom
##
## Log-likelihood: -7456.092 on 16306 degrees of freedom
##
## Number of Fisher scoring iterations: 5
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## sexMale smokeCurrent smokeFormer marstcohab marstdivorced
## 0.8360356 0.5453073 1.2697749 0.3636702 0.8495665
## marstnm marstseparated marstwidowed agec(24,39] agec(39,59]
## 1.0817899 0.3302095 0.5136787 1.6671365 1.1736729
## agec(59,79] agec(79,99]
## 1.7588412 1.1025702
#Non-proportional odds
fit.vgamb<-vglm(as.ordered(educord)~sex+smoke+marst+agec,
brfss_17, weights =mmsawt/mean(mmsawt, na.rm=T),
family=cumulative(parallel = F, reverse = T))
summary(fit.vgamb)
##
## Call:
## vglm(formula = as.ordered(educord) ~ sex + smoke + marst + agec,
## family = cumulative(parallel = F, reverse = T), data = brfss_17,
## weights = mmsawt/mean(mmsawt, na.rm = T))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 1.96819 0.12258 16.057 < 2e-16 ***
## (Intercept):2 0.19243 0.08569 2.246 0.02473 *
## sexMale:1 -0.12954 0.06233 -2.078 0.03768 *
## sexMale:2 -0.20500 0.04754 -4.312 1.62e-05 ***
## smokeCurrent:1 -0.49764 0.08060 -6.174 6.66e-10 ***
## smokeCurrent:2 -0.67766 0.06711 -10.097 < 2e-16 ***
## smokeFormer:1 0.55127 0.09223 5.977 2.27e-09 ***
## smokeFormer:2 0.18448 0.06254 2.950 0.00318 **
## marstcohab:1 -1.17938 0.11629 -10.142 < 2e-16 ***
## marstcohab:2 -0.76519 0.10932 -7.000 2.57e-12 ***
## marstdivorced:1 0.05611 0.10437 0.538 0.59086
## marstdivorced:2 -0.20857 0.07869 -2.651 0.00803 **
## marstnm:1 0.51177 0.09993 5.121 3.03e-07 ***
## marstnm:2 0.02041 0.06886 0.296 0.76697
## marstseparated:1 -0.88617 0.14241 -6.223 4.88e-10 ***
## marstseparated:2 -1.15775 0.13554 -8.542 < 2e-16 ***
## marstwidowed:1 -0.17462 0.14435 -1.210 0.22641
## marstwidowed:2 -0.82011 0.10994 -7.460 8.67e-14 ***
## agec(24,39]:1 -0.08692 0.11988 -0.725 0.46841
## agec(24,39]:2 0.71394 0.08255 8.648 < 2e-16 ***
## agec(39,59]:1 -0.50237 0.12232 -4.107 4.01e-05 ***
## agec(39,59]:2 0.38860 0.08651 4.492 7.05e-06 ***
## agec(59,79]:1 0.01187 0.14110 0.084 0.93297
## agec(59,79]:2 0.75759 0.09805 7.726 1.11e-14 ***
## agec(79,99]:1 -0.57226 0.21579 -2.652 0.00800 **
## agec(79,99]:2 0.33454 0.16613 2.014 0.04403 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 14666.8 on 16294 degrees of freedom
##
## Log-likelihood: -7333.4 on 16294 degrees of freedom
##
## Number of Fisher scoring iterations: 7
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## sexMale:1 sexMale:2 smokeCurrent:1 smokeCurrent:2
## 0.8785032 0.8146466 0.6079623 0.5078064
## smokeFormer:1 smokeFormer:2 marstcohab:1 marstcohab:2
## 1.7354604 1.2025887 0.3074687 0.4652467
## marstdivorced:1 marstdivorced:2 marstnm:1 marstnm:2
## 1.0577092 0.8117408 1.6682340 1.0206157
## marstseparated:1 marstseparated:2 marstwidowed:1 marstwidowed:2
## 0.4122312 0.3141935 0.8397786 0.4403817
## agec(24,39]:1 agec(24,39]:2 agec(39,59]:1 agec(39,59]:2
## 0.9167488 2.0420301 0.6050935 1.4749179
## agec(59,79]:1 agec(59,79]:2 agec(79,99]:1 agec(79,99]:2
## 1.0119385 2.1331209 0.5642464 1.3973042
AIC(fit.vgama)
## [1] 14940.18
AIC(fit.vgamb)
## [1] 14718.8
#Based off AIC, we would want to use the nonproportional model because it's a lower number
#Multinomial Model can be used when the proportional model fails.
mfita<-vglm(educordf~sex+smoke+marst+agec,
family=multinomial(refLevel = 1),
data=brfss_17,
weights=mmsawt/mean(mmsawt, na.rm=T))
summary(mfita)
##
## Call:
## vglm(formula = educordf ~ sex + smoke + marst + agec, family = multinomial(refLevel = 1),
## data = brfss_17, weights = mmsawt/mean(mmsawt, na.rm = T))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.96196 0.13876 6.933 4.13e-12 ***
## (Intercept):2 1.51624 0.12967 11.693 < 2e-16 ***
## sexMale:1 0.03199 0.07481 0.428 0.668877
## sexMale:2 -0.17223 0.06600 -2.610 0.009065 **
## smokeCurrent:1 -0.06919 0.09493 -0.729 0.466073
## smokeCurrent:2 -0.72618 0.08758 -8.291 < 2e-16 ***
## smokeFormer:1 0.47846 0.10600 4.514 6.37e-06 ***
## smokeFormer:2 0.48131 0.09414 5.113 3.17e-07 ***
## marstcohab:1 -1.01895 0.15207 -6.700 2.08e-11 ***
## marstcohab:2 -1.30935 0.12717 -10.296 < 2e-16 ***
## marstdivorced:1 0.28008 0.12326 2.272 0.023074 *
## marstdivorced:2 -0.02817 0.10926 -0.258 0.796504
## marstnm:1 0.58399 0.11400 5.123 3.01e-07 ***
## marstnm:2 0.40669 0.10260 3.964 7.37e-05 ***
## marstseparated:1 -0.20594 0.16694 -1.234 0.217354
## marstseparated:2 -1.26995 0.16043 -7.916 2.46e-15 ***
## marstwidowed:1 0.42531 0.16232 2.620 0.008789 **
## marstwidowed:2 -0.54474 0.15248 -3.573 0.000354 ***
## agec(24,39]:1 -0.74548 0.13520 -5.514 3.51e-08 ***
## agec(24,39]:2 0.19788 0.12703 1.558 0.119301
## agec(39,59]:1 -0.98052 0.13882 -7.063 1.63e-12 ***
## agec(39,59]:2 -0.27005 0.12999 -2.077 0.037756 *
## agec(59,79]:1 -0.64531 0.16090 -4.011 6.05e-05 ***
## agec(59,79]:2 0.30888 0.14850 2.080 0.037521 *
## agec(79,99]:1 -1.00520 0.25033 -4.015 5.93e-05 ***
## agec(79,99]:2 -0.37748 0.22820 -1.654 0.098094 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,2]/mu[,1]), log(mu[,3]/mu[,1])
##
## Residual deviance: 14673.96 on 16294 degrees of freedom
##
## Log-likelihood: -7336.979 on 16294 degrees of freedom
##
## Number of Fisher scoring iterations: 5
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Reference group is level 1 of the response
#Interpreting the multinomial model summary
#Calculate the odds ratios for each covariate's effect on each model
round(exp(coef(mfita)), 3)
## (Intercept):1 (Intercept):2 sexMale:1 sexMale:2
## 2.617 4.555 1.033 0.842
## smokeCurrent:1 smokeCurrent:2 smokeFormer:1 smokeFormer:2
## 0.933 0.484 1.614 1.618
## marstcohab:1 marstcohab:2 marstdivorced:1 marstdivorced:2
## 0.361 0.270 1.323 0.972
## marstnm:1 marstnm:2 marstseparated:1 marstseparated:2
## 1.793 1.502 0.814 0.281
## marstwidowed:1 marstwidowed:2 agec(24,39]:1 agec(24,39]:2
## 1.530 0.580 0.475 1.219
## agec(39,59]:1 agec(39,59]:2 agec(59,79]:1 agec(59,79]:2
## 0.375 0.763 0.524 1.362
## agec(79,99]:1 agec(79,99]:2
## 0.366 0.686
round(exp(confint(mfita)), 3)
## 2.5 % 97.5 %
## (Intercept):1 1.994 3.435
## (Intercept):2 3.533 5.873
## sexMale:1 0.892 1.196
## sexMale:2 0.740 0.958
## smokeCurrent:1 0.775 1.124
## smokeCurrent:2 0.407 0.574
## smokeFormer:1 1.311 1.986
## smokeFormer:2 1.346 1.946
## marstcohab:1 0.268 0.486
## marstcohab:2 0.210 0.346
## marstdivorced:1 1.039 1.685
## marstdivorced:2 0.785 1.204
## marstnm:1 1.434 2.242
## marstnm:2 1.228 1.836
## marstseparated:1 0.587 1.129
## marstseparated:2 0.205 0.385
## marstwidowed:1 1.113 2.103
## marstwidowed:2 0.430 0.782
## agec(24,39]:1 0.364 0.618
## agec(24,39]:2 0.950 1.563
## agec(39,59]:1 0.286 0.492
## agec(39,59]:2 0.592 0.985
## agec(59,79]:1 0.383 0.719
## agec(59,79]:2 1.018 1.822
## agec(79,99]:1 0.224 0.598
## agec(79,99]:2 0.438 1.072
#4. For the best fitting model from part c, Describe the results of your model, and present output from the model in terms of odds ratios and confidence intervals for all model parameters in a table.
#Interpretation of the nonproportional model (my best fitting model)
##Sex: males are less likely than females to have be a HS grad in both scenarios(log(mu[,2]/mu[,1]), log(mu[,3]/mu[,1])
##Smoke: people who are current smokers are less likely to have lower education compared to never smokers. Former smokers are more likely than never smokers to be a hs grad or college grad.
##Marital Status: Married people cohabiting are less likely than married people to have lower education (<2 or3) and both are statistically significant
##age: people ages 59-79 are more likely to have less education compared to younger people
#odds ratios for each covariate's effect on each model
round(exp(coef(mfita)), 3)
## (Intercept):1 (Intercept):2 sexMale:1 sexMale:2
## 2.617 4.555 1.033 0.842
## smokeCurrent:1 smokeCurrent:2 smokeFormer:1 smokeFormer:2
## 0.933 0.484 1.614 1.618
## marstcohab:1 marstcohab:2 marstdivorced:1 marstdivorced:2
## 0.361 0.270 1.323 0.972
## marstnm:1 marstnm:2 marstseparated:1 marstseparated:2
## 1.793 1.502 0.814 0.281
## marstwidowed:1 marstwidowed:2 agec(24,39]:1 agec(24,39]:2
## 1.530 0.580 0.475 1.219
## agec(39,59]:1 agec(39,59]:2 agec(59,79]:1 agec(59,79]:2
## 0.375 0.763 0.524 1.362
## agec(79,99]:1 agec(79,99]:2
## 0.366 0.686
#confidence intervals
round(exp(confint(mfita)), 3)
## 2.5 % 97.5 %
## (Intercept):1 1.994 3.435
## (Intercept):2 3.533 5.873
## sexMale:1 0.892 1.196
## sexMale:2 0.740 0.958
## smokeCurrent:1 0.775 1.124
## smokeCurrent:2 0.407 0.574
## smokeFormer:1 1.311 1.986
## smokeFormer:2 1.346 1.946
## marstcohab:1 0.268 0.486
## marstcohab:2 0.210 0.346
## marstdivorced:1 1.039 1.685
## marstdivorced:2 0.785 1.204
## marstnm:1 1.434 2.242
## marstnm:2 1.228 1.836
## marstseparated:1 0.587 1.129
## marstseparated:2 0.205 0.385
## marstwidowed:1 1.113 2.103
## marstwidowed:2 0.430 0.782
## agec(24,39]:1 0.364 0.618
## agec(24,39]:2 0.950 1.563
## agec(39,59]:1 0.286 0.492
## agec(39,59]:2 0.592 0.985
## agec(59,79]:1 0.383 0.719
## agec(59,79]:2 1.018 1.822
## agec(79,99]:1 0.224 0.598
## agec(79,99]:2 0.438 1.072
#Get fitted probabilities model
dat_a<-expand.grid(sex=levels(brfss_17$sex),
smoke=levels(brfss_17$smoke),
marst=levels(brfss_17$marst),
agec =levels (brfss_17$agec))
#generate the fitted values
#Unfortunately, the survey proportional odds model won't generate fitted values
#but here I use a weighted multinomial model, which fits the data better anyway
fitm<-predict(mfita, newdat=dat_a,type="response")
#add the values to the fake data
dat45<-cbind(dat_a, round(fitm,3))
#Print the fitted probabilities
head(dat45, n=20)
## sex smoke marst agec 1 2 3
## 1 Female NeverSmoked married (0,24] 0.122 0.320 0.557
## 2 Male NeverSmoked married (0,24] 0.133 0.359 0.509
## 3 Female Current married (0,24] 0.177 0.433 0.390
## 4 Male Current married (0,24] 0.186 0.469 0.345
## 5 Female Former married (0,24] 0.079 0.335 0.585
## 6 Male Former married (0,24] 0.086 0.377 0.537
## 7 Female NeverSmoked cohab (0,24] 0.315 0.298 0.387
## 8 Male NeverSmoked cohab (0,24] 0.332 0.324 0.344
## 9 Female Current cohab (0,24] 0.404 0.356 0.240
## 10 Male Current cohab (0,24] 0.415 0.377 0.208
## 11 Female Former cohab (0,24] 0.222 0.338 0.441
## 12 Male Former cohab (0,24] 0.235 0.370 0.394
## 13 Female NeverSmoked divorced (0,24] 0.112 0.389 0.498
## 14 Male NeverSmoked divorced (0,24] 0.120 0.431 0.449
## 15 Female Current divorced (0,24] 0.157 0.507 0.336
## 16 Male Current divorced (0,24] 0.163 0.543 0.294
## 17 Female Former divorced (0,24] 0.073 0.406 0.521
## 18 Male Former divorced (0,24] 0.078 0.451 0.471
## 19 Female NeverSmoked nm (0,24] 0.080 0.374 0.546
## 20 Male NeverSmoked nm (0,24] 0.086 0.418 0.496
#Fitted probabilities from the proportional odds model
fitted.ord<-round(predict(fit.vgama, newdat=dat_a[,1:4], type="response"), 3)
# dat<-cbind(dat_a, fitted.ord)
#
# names(dat)<-c(names(dat)[1:4], "mp1", "mp2", "mp3", "op1", "op2", "op3")
#
# head(dat, n=20)
AIC(fit.vgama) #proportional odds
## [1] 14940.18
AIC(fit.vgamb) #cumulative logit, non proportional
## [1] 14718.8
AIC(mfita) #multinomial
## [1] 14725.96