library(haven)
tb19 <- read_dta("state_19_working_pudf.dta")
View(tb19)
## Warning in system2("/usr/bin/otool", c("-L", shQuote(DSO)), stdout = TRUE):
## running command ''/usr/bin/otool' -L '/Library/Frameworks/R.framework/Resources/
## modules/R_de.so'' had status 1
nams<-names(tb19)
head(nams, n=10)
## [1] "year" "ststr" "state" "fmonth" "dispcode" "orgseqno"
## [7] "c01q01" "fairpoor" "c02q01" "phyng"
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(tb19)<-newnames
#recode
#age
tb19$agegr4 = as.factor(tb19$agegr4)
tb19$age<-Recode(tb19$agegr4, recodes="1='18-19'; 2='30-44'; 3='45-64'; 4='65+'; else=NA", as.factor=T)
#insurance
tb19$c03q01 = as.factor(tb19$c03q01)
tb19$ins<-Recode(tb19$c03q01, recodes ="1=1;2=0")
#employment
tb19$c08q14 = as.factor(tb19$c08q14)
tb19$employ<-Recode(tb19$c08q14, recodes="1:2='Employed'; 3:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor=T)
tb19$employ<-relevel(tb19$employ, ref='Employed')
#education level
tb19$educat4 = as.factor(tb19$educat4)
tb19$educ<-Recode(tb19$educat4, recodes="1='leshs'; 2='hsgrad'; 3='sumcol'; 4='colgrad'", as.factor=T)
tb19$educ<-relevel(tb19$educ, ref='hsgrad')
#race/ethnicity
tb19$race = as.factor(tb19$race)
tb19$black<-Recode(tb19$race, recodes="2=1; 9=NA; else=0")
tb19$white<-Recode(tb19$race, recodes="1=1; 9=NA; else=0")
tb19$other<-Recode(tb19$race, recodes="3:7=1; 9=NA; else=0")
tb19$hispanic<-Recode(tb19$race, recodes="8=1; else=0")
tb19$racegr5 = as.factor(tb19$racegr5)
tb19$ethn<-Recode(tb19$racegr5, recodes="1='nhwhite'; 2='nh black'; 3='Hispanic';4='othernh'; 5='multinh'; else=NA", as.factor = T)
#smoking currently
tb19$smoker2 = as.factor(tb19$smoker2)
tb19$smoke<-Recode(tb19$smoker2, recodes="1:2='Current'; 3='Former';4='NeverSmoked'; else=NA", as.factor=T)
tb19$smoke<-relevel(tb19$smoke, ref = "NeverSmoked")
#Poor or fair self rated health
tb19$fairpoor = as.factor(tb19$fairpoor)
tb19$genhlth<-Recode(tb19$fairpoor, recodes="1='poor'; 2='good'")
#sex
tb19$sex = as.factor(tb19$sex)
tb19$sex<-Recode(tb19$sex, recodes="1='Male'; 2='Female'")
#marital status
tb19$c08q05 = as.factor(tb19$c08q05)
tb19$marital<-Recode(tb19$c08q05, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor=T)
tb19$marital<-relevel(tb19$marital, ref='married')
#harveyhomedamage
tb19$tx04q04 = as.factor(tb19$tx04q04)
tb19$hdamage<-Recode(tb19$tx04q04, recodes="1='midly';2='moderate'; 3='severe';4='none'; else=NA", as.factor=T)
#Harvey_health
tb19$tx04q05 = as.factor(tb19$tx04q05)
tb19$h_hlth<-Recode(tb19$tx04q05, recodes="1=1;2=0; else=NA", as.factor=T)
#harveychildasthma
tb19$tx05q01 = as.factor(tb19$tx05q01)
tb19$c_asth<-Recode(tb19$tx05q01, recodes="1=1;2=0; 3='notaffected'")
#harveymold
tb19$tx05q06 = as.factor(tb19$tx05q06)
tb19$mold<-Recode(tb19$tx05q06, recodes="1=1;2=0")
#harveyvermon
tb19$tx05q09 = as.factor(tb19$tx05q09)
tb19$vermon<-Recode(tb19$tx05q09, recodes="1=1;2=0")
#copd
tb19$c06q08 = as.factor(tb19$c06q08)
tb19$copd<-Recode(tb19$c06q08, recodes="1=1;2=0")
#county_Harvey
tb19$tx04q01 = as.factor(tb19$tx04q01)
tb19$hcounty<-Recode(tb19$tx04q01, recodes="1=1;2=0; else=NA")
#income
tb19$c08q16 = as.factor(tb19$c08q16)
tb19$income<-Recode(tb19$c08q16, recodes="1='less10k';2='10-15k'; 3='15-20k'; 4='20-25k'; 5='25-35';6='35-50k';7='50-75';8='75k'; else=NA")
HW Questions
#1 Define an ordinal or multinomial outcome variable of your choosing and define how you will recode the original variable.Oordinal variable will be income.
#2.State a research question about what factors you believe will affect your outcome variable.What social and demographic factors might play a role on income?
#Fit both the ordinal or the multinomial logistic regression models to your outcome.
#3.1 Are the assumptions of the proportional odds model met? Based on the graphical summary of proportional odds assummption, the results seem to be consistent.
#3.2 How did you evaluate the proportional odds assumption? I plotted the "Graphical Summary of Proportional Odds Assumption". After running both proportional and nonproportional models, I looked at the AIC numbers.
#3.3 Which Model Fits best? The Multinomial model fits best.
#how did you decide upon this? #Vgam1- 16195.96 Vgam2 13859.61 Multinomial - 13334.55 based on the AIC numbers I selected the smallest model.
#4 For the best fitting model from part c,Describe the results of your model, and present output from the model in terms of odds ratios and confidence intervals for all model parameters in a table. ->See analysis below.
#general ordinal coding 1 is 0-20k, 2 20-50k, 3 is 50-75k
tb19$income<-Recode(tb19$c08q16,
recodes="1:3=1; 4:6=2;7:8=3; else=NA", as.factor = T)
tb19$income<-relevel(tb19$income, ref = "3")
tb19$incomenum<-car::Recode(tb19$c08q16,
recodes="1:3=1; 4:6=2;7:8=3; else=NA", as.factor = F)
#First we tell R our survey design
options(survey.lonely.psu = "adjust")
#library(dplyr)
#subtb19<-tb19%>%
#select(age,employ,educ,black,white,other,ethn,hispanic,sex,marital,hcounty,ststr,llcpwt) %>%
#filter( complete.cases(.))
des<-svydesign(ids=~1, strata=~ststr, weights=~llcpwt, data =tb19 )
Ordinal Regression
#if no Pvalue use T value (when tvalue is bigger 2/-2) then test of paramater is significant at the zero level. so we are looking at t statistics, regardless of the direction, that are bigger than 2. educleshs have a lower likelyhood of reporting higer income (50-75K).
#Based on the income level results, less than high education have lower odds of reporting 50-75k income when compared to college graduates. Divorced have lower odds of reporting 50-75k income when compared to married couples. Age of 65+ have lower odds of reporting 50-75k income when compared to Ages 30-44 and 45-64.
#{as x increases your odds of reporting the higher value go down}
#ordinal Regression
fit.ord<-svyolr(income~age+educ+ethn+employ+sex+marital+ins+smoke+genhlth,des)
summary(fit.ord)
## Call:
## svyolr(income ~ age + educ + ethn + employ + sex + marital +
## ins + smoke + genhlth, des)
##
## Coefficients:
## Value Std. Error t value
## age30-44 -0.51890681 0.14073344 -3.68716073
## age45-64 -0.56194412 0.15327842 -3.66616596
## age65+ -0.01087955 0.18298586 -0.05945569
## educcolgrad -1.33285260 0.12568216 -10.60494651
## educleshs 0.21533093 0.13871523 1.55232365
## educsumcol -0.32305823 0.11425035 -2.82763439
## ethnmultinh -0.62936849 0.27037649 -2.32774860
## ethnnh black 0.05477901 0.14288890 0.38336782
## ethnnhwhite -0.53920897 0.10873852 -4.95876697
## ethnothernh 0.20558066 0.26463002 0.77686070
## employnilf 0.33296748 0.13091495 2.54338772
## employretired 0.56718575 0.13512180 4.19758884
## employunable 0.43162281 0.14249470 3.02904475
## sexMale -0.12024920 0.09141664 -1.31539726
## maritalcohab 0.62366404 0.19632306 3.17672336
## maritaldivorced 0.90127065 0.13738783 6.56004712
## maritalnm 0.50406047 0.12680319 3.97514031
## maritalseparated 0.50951610 0.19048097 2.67489241
## maritalwidowed 0.89491072 0.13684518 6.53958523
## ins1 -0.57009550 0.11464881 -4.97253762
## smokeCurrent 0.10050221 0.12419508 0.80922862
## smokeFormer -0.01096463 0.10399639 -0.10543283
## genhlthpoor 0.46669321 0.11090376 4.20809197
##
## Intercepts:
## Value Std. Error t value
## 3|1 -0.8048 0.1812 -4.4422
## 1|2 0.1597 0.1856 0.8603
## (3208 observations deleted due to missingness)
#Calculate the AIC ourself
fit.ord$deviance+2*length(fit.ord$coefficients)
## [1] 16151.35
#des<-svydesign(ids=~1, strata=~ststr, weights=~llcpwt, data =tb19 )
#des$variables$incomenum<-recode(des$variables$income, recodes="1:3=1; 4:6=2;7:8=3; else=NA", as.factor.result = F)
ex1<-svyglm(I(incomenum>1)~age+educ+ethn+employ+sex+marital+ins+smoke+genhlth,des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
ex2<-svyglm(I(incomenum>2)~age+educ+ethn+employ+sex+marital+ins+smoke+genhlth,des, family="binomial") #this will model the level 3
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Plotting coefficients of each model.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.0.6 ✓ dplyr 1.0.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::expand() masks Matrix::expand()
## x tidyr::fill() masks VGAM::fill()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x dplyr::recode() masks car::recode()
## x purrr::some() masks car::some()
## x tidyr::unpack() masks Matrix::unpack()
coefs<-data.frame(parms =c( coefficients(ex1)[-1],coefficients(ex2)[-1]),
beta = rep(names(coefficients(ex1))[-1], 2 ),
mod = c(rep("I(incomenum>1)",23 ),
rep("I(incomenum>2)",23 )))
coefs%>%
ggplot()+
geom_point(aes(x=beta, y=parms, color=mod))+
theme(axis.text.x = element_text(angle = 45))+
labs(title = "Graphical Summary of Proportional Odds Assumption",
x= "Beta",
y= "Estimate")
round(exp(rbind(coef(ex1)[-1], coef(ex2)[-1])),3)
## age30-44 age45-64 age65+ educcolgrad educleshs educsumcol ethnmultinh
## [1,] 1.059 1.096 0.722 4.819 0.392 1.887 0.981
## [2,] 1.741 1.956 0.795 4.841 0.321 1.763 2.195
## ethnnh black ethnnhwhite ethnothernh employnilf employretired employunable
## [1,] 0.712 1.806 1.430 0.416 0.345 0.092
## [2,] 0.833 2.125 0.933 0.538 0.459 0.127
## sexMale maritalcohab maritaldivorced maritalnm maritalseparated
## [1,] 2.054 0.572 0.254 0.215 0.156
## [2,] 1.618 0.493 0.222 0.372 0.224
## maritalwidowed ins1 smokeCurrent smokeFormer genhlthpoor
## [1,] 0.283 3.719 1.106 1.079 0.702
## [2,] 0.217 3.701 1.037 1.143 0.434
Proportional Assumption
library(VGAM)
#Proportional odds
fit.vgam<-vglm(as.ordered(income)~age+educ+ethn+employ+sex+marital+ins+smoke+genhlth,
tb19, weights =llcpwt/mean(llcpwt, na.rm=T),
family=cumulative(parallel = T, reverse = T)) #<-parallel = T == proportional odds
summary(fit.vgam)
##
## Call:
## vglm(formula = as.ordered(income) ~ age + educ + ethn + employ +
## sex + marital + ins + smoke + genhlth, family = cumulative(parallel = T,
## reverse = T), data = tb19, weights = llcpwt/mean(llcpwt,
## na.rm = T))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.80484 0.09132 8.813 < 2e-16 ***
## (Intercept):2 -0.15971 0.09086 -1.758 0.07879 .
## age30-44 -0.51889 0.06609 -7.851 4.13e-15 ***
## age45-64 -0.56193 0.07046 -7.975 1.52e-15 ***
## age65+ -0.01090 0.10511 -0.104 0.91742
## educcolgrad -1.33285 0.06652 -20.036 < 2e-16 ***
## educleshs 0.21534 0.07064 3.048 0.00230 **
## educsumcol -0.32307 0.05553 -5.818 5.95e-09 ***
## ethnmultinh -0.62937 0.26780 -2.350 0.01877 *
## ethnnh black 0.05478 0.07450 0.735 0.46213
## ethnnhwhite -0.53923 0.05429 -9.933 < 2e-16 ***
## ethnothernh 0.20557 0.10265 2.003 0.04520 *
## employnilf 0.33298 0.06084 5.473 4.43e-08 ***
## employretired 0.56717 0.09219 6.152 7.63e-10 ***
## employunable 0.43153 0.09777 4.414 1.02e-05 ***
## sexMale -0.12025 0.04574 -2.629 0.00857 **
## maritalcohab 0.62368 0.10066 6.196 5.81e-10 ***
## maritaldivorced 0.90124 0.07437 12.118 < 2e-16 ***
## maritalnm 0.50404 0.06332 7.960 1.72e-15 ***
## maritalseparated 0.50945 0.11465 4.444 8.85e-06 ***
## maritalwidowed 0.89488 0.09861 9.075 < 2e-16 ***
## ins1 -0.57010 0.05489 -10.387 < 2e-16 ***
## smokeCurrent 0.10053 0.06351 1.583 0.11347
## smokeFormer -0.01096 0.05813 -0.189 0.85045
## genhlthpoor 0.46673 0.05604 8.328 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 16145.96 on 18153 degrees of freedom
##
## Log-likelihood: -8072.98 on 18153 degrees of freedom
##
## Number of Fisher scoring iterations: 8
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## age30-44 age45-64 age65+ educcolgrad
## 0.5951798 0.5701100 0.9891602 0.2637241
## educleshs educsumcol ethnmultinh ethnnh black
## 1.2402869 0.7239222 0.5329287 1.0563080
## ethnnhwhite ethnothernh employnilf employretired
## 0.5831981 1.2282293 1.3951232 1.7632693
## employunable sexMale maritalcohab maritaldivorced
## 1.5396099 0.8866995 1.8657790 2.4626435
## maritalnm maritalseparated maritalwidowed ins1
## 1.6553992 1.6643674 2.4470379 0.5654678
## smokeCurrent smokeFormer genhlthpoor
## 1.1057532 0.9890997 1.5947733
AIC(fit.vgam)
## [1] 16195.96
N0on-proportional Odds
#Non-proportional odds, more flexible but more to interpret. we have two parameters for everyone of our effects.
fit.vgam2<-vglm(as.ordered(income)~age+educ+ethn+employ+sex+marital+ins+smoke+genhlth,tb19, weights =llcpwt/mean(llcpwt, na.rm=T),
family=cratio(parallel = F, reverse = T))
summary(fit.vgam2)
##
## Call:
## vglm(formula = as.ordered(income) ~ age + educ + ethn + employ +
## sex + marital + ins + smoke + genhlth, family = cratio(parallel = F,
## reverse = T), data = tb19, weights = llcpwt/mean(llcpwt,
## na.rm = T))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.037694 0.177169 0.213 0.831517
## (Intercept):2 0.157497 0.097787 1.611 0.107262
## age30-44:1 0.181976 0.130587 1.394 0.163462
## age30-44:2 0.495892 0.071747 6.912 4.79e-12 ***
## age45-64:1 0.340250 0.144682 2.352 0.018687 *
## age45-64:2 0.527167 0.076416 6.899 5.25e-12 ***
## age65+:1 -0.832409 0.223135 -3.731 0.000191 ***
## age65+:2 0.094631 0.113424 0.834 0.404105
## educcolgrad:1 2.160864 0.143766 15.030 < 2e-16 ***
## educcolgrad:2 0.981016 0.072892 13.459 < 2e-16 ***
## educleshs:1 -1.596484 0.138791 -11.503 < 2e-16 ***
## educleshs:2 0.060637 0.076003 0.798 0.424968
## educsumcol:1 0.797264 0.107506 7.416 1.21e-13 ***
## educsumcol:2 0.172293 0.060574 2.844 0.004450 **
## ethnmultinh:1 0.602385 0.459982 1.310 0.190336
## ethnmultinh:2 0.597599 0.303886 1.967 0.049238 *
## ethnnh black:1 -0.394215 0.139016 -2.836 0.004572 **
## ethnnh black:2 0.028612 0.081488 0.351 0.725499
## ethnnhwhite:1 1.060187 0.109260 9.703 < 2e-16 ***
## ethnnhwhite:2 0.385842 0.059316 6.505 7.78e-11 ***
## ethnothernh:1 0.405634 0.224954 1.803 0.071359 .
## ethnothernh:2 -0.269896 0.110474 -2.443 0.014563 *
## employnilf:1 -1.124599 0.114072 -9.859 < 2e-16 ***
## employnilf:2 -0.083849 0.066416 -1.262 0.206776
## employretired:1 -1.134699 0.195962 -5.790 7.02e-09 ***
## employretired:2 -0.340571 0.099635 -3.418 0.000630 ***
## employunable:1 -3.232668 0.193755 -16.684 < 2e-16 ***
## employunable:2 0.350687 0.110569 3.172 0.001516 **
## sexMale:1 0.965764 0.094525 10.217 < 2e-16 ***
## sexMale:2 -0.051490 0.049812 -1.034 0.301284
## maritalcohab:1 -1.150598 0.206559 -5.570 2.54e-08 ***
## maritalcohab:2 -0.463016 0.107113 -4.323 1.54e-05 ***
## maritaldivorced:1 -2.012375 0.153057 -13.148 < 2e-16 ***
## maritaldivorced:2 -0.578725 0.080026 -7.232 4.77e-13 ***
## maritalnm:1 -1.977459 0.130352 -15.170 < 2e-16 ***
## maritalnm:2 -0.122750 0.069725 -1.760 0.078325 .
## maritalseparated:1 -2.666351 0.225482 -11.825 < 2e-16 ***
## maritalseparated:2 -0.003038 0.127725 -0.024 0.981024
## maritalwidowed:1 -2.156133 0.192730 -11.187 < 2e-16 ***
## maritalwidowed:2 -0.563821 0.104878 -5.376 7.62e-08 ***
## ins1:1 2.077181 0.102950 20.177 < 2e-16 ***
## ins1:2 0.201148 0.060028 3.351 0.000806 ***
## smokeCurrent:1 0.061953 0.125519 0.494 0.621604
## smokeCurrent:2 -0.113010 0.068639 -1.646 0.099673 .
## smokeFormer:1 0.297754 0.120147 2.478 0.013203 *
## smokeFormer:2 -0.012466 0.063360 -0.197 0.844019
## genhlthpoor:1 -0.896276 0.108781 -8.239 < 2e-16 ***
## genhlthpoor:2 -0.359360 0.060385 -5.951 2.66e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y<2|Y<=2]), logitlink(P[Y<3|Y<=3])
##
## Residual deviance: 13763.61 on 18130 degrees of freedom
##
## Log-likelihood: -6881.804 on 18130 degrees of freedom
##
## Number of Fisher scoring iterations: 8
##
## No Hauck-Donner effect found in any of the estimates
AIC(fit.vgam2)
## [1] 13859.61
Multinomial Model (this model fits best) For the best fitting model from part c, Describe the results of your model:
#no ordering to the outcome. ie. how to get to work? bike, bus, train? a decision with more than 1 option. good for unordered options. ex2 birth control: pill, none, ring, patch, etc based on characteristics you can run the prob. of using one over the other. leave one level out as a reference category.
#Describe the results of your model:
#ref level 3
#In terms of education, those with less than high school and some college education are more likely to report lower income, compared to those with college education. In terms of age, ages 65+ and ages 45-64 are more likely to report lower incomes, compared to those in age category of 30-44.In terms of health, individuals with poor health are more likely to report lower income, compared to those with good health.
#Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
mfit<-vglm(income~age+educ+ethn+employ+sex+marital+ins+smoke+genhlth,
family=multinomial(refLevel = 3),
data=tb19,
weights=llcpwt/mean(llcpwt, na.rm=T))
summary(mfit)
##
## Call:
## vglm(formula = income ~ age + educ + ethn + employ + sex + marital +
## ins + smoke + genhlth, family = multinomial(refLevel = 3),
## data = tb19, weights = llcpwt/mean(llcpwt, na.rm = T))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -1.037788 0.122065 -8.502 < 2e-16 ***
## (Intercept):2 -0.954998 0.135836 -7.031 2.06e-12 ***
## age30-44:1 0.590768 0.086046 6.866 6.62e-12 ***
## age30-44:2 0.141266 0.103452 1.366 0.172090
## age45-64:1 0.701344 0.091462 7.668 1.75e-14 ***
## age45-64:2 0.134360 0.114146 1.177 0.239160
## age65+:1 -0.151658 0.135671 -1.118 0.263636
## age65+:2 0.298674 0.170572 1.751 0.079943 .
## educcolgrad:1 1.415267 0.081084 17.454 < 2e-16 ***
## educcolgrad:2 -0.899709 0.138874 -6.479 9.26e-11 ***
## educleshs:1 -0.961568 0.109889 -8.750 < 2e-16 ***
## educleshs:2 0.706727 0.094862 7.450 9.33e-14 ***
## educsumcol:1 0.471298 0.070967 6.641 3.11e-11 ***
## educsumcol:2 -0.470262 0.088784 -5.297 1.18e-07 ***
## ethnmultinh:1 0.860470 0.343503 2.505 0.012246 *
## ethnmultinh:2 0.340363 0.415366 0.819 0.412541
## ethnnh black:1 -0.117300 0.098145 -1.195 0.232018
## ethnnh black:2 0.285008 0.109877 2.594 0.009490 **
## ethnnhwhite:1 0.685483 0.068687 9.980 < 2e-16 ***
## ethnnhwhite:2 -0.344259 0.091102 -3.779 0.000158 ***
## ethnothernh:1 -0.148973 0.125689 -1.185 0.235920
## ethnothernh:2 -0.424777 0.188270 -2.256 0.024057 *
## employnilf:1 -0.471828 0.081883 -5.762 8.30e-09 ***
## employnilf:2 0.692083 0.087969 7.867 3.62e-15 ***
## employretired:1 -0.649947 0.116792 -5.565 2.62e-08 ***
## employretired:2 0.779615 0.155787 5.004 5.60e-07 ***
## employunable:1 -1.520470 0.180801 -8.410 < 2e-16 ***
## employunable:2 1.895687 0.131407 14.426 < 2e-16 ***
## sexMale:1 0.355398 0.059469 5.976 2.28e-09 ***
## sexMale:2 -0.612752 0.073732 -8.311 < 2e-16 ***
## maritalcohab:1 -0.651472 0.134666 -4.838 1.31e-06 ***
## maritalcohab:2 0.314357 0.154433 2.036 0.041795 *
## maritaldivorced:1 -1.336225 0.098589 -13.554 < 2e-16 ***
## maritaldivorced:2 0.931012 0.113972 8.169 3.12e-16 ***
## maritalnm:1 -0.737198 0.082639 -8.921 < 2e-16 ***
## maritalnm:2 1.258440 0.104158 12.082 < 2e-16 ***
## maritalseparated:1 -1.157508 0.180631 -6.408 1.47e-10 ***
## maritalseparated:2 1.471679 0.155932 9.438 < 2e-16 ***
## maritalwidowed:1 -1.370887 0.138993 -9.863 < 2e-16 ***
## maritalwidowed:2 0.828898 0.137591 6.024 1.70e-09 ***
## ins1:1 1.083034 0.076831 14.096 < 2e-16 ***
## ins1:2 -0.957009 0.079822 -11.989 < 2e-16 ***
## smokeCurrent:1 0.008101 0.085828 0.094 0.924804
## smokeCurrent:2 -0.110271 0.094398 -1.168 0.242744
## smokeFormer:1 0.115202 0.074514 1.546 0.122095
## smokeFormer:2 -0.057134 0.096992 -0.589 0.555822
## genhlthpoor:1 -0.806644 0.079081 -10.200 < 2e-16 ***
## genhlthpoor:2 0.140271 0.077263 1.815 0.069448 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
##
## Residual deviance: 13238.55 on 18130 degrees of freedom
##
## Log-likelihood: -6619.277 on 18130 degrees of freedom
##
## Number of Fisher scoring iterations: 6
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Reference group is level 3 of the response
#For the best fitting model (multinominal) from part c, Describe the results of your model, and present output from the model in terms of odds ratios and confidence intervals for all model parameters in a table.
Odds ratio for each covariate effect on each model
round(exp(coef(mfit)), 3)
## (Intercept):1 (Intercept):2 age30-44:1 age30-44:2
## 0.354 0.385 1.805 1.152
## age45-64:1 age45-64:2 age65+:1 age65+:2
## 2.016 1.144 0.859 1.348
## educcolgrad:1 educcolgrad:2 educleshs:1 educleshs:2
## 4.118 0.407 0.382 2.027
## educsumcol:1 educsumcol:2 ethnmultinh:1 ethnmultinh:2
## 1.602 0.625 2.364 1.405
## ethnnh black:1 ethnnh black:2 ethnnhwhite:1 ethnnhwhite:2
## 0.889 1.330 1.985 0.709
## ethnothernh:1 ethnothernh:2 employnilf:1 employnilf:2
## 0.862 0.654 0.624 1.998
## employretired:1 employretired:2 employunable:1 employunable:2
## 0.522 2.181 0.219 6.657
## sexMale:1 sexMale:2 maritalcohab:1 maritalcohab:2
## 1.427 0.542 0.521 1.369
## maritaldivorced:1 maritaldivorced:2 maritalnm:1 maritalnm:2
## 0.263 2.537 0.478 3.520
## maritalseparated:1 maritalseparated:2 maritalwidowed:1 maritalwidowed:2
## 0.314 4.357 0.254 2.291
## ins1:1 ins1:2 smokeCurrent:1 smokeCurrent:2
## 2.954 0.384 1.008 0.896
## smokeFormer:1 smokeFormer:2 genhlthpoor:1 genhlthpoor:2
## 1.122 0.944 0.446 1.151
Confidence intervals for odds ratios
round(exp(confint(mfit)), 3)
## 2.5 % 97.5 %
## (Intercept):1 0.279 0.450
## (Intercept):2 0.295 0.502
## age30-44:1 1.525 2.137
## age30-44:2 0.940 1.411
## age45-64:1 1.686 2.412
## age45-64:2 0.915 1.431
## age65+:1 0.659 1.121
## age65+:2 0.965 1.883
## educcolgrad:1 3.513 4.827
## educcolgrad:2 0.310 0.534
## educleshs:1 0.308 0.474
## educleshs:2 1.683 2.442
## educsumcol:1 1.394 1.841
## educsumcol:2 0.525 0.744
## ethnmultinh:1 1.206 4.635
## ethnmultinh:2 0.623 3.172
## ethnnh black:1 0.734 1.078
## ethnnh black:2 1.072 1.649
## ethnnhwhite:1 1.735 2.271
## ethnnhwhite:2 0.593 0.847
## ethnothernh:1 0.673 1.102
## ethnothernh:2 0.452 0.946
## employnilf:1 0.531 0.732
## employnilf:2 1.681 2.374
## employretired:1 0.415 0.656
## employretired:2 1.607 2.959
## employunable:1 0.153 0.312
## employunable:2 5.146 8.613
## sexMale:1 1.270 1.603
## sexMale:2 0.469 0.626
## maritalcohab:1 0.400 0.679
## maritalcohab:2 1.012 1.853
## maritaldivorced:1 0.217 0.319
## maritaldivorced:2 2.029 3.172
## maritalnm:1 0.407 0.563
## maritalnm:2 2.870 4.317
## maritalseparated:1 0.221 0.448
## maritalseparated:2 3.209 5.914
## maritalwidowed:1 0.193 0.333
## maritalwidowed:2 1.749 3.000
## ins1:1 2.541 3.434
## ins1:2 0.328 0.449
## smokeCurrent:1 0.852 1.193
## smokeCurrent:2 0.744 1.078
## smokeFormer:1 0.970 1.299
## smokeFormer:2 0.781 1.142
## genhlthpoor:1 0.382 0.521
## genhlthpoor:2 0.989 1.339
Fitted Prob. from models
dat<-expand.grid(ethn=levels(tb19$ethn),
educ=levels(tb19$educ),
age=levels(tb19$age),
employ=levels(tb19$employ),
sex=levels(tb19$sex),
ins=levels(tb19$ins),
smoke=levels(tb19$smoke),
income=levels(tb19$income),
marital=levels(tb19$marital),
genhlth=levels(tb19$genhlth))
#generate the fitted values
#Unfortunately, the survey proportional odds model won't generate fitted values
#but here I use a weighted multinomial model, which fits the data better
fitm<-predict(mfit, newdat=dat,type="response")
#add the values to the fake data
dat<-cbind(dat, round(fitm,3))
#Print the fitted probabilities
head(dat, n=20)
fitted.ord<-round(predict(fit.vgam, newdat=dat, type="response"), 3)
dat<-cbind(dat, fitted.ord)
names(dat)<-c(names(dat)[1:3], "mp1", "mp2", "mp3", "op1", "op2", "op3")
head(dat, n=20)
AIC(fit.vgam) #proportional odds
## [1] 16195.96
AIC(fit.vgam2) #cumulative logit, non proportional
## [1] 13859.61
AIC(mfit) #multinomial
## [1] 13334.55