library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(tableone)
library(ggplot2)
library(VGAM)
## Warning: package 'VGAM' was built under R version 4.0.4
## Loading required package: stats4
## Loading required package: splines
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:survey':
##
## calibrate
## The following object is masked from 'package:car':
##
## logit
load("~/ipumstor2/.RData")
summary(data)
## STRATA SAMPWEIGHT EDUC BMI
## Min. :6001 Min. : 0 Min. : 0.0 Min. : 0.000
## 1st Qu.:6081 1st Qu.: 0 1st Qu.:111.0 1st Qu.: 0.000
## Median :6158 Median : 0 Median :201.0 Median : 0.000
## Mean :6156 Mean : 3380 Mean :232.7 Mean : 9.144
## 3rd Qu.:6238 3rd Qu.: 5466 3rd Qu.:302.0 3rd Qu.:22.330
## Max. :6300 Max. :104188 Max. :999.0 Max. :99.990
## HINOTCOVE ASTHMAEV DIABETICEV
## Min. :1.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.000 Median :0.0000 Median :0.0000
## Mean :1.242 Mean :0.4852 Mean :0.4681
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :9.000 Max. :9.0000 Max. :9.0000
data$HWT <- cut(data$BMI,
breaks=c(-Inf, 18, 25, 30, Inf),
labels=c("1", "2", "3", "4"))
data$INS<-Recode(data$HINOTCOVE,
recodes="2=1; 1=0; else=NA")
data$AST<-Recode(data$ASTHMAEV,
recodes="2=1; 1=0; else=NA")
data$DIA<-Recode(data$DIABETICEV,
recodes="2=1; 1=0; else=NA")
data <- na.omit(data)
options(survey.lonely.psu = "adjust")
library(dplyr)
sub<-data%>%
select(HWT,INS,AST, BMI, HINOTCOVE,
ASTHMAEV, DIA, DIABETICEV, STRATA, EDUC, SAMPWEIGHT, STRATA) %>%
filter( complete.cases(.))
#First we tell R our survey design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~STRATA, data =sub )
## Warning in svydesign.default(ids = ~1, strata = ~STRATA, data = sub): No weights
## or probabilities supplied, assuming equal probability
fit.solr1<-svyolr(HWT~INS+AST+DIA, des)
summary(fit.solr1)
## Call:
## svyolr(HWT ~ INS + AST + DIA, des)
##
## Coefficients:
## Value Std. Error t value
## INS 0.54714415 0.02327516 23.5076411
## AST 0.01869013 0.03035813 0.6156548
## DIA 1.79678229 0.03453603 52.0263052
##
## Intercepts:
## Value Std. Error t value
## 1|2 -0.6469 0.0126 -51.2806
## 2|3 0.4197 0.0124 33.9053
## 3|4 1.5820 0.0150 105.7493
fit.solr1$deviance+2*length(fit.solr1$coefficients)
## [1] 101640.8
ex1<-svyglm(I(BMI>18)~INS+AST+DIA,des, family="binomial")
ex2<-svyglm(I(BMI>25)~INS+AST+DIA,des, family="binomial")
coefs<-data.frame(parms =c( coefficients(ex1)[-1],coefficients(ex2)[-1]),
beta = rep(names(coefficients(ex1))[-1], 2 ),
mod = c(rep("I(BMI>18)",12 ),
rep("I(BMI>25)",12 )))
coefs%>%
ggplot()+
geom_point(aes(x=beta, y=parms, color=mod))+
theme(axis.text.x = element_text(angle = 45))+
labs(title = "Graphical Summary of Proportional Odds Assumption",
x= "Beta",
y= "Estimate")

round(exp(rbind(coef(ex1)[-1],
coef(ex2)[-1])),3)
## INS AST DIA
## [1,] 2.362 0.847 38.804
## [2,] 1.628 1.015 6.605
fit.vgam<-vglm(as.ordered(HWT)~INS+AST+DIA,
data, weights =SAMPWEIGHT/mean(SAMPWEIGHT, na.rm=T),
family=cumulative(parallel = T, reverse = T)) #<-parallel = T == proportional odds
summary(fit.vgam)
##
## Call:
## vglm(formula = as.ordered(HWT) ~ INS + AST + DIA, family = cumulative(parallel = T,
## reverse = T), data = data, weights = SAMPWEIGHT/mean(SAMPWEIGHT,
## na.rm = T))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.90265 0.01295 69.69 <2e-16 ***
## (Intercept):2 -0.28771 0.01203 -23.91 <2e-16 ***
## (Intercept):3 -1.48661 0.01449 -102.62 <2e-16 ***
## INS 0.49811 0.02571 19.38 <2e-16 ***
## AST 0.06479 0.02769 2.34 0.0193 *
## DIA 1.74301 0.03959 44.03 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]),
## logitlink(P[Y>=4])
##
## Residual deviance: 102496.4 on 113727 degrees of freedom
##
## Log-likelihood: -51248.19 on 113727 degrees of freedom
##
## Number of Fisher scoring iterations: 4
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## INS AST DIA
## 1.645613 1.066939 5.714530
fit.vgam2<-vglm(as.ordered(HWT)~INS+AST+DIA,data,
weights =SAMPWEIGHT/mean(SAMPWEIGHT, na.rm=T),
family=cumulative(parallel = F, reverse = T)) #<-parallel = F == Nonproportional odds
summary(fit.vgam2)
##
## Call:
## vglm(formula = as.ordered(HWT) ~ INS + AST + DIA, family = cumulative(parallel = F,
## reverse = T), data = data, weights = SAMPWEIGHT/mean(SAMPWEIGHT,
## na.rm = T))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.86100 0.01352 63.668 < 2e-16 ***
## (Intercept):2 -0.26124 0.01239 -21.090 < 2e-16 ***
## (Intercept):3 -1.45943 0.01554 -93.924 < 2e-16 ***
## INS:1 0.94212 0.04008 23.505 < 2e-16 ***
## INS:2 0.40262 0.02883 13.968 < 2e-16 ***
## INS:3 0.32618 0.03312 9.850 < 2e-16 ***
## AST:1 -0.08062 0.03502 -2.302 0.0213 *
## AST:2 0.05762 0.03129 1.841 0.0656 .
## AST:3 0.24285 0.03586 6.772 1.27e-11 ***
## DIA:1 3.40912 0.17057 19.987 < 2e-16 ***
## DIA:2 1.81799 0.05302 34.289 < 2e-16 ***
## DIA:3 1.63327 0.04142 39.433 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]),
## logitlink(P[Y>=4])
##
## Residual deviance: 101961.4 on 113721 degrees of freedom
##
## Log-likelihood: -50980.72 on 113721 degrees of freedom
##
## Number of Fisher scoring iterations: 7
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## 'DIA:1'
##
##
## Exponentiated coefficients:
## INS:1 INS:2 INS:3 AST:1 AST:2 AST:3 DIA:1
## 2.5654088 1.4957399 1.3856671 0.9225426 1.0593093 1.2748816 30.2386568
## DIA:2 DIA:3
## 6.1594446 5.1205996
AIC(fit.vgam)
## [1] 102508.4
AIC(fit.vgam2)
## [1] 101985.4
mfit<-vglm(HWT~INS+AST+DIA,
family=multinomial(refLevel = 1),
data=data,
weights=SAMPWEIGHT/mean(SAMPWEIGHT, na.rm=T))
summary(mfit)
##
## Call:
## vglm(formula = HWT ~ INS + AST + DIA, family = multinomial(refLevel = 1),
## data = data, weights = SAMPWEIGHT/mean(SAMPWEIGHT, na.rm = T))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.10363 0.01646 -6.298 3.02e-10 ***
## (Intercept):2 -0.18400 0.01679 -10.958 < 2e-16 ***
## (Intercept):3 -0.45401 0.01807 -25.128 < 2e-16 ***
## INS:1 0.93112 0.04509 20.651 < 2e-16 ***
## INS:2 0.90782 0.04576 19.838 < 2e-16 ***
## INS:3 1.00555 0.04695 21.420 < 2e-16 ***
## AST:1 -0.17885 0.04337 -4.124 3.73e-05 ***
## AST:2 -0.18941 0.04413 -4.292 1.77e-05 ***
## AST:3 0.11655 0.04379 2.661 0.00778 **
## DIA:1 2.54486 0.17746 14.340 < 2e-16 ***
## DIA:2 3.19474 0.17451 18.307 < 2e-16 ***
## DIA:3 4.12919 0.17257 23.928 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,2]/mu[,1]), log(mu[,3]/mu[,1]),
## log(mu[,4]/mu[,1])
##
## Residual deviance: 101961.3 on 113721 degrees of freedom
##
## Log-likelihood: -50980.65 on 113721 degrees of freedom
##
## Number of Fisher scoring iterations: 6
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## 'DIA:3'
##
##
## Reference group is level 1 of the response
round(exp(coef(mfit)), 3)
## (Intercept):1 (Intercept):2 (Intercept):3 INS:1 INS:2
## 0.902 0.832 0.635 2.537 2.479
## INS:3 AST:1 AST:2 AST:3 DIA:1
## 2.733 0.836 0.827 1.124 12.741
## DIA:2 DIA:3
## 24.404 62.127
round(exp(confint(mfit)), 3)
## 2.5 % 97.5 %
## (Intercept):1 0.873 0.931
## (Intercept):2 0.805 0.860
## (Intercept):3 0.613 0.658
## INS:1 2.323 2.772
## INS:2 2.266 2.712
## INS:3 2.493 2.997
## AST:1 0.768 0.910
## AST:2 0.759 0.902
## AST:3 1.031 1.224
## DIA:1 8.998 18.042
## DIA:2 17.335 34.356
## DIA:3 44.299 87.131
AIC(fit.vgam)
## [1] 102508.4
AIC(fit.vgam2)
## [1] 101985.4
AIC(mfit)
## [1] 101985.3
#looking at the three models, The mulitnomial model seems to be more efficient, but each model is close