## Print
# Echo `x` to the console with print(), then hand the value print() returns to
# kbl() (captioned with `lab`) and style the table with kable_classic() in the
# Cambria font. The styled table is the return value.
myprint <- function(x, lab = "") {
  echoed <- print(x)
  tbl <- kbl(echoed, caption = lab)
  kable_classic(tbl, html_font = "Cambria")
}
## VIF and Effect Sizes
# Summarise a fitted model `x` with two numbers, each rounded to 3 decimals:
#   VIF - the largest value returned by vif(x)
#   R2  - a deviance-based pseudo-R2, 1 - deviance / null.deviance
# The two-row table is printed and rendered through myprint(), whose result is
# returned.
mydef <- function(x) {
  largest_vif <- round(max(vif(x)), 3)
  deviance_r2 <- round(1 - x$deviance / x$null.deviance, 3)
  # The row labels (VIF, R2) come from the argument names given to rbind().
  summary_rows <- rbind(VIF = largest_vif, R2 = deviance_r2)
  myprint(noquote(summary_rows))
}
# Load the analysis file and split it into training and test rows.
y2019 <- read.csv("D:/MI/MI.CSV", encoding = "UTF-8")
set.seed(1234)
# Draw row indices amounting to 20% of the rows for the test set.
# NOTE(review): the draw is WITH replacement, so `mytest` can contain repeated
# rows and `mytrain` (every row that was never drawn) can keep more than 80% of
# the data. Confirm this is intended before switching to replace = FALSE,
# because every result computed from these splits depends on this exact draw.
mys <- sample(seq_len(nrow(y2019)), .2 * nrow(y2019), replace = TRUE)
mytest <- y2019[mys, ]
mytrain <- y2019[-mys, ]
# Row counts: training first, test second.
print(c(nrow(mytrain), nrow(mytest)))
## [1] 285856 69835
# Survey design objects for the full data and for each split. All three use
# the same specification: no cluster ids (id = ~1), strata taken from
# `Stratum`, sampling weights taken from `Weights`.
mydata <- y2019
# Set before building the designs; "adjust" is the survey package's setting
# for strata that contain a single PSU.
options(survey.lonely.psu = "adjust")
svy2019 <- svydesign(
  id = ~1, strata = ~Stratum, weights = ~Weights, data = mydata
)
train2019 <- svydesign(
  id = ~1, strata = ~Stratum, weights = ~Weights, data = mytrain
)
test2019 <- svydesign(
  id = ~1, strata = ~Stratum, weights = ~Weights, data = mytest
)
# A very large scipen penalty keeps printed numbers in fixed notation.
options(scipen = 9999)
library(psych)
# Unweighted mean of every column of the raw data, rounded to 3 decimals and
# shown as a one-column table. apply() converts the data frame to a matrix
# first, so this relies on every column being numeric. The table's column
# header is the deparsed apply(...) expression itself.
myprint(as.data.frame(apply(y2019,2,function(x)round(mean(x),3))))
## apply(y2019, 2, function(x) round(mean(x), 3))
## MI 0.069
## Male 0.441
## Veteran 0.141
## Rent_Home 0.184
## Poor_Health 0.060
## Smoker 0.428
## Chew_Snuff 0.029
## Percent_Drink 0.154
## High_Cholesterol 0.398
## High_BP 0.463
## High_BMI 0.643
## Poor_Health_Percent 0.159
## Poor_Mental_Health_Percent 0.114
## Depression 0.183
## Stroke 0.053
## Asthma 0.134
## Skin_Cancer 0.118
## Cancer 0.120
## COPD 0.097
## Kidney 0.045
## Arthritis 0.388
## No_Health_Plan 0.068
## No_Doctor 0.130
## Cost 0.093
## No_Checkup 0.169
## Metropolitan 0.683
## Weights 508.551
## Stratum 297642.443
## PSU 2019004649.714
## Age_65 0.443
## Age_55_to_64 0.241
## Age_45_to_54 0.175
## Black 0.074
## Hispanic 0.073
## Other_Race 0.064
## Previous_Marriage 0.320
## Never_Married 0.095
## Income_LT25K 0.201
## Income_LT75K 0.335
## Income_DKR 0.175
## Pre_High_School 0.073
## High_School 0.261
## Post_High_School 0.273
## Retired_Unable 0.449
## Out_of_Work 0.032
## Other_Not_Working 0.058
## Fair_Health 0.153
## Good_Health 0.324
## Poor_Exercise 0.279
## Percent_Drink_2 0.101
## Diabetic 0.161
## Prediabetic 0.025
## Division_D1 0.117
## Division_D2 0.049
## Division_D3 0.105
## Division_D4 0.171
## Division_D5 0.182
## Division_D6 0.063
## Division_D7 0.069
## Division_D8 0.131
## Division_D9 0.095
apply(y2019, 2, function(x) round(mean(x), 3)) | |
---|---|
MI | 0.069 |
Male | 0.441 |
Veteran | 0.141 |
Rent_Home | 0.184 |
Poor_Health | 0.060 |
Smoker | 0.428 |
Chew_Snuff | 0.029 |
Percent_Drink | 0.154 |
High_Cholesterol | 0.398 |
High_BP | 0.463 |
High_BMI | 0.643 |
Poor_Health_Percent | 0.159 |
Poor_Mental_Health_Percent | 0.114 |
Depression | 0.183 |
Stroke | 0.053 |
Asthma | 0.134 |
Skin_Cancer | 0.118 |
Cancer | 0.120 |
COPD | 0.097 |
Kidney | 0.045 |
Arthritis | 0.388 |
No_Health_Plan | 0.068 |
No_Doctor | 0.130 |
Cost | 0.093 |
No_Checkup | 0.169 |
Metropolitan | 0.683 |
Weights | 508.551 |
Stratum | 297642.443 |
PSU | 2019004649.714 |
Age_65 | 0.443 |
Age_55_to_64 | 0.241 |
Age_45_to_54 | 0.175 |
Black | 0.074 |
Hispanic | 0.073 |
Other_Race | 0.064 |
Previous_Marriage | 0.320 |
Never_Married | 0.095 |
Income_LT25K | 0.201 |
Income_LT75K | 0.335 |
Income_DKR | 0.175 |
Pre_High_School | 0.073 |
High_School | 0.261 |
Post_High_School | 0.273 |
Retired_Unable | 0.449 |
Out_of_Work | 0.032 |
Other_Not_Working | 0.058 |
Fair_Health | 0.153 |
Good_Health | 0.324 |
Poor_Exercise | 0.279 |
Percent_Drink_2 | 0.101 |
Diabetic | 0.161 |
Prediabetic | 0.025 |
Division_D1 | 0.117 |
Division_D2 | 0.049 |
Division_D3 | 0.105 |
Division_D4 | 0.171 |
Division_D5 | 0.182 |
Division_D6 | 0.063 |
Division_D7 | 0.069 |
Division_D8 | 0.131 |
Division_D9 | 0.095 |
# Design-weighted descriptive statistics for every column stored in the full
# survey design: weighted mean, weighted SD (square root of svyvar) and the
# weighted total expressed in millions, each rounded to 3 decimals.
# The table is sized from the data rather than a hard-coded 61 rows, so it
# neither overflows nor leaves filler rows when the number of columns changes.
n_vars <- length(svy2019$variables)
# Character matrix: column 1 holds names, so the statistics are stored as text.
myd <- matrix("", nrow = n_vars, ncol = 4)
for (i in seq_len(n_vars)) {
  myd[i, 1] <- colnames(svy2019$variables[i])
  myd[i, 2] <- round(svymean(~svy2019$variables[, i], svy2019), 3)
  myd[i, 3] <- round(svyvar(~svy2019$variables[, i], svy2019)^.5, 3)
  myd[i, 4] <- round(svytotal(~svy2019$variables[, i], svy2019) / 1000000, 3)
}
colnames(myd) <- c("Variable", "Mean", "SD", "Total in Millions")
myd <- noquote(myd)
myd %>%
  kbl() %>%
  kable_classic(html_font = "Cambria")
Variable | Mean | SD | Total in Millions |
---|---|---|---|
MI | 0.057 | 0.232 | 10.173 |
Male | 0.477 | 0.499 | 84.748 |
Veteran | 0.12 | 0.325 | 21.314 |
Rent_Home | 0.189 | 0.392 | 33.6 |
Poor_Health | 0.06 | 0.237 | 10.588 |
Smoker | 0.42 | 0.493 | 74.505 |
Chew_Snuff | 0.03 | 0.17 | 5.265 |
Percent_Drink | 0.147 | 0.266 | 26.161 |
High_Cholesterol | 0.37 | 0.483 | 65.677 |
High_BP | 0.418 | 0.493 | 74.138 |
High_BMI | 0.646 | 0.478 | 114.753 |
Poor_Health_Percent | 0.152 | 0.304 | 27.059 |
Poor_Mental_Health_Percent | 0.121 | 0.266 | 21.417 |
Depression | 0.177 | 0.382 | 31.455 |
Stroke | 0.046 | 0.209 | 8.141 |
Asthma | 0.133 | 0.34 | 23.635 |
Skin_Cancer | 0.089 | 0.284 | 15.747 |
Cancer | 0.095 | 0.293 | 16.834 |
COPD | 0.084 | 0.277 | 14.896 |
Kidney | 0.04 | 0.196 | 7.138 |
Arthritis | 0.329 | 0.47 | 58.432 |
No_Health_Plan | 0.102 | 0.303 | 18.099 |
No_Doctor | 0.165 | 0.371 | 29.254 |
Cost | 0.116 | 0.32 | 20.577 |
No_Checkup | 0.195 | 0.396 | 34.538 |
Metropolitan | 0.84 | 0.366 | 149.215 |
Weights | 2236.452 | 3387.957 | 397134.892 |
Stratum | 280247.348 | 168454.592 | 49764539.418 |
PSU | 2019005190.677 | 3748.729 | 358522084248.168 |
Age_65 | 0.305 | 0.46 | 54.097 |
Age_55_to_64 | 0.235 | 0.424 | 41.692 |
Age_45_to_54 | 0.229 | 0.42 | 40.64 |
Black | 0.114 | 0.318 | 20.312 |
Hispanic | 0.149 | 0.356 | 26.413 |
Other_Race | 0.075 | 0.263 | 13.318 |
Previous_Marriage | 0.26 | 0.439 | 46.174 |
Never_Married | 0.107 | 0.309 | 19.047 |
Income_LT25K | 0.203 | 0.402 | 36.034 |
Income_LT75K | 0.314 | 0.464 | 55.691 |
Income_DKR | 0.165 | 0.371 | 29.342 |
Pre_High_School | 0.136 | 0.343 | 24.22 |
High_School | 0.26 | 0.438 | 46.113 |
Post_High_School | 0.295 | 0.456 | 52.352 |
Retired_Unable | 0.35 | 0.477 | 62.149 |
Out_of_Work | 0.042 | 0.2 | 7.382 |
Other_Not_Working | 0.071 | 0.257 | 12.652 |
Fair_Health | 0.159 | 0.366 | 28.238 |
Good_Health | 0.327 | 0.469 | 58.047 |
Poor_Exercise | 0.274 | 0.446 | 48.722 |
Percent_Drink_2 | 0.093 | 0.241 | 16.428 |
Diabetic | 0.15 | 0.358 | 26.721 |
Prediabetic | 0.026 | 0.159 | 4.623 |
Division_D1 | 0.049 | 0.215 | 8.619 |
Division_D2 | 0.102 | 0.303 | 18.186 |
Division_D3 | 0.146 | 0.353 | 25.992 |
Division_D4 | 0.065 | 0.247 | 11.622 |
Division_D5 | 0.211 | 0.408 | 37.379 |
Division_D6 | 0.06 | 0.237 | 10.568 |
Division_D7 | 0.118 | 0.323 | 20.95 |
Division_D8 | 0.075 | 0.263 | 13.244 |
Division_D9 | 0.163 | 0.37 | 28.99 |
# Model 1 ("Demographics"): MI regressed on the age, sex, race, marital-status
# and veteran indicators, fitted on the training design with a quasibinomial
# family.
myformula1 <- reformulate(
  c(
    "Age_45_to_54", "Age_55_to_64", "Age_65", "Male",
    "Black", "Hispanic", "Other_Race", "Previous_Marriage", "Never_Married",
    "Veteran"
  ),
  response = "MI"
)
m1 <- survey::svyglm(myformula1, design = train2019,
                     family = quasibinomial, maxit = 100)
# plot_model() takes the plot heading through `title`; `main` is not one of
# its arguments, so the heading passed as `main` was never applied.
pm1 <- plot_model(m1, title = "1. Demographics", show.values = TRUE,
                  show.p = TRUE, value.offset = .4)
mydef(m1)
## [,1]
## VIF 7.158
## R2 0.083
VIF | 7.158 |
R2 | 0.083 |
pm1
# Model 2 ("Socioeconomics"): age, sex, marital status and veteran status plus
# the income, home-rental, employment and education indicators, fitted on the
# training design with a quasibinomial family.
myformula2 <- reformulate(
  c(
    "Age_45_to_54", "Age_55_to_64", "Age_65", "Male",
    "Previous_Marriage", "Never_Married", "Veteran",
    "Income_LT25K", "Income_LT75K", "Income_DKR", "Rent_Home",
    "Retired_Unable", "Out_of_Work", "Other_Not_Working",
    "Pre_High_School", "High_School", "Post_High_School"
  ),
  response = "MI"
)
m2 <- survey::svyglm(myformula2, design = train2019,
                     family = quasibinomial, maxit = 100)
mydef(m2)
## [,1]
## VIF 8.332
## R2 0.117
VIF | 8.332 |
R2 | 0.117 |
# plot_model() takes the plot heading through `title`; `main` is not one of
# its arguments, so the heading passed as `main` was never applied.
pm2 <- plot_model(m2, title = "2. Socioeconomics", show.values = TRUE,
                  show.p = TRUE, value.offset = .4)
pm2
# Model 3 ("Geography"): the model-2 predictors plus the nine Division
# indicators and Metropolitan, fitted on the training design with a
# quasibinomial family.
myformula3 <- reformulate(
  c(
    "Age_45_to_54", "Age_55_to_64", "Age_65", "Male",
    "Previous_Marriage", "Never_Married", "Veteran",
    "Income_LT25K", "Income_LT75K", "Income_DKR", "Rent_Home",
    "Retired_Unable", "Out_of_Work", "Other_Not_Working",
    "Pre_High_School", "High_School", "Post_High_School",
    "Division_D1", "Division_D2", "Division_D3", "Division_D4", "Division_D5",
    "Division_D6", "Division_D7", "Division_D8", "Division_D9", "Metropolitan"
  ),
  response = "MI"
)
m3 <- survey::svyglm(myformula3, design = train2019,
                     family = quasibinomial, maxit = 100)
mydef(m3)
## [,1]
## VIF 8.417
## R2 0.118
VIF | 8.417 |
R2 | 0.118 |
# plot_model() takes the plot heading through `title`; `main` is not one of
# its arguments, so the heading passed as `main` was never applied.
pm3 <- plot_model(m3, title = "3. Geography", show.values = TRUE,
                  show.p = TRUE, value.offset = .4)
pm3
# Model 4 ("Behavior"): a reduced set of the earlier predictors plus the
# self-rated health, behaviour and health-condition indicators, fitted on the
# training design with a quasibinomial family.
myformula4 <- reformulate(
  c(
    "Age_45_to_54", "Age_55_to_64", "Age_65", "Male",
    "Previous_Marriage", "Never_Married", "Veteran",
    "Income_LT25K", "Income_LT75K", "Income_DKR", "Rent_Home",
    "Retired_Unable",
    "Pre_High_School", "High_School", "Post_High_School",
    "Division_D3", "Division_D4",
    "Division_D6", "Division_D7", "Metropolitan",
    "Poor_Health", "Fair_Health", "Good_Health",
    "Smoker", "Chew_Snuff", "Poor_Exercise", "Percent_Drink", "Percent_Drink_2",
    "High_Cholesterol", "High_BP", "High_BMI",
    "Diabetic", "Prediabetic", "Poor_Health_Percent",
    "Poor_Mental_Health_Percent", "Depression",
    "Stroke", "Asthma", "Skin_Cancer", "Cancer", "COPD", "Kidney", "Arthritis"
  ),
  response = "MI"
)
m4 <- survey::svyglm(myformula4, design = train2019,
                     family = quasibinomial, maxit = 100)
mydef(m4)
## [,1]
## VIF 18.044
## R2 0.209
VIF | 18.044 |
R2 | 0.209 |
# plot_model() takes the plot heading through `title`; `main` is not one of
# its arguments, so the heading passed as `main` was never applied.
pm4 <- plot_model(m4, title = "4. Behavior", show.values = TRUE,
                  show.p = TRUE, value.offset = .4)
pm4
# Model 5 ("Access"): a further reduced predictor set plus the four
# health-care access indicators (No_Health_Plan, No_Doctor, Cost, No_Checkup),
# fitted on the training design with a quasibinomial family.
myformula5 <- reformulate(
  c(
    "Age_45_to_54", "Age_55_to_64", "Age_65", "Male",
    "Previous_Marriage", "Never_Married", "Veteran",
    "Income_LT25K", "Income_LT75K", "Income_DKR", "Rent_Home",
    "Retired_Unable",
    "Pre_High_School", "High_School", "Post_High_School",
    "Division_D4",
    "Poor_Health", "Fair_Health", "Good_Health",
    "Smoker", "Percent_Drink", "Percent_Drink_2",
    "High_Cholesterol", "High_BP",
    "Diabetic", "Poor_Health_Percent",
    "Stroke", "COPD", "Kidney", "Arthritis",
    "No_Health_Plan", "No_Doctor", "Cost", "No_Checkup"
  ),
  response = "MI"
)
m5 <- survey::svyglm(myformula5, design = train2019,
                     family = quasibinomial, maxit = 100)
mydef(m5)
## [,1]
## VIF 17.605
## R2 0.210
VIF | 17.605 |
R2 | 0.210 |
# plot_model() takes the plot heading through `title`; `main` is not one of
# its arguments, so the heading passed as `main` was never applied.
pm5 <- plot_model(m5, title = "5. Access", show.values = TRUE,
                  show.p = TRUE, value.offset = .4)
pm5
# Model 6 ("Total, Only Significant"): the model-5 predictors without
# No_Health_Plan, fitted on the training design with a quasibinomial family
# and a higher iteration cap (maxit = 500).
myformula6 <- reformulate(
  c(
    "Age_45_to_54", "Age_55_to_64", "Age_65", "Male",
    "Previous_Marriage", "Never_Married", "Veteran",
    "Income_LT25K", "Income_LT75K", "Income_DKR", "Rent_Home",
    "Retired_Unable",
    "Pre_High_School", "High_School", "Post_High_School",
    "Division_D4",
    "Poor_Health", "Fair_Health", "Good_Health",
    "Smoker", "Percent_Drink", "Percent_Drink_2",
    "High_Cholesterol", "High_BP",
    "Diabetic", "Poor_Health_Percent",
    "Stroke", "COPD", "Kidney", "Arthritis",
    "No_Doctor", "Cost", "No_Checkup"
  ),
  response = "MI"
)
m6 <- survey::svyglm(myformula6, design = train2019,
                     family = quasibinomial, maxit = 500)
mydef(m6)
## [,1]
## VIF 17.602
## R2 0.210
VIF | 17.602 |
R2 | 0.210 |
# plot_model() takes the plot heading through `title`; `main` is not one of
# its arguments, so the heading passed as `main` was never applied. The stray
# trailing comma (an empty extra argument) is dropped as well.
pm6 <- plot_model(m6, title = "6. Total, Only Significant",
                  show.values = TRUE, show.p = TRUE)
pm6
# Model 7 ("Total, All Variables"): every predictor used in models 1-5,
# fitted on the training design with a quasibinomial family and a higher
# iteration cap (maxit = 500).
myformula7 <- reformulate(
  c(
    "Age_45_to_54", "Age_55_to_64", "Age_65", "Male",
    "Black", "Hispanic", "Other_Race", "Previous_Marriage", "Never_Married",
    "Veteran",
    "Income_LT25K", "Income_LT75K", "Income_DKR", "Rent_Home",
    "Retired_Unable", "Out_of_Work", "Other_Not_Working",
    "Pre_High_School", "High_School", "Post_High_School",
    "Division_D1", "Division_D2", "Division_D3", "Division_D4", "Division_D5",
    "Division_D6", "Division_D7", "Division_D8", "Division_D9", "Metropolitan",
    "Poor_Health", "Fair_Health", "Good_Health",
    "Smoker", "Chew_Snuff", "Poor_Exercise", "Percent_Drink", "Percent_Drink_2",
    "High_Cholesterol", "High_BP", "High_BMI",
    "Diabetic", "Prediabetic", "Poor_Health_Percent",
    "Poor_Mental_Health_Percent", "Depression",
    "Stroke", "Asthma", "Skin_Cancer", "Cancer", "COPD", "Kidney", "Arthritis",
    "No_Health_Plan", "No_Doctor", "Cost", "No_Checkup"
  ),
  response = "MI"
)
m7 <- survey::svyglm(myformula7, design = train2019,
                     family = quasibinomial, maxit = 500)
mydef(m7)
## [,1]
## VIF 18.156
## R2 0.212
VIF | 18.156 |
R2 | 0.212 |
# plot_model() takes the plot heading through `title`; `main` is not one of
# its arguments, so the heading passed as `main` was never applied. The stray
# trailing comma (an empty extra argument) is dropped as well.
pm7 <- plot_model(m7, title = "7. Total, All Variables",
                  show.values = TRUE, show.p = TRUE)
pm7
# Hold-out evaluation of model 6: predicted probabilities for the test rows
# are turned into class labels, 1 (MI) above the 0.17 cutoff and 0 otherwise.
cutpoints <- predict(m6, mytest, type = 'response')
cutpoints <- as.numeric(cutpoints > .17) #.17 m6
# Argument order matters here: F1_Score(), Precision() and Recall() take
# (y_true, y_pred), whereas Accuracy() takes (y_pred, y_true). The predictions
# used to be passed first to all of them, which reported recall under the
# "Precision" label and vice versa (F1 and accuracy are unaffected by the
# order). The observed outcome now goes first for the three class-wise metrics.
print(noquote(c('F1 Score, No MI: ',F1_Score(mytest$MI, cutpoints, positive='0'))))
## [1] F1 Score, No MI: 0.939864247385317
print(noquote(c('Precision, No MI: ',Precision(mytest$MI, cutpoints, positive='0'))))
## [1] Precision, No MI: 0.954211669918003
print(noquote(c('Recall, No MI: ',Recall(mytest$MI, cutpoints, positive='0'))))
## [1] Recall, No MI: 0.925941886234917
print(noquote(c('F1 Score, MI: ',F1_Score(mytest$MI, cutpoints, positive='1'))))
## [1] F1 Score, MI: 0.338744309885768
print(noquote(c('Precision, MI: ',Precision(mytest$MI, cutpoints, positive='1'))))
## [1] Precision, MI: 0.290683962264151
print(noquote(c('Recall, MI: ',Recall(mytest$MI, cutpoints, positive='1'))))
## [1] Recall, MI: 0.405844824037868
print(noquote(c('Accuracy: ',Accuracy(cutpoints, mytest$MI))))
## [1] Accuracy: 0.889754421135534
# Spell out `deviance`; the earlier `m6$devianc` only worked through partial
# matching of `$` on list element names.
print(noquote(c('Pseudo-R2: ', 1-m6$deviance/m6$null.deviance)))
## [1] Pseudo-R2: 0.210038168165197
# Stack the estimate tables behind the seven coefficient plots. Columns 1, 2,
# 5 and 6 of each plot's data are kept (the plot below reads term, estimate,
# conf.low and conf.high from them) and each row is tagged with its model.
t1 <- pm1$data[, c(1, 2, 5, 6)]
t1$Group <- rep("1. Demography", nrow(t1))
t2 <- pm2$data[, c(1, 2, 5, 6)]
t2$Group <- rep("2. SES", nrow(t2))
t3 <- pm3$data[, c(1, 2, 5, 6)]
t3$Group <- rep("3. Geog.", nrow(t3))
t4 <- pm4$data[, c(1, 2, 5, 6)]
t4$Group <- rep("4. Behavior", nrow(t4))
t5 <- pm5$data[, c(1, 2, 5, 6)]
t5$Group <- rep("5. Access", nrow(t5))
t6 <- pm6$data[, c(1, 2, 5, 6)]
t6$Group <- rep("6. Significant Only", nrow(t6))
t7 <- pm7$data[, c(1, 2, 5, 6)]
t7$Group <- rep("7. All Variables", nrow(t7))
ttot <- rbind(t1, t2, t3, t4, t5, t6, t7)
ttot$Group <- as.factor(ttot$Group)
# One panel per model: point estimate with its confidence interval for every
# term, flipped so that terms run down the vertical axis.
ggplot(data = ttot,
       aes(x = term, y = estimate, ymin = .5, ymax = 2.0)) +
  geom_point(aes(col = Group)) +
  # Dashed reference line at an odds ratio of 1. A fixed yintercept needs no
  # aesthetic mapping; the former aes(fill = Group) was ignored with a warning.
  geom_hline(yintercept = 1, linetype = 2) +
  xlab('') + ylab("Odds Ratio (95% Confidence Interval)") +
  geom_errorbar(aes(ymin = conf.low,
                    ymax = conf.high, col = Group), width = 0.5, cex = 1) +
  facet_grid(~Group) +
  theme(plot.title = element_text(size = 16, face = "bold"),
        axis.text.y = element_text(size = 8),
        axis.text.x = element_text(size = 8, face = "bold", angle = 90),
        axis.title = element_text(size = 8, face = "bold"),
        strip.text.y = element_text(hjust = 0, vjust = 1, angle = 180, face = "bold")) +
  # "none" replaces the deprecated `guides(colour = FALSE)`.
  guides(colour = "none") +
  coord_flip()
# Odds-ratio table for model 6 (training design): exponentiate the estimate
# and both confidence limits, round estimate, limits and p-value to 3
# decimals, drop the columns that are not reported, then print the result.
# NOTE(review): the console print of this object still heads the estimate
# column "Log-Odds" although the values have been exponentiated here.
a1 <- model_parameters(m6, df_method = 'wald')
for (nm in c("Coefficient", "CI_low", "CI_high")) {
  a1[[nm]] <- round(exp(a1[[nm]]), 3)
}
a1$p <- round(a1$p, 3)
for (nm in c("SE", "t", "df_error", "CI")) {
  a1[[nm]] <- NULL
}
myprint(a1)
## Parameter | Log-Odds | 95% CI | p
## ------------------------------------------------------
## (Intercept) | 3.00e-03 | [0.00, 0.00] | < .001
## Age_45_to_54 | 1.74 | [1.45, 2.10] | < .001
## Age_55_to_64 | 2.25 | [1.89, 2.68] | < .001
## Age_65 | 3.30 | [2.76, 3.95] | < .001
## Male | 2.24 | [2.08, 2.41] | < .001
## Previous_Marriage | 1.13 | [1.05, 1.22] | 0.001
## Never_Married | 0.79 | [0.70, 0.90] | < .001
## Veteran | 1.12 | [1.03, 1.23] | 0.008
## Income_LT25K | 1.30 | [1.15, 1.46] | < .001
## Income_LT75K | 1.20 | [1.08, 1.33] | < .001
## Income_DKR | 1.14 | [1.01, 1.27] | 0.028
## Rent_Home | 1.10 | [1.01, 1.19] | 0.037
## Retired_Unable | 1.29 | [1.19, 1.40] | < .001
## Pre_High_School | 1.19 | [1.06, 1.34] | 0.004
## High_School | 1.10 | [1.01, 1.20] | 0.030
## Post_High_School | 1.11 | [1.02, 1.21] | 0.020
## Division_D4 | 1.11 | [1.04, 1.20] | 0.004
## Poor_Health | 2.77 | [2.40, 3.18] | < .001
## Fair_Health | 2.15 | [1.93, 2.40] | < .001
## Good_Health | 1.53 | [1.40, 1.67] | < .001
## Smoker | 1.50 | [1.41, 1.61] | < .001
## Percent_Drink | 0.37 | [0.22, 0.61] | < .001
## Percent_Drink_2 | 2.19 | [1.28, 3.75] | 0.004
## High_Cholesterol | 1.65 | [1.54, 1.76] | < .001
## High_BP | 1.73 | [1.60, 1.87] | < .001
## Diabetic | 1.31 | [1.22, 1.42] | < .001
## Poor_Health_Percent | 1.12 | [1.01, 1.24] | 0.032
## Stroke | 2.92 | [2.66, 3.20] | < .001
## COPD | 1.59 | [1.47, 1.73] | < .001
## Kidney | 1.38 | [1.24, 1.53] | < .001
## Arthritis | 1.16 | [1.09, 1.25] | < .001
## No_Doctor | 0.84 | [0.74, 0.95] | 0.005
## Cost | 1.23 | [1.11, 1.36] | < .001
## No_Checkup | 0.78 | [0.69, 0.87] | < .001
Parameter | Coefficient | CI_low | CI_high | p |
---|---|---|---|---|
(Intercept) | 0.003 | 0.002 | 0.003 | 0.000 |
Age_45_to_54 | 1.744 | 1.451 | 2.095 | 0.000 |
Age_55_to_64 | 2.249 | 1.886 | 2.682 | 0.000 |
Age_65 | 3.302 | 2.760 | 3.950 | 0.000 |
Male | 2.241 | 2.083 | 2.410 | 0.000 |
Previous_Marriage | 1.134 | 1.052 | 1.223 | 0.001 |
Never_Married | 0.790 | 0.697 | 0.896 | 0.000 |
Veteran | 1.125 | 1.032 | 1.226 | 0.008 |
Income_LT25K | 1.296 | 1.149 | 1.461 | 0.000 |
Income_LT75K | 1.199 | 1.084 | 1.327 | 0.000 |
Income_DKR | 1.137 | 1.014 | 1.274 | 0.028 |
Rent_Home | 1.096 | 1.006 | 1.194 | 0.037 |
Retired_Unable | 1.288 | 1.186 | 1.398 | 0.000 |
Pre_High_School | 1.188 | 1.056 | 1.336 | 0.004 |
High_School | 1.100 | 1.009 | 1.198 | 0.030 |
Post_High_School | 1.108 | 1.016 | 1.208 | 0.020 |
Division_D4 | 1.114 | 1.036 | 1.197 | 0.004 |
Poor_Health | 2.765 | 2.405 | 3.179 | 0.000 |
Fair_Health | 2.153 | 1.934 | 2.397 | 0.000 |
Good_Health | 1.526 | 1.395 | 1.670 | 0.000 |
Smoker | 1.504 | 1.408 | 1.607 | 0.000 |
Percent_Drink | 0.368 | 0.221 | 0.613 | 0.000 |
Percent_Drink_2 | 2.188 | 1.277 | 3.746 | 0.004 |
High_Cholesterol | 1.648 | 1.539 | 1.764 | 0.000 |
High_BP | 1.730 | 1.603 | 1.867 | 0.000 |
Diabetic | 1.315 | 1.220 | 1.416 | 0.000 |
Poor_Health_Percent | 1.117 | 1.009 | 1.235 | 0.032 |
Stroke | 2.915 | 2.657 | 3.197 | 0.000 |
COPD | 1.591 | 1.466 | 1.726 | 0.000 |
Kidney | 1.377 | 1.238 | 1.532 | 0.000 |
Arthritis | 1.164 | 1.088 | 1.245 | 0.000 |
No_Doctor | 0.837 | 0.738 | 0.949 | 0.005 |
Cost | 1.225 | 1.107 | 1.356 | 0.000 |
No_Checkup | 0.777 | 0.690 | 0.874 | 0.000 |
# Refit the model-6 formula on the full-data design and build the same
# odds-ratio table as for the training fit: exponentiated estimate and
# confidence limits, everything rounded to 3 decimals, unreported columns
# removed.
# NOTE(review): the console print of this object still heads the estimate
# column "Log-Odds" although the values have been exponentiated here.
m8 <- survey::svyglm(
  myformula6,
  design = svy2019,
  family = quasibinomial,
  maxit = 500
) #significant var. only
a2 <- model_parameters(m8, df_method = 'wald')
for (nm in c("Coefficient", "CI_low", "CI_high")) {
  a2[[nm]] <- round(exp(a2[[nm]]), 3)
}
a2$p <- round(a2$p, 3)
for (nm in c("SE", "t", "df_error", "CI")) {
  a2[[nm]] <- NULL
}
myprint(a2)
## Parameter | Log-Odds | 95% CI | p
## ------------------------------------------------------
## (Intercept) | 3.00e-03 | [0.00, 0.00] | < .001
## Age_45_to_54 | 1.60 | [1.35, 1.89] | < .001
## Age_55_to_64 | 2.10 | [1.79, 2.47] | < .001
## Age_65 | 3.07 | [2.61, 3.61] | < .001
## Male | 2.17 | [2.03, 2.32] | < .001
## Previous_Marriage | 1.12 | [1.04, 1.20] | 0.002
## Never_Married | 0.80 | [0.71, 0.90] | < .001
## Veteran | 1.16 | [1.07, 1.25] | < .001
## Income_LT25K | 1.30 | [1.16, 1.45] | < .001
## Income_LT75K | 1.22 | [1.11, 1.33] | < .001
## Income_DKR | 1.16 | [1.04, 1.28] | 0.008
## Rent_Home | 1.08 | [1.00, 1.17] | 0.063
## Retired_Unable | 1.30 | [1.20, 1.40] | < .001
## Pre_High_School | 1.23 | [1.11, 1.37] | < .001
## High_School | 1.12 | [1.03, 1.21] | 0.006
## Post_High_School | 1.09 | [1.01, 1.18] | 0.031
## Division_D4 | 1.13 | [1.05, 1.20] | < .001
## Poor_Health | 2.88 | [2.54, 3.27] | < .001
## Fair_Health | 2.19 | [1.99, 2.41] | < .001
## Good_Health | 1.55 | [1.43, 1.68] | < .001
## Smoker | 1.48 | [1.40, 1.58] | < .001
## Percent_Drink | 0.38 | [0.24, 0.61] | < .001
## Percent_Drink_2 | 2.14 | [1.30, 3.52] | 0.003
## High_Cholesterol | 1.65 | [1.55, 1.76] | < .001
## High_BP | 1.68 | [1.57, 1.80] | < .001
## Diabetic | 1.33 | [1.24, 1.43] | < .001
## Poor_Health_Percent | 1.12 | [1.02, 1.24] | 0.017
## Stroke | 2.95 | [2.71, 3.21] | < .001
## COPD | 1.56 | [1.45, 1.68] | < .001
## Kidney | 1.36 | [1.24, 1.50] | < .001
## Arthritis | 1.17 | [1.10, 1.24] | < .001
## No_Doctor | 0.85 | [0.76, 0.96] | 0.008
## Cost | 1.23 | [1.12, 1.35] | < .001
## No_Checkup | 0.81 | [0.72, 0.90] | < .001
Parameter | Coefficient | CI_low | CI_high | p |
---|---|---|---|---|
(Intercept) | 0.003 | 0.002 | 0.003 | 0.000 |
Age_45_to_54 | 1.597 | 1.349 | 1.891 | 0.000 |
Age_55_to_64 | 2.102 | 1.791 | 2.467 | 0.000 |
Age_65 | 3.066 | 2.606 | 3.607 | 0.000 |
Male | 2.171 | 2.032 | 2.320 | 0.000 |
Previous_Marriage | 1.117 | 1.043 | 1.197 | 0.002 |
Never_Married | 0.796 | 0.707 | 0.897 | 0.000 |
Veteran | 1.159 | 1.073 | 1.253 | 0.000 |
Income_LT25K | 1.297 | 1.163 | 1.446 | 0.000 |
Income_LT75K | 1.218 | 1.112 | 1.335 | 0.000 |
Income_DKR | 1.155 | 1.038 | 1.285 | 0.008 |
Rent_Home | 1.078 | 0.996 | 1.167 | 0.063 |
Retired_Unable | 1.297 | 1.202 | 1.400 | 0.000 |
Pre_High_School | 1.232 | 1.106 | 1.372 | 0.000 |
High_School | 1.118 | 1.032 | 1.210 | 0.006 |
Post_High_School | 1.091 | 1.008 | 1.181 | 0.031 |
Division_D4 | 1.127 | 1.055 | 1.204 | 0.000 |
Poor_Health | 2.882 | 2.542 | 3.267 | 0.000 |
Fair_Health | 2.189 | 1.986 | 2.412 | 0.000 |
Good_Health | 1.548 | 1.426 | 1.681 | 0.000 |
Smoker | 1.483 | 1.396 | 1.576 | 0.000 |
Percent_Drink | 0.382 | 0.238 | 0.612 | 0.000 |
Percent_Drink_2 | 2.138 | 1.299 | 3.517 | 0.003 |
High_Cholesterol | 1.653 | 1.553 | 1.759 | 0.000 |
High_BP | 1.683 | 1.569 | 1.805 | 0.000 |
Diabetic | 1.331 | 1.244 | 1.425 | 0.000 |
Poor_Health_Percent | 1.124 | 1.021 | 1.237 | 0.017 |
Stroke | 2.950 | 2.712 | 3.209 | 0.000 |
COPD | 1.558 | 1.446 | 1.677 | 0.000 |
Kidney | 1.362 | 1.235 | 1.502 | 0.000 |
Arthritis | 1.168 | 1.098 | 1.242 | 0.000 |
No_Doctor | 0.853 | 0.759 | 0.959 | 0.008 |
Cost | 1.234 | 1.125 | 1.353 | 0.000 |
No_Checkup | 0.807 | 0.724 | 0.900 | 0.000 |
# Compare the training-set fit (m6) with the full-data fit (m8) of the same
# formula, side by side.
# plot_model() takes the plot heading through `title`; `main` is not one of
# its arguments, so the headings passed as `main` were never applied.
pm8 <- plot_model(m6, title = "1. Training Set, Significant Variables")
pm9 <- plot_model(m8, title = "2. Full Set, Significant Variables")
# Columns 1, 2, 5 and 6 of each plot's data are kept (the plot below reads
# term, estimate, conf.low and conf.high from them), tagged with their source.
t8 <- pm8$data[, c(1, 2, 5, 6)]
t8$Group <- rep("1. Training Set, Significant Variables", nrow(t8))
t9 <- pm9$data[, c(1, 2, 5, 6)]
t9$Group <- rep("2. Full Set, Significant Variables", nrow(t9))
ttot2 <- rbind(t8, t9)
ttot2$Group <- as.factor(ttot2$Group)
ggplot(data = ttot2,
       aes(x = term, y = estimate, ymin = .5, ymax = 2.0)) +
  geom_point(aes(col = Group)) +
  # Dashed reference line at an odds ratio of 1. A fixed yintercept needs no
  # aesthetic mapping; the former aes(fill = Group) was ignored with a warning.
  geom_hline(yintercept = 1, linetype = 2) +
  xlab('') + ylab("Odds Ratio (95% Confidence Interval)") +
  geom_errorbar(aes(ymin = conf.low,
                    ymax = conf.high, col = Group), width = 0.5, cex = 1) +
  facet_grid(~Group) +
  theme(plot.title = element_text(size = 16, face = "bold"),
        axis.text.y = element_text(size = 10),
        axis.text.x = element_text(face = "bold"),
        axis.title = element_text(size = 12, face = "bold"),
        strip.text.y = element_text(hjust = 0, vjust = 1, angle = 180, face = "bold")) +
  # "none" replaces the deprecated `guides(colour = FALSE)`.
  guides(colour = "none") +
  coord_flip()
# Side-by-side comparison of the training-set and full-data odds ratios.
# Each cell is the text "estimate (low,high)", with every number formatted to
# at least 3 decimals. The text is also stored on a1/a2 as column `Interval`.
# Rows are paired by position, which relies on both tables listing the same
# parameters in the same order.
or_ci_text <- function(params) {
  noquote(paste0(
    format(params$Coefficient, nsmall = 3), ' (',
    format(params$CI_low, nsmall = 3), ',',
    format(params$CI_high, nsmall = 3), ')'
  ))
}
a1$Interval <- or_ci_text(a1)
a2$Interval <- or_ci_text(a2)
newdata <- cbind(a1$Parameter, a1$Interval, a2$Interval)
colnames(newdata) <- c(
  "Variable",
  "Training Data Odds Ratio (95% CI)",
  "Full Data Odds Ratio (95% CI)"
)
myprint(newdata)
## Variable Training Data Odds Ratio (95% CI)
## [1,] "(Intercept)" "0.003 (0.002,0.003)"
## [2,] "Age_45_to_54" "1.744 (1.451,2.095)"
## [3,] "Age_55_to_64" "2.249 (1.886,2.682)"
## [4,] "Age_65" "3.302 (2.760,3.950)"
## [5,] "Male" "2.241 (2.083,2.410)"
## [6,] "Previous_Marriage" "1.134 (1.052,1.223)"
## [7,] "Never_Married" "0.790 (0.697,0.896)"
## [8,] "Veteran" "1.125 (1.032,1.226)"
## [9,] "Income_LT25K" "1.296 (1.149,1.461)"
## [10,] "Income_LT75K" "1.199 (1.084,1.327)"
## [11,] "Income_DKR" "1.137 (1.014,1.274)"
## [12,] "Rent_Home" "1.096 (1.006,1.194)"
## [13,] "Retired_Unable" "1.288 (1.186,1.398)"
## [14,] "Pre_High_School" "1.188 (1.056,1.336)"
## [15,] "High_School" "1.100 (1.009,1.198)"
## [16,] "Post_High_School" "1.108 (1.016,1.208)"
## [17,] "Division_D4" "1.114 (1.036,1.197)"
## [18,] "Poor_Health" "2.765 (2.405,3.179)"
## [19,] "Fair_Health" "2.153 (1.934,2.397)"
## [20,] "Good_Health" "1.526 (1.395,1.670)"
## [21,] "Smoker" "1.504 (1.408,1.607)"
## [22,] "Percent_Drink" "0.368 (0.221,0.613)"
## [23,] "Percent_Drink_2" "2.188 (1.277,3.746)"
## [24,] "High_Cholesterol" "1.648 (1.539,1.764)"
## [25,] "High_BP" "1.730 (1.603,1.867)"
## [26,] "Diabetic" "1.315 (1.220,1.416)"
## [27,] "Poor_Health_Percent" "1.117 (1.009,1.235)"
## [28,] "Stroke" "2.915 (2.657,3.197)"
## [29,] "COPD" "1.591 (1.466,1.726)"
## [30,] "Kidney" "1.377 (1.238,1.532)"
## [31,] "Arthritis" "1.164 (1.088,1.245)"
## [32,] "No_Doctor" "0.837 (0.738,0.949)"
## [33,] "Cost" "1.225 (1.107,1.356)"
## [34,] "No_Checkup" "0.777 (0.690,0.874)"
## Full Data Odds Ratio (95% CI)
## [1,] "0.003 (0.002,0.003)"
## [2,] "1.597 (1.349,1.891)"
## [3,] "2.102 (1.791,2.467)"
## [4,] "3.066 (2.606,3.607)"
## [5,] "2.171 (2.032,2.320)"
## [6,] "1.117 (1.043,1.197)"
## [7,] "0.796 (0.707,0.897)"
## [8,] "1.159 (1.073,1.253)"
## [9,] "1.297 (1.163,1.446)"
## [10,] "1.218 (1.112,1.335)"
## [11,] "1.155 (1.038,1.285)"
## [12,] "1.078 (0.996,1.167)"
## [13,] "1.297 (1.202,1.400)"
## [14,] "1.232 (1.106,1.372)"
## [15,] "1.118 (1.032,1.210)"
## [16,] "1.091 (1.008,1.181)"
## [17,] "1.127 (1.055,1.204)"
## [18,] "2.882 (2.542,3.267)"
## [19,] "2.189 (1.986,2.412)"
## [20,] "1.548 (1.426,1.681)"
## [21,] "1.483 (1.396,1.576)"
## [22,] "0.382 (0.238,0.612)"
## [23,] "2.138 (1.299,3.517)"
## [24,] "1.653 (1.553,1.759)"
## [25,] "1.683 (1.569,1.805)"
## [26,] "1.331 (1.244,1.425)"
## [27,] "1.124 (1.021,1.237)"
## [28,] "2.950 (2.712,3.209)"
## [29,] "1.558 (1.446,1.677)"
## [30,] "1.362 (1.235,1.502)"
## [31,] "1.168 (1.098,1.242)"
## [32,] "0.853 (0.759,0.959)"
## [33,] "1.234 (1.125,1.353)"
## [34,] "0.807 (0.724,0.900)"
Variable | Training Data Odds Ratio (95% CI) | Full Data Odds Ratio (95% CI) |
---|---|---|
(Intercept) | 0.003 (0.002,0.003) | 0.003 (0.002,0.003) |
Age_45_to_54 | 1.744 (1.451,2.095) | 1.597 (1.349,1.891) |
Age_55_to_64 | 2.249 (1.886,2.682) | 2.102 (1.791,2.467) |
Age_65 | 3.302 (2.760,3.950) | 3.066 (2.606,3.607) |
Male | 2.241 (2.083,2.410) | 2.171 (2.032,2.320) |
Previous_Marriage | 1.134 (1.052,1.223) | 1.117 (1.043,1.197) |
Never_Married | 0.790 (0.697,0.896) | 0.796 (0.707,0.897) |
Veteran | 1.125 (1.032,1.226) | 1.159 (1.073,1.253) |
Income_LT25K | 1.296 (1.149,1.461) | 1.297 (1.163,1.446) |
Income_LT75K | 1.199 (1.084,1.327) | 1.218 (1.112,1.335) |
Income_DKR | 1.137 (1.014,1.274) | 1.155 (1.038,1.285) |
Rent_Home | 1.096 (1.006,1.194) | 1.078 (0.996,1.167) |
Retired_Unable | 1.288 (1.186,1.398) | 1.297 (1.202,1.400) |
Pre_High_School | 1.188 (1.056,1.336) | 1.232 (1.106,1.372) |
High_School | 1.100 (1.009,1.198) | 1.118 (1.032,1.210) |
Post_High_School | 1.108 (1.016,1.208) | 1.091 (1.008,1.181) |
Division_D4 | 1.114 (1.036,1.197) | 1.127 (1.055,1.204) |
Poor_Health | 2.765 (2.405,3.179) | 2.882 (2.542,3.267) |
Fair_Health | 2.153 (1.934,2.397) | 2.189 (1.986,2.412) |
Good_Health | 1.526 (1.395,1.670) | 1.548 (1.426,1.681) |
Smoker | 1.504 (1.408,1.607) | 1.483 (1.396,1.576) |
Percent_Drink | 0.368 (0.221,0.613) | 0.382 (0.238,0.612) |
Percent_Drink_2 | 2.188 (1.277,3.746) | 2.138 (1.299,3.517) |
High_Cholesterol | 1.648 (1.539,1.764) | 1.653 (1.553,1.759) |
High_BP | 1.730 (1.603,1.867) | 1.683 (1.569,1.805) |
Diabetic | 1.315 (1.220,1.416) | 1.331 (1.244,1.425) |
Poor_Health_Percent | 1.117 (1.009,1.235) | 1.124 (1.021,1.237) |
Stroke | 2.915 (2.657,3.197) | 2.950 (2.712,3.209) |
COPD | 1.591 (1.466,1.726) | 1.558 (1.446,1.677) |
Kidney | 1.377 (1.238,1.532) | 1.362 (1.235,1.502) |
Arthritis | 1.164 (1.088,1.245) | 1.168 (1.098,1.242) |
No_Doctor | 0.837 (0.738,0.949) | 0.853 (0.759,0.959) |
Cost | 1.225 (1.107,1.356) | 1.234 (1.125,1.353) |
No_Checkup | 0.777 (0.690,0.874) | 0.807 (0.724,0.900) |