Made with R Studio version 2024.12.1
Load the data set from 2023 National Survey on Drug Use and Health (NSDUH) into R Studio.
Data Table (Quantitative):
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pastecs)
##
## Attaching package: 'pastecs'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## The following object is masked from 'package:tidyr':
##
## extract
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(stargazer)
##
## Please cite as:
##
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(janitor)
## Warning: package 'janitor' was built under R version 4.4.3
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(broom)
## Warning: package 'broom' was built under R version 4.4.3
load('NSDUH_2023.Rdata')
This dataset is too large. Let’s create smaller dataset.
Select just the 11 variables: SUTNEEDPY, SUTRTPY, CAIDCHIP, IRINSUR4, COUTYP4, IRSEX, AGE3, NEWRACE2, EDUHIGHCAT, INCOME, IRWRKSTAT18
select <- dplyr::select
Treatment<-select(puf2023_102124, SUTNEEDPY, SUTRTPY, CAIDCHIP, IRINSUR4, COUTYP4, IRSEX, AGE3, NEWRACE2, EDUHIGHCAT, INCOME, IRWRKSTAT18)
Respondents who are aged 12-17 (AGE3=1, 2, 3) as well as only respondents who do not need SUD treatment (SUTNEEDPY = 0) will be removed from the data set, as well as responses to Medicaid coverage (CAIDCHIP= 85, 94, 97, 98)
TreatmentClean <- Treatment %>%
filter(AGE3 >=4 & SUTNEEDPY >=1 & CAIDCHIP <=3)
TreatmentClean <- TreatmentClean %>%
mutate(across(everything(), as.factor))
summary(TreatmentClean)
## SUTNEEDPY SUTRTPY CAIDCHIP IRINSUR4 COUTYP4 IRSEX AGE3 NEWRACE2
## 1:10156 0:7991 1:3189 1:8913 1:4537 1:5097 9 :2765 1:5942
## 1:2165 2:6967 2:1243 2:3955 2:5059 5 :1600 2:1227
## 3:1664 8 :1341 3: 243
## 4 :1123 4: 54
## 7 :1060 5: 255
## 6 :1022 6: 564
## (Other):1245 7:1871
## EDUHIGHCAT INCOME IRWRKSTAT18
## 1:1265 1:2172 1:5163
## 2:2984 2:2910 2:1621
## 3:3152 3:1497 3: 874
## 4:2755 4:3577 4:2498
##
##
##
str(TreatmentClean)
## tibble [10,156 × 11] (S3: tbl_df/tbl/data.frame)
## $ SUTNEEDPY : Factor w/ 1 level "1": 1 1 1 1 1 1 1 1 1 1 ...
## $ SUTRTPY : Factor w/ 2 levels "0","1": 1 2 2 1 2 1 1 1 2 1 ...
## $ CAIDCHIP : Factor w/ 2 levels "1","2": 2 1 2 1 2 2 2 1 2 1 ...
## $ IRINSUR4 : Factor w/ 2 levels "1","2": 2 1 2 1 1 1 2 1 1 1 ...
## $ COUTYP4 : Factor w/ 3 levels "1","2","3": 3 2 1 3 1 2 2 3 2 1 ...
## $ IRSEX : Factor w/ 2 levels "1","2": 1 2 1 1 1 1 1 1 1 1 ...
## $ AGE3 : Factor w/ 8 levels "4","5","6","7",..: 6 6 5 7 7 4 6 5 1 2 ...
## $ NEWRACE2 : Factor w/ 7 levels "1","2","3","4",..: 1 4 1 3 1 7 1 1 1 3 ...
## $ EDUHIGHCAT : Factor w/ 4 levels "1","2","3","4": 2 2 4 2 4 4 3 1 2 3 ...
## $ INCOME : Factor w/ 4 levels "1","2","3","4": 4 1 2 2 4 4 2 2 4 4 ...
## $ IRWRKSTAT18: Factor w/ 4 levels "1","2","3","4": 1 4 1 4 1 1 3 1 1 3 ...
There are no N/As to remove from this data set. All 11 variables have values for all the remaining observations. Let’s recode them to give them meaningful labels.
TreatmentLabeled <- TreatmentClean %>%
mutate(
ReceivedTreatment = factor(SUTRTPY, levels = c(0, 1), labels = c("No", "Yes")),
MedicaidCoverage = factor(CAIDCHIP,
levels = c(1, 2),
labels = c("Medicaid", "No Medicaid")),
Insurance = factor(IRINSUR4,
levels = c(1, 2),
labels = c("Insurance", "No Insurance")),
Geography = factor(COUTYP4,
levels = c(1, 2, 3),
labels = c("Large Metro", "Small Metro", "Nonmetro")),
Sex = factor(IRSEX,
levels = c(1, 2),
labels = c("Male", "Female")),
Age = factor(AGE3,
levels = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
labels = c("12-13 years", "14-15 years", "16-17 years", "18-20 years", "21-23 years", "24-25 years", "26-29 years", "30-34 years", "35-49 years", "50-64 years", "65 or older")),
Race = factor(NEWRACE2,
levels = c(1, 2, 3, 4, 5, 6, 7),
labels = c("White", "Black", "Native American", "Native Hawaiian/Pacific Islander", "Asian", "More than one", "Hispanic")),
EducationLevel = factor(EDUHIGHCAT,
levels = c(1, 2, 3, 4, 5),
labels = c("less than H.S", "H.S Grad", "Some College", "College Grad", "12-17 years old")),
Income = factor(INCOME,
levels = c(1, 2, 3, 4),
labels = c("<$20,000", "$20,000-$49,000", "$50,000-$74,000", "$75,000 or more")),
Employment = factor(IRWRKSTAT18,
levels = c(1, 2, 3, 4),
labels = c("Full-time", "Part-time", "Unemployed", "Other")),
) %>%
select(-SUTNEEDPY,-SUTRTPY,-CAIDCHIP,-IRINSUR4,-COUTYP4, -IRSEX, -AGE3, -NEWRACE2, -EDUHIGHCAT, -INCOME, -IRWRKSTAT18)
Let’s explore this data now…
table(TreatmentLabeled$ReceivedTreatment, TreatmentLabeled$Insurance, TreatmentLabeled$MedicaidCoverage)
## , , = Medicaid
##
##
## Insurance No Insurance
## No 2155 0
## Yes 1034 0
##
## , , = No Medicaid
##
##
## Insurance No Insurance
## No 4806 1030
## Yes 918 213
tabyl(TreatmentLabeled$ReceivedTreatment)
## TreatmentLabeled$ReceivedTreatment n percent
## No 7991 0.7868255
## Yes 2165 0.2131745
tabyl(TreatmentLabeled$MedicaidCoverage)
## TreatmentLabeled$MedicaidCoverage n percent
## Medicaid 3189 0.3140016
## No Medicaid 6967 0.6859984
tabyl(TreatmentLabeled$Insurance)
## TreatmentLabeled$Insurance n percent
## Insurance 8913 0.8776093
## No Insurance 1243 0.1223907
tabyl(TreatmentLabeled$Geography)
## TreatmentLabeled$Geography n percent
## Large Metro 4537 0.446731
## Small Metro 3955 0.389425
## Nonmetro 1664 0.163844
tabyl(TreatmentLabeled$Sex)
## TreatmentLabeled$Sex n percent
## Male 5097 0.5018708
## Female 5059 0.4981292
tabyl(TreatmentLabeled$Age)
## TreatmentLabeled$Age n percent
## 12-13 years 0 0.00000000
## 14-15 years 0 0.00000000
## 16-17 years 0 0.00000000
## 18-20 years 1123 0.11057503
## 21-23 years 1600 0.15754234
## 24-25 years 1022 0.10063017
## 26-29 years 1060 0.10437180
## 30-34 years 1341 0.13204017
## 35-49 years 2765 0.27225286
## 50-64 years 791 0.07788499
## 65 or older 454 0.04470264
tabyl(TreatmentLabeled$Race)
## TreatmentLabeled$Race n percent
## White 5942 0.585072863
## Black 1227 0.120815282
## Native American 243 0.023926743
## Native Hawaiian/Pacific Islander 54 0.005317054
## Asian 255 0.025108310
## More than one 564 0.055533675
## Hispanic 1871 0.184226073
tabyl(TreatmentLabeled$EducationLevel)
## TreatmentLabeled$EducationLevel n percent
## less than H.S 1265 0.1245569
## H.S Grad 2984 0.2938165
## Some College 3152 0.3103584
## College Grad 2755 0.2712682
## 12-17 years old 0 0.0000000
tabyl(TreatmentLabeled$Income)
## TreatmentLabeled$Income n percent
## <$20,000 2172 0.2138637
## $20,000-$49,000 2910 0.2865301
## $50,000-$74,000 1497 0.1474006
## $75,000 or more 3577 0.3522056
tabyl(TreatmentLabeled$Employment)
## TreatmentLabeled$Employment n percent
## Full-time 5163 0.5083694
## Part-time 1621 0.1596101
## Unemployed 874 0.0860575
## Other 2498 0.2459630
freq_tables <- table(TreatmentLabeled)
variable_order <- c("ReceivedTreatment", "MedicaidCoverage", "Insurance", "Geography", "Sex", "Age", "Race", "EducationLevel", "Income", "Employment")
pivot_detailed <- TreatmentLabeled %>%
pivot_longer(everything(), names_to = "Variable", values_to = "Value") %>%
group_by(Variable, Value) %>%
summarise(Count = n(), .groups = "drop") %>%
group_by(Variable) %>%
mutate(
Percentage = round(Count/sum(Count)*100, 1),
Total = sum(Count)
) %>%
ungroup() %>%
mutate(Variable = factor(Variable, levels = variable_order)) %>% # Set custom order
arrange(Variable, Value)
# View
print(pivot_detailed, n = 200)
## # A tibble: 38 × 5
## Variable Value Count Percentage Total
## <fct> <fct> <int> <dbl> <int>
## 1 ReceivedTreatment No 7991 78.7 10156
## 2 ReceivedTreatment Yes 2165 21.3 10156
## 3 MedicaidCoverage Medicaid 3189 31.4 10156
## 4 MedicaidCoverage No Medicaid 6967 68.6 10156
## 5 Insurance Insurance 8913 87.8 10156
## 6 Insurance No Insurance 1243 12.2 10156
## 7 Geography Large Metro 4537 44.7 10156
## 8 Geography Small Metro 3955 38.9 10156
## 9 Geography Nonmetro 1664 16.4 10156
## 10 Sex Male 5097 50.2 10156
## 11 Sex Female 5059 49.8 10156
## 12 Age 18-20 years 1123 11.1 10156
## 13 Age 21-23 years 1600 15.8 10156
## 14 Age 24-25 years 1022 10.1 10156
## 15 Age 26-29 years 1060 10.4 10156
## 16 Age 30-34 years 1341 13.2 10156
## 17 Age 35-49 years 2765 27.2 10156
## 18 Age 50-64 years 791 7.8 10156
## 19 Age 65 or older 454 4.5 10156
## 20 Race White 5942 58.5 10156
## 21 Race Black 1227 12.1 10156
## 22 Race Native American 243 2.4 10156
## 23 Race Native Hawaiian/Pacific Islander 54 0.5 10156
## 24 Race Asian 255 2.5 10156
## 25 Race More than one 564 5.6 10156
## 26 Race Hispanic 1871 18.4 10156
## 27 EducationLevel less than H.S 1265 12.5 10156
## 28 EducationLevel H.S Grad 2984 29.4 10156
## 29 EducationLevel Some College 3152 31 10156
## 30 EducationLevel College Grad 2755 27.1 10156
## 31 Income <$20,000 2172 21.4 10156
## 32 Income $20,000-$49,000 2910 28.7 10156
## 33 Income $50,000-$74,000 1497 14.7 10156
## 34 Income $75,000 or more 3577 35.2 10156
## 35 Employment Full-time 5163 50.8 10156
## 36 Employment Part-time 1621 16 10156
## 37 Employment Unemployed 874 8.6 10156
## 38 Employment Other 2498 24.6 10156
class(pivot_detailed$Value)
## [1] "factor"
pivot_detailed %>% filter(Variable == "Income") %>% select(Value)
## # A tibble: 4 × 1
## Value
## <fct>
## 1 <$20,000
## 2 $20,000-$49,000
## 3 $50,000-$74,000
## 4 $75,000 or more
pivot_detailed %>%
filter(Variable == "Income") %>%
print(n = Inf)
## # A tibble: 4 × 5
## Variable Value Count Percentage Total
## <fct> <fct> <int> <dbl> <int>
## 1 Income <$20,000 2172 21.4 10156
## 2 Income $20,000-$49,000 2910 28.7 10156
## 3 Income $50,000-$74,000 1497 14.7 10156
## 4 Income $75,000 or more 3577 35.2 10156
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.4.3
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
apa_frequency_table <- pivot_detailed %>%
mutate(
Variable_Value = paste(as.character(Variable), as.character(Value), sep = ": "),
Percent = paste0(Percentage, "%")
) %>%
select(Variable_Value, Count, Percent) %>%
as.data.frame()
colnames(apa_frequency_table) <- c("Variable", "n", "%")
apa_frequency_table %>%
kbl(format = "html", caption = "<b>Table 1</b><br><i>Frequency Distribution of Sample Characteristics</i>",
align = c("l", "r", "r")) %>%
kable_classic(full_width = FALSE, html_font = "Arial") %>%
column_spec(1, width = "17em") %>%
column_spec(2, width = "5em") %>%
column_spec(3, width = "5em") %>%
row_spec(0, bold = TRUE) %>%
save_kable("Frequency_APA2.html")
Let’s plot these variables on bar chart
ggplot(TreatmentLabeled, aes(x =factor(MedicaidCoverage), fill = (ReceivedTreatment))) +
geom_bar() +
labs(title = "Medicaid Status", x="Medicaid Status", y="Respondents", fill = "Received Treatment") +
scale_fill_manual(values = c("No" = "coral", "Yes" = "steelblue"),
labels = c("No" = "No Treatment", "Yes" = "Received Treatment"))
ggplot(TreatmentLabeled, aes(x =factor(Insurance), fill = (ReceivedTreatment))) +
geom_bar() +
labs(title = "Insurance Status", x="Insurance Status", y="Respondents", fill = "Received Treatment") +
scale_fill_manual(values = c("No" = "coral", "Yes" = "steelblue"),
labels = c("No" = "No Treatment", "Yes" = "Received Treatment"))
ggplot(TreatmentLabeled, aes(x =factor(Geography), fill = (ReceivedTreatment))) +
geom_bar() +
labs(title = "Geography", x="Metro Status", y="Respondents", fill = "Received Treatment") +
scale_fill_manual(values = c("No" = "coral", "Yes" = "steelblue"),
labels = c("No" = "No Treatment", "Yes" = "Received Treatment"))
ggplot(TreatmentLabeled, aes(x =factor(Sex), fill = (ReceivedTreatment))) +
geom_bar() +
labs(title = "Sex", x="Sex assigned at birth", y="Respondents", fill = "Received Treatment") +
scale_fill_manual(values = c("No" = "coral", "Yes" = "steelblue"),
labels = c("No" = "No Treatment", "Yes" = "Received Treatment"))
ggplot(TreatmentLabeled, aes(x =factor(Age), fill = (ReceivedTreatment))) +
geom_bar() +
labs(title = "Age", x="Age Range", y="Respondents", fill = "Received Treatment") +
scale_fill_manual(values = c("No" = "coral", "Yes" = "steelblue"),
labels = c("No" = "No Treatment", "Yes" = "Received Treatment")) +
theme(axis.text.x = element_text(size = 6))
ggplot(TreatmentLabeled, aes(x =factor(Race), fill = (ReceivedTreatment))) +
geom_bar() +
labs(title = "Race", x="Race", y="Respondents", fill = "Received Treatment") +
scale_fill_manual(values = c("No" = "coral", "Yes" = "steelblue"),
labels = c("No" = "No Treatment", "Yes" = "Received Treatment")) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
theme(axis.text.x = element_text(size = 7))
ggplot(TreatmentLabeled, aes(x =factor(EducationLevel), fill = (ReceivedTreatment))) +
geom_bar() +
labs(title = "Highest Level of Education", x="Education", y="Respondents", fill = "Received Treatment") +
scale_fill_manual(values = c("No" = "coral", "Yes" = "steelblue"),
labels = c("No" = "No Treatment", "Yes" = "Received Treatment"))
ggplot(TreatmentLabeled, aes(x =factor(Income), fill = (ReceivedTreatment))) +
geom_bar() +
labs(title = "Income", x="Income", y="Respondents", fill = "Received Treatment") +
scale_fill_manual(values = c("No" = "coral", "Yes" = "steelblue"),
labels = c("No" = "No Treatment", "Yes" = "Received Treatment"))
ggplot(TreatmentLabeled, aes(x =factor(Employment), fill = (ReceivedTreatment))) +
geom_bar() +
labs(title = "Employement Status", x="Employment Status", y="Respondents", fill = "Received Treatment") +
scale_fill_manual(values = c("No" = "coral", "Yes" = "steelblue"),
labels = c("No" = "No Treatment", "Yes" = "Received Treatment"))
RELATIONSHIPS BETWEEN VARIABLES
CAN’T RUN BECAUSE ALL VARIABLES ARE FACTORS: >hist(TreatmentClean\(INCOME) >cor(TreatmentClean\)SUTRTPY, TreatmentClean\(CAIDCHIP) >Treatmentmodel1 <- lm(formula = SUTRTPY~CAIDCHIP+IRINSUR4,data=TreatmentClean) >TreatmentModel_2<-lm(SUTRTPY~CAIDCHIP+COUTYP4+AGE3,data=TreatmentClean) >library(lsr) for cramersV(table(TreatmentClean\)SUTRTPY, TreatmentClean$CAIDCHIP))
CAN RUN THIS
result <- chisq.test(table(TreatmentLabeled$ReceivedTreatment, TreatmentLabeled$Insurance))
print(result)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(TreatmentLabeled$ReceivedTreatment, TreatmentLabeled$Insurance)
## X-squared = 14.482, df = 1, p-value = 0.0001415
p-value is less than 0.05, demonstrating that receiving treatment is significantly associated with Insurance
result <- chisq.test(table(TreatmentLabeled$ReceivedTreatment, TreatmentLabeled$MedicaidCoverage))
print(result)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(TreatmentLabeled$ReceivedTreatment, TreatmentLabeled$MedicaidCoverage)
## X-squared = 340.91, df = 1, p-value < 2.2e-16
p-value is MUCH MUCH MUCH less than 0.05, demonstrating that receiving treatment is significantly associated with Medicaid Coverage
results_tidy <- TreatmentLabeled %>%
pivot_longer(cols = -ReceivedTreatment, names_to = "variable", values_to = "value") %>%
group_by(variable) %>%
nest() %>%
mutate(stats = map(data, ~broom::tidy(chisq.test(.$ReceivedTreatment, .$value)))) %>%
select(-data) %>%
unnest(cols = stats)
print(results_tidy)
## # A tibble: 9 × 5
## # Groups: variable [9]
## variable statistic p.value parameter method
## <chr> <dbl> <dbl> <int> <chr>
## 1 MedicaidCoverage 341. 4.03e-76 1 Pearson's Chi-squared test with…
## 2 Insurance 14.5 1.42e- 4 1 Pearson's Chi-squared test with…
## 3 Geography 30.7 2.18e- 7 2 Pearson's Chi-squared test
## 4 Sex 24.0 9.75e- 7 1 Pearson's Chi-squared test with…
## 5 Age 133. 1.71e-25 7 Pearson's Chi-squared test
## 6 Race 20.5 2.23e- 3 6 Pearson's Chi-squared test
## 7 EducationLevel 161. 1.22e-34 3 Pearson's Chi-squared test
## 8 Income 180. 8.71e-39 3 Pearson's Chi-squared test
## 9 Employment 205. 3.23e-44 3 Pearson's Chi-squared test
results_df <- as.data.frame(results_tidy)
stargazer(results_df,
type = "html",
out = "ChiSquare.htm",
summary = FALSE,
title = "Chi-Square Test Results by Variable",
digits = 2,
rownames = FALSE)
##
## <table style="text-align:center"><caption><strong>Chi-Square Test Results by Variable</strong></caption>
## <tr><td colspan="5" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">variable</td><td>statistic</td><td>p.value</td><td>parameter</td><td>method</td></tr>
## <tr><td colspan="5" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">MedicaidCoverage</td><td>340.91</td><td>0</td><td>1</td><td>Pearson's Chi-squared test with Yates' continuity correction</td></tr>
## <tr><td style="text-align:left">Insurance</td><td>14.48</td><td>0.0001</td><td>1</td><td>Pearson's Chi-squared test with Yates' continuity correction</td></tr>
## <tr><td style="text-align:left">Geography</td><td>30.67</td><td>0.0000</td><td>2</td><td>Pearson's Chi-squared test</td></tr>
## <tr><td style="text-align:left">Sex</td><td>23.98</td><td>0.0000</td><td>1</td><td>Pearson's Chi-squared test with Yates' continuity correction</td></tr>
## <tr><td style="text-align:left">Age</td><td>132.71</td><td>0</td><td>7</td><td>Pearson's Chi-squared test</td></tr>
## <tr><td style="text-align:left">Race</td><td>20.53</td><td>0.002</td><td>6</td><td>Pearson's Chi-squared test</td></tr>
## <tr><td style="text-align:left">EducationLevel</td><td>160.82</td><td>0</td><td>3</td><td>Pearson's Chi-squared test</td></tr>
## <tr><td style="text-align:left">Income</td><td>180.03</td><td>0</td><td>3</td><td>Pearson's Chi-squared test</td></tr>
## <tr><td style="text-align:left">Employment</td><td>205.17</td><td>0</td><td>3</td><td>Pearson's Chi-squared test</td></tr>
## <tr><td colspan="5" style="border-bottom: 1px solid black"></td></tr></table>
Let’s try another one. This one is for Cramer’s V Cramer’s V – an effect size (ES) metric that quantifies the overall associations within a contingency table’s rows and columns.
cramer_v <- function(x, y) {
cm <- table(x, y)
n <- sum(cm)
r <- nrow(cm)
k <- ncol(cm)
chi2 <- chisq.test(cm)$statistic
chi2corr <- max(0, chi2 - (k - 1) * (r - 1) / (n - 1))
kcorr <- k - (k - 1)^2 / (n - 1)
rcorr <- r - (r - 1)^2 / (n - 1)
return(sqrt(chi2corr / n) / min(kcorr - 1, rcorr - 1))
}
a1 <- cramer_v(TreatmentLabeled$ReceivedTreatment,TreatmentLabeled$ReceivedTreatment)
a2 <- cramer_v(TreatmentLabeled$ReceivedTreatment,TreatmentLabeled$MedicaidCoverage)
a3 <- cramer_v(TreatmentLabeled$ReceivedTreatment,TreatmentLabeled$Insurance)
a4 <- cramer_v(TreatmentLabeled$ReceivedTreatment,TreatmentLabeled$Geography)
a5 <- cramer_v(TreatmentLabeled$ReceivedTreatment,TreatmentLabeled$Sex)
a6 <- cramer_v(TreatmentLabeled$ReceivedTreatment,TreatmentLabeled$Age)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a7 <- cramer_v(TreatmentLabeled$ReceivedTreatment,TreatmentLabeled$Race)
a8 <- cramer_v(TreatmentLabeled$ReceivedTreatment,TreatmentLabeled$EducationLevel)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a9 <- cramer_v(TreatmentLabeled$ReceivedTreatment,TreatmentLabeled$Income)
a10 <- cramer_v(TreatmentLabeled$ReceivedTreatment,TreatmentLabeled$Employment)
a11 <- cramer_v(TreatmentLabeled$MedicaidCoverage,TreatmentLabeled$ReceivedTreatment)
a12 <- cramer_v(TreatmentLabeled$MedicaidCoverage,TreatmentLabeled$MedicaidCoverage)
a13 <- cramer_v(TreatmentLabeled$MedicaidCoverage,TreatmentLabeled$Insurance)
a14 <- cramer_v(TreatmentLabeled$MedicaidCoverage,TreatmentLabeled$Geography)
a15 <- cramer_v(TreatmentLabeled$MedicaidCoverage,TreatmentLabeled$Sex)
a16 <- cramer_v(TreatmentLabeled$MedicaidCoverage,TreatmentLabeled$Age)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a17 <- cramer_v(TreatmentLabeled$MedicaidCoverage,TreatmentLabeled$Race)
a18 <- cramer_v(TreatmentLabeled$MedicaidCoverage,TreatmentLabeled$EducationLevel)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a19 <- cramer_v(TreatmentLabeled$MedicaidCoverage,TreatmentLabeled$Income)
a20 <- cramer_v(TreatmentLabeled$MedicaidCoverage,TreatmentLabeled$Employment)
a21 <- cramer_v(TreatmentLabeled$Insurance,TreatmentLabeled$ReceivedTreatment)
a22 <- cramer_v(TreatmentLabeled$Insurance,TreatmentLabeled$MedicaidCoverage)
a23 <- cramer_v(TreatmentLabeled$Insurance,TreatmentLabeled$Insurance)
a24 <- cramer_v(TreatmentLabeled$Insurance,TreatmentLabeled$Geography)
a25 <- cramer_v(TreatmentLabeled$Insurance,TreatmentLabeled$Sex)
a26 <- cramer_v(TreatmentLabeled$Insurance,TreatmentLabeled$Age)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a27 <- cramer_v(TreatmentLabeled$Insurance,TreatmentLabeled$Race)
a28 <- cramer_v(TreatmentLabeled$Insurance,TreatmentLabeled$EducationLevel)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a29 <- cramer_v(TreatmentLabeled$Insurance,TreatmentLabeled$Income)
a30 <- cramer_v(TreatmentLabeled$Insurance,TreatmentLabeled$Employment)
a31 <- cramer_v(TreatmentLabeled$Geography,TreatmentLabeled$ReceivedTreatment)
a32 <- cramer_v(TreatmentLabeled$Geography,TreatmentLabeled$MedicaidCoverage)
a33 <- cramer_v(TreatmentLabeled$Geography,TreatmentLabeled$Insurance)
a34 <- cramer_v(TreatmentLabeled$Geography,TreatmentLabeled$Geography)
a35 <- cramer_v(TreatmentLabeled$Geography,TreatmentLabeled$Sex)
a36 <- cramer_v(TreatmentLabeled$Geography,TreatmentLabeled$Age)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a37 <- cramer_v(TreatmentLabeled$Geography,TreatmentLabeled$Race)
a38 <- cramer_v(TreatmentLabeled$Geography,TreatmentLabeled$EducationLevel)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a39 <- cramer_v(TreatmentLabeled$Geography,TreatmentLabeled$Income)
a40 <- cramer_v(TreatmentLabeled$Geography,TreatmentLabeled$Employment)
a41 <- cramer_v(TreatmentLabeled$Sex,TreatmentLabeled$ReceivedTreatment)
a42 <- cramer_v(TreatmentLabeled$Sex,TreatmentLabeled$MedicaidCoverage)
a43 <- cramer_v(TreatmentLabeled$Sex,TreatmentLabeled$Insurance)
a44 <- cramer_v(TreatmentLabeled$Sex,TreatmentLabeled$Geography)
a45 <- cramer_v(TreatmentLabeled$Sex,TreatmentLabeled$Sex)
a46 <- cramer_v(TreatmentLabeled$Sex,TreatmentLabeled$Age)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a47 <- cramer_v(TreatmentLabeled$Sex,TreatmentLabeled$Race)
a48 <- cramer_v(TreatmentLabeled$Sex,TreatmentLabeled$EducationLevel)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a49 <- cramer_v(TreatmentLabeled$Sex,TreatmentLabeled$Income)
a50 <- cramer_v(TreatmentLabeled$Sex,TreatmentLabeled$Employment)
a51 <- cramer_v(TreatmentLabeled$Age,TreatmentLabeled$ReceivedTreatment)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a52 <- cramer_v(TreatmentLabeled$Age,TreatmentLabeled$MedicaidCoverage)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a53 <- cramer_v(TreatmentLabeled$Age,TreatmentLabeled$Insurance)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a54 <- cramer_v(TreatmentLabeled$Age,TreatmentLabeled$Geography)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a55 <- cramer_v(TreatmentLabeled$Age,TreatmentLabeled$Sex)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a56 <- cramer_v(TreatmentLabeled$Age,TreatmentLabeled$Age)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a57 <- cramer_v(TreatmentLabeled$Age,TreatmentLabeled$Race)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a58 <- cramer_v(TreatmentLabeled$Age,TreatmentLabeled$EducationLevel)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a59 <- cramer_v(TreatmentLabeled$Age,TreatmentLabeled$Income)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a60 <- cramer_v(TreatmentLabeled$Age,TreatmentLabeled$Employment)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a61 <- cramer_v(TreatmentLabeled$Race,TreatmentLabeled$ReceivedTreatment)
a62 <- cramer_v(TreatmentLabeled$Race,TreatmentLabeled$MedicaidCoverage)
a63 <- cramer_v(TreatmentLabeled$Race,TreatmentLabeled$Insurance)
a64 <- cramer_v(TreatmentLabeled$Race,TreatmentLabeled$Geography)
a65 <- cramer_v(TreatmentLabeled$Race,TreatmentLabeled$Sex)
a66 <- cramer_v(TreatmentLabeled$Race,TreatmentLabeled$Age)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a67 <- cramer_v(TreatmentLabeled$Race,TreatmentLabeled$Race)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a68 <- cramer_v(TreatmentLabeled$Race,TreatmentLabeled$EducationLevel)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a69 <- cramer_v(TreatmentLabeled$Race,TreatmentLabeled$Income)
a70 <- cramer_v(TreatmentLabeled$Race,TreatmentLabeled$Employment)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a71 <- cramer_v(TreatmentLabeled$EducationLevel,TreatmentLabeled$ReceivedTreatment)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a72 <- cramer_v(TreatmentLabeled$EducationLevel,TreatmentLabeled$MedicaidCoverage)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a73 <- cramer_v(TreatmentLabeled$EducationLevel,TreatmentLabeled$Insurance)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a74 <- cramer_v(TreatmentLabeled$EducationLevel,TreatmentLabeled$Geography)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a75 <- cramer_v(TreatmentLabeled$EducationLevel,TreatmentLabeled$Sex)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a76 <- cramer_v(TreatmentLabeled$EducationLevel,TreatmentLabeled$Age)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a77 <- cramer_v(TreatmentLabeled$EducationLevel,TreatmentLabeled$Race)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a78 <- cramer_v(TreatmentLabeled$EducationLevel,TreatmentLabeled$EducationLevel)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a79 <- cramer_v(TreatmentLabeled$EducationLevel,TreatmentLabeled$Income)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a80 <- cramer_v(TreatmentLabeled$EducationLevel,TreatmentLabeled$Employment)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a81 <- cramer_v(TreatmentLabeled$Income,TreatmentLabeled$ReceivedTreatment)
a82 <- cramer_v(TreatmentLabeled$Income,TreatmentLabeled$MedicaidCoverage)
a83 <- cramer_v(TreatmentLabeled$Income,TreatmentLabeled$Insurance)
a84 <- cramer_v(TreatmentLabeled$Income,TreatmentLabeled$Geography)
a85 <- cramer_v(TreatmentLabeled$Income,TreatmentLabeled$Sex)
a86 <- cramer_v(TreatmentLabeled$Income,TreatmentLabeled$Age)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a87 <- cramer_v(TreatmentLabeled$Income,TreatmentLabeled$Race)
a88 <- cramer_v(TreatmentLabeled$Income,TreatmentLabeled$EducationLevel)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a89 <- cramer_v(TreatmentLabeled$Income,TreatmentLabeled$Income)
a90 <- cramer_v(TreatmentLabeled$Income,TreatmentLabeled$Employment)
a91 <- cramer_v(TreatmentLabeled$Employment,TreatmentLabeled$ReceivedTreatment)
a92 <- cramer_v(TreatmentLabeled$Employment,TreatmentLabeled$MedicaidCoverage)
a93 <- cramer_v(TreatmentLabeled$Employment,TreatmentLabeled$Insurance)
a94 <- cramer_v(TreatmentLabeled$Employment,TreatmentLabeled$Geography)
a95 <- cramer_v(TreatmentLabeled$Employment,TreatmentLabeled$Sex)
a96 <- cramer_v(TreatmentLabeled$Employment,TreatmentLabeled$Age)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a97 <- cramer_v(TreatmentLabeled$Employment,TreatmentLabeled$Race)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a98 <- cramer_v(TreatmentLabeled$Employment,TreatmentLabeled$EducationLevel)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
a99 <- cramer_v(TreatmentLabeled$Employment,TreatmentLabeled$Income)
a100 <- cramer_v(TreatmentLabeled$Employment,TreatmentLabeled$Employment)
d <- data.frame(
ReceivedTreatment=c(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10),
Medicaid=c(a11,a12,a13,a14,a15,a16,a17,a18,a19,a20),
Insurance=c(a21,a22,a23,a24,a25,a26,a27,a28,a29,a30),
Geography=c(a31,a32,a33,a34,a35,a36,a37,a38,a39,a40),
Sex=c(a41,a42,a43,a44,a45,a46,a47,a48,a49,a50),
Age=c(a51,a52,a53,a54,a55,a56,a57,a58,a59,a60),
Race=c(a61,a62,a63,a64,a65,a66,a67,a68,a69,a70),
Education=c(a71,a72,a73,a74,a75,a76,a77,a78,a79,a80),
Income=c(a81,a82,a83,a84,a85,a86,a87,a88,a89,a90),
Employment=c(a91,a92,a93,a94,a95,a96,a97,a98,a99,a100))
print(d)
## ReceivedTreatment Medicaid Insurance Geography Sex Age
## 1 0.99980493 0.18323326 0.037765105 0.054963045 0.04859405 NaN
## 2 0.18323326 0.99986990 0.252356206 0.073318435 0.12092569 NaN
## 3 0.03776511 0.25235621 0.999640083 0.009212179 0.08093075 NaN
## 4 0.05496304 0.07331843 0.009212179 0.707246065 0.03383537 NaN
## 5 0.04859405 0.12092569 0.080930754 0.033835372 0.99990153 NaN
## 6 NaN NaN NaN NaN NaN NaN
## 7 0.04495983 0.20405379 0.128678007 0.112291302 0.03837589 NaN
## 8 NaN NaN NaN NaN NaN NaN
## 9 0.13315200 0.39782468 0.147929159 0.075893564 0.10895653 NaN
## 10 0.14214606 0.29537716 0.078979593 0.032510736 0.12217652 NaN
## Race Education Income Employment
## 1 0.04495983 NaN 0.13315200 0.14214606
## 2 0.20405379 NaN 0.39782468 0.29537716
## 3 0.12867801 NaN 0.14792916 0.07897959
## 4 0.11229130 NaN 0.07589356 0.03251074
## 5 0.03837589 NaN 0.10895653 0.12217652
## 6 NaN NaN NaN NaN
## 7 0.40848963 NaN 0.07014788 0.04736897
## 8 NaN NaN NaN NaN
## 9 0.07014788 NaN 0.57752087 0.11464757
## 10 0.04736897 NaN 0.11464757 0.57752087
rownames(d) <- colnames(d)
variables <- colnames(d)
cor_TreatmentReceived <- data.frame()
for (i in 1:length(variables)) {
for (j in 1:length(variables)) {
row <- data.frame(
Variable1 = variables[i],
Variable2 = variables[j],
Cramers_V = round(d[i, j],2)
)
cor_TreatmentReceived <- rbind(cor_TreatmentReceived, row)
}
}
cor_TreatmentReceived <- cor_TreatmentReceived[cor_TreatmentReceived$Variable1 != cor_TreatmentReceived$Variable2, ]
rownames(cor_TreatmentReceived) <- NULL
colnames(cor_TreatmentReceived) <- c("Variable 1", "Variable 2", "Cramér's V")
cor_TreatmentReceived %>%
kbl(format = "html", caption = "<b>Table 2</b><br><i>Cramér's V Correlations Between Variables</i>",
align = c("l", "l", "r")) %>%
kable_classic(full_width = FALSE, html_font = "Arial") %>%
column_spec(1, width = "15em") %>%
column_spec(2, width = "15em") %>%
column_spec(3, width = "8em") %>%
row_spec(0, bold = TRUE) %>%
save_kable("Correlation_APA.html")
cramers_matrix <- d
cramers_matrix[upper.tri(cramers_matrix)] <- NA #
diag(cramers_matrix) <- NA
cramers_matrix <- round(cramers_matrix, 2)
var_names <- rownames(cramers_matrix)
cramers_matrix <- as.data.frame(cramers_matrix)
cramers_matrix <- cbind(Variable = var_names, cramers_matrix)
cramers_matrix %>%
kbl(format = "html",
caption = "<b>Table 2</b><br><i>Cramér's V Associations Between Categorical Variables</i>",
align = c("l", rep("r", ncol(cramers_matrix)-1))) %>%
kable_classic(full_width = FALSE, html_font = "Arial") %>%
row_spec(0, bold = TRUE) %>%
footnote(general = "Note. Values represent Cramér's V coefficients. Interpretation: < .20 = weak association, .20-.40 = moderate association, > .40 = strong association.",
general_title = "",
footnote_as_chunk = TRUE) %>%
save_kable("Cramers_V_APA.html")
library(ggplot2)
ggplot(data = cor_TreatmentReceived, aes(x = `Variable 1`, y = `Variable 2`, fill = `Cramér's V`)) +
geom_tile() +
scale_fill_gradient(low = "white", high = "red") +
labs(title = "Cramer's V Heatmap",
x = "Variable 1",
y = "Variable 2",
fill = "Cramer's V") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
library(vcd)
## Warning: package 'vcd' was built under R version 4.4.3
## Loading required package: grid
tbl <- table(TreatmentLabeled$ReceivedTreatment, TreatmentLabeled$MedicaidCoverage)
chisq.test(tbl)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tbl
## X-squared = 340.91, df = 1, p-value < 2.2e-16
assocstats(tbl)
## X^2 df P(> X^2)
## Likelihood Ratio 325.94 1 0
## Pearson 341.88 1 0
##
## Phi-Coefficient : 0.183
## Contingency Coeff.: 0.18
## Cramer's V : 0.183
tbl2 <- table(TreatmentLabeled$ReceivedTreatment, TreatmentLabeled$Geography)
chisq.test(tbl2)
##
## Pearson's Chi-squared test
##
## data: tbl2
## X-squared = 30.675, df = 2, p-value = 2.183e-07
assocstats(tbl2)
## X^2 df P(> X^2)
## Likelihood Ratio 30.325 2 2.5999e-07
## Pearson 30.675 2 2.1830e-07
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.055
## Cramer's V : 0.055
tbl3 <- table(TreatmentLabeled$ReceivedTreatment, TreatmentLabeled$Insurance)
chisq.test(tbl3)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tbl3
## X-squared = 14.482, df = 1, p-value = 0.0001415
assocstats(tbl3)
## X^2 df P(> X^2)
## Likelihood Ratio 15.446 1 8.4905e-05
## Pearson 14.764 1 1.2181e-04
##
## Phi-Coefficient : 0.038
## Contingency Coeff.: 0.038
## Cramer's V : 0.038
tbl4 <- table(TreatmentLabeled$ReceivedTreatment, TreatmentLabeled$Income)
chisq.test(tbl4)
##
## Pearson's Chi-squared test
##
## data: tbl4
## X-squared = 180.03, df = 3, p-value < 2.2e-16
assocstats(tbl4)
## X^2 df P(> X^2)
## Likelihood Ratio 179.76 3 0
## Pearson 180.03 3 0
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.132
## Cramer's V : 0.133
tbl5 <- table(TreatmentLabeled$ReceivedTreatment, TreatmentLabeled$Employment)
chisq.test(tbl5)
##
## Pearson's Chi-squared test
##
## data: tbl5
## X-squared = 205.17, df = 3, p-value < 2.2e-16
assocstats(tbl5)
## X^2 df P(> X^2)
## Likelihood Ratio 198.71 3 0
## Pearson 205.17 3 0
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.141
## Cramer's V : 0.142
tbl6 <- table(TreatmentLabeled$ReceivedTreatment, TreatmentLabeled$Race)
chisq.test(tbl6)
##
## Pearson's Chi-squared test
##
## data: tbl6
## X-squared = 20.526, df = 6, p-value = 0.002231
assocstats(tbl6)
## X^2 df P(> X^2)
## Likelihood Ratio 19.330 6 0.0036413
## Pearson 20.526 6 0.0022314
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.045
## Cramer's V : 0.045
tbl7 <- table(TreatmentLabeled$ReceivedTreatment, TreatmentLabeled$Age)
chisq.test(tbl7)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: tbl7
## X-squared = NaN, df = 10, p-value = NA
assocstats(tbl7)
## X^2 df P(> X^2)
## Likelihood Ratio 133.96 10 0
## Pearson NaN 10 NaN
##
## Phi-Coefficient : NA
## Contingency Coeff.: NaN
## Cramer's V : NaN
tbl8 <- table(TreatmentLabeled$ReceivedTreatment, TreatmentLabeled$Sex)
chisq.test(tbl8)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tbl8
## X-squared = 23.978, df = 1, p-value = 9.746e-07
assocstats(tbl8)
## X^2 df P(> X^2)
## Likelihood Ratio 24.240 1 8.5053e-07
## Pearson 24.215 1 8.6139e-07
##
## Phi-Coefficient : 0.049
## Contingency Coeff.: 0.049
## Cramer's V : 0.049
REGRESSION TIME
model <- glm(ReceivedTreatment ~ MedicaidCoverage + Insurance + Geography + Sex + Age + Race + EducationLevel + Income + Employment, data = TreatmentLabeled, family = binomial)
summary(model)
##
## Call:
## glm(formula = ReceivedTreatment ~ MedicaidCoverage + Insurance +
## Geography + Sex + Age + Race + EducationLevel + Income +
## Employment, family = binomial, data = TreatmentLabeled)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.07337 0.13025 -8.241 < 2e-16 ***
## MedicaidCoverageNo Medicaid -0.56621 0.06507 -8.701 < 2e-16 ***
## InsuranceNo Insurance -0.14410 0.09137 -1.577 0.114768
## GeographySmall Metro 0.06521 0.05722 1.140 0.254406
## GeographyNonmetro 0.08389 0.07365 1.139 0.254691
## SexFemale 0.13857 0.05198 2.666 0.007679 **
## Age21-23 years -0.07353 0.10718 -0.686 0.492689
## Age24-25 years 0.15230 0.11911 1.279 0.201035
## Age26-29 years 0.30924 0.11578 2.671 0.007566 **
## Age30-34 years 0.59049 0.10609 5.566 2.61e-08 ***
## Age35-49 years 0.84378 0.09392 8.984 < 2e-16 ***
## Age50-64 years 0.55826 0.11907 4.689 2.75e-06 ***
## Age65 or older 0.41634 0.14690 2.834 0.004595 **
## RaceBlack -0.42412 0.08491 -4.995 5.88e-07 ***
## RaceNative American 0.04368 0.14975 0.292 0.770544
## RaceNative Hawaiian/Pacific Islander -0.09098 0.33499 -0.272 0.785932
## RaceAsian 0.12870 0.16666 0.772 0.439948
## RaceMore than one -0.25566 0.11669 -2.191 0.028457 *
## RaceHispanic -0.17375 0.07145 -2.432 0.015020 *
## EducationLevelH.S Grad -0.17515 0.07880 -2.223 0.026242 *
## EducationLevelSome College -0.28560 0.08182 -3.490 0.000482 ***
## EducationLevelCollege Grad -0.71499 0.09751 -7.333 2.25e-13 ***
## Income$20,000-$49,000 -0.13209 0.06920 -1.909 0.056290 .
## Income$50,000-$74,000 -0.13328 0.08744 -1.524 0.127430
## Income$75,000 or more -0.36181 0.08186 -4.420 9.87e-06 ***
## EmploymentPart-time 0.19331 0.07669 2.521 0.011710 *
## EmploymentUnemployed 0.23320 0.09424 2.475 0.013337 *
## EmploymentOther 0.43932 0.06705 6.552 5.67e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 10524.3 on 10155 degrees of freedom
## Residual deviance: 9819.2 on 10128 degrees of freedom
## AIC: 9875.2
##
## Number of Fisher Scoring iterations: 4
print(model)
##
## Call: glm(formula = ReceivedTreatment ~ MedicaidCoverage + Insurance +
## Geography + Sex + Age + Race + EducationLevel + Income +
## Employment, family = binomial, data = TreatmentLabeled)
##
## Coefficients:
## (Intercept) MedicaidCoverageNo Medicaid
## -1.07337 -0.56621
## InsuranceNo Insurance GeographySmall Metro
## -0.14410 0.06521
## GeographyNonmetro SexFemale
## 0.08389 0.13857
## Age21-23 years Age24-25 years
## -0.07353 0.15230
## Age26-29 years Age30-34 years
## 0.30924 0.59049
## Age35-49 years Age50-64 years
## 0.84378 0.55826
## Age65 or older RaceBlack
## 0.41634 -0.42412
## RaceNative American RaceNative Hawaiian/Pacific Islander
## 0.04368 -0.09098
## RaceAsian RaceMore than one
## 0.12870 -0.25566
## RaceHispanic EducationLevelH.S Grad
## -0.17375 -0.17515
## EducationLevelSome College EducationLevelCollege Grad
## -0.28560 -0.71499
## Income$20,000-$49,000 Income$50,000-$74,000
## -0.13209 -0.13328
## Income$75,000 or more EmploymentPart-time
## -0.36181 0.19331
## EmploymentUnemployed EmploymentOther
## 0.23320 0.43932
##
## Degrees of Freedom: 10155 Total (i.e. Null); 10128 Residual
## Null Deviance: 10520
## Residual Deviance: 9819 AIC: 9875
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
library(lmtest)
library(pROC)
## Warning: package 'pROC' was built under R version 4.4.3
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(ggplot2)
#ASSUMPTIONS IN GLM
vif_values <- vif(model)
print("Variance Inflation Factors (VIF):")
## [1] "Variance Inflation Factors (VIF):"
print(vif_values)
## GVIF Df GVIF^(1/(2*Df))
## MedicaidCoverage 1.626504 1 1.275345
## Insurance 1.272239 1 1.127936
## Geography 1.111872 2 1.026866
## Sex 1.063332 1 1.031180
## Age 1.336171 7 1.020916
## Race 1.252100 6 1.018912
## EducationLevel 1.473183 3 1.066701
## Income 1.552718 3 1.076090
## Employment 1.415917 3 1.059676
Deviance: The summary output provides the Null Deviance (model with only an intercept) and Residual Deviance (full model). A smaller residual deviance relative to the null deviance suggests a better fit.
null_deviance <- model$null.deviance
residual_deviance <- model$deviance
df_null <- model$df.null
df_residual <- model$df.residual
cat("\n--- Deviance Analysis ---\n")
##
## --- Deviance Analysis ---
cat("Null Deviance:", null_deviance, "on", df_null, "degrees of freedom\n")
## Null Deviance: 10524.3 on 10155 degrees of freedom
cat("Residual Deviance:", residual_deviance, "on", df_residual, "degrees of freedom\n")
## Residual Deviance: 9819.162 on 10128 degrees of freedom
cat("Deviance Reduction:", null_deviance - residual_deviance, "\n")
## Deviance Reduction: 705.1426
chi_sq <- null_deviance - residual_deviance
df_diff <- df_null - df_residual
p_value <- pchisq(chi_sq, df_diff, lower.tail = FALSE)
cat("Chi-square:", chi_sq, "df:", df_diff, "p-value:", p_value, "\n")
## Chi-square: 705.1426 df: 27 p-value: 3.186422e-131
library(ResourceSelection)
## Warning: package 'ResourceSelection' was built under R version 4.4.3
## ResourceSelection 0.3-6 2023-06-27
hoslem_test <- hoslem.test(model$y, fitted(model), g = 10)
print("\n--- Hosmer-Lemeshow Goodness of Fit Test ---")
## [1] "\n--- Hosmer-Lemeshow Goodness of Fit Test ---"
print(hoslem_test)
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: model$y, fitted(model)
## X-squared = 6.2384, df = 8, p-value = 0.6205
Residual Analysis: Plot standardized residuals versus fitted values to check for patterns, which can indicate issues with the assumed mean-variance relationship or link function.
TreatmentLabeled$fitted_probs <- fitted(model)
TreatmentLabeled$deviance_resid <- residuals(model, type = "deviance")
TreatmentLabeled$pearson_resid <- residuals(model, type = "pearson")
TreatmentLabeled$std_resid <- rstandard(model)
ggplot(TreatmentLabeled, aes(x = fitted_probs, y = deviance_resid)) +
geom_point(alpha = 0.3) +
geom_hline(yintercept = 0, color = "red", linetype = "dashed") +
geom_smooth(se = FALSE) +
labs(title = "Deviance Residuals vs Fitted Probabilities",
x = "Fitted Probabilities", y = "Deviance Residuals") +
theme_minimal()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
ggplot(TreatmentLabeled, aes(x = seq_along(std_resid), y = std_resid)) +
geom_point(alpha = 0.3) +
geom_hline(yintercept = c(-3, 0, 3), color = c("red", "blue", "red"),
linetype = c("dashed", "solid", "dashed")) +
labs(title = "Standardized Residuals",
x = "Observation Index", y = "Standardized Residuals") +
theme_minimal()
influential <- which(abs(TreatmentLabeled$std_resid) > 3)
cat("\n--- Influential Observations (|std residual| > 3) ---\n")
##
## --- Influential Observations (|std residual| > 3) ---
cat("Number of influential observations:", length(influential), "\n")
## Number of influential observations: 0
cat("Percentage:", round(length(influential)/nrow(TreatmentLabeled)*100, 2), "%\n")
## Percentage: 0 %
cooks_d <- cooks.distance(model)
plot(cooks_d, type = "h", main = "Cook's Distance", ylab = "Cook's Distance")
abline(h = 4/nrow(TreatmentLabeled), col = "red", lty = 2)
Cross-validation: Use a hold-out or validation sample to compare the model’s predictions with actual results, assessing its generalization performance. # Tests model stability (10-fold CV)
set.seed(123)
train_control <- trainControl(method = "cv",
number = 10,
savePredictions = TRUE,
classProbs = TRUE,
summaryFunction = twoClassSummary)
TreatmentLabeled$ReceivedTreatment_factor <- factor(
TreatmentLabeled$ReceivedTreatment,
levels = c("No", "Yes")
)
cv_model <- train(ReceivedTreatment_factor ~ MedicaidCoverage + Insurance +
Geography + Sex + Age + Race + EducationLevel +
Income + Employment,
data = TreatmentLabeled,
method = "glm",
family = "binomial",
trControl = train_control,
metric = "ROC")
print("\n--- 10-Fold Cross-Validation Results ---")
## [1] "\n--- 10-Fold Cross-Validation Results ---"
print(cv_model)
## Generalized Linear Model
##
## 10156 samples
## 9 predictor
## 2 classes: 'No', 'Yes'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 9139, 9141, 9140, 9141, 9141, 9141, ...
## Resampling results:
##
## ROC Sens Spec
## 0.6682197 0.9876109 0.05635134
print(cv_model$results)
## parameter ROC Sens Spec ROCSD SensSD SpecSD
## 1 none 0.6682197 0.9876109 0.05635134 0.01766354 0.003658918 0.02089676
stargazer(model,
type = "html",
out = "model.htm",
title = "Logistic Regression Results",
single.row = TRUE,
column.sep.width = "20pt")
##
## <table style="text-align:center"><caption><strong>Logistic Regression Results</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td>ReceivedTreatment</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">MedicaidCoverageNo Medicaid</td><td>-0.566<sup>***</sup> (0.065)</td></tr>
## <tr><td style="text-align:left">InsuranceNo Insurance</td><td>-0.144 (0.091)</td></tr>
## <tr><td style="text-align:left">GeographySmall Metro</td><td>0.065 (0.057)</td></tr>
## <tr><td style="text-align:left">GeographyNonmetro</td><td>0.084 (0.074)</td></tr>
## <tr><td style="text-align:left">SexFemale</td><td>0.139<sup>***</sup> (0.052)</td></tr>
## <tr><td style="text-align:left">Age21-23 years</td><td>-0.074 (0.107)</td></tr>
## <tr><td style="text-align:left">Age24-25 years</td><td>0.152 (0.119)</td></tr>
## <tr><td style="text-align:left">Age26-29 years</td><td>0.309<sup>***</sup> (0.116)</td></tr>
## <tr><td style="text-align:left">Age30-34 years</td><td>0.590<sup>***</sup> (0.106)</td></tr>
## <tr><td style="text-align:left">Age35-49 years</td><td>0.844<sup>***</sup> (0.094)</td></tr>
## <tr><td style="text-align:left">Age50-64 years</td><td>0.558<sup>***</sup> (0.119)</td></tr>
## <tr><td style="text-align:left">Age65 or older</td><td>0.416<sup>***</sup> (0.147)</td></tr>
## <tr><td style="text-align:left">RaceBlack</td><td>-0.424<sup>***</sup> (0.085)</td></tr>
## <tr><td style="text-align:left">RaceNative American</td><td>0.044 (0.150)</td></tr>
## <tr><td style="text-align:left">RaceNative Hawaiian/Pacific Islander</td><td>-0.091 (0.335)</td></tr>
## <tr><td style="text-align:left">RaceAsian</td><td>0.129 (0.167)</td></tr>
## <tr><td style="text-align:left">RaceMore than one</td><td>-0.256<sup>**</sup> (0.117)</td></tr>
## <tr><td style="text-align:left">RaceHispanic</td><td>-0.174<sup>**</sup> (0.071)</td></tr>
## <tr><td style="text-align:left">EducationLevelH.S Grad</td><td>-0.175<sup>**</sup> (0.079)</td></tr>
## <tr><td style="text-align:left">EducationLevelSome College</td><td>-0.286<sup>***</sup> (0.082)</td></tr>
## <tr><td style="text-align:left">EducationLevelCollege Grad</td><td>-0.715<sup>***</sup> (0.098)</td></tr>
## <tr><td style="text-align:left">49,000</td><td>-0.132<sup>*</sup> (0.069)</td></tr>
## <tr><td style="text-align:left">74,000</td><td>-0.133 (0.087)</td></tr>
## <tr><td style="text-align:left">75,000 or more</td><td>-0.362<sup>***</sup> (0.082)</td></tr>
## <tr><td style="text-align:left">EmploymentPart-time</td><td>0.193<sup>**</sup> (0.077)</td></tr>
## <tr><td style="text-align:left">EmploymentUnemployed</td><td>0.233<sup>**</sup> (0.094)</td></tr>
## <tr><td style="text-align:left">EmploymentOther</td><td>0.439<sup>***</sup> (0.067)</td></tr>
## <tr><td style="text-align:left">Constant</td><td>-1.073<sup>***</sup> (0.130)</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>10,156</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-4,909.581</td></tr>
## <tr><td style="text-align:left">Akaike Inf. Crit.</td><td>9,875.162</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
exp(coef(model))
## (Intercept) MedicaidCoverageNo Medicaid
## 0.3418562 0.5676755
## InsuranceNo Insurance GeographySmall Metro
## 0.8658024 1.0673850
## GeographyNonmetro SexFemale
## 1.0875090 1.1486342
## Age21-23 years Age24-25 years
## 0.9291095 1.1645067
## Age26-29 years Age30-34 years
## 1.3623894 1.8048679
## Age35-49 years Age50-64 years
## 2.3251492 1.7476276
## Age65 or older RaceBlack
## 1.5164014 0.6543478
## RaceNative American RaceNative Hawaiian/Pacific Islander
## 1.0446446 0.9130337
## RaceAsian RaceMore than one
## 1.1373545 0.7744062
## RaceHispanic EducationLevelH.S Grad
## 0.8405034 0.8393319
## EducationLevelSome College EducationLevelCollege Grad
## 0.7515650 0.4891987
## Income$20,000-$49,000 Income$50,000-$74,000
## 0.8762633 0.8752204
## Income$75,000 or more EmploymentPart-time
## 0.6964167 1.2132609
## EmploymentUnemployed EmploymentOther
## 1.2626395 1.5516527
stargazer(model,
align = TRUE,
apply.coef=exp,
type = "html",
out = "model2.htm",
title = "Odds Ratio",
single.row = TRUE,
column.sep.width = "20pt"
)
##
## <table style="text-align:center"><caption><strong>Odds Ratio</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td>ReceivedTreatment</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">MedicaidCoverageNo Medicaid</td><td>0.568<sup>***</sup> (0.065)</td></tr>
## <tr><td style="text-align:left">InsuranceNo Insurance</td><td>0.866<sup>***</sup> (0.091)</td></tr>
## <tr><td style="text-align:left">GeographySmall Metro</td><td>1.067<sup>***</sup> (0.057)</td></tr>
## <tr><td style="text-align:left">GeographyNonmetro</td><td>1.088<sup>***</sup> (0.074)</td></tr>
## <tr><td style="text-align:left">SexFemale</td><td>1.149<sup>***</sup> (0.052)</td></tr>
## <tr><td style="text-align:left">Age21-23 years</td><td>0.929<sup>***</sup> (0.107)</td></tr>
## <tr><td style="text-align:left">Age24-25 years</td><td>1.165<sup>***</sup> (0.119)</td></tr>
## <tr><td style="text-align:left">Age26-29 years</td><td>1.362<sup>***</sup> (0.116)</td></tr>
## <tr><td style="text-align:left">Age30-34 years</td><td>1.805<sup>***</sup> (0.106)</td></tr>
## <tr><td style="text-align:left">Age35-49 years</td><td>2.325<sup>***</sup> (0.094)</td></tr>
## <tr><td style="text-align:left">Age50-64 years</td><td>1.748<sup>***</sup> (0.119)</td></tr>
## <tr><td style="text-align:left">Age65 or older</td><td>1.516<sup>***</sup> (0.147)</td></tr>
## <tr><td style="text-align:left">RaceBlack</td><td>0.654<sup>***</sup> (0.085)</td></tr>
## <tr><td style="text-align:left">RaceNative American</td><td>1.045<sup>***</sup> (0.150)</td></tr>
## <tr><td style="text-align:left">RaceNative Hawaiian/Pacific Islander</td><td>0.913<sup>***</sup> (0.335)</td></tr>
## <tr><td style="text-align:left">RaceAsian</td><td>1.137<sup>***</sup> (0.167)</td></tr>
## <tr><td style="text-align:left">RaceMore than one</td><td>0.774<sup>***</sup> (0.117)</td></tr>
## <tr><td style="text-align:left">RaceHispanic</td><td>0.841<sup>***</sup> (0.071)</td></tr>
## <tr><td style="text-align:left">EducationLevelH.S Grad</td><td>0.839<sup>***</sup> (0.079)</td></tr>
## <tr><td style="text-align:left">EducationLevelSome College</td><td>0.752<sup>***</sup> (0.082)</td></tr>
## <tr><td style="text-align:left">EducationLevelCollege Grad</td><td>0.489<sup>***</sup> (0.098)</td></tr>
## <tr><td style="text-align:left">49,000</td><td>0.876<sup>***</sup> (0.069)</td></tr>
## <tr><td style="text-align:left">74,000</td><td>0.875<sup>***</sup> (0.087)</td></tr>
## <tr><td style="text-align:left">75,000 or more</td><td>0.696<sup>***</sup> (0.082)</td></tr>
## <tr><td style="text-align:left">EmploymentPart-time</td><td>1.213<sup>***</sup> (0.077)</td></tr>
## <tr><td style="text-align:left">EmploymentUnemployed</td><td>1.263<sup>***</sup> (0.094)</td></tr>
## <tr><td style="text-align:left">EmploymentOther</td><td>1.552<sup>***</sup> (0.067)</td></tr>
## <tr><td style="text-align:left">Constant</td><td>0.342<sup>***</sup> (0.130)</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>10,156</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-4,909.581</td></tr>
## <tr><td style="text-align:left">Akaike Inf. Crit.</td><td>9,875.162</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
All variables are statistically significant. p value is P<0.01 #No Medicaid coverage decreases the odds of treatment by 43%. #Small metro (7% increase) and nonmetro (9% increase) have slight increase of odds to receive treatment. #Being female increases the odds of treatment by almost 15%. #Being Aged 35-49 increases the odds of treatment by 132%. Largest increase in Odds Ratio. #Being Black decreases the odds of treatment by 34%. Races black, hispanic, more than one are all less likley to receive treatment than white individuals. This is a concerning disparity. #Employment status other than Full time (baseline) have higher odds of receiving treatment. #Both income and education have decreased odds of receiving treatment the higher up the range you go. Perhaps this is due to less need in higher income or higher achieved education, or a lack of motivation to seek out treatment due to social factors.
exp(confint.default(model))
## 2.5 % 97.5 %
## (Intercept) 0.2648340 0.4412790
## MedicaidCoverageNo Medicaid 0.4996991 0.6448990
## InsuranceNo Insurance 0.7238456 1.0355988
## GeographySmall Metro 0.9541515 1.1940563
## GeographyNonmetro 0.9413296 1.2563886
## SexFemale 1.0373750 1.2718261
## Age21-23 years 0.7530716 1.1462981
## Age24-25 years 0.9220485 1.4707207
## Age26-29 years 1.0857922 1.7094477
## Age30-34 years 1.4660249 2.2220277
## Age35-49 years 1.9342125 2.7951008
## Age50-64 years 1.3838833 2.2069795
## Age65 or older 1.1370298 2.0223510
## RaceBlack 0.5540311 0.7728284
## RaceNative American 0.7789342 1.4009944
## RaceNative Hawaiian/Pacific Islander 0.4735243 1.7604810
## RaceAsian 0.8204238 1.5767158
## RaceMore than one 0.6160871 0.9734094
## RaceHispanic 0.7306721 0.9668441
## EducationLevelH.S Grad 0.7192112 0.9795148
## EducationLevelSome College 0.6402048 0.8822957
## EducationLevelCollege Grad 0.4040992 0.5922192
## Income$20,000-$49,000 0.7651228 1.0035480
## Income$50,000-$74,000 0.7373812 1.0388260
## Income$75,000 or more 0.5931874 0.8176105
## EmploymentPart-time 1.0439450 1.4100377
## EmploymentUnemployed 1.0496971 1.5187795
## EmploymentOther 1.3605715 1.7695698
or_table <- data.frame(
Variable = names(coef(model))[-1],
Odds_Ratio = exp(coef(model))[-1],
CI_Lower = exp(confint.default(model))[-1, 1],
CI_Upper = exp(confint.default(model))[-1, 2],
P_Value = summary(model)$coefficients[-1, 4]
)
or_table$OR_95CI <- sprintf("%.2f (%.2f-%.2f)",
or_table$Odds_Ratio,
or_table$CI_Lower,
or_table$CI_Upper)
or_table$P_Value_Formatted <- ifelse(or_table$P_Value < 0.001,
"<0.001",
sprintf("%.3f", or_table$P_Value))
final_table <- or_table[, c("Variable", "OR_95CI", "P_Value_Formatted")]
colnames(final_table) <- c("Variable", "Odds Ratio (95% CI)", "P-value")
final_table$Variable <- gsub("MedicaidCoverageNo Medicaid", "No Medicaid (vs. Medicaid)", final_table$Variable)
final_table$Variable <- gsub("InsuranceNo Insurance", "No Insurance (vs. Insurance)", final_table$Variable)
final_table$Variable <- gsub("GeographySmall Metro", "Small Metro (vs. Large Metro)", final_table$Variable)
final_table$Variable <- gsub("GeographyNonmetro", "Nonmetro (vs. Large Metro)", final_table$Variable)
final_table$Variable <- gsub("SexFemale", "Female (vs. Male)", final_table$Variable)
final_table$Variable <- gsub("Race", "Race: ", final_table$Variable)
final_table$Variable <- gsub("EducationLevel", "Education: ", final_table$Variable)
final_table$Variable <- gsub("Employment", "Employment: ", final_table$Variable)
final_table$Variable <- gsub("Income\\$", "Income: $", final_table$Variable)
final_table$Variable <- gsub("Age", "Age: ", final_table$Variable)
rownames(final_table) <- NULL
final_table %>%
kbl(format = "html",
caption = "<b><i>Adjusted Odds Ratios for Receipt of SUD Treatment with 95% Confidence Intervals</i>",
align = c("l", "r", "r")) %>%
kable_classic(full_width = FALSE, html_font = "Arial") %>%
column_spec(1, width = "20em") %>%
column_spec(2, width = "12em") %>%
column_spec(3, width = "8em") %>%
row_spec(0, bold = TRUE) %>%
footnote(general = "Note: Reference categories are shown in parentheses. All models control for all variables simultaneously.",
general_title = "",
footnote_as_chunk = TRUE) %>%
save_kable("OddsRatios_CI.html")
print(final_table)
## Variable Odds Ratio (95% CI) P-value
## 1 No Medicaid (vs. Medicaid) 0.57 (0.50-0.64) <0.001
## 2 No Insurance (vs. Insurance) 0.87 (0.72-1.04) 0.115
## 3 Small Metro (vs. Large Metro) 1.07 (0.95-1.19) 0.254
## 4 Nonmetro (vs. Large Metro) 1.09 (0.94-1.26) 0.255
## 5 Female (vs. Male) 1.15 (1.04-1.27) 0.008
## 6 Age: 21-23 years 0.93 (0.75-1.15) 0.493
## 7 Age: 24-25 years 1.16 (0.92-1.47) 0.201
## 8 Age: 26-29 years 1.36 (1.09-1.71) 0.008
## 9 Age: 30-34 years 1.80 (1.47-2.22) <0.001
## 10 Age: 35-49 years 2.33 (1.93-2.80) <0.001
## 11 Age: 50-64 years 1.75 (1.38-2.21) <0.001
## 12 Age: 65 or older 1.52 (1.14-2.02) 0.005
## 13 Race: Black 0.65 (0.55-0.77) <0.001
## 14 Race: Native American 1.04 (0.78-1.40) 0.771
## 15 Race: Native Hawaiian/Pacific Islander 0.91 (0.47-1.76) 0.786
## 16 Race: Asian 1.14 (0.82-1.58) 0.440
## 17 Race: More than one 0.77 (0.62-0.97) 0.028
## 18 Race: Hispanic 0.84 (0.73-0.97) 0.015
## 19 Education: H.S Grad 0.84 (0.72-0.98) 0.026
## 20 Education: Some College 0.75 (0.64-0.88) <0.001
## 21 Education: College Grad 0.49 (0.40-0.59) <0.001
## 22 Income: $20,000-$49,000 0.88 (0.77-1.00) 0.056
## 23 Income: $50,000-$74,000 0.88 (0.74-1.04) 0.127
## 24 Income: $75,000 or more 0.70 (0.59-0.82) <0.001
## 25 Employment: Part-time 1.21 (1.04-1.41) 0.012
## 26 Employment: Unemployed 1.26 (1.05-1.52) 0.013
## 27 Employment: Other 1.55 (1.36-1.77) <0.001