Public health outcomes often reflect differences in policy priorities and resource allocation. This assignment uses state-level adult obesity prevalence data to explore whether differences in healthcare spending correlate with obesity rates.
library(readxl)
# Load the two state-level spreadsheets: adult obesity prevalence and
# health spending.
obesity_data <- read_excel(
  path = "/Users/leslietavarez/Downloads/adult obesity (1).xlsx"
)
healthcare_spending <- read_excel(
  path = "/Users/leslietavarez/Documents/raw_data (1).xlsx"
)

# Join the two tables on their shared "State" column; with merge()'s defaults
# only states present in both tables are kept.
merged_data <- merge(x = obesity_data, y = healthcare_spending, by = "State")

# Discard the "Rank" column (if there is one), then print the combined table.
merged_data <- merged_data[, names(merged_data) != "Rank"]
merged_data
## State Obesity % Total Health Spending
## 1 Alabama 0.392 44722
## 2 Alaska 0.352 9447
## 3 Arizona 0.319 65941
## 4 Arkansas 0.400 27142
## 5 California 0.277 410903
## 6 Colorado 0.249 50918
## 7 Connecticut 0.294 44372
## 8 Delaware 0.357 11995
## 9 District of Columbia 0.215 16275
## 10 Florida 0.301 214229
## 11 Georgia 0.350 95458
## 12 Hawaii 0.261 14635
## 13 Idaho 0.310 14519
## 14 Illinois 0.360 123448
## 15 Indiana 0.378 71239
## 16 Iowa 0.378 28688
## 17 Kansas 0.359 27626
## 18 Kentucky 0.387 44798
## 19 Louisiana 0.399 48881
## 20 Maine 0.326 15912
## 21 Maryland 0.341 61506
## 22 Massachusetts 0.274 93737
## 23 Michigan 0.354 96154
## 24 Minnesota 0.333 63772
## 25 Mississippi 0.401 25905
## 26 Missouri 0.353 61901
## 27 Montana 0.305 10768
## 28 Nebraska 0.366 20805
## 29 Nevada 0.308 25359
## 30 New Hampshire 0.328 15631
## 31 New Jersey 0.289 101077
## 32 New Mexico 0.353 17320
## 33 New York 0.280 273010
## 34 North Carolina 0.340 95329
## 35 North Dakota 0.356 9245
## 36 Ohio 0.364 124390
## 37 Oklahoma 0.387 35654
## 38 Oregon 0.336 42864
## 39 Pennsylvania 0.324 149450
## 40 Rhode Island 0.316 12361
## 41 South Carolina 0.360 42476
## 42 South Dakota 0.360 11779
## 43 Tennessee 0.376 67558
## 44 Texas 0.344 254024
## 45 Utah 0.302 25972
## 46 Vermont 0.288 6991
## 47 Virginia 0.343 76517
## 48 Washington 0.306 70885
## 49 West Virginia 0.412 21772
## 50 Wisconsin 0.359 57529
## 51 Wyoming 0.333 4943
# Rescale obesity prevalence from a proportion (e.g. 0.392) to a percentage
# (39.2) so it reads naturally on plot axes.
merged_data[["Obesity %"]] <- 100 * merged_data[["Obesity %"]]

# Give the spending column a syntactic name so it needs no backticks later.
names(merged_data)[names(merged_data) == "Total Health Spending"] <-
  "Healthcare_Spending"

# Bin states into five groups at the 0/20/40/60/80/100% quantiles of spending.
# include.lowest = TRUE keeps the minimum-spending state inside the first bin
# instead of turning it into NA.
merged_data[["Spending_Quintile"]] <- with(
  merged_data,
  cut(
    x = Healthcare_Spending,
    breaks = quantile(
      Healthcare_Spending,
      probs = seq(from = 0, to = 1, by = 0.2),
      na.rm = TRUE
    ),
    labels = c("Lowest", "Second Lowest", "Middle", "Second Highest", "Highest"),
    include.lowest = TRUE
  )
)

# Count how many states landed in each quintile.
table(merged_data[["Spending_Quintile"]])
##
## Lowest Second Lowest Middle Second Highest Highest
## 11 10 10 10 10
# Preview the first six rows to confirm the new columns look right.
head(merged_data, n = 6L)
## State Obesity % Healthcare_Spending Spending_Quintile
## 1 Alabama 39.2 44722 Middle
## 2 Alaska 35.2 9447 Lowest
## 3 Arizona 31.9 65941 Second Highest
## 4 Arkansas 40.0 27142 Second Lowest
## 5 California 27.7 410903 Highest
## 6 Colorado 24.9 50918 Middle
library(ggplot2)
library(viridis)
## Loading required package: viridisLite
# Box plot of obesity prevalence within each spending quintile. The fill
# colour repeats the x-axis grouping, so the legend is switched off.
ggplot(
  data = merged_data,
  mapping = aes(
    x = Spending_Quintile,
    y = `Obesity %`,
    fill = Spending_Quintile
  )
) +
  geom_boxplot(outlier.shape = 16, outlier.size = 2, alpha = 0.7) +
  scale_fill_viridis(discrete = TRUE) +
  labs(
    x = "Healthcare Spending Quintile",
    y = "Obesity Prevalence (%)",
    fill = "Spending Quintile",
    title = "Obesity Rates Across Healthcare Spending Quintiles"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none")
# Correlation test (cor.test() defaults) between state healthcare spending and
# obesity prevalence; the rounded estimate and p-value are printed on the plot.
cor_result <- cor.test(merged_data$Healthcare_Spending, merged_data$`Obesity %`)
cor_value <- round(cor_result$estimate, 2)
p_value <- cor_result$p.value

# p_value is a single number, so branch with scalar if/else rather than the
# vectorised ifelse(). format(nsmall = 3) keeps trailing zeros ("0.100", not
# "0.1") and makes p_text a character string in both branches.
p_text <- if (!is.na(p_value) && p_value < 0.001) {
  "< 0.001"
} else {
  format(round(p_value, 3), nsmall = 3)
}

# Scatter plot of obesity prevalence against spending, coloured by spending
# quintile, with a dashed black linear trend line and the correlation summary
# written near the upper right of the panel.
ggplot(
  merged_data,
  aes(x = Healthcare_Spending, y = `Obesity %`, color = Spending_Quintile)
) +
  geom_point(size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, color = "black", linetype = "dashed") +
  scale_color_viridis(discrete = TRUE) +
  # Healthcare_Spending was renamed from "Total Health Spending", so the axis
  # is labelled as total (not per-capita) spending.
  # NOTE(review): the units are not stated in this file -- confirm them against
  # the source spreadsheet and add them to the label.
  labs(
    title = "Relationship Between Healthcare Spending and Obesity Prevalence",
    x = "Total Healthcare Spending",
    y = "Obesity Prevalence (%)",
    color = "Spending Quintile"
  ) +
  theme_minimal(base_size = 14) +
  annotate(
    "text",
    x = max(merged_data$Healthcare_Spending) * 0.7,
    y = max(merged_data$`Obesity %`) * 0.9,
    label = paste("Correlation: r =", cor_value, "\n p-value =", p_text),
    size = 3,
    hjust = 0
  )
## `geom_smooth()` using formula = 'y ~ x'
The correlation between healthcare spending and obesity prevalence is weakly negative (r = -0.222), and with a p-value of 0.115 it is not statistically significant. This suggests that higher healthcare spending is not strongly associated with lower obesity rates at the state level. However, there is considerable variation among states, which indicates that factors other than healthcare spending influence obesity prevalence. Note also that the spending measure used here is each state's total health spending, which depends heavily on population size, so a per-capita measure would allow a fairer comparison between states.