Public health outcomes often reflect differences in policy priorities and resource allocation. This assignment uses state-level adult obesity prevalence data to explore whether differences in healthcare spending correlate with obesity rates.

Research Question: Do differences in per capita healthcare spending correlate with adult obesity prevalence?

library(readxl)

# Load the two state-level spreadsheets.
# NOTE(review): both paths are absolute and specific to one machine; the
# script will not run elsewhere until they are replaced.
obesity_data <- read_excel(
  "/Users/leslietavarez/Downloads/adult obesity (1).xlsx"
)
healthcare_spending <- read_excel(
  "/Users/leslietavarez/Documents/raw_data (1).xlsx"
)

# Join the two tables on the shared "State" column. merge() keeps only the
# rows whose State value appears in both inputs.
merged_data <- merge(x = obesity_data, y = healthcare_spending, by = "State")

# Discard the "Rank" column (if present) and keep every other column.
merged_data <- merged_data[, names(merged_data) != "Rank"]
merged_data
##                   State Obesity % Total Health Spending
## 1               Alabama     0.392                 44722
## 2                Alaska     0.352                  9447
## 3               Arizona     0.319                 65941
## 4              Arkansas     0.400                 27142
## 5            California     0.277                410903
## 6              Colorado     0.249                 50918
## 7           Connecticut     0.294                 44372
## 8              Delaware     0.357                 11995
## 9  District of Columbia     0.215                 16275
## 10              Florida     0.301                214229
## 11              Georgia     0.350                 95458
## 12               Hawaii     0.261                 14635
## 13                Idaho     0.310                 14519
## 14             Illinois     0.360                123448
## 15              Indiana     0.378                 71239
## 16                 Iowa     0.378                 28688
## 17               Kansas     0.359                 27626
## 18             Kentucky     0.387                 44798
## 19            Louisiana     0.399                 48881
## 20                Maine     0.326                 15912
## 21             Maryland     0.341                 61506
## 22        Massachusetts     0.274                 93737
## 23             Michigan     0.354                 96154
## 24            Minnesota     0.333                 63772
## 25          Mississippi     0.401                 25905
## 26             Missouri     0.353                 61901
## 27              Montana     0.305                 10768
## 28             Nebraska     0.366                 20805
## 29               Nevada     0.308                 25359
## 30        New Hampshire     0.328                 15631
## 31           New Jersey     0.289                101077
## 32           New Mexico     0.353                 17320
## 33             New York     0.280                273010
## 34       North Carolina     0.340                 95329
## 35         North Dakota     0.356                  9245
## 36                 Ohio     0.364                124390
## 37             Oklahoma     0.387                 35654
## 38               Oregon     0.336                 42864
## 39         Pennsylvania     0.324                149450
## 40         Rhode Island     0.316                 12361
## 41       South Carolina     0.360                 42476
## 42         South Dakota     0.360                 11779
## 43            Tennessee     0.376                 67558
## 44                Texas     0.344                254024
## 45                 Utah     0.302                 25972
## 46              Vermont     0.288                  6991
## 47             Virginia     0.343                 76517
## 48           Washington     0.306                 70885
## 49        West Virginia     0.412                 21772
## 50            Wisconsin     0.359                 57529
## 51              Wyoming     0.333                  4943
# Rescale the obesity column from a proportion (e.g. 0.392) to a percentage.
merged_data[["Obesity %"]] <- 100 * merged_data[["Obesity %"]]

# Give the spending column a name that does not need backticks.
names(merged_data)[names(merged_data) == "Total Health Spending"] <-
  "Healthcare_Spending"

# Bin states into five groups using the 0/20/40/60/80/100th percentiles of
# spending as cut points. include.lowest = TRUE keeps the minimum-spending
# state inside the first bin instead of turning it into NA.
merged_data$Spending_Quintile <- cut(
  merged_data$Healthcare_Spending,
  breaks = quantile(
    merged_data$Healthcare_Spending,
    probs = seq(0, 1, by = 0.2),
    na.rm = TRUE
  ),
  labels = c("Lowest", "Second Lowest", "Middle", "Second Highest", "Highest"),
  include.lowest = TRUE
)

# Count how many states landed in each quintile.
table(merged_data$Spending_Quintile)
## 
##         Lowest  Second Lowest         Middle Second Highest        Highest 
##             11             10             10             10             10
# Preview the first six rows to confirm the new columns look right.
head(merged_data, n = 6L)
##        State Obesity % Healthcare_Spending Spending_Quintile
## 1    Alabama      39.2               44722            Middle
## 2     Alaska      35.2                9447            Lowest
## 3    Arizona      31.9               65941    Second Highest
## 4   Arkansas      40.0               27142     Second Lowest
## 5 California      27.7              410903           Highest
## 6   Colorado      24.9               50918            Middle

Visualizations

library(ggplot2)
library(viridis)
## Loading required package: viridisLite
# Boxplot of obesity prevalence within each spending quintile. The fill colour
# repeats the x-axis grouping, so the legend is hidden.
ggplot(
  data = merged_data,
  mapping = aes(
    x = Spending_Quintile,
    y = `Obesity %`,
    fill = Spending_Quintile
  )
) +
  geom_boxplot(alpha = 0.7, outlier.shape = 16, outlier.size = 2) +
  scale_fill_viridis(discrete = TRUE) +
  labs(
    fill = "Spending Quintile",
    y = "Obesity Prevalence (%)",
    x = "Healthcare Spending Quintile",
    title = "Obesity Rates Across Healthcare Spending Quintiles"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none")

# Pearson correlation (cor.test default) between spending and obesity
# prevalence across states.
cor_result <- cor.test(merged_data$Healthcare_Spending, merged_data$`Obesity %`)

# Values shown in the plot annotation: r rounded to two decimals, and the
# p-value either rounded to three decimals or reported as "< 0.001".
cor_value <- round(cor_result$estimate, 2)
p_value <- cor_result$p.value
# p_value is a single number, so a plain if/else is used rather than the
# vectorised ifelse(); the NA guard keeps an undefined p-value from raising
# an error in the condition.
p_text <- if (!is.na(p_value) && p_value < 0.001) {
  "< 0.001"
} else {
  round(p_value, 3)
}

# Scatter plot of obesity prevalence against spending, coloured by spending
# quintile, with a dashed least-squares trend line.
ggplot(
  merged_data,
  aes(x = Healthcare_Spending, y = `Obesity %`, color = Spending_Quintile)
) +
  geom_point(size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, color = "black", linetype = "dashed") +
  scale_color_viridis(discrete = TRUE) +
  # Healthcare_Spending is the renamed "Total Health Spending" column, so the
  # axis is labelled as total spending rather than per capita.
  # NOTE(review): the unit of the source column is not shown in this file;
  # add it to the label once confirmed.
  labs(
    title = "Relationship Between Healthcare Spending and Obesity Prevalence",
    x = "Total Healthcare Spending",
    y = "Obesity Prevalence (%)",
    color = "Spending Quintile"
  ) +
  theme_minimal(base_size = 14) +
  # Place the statistics at 70% of the x-range maximum and 90% of the
  # y-range maximum; hjust = 0 left-aligns the text at that point.
  annotate(
    "text",
    x = max(merged_data$Healthcare_Spending) * 0.7,
    y = max(merged_data$`Obesity %`) * 0.9,
    label = paste("Correlation: r =", cor_value, "\n p-value =", p_text),
    size = 3,
    hjust = 0
  )
## `geom_smooth()` using formula = 'y ~ x'

Summary of findings

The correlation between healthcare spending and obesity prevalence is weakly negative (r = -0.222), and with a p-value of 0.115 it is not statistically significant. This suggests that higher healthcare spending is not strongly associated with lower obesity rates at the state level. Note that the spending measure used here is each state's total health spending rather than a per capita figure, so it also reflects differences in population size. However, there is considerable variation among states, which may indicate that factors other than healthcare spending influence obesity prevalence.