Code
# Attach the tidyverse, which supplies read_csv() and the dplyr / ggplot2
# functions used further down.
library(tidyverse)
# Download the cheese dataset (CSV) into a tibble.
cheese <- read_csv("https://jsuleiman.com/datasets/cheese.csv")
deaths <- read_csv("https://jsuleiman.com/datasets/Injury_Mortality__United_States.csv")

# In this R script, I process and analyze two datasets to explore a possible
# link between Swiss cheese consumption and overall injury deaths in the
# United States. I start by importing a large dataset on deaths caused by
# injuries, along with a dataset on Swiss cheese consumption by year. Then, I
# keep only the rows that already aggregate across all groups (both sexes,
# all ages, all races, all injury mechanisms, and all intents) and total the
# deaths for each year, to keep the focus on broader trends rather than
# specific subgroups. After preprocessing, I join the two datasets by year so
# that the cheese consumption and death statistics line up accurately. For
# further analysis, I add a newly calculated variable, `injury_int`, to the
# merged data. In the final step, I examine the relationship between Swiss
# cheese consumption and overall injury mortality, using a scatter plot with
# a fitted linear model to visualize the link. This clearly illustrates the
# connection between the two factors and the strength of that relationship.
# ---- Load and prepare the data ----
library(tidyverse)

# Each CSV is downloaded exactly once; show_col_types = FALSE silences the
# column-specification message that read_csv() prints by default.
cheese <- read_csv(
  "https://jsuleiman.com/datasets/cheese.csv",
  show_col_types = FALSE
)

# Only the year and the Swiss cheese series are needed for this analysis.
swiss <- cheese |>
  select(year, swiss)

deaths <- read_csv(
  "https://jsuleiman.com/datasets/Injury_Mortality__United_States.csv",
  show_col_types = FALSE
)

# Keep only the rows in which every breakdown column is at its all-inclusive
# level, so subgroup rows are not added on top of the totals, then sum the
# deaths per year. The year column is renamed to match the cheese data so the
# two tables can be joined on it.
deaths_filtered <- deaths |>
  filter(
    Sex == "Both sexes",
    `Age group (years)` == "All Ages",
    Race == "All races",
    `Injury mechanism` == "All Mechanisms",
    `Injury intent` == "All Intentions"
  ) |>
  group_by(Year) |>
  summarize(deaths_total = sum(Deaths, na.rm = TRUE)) |>
  ungroup() |>
  rename(year = Year)

# Inner join: only years present in both datasets are kept.
merged <- swiss |>
  inner_join(deaths_filtered, by = "year")
# Print the merged table as a quick sanity check. The console output from a
# previous run is preserved below as `#>` comments so the script still parses.
print("Merged data:")
#> [1] "Merged data:"
print(merged)
#> # A tibble: 18 × 3
#>     year swiss deaths_total
#>    <dbl> <dbl>        <dbl>
#>  1  1999  1.09       148286
#>  2  2000  1.02       148209
#>  3  2001  1.12       157078
#>  4  2002  1.09       161269
#>  5  2003  1.13       164002
#>  6  2004  1.2        167184
#>  7  2005  1.24       173753
#>  8  2006  1.23       179065
#>  9  2007  1.24       182479
#> 10  2008  1.1        181226
#> 11  2009  1.16       177154
#> 12  2010  1.18       180811
#> 13  2011  1.14       187464
#> 14  2012  1.09       190385
#> 15  2013  1          192945
#> 16  2014  1.02       199752
#> 17  2015  1.05       214008
#> 18  2016  1.06       231991
# Fix the RNG seed so the random noise added below is reproducible.
set.seed(123)

# Add `injury_int`: ten times the Swiss cheese value plus Gaussian noise
# (mean 0, sd 0.5), one draw per row.
# NOTE(review): this column is synthetic (derived from `swiss`, not from the
# injury data) and is not used by the correlation or the plot that follow --
# confirm whether it is still needed.
merged <- merged |>
  mutate(injury_int = swiss * 10 + rnorm(nrow(merged), mean = 0, sd = 0.5))

# Correlation between Swiss cheese consumption and total injury deaths,
# computed on complete observations only (rows with an NA in either column
# are dropped).
corr <- cor(merged$swiss, merged$deaths_total, use = "complete.obs")
# Scatter plot of total injury deaths against Swiss cheese consumption: red
# points, a green straight-line (lm) fit without a confidence band, and the
# rounded correlation coefficient shown in the subtitle.
merged |>
  ggplot(mapping = aes(x = swiss, y = deaths_total)) +
  geom_point(color = "red", size = 3) +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    color = "green",
    se = FALSE
  ) +
  labs(
    x = "Per Capita Swiss Cheese Consumption (lbs/year)",
    y = "Total Deaths by Injury",
    title = "Exploring Correlations: Swiss Cheese Consumption vs. Total Injury Deaths",
    subtitle = paste("Correlation coefficient (r):", round(corr, 2))
  ) +
  theme_minimal()