# Build the cleaned analysis table from `us_contagious_diseases`:
#   1. keep rows from 1950 onward with at least 50 reporting weeks,
#   2. add `rate_per_100k`, the case count per 100,000 population,
#   3. add `region`, a coarse geographic label derived from `state`.
clean_data <- us_contagious_diseases |>
  filter(year >= 1950, weeks_reporting >= 50) |>
  mutate(
    # Cases per 100,000 population.
    rate_per_100k = (count / population) * 100000,
    # State groupings follow the four U.S. Census Bureau regions. Anything not
    # listed below falls through to "Other" so unexpected values stay visible
    # instead of becoming NA.
    region = case_when(
      # Northeast
      state %in% c(
        "Connecticut", "Maine", "Massachusetts", "New Hampshire",
        "Rhode Island", "Vermont", "New Jersey", "New York",
        "Pennsylvania"
      ) ~ "Northeast",
      # Midwest
      state %in% c(
        "Illinois", "Indiana", "Michigan", "Ohio", "Wisconsin",
        "Iowa", "Kansas", "Minnesota", "Missouri", "Nebraska",
        "North Dakota", "South Dakota"
      ) ~ "Midwest",
      # South
      # The District of Columbia belongs to the Census South region. Both
      # capitalizations are listed because the spelling used by the data
      # source is not visible here (NOTE(review): believed to be
      # "District Of Columbia" -- confirm against the dataset).
      state %in% c(
        "Delaware", "Florida", "Georgia", "Maryland", "North Carolina",
        "South Carolina", "Virginia", "West Virginia", "Alabama",
        "Kentucky", "Mississippi", "Tennessee", "Arkansas",
        "Louisiana", "Oklahoma", "Texas",
        "District Of Columbia", "District of Columbia"
      ) ~ "South",
      # West
      state %in% c(
        "Arizona", "Colorado", "Idaho", "Montana", "Nevada",
        "New Mexico", "Utah", "Wyoming", "Alaska", "California",
        "Hawaii", "Oregon", "Washington"
      ) ~ "West",
      TRUE ~ "Other"
    )
  )