library(readxl) # if this package cannot be found, run install.packages("readxl") once and then load it again.

# Load the second worksheet of the measles workbook into `measles`.
measles <- readxl::read_xls(
  path = "measlescasesbycountrybymonth.xls",
  sheet = 2
)

library(dplyr)
library(tidyr)
library(lubridate)
library(ggplot2)

#a. Each row in the measles dataset contains the number of reported measles cases by month. Use pivot_longer to reshape the data into long format as measles_long. Ensure the variable for measles incidences is converted to a numeric variable named Cases, and arrange the levels of the month variable in the correct order, saving it as Month. Avoid manually specifying column indices when using pivot_longer; month.name can be helpful for this task. Check ?tidyr_tidy_select for more information.

# Reshape the twelve month columns into long format (one row per original
# row and month). The month columns are selected by name via month.name
# rather than by position, so the result does not depend on column order.
measles_long <- measles %>%
  pivot_longer(
    cols = all_of(month.name),
    names_to = "Month",
    values_to = "Cases"
  ) %>%
  mutate(
    # Entries that cannot be parsed as numbers become NA.
    Cases = as.numeric(Cases),
    # Order the levels January..December instead of alphabetically.
    Month = factor(Month, levels = month.name)
  )

# Display the first few rows

head(measles_long)

#b. With measles_long, answer the following questions:
# - How many cases of measles were reported in the United States in 2014?
# - Which month saw the maximum number of measles cases in 2019 in the United States?

# Calculate Q1: total measles cases reported in the United States in 2014.
# Missing monthly counts are ignored in the sum.
us_cases_2014 <- measles_long %>%
  filter(ISO3 == "USA", Year == 2014) %>%
  summarize(total_cases = sum(Cases, na.rm = TRUE))

# Answer: Total cases reported in the United States in 2014: 667

print(us_cases_2014)

# Calculate Q2: month with the most reported measles cases in the United
# States in 2019. Cases are totalled per month, sorted in descending order,
# and only the top row is kept.
max_month_2019 <- measles_long %>%
  filter(ISO3 == "USA", Year == 2019) %>%
  group_by(Month) %>%
  summarize(total_cases = sum(Cases, na.rm = TRUE)) %>%
  arrange(desc(total_cases)) %>%
  slice(1)

# Answer: Month with maximum cases in 2019: June (Total cases: 1001)

print(max_month_2019)

#c. Use the lubridate package to create a Date variable from the variables Month and Year. You can assume that cases were reported on the 15th of every month.

# Create the Date variable from Year and Month, assuming every report falls
# on the 15th. match() converts the month name to its number (1-12).
measles_long <- measles_long %>%
  mutate(Date = make_date(Year, match(Month, month.name), 15))

#Use ggplot2 to create line charts showing the number of reported measles cases against Date for Brazil, Canada, and the United States (using “BRA”, “CAN”, and “USA” for ISO3). Facet the chart by Country.

# Plot data: monthly reported cases over time for Brazil, Canada and the
# United States, one panel per country. The panels are faceted by Country,
# as the task asks, instead of by the ISO3 code.
ggplot(
  measles_long %>% filter(ISO3 %in% c("BRA", "CAN", "USA")),
  aes(x = Date, y = Cases)
) +
  geom_line() +
  facet_wrap(~ Country) +
  labs(
    title = "Monthly Measles Cases in Brazil, Canada, and the United States",
    x = "Date",
    y = "Cases"
  )

#Describe the resulting plot in 2-3 sentences.

#Description of the Plot #The resulting plot shows the trend of reported measles cases over time for Brazil, Canada, and the United States from the dataset. Each country is represented in a separate facet, allowing for easy comparison of the incidence of measles across these three nations.

#d. Use the map_data function from ggplot2 to extract a data set of all countries. Call the data set world. We want to draw a choropleth map of the total number of measles cases reported in 2019. To make the question easier, list all country names in the 2019 subset of measles_long that don't have a match in world. Once you have the names, you can use the following code to modify country names in the subset directly.

# Packages used in part d, one library() call per line.
library(dplyr)
library(tidyr)
library(lubridate)
library(ggplot2)
library(maps)

# Calc: total reported cases per country for 2019 (missing months ignored).
measles_2019 <- measles_long %>%
  filter(Year == 2019) %>%
  group_by(Country) %>%
  summarize(total_cases = sum(Cases, na.rm = TRUE))

# Country names in the 2019 subset that have no matching region in the
# world map data.
unmatched <- setdiff(measles_2019$Country, unique(map_data("world")$region))

# Answer: Unmatched country names: ["Sao Tome and Principe", "United States of America"]

print(unmatched)

# Rename countries in the 2019 subset to the region names used by the world
# map; names without an entry below are left unchanged.
# NOTE(review): apostrophes in the names are written as ASCII "'" on the
# assumption that the source data uses them - confirm against the data.
measles_2019 <- measles_2019 %>%
  mutate(Country = case_when(
    Country == "Bolivia (Plurinational State of)" ~ "Bolivia",
    Country == "Brunei Darussalam" ~ "Brunei",
    Country == "Cabo Verde" ~ "Cape Verde",
    Country == "Congo" ~ "Republic of Congo",
    Country == "Cote d'Ivoire" ~ "Ivory Coast",
    Country == "Democratic People's Republic of Korea" ~ "North Korea",
    Country == "Kingdom of Eswatini" ~ "Swaziland",
    Country == "Iran (Islamic Republic of)" ~ "Iran",
    Country == "Lao People's Democratic Republic" ~ "Laos",
    Country == "Micronesia (Federated States of)" ~ "Micronesia",
    Country == "Republic of Korea" ~ "South Korea",
    Country == "Republic of Moldova" ~ "Moldova",
    Country == "Russian Federation" ~ "Russia",
    Country == "Syrian Arab Republic" ~ "Syria",
    Country == "The Republic of North Macedonia" ~ "North Macedonia",
    Country == "United Kingdom of Great Britain and Northern Ireland" ~ "UK",
    Country == "United Republic of Tanzania" ~ "Tanzania",
    Country == "United States of America" ~ "USA",
    Country == "Venezuela (Bolivarian Republic of)" ~ "Venezuela",
    Country == "Viet Nam" ~ "Vietnam",
    .default = Country
  ))

#Then, use a polygon layer with geom_polygon, with lowest value set to white and highest color set to red.

# Map preparation
world <- map_data("world")

# Choropleth of total 2019 measles cases. The grey polygon layer draws every
# country as a backdrop; geom_map then fills the countries present in
# measles_2019 on a white (lowest) to red (highest) scale.
ggplot() +
  geom_polygon(
    data = world,
    aes(x = long, y = lat, group = group),
    fill = "gray90"
  ) +
  geom_map(
    data = measles_2019,
    aes(map_id = Country, fill = total_cases),
    map = world
  ) +
  scale_fill_gradient(low = "white", high = "red") +
  labs(title = "Global Measles Cases in 2019", fill = "Cases")