> # library
> library(tidyverse)
> library(ggsci)
> # Load the US contagious diseases dataset that ships with the dslabs package.
> disease <- dslabs::us_contagious_diseases
> # Inspect the structure: column names, types and a preview of the values.
> str(disease)
'data.frame': 16065 obs. of 6 variables:
$ disease : Factor w/ 7 levels "Hepatitis A",..: 1 1 1 1 1 1 1 1 1 1 ...
$ state : Factor w/ 51 levels "Alabama","Alaska",..: 1 1 1 1 1 1 1 1 1 1 ...
$ year : num 1966 1967 1968 1969 1970 ...
$ weeks_reporting: num 50 49 52 49 51 51 45 45 45 46 ...
$ count : num 321 291 314 380 413 378 342 467 244 286 ...
$ population : num 3345787 3364130 3386068 3412450 3444165 ...
disease. A factor containing disease names.
state. A factor containing state names.
year.
weeks_reporting. Number of weeks counts were reported that year.
count. Total number of reported cases.
population. State population, interpolated for non-census years.
> # Goal: keep only the ten most populous states, ranked by their 2011
> # population. Start from the 2011 rows; only state, year and population are
> # needed for the ranking.
> states_2011 <- subset(disease[, c("state", "year", "population")], year == 2011)
> # The same state/year/population row can occur more than once (the table has
> # one row per disease as well), so drop the duplicates.
> states_2011 <- unique(states_2011)
> # Sort by population, largest first, and keep the names of the first ten
> # states as plain character strings. head() is used rather than [1:10] so that
> # fewer than ten rows would never pad the result with NA.
> states_2011 <- states_2011[order(states_2011$population, decreasing = TRUE), ]
> states_2011 <- as.character(head(states_2011$state, 10))
> # Restrict the full dataset to those ten states.
> disease <- disease[disease$state %in% states_2011, ]
> # Turn off scientific notation so that large numbers print in full.
> options(scipen = 200)
> # Scatter plot of reported cases per year, coloured by disease, with one
> # panel per state.
> ggplot(
+   data = disease,
+   mapping = aes(x = year, y = count, colour = disease)
+ ) +
+   # Draw every observation as a small point.
+   geom_point(size = 1.2) +
+   # Lancet colour palette; the legend is titled "Disease".
+   scale_color_lancet(name = "Disease") +
+   # Tick marks every 20 years on the x-axis and every 20000 cases on the
+   # y-axis; the labels are the break values themselves.
+   scale_x_continuous(
+     breaks = seq(1930, 2010, by = 20),
+     labels = seq(1930, 2010, by = 20)
+   ) +
+   scale_y_continuous(
+     breaks = seq(0, 140000, by = 20000),
+     labels = seq(0, 140000, by = 20000)
+   ) +
+   # One facet per state.
+   facet_wrap(~ state) +
+   # Plot title and axis titles.
+   labs(
+     title = "Number of disease reports in the top 10 states with the US population",
+     x = "Year",
+     y = "Total number of reported cases"
+   ) +
+   # Start from the classic theme, then override individual elements below.
+   theme_classic() +
+   theme(
+     # Serif font for all text.
+     text = element_text(family = "serif"),
+     # Centred plot title, size 16.
+     plot.title = element_text(size = 16, hjust = 0.5, vjust = 0.5),
+     # Centred axis titles, size 12.
+     axis.title = element_text(size = 12, hjust = 0.5, vjust = 0.5),
+     # Rotate the x-axis tick labels by 30 degrees.
+     axis.text.x = element_text(angle = 30, hjust = 0.5, vjust = 0.5),
+     # Bold, centred legend title, size 14.
+     legend.title = element_text(
+       face = "bold", size = 14, hjust = 0.5, vjust = 0.5
+     ),
+     # Place the legend above the panels.
+     legend.position = "top"
+   )