0.1 Load the “us_contagious_diseases” dataset from the “dslabs” package.

> # Attach packages: tidyverse supplies ggplot2 and the %>% pipe used below;
> # ggsci supplies the scale_color_lancet() palette.
> library(tidyverse)
> library(ggsci)
> # Load the us_contagious_diseases data frame shipped with the dslabs package.
> disease <- dslabs::us_contagious_diseases
> # Inspect the structure: column names, types and the first few values.
> str(disease)
'data.frame':   16065 obs. of  6 variables:
 $ disease        : Factor w/ 7 levels "Hepatitis A",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ state          : Factor w/ 51 levels "Alabama","Alaska",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ year           : num  1966 1967 1968 1969 1970 ...
 $ weeks_reporting: num  50 49 52 49 51 51 45 45 45 46 ...
 $ count          : num  321 291 314 380 413 378 342 467 244 286 ...
 $ population     : num  3345787 3364130 3386068 3412450 3444165 ...

disease. A factor containing disease names.

state. A factor containing state names.

year. The year in which the cases were reported.

weeks_reporting. Number of weeks counts were reported that year.

count. Total number of reported cases.

population. State population, interpolated for non-census years.

0.2 Filter the data

> # Goal: keep only the ten most populous US states, ranked by 2011 population.
> # Step 1: keep the state/year/population columns for the 2011 rows only.
> states_2011 <- subset(disease[, c("state", "year", "population")], year == 2011)
> # Step 2: once the disease and count columns are dropped, rows for the same
> # state repeat, so collapse them to one row per state.
> states_2011 <- unique(states_2011)
> # Step 3: order by population (largest first) and keep the first ten state
> # names as a character vector. head() is used instead of [1:10] so the result
> # is never padded with NA when fewer than ten states are available.
> states_2011 <- states_2011$state[
+     order(states_2011$population, decreasing = TRUE)
+ ] %>%
+     head(10) %>%
+     as.character()
> # Step 4: restrict the disease data to those ten states.
> disease <- disease[disease$state %in% states_2011, ]

0.3 Create a scatter plot

> # Penalise scientific notation so large numbers print in fixed notation.
> options(scipen = 200)
> # Input dataset and aesthetic mapping.
> ggplot(disease, aes(x = year, y = count, color = disease)) +
+ # Scatter plot layer.
+   geom_point(size = 1.2) +
+ # Lancet colour palette from ggsci; also sets the legend title.
+   scale_color_lancet(name = "Disease") +
+ # Axis titles and tick positions. Each axis title is set once, here, rather
+ # than a second time in labs(), so the two cannot drift apart.
+   scale_x_continuous(name = "Year",
+                breaks = seq(1930, 2010, 20),
+                labels = seq(1930, 2010, 20)) +
+   scale_y_continuous(name = "Total number of reported cases",
+                breaks = seq(0, 140000, 20000),
+                labels = seq(0, 140000, 20000)) +
+ # One panel per state.
+   facet_wrap(vars(state)) +
+ # Plot title.
+   labs(title = "Reported disease cases in the 10 most populous US states") +
+ # Choose a theme style.
+   theme_classic() +
+ # Theme adjustments.
+ # Use a serif font for all text.
+   theme(text = element_text(family = "serif"),
+ # Size and centred alignment of the plot title.
+         plot.title = element_text(size = 16, vjust = 0.5, hjust = 0.5),
+ # Size and centred alignment of the axis titles.
+         axis.title = element_text(size = 12, vjust = 0.5, hjust = 0.5),
+ # Rotate the x-axis tick labels by 30 degrees.
+         axis.text.x = element_text(angle = 30, vjust = 0.5, hjust = 0.5),
+ # Place the legend above the panels.
+         legend.position = "top",
+ # Legend title: bold, size 14, centre-aligned.
+         legend.title = element_text(size = 14, face = "bold",
+                                     vjust = 0.5, hjust = 0.5))