Replicating COVID time series lesson with malaria data
Dataset
You can download the dataset here{target="_blank"}. The data come from the study here{target="_blank"}.
All children admitted to Sussundenga District Hospital (SDH) are tested for malaria on admission, with a rapid diagnostic test (RDT) and microscopic tests for confirmation. These data were collated from 24 book records of the Pediatric Department of SDH from November 2018 to October 2020.
## # A tibble: 20,939 × 4
## date_pos_test area sex age
## <date> <chr> <chr> <chr>
## 1 2020-01-22 25 De Junho M 6-11 meses
## 2 2020-01-22 Chicueu F 5-14 anos
## 3 2020-01-22 Mussessa M 5-14 anos
## 4 2020-01-22 Nhamizara M 12-23 meses
## 5 2020-01-22 Nhamizara F 12-23 meses
## 6 2020-01-22 Unidade F 5-14 anos
## 7 2020-01-22 Bapua F 12-23 meses
## 8 2020-01-22 Bapua F 5-14 anos
## 9 2020-01-22 7 De Abril M 12-23 meses
## 10 2020-01-22 Nhanguzue F 5-14 anos
## # ℹ 20,929 more rows
From this line list we can plot a time series of malaria notifications.
4 Going from linelist to notification counts per day
5 Constructing an Epicurve with ggplot2
5.1 Taking the First Step: A Basic Epicurve
# First pass: raw daily notification counts drawn as a single line
notif_count %>%
  ggplot(mapping = aes(x = date_pos_test, y = cases)) +
  geom_line()

# Daily counts are noisy, so add a 7-day rolling mean.
# fill = NA keeps the new column the same length as `cases`; positions
# without a full 7-day window are padded with NA.
notif_count <- mutate(
  notif_count,
  rolling_cases = zoo::rollmean(cases, k = 7, fill = NA)
)

# Second pass: plot the smoothed series instead of the raw counts
notif_count %>%
  ggplot(mapping = aes(x = date_pos_test, y = rolling_cases)) +
  geom_line()
## Warning: Removed 6 rows containing missing values (`geom_line()`).
5.2 Enhancing the Visual: Crafting a Detailed Epicurve
# Polished version of the smoothed epicurve
ggplot(
  data = notif_count,
  mapping = aes(x = date_pos_test, y = rolling_cases)
) +
  # Thicker blue line so the trend stands out
  geom_line(linewidth = 1.2, color = "blue") +
  # Descriptive title and axis labels
  labs(
    title = "Pediatric Malaria notifications in Sussundenga",
    x = "Timeline (Date)",
    y = "Cases (rolling 7 day average)"
  ) +
  # Light base theme first, then the tweaks on top of it:
  # centred bold title and grey axis lines
  theme_light() +
  theme(
    axis.line = element_line(color = "grey"),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )
## Warning: Removed 6 rows containing missing values (`geom_line()`).
5.3 Spotlight on a Specific Duration:
# Setting the time frame:
# start and end dates of the period of interest (second quarter of 2019).
start_date <- as_date("2019-04-01")
end_date <- as_date("2019-06-30")

# Creating the graph:
# the smoothed epicurve as a line, with the period of interest shaded behind it.
ggplot(notif_count, aes(x = date_pos_test, y = rolling_cases)) +
  # annotate() draws the rectangle exactly once. geom_rect(aes(...)) with
  # constant bounds draws one rectangle per row of notif_count, and the
  # stacked semi-transparent copies render as a nearly opaque band.
  annotate(
    "rect",
    xmin = start_date, xmax = end_date, ymin = -Inf, ymax = Inf,
    fill = "yellow", alpha = 0.2
  ) +
  # `linewidth` replaces `size`, which is deprecated for lines since
  # ggplot2 3.4.0
  geom_line(color = "blue", linewidth = 0.5) +
  labs(
    title = "Pediatric Malaria notifications in Sussundenga",
    x = "Timeline (Date)",
    y = "Cases (rolling 7 day average)"
  ) +
  theme_minimal()
## Warning: Removed 6 rows containing missing values (`geom_line()`).
5.4 Zooming in on specific regions
# Daily notification counts per area, plus a 7-day rolling mean per area.
notif_count_per_area <-
  mal %>%
  group_by(area) %>%
  count(date_pos_test, name = "cases") %>%
  # Fill missing dates with 0 cases. The data are still grouped by area at
  # this point, so each area's series is completed (and then smoothed)
  # separately from the others.
  complete(
    date_pos_test = seq.Date(
      min(date_pos_test),
      max(date_pos_test),
      by = "day"
    ),
    fill = list(cases = 0)
  ) %>%
  mutate(rolling_cases = zoo::rollmean(cases, 7, fill = NA)) %>%
  # Drop the grouping so later steps do not silently operate per area
  ungroup()
# Number of records per area: 56 areas is too many to show on one plot
count(mal, area)
## # A tibble: 56 × 2
## area n
## <chr> <int>
## 1 25 De Junho 2786
## 2 3 De Fevereiro 137
## 3 7 De Abril 1336
## 4 9 Congresso 12
## 5 Bapua 648
## 6 Binga 3
## 7 Bloco 30
## 8 Boa Vista 20
## 9 Bunga 1
## 10 Caminho De Ferro 15
## # ℹ 46 more rows
# Let's focus on the top 6 areas by case count.
# slice_max() replaces the superseded top_n(). Ties are kept, as before,
# but rows now come back sorted by n, largest first.
top_6_areas <-
  mal %>%
  count(area) %>%
  slice_max(order_by = n, n = 6)
top_6_areas
## # A tibble: 6 × 2
## area n
## <chr> <int>
## 1 Nhamizara 3717
## 2 Chicueu 3050
## 3 25 De Junho 2786
## 4 Nhamarenza 1912
## 5 Nhanguzue 1390
## 6 7 De Abril 1336
# Smoothed epicurves for the top areas, one coloured line per area.
notif_count_per_area %>%
  filter(area %in% top_6_areas$area) %>%
  # x = date of positive test, y = 7-day rolling mean of cases,
  # colour = area
  ggplot(aes(x = date_pos_test, y = rolling_cases, color = area)) +
  # Thin lines so overlapping areas stay distinguishable. `linewidth`
  # replaces `size`, which is deprecated for lines since ggplot2 3.4.0.
  geom_line(linewidth = 0.5) +
  labs(
    title = "Pediatric Malaria notifications in Sussundenga by Neighbourhood",
    x = "Timeline (Date)",
    y = "Cases (rolling 7 day average)"
  ) +
  theme_minimal() + # Minimal theme for a clean look
  theme(
    plot.title = element_text(hjust = 0.5), # Centre the title
    axis.line = element_line(color = "black"), # Black axis lines
    legend.position = "right" # Legend on the right-hand side
  )
## Warning: Removed 36 rows containing missing values (`geom_line()`).
5.6 Highlighting a specific region:
# 'Chicueu' is the area of interest.
# Colour guide: ifelse() sets Chicueu to red and every other top area to grey.
notif_count_per_area %>%
  filter(area %in% top_6_areas$area) %>%
  mutate(color = ifelse(area == "Chicueu", "red", "grey")) %>%
  # group = area keeps one line per area. Without it ggplot groups by the
  # only discrete aesthetic (color), so all grey areas would be joined into
  # a single zigzag line.
  ggplot(
    aes(x = date_pos_test, y = rolling_cases, color = color, group = area)
  ) +
  # `linewidth` replaces `size`, which is deprecated for lines since
  # ggplot2 3.4.0
  geom_line(linewidth = 0.5) +
  # Use the values in `color` literally as colours
  scale_color_identity() +
  labs(
    title = "Highlighting Pediatric Malaria in Chicueu Amidst Top Areas",
    x = "Timeline (Date)",
    y = "Cases (rolling 7 day average)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.line = element_line(color = "black"),
    legend.position = "right"
  )
## Warning: Removed 36 rows containing missing values (`geom_line()`).
5.5 Fine-Tuning the Time Axis for Malaria Data
# Per-area smoothed epicurves with a tidier date axis:
# one tick per month, labelled as abbreviated month plus year.
ggplot(
  data = filter(notif_count_per_area, area %in% top_6_areas$area),
  mapping = aes(x = date_pos_test, y = rolling_cases, color = area)
) +
  geom_line() +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month") +
  labs(
    title = "Pediatric Malaria Notifications Over Time",
    x = "Timeline (Date)",
    y = "Cases (rolling 7 day average)"
  ) +
  theme_minimal() +
  theme(
    # Monthly labels are dense, so tilt them to keep them legible
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.line = element_line(color = "black"),
    plot.title = element_text(hjust = 0.5)
  )
## Warning: Removed 36 rows containing missing values (`geom_line()`).
5.7 Facetting Epidemic Curves for Malaria Data
# One panel per area: facet_wrap() splits the top areas into separate plots
ggplot(
  data = filter(notif_count_per_area, area %in% top_6_areas$area),
  mapping = aes(x = date_pos_test, y = rolling_cases, color = area)
) +
  geom_line() +
  facet_wrap(vars(area)) +
  # Quarterly ticks keep the narrower panel axes readable
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 months") +
  labs(
    title = "Comparing Pediatric Malaria Across Top Areas",
    x = "Time Frame (Date)",
    y = "Cases (rolling 7 day average)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.line = element_line(color = "black"),
    plot.title = element_text(hjust = 0.5)
  )
## Warning: Removed 36 rows containing missing values (`geom_line()`).
5.8 Cumulative Malaria Cases Visualization
# Running total of cases within each area, accumulated in date order
notif_count_per_area_cumulative <-
  notif_count_per_area %>%
  group_by(area) %>%
  arrange(date_pos_test) %>%
  mutate(Cumulative_Cases = cumsum(cases))

# Cumulative curves for the top areas, one coloured line per area
ggplot(
  data = filter(notif_count_per_area_cumulative, area %in% top_6_areas$area),
  mapping = aes(x = date_pos_test, y = Cumulative_Cases, color = area)
) +
  geom_line() +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month") +
  labs(
    title = "Cumulated Pediatric Malaria Cases Over Time",
    x = "Timeline (Date)",
    y = "Aggregate Cases Up To Date"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.line = element_line(color = "black"),
    plot.title = element_text(hjust = 0.5)
  )