Replicating COVID time series lesson with malaria data

Dataset

You can download the dataset here. It comes from the study described here.

All children admitted to Sussundenga District Hospital (SDH) are tested for malaria on admission, with a rapid diagnostic test (RDT) and microscopic tests for confirmation. These data were collated from 24 book records of the Pediatric Department of SDH from November 2018 to October 2020.

# Print the `mal` tibble to preview its columns and first rows
mal
## # A tibble: 20,939 × 4
##    date_pos_test area        sex   age        
##    <date>        <chr>       <chr> <chr>      
##  1 2020-01-22    25 De Junho M     6-11 meses 
##  2 2020-01-22    Chicueu     F     5-14 anos  
##  3 2020-01-22    Mussessa    M     5-14 anos  
##  4 2020-01-22    Nhamizara   M     12-23 meses
##  5 2020-01-22    Nhamizara   F     12-23 meses
##  6 2020-01-22    Unidade     F     5-14 anos  
##  7 2020-01-22    Bapua       F     12-23 meses
##  8 2020-01-22    Bapua       F     5-14 anos  
##  9 2020-01-22    7 De Abril  M     12-23 meses
## 10 2020-01-22    Nhanguzue   F     5-14 anos  
## # ℹ 20,929 more rows

From this linelist we can plot a time series of malaria notifications.

4 Going from linelist to notification counts per day

# Collapse the linelist to one row per day, then pad the calendar so that
# days with no recorded positive test appear explicitly with zero cases.
notif_count <- mal %>%
  count(date_pos_test, name = "cases") %>%
  complete(
    date_pos_test = seq(min(date_pos_test), max(date_pos_test), by = "1 day"),
    fill = list(cases = 0)
  )

5 Constructing an Epicurve with ggplot2

5.1 Taking the First Step: A Basic Epicurve

# First, unadorned epicurve: daily case counts joined by a line
notif_count %>%
  ggplot(aes(x = date_pos_test, y = cases)) +
  geom_line()

# Daily counts are noisy: add a 7-day rolling mean as a smoother series.
# Positions without a full window are filled with NA.
notif_count <- mutate(
  notif_count,
  rolling_cases = zoo::rollmean(cases, k = 7, fill = NA)
)

# Same epicurve, now drawn from the smoothed series
notif_count %>%
  ggplot(aes(x = date_pos_test, y = rolling_cases)) +
  geom_line()
## Warning: Removed 6 rows containing missing values (`geom_line()`).

5.2 Enhancing the Visual: Crafting a Detailed Epicurve

# Polished epicurve: thicker blue line, descriptive labels, a light theme,
# a centred bold title and grey axis lines
notif_count %>%
  ggplot(aes(x = date_pos_test, y = rolling_cases)) +
  geom_line(colour = "blue", linewidth = 1.2) +
  ggtitle("Pediatric Malaria notifications in Sussundenga") +
  xlab("Timeline (Date)") +
  ylab("Cases (rolling 7 day average)") +
  theme_light() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.line = element_line(colour = "grey")
  )
## Warning: Removed 6 rows containing missing values (`geom_line()`).

5.3 Spotlight on a Specific Duration:

# Setting the time frame:
# start and end dates of the period we want to emphasise.
start_date <- as_date("2019-04-01")
end_date <- as_date("2019-06-30")

# Creating the graph:
# a line for the smoothed series, drawn over a shaded band that marks the
# period of interest.
ggplot(notif_count, aes(x = date_pos_test, y = rolling_cases)) +
  # annotate() draws the band exactly once. A geom_rect() layer with constant
  # bounds inside aes() would draw one rectangle per data row, and the stacked
  # semi-transparent copies would end up looking opaque.
  annotate("rect",
           xmin = start_date, xmax = end_date, ymin = -Inf, ymax = Inf,
           fill = "yellow", alpha = 0.2) +
  # `linewidth` replaces `size` for lines since ggplot2 3.4.0
  geom_line(color = "blue", linewidth = 0.5) +
  labs(title = "Pediatric Malaria notifications in Sussundenga",
       x = "Timeline (Date)", y = "Cases (rolling 7 day average)") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 6 rows containing missing values (`geom_line()`).

5.4 Zooming in on specific regions

# Daily counts per area: tally positives by area and day, pad each area's own
# date range with zero-case days, then add a 7-day rolling mean within each
# area. The result stays grouped by area.
notif_count_per_area <- mal %>%
  count(area, date_pos_test, name = "cases") %>%
  group_by(area) %>%
  complete(
    date_pos_test = seq(min(date_pos_test), max(date_pos_test), by = "1 day"),
    fill = list(cases = 0)
  ) %>%
  mutate(rolling_cases = zoo::rollmean(cases, k = 7, fill = NA))


# Number of records per area: far too many areas to plot at once
count(mal, area)
## # A tibble: 56 × 2
##    area                 n
##    <chr>            <int>
##  1 25 De Junho       2786
##  2 3 De Fevereiro     137
##  3 7 De Abril        1336
##  4 9 Congresso         12
##  5 Bapua              648
##  6 Binga                3
##  7 Bloco               30
##  8 Boa Vista           20
##  9 Bunga                1
## 10 Caminho De Ferro    15
## # ℹ 46 more rows
# Restrict attention to the six areas with the most cases
top_6_areas <- mal %>%
  count(area) %>%
  top_n(6, wt = n)

top_6_areas
## # A tibble: 6 × 2
##   area            n
##   <chr>       <int>
## 1 25 De Junho  2786
## 2 7 De Abril   1336
## 3 Chicueu      3050
## 4 Nhamarenza   1912
## 5 Nhamizara    3717
## 6 Nhanguzue    1390
# Smoothed epicurves for the six highest-burden areas, one coloured line each
notif_count_per_area %>%
  filter(area %in% top_6_areas$area) %>%
  # x-axis: date; y-axis: 7-day rolling mean of cases; colour: area
  ggplot(aes(x = date_pos_test, y = rolling_cases, color = area)) +
  # One thin line per area (`linewidth` replaces `size` since ggplot2 3.4.0)
  geom_line(linewidth = 0.5) +
  labs(title = "Pediatric Malaria notifications in Sussundenga by Neighbourhood",
       x = "Timeline (Date)", y = "Cases (rolling 7 day average)") +
  theme_minimal() +  # Minimal theme for a clean look
  theme(plot.title = element_text(hjust = 0.5),  # Centre the title
        axis.line = element_line(color = "black"),  # Black axis lines
        legend.position = "right")  # Legend on the right-hand side
## Warning: Removed 36 rows containing missing values (`geom_line()`).

5.6 Highlighting a specific region:

# Assuming 'Chicueu' is the area of interest.
#
# Each row gets a literal colour via ifelse(): red for Chicueu, grey for every
# other area. scale_color_identity() then uses those values as-is.
notif_count_per_area %>%
  filter(area %in% top_6_areas$area) %>%
  mutate(color = ifelse(area == "Chicueu", "red", "grey")) %>%
  # group = area keeps one line per area. Without it the grouping would follow
  # `color`, and all grey areas would be joined into a single zig-zag line.
  ggplot(aes(x = date_pos_test, y = rolling_cases,
             color = color, group = area)) +
  geom_line(linewidth = 0.5) +  # `linewidth` replaces `size` (ggplot2 3.4.0)
  scale_color_identity() +  # Use the colour names stored in the data directly
  labs(title = "Highlighting Pediatric Malaria in Chicueu Amidst Top Areas",
       x = "Timeline (Date)", y = "Cases (rolling 7 day average)") +
  theme_minimal() +
  # No legend.position here: the identity scale draws no legend by default
  theme(plot.title = element_text(hjust = 0.5),
        axis.line = element_line(color = "black"))
## Warning: Removed 36 rows containing missing values (`geom_line()`).

5.5 Fine-Tuning the Time Axis for Malaria Data

# Same per-area epicurves, with a date axis that shows one tick per month
# labelled as abbreviated month plus year, rotated so the labels do not collide
notif_count_per_area %>%
  filter(area %in% pull(top_6_areas, area)) %>%
  ggplot(aes(x = date_pos_test, y = rolling_cases, colour = area)) +
  geom_line() +
  ggtitle("Pediatric Malaria Notifications Over Time") +
  xlab("Timeline (Date)") +
  ylab("Cases (rolling 7 day average)") +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.line = element_line(colour = "black"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

5.7 Faceting Epidemic Curves for Malaria Data

# Small multiples: one panel per area via facet_wrap(), with quarterly
# date breaks so the narrower panels stay legible
notif_count_per_area %>%
  filter(area %in% pull(top_6_areas, area)) %>%
  ggplot(aes(x = date_pos_test, y = rolling_cases, colour = area)) +
  geom_line() +
  ggtitle("Comparing Pediatric Malaria Across Top Areas") +
  xlab("Time Frame (Date)") +
  ylab("Cases (rolling 7 day average)") +
  scale_x_date(date_breaks = "3 months", date_labels = "%b %Y") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.line = element_line(colour = "black"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  facet_wrap(vars(area))
## Warning: Removed 36 rows containing missing values (`geom_line()`).

5.8 Cumulative Malaria Cases Visualization

# Running total of cases within each area: order rows by date, then
# accumulate the daily counts area by area
notif_count_per_area_cumulative <- notif_count_per_area %>%
  arrange(date_pos_test) %>%
  group_by(area) %>%
  mutate(Cumulative_Cases = cumsum(cases))


# Cumulative case curves for the six highest-burden areas, with monthly
# date breaks and rotated tick labels
notif_count_per_area_cumulative %>%
  filter(area %in% pull(top_6_areas, area)) %>%
  ggplot(aes(x = date_pos_test, y = Cumulative_Cases, colour = area)) +
  geom_line() +
  ggtitle("Cumulated Pediatric Malaria Cases Over Time") +
  xlab("Timeline (Date)") +
  ylab("Aggregate Cases Up To Date") +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.line = element_line(colour = "black"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )