Introduction to data visualization in R

0. Getting started ——–

Libraries

library(tidyverse)
TRUE ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
TRUE ✔ dplyr     1.1.4     ✔ readr     2.1.5
TRUE ✔ forcats   1.0.0     ✔ stringr   1.5.1
TRUE ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
TRUE ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
TRUE ✔ purrr     1.0.2     
TRUE ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
TRUE ✖ dplyr::filter() masks stats::filter()
TRUE ✖ dplyr::lag()    masks stats::lag()
TRUE ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Create a data folder

# Create the data folder only when it is missing, so re-running the script
# does not warn that "data" already exists. Genuine failures (e.g. a
# permissions problem) still produce a warning from dir.create().
if (!dir.exists("data")) {
  dir.create("data")
}

Download example data

# Fetch the malaria case data into the local data folder
download.file(
  url = "https://raw.githubusercontent.com/AMMnet/AMMnet-Hackathon/main/01_data-vis/data/mockdata_cases.csv",
  destfile = "data/mockdata_cases.csv"
)

# Fetch the mosquito data into the local data folder
download.file(
  url = "https://raw.githubusercontent.com/AMMnet/AMMnet-Hackathon/main/01_data-vis/data/mosq_mock.csv",
  destfile = "data/mosq_mock.csv"
)

Load example data

# Read the downloaded malaria case file
malaria_data <- read_csv(file = "data/mockdata_cases.csv")
TRUE Rows: 514 Columns: 10
TRUE ── Column specification ────────────────────────────────────────────────────────
TRUE Delimiter: ","
TRUE chr (2): location, ages
TRUE dbl (8): month, year, total, positive, xcoord, ycoord, prev, time_order_loc
TRUE 
TRUE ℹ Use `spec()` to retrieve the full column specification for this data.
TRUE ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the downloaded mosquito file
mosquito_data <- read_csv(file = "data/mosq_mock.csv")
TRUE Rows: 104 Columns: 19
TRUE ── Column specification ────────────────────────────────────────────────────────
TRUE Delimiter: ","
TRUE chr  (4): Village, Method, Location, hour
TRUE dbl (15): session, Compound.ID, ag.Male, Ag.unfed, Ag.halffed, Ag.fed, Ag.gr...
TRUE 
TRUE ℹ Use `spec()` to retrieve the full column specification for this data.
TRUE ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

1. Characterizing our data ——–

#’ Before we start visualizing our data, we need to understand the #’ characteristics of our data. The goal is to get an idea of the #’ data structure and to understand the relationships between variables.

Explore the structure and summary of the datasets

# Number of rows, then number of columns
c(nrow(malaria_data), ncol(malaria_data))
TRUE [1] 514  10
# Preview the first rows of the dataset
head(x = malaria_data)
# Per-column summary statistics
summary(object = malaria_data)
TRUE    location             month             year          ages          
TRUE  Length:514         Min.   : 1.000   Min.   :2018   Length:514        
TRUE  Class :character   1st Qu.: 4.000   1st Qu.:2018   Class :character  
TRUE  Mode  :character   Median : 7.000   Median :2019   Mode  :character  
TRUE                     Mean   : 6.486   Mean   :2019                     
TRUE                     3rd Qu.: 9.000   3rd Qu.:2020                     
TRUE                     Max.   :12.000   Max.   :2020                     
TRUE      total          positive          xcoord           ycoord     
TRUE  Min.   : 20.0   Min.   : -1.00   Min.   :-21.84   Min.   :28.52  
TRUE  1st Qu.: 46.0   1st Qu.: 14.00   1st Qu.:-20.39   1st Qu.:29.64  
TRUE  Median :103.0   Median : 33.00   Median :-20.06   Median :29.99  
TRUE  Mean   :141.5   Mean   : 47.81   Mean   :-20.04   Mean   :30.00  
TRUE  3rd Qu.:206.0   3rd Qu.: 67.00   3rd Qu.:-19.71   3rd Qu.:30.32  
TRUE  Max.   :611.0   Max.   :264.00   Max.   :-18.79   Max.   :31.81  
TRUE       prev          time_order_loc 
TRUE  Min.   :-0.04545   Min.   : 1.00  
TRUE  1st Qu.: 0.24615   1st Qu.: 9.00  
TRUE  Median : 0.33016   Median :18.00  
TRUE  Mean   : 0.31518   Mean   :17.65  
TRUE  3rd Qu.: 0.39024   3rd Qu.:26.00  
TRUE  Max.   : 0.53488   Max.   :35.00

Explore individual columns/variables

malaria_data[["location"]]  # every value stored in one column
TRUE   [1] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE   [6] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [11] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [16] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [21] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [26] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [31] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [36] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [41] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [46] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [51] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [56] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [61] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [66] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [71] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [76] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [81] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [86] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [91] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE  [96] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE [101] "mordor"     "mordor"     "mordor"     "mordor"     "mordor"    
TRUE [106] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [111] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [116] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [121] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [126] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [131] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [136] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [141] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [146] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [151] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [156] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [161] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [166] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [171] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [176] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [181] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [186] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [191] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [196] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [201] "narnia"     "narnia"     "narnia"     "narnia"     "narnia"    
TRUE [206] "narnia"     "narnia"     "narnia"     "narnia"     "neverwhere"
TRUE [211] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [216] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [221] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [226] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [231] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [236] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [241] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [246] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [251] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [256] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [261] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [266] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [271] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [276] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [281] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [286] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [291] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [296] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [301] "neverwhere" "neverwhere" "neverwhere" "neverwhere" "neverwhere"
TRUE [306] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [311] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [316] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [321] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [326] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [331] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [336] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [341] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [346] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [351] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [356] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [361] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [366] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [371] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [376] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [381] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [386] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [391] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [396] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [401] "oz"         "oz"         "oz"         "oz"         "oz"        
TRUE [406] "oz"         "oz"         "oz"         "oz"         "wonderland"
TRUE [411] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [416] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [421] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [426] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [431] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [436] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [441] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [446] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [451] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [456] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [461] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [466] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [471] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [476] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [481] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [486] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [491] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [496] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [501] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [506] "wonderland" "wonderland" "wonderland" "wonderland" "wonderland"
TRUE [511] "wonderland" "wonderland" "wonderland" "wonderland"
unique(malaria_data[["location"]])  # distinct values found in one column
TRUE [1] "mordor"     "narnia"     "neverwhere" "oz"         "wonderland"
table(malaria_data[["location"]])  # how often each value occurs in one column
TRUE 
TRUE     mordor     narnia neverwhere         oz wonderland 
TRUE        105        104         96        104        105
# Cross-tabulated counts for two columns (rows: location, columns: ages)
table(malaria_data[["location"]], malaria_data[["ages"]])
TRUE             
TRUE              15_above 5_to_14 under_5
TRUE   mordor           35      35      35
TRUE   narnia           35      35      34
TRUE   neverwhere       32      32      32
TRUE   oz               35      35      34
TRUE   wonderland       35      35      35

Check for missing values in the data and in each column

# Total number of missing cells across the whole dataset
malaria_data %>% is.na() %>% sum()
TRUE [1] 0
# Number of missing cells in each column
malaria_data %>% is.na() %>% colSums()
TRUE       location          month           year           ages          total 
TRUE              0              0              0              0              0 
TRUE       positive         xcoord         ycoord           prev time_order_loc 
TRUE              0              0              0              0              0

2. Exploratory Visualizations Using Base R Functions ————

#’ First, we will look at some exploratory data visualization #’ techniques using base R functions. The purpose of these plots #’ is to help us understand the relationships between variables and #’ characteristics of our data. They are useful for quickly exploring #’ the data and understanding the relationships, but they are not #’ great for sharing in scientific publications/presentations.

One variable comparison

Histograms

# Histogram of prevalence with all defaults
hist(x = malaria_data$prev)

# Same histogram with a requested number of breaks, custom colors,
# and explicit title / axis labels
hist(
  x = malaria_data$prev,
  breaks = 10,
  col = "purple",
  border = "black",
  main = "Distribution of Malaria Prevalence",
  xlab = "Malaria Prevalence",
  ylab = "Frequency"
)

Barplot

# Bar heights are the number of records in each age group
barplot(height = table(malaria_data[["ages"]]))

# The counts behind the bars
table(malaria_data[["ages"]])
TRUE 
TRUE 15_above  5_to_14  under_5 
TRUE      172      172      170
# Number of records per location
barplot(height = table(malaria_data[["location"]]))

# Number of records per year
barplot(height = table(malaria_data[["year"]]))

Multiple variables

Scatterplot

# Positive tests against total tests
plot(x = malaria_data$total, y = malaria_data$positive)

# Prevalence against calendar month
plot(x = malaria_data$month, y = malaria_data$prev)

# Scatter plot restricted to a single month (January)
plot_jan <- dplyr::filter(malaria_data, month == 1)
plot(x = plot_jan$month, y = plot_jan$prev)

Boxplots

# Boxplot of prevalence with one box per month
# (formula interface: values ~ grouping variable)
boxplot(malaria_data$prev ~ malaria_data$month) 

# Boxplot of prevalence with one box per location
boxplot(malaria_data$prev ~ malaria_data$location)

3. Data Visualization with ggplot2 ————

#’ ggplot2 is a popular visualization package for R. It provides #’ an easy-to-use interface for creating data visualizations. #’ The ggplot2 package is based on the “grammar of graphics” #’ and is a powerful way to create complex visualizations that #’ are useful for creating scientific and publication-quality #’ figures. #’ #’ The “grammar of graphics” used in ggplot2 is a set of rules that are #’ used to develop data visualizations using a layering approach. Layers #’ are added using the “+” operator. #’ # Components of a ggplot #’ There are three main components of a ggplot: #’ 1. The data: the dataset we want to visualize #’ 2. The aesthetics: the visual properties from the data used in the plot #’ 3. The geometries: the visual representations of the data (e.g., points, lines, bars)

The data

#’ All ggplot2 plots require a data frame as input. Just running this #’ line will produce a blank plot because we have not stated which elements from #’ the data we want to visualize or how we want to visualize them.

# Only the data is supplied, so the result is an empty canvas
ggplot(malaria_data)

The aesthetics

#’ Next, we need to specify the visual properties of the plot that are determined #’ by the data. The aesthetics are specified using the aes() function. The output should #’ now produce a blank plot but with determined visual properties (e.g., axes labels).

# Data plus an aesthetic mapping, but still no geometry layer
ggplot(malaria_data, mapping = aes(x = total, y = positive))

The geometries

#’ Finally, we need to specify the visual representation of the data. The #’ geometries are specified using the geom_ function. There are many #’ different geometries that can be used in ggplot2. We will use geom_point #’ in this example and we will append it to the previous plot using the #’ “+” operator. The output should now produce a plot with the specified visual #’ representation of the data.

# Data + aesthetics + a point geometry: a complete scatterplot
ggplot(malaria_data, mapping = aes(x = total, y = positive)) +
  geom_point()

Here are some examples of different geom functions

# Histogram of prevalence; "bins" controls how many bars are drawn
ggplot(malaria_data, mapping = aes(x = prev)) +
  geom_histogram(bins = 20)

# Bar chart counting records per year; "fill" sets the bar color
ggplot(malaria_data, mapping = aes(x = year)) +
  geom_bar(fill = "tomato")

# Boxplot of prevalence by location with the observations overlaid;
# geom_jitter() draws jittered points and "alpha" sets their transparency
ggplot(malaria_data, mapping = aes(x = location, y = prev)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.2)

# Violin plots are similar to boxplots but illustrate the distribution
# of the data
ggplot(malaria_data, mapping = aes(x = location, y = prev)) +
  geom_violin() +
  geom_jitter(alpha = 0.2)

# Scatterplot with a smoothed line added; here the "lm" method is used
ggplot(malaria_data, mapping = aes(x = total, y = positive)) +
  geom_point() +
  geom_smooth(method = "lm")
TRUE `geom_smooth()` using formula = 'y ~ x'

#’ Expanding the aes() function #’ Additional visual properties, such as color, size, and shape, can be defined #’ from our input data using the aes() function. Here is an example of adding #’ color to a previous plot using the color aesthetic.

# Color is mapped to a variable inside aes(), so each location gets its own color
ggplot(malaria_data, mapping = aes(x = total, y = positive, color = location)) +
  geom_point()

#’ Note that this is different than defining a color directly within geom_point(), #’ which would only apply a single color to all points.

# Color is set outside aes(), so every point is drawn in the same color
ggplot(malaria_data, mapping = aes(x = total, y = positive)) +
  geom_point(color = "tomato")

#’ When using the aes() function, the visual properties will be determined by a #’ variable in the dataset. This allows us to visualize relationships between #’ multiple variables at the same time.

# Histogram of prevalence filled by age group, with black bar outlines;
# the number of bins is left at the default
ggplot(malaria_data, mapping = aes(x = prev, fill = ages)) +
  geom_histogram(color = "black")
TRUE `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with the number of bins chosen explicitly
ggplot(malaria_data, mapping = aes(x = prev, fill = ages)) +
  geom_histogram(bins = 12, color = "black")

# Boxplots filled by location, with jittered observations on top
ggplot(malaria_data, mapping = aes(x = location, y = prev, fill = location)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.2)

# As above, with the jittered points also colored by location
ggplot(malaria_data, mapping = aes(x = location, y = prev, fill = location)) +
  geom_boxplot() +
  geom_jitter(mapping = aes(color = location), alpha = 0.2)

# Scatterplot of positive vs. total tests colored by location, with one
# linear ("lm") trend line per location; se = FALSE hides the confidence band.
# Transparency is a fixed (non-data) setting, so it is set on the point layer:
# an `alpha` argument passed to ggplot() itself is silently ignored.
ggplot(data = malaria_data, aes(x = total, y = positive, color = location)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE)
TRUE `geom_smooth()` using formula = 'y ~ x'

# Plot the x/y coordinates of each record, colored by location,
# using semi-transparent points
ggplot(malaria_data, mapping = aes(x = xcoord, y = ycoord, color = location)) +
  geom_point(alpha = 0.5)

4. Customizing ggplot Graphics for Presentation and Communication ——–

#’ In this section, we will use additional features of ggplot2 to customize and #’ develop high-quality plots that can be used in scientific publications and presentations.

Themes

#’ There are many different themes that can be used in ggplot2. #’ The “theme” function is used to specify the theme of the plot. There are many #’ preset theme functions, and further custom themes can be created using the #’ generic theme() function. #’ Typically you will want to set the theme at the end of your plot.

# Boxplot with the preset "classic" theme
ggplot(malaria_data, mapping = aes(x = location, y = prev, fill = location)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.2) +
  theme_classic()

# Same plot with the preset "bw" theme
ggplot(malaria_data, mapping = aes(x = location, y = prev, fill = location)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.2) +
  theme_bw()

# Boxes filled by age group; a preset theme followed by a theme() tweak
# that places the legend at the bottom
ggplot(malaria_data, mapping = aes(x = location, y = prev, fill = ages)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.2) +
  theme_classic() +
  theme(legend.position = "bottom")

Labels

#’ Labels can be added to plots using the labs() function.

# Boxplot by location and age group with a title, subtitle, axis labels,
# and a legend title supplied through labs()
ggplot(malaria_data, mapping = aes(x = location, y = prev, fill = ages)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.2) +
  labs(
    title = "Malaria prevalence by location and age group",
    subtitle = "Data from 2018 - 2020",
    x = "Location",
    y = "Prevalence",
    fill = "Age group"
  ) +
  theme_classic() +
  theme(legend.position = "bottom")

Custom color palettes

#’ There are many different color palettes that can be used in ggplot2. #’ The scale_color_*() and scale_fill_*() families of functions are used to specify the colors of the plot. There are many #’ preset color palettes, and further custom color palettes can be created using the #’ scale_color_manual() and scale_fill_manual() functions.

# Fill the boxes using the preset "Set1" ColorBrewer palette
ggplot(malaria_data, mapping = aes(x = location, y = prev, fill = location)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.2) +
  scale_fill_brewer(palette = "Set1")

We can also set our own colors

# Fill the boxes with hand-picked hex colors
ggplot(malaria_data, mapping = aes(x = location, y = prev, fill = location)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.2) +
  scale_fill_manual(
    values = c("#C6E0FF", "#136F63", "#E0CA3C", "#F34213", "#3E2F5B")
  )

We can also use custom color palettes for continuous variables

# Continuous color scale running from blue (low) to red (high)
ggplot(malaria_data, mapping = aes(x = total, y = positive, color = prev)) +
  geom_point() +
  scale_color_gradient(low = "blue", high = "red")

# Continuous viridis color scale, using the "magma" option
ggplot(malaria_data, mapping = aes(x = total, y = positive, color = prev)) +
  geom_point() +
  scale_color_viridis_c(option = "magma")

Facets

#’ Facets are a powerful feature of ggplot2 that allow us to create multiple plots #’ based on a single variable. This “small multiple” approach is another effective #’ way to visualize relationships between multiple variables.

# One scatterplot panel per location
ggplot(malaria_data, mapping = aes(x = total, y = positive, color = prev)) +
  geom_point() +
  scale_color_viridis_c(option = "magma") +
  facet_wrap(~ location)

# Boxplots of prevalence by location, one panel per age group.
# The age groups are shown by the facets; the fill color encodes location,
# so the fill legend is titled "Location" (it was mislabelled "Age group").
ggplot(data = malaria_data, aes(x = location, y = prev, fill = location)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.2) +
  facet_wrap(~ ages) +
  coord_flip() +  # flips the x and y axes
  scale_fill_manual(values = c("#C6E0FF", "#136F63", "#E0CA3C", "#F34213", "#3E2F5B")) +
  labs(title = "Malaria prevalence by location and age group",
       subtitle = "Data from 2018 - 2020",
       x = "Location",
       y = "Prevalence",
       fill = "Location") +
  theme_classic()

# Histograms of prevalence filled by age group, one row of panels per year
ggplot(malaria_data, mapping = aes(x = prev, fill = ages)) +
  geom_histogram(bins = 10) +
  scale_fill_viridis_d() +
  facet_grid(rows = year ~ .)

Exporting plots

#’ ggplot2 plots can be exported to a variety of formats using the ggsave() function. #’ You can specify which plot to export by saving it in an object and then passing the #’ object to ggsave() via the plot argument; otherwise ggsave() will save the current/last plot. #’ The width and height of the output image can be set using #’ the width and height arguments, and the resolution of the image using the dpi argument. #’ #’ The file type can be set using the device argument, or by using a specific file extension. #’ I recommend using informative names for the output file.

# Figure to export: boxplots of prevalence by location, one panel per age group.
# The age groups are shown by the facets; the fill color encodes location,
# so the fill legend is titled "Location" (it was mislabelled "Age group").
ggplot(data = malaria_data, aes(x = location, y = prev, fill = location)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.2) +
  facet_wrap(~ ages) +
  coord_flip() +  # flips the x and y axes
  scale_fill_manual(values = c("#C6E0FF", "#136F63", "#E0CA3C", "#F34213", "#3E2F5B")) +
  labs(title = "Malaria prevalence by location and age group",
       subtitle = "Data from 2018 - 2020",
       x = "Location",
       y = "Prevalence",
       fill = "Location") +
  theme_classic()

# No plot object is passed, so ggsave() writes the plot created just above
# (the last plot); the .png extension selects the file type.
ggsave("malaria-prevalence-age-boxplot.png", width = 10, height = 6, dpi = 300)

5. Something to try————————-

#’ CHALLENGE 1: Create a figure showing how the Anopheles gambiae total counts #’ vary each day and by location.