1 Working example SpillOver

-The code use several datasets EMPRES-I (FAO), ECDC Surveillance Atlas ECDC, Global Health observatory (WHO). Adagio. -Please feel free to analyse the code and run from your terminal (there is a folder with the code and data on: DG1 Disease occurrence > datasources > Spill-over > sources > spillover.Rmd)

2 Libraries

# Spill over analysis
# Update: 17/01/2024
library(tidyverse)
## Warning: package 'forcats' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(stringr)
library(readr)
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(DT)
library(arsenal)
## Warning: package 'arsenal' was built under R version 4.3.3
## 
## Attaching package: 'arsenal'
## 
## The following object is masked from 'package:lubridate':
## 
##     is.Date

3 Working directory

setwd("C:/Users/alfredo.acosta/SVA/LiRA_consortium - Documents/WG1 Disease occurrence/datasources/Spill-over/sources/sc2")

4 Loading ECDC Surveillance atlas

# EGran
# soe <- read.csv(file="ECDC_surveillance_data_Echinococcosis.27.11.24.csv")
soe <- read.csv(file="ECDC_surveillance_data_Echinococcosis.granulosus.27.11.24.csv")
# Lyme
soe1 <- read.csv(file="ECDC_surveillance_data_Lyme_Neuroborreliosis.27.11.24.csv")
# Qfever
soe2 <- read.csv(file="ECDC_surveillance_data_Q_fever.27.11.24.csv")
# TBE
soe3 <- read.csv(file="ECDC_surveillance_data_Tick-borne_encephalitis.27.11.24.csv")
# Influenza
soe4 <- read.csv(file="ECDC_surveillance_data_Influenza.27.11.24.csv")

soe <- rbind(soe, soe1, soe2, soe3)
rm(soe1, soe2, soe3, soe4)

soe1 <- soe %>% 
        group_by(HealthTopic, RegionName,Time) %>%
        summarise(cases=sum(as.numeric(NumValue))) %>% 
        filter(cases != 0)
## `summarise()` has grouped output by 'HealthTopic', 'RegionName'. You can
## override using the `.groups` argument.
colnames(soe1) <- c("disease", "country", "time", "cases")

5 Descriptive

# Number of human cases
soe %>% 
  filter(!str_detect(RegionName, "EU")) %>% 
    group_by(HealthTopic) %>%
  summarise(cases=sum(NumValue)) %>% 
  filter(cases != 0)
## # A tibble: 4 × 2
##   HealthTopic             cases
##   <chr>                   <dbl>
## 1 Echinococcosis           7772
## 2 Lyme Neuroborreliosis    7220
## 3 Q fever                 15673
## 4 Tick-borne encephalitis 34633
# Number of countries
soe %>% 
  filter(!str_detect(RegionName, "EU")) %>% 
    group_by(HealthTopic) %>%
  summarise(countries=length(unique(RegionName)))
## # A tibble: 4 × 2
##   HealthTopic             countries
##   <chr>                       <int>
## 1 Echinococcosis                 30
## 2 Lyme Neuroborreliosis          19
## 3 Q fever                        30
## 4 Tick-borne encephalitis        30
table(soe$Time)
## 
## 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 
##   31   61   59   60   60   87   89   91   92   93   94  108  114  110  113  117 
## 2023 
##  118
table(soe$Time, soe$HealthTopic)
##       
##        Echinococcosis Lyme Neuroborreliosis Q fever Tick-borne encephalitis
##   2007             31                     0       0                       0
##   2008             31                     0      30                       0
##   2009             30                     0      29                       0
##   2010             30                     0      30                       0
##   2011             30                     0      30                       0
##   2012             31                     0      31                      25
##   2013             32                     0      31                      26
##   2014             32                     0      31                      28
##   2015             32                     0      32                      28
##   2016             31                     0      33                      29
##   2017             32                     0      33                      29
##   2018             31                    14      33                      30
##   2019             32                    19      33                      30
##   2020             31                    18      31                      30
##   2021             31                    19      32                      31
##   2022             31                    21      32                      33
##   2023             32                    21      32                      33

6 Loading Adagio database

# Reading ADAGIO database
adg <- read.csv(file= "Outbreaks and cases with labels.csv")
str(adg)
## 'data.frame':    3670 obs. of  9 variables:
##  $ Disease        : chr  "African swine fever virus" "African swine fever virus" "African swine fever virus" "African swine fever virus" ...
##  $ FunctionalGroup: chr  "Wildlife amplification" "Wildlife amplification" "Wildlife amplification" "Wildlife amplification" ...
##  $ Species        : chr  "Wild Boar" "Wild Boar" "Wild Boar" "Wild Boar" ...
##  $ Country        : chr  "Poland" "Poland" "Poland" "Poland" ...
##  $ NUTS2          : chr  "Warmińsko-Mazurskie" "Warmińsko-Mazurskie" "Warmińsko-Mazurskie" "Warmińsko-Mazurskie" ...
##  $ Year           : int  2019 2019 2019 2018 2019 2019 2019 2019 2019 2019 ...
##  $ Month          : chr  "November" "October" "February" "November" ...
##  $ SumCases       : int  168 97 102 93 38 71 105 130 53 58 ...
##  $ SumOutbreaks   : int  122 87 60 50 37 49 62 87 45 42 ...

7 Loading Human population from Adagio(FAO)

library(readxl)
adg_pop <- read_excel("FAO human population.xlsx")

8 Matrix with human population over the years

# Create matrix
db <- expand.grid(unique(adg_pop$Country), c(2003:2023), unique(soe1$disease))
colnames(db) <- c("country", "time", "disease")

# first index to  fill the human cases db & soe1
index <- match(paste(db$country, db$time, db$disease), paste(soe1$country, soe1$time, soe1$disease))
db$human_c <- soe1$cases[index]

# second index to pass the FAO Adagio human population
index <- match(paste(db$country, db$time), paste(adg_pop$Country, adg_pop$Year))

db$human_pop <- adg_pop$`Human population by country.Population in thousands`[index]
db$human_pop <- db$human_pop*1000

8.1 Human cases / 100.000 habitants using FAO

db$human_cases_hab <- db$human_c/db$human_pop*100000

9 Reading the UN regions

unregions <- read.csv("all.csv")
unregions$name[unregions$name == "United States of America"] <- "United States"
unregions$name[unregions$name == "United Kingdom of Great Britain and Northern Ireland"] <- "United Kingdom"

str(unregions)
## 'data.frame':    249 obs. of  11 variables:
##  $ name                    : chr  "Afghanistan" "Ã…land Islands" "Albania" "Algeria" ...
##  $ alpha.2                 : chr  "AF" "AX" "AL" "DZ" ...
##  $ alpha.3                 : chr  "AFG" "ALA" "ALB" "DZA" ...
##  $ country.code            : int  4 248 8 12 16 20 24 660 10 28 ...
##  $ iso_3166.2              : chr  "ISO 3166-2:AF" "ISO 3166-2:AX" "ISO 3166-2:AL" "ISO 3166-2:DZ" ...
##  $ region                  : chr  "Asia" "Europe" "Europe" "Africa" ...
##  $ sub.region              : chr  "Southern Asia" "Northern Europe" "Southern Europe" "Northern Africa" ...
##  $ intermediate.region     : chr  "" "" "" "" ...
##  $ region.code             : int  142 150 150 2 9 150 2 19 NA 19 ...
##  $ sub.region.code         : int  34 154 39 15 61 39 202 419 NA 419 ...
##  $ intermediate.region.code: int  NA NA NA NA NA NA 17 29 NA 29 ...
str(db)
## 'data.frame':    20160 obs. of  6 variables:
##  $ country        : Factor w/ 240 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ time           : int  2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 ...
##  $ disease        : Factor w/ 4 levels "Echinococcosis",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ human_c        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ human_pop      : num  22644000 3092000 32055000 57000 73000 ...
##  $ human_cases_hab: num  NA NA NA NA NA NA NA NA NA NA ...
##  - attr(*, "out.attrs")=List of 2
##   ..$ dim     : int [1:3] 240 21 4
##   ..$ dimnames:List of 3
##   .. ..$ Var1: chr [1:240] "Var1=Afghanistan" "Var1=Albania" "Var1=Algeria" "Var1=American Samoa" ...
##   .. ..$ Var2: chr [1:21] "Var2=2003" "Var2=2004" "Var2=2005" "Var2=2006" ...
##   .. ..$ Var3: chr [1:4] "Var3=Echinococcosis" "Var3=Lyme Neuroborreliosis" "Var3=Q fever" "Var3=Tick-borne encephalitis"
db$country <- as.character(db$country)
db <- data.frame(db)
db$unr <- unregions$sub.region[match(db$country, unregions$name)]

10 General view on rate x 100.000 hab

db %>% 
  filter(time > 2003) %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, group=country))+
  geom_point()

db %>% 
  filter(time > 2003) %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, group=country))+
  geom_point()+
  scale_y_log10()+
  facet_wrap(vars(disease), ncol = 1)

# General plots by country

# List of diseases
diseases <- unique(db$disease)

# Function to generate plots for a given disease
generate_plots <- function(disease_name) {
  # Filter data for the specific disease
  disease_data <- db %>% 
    filter(time > 2003, disease == disease_name, !is.na(human_cases_hab))
  
  # Check if disease_data is empty
  if (nrow(disease_data) == 0) {
    return(list(plot1 = NULL, plot2 = NULL, plot3 = NULL))
  }
  
  # Plot 1: Boxplot per country
  plot1 <- disease_data %>%
    ggplot(aes(human_cases_hab, color = country)) +
    geom_boxplot() +
    scale_x_log10() +
    facet_wrap(vars(country), ncol = 1) +
    theme_minimal() +
    theme(legend.position = "none") +
    xlab(paste(disease_name, "human cases / 100.000 habitants")) +
    ylab(NULL) +
    guides(y = "none")
  
  # Plot 2: Boxplot grouped by year
  plot2 <- disease_data %>%
    ggplot(aes(human_cases_hab, time, group = time)) +
    geom_boxplot() +
    scale_x_log10() +
    theme_minimal() +
    theme(legend.position = "none") +
    xlab(paste(disease_name, "human cases / 100.000 habitants (boxplot grouped by year)")) +
    ylab(NULL)
  
  # Plot 3: Time-series plot
  plot3 <- disease_data %>%
    ggplot(aes(time, human_cases_hab, color = country)) +
    geom_point() +
    geom_line(size = 1) +
    scale_y_log10() +
    facet_wrap(vars(country), ncol = 3) +
    theme_minimal() +
    theme(legend.position = "none") +
    xlab(paste(disease_name, "human cases / 100.000 habitants (by year-country)")) +
    ylab(NULL)
  
  # Return a list of plots
  list(plot1 = plot1, plot2 = plot2, plot3 = plot3)
}

# Generate all plots for all diseases
all_plots <- map(diseases, ~ generate_plots(.x))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
names(all_plots) <- diseases

all_plots
## $Echinococcosis
## $Echinococcosis$plot1

## 
## $Echinococcosis$plot2

## 
## $Echinococcosis$plot3
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## 
## 
## $`Lyme Neuroborreliosis`
## $`Lyme Neuroborreliosis`$plot1

## 
## $`Lyme Neuroborreliosis`$plot2

## 
## $`Lyme Neuroborreliosis`$plot3

## 
## 
## $`Q fever`
## $`Q fever`$plot1

## 
## $`Q fever`$plot2

## 
## $`Q fever`$plot3
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## 
## 
## $`Tick-borne encephalitis`
## $`Tick-borne encephalitis`$plot1

## 
## $`Tick-borne encephalitis`$plot2

## 
## $`Tick-borne encephalitis`$plot3
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

# Access plots for specific diseases
# Qfever_plots <- all_plots[["Q fever"]]
# EGran_plots <- all_plots[["Echinococcosis"]]
# Lyme_plots <- all_plots[["Lyme Neuroborreliosis"]]
# TBE_plots <- all_plots[["Tick-borne encephalitis"]]

# Display specific plots
# Qfever_plots$plot1
# Qfever_plots$plot2
# Qfever_plots$plot3

# EGran_plots$plot1
# Lyme_plots$plot2
# TBE_plots$plot3

11 General view of diseases spread

db %>% 
  filter(time > 2003) %>% 
  # filter(disease == "Rabies") %>% 
  # filter(unr != "Western Asia") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, time, group=time, color=disease))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(disease), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("human cases/100.000 habitants (log scaled) country rates")

db %>% 
  filter(time > 2003 & !is.na(human_cases_hab & !is.na(disease))) %>% 
  ggplot(aes(human_cases_hab, unr, group=unr, color=disease))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(disease), scales="free", ncol = 1)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("human cases/100.000 habitants (log scaled) country rates ")

12 Calculating cut points and levels by disease

level <- db %>% 
  filter(time > 2003) %>%
  # filter(!is.na(unr)) %>% 
  filter(!is.na(human_cases_hab)) %>% 
  group_by(disease) %>% 
  mutate(name=cut(human_cases_hab, breaks = 3, labels = c("low","medium","high"))) %>% 
  mutate(ranges=cut(human_cases_hab, breaks = 3)) %>% 
  select(-unr)


level2 <- level %>%  
  group_by(disease,name,ranges) %>% 
  dplyr::summarize(mean_point=median(human_cases_hab),
                   human_cases_hab_unr=(sum(human_c)/sum(human_pop*100000)),
                   mean2=mean(human_cases_hab_unr))
## `summarise()` has grouped output by 'disease', 'name'. You can override using
## the `.groups` argument.

13 Using UN regions

diseases <- unique(db$disease)

generate_plots_unr <- function(disease_name) {
  
  disease_data <- db %>% 
    filter(time > 2003, disease == disease_name, !is.na(human_cases_hab))

  #boxplot by unr 
p1 <- disease_data %>% 
  ggplot(aes(human_cases_hab, color=unr))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(unr), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none")+
  xlab(paste(disease_name, "human cases / 100.000 habitants"))+
  ylab(NULL) + 
  guides(y="none")

# boxplot by year
p2 <- disease_data %>% 
  ggplot(aes(human_cases_hab, time, group=time))+
  geom_boxplot()+
  scale_x_log10()+
  theme_minimal() +
  theme(legend.position="none")+
  xlab(paste(disease_name, "human cases / 100.000 habitants"))+
  ylab(NULL)

# Time series plot
p3 <- disease_data %>% 
  ggplot(aes(time, human_cases_hab, color=unr))+
  geom_point()+
  geom_line(size=1)+
  scale_y_log10()+
  facet_wrap(vars(unr), ncol = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab(paste(disease_name, "human cases / 100.000 habitants (by year-unr)"))

# Return a list or plots
  list(p1 = p1, p2 = p2, p3 = p3)
}

# Generate all plots for all diseases
all_plots2 <- map(diseases, generate_plots_unr)

all_plots2
## [[1]]
## [[1]]$p1

## 
## [[1]]$p2

## 
## [[1]]$p3
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## 
## 
## [[2]]
## [[2]]$p1

## 
## [[2]]$p2

## 
## [[2]]$p3

## 
## 
## [[3]]
## [[3]]$p1

## 
## [[3]]$p2

## 
## [[3]]$p3

## 
## 
## [[4]]
## [[4]]$p1

## 
## [[4]]$p2

## 
## [[4]]$p3

# Access plots for a specific disease, e.g., "Q fever"

# Qfever_plots_unr<- all_plots2[[which(diseases == "Q fever")]]
# EGran_plots_unr <- all_plots2[[which(diseases == "Echinococcosis")]]
# Lyme_plots_unr <- all_plots2[[which(diseases == "Lyme Neuroborreliosis")]]
# TBE_plots_unr <- all_plots2[[which(diseases == "Tick-borne encephalitis")]]

14 Calculating cut points and levels by disease and UNregions

UNRegion_db_unr <- db %>% 
  group_by(disease, unr) %>% 
  filter(time > 2003) %>% 
  filter(unr != "Western Asia") %>% 
  filter(!is.na(human_cases_hab)) %>%
  summarise(human_cases_hab_unr=(sum(human_c)/sum(human_pop)*100000)) %>% 
  mutate(name=cut(human_cases_hab_unr, breaks = 3, labels = c("low","medium","high"))) %>% 
  mutate(range=cut(human_cases_hab_unr, breaks = 3))
## `summarise()` has grouped output by 'disease'. You can override using the
## `.groups` argument.
UNRegion_db_unr
## # A tibble: 16 × 5
## # Groups:   disease [4]
##    disease                 unr             human_cases_hab_unr name   range     
##    <fct>                   <chr>                         <dbl> <fct>  <fct>     
##  1 Echinococcosis          Eastern Europe               0.429  high   (0.303,0.…
##  2 Echinococcosis          Northern Europe              0.0521 low    (0.0517,0…
##  3 Echinococcosis          Southern Europe              0.0720 low    (0.0517,0…
##  4 Echinococcosis          Western Europe               0.0916 low    (0.0517,0…
##  5 Lyme Neuroborreliosis   Eastern Europe               0.921  high   (0.751,1.…
##  6 Lyme Neuroborreliosis   Northern Europe              1.07   high   (0.751,1.…
##  7 Lyme Neuroborreliosis   Southern Europe              0.117  low    (0.116,0.…
##  8 Lyme Neuroborreliosis   Western Europe               0.243  low    (0.116,0.…
##  9 Q fever                 Eastern Europe               0.181  medium (0.119,0.…
## 10 Q fever                 Northern Europe              0.0532 low    (0.053,0.…
## 11 Q fever                 Southern Europe              0.250  high   (0.184,0.…
## 12 Q fever                 Western Europe               0.227  high   (0.184,0.…
## 13 Tick-borne encephalitis Eastern Europe               1.22   medium (0.943,1.…
## 14 Tick-borne encephalitis Northern Europe              2.27   high   (1.61,2.2…
## 15 Tick-borne encephalitis Southern Europe              0.279  low    (0.277,0.…
## 16 Tick-borne encephalitis Western Europe               0.346  low    (0.277,0.…
UNRegion_db <- db %>% 
  group_by(disease) %>% 
  filter(time > 2003) %>% 
  filter(unr != "Western Asia") %>% 
  filter(!is.na(human_cases_hab)) %>%
  summarise(human_cases_hab_unr=(sum(human_c)/sum(human_pop)*100000)) %>% 
  mutate(name=cut(human_cases_hab_unr, breaks = 3, labels = c("low","medium","high"))) %>% 
  mutate(range=cut(human_cases_hab_unr, breaks = 3))
UNRegion_db
## # A tibble: 4 × 4
##   disease                 human_cases_hab_unr name  range        
##   <fct>                                 <dbl> <fct> <fct>        
## 1 Echinococcosis                        0.151 low   (0.151,0.367]
## 2 Lyme Neuroborreliosis                 0.692 high  (0.583,0.799]
## 3 Q fever                               0.192 low   (0.151,0.367]
## 4 Tick-borne encephalitis               0.799 high  (0.583,0.799]

15 General cut points by disease

db %>% 
  filter(time > 2003) %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, color=disease, group=unr))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(disease, unr))+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("human cases / 100.000 habitants (agregated by UNRegions and years)")

16 Lira table median 95 CI

lira_table_sp <- db %>%
  filter(time > 2003) %>%
  # filter(unr != "Western Asia") %>% 
  filter(!is.na(human_cases_hab)) %>%
  group_by(disease, UNregions=unr) %>%
  summarise(
    human_cases=sum(human_c),
    human_pop=sum(human_pop, na.rm = TRUE),
    human_rate_median=round(median(human_cases_hab), 3),
    CI05 = round(quantile(human_cases_hab, probs = 0.025, na.rm = TRUE), 3),
    CI95 = round(quantile(human_cases_hab, probs = 0.975, na.rm = TRUE) ,3)) %>%
  mutate(level=cut(human_rate_median, breaks = 3, labels = c("low","medium","high"))) %>% 
  arrange(disease, desc(human_rate_median), UNregions)
## `summarise()` has grouped output by 'disease'. You can override using the
## `.groups` argument.
datatable(lira_table_sp)
# lira_table_sp
# write.csv(lira_table_sp, file="lira_table_sp.csv")

#Graph

library(ggplot2)

# Visualization
ggplot(lira_table_sp, aes(x = reorder(UNregions, -human_rate_median), y = human_rate_median, color = disease)) +
  geom_point(size = 3, position = position_dodge(width = 0.1)) +  # Points for median
  geom_errorbar(aes(ymin = CI05, ymax = CI95), width = 0.2, position = position_dodge(width = 0.5)) +  # Error bars for CI
  facet_wrap(vars(disease), scales = "free_y") +  # Separate panels for each disease
  coord_flip() +  # Flip axes for better readability
  theme_minimal() +
  labs(
    title = "Median Human Cases and 95% CI by Disease and UN Region",
    x = "UN Region",
    y = "Human Cases / 100,000 habitants",
    color = "Disease"
  ) +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

17 Graph

ggplot(lira_table_sp, aes(x = level, y = human_rate_median, fill = level)) +
  geom_boxplot(outlier.shape = NA, alpha = 0.8) +  # Boxplot for distribution
  geom_jitter(aes(color = level), width = 0.2, size = 2, alpha = 0.6) +  # Add points for individual medians
  facet_wrap(vars(disease), scales = "free_y") +  # Separate panels for each disease
  theme_minimal() +
  labs(
    title = "Distribution of Human Rate Median by Level (High, Medium, Low)",
    x = "Level",
    y = "Human Cases / 100,000 habitants",
    fill = "Level",
    color = "Level"
  ) +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.spacing = unit(1, "lines")
  )

# Credits Acosta, Alfredo PhD1. SVA1: SVA http://www.sva.se/.