2025-03-16

Data Description

Data Source - World Health Organisation -> https://data.who.int/dashboards/covid19/data

The data set contains COVID-19 vaccination data for 215 countries and territories recognized by the World Health Organization. It contains 16 fields of information, such as country, WHO region, vaccination rates, and numbers per 100 individuals. Key columns include total vaccinations completed, total vaccinations administered, booster doses, and first doses. Most of the records are current up to late 2023. Some data, such as vaccines administered, are incomplete or missing. The data set shows how vaccination is progressing globally, highlights differences between regions, and reveals overall trends. It provides valuable information on how countries responded to the pandemic through their vaccination programs.

Total Vs Booster Doses

# Keep only rows that report both a total-vaccination count and a booster
# count, then express both in millions so the axes stay readable.
plot_data <- vacc_data %>%
  filter(
    !is.na(TOTAL_VACCINATIONS),
    !is.na(PERSONS_BOOSTER_ADD_DOSE)
  ) %>%
  mutate(
    TOTAL_VACCINATIONS_MILLIONS = TOTAL_VACCINATIONS / 1e6,
    BOOSTER_DOSES_MILLIONS = PERSONS_BOOSTER_ADD_DOSE / 1e6
  )

# Interactive scatter plot: total vaccinations (x) against booster doses (y),
# one marker per remaining row.
p1 <- plot_ly(
  plot_data,
  x = ~TOTAL_VACCINATIONS_MILLIONS,
  y = ~BOOSTER_DOSES_MILLIONS,
  type = "scatter",
  mode = "markers",
  # hoverinfo = "text" replaces the default x/y readout, so the tooltip has
  # to carry both plotted values itself; round them so the hover label does
  # not show long decimal tails.
  text = ~paste(
    COUNTRY,
    "<br>Total:", round(TOTAL_VACCINATIONS_MILLIONS, 2), "M",
    "<br>Boosters:", round(BOOSTER_DOSES_MILLIONS, 2), "M"
  ),
  hoverinfo = "text"
) %>%
  layout(
    title = "Total Vaccinations vs Booster Doses (Millions)",
    xaxis = list(title = "Total Vaccinations (Millions)"),
    yaxis = list(title = "Booster Doses (Millions)")
  )

PLOT 1

Vaccination Coverage by Region

# 3D scatter of three per-100 measures, coloured by WHO region:
#   x = persons with at least one dose, y = persons with a booster,
#   z = total doses administered.
p2 <- plot_ly(
  # A point needs all three coordinates to be placed in 3D, so drop rows that
  # are missing any of the plotted columns (not just the x column).
  vacc_data %>%
    filter(
      !is.na(PERSONS_VACCINATED_1PLUS_DOSE_PER100),
      !is.na(PERSONS_BOOSTER_ADD_DOSE_PER100),
      !is.na(TOTAL_VACCINATIONS_PER100)
    ),
  x = ~PERSONS_VACCINATED_1PLUS_DOSE_PER100,
  y = ~PERSONS_BOOSTER_ADD_DOSE_PER100,
  z = ~TOTAL_VACCINATIONS_PER100,
  type = "scatter3d",
  mode = "markers",
  color = ~WHO_REGION,
  # NOTE(review): plot_ly documents `size` as a data mapping; if a fixed
  # marker size is intended, `size = I(10)` or `marker = list(size = ...)`
  # may be what was meant -- confirm against the rendered plot.
  size = 10,
  text = ~paste(COUNTRY, "<br>Primary:", PERSONS_VACCINATED_1PLUS_DOSE_PER100, "%"),
  hoverinfo = "text"
) %>%
  layout(
    scene = list(
      xaxis = list(title = "Primary Vaccination (%)"),
      yaxis = list(title = "Boosters (%)"),
      zaxis = list(title = "Doses/100"),
      # Initial camera position for the 3D scene.
      camera = list(eye = list(x = -1.65, y = -1.65, z = 0.5))
    ),
    title = "3D View: Vaccination Coverage by Region"
  )

PLOT 2

Average Vaccination Rate by Month

# Bucket every record by the month of its last update, then compute the mean
# 1+ dose coverage and the number of records in each bucket.
plot_data <- vacc_data %>%
  mutate(update_date = as.Date(DATE_UPDATED)) %>%
  # Records without a usable update date cannot be assigned to a month.
  filter(!is.na(update_date)) %>%
  mutate(year_month = format(update_date, "%Y-%m")) %>%
  group_by(year_month) %>%
  summarise(
    avg_vacc_rate = mean(PERSONS_VACCINATED_1PLUS_DOSE_PER100, na.rm = TRUE),
    countries = n()
  )

# Line chart of the monthly averages; the tooltip shows the month, the
# rounded average and how many records contributed to it.
p3 <- plot_ly(plot_data) %>%
  add_lines(
    x = ~year_month,
    y = ~avg_vacc_rate,
    text = ~paste(
      "Month:", year_month,
      "<br>Avg Rate:", round(avg_vacc_rate, 1), "%",
      "<br>Countries:", countries
    ),
    hoverinfo = "text"
  ) %>%
  layout(
    title = "Average Vaccination Rate by Update Month",
    xaxis = list(title = "Month of Data Update"),
    yaxis = list(title = "Vaccination Rate (%)")
  )

PLOT 3

Top 20 Countries by Vaccination Rate

# The 20 rows with the highest 1+ dose coverage. COUNTRY is turned into a
# factor ordered by that coverage so the bars are drawn in ranked order.
top_20 <- vacc_data %>%
  # Rows without a rate cannot be ranked; drop them so they can never fill
  # one of the 20 slots.
  filter(!is.na(PERSONS_VACCINATED_1PLUS_DOSE_PER100)) %>%
  arrange(desc(PERSONS_VACCINATED_1PLUS_DOSE_PER100)) %>%
  head(20) %>%
  mutate(COUNTRY = fct_reorder(COUNTRY, PERSONS_VACCINATED_1PLUS_DOSE_PER100))

# Horizontal bar chart with the value printed just past the end of each bar.
p4 <- ggplot(top_20, aes(x = COUNTRY, y = PERSONS_VACCINATED_1PLUS_DOSE_PER100)) +
  # geom_col() is the direct equivalent of geom_bar(stat = "identity").
  geom_col(fill = "#649ac6", width = 0.7) +
  geom_text(
    aes(label = paste0(PERSONS_VACCINATED_1PLUS_DOSE_PER100, "%")),
    hjust = -0.1, size = 3, color = "black"
  ) +
  # The labels are drawn beyond the bar ends (hjust < 0); add head-room on the
  # upper side of the value axis so the longest bars' labels are not clipped.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  labs(
    title = "Top 20 Countries by Vaccination Rate (Primary Doses)",
    x = NULL,
    y = "Vaccination Rate (% of Population)"
  ) +
  coord_flip() +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(size = 9),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  )

PLOT 4

Distribution of Vacc. Rates by WHO Region

# Restrict to the six named WHO regions with a known 1+ dose rate, and order
# the regions by their median rate so the boxes appear in ascending order.
plot_data <- vacc_data %>%
  filter(
    # %in% never matches NA, so this also excludes rows with a missing region.
    WHO_REGION %in% c("AFRO", "AMRO", "EMRO", "EURO", "SEARO", "WPRO"),
    !is.na(PERSONS_VACCINATED_1PLUS_DOSE_PER100)
  ) %>%
  mutate(
    WHO_REGION = fct_reorder(
      WHO_REGION,
      PERSONS_VACCINATED_1PLUS_DOSE_PER100,
      .fun = median
    )
  )

# Box plot per region with the individual observations jittered on top.
p5 <- ggplot(
  plot_data,
  aes(
    x = WHO_REGION,
    y = PERSONS_VACCINATED_1PLUS_DOSE_PER100,
    fill = WHO_REGION
  )
) +
  # Outlier glyphs are suppressed because the jitter layer already draws
  # every observation.
  geom_boxplot(alpha = 0.8, outlier.shape = NA) +
  geom_jitter(width = 0.2, size = 1.5, alpha = 0.4, color = "black") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Distribution of Vaccination Rates by WHO Region",
    x = "WHO Region",
    y = "Primary Vaccination Rate (% of Population)",
    fill = "Region"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    # The x axis already names each region, so the fill legend is hidden.
    legend.position = "none"
  )

PLOT 5

Statistics & T-Test

# One-row summary of the 1+ dose rate across all records, ignoring missing
# values. Mean and SD are rounded to two decimals; Median and IQR are left
# as computed.
summary_stats <- vacc_data %>%
  summarise(
    Mean = round(
      mean(PERSONS_VACCINATED_1PLUS_DOSE_PER100, na.rm = TRUE),
      2
    ),
    Median = median(PERSONS_VACCINATED_1PLUS_DOSE_PER100, na.rm = TRUE),
    SD = round(
      sd(PERSONS_VACCINATED_1PLUS_DOSE_PER100, na.rm = TRUE),
      2
    ),
    IQR = IQR(PERSONS_VACCINATED_1PLUS_DOSE_PER100, na.rm = TRUE)
  )

# Two-sample t-test comparing the 1+ dose rate between the EURO and AFRO
# regions (t.test() defaults to the Welch variant, var.equal = FALSE).
t_test <- t.test(
  PERSONS_VACCINATED_1PLUS_DOSE_PER100 ~ WHO_REGION,
  data = vacc_data %>% filter(WHO_REGION %in% c("EURO", "AFRO"))
)

# Absolute gap between the two group means, to two decimals. Both means are
# rates per 100, so the gap is in percentage points despite the name.
percent_diff <- round(abs(diff(t_test[["estimate"]])), 2)

Statistical Summary

Primary Vaccination Rate Statistics
Mean Median SD IQR
62.77 67 24.06 38
## **Welch Two Sample t-test: EURO vs AFRO**
## 
## Group Means: AFRO = 43.78%, EURO = 64.3%  
## t = -4.94, df = 86.15, p < 0.001  
## 95% CI = [-28.78, -12.26]
## 
## Interpretation: EURO vaccination rate is 20.52% higher than AFRO.  
## Statistically significant difference (p < 0.001). CI excludes 0.

Data Analyses Conclusion

These graphs display variations in COVID-19 vaccination rates globally. China administered more than 3 billion vaccinations, illustrating its robust vaccination initiative: most Chinese citizens received both primary doses as well as a booster dose. Meanwhile, Papua New Guinea had the lowest primary vaccination rate, at only 4 per 100 individuals, illustrating serious problems with access or delivery. European nations such as Germany and France averaged 43.62 boosters per 100, whereas African nations such as Nigeria and Ethiopia averaged considerably less at 12.03, illustrating uneven booster distribution. The 3D and box plots also showed that Europe and Southeast Asia were the top performers, whereas Africa trailed behind. Per a Welch t-test, Europe’s mean primary vaccination rate was 20.52 percentage points higher than Africa’s (p < 0.001). The four top performers—Puerto Rico, UAE, Brunei, and Tokelau—reached nearly 100% primary vaccination, illustrating strong public health systems and outreach.