# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Read covid.csv and prepare the analysis frame:
#   * `date` is parsed into a Date column,
#   * `excess_mortality` is copied into a new `excess_deaths` column,
#   * only rows with a positive `aged_65_older` value are kept (filter() also
#     drops rows where the condition evaluates to NA).
data <- read.csv("covid.csv") %>%
  mutate(date = as.Date(date)) %>%
  mutate(excess_deaths = excess_mortality) %>%
  filter(aged_65_older > 0)
# Vaccination rate as a fraction clamped to [0, 1]: the per-hundred column is
# divided by 100 and out-of-range values are pulled to the nearest bound.
# Missing inputs stay NA (pmin()/pmax() propagate NA by default).
vaccination_rate <- pmax(0, pmin(1, data$people_vaccinated_per_hundred / 100))

# Correlation over the rows where both series are present. Without
# `use = "complete.obs"`, cor() returns NA as soon as either vector contains a
# missing value, which made the annotation below read "Correlation:NA".
correlation <- cor(vaccination_rate, data$excess_deaths, use = "complete.obs")

# Scatter plot of vaccination rate (in percent) against excess deaths, with the
# correlation printed near the top-right corner of the data range.
ggplot(data, aes(x = vaccination_rate * 100, y = excess_deaths)) +
  geom_point() +
  labs(
    title = "Vaccination Rate vs. Excess Deaths (Over 65)",
    x = "Vaccination Rate (%)",
    y = "Excess Deaths"
  ) +
  annotate(
    "text",
    # na.rm = TRUE keeps the anchor finite; an NA coordinate makes ggplot2
    # drop the label with a "missing values" warning.
    x = max(vaccination_rate * 100, na.rm = TRUE) - 5,
    y = max(data$excess_deaths, na.rm = TRUE) - 0.2,
    label = paste0("Correlation:", round(correlation, 3)),
    hjust = 1
  )
# No colour scale is added here: the plot maps no colour aesthetic, so a
# manual colour scale would have no effect.
## Warning: Removed 280619 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_text()`).

# Time series plot: excess deaths next to a 7-row centred rolling mean of the
# vaccination rate (converted from per-hundred to a fraction).
#
# rollapply() with FUN = mean is used rather than rollmean(): rollmean() is
# documented as not handling NA inputs (its cumulative-sum implementation lets
# a single NA contaminate every later window), whereas rollapply() only yields
# NA for windows that actually contain a missing value.
#
# NOTE(review): the window runs over rows in their current order, with no
# grouping or sorting. If covid.csv holds several countries/regions, or is not
# ordered by date, the smoothing mixes unrelated observations -- confirm
# against the data and group/arrange first if needed.
data <- data %>%
  mutate(
    smoothed_vaccination_rate = zoo::rollapply(
      people_vaccinated_per_hundred,
      width = 7,
      FUN = mean,
      fill = NA
    ) / 100
  )

ggplot(data, aes(x = date)) +
  geom_line(aes(y = excess_deaths, color = "Excess Deaths")) +
  geom_line(
    aes(y = smoothed_vaccination_rate, color = "Smoothed Vaccination Rate")
  ) +
  labs(
    title = "Excess Deaths and Smoothed Vaccination Rate (Over 65) Over Time",
    x = "Date",
    y = "Value"
  ) +
  scale_color_manual(
    name = "Variable",
    values = c(
      "Excess Deaths" = "#CC0033",
      "Smoothed Vaccination Rate" = "#000000"
    )
  )
## Warning: Removed 4313 rows containing missing values (`geom_line()`).
## Warning: Removed 64520 rows containing missing values (`geom_line()`).

# Recompute the correlation on the rows where both the vaccination rate and
# excess deaths are available.
correlation <- cor(
  vaccination_rate,
  data$excess_deaths,
  use = "complete.obs"
)

# Report the rounded coefficient, or a fallback message when it is NA.
print(
  if (is.na(correlation)) {
    "Correlation could not be calculated due to missing values."
  } else {
    paste0(
      "Correlation between vaccination rate and excess deaths:",
      round(correlation, 3)
    )
  }
)
## [1] "Correlation between vaccination rate and excess deaths:-0.24"