# Load necessary libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Load and preprocess data
# Reads covid.csv, parses `date` into Date values, exposes `excess_mortality`
# under the name `excess_deaths`, and keeps only rows whose `aged_65_older`
# value is strictly positive (filter() also drops rows where it is NA).
data <- "covid.csv" %>%
  read.csv() %>%
  mutate(
    excess_deaths = excess_mortality,
    date = as.Date(date)
  ) %>%
  filter(aged_65_older > 0)

# Calculate vaccination rate (ensure it's between 0 and 1)
# The source column is expressed per hundred, so it is divided by 100 and
# clamped to [0, 1]. Missing inputs stay NA.
vaccination_rate <- pmax(0, pmin(1, data$people_vaccinated_per_hundred / 100))

# Correlation analysis
# Both series contain missing values, and cor() returns NA unless told how to
# treat them. Restrict the computation to rows where both values are present;
# "pairwise.complete.obs" yields NA (not an error) if no such rows exist.
correlation <- cor(
  vaccination_rate,
  data$excess_deaths,
  use = "pairwise.complete.obs"
)

# Scatter plot
# `vaccination_rate` is not a column of `data`; aes() picks it up from the
# calling environment, which works because it has one entry per row of `data`.
# The annotation is anchored near the top-right corner of the observed range;
# na.rm = TRUE is required, otherwise max() is NA and the label is dropped.
ggplot(data, aes(x = vaccination_rate * 100, y = excess_deaths)) +
  geom_point() +
  labs(
    title = "Vaccination Rate vs. Excess Deaths (Over 65)",
    x = "Vaccination Rate (%)",
    y = "Excess Deaths"
  ) +
  annotate(
    "text",
    x = max(vaccination_rate * 100, na.rm = TRUE) - 5,
    y = max(data$excess_deaths, na.rm = TRUE) - 0.2,
    label = paste0("Correlation:", round(correlation, 3)),
    hjust = 1
  )
## Warning: Removed 280619 rows containing missing values (`geom_point()`).

# Time series plot: excess deaths alongside a 7-row centred rolling mean of
# the vaccination rate (as a 0-1 fraction).
#
# zoo::rollmean()'s default method does not handle NA values in its input, and
# this column has missing values, so rollapply() with mean(na.rm = TRUE) is
# used instead: each window averages whatever observations it contains. A
# window with no observations yields NaN, which ggplot2 treats as missing.
# The rate is clamped to [0, 1] before smoothing, matching `vaccination_rate`.
#
# NOTE(review): the window runs over rows in their current order. If covid.csv
# holds several locations or is not sorted by date, the data should be
# arranged/grouped before smoothing -- confirm against the file's schema.
data <- data %>%
  mutate(
    smoothed_vaccination_rate = zoo::rollapply(
      pmax(0, pmin(1, people_vaccinated_per_hundred / 100)),
      width = 7,
      FUN = mean,
      na.rm = TRUE,
      fill = NA
    )
  )

# Both series share one y axis; the colour legend distinguishes them.
ggplot(data, aes(x = date)) +
  geom_line(aes(y = excess_deaths, color = "Excess Deaths")) +
  geom_line(
    aes(y = smoothed_vaccination_rate, color = "Smoothed Vaccination Rate")
  ) +
  labs(
    title = "Excess Deaths and Smoothed Vaccination Rate (Over 65) Over Time",
    x = "Date",
    y = "Value"
  ) +
  scale_color_manual(
    name = "Variable",
    values = c(
      "Excess Deaths" = "#CC0033",
      "Smoothed Vaccination Rate" = "#000000"
    )
  )

# Correlation analysis
# Pearson correlation over the rows where both values are present.
# "pairwise.complete.obs" is used rather than "complete.obs" because the
# latter raises an error when no complete cases exist, which would make the
# fallback branch below unreachable for exactly the situation it reports.
# For two vectors with at least one complete pair the result is the same.
correlation <- cor(
  vaccination_rate,
  data$excess_deaths,
  use = "pairwise.complete.obs"
)

# Print correlation if it's not NA
if (!is.na(correlation)) {
  print(paste0("Correlation between vaccination rate and excess deaths:", round(correlation, 3)))
} else {
  print("Correlation could not be calculated due to missing values.")
}
## [1] "Correlation between vaccination rate and excess deaths:-0.24"