install.packages("zoo")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(zoo) # For rolling averages
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Load the data
data <- read.csv("covid.csv")

# Preprocess data
data_filtered <- data %>%
  filter(!is.na(excess_mortality_cumulative_per_million) & 
           !is.na(total_vaccinations_per_hundred)) %>%
  mutate(date = base::as.Date(date)) # Use base::as.Date()

# Calculate rolling averages for smoother trends
data_filtered <- data_filtered %>%
  mutate(excess_mortality_rolling = rollmean(excess_mortality_cumulative_per_million, 
                                             k = 7, fill = NA),
         vaccination_rate_rolling = rollmean(total_vaccinations_per_hundred, 
                                             k = 7, fill = NA))
# Simple Linear Regression Model
model <- lm(excess_mortality_rolling ~ vaccination_rate_rolling, 
            data = data_filtered)

# View model summary
summary(model)
## 
## Call:
## lm(formula = excess_mortality_rolling ~ vaccination_rate_rolling, 
##     data = data_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3095.7 -1287.0  -389.5  1103.8  7906.8 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              2319.4318    52.6537  44.051   <2e-16 ***
## vaccination_rate_rolling   -0.6710     0.3528  -1.902   0.0572 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1765 on 4346 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.0008317,  Adjusted R-squared:  0.0006018 
## F-statistic: 3.618 on 1 and 4346 DF,  p-value: 0.05724
# Correlation analysis
correlation <- cor(data_filtered$excess_mortality_rolling, 
                   data_filtered$vaccination_rate_rolling,
                   use = "complete.obs")

# Visualisation
ggplot(data_filtered, aes(x = date)) + 
  geom_line(aes(y = excess_mortality_rolling, color = "Excess Mortality")) +
  geom_line(aes(y = vaccination_rate_rolling, color = "Vaccination Rate")) +
  labs(title = "Excess Mortality and Vaccination Rate over Time",
       x = "Date",
       y = "Rate",
       color = "") +
  scale_y_continuous(limits = c(0, NA)) + # Ensure both lines are visible
  theme_bw() + 
  annotate("text", x = max(data_filtered$date), y = 80,
           label = paste0("Correlation: ", round(correlation, 2)))