install.packages("zoo")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(zoo) # For rolling averages
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Load the data
data <- read.csv("covid.csv")
# Preprocess data
data_filtered <- data %>%
filter(!is.na(excess_mortality_cumulative_per_million) &
!is.na(total_vaccinations_per_hundred)) %>%
mutate(date = base::as.Date(date)) # Use base::as.Date()
# Calculate rolling averages for smoother trends
data_filtered <- data_filtered %>%
mutate(excess_mortality_rolling = rollmean(excess_mortality_cumulative_per_million,
k = 7, fill = NA),
vaccination_rate_rolling = rollmean(total_vaccinations_per_hundred,
k = 7, fill = NA))
# Simple Linear Regression Model
model <- lm(excess_mortality_rolling ~ vaccination_rate_rolling,
data = data_filtered)
# View model summary
summary(model)
##
## Call:
## lm(formula = excess_mortality_rolling ~ vaccination_rate_rolling,
## data = data_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3095.7 -1287.0 -389.5 1103.8 7906.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2319.4318 52.6537 44.051 <2e-16 ***
## vaccination_rate_rolling -0.6710 0.3528 -1.902 0.0572 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1765 on 4346 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.0008317, Adjusted R-squared: 0.0006018
## F-statistic: 3.618 on 1 and 4346 DF, p-value: 0.05724
# Correlation analysis
correlation <- cor(data_filtered$excess_mortality_rolling,
data_filtered$vaccination_rate_rolling,
use = "complete.obs")
# Visualisation
ggplot(data_filtered, aes(x = date)) +
geom_line(aes(y = excess_mortality_rolling, color = "Excess Mortality")) +
geom_line(aes(y = vaccination_rate_rolling, color = "Vaccination Rate")) +
labs(title = "Excess Mortality and Vaccination Rate over Time",
x = "Date",
y = "Rate",
color = "") +
scale_y_continuous(limits = c(0, NA)) + # Ensure both lines are visible
theme_bw() +
annotate("text", x = max(data_filtered$date), y = 80,
label = paste0("Correlation: ", round(correlation, 2)))
