Four plots from the data in covid.csv for the period 2020-2024.
# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.4.4 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
# Read covid.csv and prepare the columns used by the plots below:
#   * date          - parsed from text into Date objects
#   * excess_deaths - a plain copy of the excess_mortality column
#                     (no calculation is performed; it is only an alias)
data <- read.csv("covid.csv") %>%
  mutate(
    date = as.Date(date),
    excess_deaths = excess_mortality
  )
# Count how many entries of excess_deaths are NA and print the total
missing_values <- length(which(is.na(data[["excess_deaths"]])))
cat("Number of missing values in excess_deaths column:", missing_values, "\n")
## Number of missing values in excess_deaths column: 360589
# Count entries that are not finite (NA, NaN, Inf or -Inf) and print the total.
# Computed as "everything minus the finite ones".
excess_col <- data[["excess_deaths"]]
non_finite_values <- length(excess_col) - sum(is.finite(excess_col))
rm(excess_col)
cat("Number of non-finite values in excess_deaths column:", non_finite_values, "\n")
## Number of non-finite values in excess_deaths column: 360589
# Print the five-number summary, mean and NA count for excess_deaths
summary(data[["excess_deaths"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -95.9 -1.5 5.6 11.0 15.7 377.7 360589
# Histogram of excess_deaths using bins 5 units wide.
# Rows where excess_deaths is not finite are dropped by ggplot2 when the plot
# is drawn (it reports this as a warning).
histogram <- ggplot(data, aes(x = excess_deaths)) +
  geom_histogram(binwidth = 5, fill = "#33CCFF", color = "#000000") +
  ggtitle("Distribution of Excess Deaths") +
  xlab("Excess Deaths") +
  ylab("Frequency")
# Time series plot of total cases, with the y-axis labelled in millions.
#
# The raw total_cases column is mapped to y and the conversion to millions is
# done once, in the axis labels (scale = 1e-6). Previously the values were
# divided by 1e6 in aes() and then multiplied back by scale = 1e6 in the label
# formatter, so the tick labels showed raw counts under a "(Millions)" title.
#
# NOTE(review): geom_line() joins every row into a single path. If covid.csv
# holds rows for more than one location, confirm whether a filter or a
# `group` aesthetic is needed here.
total_cases_plot <- ggplot(data, aes(x = date, y = total_cases)) +
  geom_line() +
  scale_y_continuous(labels = scales::label_number(scale = 1e-6)) +
  labs(
    title = "Total COVID Cases Over Time",
    x = "Date",
    y = "Total Cases (Millions)"
  )
# Time series plot of total deaths, with the y-axis labelled in millions.
#
# The raw total_deaths column is mapped to y and the conversion to millions is
# done once, in the axis labels (scale = 1e-6). Previously the values were
# divided by 1e6 in aes() and then multiplied back by scale = 1e6 in the label
# formatter, so the tick labels showed raw counts under a "(Millions)" title.
#
# NOTE(review): geom_line() joins every row into a single path. If covid.csv
# holds rows for more than one location, confirm whether a filter or a
# `group` aesthetic is needed here.
total_deaths_plot <- ggplot(data, aes(x = date, y = total_deaths)) +
  geom_line() +
  scale_y_continuous(labels = scales::label_number(scale = 1e-6)) +
  labs(
    title = "Total COVID Deaths Over Time",
    x = "Date",
    y = "Total Deaths (Millions)"
  )
# Time series of excess_deaths, restricted to rows where aged_65_older > 0
# (rows where aged_65_older is NA are dropped by filter() as well).
#
# NOTE(review): the filter only selects rows; the y value is still the overall
# excess_deaths column, not a figure specific to people aged 65 and older.
# Confirm that the plot title matches what is intended.
excess_deaths_over_65_plot <- ggplot(
  filter(data, aged_65_older > 0),
  aes(x = date, y = excess_deaths)
) +
  geom_line() +
  ggtitle("Excess Deaths in Individuals Aged 65 and Older Over Time") +
  xlab("Date") +
  ylab("Excess Deaths")
# Draw the four plots on one page, filled row by row in two columns:
#   total cases   | total deaths
#   histogram     | excess deaths (aged_65_older > 0)
gridExtra::grid.arrange(
  grobs = list(
    total_cases_plot,
    total_deaths_plot,
    histogram,
    excess_deaths_over_65_plot
  ),
  ncol = 2
)
## Warning: Removed 398 rows containing missing values (`geom_line()`).
## Warning: Removed 455 rows containing missing values (`geom_line()`).
## Warning: Removed 360589 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 4313 rows containing missing values (`geom_line()`).
