# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
# Read the raw CSV and parse its `date` column into Date objects in one step
data <- read.csv("covid.csv") %>%
  mutate(date = as.Date(date))
# Build a long-format table with one row per (date, variable) pair
data_long <- data %>%
  # Keep rows with a positive `aged_65_older`; filter() also drops rows where
  # this comparison evaluates to NA
  filter(aged_65_older > 0) %>%
  select(date, cardiovasc_death_rate, diabetes_prevalence) %>%
  # Only `date` and the two measures remain after select(), so pivoting
  # everything except `date` stacks exactly those two columns
  pivot_longer(
    cols = -date,
    names_to = "variable",
    values_to = "value"
  )
# Scatter each measure against time, one panel per variable.
# The two measures sit on very different scales, so each panel gets its own
# y-axis; with a shared axis the smaller-valued measure is squashed flat.
ggplot(data_long, aes(x = date, y = value, color = variable)) +
  geom_point() +
  labs(x = "Date", y = "Rate / Prevalence") +
  facet_wrap(~variable, scales = "free_y")
## Warning: Removed 8972 rows containing missing values (`geom_point()`).

# Per-column overview of the long table; the NA count of `value` shows up here
data_long %>%
  summary()
## date variable value
## Min. :2020-01-01 Length:569614 Min. : 0.99
## 1st Qu.:2021-01-09 Class :character 1st Qu.: 7.11
## Median :2022-01-14 Mode :character Median : 22.63
## Mean :2022-01-13 Mean :135.01
## 3rd Qu.:2023-01-17 3rd Qu.:245.47
## Max. :2024-02-06 Max. :724.42
## NA's :8972
# Listwise deletion: discard every row that has a missing entry in any column.
# This is a placeholder strategy; swap in a more suitable technique if needed.
data_long_filtered <- na.omit(data_long)
# Per-variable descriptive statistics of `value`.
# The NA-free table is used because none of these summaries pass na.rm = TRUE;
# data_long could be substituted only if it contained no missing values.
data_long_filtered %>%
  group_by(variable) %>%
  summarize(
    # One across() call yields mean_value, median_value, sd_value, min_value
    # and max_value, in that order
    across(
      value,
      list(mean = mean, median = median, sd = sd, min = min, max = max),
      .names = "{.fn}_{.col}"
    )
  )
## # A tibble: 2 × 6
## variable mean_value median_value sd_value min_value max_value
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 cardiovasc_death_rate 265. 253. 120. 79.4 724.
## 2 diabetes_prevalence 8.19 7.14 4.40 0.99 23.4