# Load necessary libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)

# Read the raw CSV and parse its `date` column into Date values in one step
data <- read.csv("covid.csv") %>%
  mutate(date = as.Date(date))

# Build a long table (date, variable, value) holding the two health measures.
# Rows whose aged_65_older is not strictly positive (including NA) are dropped
# by the filter before the measures are stacked.
data_long <- data %>%
  filter(aged_65_older > 0) %>%
  select(date, cardiovasc_death_rate, diabetes_prevalence) %>%
  # After the select, every column except `date` is a measure to stack
  pivot_longer(
    cols = -date,
    names_to = "variable",
    values_to = "value"
  )

# Scatter plot of each measure over time, one panel per measure.
# The two measures live on very different scales (death rate in the hundreds,
# prevalence in single/double digits), so each panel gets its own y axis;
# with a shared axis the diabetes_prevalence panel collapses into a flat band.
ggplot(data_long, aes(x = date, y = value, color = variable)) +
  geom_point() +
  labs(x = "Date", y = "Rate / Prevalence") +
  facet_wrap(~variable, scales = "free_y")
## Warning: Removed 8972 rows containing missing values (`geom_point()`).

# Print per-column summaries; the NA's row under `value` reports how many
# measurements are missing
data_long %>%
  summary()
##       date              variable             value       
##  Min.   :2020-01-01   Length:569614      Min.   :  0.99  
##  1st Qu.:2021-01-09   Class :character   1st Qu.:  7.11  
##  Median :2022-01-14   Mode  :character   Median : 22.63  
##  Mean   :2022-01-13                      Mean   :135.01  
##  3rd Qu.:2023-01-17                      3rd Qu.:245.47  
##  Max.   :2024-02-06                      Max.   :724.42  
##                                          NA's   :8972
# Handle missing values by listwise deletion: keep only rows of data_long
# that have no NA in any column
data_long_filtered <- na.omit(data_long)

# Descriptive statistics per measure, computed on the NA-free table so the
# aggregates do not need na.rm = TRUE. The .names pattern yields the columns
# mean_value, median_value, sd_value, min_value and max_value.
data_long_filtered %>%
  group_by(variable) %>%
  summarise(
    across(
      value,
      list(mean = mean, median = median, sd = sd, min = min, max = max),
      .names = "{.fn}_value"
    )
  )
## # A tibble: 2 × 6
##   variable              mean_value median_value sd_value min_value max_value
##   <chr>                      <dbl>        <dbl>    <dbl>     <dbl>     <dbl>
## 1 cardiovasc_death_rate     265.         253.     120.       79.4      724. 
## 2 diabetes_prevalence         8.19         7.14     4.40      0.99      23.4