COVID-19 Deaths

Contagion and mortality data have been collected since the start of the COVID-19 pandemic; this document is an analysis of that epidemic data.

library("ggplot2")
## Registered S3 methods overwritten by 'tibble':
##   method     from  
##   format.tbl pillar
##   print.tbl  pillar
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# library() stops with an error when gridExtra is not installed, whereas
# require() only warns and returns FALSE, which would postpone the failure
# until the first grid.arrange() call.
library("gridExtra")
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Working directory from which covid_data_death.csv is read. The original
# machine-specific path stays the default; set the COVID_DATA_DIR environment
# variable to run the analysis elsewhere without editing this file.
setwd(Sys.getenv(
  "COVID_DATA_DIR",
  unset = "/home/ron/Documentos/DataScience/"
))

Data

The data was obtained from https://www.ecdc.europa.eu/en/covid-19/data. It is filtered by continent, and the first analysis is a comparison of cases and deaths.

# Read the data set from the local CSV copy in the working directory.
cov_data <- read.csv("covid_data_death.csv")
#cov_data <- read.csv("https://opendata.ecdc.europa.eu/covid19/virusvariant/csv/data.csv")

# Rows of `data` whose `country` column equals `label`, with every row that
# contains an NA in any column removed.
aggregate_rows <- function(data, label) {
  na.omit(filter(data, country == !!label))
}

cov_africa <- aggregate_rows(cov_data, "Africa (total)")

# NOTE(review): despite its name, cov_usa holds the "EU/EEA (total)" rows,
# not rows for the United States -- confirm which one is intended.
cov_usa <- aggregate_rows(cov_data, "EU/EEA (total)")

cov_america <- aggregate_rows(cov_data, "America (total)")

cov_asia <- aggregate_rows(cov_data, "Asia (total)")

cov_europe <- aggregate_rows(cov_data, "Europe (total)")

cov_oceania <- aggregate_rows(cov_data, "Oceania (total)")

Comparison of cases and deaths

To make the comparison of deaths and cases graphically visible, a base 10 logarithm is applied to the weekly counts of both cases and deaths.

Africa cases and deaths comparison

# Strongly favour fixed over scientific notation when numbers are printed.
options(scipen = 999)

# log10 of the weekly counts in cov_africa over year_week, one series per
# indicator. The constant colour passed to geom_line() overrides the mapped
# one, so the lines are black and only the points are coloured by indicator.
ggplot(
  cov_africa,
  aes(
    x = year_week,
    y = log10(weekly_count),
    colour = indicator,
    group = indicator
  )
) +
  geom_line(colour = "black") +
  geom_point() +
  # Omit x-axis labels that would overlap a neighbouring label.
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  labs(title = "COVID in Africa")

EU/EEA cases and deaths comparison

# log10 of the weekly counts in cov_usa over year_week, one series per
# indicator. cov_usa is built from the "EU/EEA (total)" rows, so the title
# names EU/EEA rather than USA to match the data that is actually plotted.
# The constant colour passed to geom_line() overrides the mapped one, so the
# lines are black and only the points are coloured by indicator.
ggplot(
  cov_usa,
  aes(
    x = year_week,
    y = log10(weekly_count),
    colour = indicator,
    group = indicator
  )
) +
  geom_line(colour = "black") +
  geom_point() +
  # Omit x-axis labels that would overlap a neighbouring label.
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  labs(title = "COVID in EU/EEA")

America cases and deaths comparison

# log10 of the weekly counts in cov_america over year_week, one series per
# indicator. The constant colour passed to geom_line() overrides the mapped
# one, so the lines are black and only the points are coloured by indicator.
ggplot(
  cov_america,
  aes(
    x = year_week,
    y = log10(weekly_count),
    colour = indicator,
    group = indicator
  )
) +
  geom_line(colour = "black") +
  geom_point() +
  # Omit x-axis labels that would overlap a neighbouring label.
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  labs(title = "COVID in America")

Asia cases and deaths comparison

# log10 of the weekly counts in cov_asia over year_week, one series per
# indicator. The constant colour passed to geom_line() overrides the mapped
# one, so the lines are black and only the points are coloured by indicator.
ggplot(
  cov_asia,
  aes(
    x = year_week,
    y = log10(weekly_count),
    colour = indicator,
    group = indicator
  )
) +
  geom_line(colour = "black") +
  geom_point() +
  # Omit x-axis labels that would overlap a neighbouring label.
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  labs(title = "COVID in Asia")

Europe cases and deaths comparison

# log10 of the weekly counts in cov_europe over year_week, one series per
# indicator. The constant colour passed to geom_line() overrides the mapped
# one, so the lines are black and only the points are coloured by indicator.
ggplot(
  cov_europe,
  aes(
    x = year_week,
    y = log10(weekly_count),
    colour = indicator,
    group = indicator
  )
) +
  geom_line(colour = "black") +
  geom_point() +
  # Omit x-axis labels that would overlap a neighbouring label.
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  labs(title = "COVID in Europe")

Oceania cases and deaths comparison

# log10 of the weekly counts in cov_oceania over year_week, one series per
# indicator. The constant colour passed to geom_line() overrides the mapped
# one, so the lines are black and only the points are coloured by indicator.
ggplot(
  cov_oceania,
  aes(
    x = year_week,
    y = log10(weekly_count),
    colour = indicator,
    group = indicator
  )
) +
  geom_line(colour = "black") +
  geom_point() +
  # Omit x-axis labels that would overlap a neighbouring label.
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  labs(title = "COVID in Oceania")

Analysis of deaths

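For this we appeal to the central limit theorem: for each region, a histogram of the weekly deaths is compared with the normal density that has the same mean and standard deviation, and a normal Q-Q plot shows how closely the data follow that distribution.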

Africa analysis of deaths

# Rows of cov_africa for the "deaths" indicator, filtered once and reused by
# both panels instead of repeating the same filter() call.
deaths_africa <- filter(cov_africa, indicator == "deaths")

# Left panel: density-scaled histogram of weekly_count, a vertical line at the
# sample mean, and the normal density with the sample mean and sd.
uno <- ggplot(deaths_africa, aes(x = weekly_count)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  # bins = 30 states the stat_bin() default explicitly, which keeps the same
  # histogram without the "Pick better value" message.
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    col = "red",
    fill = "green",
    alpha = .2
  ) +
  # The y axis is a density, so it is labelled as one.
  labs(
    title = "Density deaths COVID Africa",
    x = "Number of deaths",
    y = "Density"
  ) +
  geom_vline(xintercept = mean(deaths_africa$weekly_count), color = "blue") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(deaths_africa$weekly_count),
      sd = sd(deaths_africa$weekly_count)
    ),
    col = "#1b98e0",
    size = 5
  )

# Right panel: normal Q-Q plot of the same weekly_count values.
dos <- ggplot(deaths_africa, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID Africa")

# Draw the two panels side by side.
grid.arrange(uno, dos, ncol = 2)

EU/EEA analysis of deaths

# Rows of cov_usa for the "deaths" indicator, filtered once and reused by both
# panels instead of repeating the same filter() call. cov_usa is built from
# the "EU/EEA (total)" rows, so the titles below name EU/EEA rather than USA.
deaths_eu_eea <- filter(cov_usa, indicator == "deaths")

# Left panel: density-scaled histogram of weekly_count, a vertical line at the
# sample mean, and the normal density with the sample mean and sd.
uno <- ggplot(deaths_eu_eea, aes(x = weekly_count)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  # bins = 30 states the stat_bin() default explicitly, which keeps the same
  # histogram without the "Pick better value" message.
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    col = "red",
    fill = "green",
    alpha = .2
  ) +
  # The y axis is a density, not a count, so it is labelled accordingly.
  labs(
    title = "Density deaths COVID EU/EEA",
    x = "Number of deaths",
    y = "Density"
  ) +
  geom_vline(xintercept = mean(deaths_eu_eea$weekly_count), color = "blue") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(deaths_eu_eea$weekly_count),
      sd = sd(deaths_eu_eea$weekly_count)
    ),
    col = "#1b98e0",
    size = 5
  )

# Right panel: normal Q-Q plot of the same weekly_count values.
dos <- ggplot(deaths_eu_eea, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID EU/EEA")

# Draw the two panels side by side.
grid.arrange(uno, dos, ncol = 2)

America analysis of deaths

# Rows of cov_america for the "deaths" indicator, filtered once and reused by
# both panels instead of repeating the same filter() call.
deaths_america <- filter(cov_america, indicator == "deaths")

# Left panel: density-scaled histogram of weekly_count, a vertical line at the
# sample mean, and the normal density with the sample mean and sd.
uno <- ggplot(deaths_america, aes(x = weekly_count)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  # bins = 30 states the stat_bin() default explicitly, which keeps the same
  # histogram without the "Pick better value" message.
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    col = "red",
    fill = "green",
    alpha = .2
  ) +
  # The y axis is a density, not a count, so it is labelled accordingly.
  labs(
    title = "Density deaths COVID America",
    x = "Number of deaths",
    y = "Density"
  ) +
  geom_vline(xintercept = mean(deaths_america$weekly_count), color = "blue") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(deaths_america$weekly_count),
      sd = sd(deaths_america$weekly_count)
    ),
    col = "#1b98e0",
    size = 5
  )

# Right panel: normal Q-Q plot of the same weekly_count values.
dos <- ggplot(deaths_america, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID America")

# Draw the two panels side by side.
grid.arrange(uno, dos, ncol = 2)

Asia analysis of deaths

# Rows of cov_asia for the "deaths" indicator, filtered once and reused by
# both panels instead of repeating the same filter() call.
deaths_asia <- filter(cov_asia, indicator == "deaths")

# Left panel: density-scaled histogram of weekly_count, a vertical line at the
# sample mean, and the normal density with the sample mean and sd.
uno <- ggplot(deaths_asia, aes(x = weekly_count)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  # bins = 30 states the stat_bin() default explicitly, which keeps the same
  # histogram without the "Pick better value" message.
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    col = "red",
    fill = "green",
    alpha = .2
  ) +
  # The y axis is a density, not a count, so it is labelled accordingly.
  labs(
    title = "Density deaths COVID Asia",
    x = "Number of deaths",
    y = "Density"
  ) +
  geom_vline(xintercept = mean(deaths_asia$weekly_count), color = "blue") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(deaths_asia$weekly_count),
      sd = sd(deaths_asia$weekly_count)
    ),
    col = "#1b98e0",
    size = 5
  )

# Right panel: normal Q-Q plot of the same weekly_count values.
dos <- ggplot(deaths_asia, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID Asia")

# Draw the two panels side by side.
grid.arrange(uno, dos, ncol = 2)

Europe analysis of deaths

# Rows of cov_europe for the "deaths" indicator, filtered once and reused by
# both panels instead of repeating the same filter() call.
deaths_europe <- filter(cov_europe, indicator == "deaths")

# Left panel: density-scaled histogram of weekly_count, a vertical line at the
# sample mean, and the normal density with the sample mean and sd.
uno <- ggplot(deaths_europe, aes(x = weekly_count)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  # bins = 30 states the stat_bin() default explicitly, which keeps the same
  # histogram without the "Pick better value" message.
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    col = "red",
    fill = "green",
    alpha = .2
  ) +
  # The y axis is a density, not a count, so it is labelled accordingly.
  labs(
    title = "Density deaths COVID Europe",
    x = "Number of deaths",
    y = "Density"
  ) +
  geom_vline(xintercept = mean(deaths_europe$weekly_count), color = "blue") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(deaths_europe$weekly_count),
      sd = sd(deaths_europe$weekly_count)
    ),
    col = "#1b98e0",
    size = 5
  )

# Right panel: normal Q-Q plot of the same weekly_count values.
dos <- ggplot(deaths_europe, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID Europe")

# Draw the two panels side by side.
grid.arrange(uno, dos, ncol = 2)

Oceania analysis of deaths

# Rows of cov_oceania for the "deaths" indicator, filtered once and reused by
# both panels instead of repeating the same filter() call.
deaths_oceania <- filter(cov_oceania, indicator == "deaths")

# Left panel: density-scaled histogram of weekly_count, a vertical line at the
# sample mean, and the normal density with the sample mean and sd.
uno <- ggplot(deaths_oceania, aes(x = weekly_count)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  # bins = 30 states the stat_bin() default explicitly, which keeps the same
  # histogram without the "Pick better value" message.
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    col = "red",
    fill = "green",
    alpha = .2
  ) +
  # The y axis is a density, not a count, so it is labelled accordingly.
  labs(
    title = "Density deaths COVID Oceania",
    x = "Number of deaths",
    y = "Density"
  ) +
  geom_vline(xintercept = mean(deaths_oceania$weekly_count), color = "blue") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(deaths_oceania$weekly_count),
      sd = sd(deaths_oceania$weekly_count)
    ),
    col = "#1b98e0",
    size = 5
  )

# Right panel: normal Q-Q plot of the same weekly_count values.
dos <- ggplot(deaths_oceania, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID Oceania")

# Draw the two panels side by side.
grid.arrange(uno, dos, ncol = 2)