COVID-19 Deaths

Since the start of the COVID-19 pandemic, contagion and mortality data have been collected. This document is an analysis of that epidemic data.

library("ggplot2")

## Registered S3 methods overwritten by 'tibble':
##   method     from  
##   format.tbl pillar
##   print.tbl  pillar

library("dplyr")

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Attach gridExtra with library() so that a missing package stops the script
# here; require() would only return FALSE and let the failure surface later
# at the first grid.arrange() call.
library(gridExtra)

## Loading required package: gridExtra

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

# Switch the working directory so that the relative file name passed to
# read.csv() further down resolves against this folder.
# NOTE(review): this absolute path is specific to one machine; setwd() raises
# an error on any system where the directory does not exist.
setwd("/home/ron/Documentos/DataScience/")

Data

The information was obtained from the ECDC at https://www.ecdc.europa.eu/en/covid-19/data. The data is filtered by continent before it is used, and the first analysis is a comparison of cases and deaths.

# Read the local CSV extract; the file name is resolved against the current
# working directory.
cov_data <- read.csv("covid_data_death.csv")
# Remote alternative, kept for reference:
# cov_data <- read.csv("https://opendata.ecdc.europa.eu/covid19/virusvariant/csv/data.csv")

# One data frame per aggregate: keep the rows whose `country` equals the
# aggregate label, then drop every row that contains an NA in any column.
cov_africa <- cov_data %>%
  filter(country == "Africa (total)") %>%
  na.omit()

# NOTE(review): despite its name, cov_usa holds the "EU/EEA (total)" rows,
# not United States data -- confirm which of the two was intended.
cov_usa <- cov_data %>%
  filter(country == "EU/EEA (total)") %>%
  na.omit()

cov_america <- cov_data %>%
  filter(country == "America (total)") %>%
  na.omit()

cov_asia <- cov_data %>%
  filter(country == "Asia (total)") %>%
  na.omit()

cov_europe <- cov_data %>%
  filter(country == "Europe (total)") %>%
  na.omit()

cov_oceania <- cov_data %>%
  filter(country == "Oceania (total)") %>%
  na.omit()

Cases and deaths comparison

To make the comparison of deaths and cases graphically visible, a base-10 logarithm is applied to the weekly counts of both cases and deaths.

Africa cases and deaths comparison

# Penalise scientific notation heavily so numbers are printed in fixed
# notation for the rest of the session.
options(scipen = 999)

# Weekly counts for Africa on a log10 scale, one series per indicator.
# Lines are forced to black; only the points carry the indicator colour.
ggplot(
  cov_africa,
  aes(
    x = year_week,
    y = log10(weekly_count),
    group = indicator,
    color = indicator
  )
) +
  geom_line(color = "black") +
  geom_point() +
  # Hide x-axis labels that would overlap their neighbours.
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  labs(title = "COVID in Africa")

EU/EEA cases and deaths comparison

# Weekly counts on a log10 scale, one series per indicator.
# cov_usa is built from the "EU/EEA (total)" rows, so the title names that
# aggregate rather than the USA.
# NOTE(review): if United States data was intended, the filter that creates
# cov_usa is what needs to change instead.
ggplot(
  cov_usa,
  aes(
    x = year_week,
    y = log10(weekly_count),
    group = indicator,
    color = indicator
  )
) +
  # Lines are forced to black; only the points carry the indicator colour.
  geom_line(color = "black") +
  geom_point() +
  # Hide x-axis labels that would overlap their neighbours.
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  labs(title = "COVID in EU/EEA")

America cases and deaths comparison

# Weekly counts for America on a log10 scale, one series per indicator.
# Lines are forced to black; only the points carry the indicator colour.
ggplot(
  cov_america,
  aes(
    x = year_week,
    y = log10(weekly_count),
    group = indicator,
    color = indicator
  )
) +
  geom_line(color = "black") +
  geom_point() +
  # Hide x-axis labels that would overlap their neighbours.
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  labs(title = "COVID in America")

Asia cases and deaths comparison

# Weekly counts for Asia on a log10 scale, one series per indicator.
# Lines are forced to black; only the points carry the indicator colour.
ggplot(
  cov_asia,
  aes(
    x = year_week,
    y = log10(weekly_count),
    group = indicator,
    color = indicator
  )
) +
  geom_line(color = "black") +
  geom_point() +
  # Hide x-axis labels that would overlap their neighbours.
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  labs(title = "COVID in Asia")

Europe cases and deaths comparison

# Weekly counts for Europe on a log10 scale, one series per indicator.
# Lines are forced to black; only the points carry the indicator colour.
ggplot(
  cov_europe,
  aes(
    x = year_week,
    y = log10(weekly_count),
    group = indicator,
    color = indicator
  )
) +
  geom_line(color = "black") +
  geom_point() +
  # Hide x-axis labels that would overlap their neighbours.
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  labs(title = "COVID in Europe")

Oceania cases and deaths comparison

# Weekly counts for Oceania on a log10 scale, one series per indicator.
# Lines are forced to black; only the points carry the indicator colour.
ggplot(
  cov_oceania,
  aes(
    x = year_week,
    y = log10(weekly_count),
    group = indicator,
    color = indicator
  )
) +
  geom_line(color = "black") +
  geom_point() +
  # Hide x-axis labels that would overlap their neighbours.
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  labs(title = "COVID in Oceania")

Analysis of deaths

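For this we take the central limit theorem as motivation and check how close the weekly deaths are to a normal distribution: a density histogram with the fitted normal (z) curve overlaid shows how the data is distributed, next to a normal Q-Q plot.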

Africa analysis of deaths

# Distribution of weekly deaths for Africa: a density histogram with the
# sample mean and a fitted normal curve, shown next to a normal Q-Q plot.

# Filter and summarise once instead of repeating the same filter() call for
# every statistic.
deaths_africa <- filter(cov_africa, indicator == "deaths")
mean_africa <- mean(deaths_africa$weekly_count)
sd_africa <- sd(deaths_africa$weekly_count)

uno <- ggplot(deaths_africa, aes(x = weekly_count)) +
  # after_stat(density) replaces the older ..density.. notation. The bars are
  # densities, so the y axis is labelled explicitly as such.
  geom_histogram(
    aes(y = after_stat(density)),
    col = "red",
    fill = "green",
    alpha = .2
  ) +
  labs(
    title = "Density deaths COVID Africa",
    x = "Number of deaths",
    y = "Density"
  ) +
  # Vertical marker at the sample mean.
  geom_vline(xintercept = mean_africa, color = "blue") +
  # Normal density using the sample mean and standard deviation.
  stat_function(
    fun = dnorm,
    args = list(mean = mean_africa, sd = sd_africa),
    col = "#1b98e0",
    size = 5
  )

dos <- ggplot(deaths_africa, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID Africa")

# Histogram on the left, Q-Q plot on the right.
grid.arrange(uno, dos, ncol = 2)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

EU/EEA analysis of deaths

# Distribution of weekly deaths: a density histogram with the sample mean and
# a fitted normal curve, shown next to a normal Q-Q plot.
# cov_usa is built from the "EU/EEA (total)" rows, so the titles name that
# aggregate rather than the USA.
# NOTE(review): if United States data was intended, the filter that creates
# cov_usa is what needs to change instead.

# Filter and summarise once instead of repeating the same filter() call for
# every statistic.
deaths_usa <- filter(cov_usa, indicator == "deaths")
mean_usa <- mean(deaths_usa$weekly_count)
sd_usa <- sd(deaths_usa$weekly_count)

uno <- ggplot(deaths_usa, aes(x = weekly_count)) +
  # after_stat(density) replaces the older ..density.. notation. The bars are
  # densities, so the y axis is labelled "Density" rather than "Count".
  geom_histogram(
    aes(y = after_stat(density)),
    col = "red",
    fill = "green",
    alpha = .2
  ) +
  labs(
    title = "Density deaths COVID EU/EEA",
    x = "Number of deaths",
    y = "Density"
  ) +
  # Vertical marker at the sample mean.
  geom_vline(xintercept = mean_usa, color = "blue") +
  # Normal density using the sample mean and standard deviation.
  stat_function(
    fun = dnorm,
    args = list(mean = mean_usa, sd = sd_usa),
    col = "#1b98e0",
    size = 5
  )

dos <- ggplot(deaths_usa, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID EU/EEA")

# Histogram on the left, Q-Q plot on the right.
grid.arrange(uno, dos, ncol = 2)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

America analysis of deaths

# Distribution of weekly deaths for America: a density histogram with the
# sample mean and a fitted normal curve, shown next to a normal Q-Q plot.

# Filter and summarise once instead of repeating the same filter() call for
# every statistic.
deaths_america <- filter(cov_america, indicator == "deaths")
mean_america <- mean(deaths_america$weekly_count)
sd_america <- sd(deaths_america$weekly_count)

uno <- ggplot(deaths_america, aes(x = weekly_count)) +
  # after_stat(density) replaces the older ..density.. notation. The bars are
  # densities, so the y axis is labelled "Density" rather than "Count".
  geom_histogram(
    aes(y = after_stat(density)),
    col = "red",
    fill = "green",
    alpha = .2
  ) +
  labs(
    title = "Density deaths COVID America",
    x = "Number of deaths",
    y = "Density"
  ) +
  # Vertical marker at the sample mean.
  geom_vline(xintercept = mean_america, color = "blue") +
  # Normal density using the sample mean and standard deviation.
  stat_function(
    fun = dnorm,
    args = list(mean = mean_america, sd = sd_america),
    col = "#1b98e0",
    size = 5
  )

dos <- ggplot(deaths_america, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID America")

# Histogram on the left, Q-Q plot on the right.
grid.arrange(uno, dos, ncol = 2)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Asia analysis of deaths

# Distribution of weekly deaths for Asia: a density histogram with the
# sample mean and a fitted normal curve, shown next to a normal Q-Q plot.

# Filter and summarise once instead of repeating the same filter() call for
# every statistic.
deaths_asia <- filter(cov_asia, indicator == "deaths")
mean_asia <- mean(deaths_asia$weekly_count)
sd_asia <- sd(deaths_asia$weekly_count)

uno <- ggplot(deaths_asia, aes(x = weekly_count)) +
  # after_stat(density) replaces the older ..density.. notation. The bars are
  # densities, so the y axis is labelled "Density" rather than "Count".
  geom_histogram(
    aes(y = after_stat(density)),
    col = "red",
    fill = "green",
    alpha = .2
  ) +
  labs(
    title = "Density deaths COVID Asia",
    x = "Number of deaths",
    y = "Density"
  ) +
  # Vertical marker at the sample mean.
  geom_vline(xintercept = mean_asia, color = "blue") +
  # Normal density using the sample mean and standard deviation.
  stat_function(
    fun = dnorm,
    args = list(mean = mean_asia, sd = sd_asia),
    col = "#1b98e0",
    size = 5
  )

dos <- ggplot(deaths_asia, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID Asia")

# Histogram on the left, Q-Q plot on the right.
grid.arrange(uno, dos, ncol = 2)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Europe analysis of deaths

# Distribution of weekly deaths for Europe: a density histogram with the
# sample mean and a fitted normal curve, shown next to a normal Q-Q plot.

# Filter and summarise once instead of repeating the same filter() call for
# every statistic.
deaths_europe <- filter(cov_europe, indicator == "deaths")
mean_europe <- mean(deaths_europe$weekly_count)
sd_europe <- sd(deaths_europe$weekly_count)

uno <- ggplot(deaths_europe, aes(x = weekly_count)) +
  # after_stat(density) replaces the older ..density.. notation. The bars are
  # densities, so the y axis is labelled "Density" rather than "Count".
  geom_histogram(
    aes(y = after_stat(density)),
    col = "red",
    fill = "green",
    alpha = .2
  ) +
  labs(
    title = "Density deaths COVID Europe",
    x = "Number of deaths",
    y = "Density"
  ) +
  # Vertical marker at the sample mean.
  geom_vline(xintercept = mean_europe, color = "blue") +
  # Normal density using the sample mean and standard deviation.
  stat_function(
    fun = dnorm,
    args = list(mean = mean_europe, sd = sd_europe),
    col = "#1b98e0",
    size = 5
  )

dos <- ggplot(deaths_europe, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID Europe")

# Histogram on the left, Q-Q plot on the right.
grid.arrange(uno, dos, ncol = 2)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Oceania analysis of deaths

# Distribution of weekly deaths for Oceania: a density histogram with the
# sample mean and a fitted normal curve, shown next to a normal Q-Q plot.

# Filter and summarise once instead of repeating the same filter() call for
# every statistic.
deaths_oceania <- filter(cov_oceania, indicator == "deaths")
mean_oceania <- mean(deaths_oceania$weekly_count)
sd_oceania <- sd(deaths_oceania$weekly_count)

uno <- ggplot(deaths_oceania, aes(x = weekly_count)) +
  # after_stat(density) replaces the older ..density.. notation. The bars are
  # densities, so the y axis is labelled "Density" rather than "Count".
  geom_histogram(
    aes(y = after_stat(density)),
    col = "red",
    fill = "green",
    alpha = .2
  ) +
  labs(
    title = "Density deaths COVID Oceania",
    x = "Number of deaths",
    y = "Density"
  ) +
  # Vertical marker at the sample mean.
  geom_vline(xintercept = mean_oceania, color = "blue") +
  # Normal density using the sample mean and standard deviation.
  stat_function(
    fun = dnorm,
    args = list(mean = mean_oceania, sd = sd_oceania),
    col = "#1b98e0",
    size = 5
  )

dos <- ggplot(deaths_oceania, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID Oceania")

# Histogram on the left, Q-Q plot on the right.
grid.arrange(uno, dos, ncol = 2)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

COVID-19 Deaths

Ronaldo Echeverria

25/1/2022

COVID-19 Deaths

Data

Cases and deaths comparison

Africa cases and deaths comparison

EU/EEA cases and deaths comparison

America cases and deaths comparison

Asia cases and deaths comparison

Europe cases and deaths comparison

Oceania cases and deaths comparison

Analysis of deaths

Africa analysis of deaths

EU/EEA analysis of deaths

America analysis of deaths

Asia analysis of deaths

Europe analysis of deaths

Oceania analysis of deaths