Since the start of the COVID-19 pandemic, contagion and mortality data have been collected; this is an analysis of that epidemic data.
library("ggplot2")
## Registered S3 methods overwritten by 'tibble':
## method from
## format.tbl pillar
## print.tbl pillar
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(gridExtra)
## Loading required package: gridExtra
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Switch to the project directory so that relative file paths (such as the
# CSV file read below) resolve. NOTE(review): this absolute path is specific
# to one machine and must be adjusted when running elsewhere.
setwd("/home/ron/Documentos/DataScience/")
The data were obtained from https://www.ecdc.europa.eu/en/covid-19/data. The first analysis is a comparison of cases and deaths; the data are filtered by continent (using the regional totals) before being used.
# Load the COVID data from the local CSV file (path is relative to the
# working directory).
cov_data <- read.csv("covid_data_death.csv")
#cov_data <- read.csv("https://opendata.ecdc.europa.eu/covid19/virusvariant/csv/data.csv")

# One data frame per regional aggregate: keep only the rows of the
# corresponding "(total)" entry and drop every row containing an NA.
cov_africa <- na.omit(filter(cov_data, country == "Africa (total)"))
# NOTE(review): despite its name, cov_usa holds the "EU/EEA (total)" rows.
cov_usa <- na.omit(filter(cov_data, country == "EU/EEA (total)"))
cov_america <- na.omit(filter(cov_data, country == "America (total)"))
cov_asia <- na.omit(filter(cov_data, country == "Asia (total)"))
cov_europe <- na.omit(filter(cov_data, country == "Europe (total)"))
cov_oceania <- na.omit(filter(cov_data, country == "Oceania (total)"))
To make the comparison of deaths and cases graphically visible on a single axis, a base-10 logarithm is applied to the weekly counts of both cases and deaths.
# Strongly prefer fixed over scientific notation when numbers are formatted.
options(scipen = 999)

# Plot the base-10 logarithm of the weekly counts over time for one region.
#
# region_data: data frame with the columns year_week, weekly_count and
#              indicator (one series is drawn per indicator value).
# plot_title:  title shown above the plot.
#
# Returns the ggplot object; when called at top level it is printed.
plot_weekly_log <- function(region_data, plot_title) {
  ggplot(
    region_data,
    aes(
      x = year_week,
      y = log10(weekly_count),
      group = indicator,
      color = indicator
    )
  ) +
    # Lines are drawn in black; only the points carry the indicator colour.
    geom_line(color = "black") +
    geom_point() +
    # Hide axis labels that would overlap on the crowded week axis.
    scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
    labs(title = plot_title)
}

plot_weekly_log(cov_africa, "COVID in Africa")
# cov_usa is filtered on "EU/EEA (total)", so the title names that region
# (it previously read "COVID in USA").
plot_weekly_log(cov_usa, "COVID in EU/EEA")
plot_weekly_log(cov_america, "COVID in America")
plot_weekly_log(cov_asia, "COVID in Asia")
plot_weekly_log(cov_europe, "COVID in Europe")
plot_weekly_log(cov_oceania, "COVID in Oceania")
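Next, with the central limit theorem in mind, we check how the weekly death counts are distributed: for each region, a density histogram is overlaid with the normal curve that has the same mean and standard deviation (together with a vertical line at the mean), and a normal Q-Q plot is shown alongside it.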
# Weekly death rows for Africa, filtered once and reused below.
deaths_africa <- filter(cov_africa, indicator == "deaths")

# Left panel: density histogram of the weekly deaths, a vertical line at the
# mean, and the normal density with the same mean and standard deviation.
uno <- ggplot(deaths_africa, aes(x = weekly_count)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    col = "red", fill = "green", alpha = .2
  ) +
  labs(
    title = "Density deaths COVID Africa",
    x = "Number of deaths",
    y = "Density"
  ) +
  geom_vline(xintercept = mean(deaths_africa$weekly_count), color = "blue") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(deaths_africa$weekly_count),
      sd = sd(deaths_africa$weekly_count)
    ),
    col = "#1b98e0",
    size = 5
  )

# Right panel: normal Q-Q plot of the same weekly death counts.
dos <- ggplot(deaths_africa, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID Africa")

# Show both panels side by side.
grid.arrange(uno, dos, ncol = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Weekly death rows of cov_usa, filtered once and reused below.
# cov_usa is built from the "EU/EEA (total)" rows, so the plot titles name
# that region (they previously read "USA").
deaths_usa <- filter(cov_usa, indicator == "deaths")

# Left panel: density histogram of the weekly deaths, a vertical line at the
# mean, and the normal density with the same mean and standard deviation.
uno <- ggplot(deaths_usa, aes(x = weekly_count)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    col = "red", fill = "green", alpha = .2
  ) +
  # The y axis shows a density, not a count.
  labs(
    title = "Density deaths COVID EU/EEA",
    x = "Number of deaths",
    y = "Density"
  ) +
  geom_vline(xintercept = mean(deaths_usa$weekly_count), color = "blue") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(deaths_usa$weekly_count),
      sd = sd(deaths_usa$weekly_count)
    ),
    col = "#1b98e0",
    size = 5
  )

# Right panel: normal Q-Q plot of the same weekly death counts.
dos <- ggplot(deaths_usa, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID EU/EEA")

# Show both panels side by side.
grid.arrange(uno, dos, ncol = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Weekly death rows for America, filtered once and reused below.
deaths_america <- filter(cov_america, indicator == "deaths")

# Left panel: density histogram of the weekly deaths, a vertical line at the
# mean, and the normal density with the same mean and standard deviation.
uno <- ggplot(deaths_america, aes(x = weekly_count)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    col = "red", fill = "green", alpha = .2
  ) +
  # The y axis shows a density, not a count.
  labs(
    title = "Density deaths COVID America",
    x = "Number of deaths",
    y = "Density"
  ) +
  geom_vline(xintercept = mean(deaths_america$weekly_count), color = "blue") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(deaths_america$weekly_count),
      sd = sd(deaths_america$weekly_count)
    ),
    col = "#1b98e0",
    size = 5
  )

# Right panel: normal Q-Q plot of the same weekly death counts.
dos <- ggplot(deaths_america, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID America")

# Show both panels side by side.
grid.arrange(uno, dos, ncol = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Weekly death rows for Asia, filtered once and reused below.
deaths_asia <- filter(cov_asia, indicator == "deaths")

# Left panel: density histogram of the weekly deaths, a vertical line at the
# mean, and the normal density with the same mean and standard deviation.
uno <- ggplot(deaths_asia, aes(x = weekly_count)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    col = "red", fill = "green", alpha = .2
  ) +
  # The y axis shows a density, not a count.
  labs(
    title = "Density deaths COVID Asia",
    x = "Number of deaths",
    y = "Density"
  ) +
  geom_vline(xintercept = mean(deaths_asia$weekly_count), color = "blue") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(deaths_asia$weekly_count),
      sd = sd(deaths_asia$weekly_count)
    ),
    col = "#1b98e0",
    size = 5
  )

# Right panel: normal Q-Q plot of the same weekly death counts.
dos <- ggplot(deaths_asia, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID Asia")

# Show both panels side by side.
grid.arrange(uno, dos, ncol = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Weekly death rows for Europe, filtered once and reused below.
deaths_europe <- filter(cov_europe, indicator == "deaths")

# Left panel: density histogram of the weekly deaths, a vertical line at the
# mean, and the normal density with the same mean and standard deviation.
uno <- ggplot(deaths_europe, aes(x = weekly_count)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    col = "red", fill = "green", alpha = .2
  ) +
  # The y axis shows a density, not a count.
  labs(
    title = "Density deaths COVID Europe",
    x = "Number of deaths",
    y = "Density"
  ) +
  geom_vline(xintercept = mean(deaths_europe$weekly_count), color = "blue") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(deaths_europe$weekly_count),
      sd = sd(deaths_europe$weekly_count)
    ),
    col = "#1b98e0",
    size = 5
  )

# Right panel: normal Q-Q plot of the same weekly death counts.
dos <- ggplot(deaths_europe, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID Europe")

# Show both panels side by side.
grid.arrange(uno, dos, ncol = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Weekly death rows for Oceania, filtered once and reused below.
deaths_oceania <- filter(cov_oceania, indicator == "deaths")

# Left panel: density histogram of the weekly deaths, a vertical line at the
# mean, and the normal density with the same mean and standard deviation.
uno <- ggplot(deaths_oceania, aes(x = weekly_count)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    col = "red", fill = "green", alpha = .2
  ) +
  # The y axis shows a density, not a count.
  labs(
    title = "Density deaths COVID Oceania",
    x = "Number of deaths",
    y = "Density"
  ) +
  geom_vline(xintercept = mean(deaths_oceania$weekly_count), color = "blue") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(deaths_oceania$weekly_count),
      sd = sd(deaths_oceania$weekly_count)
    ),
    col = "#1b98e0",
    size = 5
  )

# Right panel: normal Q-Q plot of the same weekly death counts.
dos <- ggplot(deaths_oceania, aes(sample = weekly_count)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Normal Q Q Deaths COVID Oceania")

# Show both panels side by side.
grid.arrange(uno, dos, ncol = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.