Covid19 Data:
## [1] "2020-12-05 01:48:19 CET"
library(utils)
#read the Dataset sheet into “R”. The dataset will be called "data".
data <- read.csv("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv", na.strings = "", fileEncoding = "UTF-8-BOM")
Descriptive Statistics of the cases and deaths grouped by countries:
data_sum<-data%>%select(dateRep,
countriesAndTerritories,
popData2019,
cases,
deaths,
Cumulative_number_for_14_days_of_COVID.19_cases_per_100000)%>%
group_by(countriesAndTerritories)%>%
summarize(cases,deaths)
summary(data_sum)
## countriesAndTerritories cases deaths
## Australia: 340 Min. : -8261 Min. :-1918.00
## Austria : 340 1st Qu.: 0 1st Qu.: 0.00
## Belgium : 340 Median : 15 Median : 0.00
## Brazil : 340 Mean : 1089 Mean : 25.22
## Canada : 340 3rd Qu.: 253 3rd Qu.: 4.00
## China : 340 Max. :214747 Max. : 4928.00
## (Other) :57730
data_sub<-dplyr::rename(data,
country=countriesAndTerritories,
date=dateRep,
CN_14=Cumulative_number_for_14_days_of_COVID.19_cases_per_100000)
data_sub$CN_14[is.na(data_sub$CN_14)]<-0
data_sub<-data_sub%>%
mutate(index=seq(1,length(date),by=1),CN_14=round(CN_14,2),CDR=round(deaths/cases*100,2))%>%
select(index,date,country,cases,deaths,CDR,CN_14)
data_sub$CN_14[is.na(data_sub$CN_14)]<-0
data_sub$CDR[is.na(data_sub$CDR)]<-0
head(data_sub,10)
## index date country cases deaths CDR CN_14
## 1 1 04/12/2020 Afghanistan 119 5 4.20 7.11
## 2 2 03/12/2020 Afghanistan 202 19 9.41 7.54
## 3 3 02/12/2020 Afghanistan 400 48 12.00 7.01
## 4 4 01/12/2020 Afghanistan 272 11 4.04 6.96
## 5 5 30/11/2020 Afghanistan 0 0 0.00 6.42
## 6 6 29/11/2020 Afghanistan 228 11 4.82 6.85
## 7 7 28/11/2020 Afghanistan 214 15 7.01 6.78
## 8 8 27/11/2020 Afghanistan 0 0 0.00 6.40
## 9 9 26/11/2020 Afghanistan 200 12 6.00 7.34
## 10 10 25/11/2020 Afghanistan 185 13 7.03 7.20
Moving Average of the deaths count within 3,5,7, and 14 days by Country: the function is "rollmean" and it is from the "zoo" package, K is intended as the numbers of days for which the average is calculated (source:https://www.storybench.org/how-to-calculate-a-rolling-average-in-r/)
data_sub_d <- data_sub %>%
arrange(desc(country)) %>%
group_by(country) %>%
mutate(deaths_03mav = round(rollmean(deaths, k = 3, fill = rollmean(deaths, k = 1))),
deaths_05mav = round(rollmean(deaths, k = 5, fill = rollmean(deaths, k = 2))),
deaths_07mav = round(rollmean(deaths, k = 7, fill = rollmean(deaths, k = 3))),
deaths_14mav = round(rollmean(deaths, k = 14, fill = rollmean(deaths, k = 6))))%>%
ungroup()
#names(data_sub_d)
head(arrange(data_sub_d,country),10)
## # A tibble: 10 x 11
## index date country cases deaths CDR CN_14 deaths_03mav deaths_05mav
## <dbl> <fct> <fct> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 04/1… Afghan… 119 5 4.2 7.11 5 12
## 2 2 03/1… Afghan… 202 19 9.41 7.54 24 12
## 3 3 02/1… Afghan… 400 48 12 7.01 26 17
## 4 4 01/1… Afghan… 272 11 4.04 6.96 20 18
## 5 5 30/1… Afghan… 0 0 0 6.42 7 17
## 6 6 29/1… Afghan… 228 11 4.82 6.85 9 7
## 7 7 28/1… Afghan… 214 15 7.01 6.78 9 8
## 8 8 27/1… Afghan… 0 0 0 6.4 9 10
## 9 9 26/1… Afghan… 200 12 6 7.34 8 11
## 10 10 25/1… Afghan… 185 13 7.03 7.2 14 10
## # … with 2 more variables: deaths_07mav <dbl>, deaths_14mav <dbl>
data_it<-data_sub_d%>%
filter(country=="Italy")%>%
mutate(time=seq(1,length(date),by=1))%>%
tidyr::pivot_longer(names_to = "rolling_mean_key",
values_to = "rolling_mean_value",
cols = c(deaths,
deaths_03mav,
deaths_14mav))%>%
select(time,date,rolling_mean_key,rolling_mean_value)
ggplot(data_it)+
aes(x = time, y = rolling_mean_value, fill = rolling_mean_key, colour = rolling_mean_key) +
geom_point(size = 1L) +
scale_fill_hue() +
scale_color_hue() +
theme_minimal()+
labs(title = "Italy's rolling average total COVID deaths",
subtitle = "first and second wave",
y = "Deaths",
x = "Date")
ggplot(data_it) +
aes(x = time, y = rolling_mean_value, fill = rolling_mean_key, colour = rolling_mean_key) +
geom_area(size = 1L) +
scale_fill_hue() +
scale_color_hue() +
theme_minimal()+
labs(title = "Italy's rolling average total COVID deaths",
subtitle = "first and second wave",
y = "Deaths",
x = "Date")
ggplot(data_it) +
aes(x = time, y = rolling_mean_value, fill = rolling_mean_key, colour = rolling_mean_key) +
geom_line() +
scale_fill_hue() +
scale_color_hue() +
theme_minimal()+
labs(title = "Italy's rolling average total COVID deaths",
subtitle = "across the Pandemic period",
y = "Deaths",
x = "Date")
Same thing is done it for the number of cases: moving average for cases within 3,5,7,and 14 days is calculated as the sum of the cases for the number of selected days divided by the same number of days.
data_sub_c <- data_sub %>%
arrange(desc(country)) %>%
group_by(country) %>%
mutate(cases_03mav = round(rollmean(cases, k = 3, fill = NA)),
cases_05mav = round(rollmean(cases, k = 5, fill = NA)),
cases_07mav = round(rollmean(cases, k = 7, fill = NA)),
cases_14mav = round(rollmean(cases, k = 14, fill = NA)))%>%
ungroup()
head(arrange(data_sub_c,country),10)
## # A tibble: 10 x 11
## index date country cases deaths CDR CN_14 cases_03mav cases_05mav
## <dbl> <fct> <fct> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 04/1… Afghan… 119 5 4.2 7.11 NA NA
## 2 2 03/1… Afghan… 202 19 9.41 7.54 240 NA
## 3 3 02/1… Afghan… 400 48 12 7.01 291 199
## 4 4 01/1… Afghan… 272 11 4.04 6.96 224 220
## 5 5 30/1… Afghan… 0 0 0 6.42 167 223
## 6 6 29/1… Afghan… 228 11 4.82 6.85 147 143
## 7 7 28/1… Afghan… 214 15 7.01 6.78 147 128
## 8 8 27/1… Afghan… 0 0 0 6.4 138 165
## 9 9 26/1… Afghan… 200 12 6 7.34 128 169
## 10 10 25/1… Afghan… 185 13 7.03 7.2 210 177
## # … with 2 more variables: cases_07mav <dbl>, cases_14mav <dbl>
data_it2<-data_sub_c%>%
filter(country=="Italy")%>%
mutate(time=seq(1,length(date),by=1))%>%
tidyr::pivot_longer(names_to = "rolling_mean_key",
values_to = "rolling_mean_value",
cols = c(cases,
cases_03mav,
cases_14mav))%>%
select(time,date,rolling_mean_key,rolling_mean_value)
ggplot(data_it2)+
aes(x = time, y = rolling_mean_value, fill = rolling_mean_key, colour = rolling_mean_key) +
geom_line() +
scale_fill_hue() +
scale_color_hue() +
theme_minimal()+
labs(title = "Italy's rolling average total COVID Cases",
subtitle = "first and second wave",
y = "Cases",
x = "Date")
data_rates<-cbind(data_sub_c,data_sub_d[8:11],deparse.level=1)
#names(data_rates)
data_rates <- data_rates %>%
select(index,date,country,cases,deaths,CN_14, cases_03mav:cases_14mav,deaths_03mav:deaths_14mav)%>%
mutate(CDR = round(deaths/cases*1000,2),
cfr_03 = round(deaths_03mav/cases_03mav*1000,2),
cfr_05 = round(deaths_05mav/cases_05mav*1000,2),
cfr_07 = round(deaths_07mav/cases_07mav*1000,2),
cfr_14 = round(deaths_14mav/cases_14mav*1000,2))%>%
select(index,date,country,cases,deaths,CN_14, CDR,cfr_03,cfr_05,cfr_07,cfr_14)%>%
ungroup()
#unique(data_rates$country)
data_uk<-data_rates%>%
filter(country=="United_Kingdom")%>%
mutate(time=seq(1,length(date),by=1))%>%
tidyr::pivot_longer(names_to = "rolling_mean_key",
values_to = "rolling_mean_value",
cols = c(CN_14,
CDR,
cfr_03,
cfr_14))%>%
select(time,date,rolling_mean_key,rolling_mean_value)
data_it3<-data_rates%>%
filter(country=="Italy")%>%
mutate(time=seq(1,length(date),by=1))%>%
tidyr::pivot_longer(names_to = "rolling_mean_key",
values_to = "rolling_mean_value",
cols = c(CN_14,
CDR,
cfr_03,
cfr_14))%>%
select(time,date,rolling_mean_key,rolling_mean_value)
opt_plot_uk<-ggplot(data_uk)+
aes(x = time, y = rolling_mean_value, fill = rolling_mean_key, colour = rolling_mean_key) +
geom_line() +
scale_fill_hue() +
scale_color_hue() +
theme_minimal()+
labs(title = "UK's rolling average total COVID CFR",
subtitle = "first and second wave",
y = "CDR and MAV cfr",
x = "Date")
opt_plot_it<-ggplot(data_it3)+
aes(x = time, y = rolling_mean_value, fill = rolling_mean_key, colour = rolling_mean_key) +
geom_line() +
scale_fill_hue() +
scale_color_hue() +
theme_minimal()+
labs(title = "Italy's rolling average total COVID CFR",
subtitle = "first and second wave",
y = "CDR and MAV cfr",
x = "Date")
gridExtra::grid.arrange(opt_plot_uk,opt_plot_it,nrow=2)
data_sub_cs <- data_sub %>%
arrange(desc(country)) %>%
group_by(country) %>%
mutate(cases_03sav = round(rollsum(cases, k = 3, fill = NA)),
cases_05sav = round(rollsum(cases, k = 5, fill = NA)),
cases_07sav = round(rollsum(cases, k = 7, fill = NA)),
cases_14sav = round(rollsum(cases, k = 14, fill = NA)/100))%>%
dplyr::ungroup()
head(data_sub_cs)
## # A tibble: 6 x 11
## index date country cases deaths CDR CN_14 cases_03sav cases_05sav
## <dbl> <fct> <fct> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 59512 04/1… Zimbab… 181 3 1.66 9.41 NA NA
## 2 59513 03/1… Zimbab… 114 0 0 8.62 390 NA
## 3 59514 02/1… Zimbab… 95 0 0 8.08 293 602
## 4 59515 01/1… Zimbab… 84 1 1.19 7.76 307 529
## 5 59516 30/1… Zimbab… 128 1 0.78 7.95 320 506
## 6 59517 29/1… Zimbab… 108 0 0 7.07 327 526
## # … with 2 more variables: cases_07sav <dbl>, cases_14sav <dbl>
data_it4<-data_sub_cs%>%
filter(country=="Italy")%>%
mutate(time=seq(1,length(date),by=1))%>%
tidyr::pivot_longer(names_to = "rolling_sum_key",
values_to = "rolling_sum_value",
cols = c(CN_14,
#cases_03sav,
cases_14sav))%>%
select(time,date,rolling_sum_key,rolling_sum_value)
ggplot(data_it4)+
aes(x = time, y = rolling_sum_value, fill = rolling_sum_key, colour = rolling_sum_key) +
geom_line() +
scale_fill_hue() +
scale_color_hue() +
theme_minimal()+
labs(title = "Italy's rolling average total COVID Cases SAV",
subtitle = "first and second wave",
y = "SAV cases",
x = "Date")
Average Cases ratio per 100 000 World Population:
data_sub$CN_14[is.na(data_sub$CN_14)]<-0
mean(data_sub$CN_14)
## [1] 58.85791
Total number of countries affected by Covid19 cases at today:
countries<-unique(data_sub$country)
length(countries)
## [1] 214
data_sub2<-data_sub%>%group_by(country)%>%
dplyr::summarize(Avg_cases = round(mean(cases)),Avg_deaths = round(mean(deaths)),Avg_CN_14 = round(mean(CN_14),2))%>%arrange(-Avg_CN_14)
## `summarise()` ungrouping output (override with `.groups` argument)
data_sub2
## # A tibble: 214 x 4
## country Avg_cases Avg_deaths Avg_CN_14
## <fct> <dbl> <dbl> <dbl>
## 1 Andorra 26 0 447.
## 2 Montenegro 141 2 288.
## 3 French_Polynesia 56 0 263.
## 4 Aruba 19 0 245.
## 5 Luxembourg 108 1 223.
## 6 Bahrain 258 1 217.
## 7 Guam 27 0 215.
## 8 Belgium 1720 50 205.
## 9 Qatar 415 1 203.
## 10 Czechia 1581 25 198.
## # … with 204 more rows
require(plyr)
data_cdc<- ddply(data_sub,.(country),summarize,deaths=sum(deaths))
data_ccc<- ddply(data_sub,.(country),summarize,cases=sum(cases))
data_cfr<- ddply(data_sub,.(country),summarize,CN_14=mean(CN_14))
data_unique<-merge(data_cdc,data_ccc,by="country")
data_unique<-merge(data_unique,data_cfr,by="country")
data_unique<-data_unique%>%
mutate(CN_14=round(CN_14,2))%>%
arrange(-deaths)
data_unique<-arrange(data_unique,-deaths)
data_unique_10<-data_unique[1:10,]
data_unique_10
## country deaths cases CN_14
## 1 United_States_of_America 276316 14139703 162.65
## 2 Brazil 175270 6487084 121.54
## 3 India 139188 9571559 28.17
## 4 Mexico 108173 1144643 34.95
## 5 United_Kingdom 60113 1674134 97.51
## 6 Italy 58038 1664829 103.45
## 7 France 54140 2257331 134.33
## 8 Iran 49348 1003494 45.40
## 9 Spain 46038 1675902 142.04
## 10 Russia 42176 2402949 62.97
ggplot(data_unique_10) +
aes(x = country, fill = country, colour = country, weight = CN_14) +
geom_bar() +
scale_fill_hue() +
scale_color_hue() +
theme_minimal()+
theme(axis.text.x = element_text(angle = 15))+
labs(title="Top 10 Countries by Covid19 Cn14",
subtitle="for 100 000 Population",
caption="Source:ECDC",
x="Top 10 World Countries",
y="Cn14 100 000")
#esquisse::esquisser(data_unique_10)