Covid19 Data:

## [1] "2020-12-05 01:48:19 CET"
library(utils)

#read the Dataset sheet into “R”. The dataset will be called "data".
data <- read.csv("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv", na.strings = "", fileEncoding = "UTF-8-BOM")

Descriptive Statistics of the cases and deaths grouped by countries:

data_sum<-data%>%select(dateRep,
                        countriesAndTerritories,
                        popData2019,
                        cases,
                        deaths, 
                        Cumulative_number_for_14_days_of_COVID.19_cases_per_100000)%>%
  group_by(countriesAndTerritories)%>%
  summarize(cases,deaths)
  
summary(data_sum)
##  countriesAndTerritories     cases            deaths        
##  Australia:  340         Min.   : -8261   Min.   :-1918.00  
##  Austria  :  340         1st Qu.:     0   1st Qu.:    0.00  
##  Belgium  :  340         Median :    15   Median :    0.00  
##  Brazil   :  340         Mean   :  1089   Mean   :   25.22  
##  Canada   :  340         3rd Qu.:   253   3rd Qu.:    4.00  
##  China    :  340         Max.   :214747   Max.   : 4928.00  
##  (Other)  :57730
data_sub<-dplyr::rename(data,
                 country=countriesAndTerritories,
                 date=dateRep,
                 CN_14=Cumulative_number_for_14_days_of_COVID.19_cases_per_100000)

data_sub$CN_14[is.na(data_sub$CN_14)]<-0

data_sub<-data_sub%>%
  mutate(index=seq(1,length(date),by=1),CN_14=round(CN_14,2),CDR=round(deaths/cases*100,2))%>%
  select(index,date,country,cases,deaths,CDR,CN_14)

data_sub$CN_14[is.na(data_sub$CN_14)]<-0
data_sub$CDR[is.na(data_sub$CDR)]<-0

head(data_sub,10)
##    index       date     country cases deaths   CDR CN_14
## 1      1 04/12/2020 Afghanistan   119      5  4.20  7.11
## 2      2 03/12/2020 Afghanistan   202     19  9.41  7.54
## 3      3 02/12/2020 Afghanistan   400     48 12.00  7.01
## 4      4 01/12/2020 Afghanistan   272     11  4.04  6.96
## 5      5 30/11/2020 Afghanistan     0      0  0.00  6.42
## 6      6 29/11/2020 Afghanistan   228     11  4.82  6.85
## 7      7 28/11/2020 Afghanistan   214     15  7.01  6.78
## 8      8 27/11/2020 Afghanistan     0      0  0.00  6.40
## 9      9 26/11/2020 Afghanistan   200     12  6.00  7.34
## 10    10 25/11/2020 Afghanistan   185     13  7.03  7.20

Moving Average

Moving Average of the deaths count within 3,5,7, and 14 days by Country: the function is "rollmean" and it is from the "zoo" package, K is intended as the numbers of days for which the average is calculated (source:https://www.storybench.org/how-to-calculate-a-rolling-average-in-r/)

data_sub_d <- data_sub %>%
    arrange(desc(country)) %>% 
    group_by(country) %>% 
    mutate(deaths_03mav = round(rollmean(deaths, k = 3, fill = rollmean(deaths, k = 1))),
                  deaths_05mav = round(rollmean(deaths, k = 5, fill = rollmean(deaths, k = 2))),
                  deaths_07mav = round(rollmean(deaths, k = 7, fill = rollmean(deaths, k = 3))),
                  deaths_14mav = round(rollmean(deaths, k = 14, fill = rollmean(deaths, k = 6))))%>%
    ungroup()

#names(data_sub_d)
head(arrange(data_sub_d,country),10)
## # A tibble: 10 x 11
##    index date  country cases deaths   CDR CN_14 deaths_03mav deaths_05mav
##    <dbl> <fct> <fct>   <int>  <int> <dbl> <dbl>        <dbl>        <dbl>
##  1     1 04/1… Afghan…   119      5  4.2   7.11            5           12
##  2     2 03/1… Afghan…   202     19  9.41  7.54           24           12
##  3     3 02/1… Afghan…   400     48 12     7.01           26           17
##  4     4 01/1… Afghan…   272     11  4.04  6.96           20           18
##  5     5 30/1… Afghan…     0      0  0     6.42            7           17
##  6     6 29/1… Afghan…   228     11  4.82  6.85            9            7
##  7     7 28/1… Afghan…   214     15  7.01  6.78            9            8
##  8     8 27/1… Afghan…     0      0  0     6.4             9           10
##  9     9 26/1… Afghan…   200     12  6     7.34            8           11
## 10    10 25/1… Afghan…   185     13  7.03  7.2            14           10
## # … with 2 more variables: deaths_07mav <dbl>, deaths_14mav <dbl>

Plotting results:

Select of a favourite Country: Italy

Italy Deaths MAV:

data_it<-data_sub_d%>%
  filter(country=="Italy")%>%
  mutate(time=seq(1,length(date),by=1))%>%
  tidyr::pivot_longer(names_to = "rolling_mean_key", 
                    values_to = "rolling_mean_value", 
                    cols = c(deaths, 
                             deaths_03mav, 
                             deaths_14mav))%>%
  select(time,date,rolling_mean_key,rolling_mean_value)


ggplot(data_it)+
  aes(x = time, y = rolling_mean_value, fill = rolling_mean_key, colour = rolling_mean_key) +
 geom_point(size = 1L) +
 scale_fill_hue() +
 scale_color_hue() +
 theme_minimal()+
   labs(title = "Italy's rolling average total COVID deaths", 
                  subtitle = "first and second wave",
                  y = "Deaths", 
                  x = "Date") 

ggplot(data_it) +
 aes(x = time, y = rolling_mean_value, fill = rolling_mean_key, colour = rolling_mean_key) +
 geom_area(size = 1L) +
 scale_fill_hue() +
 scale_color_hue() +
 theme_minimal()+
 labs(title = "Italy's rolling average total COVID deaths", 
                  subtitle = "first and second wave",
                  y = "Deaths", 
                  x = "Date") 

ggplot(data_it) +
 aes(x = time, y = rolling_mean_value, fill = rolling_mean_key, colour = rolling_mean_key) +
 geom_line() +
 scale_fill_hue() +
 scale_color_hue() +
 theme_minimal()+
labs(title = "Italy's rolling average total COVID deaths", 
                  subtitle = "across the Pandemic period",
                  y = "Deaths",
                  x = "Date") 

Same thing is done it for the number of cases: moving average for cases within 3,5,7,and 14 days is calculated as the sum of the cases for the number of selected days divided by the same number of days.

data_sub_c <- data_sub %>%
    arrange(desc(country)) %>% 
    group_by(country) %>% 
    mutate(cases_03mav = round(rollmean(cases, k = 3, fill = NA)),
                  cases_05mav = round(rollmean(cases, k = 5, fill = NA)),
                  cases_07mav = round(rollmean(cases, k = 7, fill = NA)),
                  cases_14mav = round(rollmean(cases, k = 14, fill = NA)))%>%
    ungroup()


head(arrange(data_sub_c,country),10)
## # A tibble: 10 x 11
##    index date  country cases deaths   CDR CN_14 cases_03mav cases_05mav
##    <dbl> <fct> <fct>   <int>  <int> <dbl> <dbl>       <dbl>       <dbl>
##  1     1 04/1… Afghan…   119      5  4.2   7.11          NA          NA
##  2     2 03/1… Afghan…   202     19  9.41  7.54         240          NA
##  3     3 02/1… Afghan…   400     48 12     7.01         291         199
##  4     4 01/1… Afghan…   272     11  4.04  6.96         224         220
##  5     5 30/1… Afghan…     0      0  0     6.42         167         223
##  6     6 29/1… Afghan…   228     11  4.82  6.85         147         143
##  7     7 28/1… Afghan…   214     15  7.01  6.78         147         128
##  8     8 27/1… Afghan…     0      0  0     6.4          138         165
##  9     9 26/1… Afghan…   200     12  6     7.34         128         169
## 10    10 25/1… Afghan…   185     13  7.03  7.2          210         177
## # … with 2 more variables: cases_07mav <dbl>, cases_14mav <dbl>

Cases MAV in Italy

data_it2<-data_sub_c%>%
  filter(country=="Italy")%>%
  mutate(time=seq(1,length(date),by=1))%>%
  tidyr::pivot_longer(names_to = "rolling_mean_key", 
                    values_to = "rolling_mean_value", 
                    cols = c(cases, 
                             cases_03mav, 
                             cases_14mav))%>%
  select(time,date,rolling_mean_key,rolling_mean_value)


ggplot(data_it2)+
  aes(x = time, y = rolling_mean_value, fill = rolling_mean_key, colour = rolling_mean_key) +
 geom_line() +
 scale_fill_hue() +
 scale_color_hue() +
 theme_minimal()+
   labs(title = "Italy's rolling average total COVID Cases", 
                  subtitle = "first and second wave",
                  y = "Cases", 
                  x = "Date") 

CDR crude death rate and CFR100

data_rates<-cbind(data_sub_c,data_sub_d[8:11],deparse.level=1)
#names(data_rates)


data_rates <- data_rates %>%
    select(index,date,country,cases,deaths,CN_14, cases_03mav:cases_14mav,deaths_03mav:deaths_14mav)%>%
    mutate(CDR = round(deaths/cases*1000,2),
           cfr_03 = round(deaths_03mav/cases_03mav*1000,2), 
           cfr_05 = round(deaths_05mav/cases_05mav*1000,2),
           cfr_07 = round(deaths_07mav/cases_07mav*1000,2),
           cfr_14 = round(deaths_14mav/cases_14mav*1000,2))%>%
    select(index,date,country,cases,deaths,CN_14, CDR,cfr_03,cfr_05,cfr_07,cfr_14)%>%
    ungroup()


#unique(data_rates$country)

data_uk<-data_rates%>%
  filter(country=="United_Kingdom")%>%
  mutate(time=seq(1,length(date),by=1))%>%
  tidyr::pivot_longer(names_to = "rolling_mean_key", 
                    values_to = "rolling_mean_value", 
                    cols = c(CN_14,
                             CDR, 
                             cfr_03, 
                             cfr_14))%>%
  select(time,date,rolling_mean_key,rolling_mean_value)
  
data_it3<-data_rates%>%
  filter(country=="Italy")%>%
  mutate(time=seq(1,length(date),by=1))%>%
  tidyr::pivot_longer(names_to = "rolling_mean_key", 
                    values_to = "rolling_mean_value", 
                    cols = c(CN_14,
                             CDR, 
                             cfr_03, 
                             cfr_14))%>%
  select(time,date,rolling_mean_key,rolling_mean_value)


opt_plot_uk<-ggplot(data_uk)+
  aes(x = time, y = rolling_mean_value, fill = rolling_mean_key, colour = rolling_mean_key) +
 geom_line() +
 scale_fill_hue() +
 scale_color_hue() +
 theme_minimal()+
   labs(title = "UK's rolling average total COVID CFR", 
                  subtitle = "first and second wave",
                  y = "CDR and MAV cfr", 
                  x = "Date") 

opt_plot_it<-ggplot(data_it3)+
  aes(x = time, y = rolling_mean_value, fill = rolling_mean_key, colour = rolling_mean_key) +
 geom_line() +
 scale_fill_hue() +
 scale_color_hue() +
 theme_minimal()+
   labs(title = "Italy's rolling average total COVID CFR", 
                  subtitle = "first and second wave",
                  y = "CDR and MAV cfr", 
                  x = "Date") 

gridExtra::grid.arrange(opt_plot_uk,opt_plot_it,nrow=2)

data_sub_cs <- data_sub %>%
    arrange(desc(country)) %>% 
    group_by(country) %>% 
    mutate(cases_03sav = round(rollsum(cases, k = 3, fill = NA)),
                  cases_05sav = round(rollsum(cases, k = 5, fill = NA)),
                  cases_07sav = round(rollsum(cases, k = 7, fill = NA)),
                  cases_14sav = round(rollsum(cases, k = 14, fill = NA)/100))%>%
    dplyr::ungroup()


head(data_sub_cs)
## # A tibble: 6 x 11
##   index date  country cases deaths   CDR CN_14 cases_03sav cases_05sav
##   <dbl> <fct> <fct>   <int>  <int> <dbl> <dbl>       <dbl>       <dbl>
## 1 59512 04/1… Zimbab…   181      3  1.66  9.41          NA          NA
## 2 59513 03/1… Zimbab…   114      0  0     8.62         390          NA
## 3 59514 02/1… Zimbab…    95      0  0     8.08         293         602
## 4 59515 01/1… Zimbab…    84      1  1.19  7.76         307         529
## 5 59516 30/1… Zimbab…   128      1  0.78  7.95         320         506
## 6 59517 29/1… Zimbab…   108      0  0     7.07         327         526
## # … with 2 more variables: cases_07sav <dbl>, cases_14sav <dbl>
data_it4<-data_sub_cs%>%
  filter(country=="Italy")%>%
  mutate(time=seq(1,length(date),by=1))%>%
  tidyr::pivot_longer(names_to = "rolling_sum_key", 
                    values_to = "rolling_sum_value", 
                    cols = c(CN_14,
                             #cases_03sav, 
                             cases_14sav))%>%
  select(time,date,rolling_sum_key,rolling_sum_value)


ggplot(data_it4)+
  aes(x = time, y = rolling_sum_value, fill = rolling_sum_key, colour = rolling_sum_key) +
 geom_line() +
 scale_fill_hue() +
 scale_color_hue() +
 theme_minimal()+
   labs(title = "Italy's rolling average total COVID Cases SAV", 
                  subtitle = "first and second wave",
                  y = "SAV cases", 
                  x = "Date") 

Average Cases ratio per 100 000 World Population:

data_sub$CN_14[is.na(data_sub$CN_14)]<-0
mean(data_sub$CN_14)
## [1] 58.85791

Total number of countries affected by Covid19 cases at today:

countries<-unique(data_sub$country)
length(countries)
## [1] 214
data_sub2<-data_sub%>%group_by(country)%>%
  dplyr::summarize(Avg_cases = round(mean(cases)),Avg_deaths = round(mean(deaths)),Avg_CN_14 = round(mean(CN_14),2))%>%arrange(-Avg_CN_14)
## `summarise()` ungrouping output (override with `.groups` argument)
data_sub2
## # A tibble: 214 x 4
##    country          Avg_cases Avg_deaths Avg_CN_14
##    <fct>                <dbl>      <dbl>     <dbl>
##  1 Andorra                 26          0      447.
##  2 Montenegro             141          2      288.
##  3 French_Polynesia        56          0      263.
##  4 Aruba                   19          0      245.
##  5 Luxembourg             108          1      223.
##  6 Bahrain                258          1      217.
##  7 Guam                    27          0      215.
##  8 Belgium               1720         50      205.
##  9 Qatar                  415          1      203.
## 10 Czechia               1581         25      198.
## # … with 204 more rows
require(plyr)
data_cdc<- ddply(data_sub,.(country),summarize,deaths=sum(deaths))
data_ccc<- ddply(data_sub,.(country),summarize,cases=sum(cases))
data_cfr<- ddply(data_sub,.(country),summarize,CN_14=mean(CN_14))

data_unique<-merge(data_cdc,data_ccc,by="country")
data_unique<-merge(data_unique,data_cfr,by="country")

data_unique<-data_unique%>%
  mutate(CN_14=round(CN_14,2))%>%
  arrange(-deaths)

data_unique<-arrange(data_unique,-deaths)
data_unique_10<-data_unique[1:10,]
data_unique_10
##                     country deaths    cases  CN_14
## 1  United_States_of_America 276316 14139703 162.65
## 2                    Brazil 175270  6487084 121.54
## 3                     India 139188  9571559  28.17
## 4                    Mexico 108173  1144643  34.95
## 5            United_Kingdom  60113  1674134  97.51
## 6                     Italy  58038  1664829 103.45
## 7                    France  54140  2257331 134.33
## 8                      Iran  49348  1003494  45.40
## 9                     Spain  46038  1675902 142.04
## 10                   Russia  42176  2402949  62.97
ggplot(data_unique_10) +
 aes(x = country, fill = country, colour = country, weight = CN_14) +
 geom_bar() +
 scale_fill_hue() +
 scale_color_hue() +
 theme_minimal()+
  theme(axis.text.x = element_text(angle = 15))+
  labs(title="Top 10 Countries by Covid19 Cn14",
       subtitle="for 100 000 Population",
       caption="Source:ECDC",
       x="Top 10 World Countries",
       y="Cn14 100 000")

#esquisse::esquisser(data_unique_10)