# Monthly totals over the whole `nordic` table: one row per two-digit
# year-month key ("yy-mm") holding the summed new cases and new deaths.
df_total <- nordic %>%
  mutate(
    date = as.Date(date),
    ym = format(date, "%y-%m")
  ) %>%
  group_by(ym) %>%
  summarise(
    ym_cases = sum(new_cases),
    ym_deaths = sum(new_deaths)
  )

# Show the first 14 monthly rows.
head(df_total, 14)
## # A tibble: 14 x 3
## ym ym_cases ym_deaths
## <chr> <int> <int>
## 1 20-02 12270 765
## 2 20-03 15031 703
## 3 20-04 30420 2936
## 4 20-05 22148 2066
## 5 20-06 31174 991
## 6 20-07 10852 426
## 7 20-08 13103 103
## 8 20-09 25325 116
## 9 20-10 64438 140
## 10 20-11 178058 964
## 11 20-12 302338 2776
## 12 21-01 187396 3930
## 13 21-02 123600 1600
## 14 21-03 160415 676
# Convert the summarised tibble into a plain data.frame.
df_total <- as.data.frame(df_total)

# Check the column types, in particular how the `ym` key is stored.
str(df_total)
## 'data.frame': 14 obs. of 3 variables:
## $ ym : chr "20-02" "20-03" "20-04" "20-05" ...
## $ ym_cases : int 12270 15031 30420 22148 31174 10852 13103 25325 64438 178058 ...
## $ ym_deaths: int 765 703 2936 2066 991 426 103 116 140 964 ...
# Render the monthly totals as a gt table.
gt(df_total)
| ym | ym_cases | ym_deaths |
|---|---|---|
| 20-02 | 12270 | 765 |
| 20-03 | 15031 | 703 |
| 20-04 | 30420 | 2936 |
| 20-05 | 22148 | 2066 |
| 20-06 | 31174 | 991 |
| 20-07 | 10852 | 426 |
| 20-08 | 13103 | 103 |
| 20-09 | 25325 | 116 |
| 20-10 | 64438 | 140 |
| 20-11 | 178058 | 964 |
| 20-12 | 302338 | 2776 |
| 21-01 | 187396 | 3930 |
| 21-02 | 123600 | 1600 |
| 21-03 | 160415 | 676 |
# Render the monthly totals as a gt table, keeping row names out of the
# table stub (the rendered table has no stub column).
df_total %>%
  gt(rownames_to_stub = FALSE)
| ym | ym_cases | ym_deaths |
|---|---|---|
| 20-02 | 12270 | 765 |
| 20-03 | 15031 | 703 |
| 20-04 | 30420 | 2936 |
| 20-05 | 22148 | 2066 |
| 20-06 | 31174 | 991 |
| 20-07 | 10852 | 426 |
| 20-08 | 13103 | 103 |
| 20-09 | 25325 | 116 |
| 20-10 | 64438 | 140 |
| 20-11 | 178058 | 964 |
| 20-12 | 302338 | 2776 |
| 21-01 | 187396 | 3930 |
| 21-02 | 123600 | 1600 |
| 21-03 | 160415 | 676 |
# Monthly totals per country, grouped by month first and then by country.
df1 <- nordic %>%
  mutate(
    date = as.Date(date),
    ym = format(date, "%y-%m")
  ) %>%
  group_by(ym, location) %>%
  summarise(
    ym_cases = sum(new_cases),
    ym_deaths = sum(new_deaths)
  )

# Show the first 14 month/country rows.
head(df1, 14)
## # A tibble: 14 x 4
## # Groups: ym [3]
## ym location ym_cases ym_deaths
## <chr> <chr> <int> <int>
## 1 20-02 Denmark 10435 382
## 2 20-02 Finland 2 50
## 3 20-02 Iceland 453 29
## 4 20-02 Norway 1366 188
## 5 20-02 Sweden 14 116
## 6 20-03 Denmark 3036 138
## 7 20-03 Finland 1415 40
## 8 20-03 Iceland 1134 71
## 9 20-03 Norway 4626 57
## 10 20-03 Sweden 4820 397
## 11 20-04 Denmark 6317 362
## 12 20-04 Finland 3577 194
## 13 20-04 Iceland 662 8
## 14 20-04 Norway 3097 171
# Convert the grouped summary into a plain data.frame.
df1 <- as.data.frame(df1)
# ym(df$ym)
# Monthly totals per country, grouped by country first and then by month.
# `ym` uses a four-digit year here ("YYYY-mm"); `rate` is the month's deaths
# divided by its cases, rounded to two decimals.
df2 <- nordic %>%
  mutate(
    date = as.Date(date),
    ym = format(date, "%Y-%m")
  ) %>%
  group_by(location, ym) %>%
  summarise(
    ym_cases = sum(new_cases),
    ym_deaths = sum(new_deaths),
    rate = round(sum(new_deaths) / sum(new_cases), digits = 2)
  )

head(df2)
## # A tibble: 6 x 5
## # Groups: location [1]
## location ym ym_cases ym_deaths rate
## <chr> <chr> <int> <int> <dbl>
## 1 Denmark 2020-02 10435 382 0.04
## 2 Denmark 2020-03 3036 138 0.05
## 3 Denmark 2020-04 6317 362 0.06
## 4 Denmark 2020-05 2513 122 0.05
## 5 Denmark 2020-06 1099 31 0.03
## 6 Denmark 2020-07 1060 10 0.01
# Convert the grouped summary into a plain data.frame.
df2 <- as.data.frame(df2)

# Check that `ym` is still stored as character at this point.
str(df2)
## 'data.frame': 70 obs. of 5 variables:
## $ location : chr "Denmark" "Denmark" "Denmark" "Denmark" ...
## $ ym : chr "2020-02" "2020-03" "2020-04" "2020-05" ...
## $ ym_cases : int 10435 3036 6317 2513 1099 1060 3382 11069 18384 34139 ...
## $ ym_deaths: int 382 138 362 122 31 10 9 26 71 116 ...
## $ rate : num 0.04 0.05 0.06 0.05 0.03 0.01 0 0 0 0 ...
# as.Date() needs a complete date, so "-01" is appended to pin every
# "YYYY-mm" key to the first day of its month before converting.
df2$ym <- as.Date(paste0(df2$ym, "-01"))

# Print the full per-country monthly table.
df2
## location ym ym_cases ym_deaths rate
## 1 Denmark 2020-02-01 10435 382 0.04
## 2 Denmark 2020-03-01 3036 138 0.05
## 3 Denmark 2020-04-01 6317 362 0.06
## 4 Denmark 2020-05-01 2513 122 0.05
## 5 Denmark 2020-06-01 1099 31 0.03
## 6 Denmark 2020-07-01 1060 10 0.01
## 7 Denmark 2020-08-01 3382 9 0.00
## 8 Denmark 2020-09-01 11069 26 0.00
## 9 Denmark 2020-10-01 18384 71 0.00
## 10 Denmark 2020-11-01 34139 116 0.00
## 11 Denmark 2020-12-01 83114 461 0.01
## 12 Denmark 2021-01-01 35040 828 0.02
## 13 Denmark 2021-02-01 12727 236 0.02
## 14 Denmark 2021-03-01 16703 44 0.00
## 15 Finland 2020-02-01 2 50 25.00
## 16 Finland 2020-03-01 1415 40 0.03
## 17 Finland 2020-04-01 3577 194 0.05
## 18 Finland 2020-05-01 1864 109 0.06
## 19 Finland 2020-06-01 355 8 0.02
## 20 Finland 2020-07-01 218 1 0.00
## 21 Finland 2020-08-01 654 7 0.01
## 22 Finland 2020-09-01 1906 8 0.00
## 23 Finland 2020-10-01 6121 14 0.00
## 24 Finland 2020-11-01 8799 41 0.00
## 25 Finland 2020-12-01 11195 162 0.01
## 26 Finland 2021-01-01 9131 110 0.01
## 27 Finland 2021-02-01 12434 71 0.01
## 28 Finland 2021-03-01 15844 67 0.00
## 29 Iceland 2020-02-01 453 29 0.06
## 30 Iceland 2020-03-01 1134 71 0.06
## 31 Iceland 2020-04-01 662 8 0.01
## 32 Iceland 2020-05-01 9 0 0.00
## 33 Iceland 2020-06-01 18 0 0.00
## 34 Iceland 2020-07-01 61 0 0.00
## 35 Iceland 2020-08-01 222 0 0.00
## 36 Iceland 2020-09-01 621 0 0.00
## 37 Iceland 2020-10-01 2137 2 0.00
## 38 Iceland 2020-11-01 527 14 0.03
## 39 Iceland 2020-12-01 362 3 0.01
## 40 Iceland 2021-01-01 248 0 0.00
## 41 Iceland 2021-02-01 47 0 0.00
## 42 Iceland 2021-03-01 95 0 0.00
## 43 Norway 2020-02-01 1366 188 0.14
## 44 Norway 2020-03-01 4626 57 0.01
## 45 Norway 2020-04-01 3097 171 0.06
## 46 Norway 2020-05-01 702 26 0.04
## 47 Norway 2020-06-01 439 14 0.03
## 48 Norway 2020-07-01 361 5 0.01
## 49 Norway 2020-08-01 1542 9 0.01
## 50 Norway 2020-09-01 3245 10 0.00
## 51 Norway 2020-10-01 6304 8 0.00
## 52 Norway 2020-11-01 15819 50 0.00
## 53 Norway 2020-12-01 13417 104 0.01
## 54 Norway 2021-01-01 13399 128 0.01
## 55 Norway 2021-02-01 8040 58 0.01
## 56 Norway 2021-03-01 19098 34 0.00
## 57 Sweden 2020-02-01 14 116 8.29
## 58 Sweden 2020-03-01 4820 397 0.08
## 59 Sweden 2020-04-01 16767 2201 0.13
## 60 Sweden 2020-05-01 17060 1809 0.11
## 61 Sweden 2020-06-01 29263 938 0.03
## 62 Sweden 2020-07-01 9152 410 0.04
## 63 Sweden 2020-08-01 7303 78 0.01
## 64 Sweden 2020-09-01 8484 72 0.01
## 65 Sweden 2020-10-01 31492 45 0.00
## 66 Sweden 2020-11-01 118774 743 0.01
## 67 Sweden 2020-12-01 194250 2046 0.01
## 68 Sweden 2021-01-01 129578 2864 0.02
## 69 Sweden 2021-02-01 90352 1235 0.01
## 70 Sweden 2021-03-01 108675 531 0.00
# Per-country monthly summary with two derived columns: `rate` (deaths
# divided by cases, rounded to two decimals) and `survival` (1 - rate).
df.2 <- nordic %>%
  mutate(
    date = as.Date(date),
    ym = format(date, "%Y-%m")
  ) %>%
  group_by(location, ym) %>%
  summarise(
    ym_cases = sum(new_cases),
    ym_deaths = sum(new_deaths),
    rate = round(sum(new_deaths) / sum(new_cases), digits = 2),
    survival = round(1 - rate, digits = 2)
  )

head(df.2)
## # A tibble: 6 x 6
## # Groups: location [1]
## location ym ym_cases ym_deaths rate survival
## <chr> <chr> <int> <int> <dbl> <dbl>
## 1 Denmark 2020-02 10435 382 0.04 0.96
## 2 Denmark 2020-03 3036 138 0.05 0.95
## 3 Denmark 2020-04 6317 362 0.06 0.94
## 4 Denmark 2020-05 2513 122 0.05 0.95
## 5 Denmark 2020-06 1099 31 0.03 0.97
## 6 Denmark 2020-07 1060 10 0.01 0.99
# Alternative `survival` definition, kept for reference:
# survival=round((1-(sum(new_deaths)/sum(new_cases))),digits=2))

# Convert the grouped summary into a plain data.frame.
df.2 <- as.data.frame(df.2)

# Check that `ym` is still stored as character at this point.
str(df.2)
## 'data.frame': 70 obs. of 6 variables:
## $ location : chr "Denmark" "Denmark" "Denmark" "Denmark" ...
## $ ym : chr "2020-02" "2020-03" "2020-04" "2020-05" ...
## $ ym_cases : int 10435 3036 6317 2513 1099 1060 3382 11069 18384 34139 ...
## $ ym_deaths: int 382 138 362 122 31 10 9 26 71 116 ...
## $ rate : num 0.04 0.05 0.06 0.05 0.03 0.01 0 0 0 0 ...
## $ survival : num 0.96 0.95 0.94 0.95 0.97 0.99 1 1 1 1 ...
# as.Date() needs a complete date, so "-01" is appended to pin every
# "YYYY-mm" key to the first day of its month before converting.
df.2$ym <- as.Date(paste0(df.2$ym, "-01"))
# https://gt.rstudio.com/articles/gt-datasets.html
# Reshape the per-country monthly table to wide form: one row per country,
# one cases column and one deaths column per month.
# `id_cols = location` is required: `rate` is not pivoted, and without it
# pivot_wider() treats `rate` as a second identifier, which splits each
# country over many mostly-NA rows.
df_wider1 <- df2 %>%
  pivot_wider(
    id_cols = location,
    names_from = ym,
    values_from = c(ym_cases, ym_deaths)
  )
colnames(df_wider1)
## [1] "location" "ym_cases_2020-02-01" "ym_cases_2020-03-01"
## [4] "ym_cases_2020-04-01" "ym_cases_2020-05-01" "ym_cases_2020-06-01"
## [7] "ym_cases_2020-07-01" "ym_cases_2020-08-01" "ym_cases_2020-09-01"
## [10] "ym_cases_2020-10-01" "ym_cases_2020-11-01" "ym_cases_2020-12-01"
## [13] "ym_cases_2021-01-01" "ym_cases_2021-02-01" "ym_cases_2021-03-01"
## [16] "ym_deaths_2020-02-01" "ym_deaths_2020-03-01" "ym_deaths_2020-04-01"
## [19] "ym_deaths_2020-05-01" "ym_deaths_2020-06-01" "ym_deaths_2020-07-01"
## [22] "ym_deaths_2020-08-01" "ym_deaths_2020-09-01" "ym_deaths_2020-10-01"
## [25] "ym_deaths_2020-11-01" "ym_deaths_2020-12-01" "ym_deaths_2021-01-01"
## [28] "ym_deaths_2021-02-01" "ym_deaths_2021-03-01"
library(data.table)

# Relabel the pivoted month columns with "Mon, YYYY" headers.
# setnames() renames by reference, so `df_wider1` is relabelled too and
# `df_wider` refers to the same table.
# Each label is used twice (once for the cases column and once for the
# deaths column of a month), so the resulting column names are not unique.
df_wider <- local({
  # First day of every month from February 2020 through March 2021.
  month_starts <- seq(as.Date("2020-02-01"), as.Date("2021-03-01"), by = "month")
  # month.abb holds the English abbreviations, so labels do not depend on
  # the session locale.
  month_labels <- paste0(
    month.abb[as.integer(format(month_starts, "%m"))],
    ", ",
    format(month_starts, "%Y")
  )
  setnames(
    df_wider1,
    old = c(
      paste0("ym_cases_", month_starts),
      paste0("ym_deaths_", month_starts)
    ),
    new = rep(month_labels, times = 2)
  )
})
#(df_wider)
# Plot-ready copy of the per-country monthly table with display names.
DF <- df2
names(DF)[names(DF) == "location"] <- "Country"
names(DF)[names(DF) == "ym"] <- "Date"
names(DF)[names(DF) == "ym_cases"] <- "Cases"
names(DF)[names(DF) == "ym_deaths"] <- "Deaths"

# Base plot: monthly cases, one line per country. The grey vertical line
# marks every date that is the first day of a year; unique() keeps it from
# being drawn once per country.
p <- ggplot(
  data = DF,
  aes(x = Date, y = Cases, group = Country, colour = Country)
) +
  geom_vline(
    xintercept = as.numeric(unique(DF$Date[yday(DF$Date) == 1])),
    colour = "grey80"
  ) +
  geom_line(size = 1.2, show.legend = TRUE) +
  scale_x_date(date_labels = "%b", date_breaks = "month", expand = c(0, 0)) +
  theme_bw()
p

# Add thousands separators on the y axis, titles and a source caption.
p1 <- p +
  scale_y_continuous(labels = scales::comma) +
  theme(axis.text.x = element_text(size = 8)) +
  labs(
    x = " ", y = "",
    title = "COVID-19 cases in Nordic countries",
    subtitle = "Monthly amounts from 2020 to 2021",
    caption = "Source: https://ourworldindata.org/, March 2021"
  ) +
  theme(plot.caption = element_text(color = "gray", face = "italic"))
p1

# Strip grid lines, panel border and y ticks; keep light grey axis lines.
# (Setting panel.background = element_blank() is unnecessary here.)
p2 <- p1 +
  theme(
    panel.grid = element_blank(),
    axis.line = element_line(colour = "gray"),
    axis.ticks.y = element_blank(),
    panel.border = element_blank()
  )
p2

# Final version: blue palette and a horizontal, untitled legend on top.
# NOTE(review): `p3` is only assigned here, not printed — confirm it is
# rendered elsewhere before `p3` is reassigned.
p3 <- p2 +
  scale_color_manual(
    values = c("#034e7b", "#3690c0", "#74a9cf", "#a6bddb", "#d8d6dd")
  ) +
  theme(
    legend.position = "top",
    legend.direction = "horizontal",
    legend.title = element_blank()
  )
This plot shows the total number of cases reported in each Nordic country by month. The date range runs from February 2020 to March 2021. A vertical line marks the beginning of 2021.
Here is an interactive version of the previous plot.
# Base plot: monthly deaths, one line per country. The grey vertical line
# marks every date that is the first day of a year; unique() keeps it from
# being drawn once per country.
p <- ggplot(
  data = DF,
  aes(x = Date, y = Deaths, group = Country, colour = Country)
) +
  geom_vline(
    xintercept = as.numeric(unique(DF$Date[yday(DF$Date) == 1])),
    colour = "grey80"
  ) +
  geom_line(size = 1) +
  scale_x_date(date_labels = "%b", date_breaks = "month", expand = c(0, 0)) +
  theme_bw()
p

# Add thousands separators on the y axis, titles and a source caption.
p1 <- p +
  scale_y_continuous(labels = scales::comma) +
  theme(axis.text.x = element_text(size = 8)) +
  labs(
    x = " ", y = "",
    title = "COVID-19 death cases in Nordic countries",
    subtitle = "Monthly amounts from 2020 to 2021",
    caption = "Source: https://ourworldindata.org/, March 2021"
  ) +
  theme(plot.caption = element_text(color = "gray", face = "italic"))
p1

# Strip grid lines, panel border and y ticks; keep light grey axis lines.
# (Setting panel.background = element_blank() is unnecessary here.)
p2 <- p1 +
  theme(
    panel.grid = element_blank(),
    axis.line = element_line(colour = "gray"),
    axis.ticks.y = element_blank(),
    panel.border = element_blank()
  )
p2

# Final version: blue palette and a horizontal, untitled legend on top.
p3 <- p2 +
  scale_color_manual(
    values = c("#034e7b", "#3690c0", "#74a9cf", "#a6bddb", "#d8d6dd")
  ) +
  theme(
    legend.position = "top",
    legend.direction = "horizontal",
    legend.title = element_blank()
  )
Another trend we can look at is the total number of coronavirus deaths per Nordic country, also displayed on a monthly basis. An interactive version is shown right below.
# Render the final styled plot held in `p3`.
p3
# Load plotly and convert the same ggplot object into an interactive widget.
library(plotly)
ggplotly(p3)
# Plot-ready copy of the per-country monthly table (including the rate and
# survival columns) with display names.
DF <- df.2
names(DF)[names(DF) == "location"] <- "Country"
names(DF)[names(DF) == "ym"] <- "Date"
names(DF)[names(DF) == "ym_cases"] <- "Cases"
names(DF)[names(DF) == "ym_deaths"] <- "Deaths"
names(DF)[names(DF) == "rate"] <- "Rate"
names(DF)[names(DF) == "survival"] <- "Survival"
#DF$Survival=as.numeric(DF$Survival)

# A death rate above 1 means more deaths than cases were recorded for that
# month, which would give a negative survival value. Such rows are selected
# by condition rather than by hard-coded row numbers or string comparison;
# in the table printed above they are rows 15 and 57 (rates 25.00 and 8.29).
# Rows with a missing rate are kept.
plausible <- is.na(DF$Rate) | DF$Rate <= 1
ind <- DF[!plausible, ]
DF2 <- DF[plausible, ]

# Base plot: monthly survival, one line per country. The grey vertical line
# marks every date that is the first day of a year; unique() keeps it from
# being drawn once per country.
p <- ggplot(
  data = DF2,
  aes(x = Date, y = Survival, group = Country, colour = Country)
) +
  geom_vline(
    xintercept = as.numeric(unique(DF2$Date[yday(DF2$Date) == 1])),
    colour = "grey80"
  ) +
  geom_line(size = 1) +
  scale_x_date(date_labels = "%b", date_breaks = "month", expand = c(0, 0)) +
  theme_bw()
p

# Show the y axis as whole percentages; add titles and a source caption.
p1 <- p +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme(axis.text.x = element_text(size = 8)) +
  labs(
    x = " ", y = "",
    title = "COVID-19 survival rate in Nordic countries",
    subtitle = "Monthly rates from 2020 to 2021",
    caption = "Source: https://ourworldindata.org/, March 2021"
  ) +
  theme(plot.caption = element_text(color = "gray", face = "italic"))
p1

# Strip grid lines, panel border and y ticks; keep light grey axis lines.
# (Setting panel.background = element_blank() is unnecessary here.)
p2 <- p1 +
  theme(
    panel.grid = element_blank(),
    axis.line = element_line(colour = "gray"),
    axis.ticks.y = element_blank(),
    panel.border = element_blank()
  )
p2

# Final version: blue palette and a horizontal, untitled legend on top.
p3 <- p2 +
  scale_color_manual(
    values = c("#034e7b", "#3690c0", "#74a9cf", "#a6bddb", "#d8d6dd")
  ) +
  theme(
    legend.position = "top",
    legend.direction = "horizontal",
    legend.title = element_blank()
  )
Moreover, there is an important relationship that stands out: the survival rate. This graph shows the percentage of cases that successfully recover. In Nordic countries, this rate is very high. Over time, the rates converge to 100%.
# Render the final styled plot held in `p3`.
p3
# Load plotly and convert the same ggplot object into an interactive widget.
library(plotly)
ggplotly(p3)
The change in the trend mentioned above is displayed by comparing the rate at the beginning of the pandemic with the most recent data.
library(scales)

# Keep only the two months being compared (March 2020 and March 2021).
# The dates are compared as Date values rather than bare strings.
newdata <- subset(
  DF2,
  Date %in% as.Date(c("2020-03-01", "2021-03-01")),
  select = c(Country, Date, Cases, Deaths, Rate, Survival)
)
#newdata

# Base plot: survival at the two dates, one line per country.
p <- ggplot(data = newdata, aes(x = Date, y = Survival, group = Country)) +
  geom_line(aes(color = Country), size = 1) +
  scale_x_date(date_labels = "%b,%Y")
p

# Show the y axis as whole percentages; add titles and a source caption.
p1 <- p +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme(axis.text.x = element_text(size = 8)) +
  labs(
    x = " ", y = "",
    title = "COVID-19 survival rate in Nordic countries",
    subtitle = "A year comparison (From March 2020 to March 2021)",
    caption = "Source: https://ourworldindata.org/, March 2021"
  ) +
  theme(plot.caption = element_text(color = "gray", face = "italic"))
p1

# Strip grid lines, panel border and y ticks; keep light grey axis lines.
# (Setting panel.background = element_blank() is unnecessary here.)
p2 <- p1 +
  theme(
    panel.grid = element_blank(),
    axis.line = element_line(colour = "gray"),
    axis.ticks.y = element_blank(),
    panel.border = element_blank()
  )
p2

# Final version: blue palette and a horizontal, untitled legend on top.
p3 <- p2 +
  scale_color_manual(
    values = c("#034e7b", "#3690c0", "#74a9cf", "#a6bddb", "#d8d6dd")
  ) +
  theme(
    legend.position = "top",
    legend.direction = "horizontal",
    legend.title = element_blank()
  )
p3