Danish Coronavirus related data:
Norwegian data:
options(digits = 2)
library(pacman)
p_load(kirkegaard, googlesheets4, lubridate, ggrepel, jsonlite, rvest)
theme_set(theme_classic())
#convenience
#to percent label
format_value = function(x) (x*100) %>% format_digits(digits = 1) %>% str_c("%")
format_value(c(0, .05, .50, 1, .111, -.5))
## [1] "0.0%" "5.0%" "50.0%" "100.0%" "11.1%" "-50.0%"
#log10 convert
to_log10RR = function(x) (x + 1) %>% log10()
c(0, .05, .50, 1, .111, -.5) %>% to_log10RR()
## [1] 0.000 0.021 0.176 0.301 0.046 -0.301
#from log10 RR to percent
format_log10 = function(x) (10^x) %>% subtract(1) %>% format_value()
c(0, .05, .50, 1, .111, -.5) %>% to_log10RR() %>% format_log10()
## [1] "0.0%" "5.0%" "50.0%" "100.0%" "11.1%" "-50.0%"
#read sheet with manually extracted data
# googlesheets4::sheets_auth("the.dfx@gmail.com")
# d_orig = read_sheet("https://docs.google.com/spreadsheets/d/1o-2vsjaAsPzypZCC-rXplY8WVBO8kMsCpo6Or4Ek_xI/edit#gid=0", sheet = 1)
#scrape danish data
danish_raw = read_html("https://www.sst.dk/da/corona/tal-og-overvaagning")
#long format
danish_long = danish_raw %>%
html_table() %>%
.[5:7] %>%
map2_df(.y = c("hospitalized", "intensive", "respirator"), function(x, y) {
#fix column names
colnames = c("Date", "Region Nordjylland", "Region Midtjylland", "Region Syddanmark",
"Region Hovedstaden", "Region Sjælland", "Hele landet", "Type")
x[-c(1:2), ] %>%
mutate(
Type = y
) %>%
set_colnames(colnames)
})
#recode
d = danish_long %>%
#drop NA
miss_filter() %>%
#gather to long format
gather(key = region, value = number, `Region Nordjylland`:`Hele landet`) %>%
#fix variables
mutate(
number = number %>% str_replace_all("[✱ ]*", "") %>% as.numeric(),
Date = (Date + " 2020") %>% parse_date(format = "%d. %B %Y", locale = locale("da"))
)
#add growth values
#to do so, we need subgroup
d = d %>%
#sort by date first
arrange(Date) %>%
#then subgroup to each type and region
plyr::ddply(c("region", "Type"), function(dd) {
#add growth
dd %>%
mutate(
change = number - lag(number),
growth = change / lag(number),
log_RR = to_log10RR(growth),
country = "Denmark"
)
})
#norway
#get JSON
norway_raw = fromJSON(httr::GET("https://redutv-api.vg.no/corona/v1/areas/country/reports?include=hospitals") %>% as.character())
#mutate
norway = norway_raw$hospitals$timeseries$total %>%
as_tibble() %>%
select(date, hospitalized, respiratory) %>%
gather(key = Type, value = number, -date) %>%
arrange(date) %>%
#change values
plyr::ddply("Type", function(dd) {
dd %>% mutate(
change = number - lag(number),
growth = change / lag(number),
log_RR = to_log10RR(growth)
)
}) %>%
mutate(
date = date %>% ymd(),
country = "Norway",
Type = Type %>% mapvalues(from = "respiratory", to = "respirator")
) %>% rename(
Date = date
)
#join data
merged = d %>%
filter(region == "Hele landet") %>%
select(-region) %>%
bind_rows(
norway
)
#last update
(dk_last_update = format(max(d$Date),'%A, %d. %B %Y'))
## [1] "Monday, 06. April 2020"
#counts
d %>%
filter(region == "Hele landet") %>%
#plot
ggplot(aes(Date, number, color = Type)) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_path() +
geom_text_repel(aes(label = number)) +
scale_y_continuous("Count of persons") +
scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 2)) +
ggtitle("Danish🇩🇰 COVID-19 hospital data: number of hospitalized by type of care",
str_glue("Data updated as of {dk_last_update}"))
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
GG_save("figs/dk_counts.png")
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
#changes
d %>%
filter(region == "Hele landet") %>%
#plot
ggplot(aes(Date, change, color = Type)) +
scale_y_continuous("Change relative to day before") +
scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 2)) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_path() +
geom_text_repel(aes(label = change)) +
ggtitle("Danish🇩🇰 COVID-19 hospital data: change in number of hospitalized by type of care",
str_glue("Data updated as of {dk_last_update}"))
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_text_repel).
GG_save("figs/dk_change.png")
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_text_repel).
#growth
d %>%
filter(region == "Hele landet", !is.na(log_RR)) %>%
#plot
ggplot(aes(Date, log_RR, color = Type)) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_path() +
geom_text_repel(aes(label = growth %>% format_value()),
size = 3,
segment.color = "black",
segment.alpha = .3) +
scale_y_continuous("Percent change relative to day before",
labels = format_log10,
breaks = c(-.10, 0, .10, .5, 1, 3) %>% to_log10RR(),
limits = c(-.10, NA) %>% to_log10RR()) +
scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 3)) +
ggtitle("Danish🇩🇰 COVID-19 hospital data: growth in % in number of hospitalized by type of care",
str_glue("Data updated as of {dk_last_update}"))
## Warning: Removed 1 rows containing missing values (geom_text_repel).
GG_save("figs/dk_growth.png")
## Warning: Removed 1 rows containing missing values (geom_text_repel).
#last update
(no_last_update = format(max(norway$Date),'%A, %d. %B %Y'))
## [1] "Monday, 06. April 2020"
#counts
norway %>%
filter(!is.na(number)) %>%
#plot
ggplot(aes(Date, number, color = Type)) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_path() +
geom_text_repel(aes(label = number)) +
scale_y_continuous("Count of persons") +
scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 3)) +
ggtitle("Norwegian🇳🇴 COVID-19 hospital data: number of hospitalized by type of care",
str_glue("Data updated as of {no_last_update}"))
GG_save("figs/no_counts.png")
#changes
norway %>%
filter(!is.na(change)) %>%
#plot
ggplot(aes(Date, change, color = Type)) +
scale_y_continuous("Change relative to day before") +
scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 3)) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_path() +
geom_text_repel(aes(label = change)) +
ggtitle("Norwegian🇳🇴 COVID-19 hospital data: change in number of hospitalized by type of care",
str_glue("Data updated as of {no_last_update}"))
GG_save("figs/no_change.png")
#growth
norway %>%
filter(!is.na(log_RR)) %>%
#plot
ggplot(aes(Date, log_RR, color = Type)) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_path() +
geom_text_repel(aes(label = growth %>% format_value()),
size = 3,
segment.color = "black",
segment.alpha = .3) +
scale_y_continuous("Percent change relative to day before",
labels = format_log10,
breaks = c(-.10, 0, .10, .5, 1, 3) %>% to_log10RR(),
limits = c(-.10, NA) %>% to_log10RR()) +
scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 3)) +
theme_classic() +
ggtitle("Norwegian🇳🇴 COVID-19 hospital data: growth in % in number of hospitalized by type of care",
str_glue("Data updated as of {no_last_update}"))
GG_save("figs/no_growth.png")
#last update
(last_update = format(max(merged$Date),'%A, %d. %B %Y'))
## [1] "Monday, 06. April 2020"
#counts
merged %>%
filter(!is.na(number), Type != "intensive") %>%
#plot
ggplot(aes(Date, number, color = country)) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_path() +
geom_text_repel(aes(label = number),
size = 3,
segment.color = "black",
segment.alpha = .3) +
scale_y_continuous("Count of persons") +
scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 3)) +
facet_wrap("Type", nrow = 2, scales = "free_y") +
ggtitle("Danish🇩🇰 & Norwegian🇳🇴 COVID-19 hospital data: number of hospitalized by type of care",
str_glue("Data updated as of {last_update}"))
GG_save("figs/counts.png")
#changes
merged %>%
filter(!is.na(change), Type != "intensive") %>%
#plot
ggplot(aes(Date, change, color = country)) +
scale_y_continuous("Change relative to day before") +
scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 3)) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_path() +
geom_text_repel(aes(label = change),
size = 3,
segment.color = "black",
segment.alpha = .3) +
facet_wrap("Type", nrow = 2, scales = "free_y") +
theme_classic() +
ggtitle("Danish🇩🇰 & Norwegian🇳🇴 COVID-19 hospital data: change in number of hospitalized by type of care",
str_glue("Data updated as of {last_update}"))
GG_save("figs/change.png")
#plot the log10 RR's, then relabel the breaks to fit
merged %>%
filter(!is.na(log_RR), Type != "intensive") %>%
#plot
ggplot(aes(Date, log_RR, color = country)) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_line() +
geom_text_repel(aes(label = growth %>% format_value()),
size = 3,
segment.color = "black",
segment.alpha = .3) +
scale_y_continuous("Percent change relative to day before",
labels = format_log10,
breaks = c(-.10, 0, .10, .5, 1, 3) %>% to_log10RR(),
limits = c(NA, NA) %>% to_log10RR()) +
scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 3)) +
facet_wrap("Type", nrow = 2, scales = "free_y") +
ggtitle("Danish🇩🇰 & Norwegian🇳🇴 COVID-19 hospital data: growth in % in number of hospitalized by type of care",
str_glue("Data updated as of {last_update}"))
GG_save("figs/growth.png")
#versions
write_sessioninfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=de_DE.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=de_DE.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] rvest_0.3.5 xml2_1.3.0 jsonlite_1.6.1
## [4] ggrepel_0.8.2 lubridate_1.7.4 googlesheets4_0.1.1
## [7] kirkegaard_2018.05 metafor_2.4-0 Matrix_1.2-18
## [10] psych_1.9.12.31 magrittr_1.5 assertthat_0.2.1
## [13] weights_1.0.1 mice_3.8.0 gdata_2.18.0
## [16] Hmisc_4.4-0 Formula_1.2-3 survival_3.1-11
## [19] lattice_0.20-40 forcats_0.5.0 stringr_1.4.0
## [22] dplyr_0.8.5 purrr_0.3.3 readr_1.3.1
## [25] tidyr_1.0.2 tibble_3.0.0 ggplot2_3.3.0
## [28] tidyverse_1.3.0 pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-145 fs_1.4.1 RColorBrewer_1.1-2
## [4] httr_1.4.1 tools_3.6.3 backports_1.1.5
## [7] R6_2.4.1 rpart_4.1-15 DBI_1.1.0
## [10] colorspace_1.4-1 nnet_7.3-13 withr_2.1.2
## [13] tidyselect_1.0.0 gridExtra_2.3 mnormt_1.5-6
## [16] curl_4.3 compiler_3.6.3 cli_2.0.2
## [19] htmlTable_1.13.3 labeling_0.3 scales_1.1.0
## [22] checkmate_2.0.0 digest_0.6.25 foreign_0.8-76
## [25] rmarkdown_2.1 base64enc_0.1-3 jpeg_0.1-8.1
## [28] pkgconfig_2.0.3 htmltools_0.4.0 dbplyr_1.4.2
## [31] htmlwidgets_1.5.1 rlang_0.4.5 readxl_1.3.1
## [34] rstudioapi_0.11 farver_2.0.3 generics_0.0.2
## [37] gtools_3.8.2 acepack_1.4.1 Rcpp_1.0.4
## [40] munsell_0.5.0 fansi_0.4.1 lifecycle_0.2.0
## [43] stringi_1.4.6 yaml_2.2.1 plyr_1.8.6
## [46] grid_3.6.3 parallel_3.6.3 crayon_1.3.4
## [49] haven_2.2.0 splines_3.6.3 hms_0.5.3
## [52] knitr_1.28 pillar_1.4.3 reprex_0.3.0
## [55] glue_1.4.0 evaluate_0.14 latticeExtra_0.6-29
## [58] data.table_1.12.8 modelr_0.1.6 selectr_0.4-2
## [61] png_0.1-7 vctrs_0.2.4 cellranger_1.1.0
## [64] gtable_0.3.0 xfun_0.12 broom_0.5.5
## [67] cluster_2.1.0 ellipsis_0.3.0
#write data out
write_csv(merged, "data/merge_data.csv", na = "")
write_csv(d, "data/dk_data.csv", na = "")
write_csv(norway, "data/no_data.csv", na = "")