Data

Danish Coronavirus related data:

Norwegian data:

Init

options(digits = 2)
library(pacman)
p_load(kirkegaard, googlesheets4, lubridate, ggrepel, jsonlite, rvest)
theme_set(theme_classic())

#convenience
#to percent label
format_value = function(x) (x*100) %>% format_digits(digits = 1) %>% str_c("%")

format_value(c(0, .05, .50, 1, .111, -.5))
## [1] "0.0%"   "5.0%"   "50.0%"  "100.0%" "11.1%"  "-50.0%"
#log10 convert
to_log10RR = function(x) (x + 1) %>% log10()

c(0, .05, .50, 1, .111, -.5) %>% to_log10RR()
## [1]  0.000  0.021  0.176  0.301  0.046 -0.301
#from log10 RR to percent
format_log10 = function(x) (10^x) %>% subtract(1) %>% format_value()

c(0, .05, .50, 1, .111, -.5) %>% to_log10RR() %>% format_log10()
## [1] "0.0%"   "5.0%"   "50.0%"  "100.0%" "11.1%"  "-50.0%"

Data

#read sheet with manually extracted data
# googlesheets4::sheets_auth("the.dfx@gmail.com")
# d_orig = read_sheet("https://docs.google.com/spreadsheets/d/1o-2vsjaAsPzypZCC-rXplY8WVBO8kMsCpo6Or4Ek_xI/edit#gid=0", sheet = 1)

#scrape danish data
danish_raw = read_html("https://www.sst.dk/da/corona/tal-og-overvaagning")

#long format
danish_long = danish_raw %>% 
  html_table() %>% 
  .[5:7] %>% 
  map2_df(.y = c("hospitalized", "intensive", "respirator"), function(x, y) {
    #fix column names
    colnames = c("Date", "Region Nordjylland", "Region Midtjylland", "Region Syddanmark", 
"Region Hovedstaden", "Region Sjælland", "Hele landet", "Type")
    
    x[-c(1:2), ] %>% 
      mutate(
      Type = y
    ) %>% 
      set_colnames(colnames)
  })

#recode
d = danish_long %>% 
  #drop NA
  miss_filter() %>% 
  #gather to long format
  gather(key = region, value = number, `Region Nordjylland`:`Hele landet`) %>% 
  #fix variables
  mutate(
    number = number %>% str_replace_all("[✱ ]*", "") %>% as.numeric(),
    Date = (Date + " 2020") %>% parse_date(format = "%d. %B %Y", locale = locale("da"))
    )

#add growth values
#to do so, we need subgroup
d = d %>% 
  #sort by date first
  arrange(Date) %>% 
  #then subgroup to each type and region
  plyr::ddply(c("region", "Type"), function(dd) {
    #add growth
    dd %>% 
      mutate(
        change = number - lag(number),
        growth = change / lag(number),
        log_RR = to_log10RR(growth),
        country = "Denmark"
      )
  })

#norway
#get JSON
norway_raw = fromJSON(httr::GET("https://redutv-api.vg.no/corona/v1/areas/country/reports?include=hospitals") %>% as.character())

#mutate
norway = norway_raw$hospitals$timeseries$total %>% 
  as_tibble() %>% 
  select(date, hospitalized, respiratory) %>% 
  gather(key = Type, value = number, -date) %>% 
  arrange(date) %>% 
  #change values
  plyr::ddply("Type", function(dd) {
      dd %>% mutate(
        change = number - lag(number),
        growth = change / lag(number),
        log_RR = to_log10RR(growth)
      )
  }) %>% 
  mutate(
    date = date %>% ymd(),
    country = "Norway",
    Type = Type %>% mapvalues(from = "respiratory", to = "respirator")
  ) %>% rename(
    Date = date
  )

#join data
merged = d %>% 
  filter(region == "Hele landet") %>% 
  select(-region) %>% 
  bind_rows(
    norway
  )

Plots

Denmark only

#last update
(dk_last_update = format(max(d$Date),'%A, %d. %B %Y'))
## [1] "Monday, 06. April 2020"
#counts
d %>% 
  filter(region == "Hele landet") %>% 
  #plot
  ggplot(aes(Date, number, color = Type)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_path() +
  geom_text_repel(aes(label = number)) +
  scale_y_continuous("Count of persons") +
  scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 2)) +
  ggtitle("Danish🇩🇰 COVID-19 hospital data: number of hospitalized by type of care",
          str_glue("Data updated as of {dk_last_update}"))
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_text_repel).

GG_save("figs/dk_counts.png")
## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_text_repel).
#changes
d %>% 
  filter(region == "Hele landet") %>% 
  #plot
  ggplot(aes(Date, change, color = Type)) +
  scale_y_continuous("Change relative to day before") +
  scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 2)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_path() +
  geom_text_repel(aes(label = change)) +
  ggtitle("Danish🇩🇰 COVID-19 hospital data: change in number of hospitalized by type of care",
          str_glue("Data updated as of {dk_last_update}"))
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_text_repel).

GG_save("figs/dk_change.png")
## Warning: Removed 4 row(s) containing missing values (geom_path).

## Warning: Removed 4 rows containing missing values (geom_text_repel).
#growth
d %>% 
  filter(region == "Hele landet", !is.na(log_RR)) %>% 
  #plot
  ggplot(aes(Date, log_RR, color = Type)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_path() +
  geom_text_repel(aes(label = growth %>% format_value()),
                  size = 3,
                  segment.color = "black",
                  segment.alpha = .3) +
  
  scale_y_continuous("Percent change relative to day before",
                     labels = format_log10,
                     breaks = c(-.10, 0, .10, .5, 1, 3) %>% to_log10RR(),
                     limits = c(-.10, NA) %>% to_log10RR()) +
  scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 3)) +
  ggtitle("Danish🇩🇰 COVID-19 hospital data: growth in % in number of hospitalized by type of care",
          str_glue("Data updated as of {dk_last_update}"))
## Warning: Removed 1 rows containing missing values (geom_text_repel).

GG_save("figs/dk_growth.png")
## Warning: Removed 1 rows containing missing values (geom_text_repel).

Norway only

#last update
(no_last_update = format(max(norway$Date),'%A, %d. %B %Y'))
## [1] "Monday, 06. April 2020"
#counts
norway %>% 
  filter(!is.na(number)) %>% 
  #plot
  ggplot(aes(Date, number, color = Type)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_path() +
  geom_text_repel(aes(label = number)) +
  scale_y_continuous("Count of persons") +
  scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 3)) +
  ggtitle("Norwegian🇳🇴 COVID-19 hospital data: number of hospitalized by type of care",
          str_glue("Data updated as of {no_last_update}"))

GG_save("figs/no_counts.png")

#changes
norway %>% 
  filter(!is.na(change)) %>% 
  #plot
  ggplot(aes(Date, change, color = Type)) +
  scale_y_continuous("Change relative to day before") +
  scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 3)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_path() +
  geom_text_repel(aes(label = change)) +
  ggtitle("Norwegian🇳🇴 COVID-19 hospital data: change in number of hospitalized by type of care",
          str_glue("Data updated as of {no_last_update}"))

GG_save("figs/no_change.png")

#growth
norway %>% 
  filter(!is.na(log_RR)) %>% 
  #plot
  ggplot(aes(Date, log_RR, color = Type)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_path() +
  geom_text_repel(aes(label = growth %>% format_value()),
                  size = 3,
                  segment.color = "black",
                  segment.alpha = .3) +
  
  scale_y_continuous("Percent change relative to day before",
                     labels = format_log10,
                     breaks = c(-.10, 0, .10, .5, 1, 3) %>% to_log10RR(),
                     limits = c(-.10, NA) %>% to_log10RR()) +
  scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 3)) +
  theme_classic() +
  ggtitle("Norwegian🇳🇴 COVID-19 hospital data: growth in % in number of hospitalized by type of care",
          str_glue("Data updated as of {no_last_update}"))

GG_save("figs/no_growth.png")

Combined

#last update
(last_update = format(max(merged$Date),'%A, %d. %B %Y'))
## [1] "Monday, 06. April 2020"
#counts
merged %>% 
  filter(!is.na(number), Type != "intensive") %>% 
  #plot
  ggplot(aes(Date, number, color = country)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_path() +
  geom_text_repel(aes(label = number),
                  size = 3,
                  segment.color = "black",
                  segment.alpha = .3) +
  scale_y_continuous("Count of persons") +
  scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 3)) +
  facet_wrap("Type", nrow = 2, scales = "free_y") +
  ggtitle("Danish🇩🇰 & Norwegian🇳🇴 COVID-19 hospital data: number of hospitalized by type of care",
          str_glue("Data updated as of {last_update}"))

GG_save("figs/counts.png")

#changes
merged %>% 
  filter(!is.na(change), Type != "intensive") %>% 
  #plot
  ggplot(aes(Date, change, color = country)) +
  scale_y_continuous("Change relative to day before") +
  scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 3)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_path() +
  geom_text_repel(aes(label = change),
                  size = 3,
                  segment.color = "black",
                  segment.alpha = .3) +
  facet_wrap("Type", nrow = 2, scales = "free_y") +
  theme_classic() +
  ggtitle("Danish🇩🇰 & Norwegian🇳🇴 COVID-19 hospital data: change in number of hospitalized by type of care",
          str_glue("Data updated as of {last_update}"))

GG_save("figs/change.png")

#plot the log10 RR's, then relabel the breaks to fit
merged %>% 
  filter(!is.na(log_RR), Type != "intensive") %>% 
  #plot
  ggplot(aes(Date, log_RR, color = country)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_line() +
  geom_text_repel(aes(label = growth %>% format_value()),
                  size = 3,
                  segment.color = "black",
                  segment.alpha = .3) +
  scale_y_continuous("Percent change relative to day before",
                     labels = format_log10,
                     breaks = c(-.10, 0, .10, .5, 1, 3) %>% to_log10RR(),
                     limits = c(NA, NA) %>% to_log10RR()) +
  scale_x_date(date_breaks = "1 day", guide = guide_axis(n.dodge = 3)) +
  facet_wrap("Type", nrow = 2, scales = "free_y") +
  ggtitle("Danish🇩🇰 & Norwegian🇳🇴 COVID-19 hospital data: growth in % in number of hospitalized by type of care",
          str_glue("Data updated as of {last_update}"))

GG_save("figs/growth.png")

Meta

#versions
write_sessioninfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=de_DE.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=de_DE.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rvest_0.3.5         xml2_1.3.0          jsonlite_1.6.1     
##  [4] ggrepel_0.8.2       lubridate_1.7.4     googlesheets4_0.1.1
##  [7] kirkegaard_2018.05  metafor_2.4-0       Matrix_1.2-18      
## [10] psych_1.9.12.31     magrittr_1.5        assertthat_0.2.1   
## [13] weights_1.0.1       mice_3.8.0          gdata_2.18.0       
## [16] Hmisc_4.4-0         Formula_1.2-3       survival_3.1-11    
## [19] lattice_0.20-40     forcats_0.5.0       stringr_1.4.0      
## [22] dplyr_0.8.5         purrr_0.3.3         readr_1.3.1        
## [25] tidyr_1.0.2         tibble_3.0.0        ggplot2_3.3.0      
## [28] tidyverse_1.3.0     pacman_0.5.1       
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-145        fs_1.4.1            RColorBrewer_1.1-2 
##  [4] httr_1.4.1          tools_3.6.3         backports_1.1.5    
##  [7] R6_2.4.1            rpart_4.1-15        DBI_1.1.0          
## [10] colorspace_1.4-1    nnet_7.3-13         withr_2.1.2        
## [13] tidyselect_1.0.0    gridExtra_2.3       mnormt_1.5-6       
## [16] curl_4.3            compiler_3.6.3      cli_2.0.2          
## [19] htmlTable_1.13.3    labeling_0.3        scales_1.1.0       
## [22] checkmate_2.0.0     digest_0.6.25       foreign_0.8-76     
## [25] rmarkdown_2.1       base64enc_0.1-3     jpeg_0.1-8.1       
## [28] pkgconfig_2.0.3     htmltools_0.4.0     dbplyr_1.4.2       
## [31] htmlwidgets_1.5.1   rlang_0.4.5         readxl_1.3.1       
## [34] rstudioapi_0.11     farver_2.0.3        generics_0.0.2     
## [37] gtools_3.8.2        acepack_1.4.1       Rcpp_1.0.4         
## [40] munsell_0.5.0       fansi_0.4.1         lifecycle_0.2.0    
## [43] stringi_1.4.6       yaml_2.2.1          plyr_1.8.6         
## [46] grid_3.6.3          parallel_3.6.3      crayon_1.3.4       
## [49] haven_2.2.0         splines_3.6.3       hms_0.5.3          
## [52] knitr_1.28          pillar_1.4.3        reprex_0.3.0       
## [55] glue_1.4.0          evaluate_0.14       latticeExtra_0.6-29
## [58] data.table_1.12.8   modelr_0.1.6        selectr_0.4-2      
## [61] png_0.1-7           vctrs_0.2.4         cellranger_1.1.0   
## [64] gtable_0.3.0        xfun_0.12           broom_0.5.5        
## [67] cluster_2.1.0       ellipsis_0.3.0
#write data out
write_csv(merged, "data/merge_data.csv", na = "")
write_csv(d, "data/dk_data.csv", na = "")
write_csv(norway, "data/no_data.csv", na = "")