Init

library(pacman)
p_load(kirkegaard, readxl, lubridate)
theme_set(theme_bw())

Data

#read prior years data
#https://www.statistikbanken.dk/statbank5a/SelectVarVal/Define.asp?MainTable=DODDAG&TabStrip=Select&PLanguage=0&FF=20
dk_mortality_old = read_excel("data/2020414121317280118157DODDAG.xlsx", skip = 2)
## New names:
## * `` -> ...1
## * `` -> ...2
#https://www.statistikbanken.dk/dodc1
dk_mortality_2020 = read_excel("data/2020417132515280428596DODC1 2020.xlsx", range = "A2:B105") %>% 
  set_colnames(c("date_str", "deaths"))
## New names:
## * `` -> ...2
#convert to long
#and fix date
dk = dk_mortality_old %>% 
  gather(key = year, value = deaths, `2007`:`2019`) %>% 
  {
    #names
    colnames(.)[1:2] = c("month", "day")
    
    #fill in month
    fill(., month) %>% 
      mutate(
        deaths = deaths %>% as.numeric(),
        day = str_replace(day, "\\.", "") %>% as.numeric(),
        date = str_glue("{year} {month} {day}"),
        #as date format
        #warnings are due to impossible values
        date = date %>% parse_date(format = "%Y %B %d", locale = locale("da"))
      ) %>% 
      filter(!is.na(date))
  } %>% 
  #join new data too
  bind_rows(
    dk_mortality_2020 %>% 
      mutate(
        date = parse_date(date_str, format = "%YM%mD%d")
      )
  ) %>% 
  #ensure we have year, month, weeks
  mutate(
    day = day(date),
    week = week(date),
    week_year = floor_date(date, unit="week") %>% year(),
    week_year_combined = week + "_" + week_year,
    year = year(date),
    month = month(date)
  ) %>% 
  #is week complete?
  plyr::ddply(c("week", "year"), function(dd) {
    dd %>% 
      mutate(
        week_days = nrow(dd),
        week_frac = week_days / 7
      )
  }) %>% 
  #sort
  arrange(date) %>% 
  as_tibble()
## Warning in function_list[[k]](value): NAs introduced by coercion
## Warning: 88 parsing failures.
## row col   expected          actual
##  60  -- valid date 2007 Februar 29
##  61  -- valid date 2007 Februar 30
##  62  -- valid date 2007 Februar 31
## 124  -- valid date 2007 April 31  
## 186  -- valid date 2007 Juni 31   
## ... ... .......... ...............
## See problems(...) for more details.
#aggregate
dk_week = plyr::ddply(dk, c("year", "week"), function(dd) {
  tibble(
    date_start = min(dd$date),
    date_end = max(dd$date),
    week_days = nrow(dd),
    week_frac = week_days / 7,
    deaths = sum(dd$deaths),
    deaths_adj = deaths / week_frac
  )
})

Plots

#daily
dk %>% 
  ggplot(aes(date, deaths, color = ordered(year))) +
  geom_line(alpha = .3) +
  # geom_smooth(aes(color = NULL), span = .1, method = "loess", color = "black", alpha = 0)
  #alpha does not work as intended
  stat_smooth(geom='line', alpha=0.7, method = "loess", mapping = aes(color = NULL), span = .005) +
  scale_x_date(date_breaks = "year", labels = year) +
  scale_color_discrete(guide = F) +
  ggtitle("Daily deaths in Denmark, 2007-present",
          "Smoothed line by LOESS")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/dk_daily_deaths.png")
## `geom_smooth()` using formula 'y ~ x'
#weekly deaths
dk_week %>% 
  ggplot(aes(week, deaths_adj, color = ordered(year))) +
  geom_line(alpha = .7) +
  #special color choice
  scale_color_manual("Year", values = c(rainbow(dk_week$year %>% unique %>% length %>% subtract(1)), "black")) +
  geom_line(data = dk_week %>% filter(year == 2020), size = 1) +
  ggtitle("Weekly deaths in Denmark, 2007-present",
          "Incomplete weeks adjusted to full week by linear extrapolation")

GG_save("figs/dk_weekly_deaths.png")