Init
library(pacman)
p_load(kirkegaard, readxl, lubridate)
theme_set(theme_bw())
Data
#read prior years data
#https://www.statistikbanken.dk/statbank5a/SelectVarVal/Define.asp?MainTable=DODDAG&TabStrip=Select&PLanguage=0&FF=20
dk_mortality_old = read_excel("data/2020414121317280118157DODDAG.xlsx", skip = 2)
## New names:
## * `` -> ...1
## * `` -> ...2
#https://www.statistikbanken.dk/dodc1
dk_mortality_2020 = read_excel("data/2020417132515280428596DODC1 2020.xlsx", range = "A2:B105") %>%
set_colnames(c("date_str", "deaths"))
## New names:
## * `` -> ...2
#convert to long
#and fix date
dk = dk_mortality_old %>%
gather(key = year, value = deaths, `2007`:`2019`) %>%
{
#names
colnames(.)[1:2] = c("month", "day")
#fill in month
fill(., month) %>%
mutate(
deaths = deaths %>% as.numeric(),
day = str_replace(day, "\\.", "") %>% as.numeric(),
date = str_glue("{year} {month} {day}"),
#as date format
#warnings are due to impossible values
date = date %>% parse_date(format = "%Y %B %d", locale = locale("da"))
) %>%
filter(!is.na(date))
} %>%
#join new data too
bind_rows(
dk_mortality_2020 %>%
mutate(
date = parse_date(date_str, format = "%YM%mD%d")
)
) %>%
#ensure we have year, month, weeks
mutate(
day = day(date),
week = week(date),
week_year = floor_date(date, unit="week") %>% year(),
week_year_combined = week + "_" + week_year,
year = year(date),
month = month(date)
) %>%
#is week complete?
plyr::ddply(c("week", "year"), function(dd) {
dd %>%
mutate(
week_days = nrow(dd),
week_frac = week_days / 7
)
}) %>%
#sort
arrange(date) %>%
as_tibble()
## Warning in function_list[[k]](value): NAs introduced by coercion
## Warning: 88 parsing failures.
## row col expected actual
## 60 -- valid date 2007 Februar 29
## 61 -- valid date 2007 Februar 30
## 62 -- valid date 2007 Februar 31
## 124 -- valid date 2007 April 31
## 186 -- valid date 2007 Juni 31
## ... ... .......... ...............
## See problems(...) for more details.
#aggregate
dk_week = plyr::ddply(dk, c("year", "week"), function(dd) {
tibble(
date_start = min(dd$date),
date_end = max(dd$date),
week_days = nrow(dd),
week_frac = week_days / 7,
deaths = sum(dd$deaths),
deaths_adj = deaths / week_frac
)
})
Plots
#daily
dk %>%
ggplot(aes(date, deaths, color = ordered(year))) +
geom_line(alpha = .3) +
# geom_smooth(aes(color = NULL), span = .1, method = "loess", color = "black", alpha = 0)
#alpha does not work as intended
stat_smooth(geom='line', alpha=0.7, method = "loess", mapping = aes(color = NULL), span = .005) +
scale_x_date(date_breaks = "year", labels = year) +
scale_color_discrete(guide = F) +
ggtitle("Daily deaths in Denmark, 2007-present",
"Smoothed line by LOESS")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/dk_daily_deaths.png")
## `geom_smooth()` using formula 'y ~ x'
#weekly deaths
dk_week %>%
ggplot(aes(week, deaths_adj, color = ordered(year))) +
geom_line(alpha = .7) +
#special color choice
scale_color_manual("Year", values = c(rainbow(dk_week$year %>% unique %>% length %>% subtract(1)), "black")) +
geom_line(data = dk_week %>% filter(year == 2020), size = 1) +
ggtitle("Weekly deaths in Denmark, 2007-present",
"Incomplete weeks adjusted to full week by linear extrapolation")

GG_save("figs/dk_weekly_deaths.png")