library(readr)
library(dplyr)
library(ggpubr)
library(lubridate)
library(mgcv)
Sudden weather changes were shown to have effect on mortality in Czechia. The authors found strong effect of atmospheric front passages on mortality in case of cold fronts in summer season. They observe mortality displacement effect (increased mortality before the passage of cold front and reduced mortality after the passage of cold front). This document shows similar analysis using daily road accident counts and data about passage of atmospheric fronts.
Data about atmospheric front passing in Prague are available on the web of Czech Hydrometeorological Institute and were downloaded using web crawler written in python.
head(fronts)
## # A tibble: 6 × 9
## year month day time type direction intenzity date season
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <date> <chr>
## 1 2021 1 2 23 O S M 2021-01-02 winter
## 2 2021 1 11 17 O W W 2021-01-11 winter
## 3 2021 1 12 11 O NW M 2021-01-12 winter
## 4 2021 1 13 23 S NW M 2021-01-13 winter
## 5 2021 1 15 13 S N M 2021-01-15 winter
## 6 2021 1 18 3 O NW M 2021-01-18 winter
Data about road accident counts were taken from database collected by the Police. Only road accidents close to Prague Airport meteorological station (< 50 km) were used.
head(road_accidents)
## date total
## 1 2016-11-10 176
## 2 2020-07-08 137
## 3 2014-10-25 55
## 4 2011-10-05 116
## 5 2024-08-07 73
## 6 2019-11-04 178
To each day, binary information - national holiday - was appended, together with day of year, day of week, number of days from the start (to account for long term trend) and information whereher the accident was during covid years (2020 or 2021).
# determine if is czech holiday
isCzHoliday = function(date) {
# parse date to more useful chunks
y = lubridate::year(date)
m = lubridate::month(date)
d = lubridate::day(date)
#md = format(date,"%m-%d")
# fixed holidays
#fixed_holiday = c("01-01","05-01","05-08","07-05","07-06","10-28","12-24","12-25","12-26")
fixed_holiday = matrix(c(1,1,5,1,5,8,7,5,7,6,10,28,12,24,12,25,12,26),ncol=2,byrow=T)
w = which(fixed_holiday[,1]==m & fixed_holiday[,2]==d)
if (length(w) == 1) {
return(TRUE)
}
# new holiday - Den Ceske statnosti
if (y >= 2000 & m == 9 & d == 28) {
return(TRUE)
}
### easter
easter = matrix(c(1995,4,17,1996,4,8,1997,3,31,1998,4,13,1999,4,5,2000,4,24,2001,4,16,2002,4,1,2003,4,21,2004,4,12,2005,3,28,2006,4,17,2007,4,9,2008,3,24,2009,4,13,2010,4,5,2011,4,25,2012,4,9,2013,4,1,2014,4,21,2015,4,6,2016,3,25,2016,3,28,2017,4,14,2017,4,17,2018,4,2,2018,3,30,2019,4,22,2019,4,19,2020,4,13,2020,4,10,2021,4,5,2021,4,2,2022,4,18,2022,4,15,2023,4,10,2023,4,7), ncol=3,byrow=T)
w = which(easter[,1]==y & easter[,2]==m & easter[,3]==d)
if (length(w) == 1) {
return(TRUE)
}
# no holiday - return false
return(FALSE)
}
road_accidents <- road_accidents %>%
mutate(
doy = lubridate::yday(date),
dow = lubridate::wday(date, label = TRUE),
t = as.numeric(date - min(date)),
covid = if_else(year(date) %in% 2020:2021, TRUE, FALSE),
total = as.numeric(total)
) %>%
rowwise() %>%
mutate(
holiday = isCzHoliday(date)
) %>%
arrange(date)
head(road_accidents)
## # A tibble: 6 × 7
## # Rowwise:
## date total doy dow t covid holiday
## <date> <dbl> <dbl> <ord> <dbl> <lgl> <lgl>
## 1 2010-01-01 99 1 pá 0 FALSE TRUE
## 2 2010-01-02 40 2 so 1 FALSE FALSE
## 3 2010-01-03 56 3 ne 2 FALSE FALSE
## 4 2010-01-04 97 4 po 3 FALSE FALSE
## 5 2010-01-05 68 5 út 4 FALSE FALSE
## 6 2010-01-06 97 6 st 5 FALSE FALSE
GAM model was constructed, using day of year (cubic spline), day of week (categorical variable), long-term trend (2nd order polynomial), holiday (logical) and covid (logical) as independent variables.
fit <- gam(total ~ s(doy, bs = "cc", k = 20) + dow + poly(t, 2) + holiday + covid, family = quasipoisson(link = "log"), data = road_accidents)
summary(fit)
##
## Family: quasipoisson
## Link function: log
##
## Formula:
## total ~ s(doy, bs = "cc", k = 20) + dow + poly(t, 2) + holiday +
## covid
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.611476 0.003414 1350.574 < 2e-16 ***
## dow.L 0.101319 0.009194 11.020 < 2e-16 ***
## dow.Q -0.634501 0.009070 -69.960 < 2e-16 ***
## dow.C 0.036926 0.008423 4.384 1.19e-05 ***
## dow^4 -0.276360 0.007821 -35.334 < 2e-16 ***
## dow^5 0.020694 0.007575 2.732 0.00631 **
## dow^6 -0.057673 0.007482 -7.708 1.50e-14 ***
## poly(t, 2)1 -3.333187 0.254014 -13.122 < 2e-16 ***
## poly(t, 2)2 -9.987244 0.243094 -41.084 < 2e-16 ***
## holidayTRUE -0.593644 0.023897 -24.841 < 2e-16 ***
## covidTRUE -0.106429 0.009864 -10.790 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(doy) 17.78 18 59.12 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.645 Deviance explained = 65.2%
## GCV = 5.2074 Scale est. = 5.3755 n = 5722
road_accidents$fitted_values <- fit$fitted.values
road_accidents$residual_values <- as.double(road_accidents$total) - fit$fitted.values
The model is fairly good and explains 65.23 % of variation in data. All the terms of the model are statistically significant on 0.05 level.
Monte Carlo simulations will be used to evaluate residuals of GAM model against atmospheric front passages. Ten day windows will be used. For two seasons (winter/summer) and three front types (warm, cold, occlusion), expected and observed residuals of GAM model will be calculated.
getMCBounds <- function(data, season, mindate=as.Date('2010/01/01'), n=10000, q=c(0.975,0.95,0.05,0.025)) {
# days in winter
all_winter_dates <- seq(as.Date('2010/01/01'), as.Date('2010/04/30'), by="day")
all_summer_dates <- c()
for (y in 2010:2023) {
all_winter_dates <- c(all_winter_dates,seq(as.Date(paste0(y,'/10/01')), as.Date(paste0(y+1,'/04/30')), by="day"))
all_summer_dates <- c(all_summer_dates,seq(as.Date(paste0(y,'/05/01')), as.Date(paste0(y,'/09/30')), by="day"))
}
all_winter_dates <- c(all_winter_dates,seq(as.Date('2024/10/01'), as.Date('2024/12/31'), by="day"))
all_summer_dates <- c(all_summer_dates,seq(as.Date('2024/05/01'), as.Date('2024/09/30'), by="day"))
# create empty matrices for winter and summer
mc_data <- matrix(NA,ncol=10,nrow=n)
# simulate
for (i in 1:n) {
# select randomly dates
if (season=="winter") {
rdate <- sample(all_winter_dates[all_winter_dates >= mindate],1)
} else {
rdate <- sample(all_summer_dates[all_summer_dates >= mindate],1)
}
# calculate time difference
tdiff <- as.numeric(data$date-rdate)
# save data into matrix
mc_data[i,] <- (data$residual_values/data$fitted_values)[between(tdiff,0,9)]
}
# calculate bounds
mc_bounds <- apply(mc_data,2,quantile, probs=q) %>% apply(.,1,median)
return(mc_bounds)
}
### calculate
# for both seasons
window_days <- -3:6
seasons <- unique(fronts$season)
front_types <- unique(fronts$type)
for (sez in seasons) {
# for warm and cold front
pltlist = list()
for (front_type in front_types) {
# select fronts
cf <- fronts %>%
filter(intenzity == "S" & season == sez & type == front_type)
if(nrow(cf)==0) {
print("Skipping - no fronts")
next
}
# get deviations
residual_windows <- bind_rows(lapply(cf$date, function(cf_date) {
window_dates <- cf_date + window_days
road_accidents %>%
filter(date %in% window_dates) %>%
mutate(relative_day = as.integer(date - cf_date))
}))
# for three data sets
#dsetlist <- c("total_accidents","cyclists","chodci","auta","old")
# bound
bnds = getMCBounds(road_accidents,sez, mindate=as.Date("2010/01/01"))
# plot
residual_windows$res_perc = (residual_windows$residual_values/residual_windows$fitted_values)
plt1 <- residual_windows %>%
group_by(relative_day) %>%
summarise(avg=median(res_perc)) %>%
# --- Plot distribution of residuals ---
ggplot(aes(x = relative_day, y = avg)) +
geom_hline(yintercept=bnds, color="red", linetype=c("dashed","solid","solid","dashed")) +
geom_bar(stat = "identity") +
labs(
title = ,
x = paste0("Days relative to front"),
y = "Residuals (proportion)"
) +
theme_minimal()
# plot into list
pltlist[[length(pltlist)+1]] <- plt1
}
# all plots in one
figure <- ggarrange(pltlist[[1]],pltlist[[2]],pltlist[[3]],
labels = c("Occlusion", "Cold", "Warm"),
ncol = 3, nrow = 1)
figure <- annotate_figure(figure,
top = paste0("Residuals relative to front dates in ", sez))
print(figure)
}
The charts show that in winter, there is not significant change in road accident counts when atmospheric fronts are passing.
In summer, there is significant increase in road accident counts on the day when the warm front is passing. The increase in road accident counts is 85.6 %. For cold and occlusion front there is no observable effect.
Observed relation between passing of fronts and road accident counts is different from the relation observed between passing of fronts and mortality. Increase in road accidents is observed for warm fronts and there is no displacement effect.