library(magrittr)
library(utils)
library(ggplot2)
library(sf)
library(sp)
library(rgdal)
library(dplyr)
library(kableExtra)
### Prepare data
load("cases.RData")
### Observed/expected table (for knitr)
kable(
cases %>%
group_by(Year) %>%
summarise(`Observed` = format(sum(Observed), big.mark=","), `Expected` = format(sum(Expected), big.mark=",")),
booktabs = TRUE,
caption = "Aggregated Admissions"
) %>% kable_styling(., full_width = TRUE)
| Year | Observed | Expected |
|---|---|---|
| 2003 | NA | NA |
| 2004 | NA | NA |
| 2005 | NA | NA |
| 2006 | NA | NA |
| 2007 | NA | NA |
| 2008 | NA | NA |
| 2009 | NA | NA |
| 2010 | NA | NA |
| 2011 | NA | NA |
| 2012 | NA | NA |
| 2013 | 42,399 | 44,258.5 |
| 2014 | 45,018 | 43,732.9 |
| 2015 | 49,738 | 43,690.5 |
| 2016 | 52,879 | 43,984.1 |
| 2017 | 50,710 | 43,316.6 |
| 2018 | 56,345 | 42,555 |
| 2019 | 56,505 | 41,570 |
missing <- data.frame()
missing_LA <- list()
for (i in 2003:2019) {
o <- cases[cases$Year == i, ]$Observed %>%
is.na() %>%
which() %>%
length()
e <- cases[cases$Year == i, ]$Expected %>%
is.na() %>%
which() %>%
length()
s <- cases[cases$Year == i, ]$SAR %>%
is.na() %>%
which() %>%
length()
p <- cases[cases$Year == i, ]$Population %>%
is.na() %>%
which() %>%
length()
row <- data.frame(Year = i, Observed = o, Expected = e, SAR = s, Population = p)
missing <- rbind(missing, row)
which <- cases[cases$Year == i, ]$SAR %>%
is.na() %>%
which()
missing_LA <- c(missing_LA, list(cases[cases$Year == i, ][which, ]$Level.description))
}
kable(missing, caption = "Missing Observations") %>% kable_styling(., full_width = TRUE)
| Year | Observed | Expected | SAR | Population |
|---|---|---|---|---|
| 2003 | 120 | 120 | 120 | 0 |
| 2004 | 99 | 99 | 99 | 0 |
| 2005 | 114 | 114 | 114 | 0 |
| 2006 | 127 | 127 | 127 | 0 |
| 2007 | 108 | 108 | 108 | 0 |
| 2008 | 77 | 77 | 77 | 0 |
| 2009 | 105 | 105 | 105 | 0 |
| 2010 | 129 | 129 | 129 | 0 |
| 2011 | 115 | 115 | 115 | 0 |
| 2012 | 73 | 73 | 73 | 0 |
| 2013 | 0 | 0 | 0 | 0 |
| 2014 | 0 | 0 | 0 | 0 |
| 2015 | 0 | 0 | 0 | 0 |
| 2016 | 0 | 0 | 0 | 0 |
| 2017 | 0 | 0 | 0 | 0 |
| 2018 | 0 | 0 | 0 | 0 |
| 2019 | 0 | 0 | 0 | 0 |
cases <- cases[cases$Year >= 2013, ]
temp<-cases %>% group_by(Year) %>% summarise(`Observed` = sum(Observed), `Expected` = sum(Expected))
ggplot(temp, aes(x=Year)) +
geom_line(aes(y = Observed), color = "darkred") +
geom_line(aes(y = Expected), color="steelblue") +
scale_color_manual(name = "Admissions", values = c("Observed" = "darkred", "Expected" = "steelblue"))
load("pollution.RData")
P25<- P25[P25$Year >= 2013,]
mu = mean(NO2$NO2)
sd = sd(NO2$NO2)
hist(P25$PM25,
main="Concentration of PM2.5",
xlab="µg/m3",
col="yellow")
abline(v=mean(P25$PM25), col="blue")
hist(NO2$NO2,
main="Concentration of NO2",
xlab="µg/m3",
col="yellow")
abline(v=mu, col="red")
#correlation
# Calculate the correlation matrix
temp<-left_join(P25, NO2, by=c("lad19nm", "Year")) %>% select(lad19nm, Year, PM25, NO2)
corr_matrix <- cor(temp[,3:4])
print(corr_matrix)
## PM25 NO2
## PM25 1.0000000 0.7452461
## NO2 0.7452461 1.0000000
Maps of SAR by LTLA by year
# Create colour pallette
library(RColorBrewer)
library(spdep)
colors <- c("#D0F0C0", brewer.pal(4, "Reds"))
#I think it's not divided by 100
cases$SAR.cut <- cut(
cases$SAR,
breaks = c(min(cases$SAR[!is.na(cases$SAR)]), 1, 2.5, 5, max(cases$SAR[!is.na(cases$SAR)])),
)
# Read shapefile
shapefile_path <- "../Boundaries (2019)/England/Local_Authority_Districts_December_2019_England.shp"
map <- st_read(shapefile_path)
## Reading layer `Local_Authority_Districts_December_2019_England' from data source `/Users/cam/Library/CloudStorage/OneDrive-ImperialCollegeLondon/LRTI project/Boundaries (2019)/England/Local_Authority_Districts_December_2019_England.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 317 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -714517.9 ymin: 6422874 xmax: 196334.8 ymax: 7520900
## Projected CRS: WGS 84 / Pseudo-Mercator
map <- map[map$lad19nm != "Isles of Scilly",]
map_nb <- poly2nb(map)
summary(map_nb)
## Neighbour list object:
## Number of regions: 316
## Number of nonzero links: 1610
## Percentage nonzero weights: 1.612322
## Average number of links: 5.094937
## 1 region with no links:
## 44
## Link number distribution:
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 1 8 25 39 54 58 46 49 21 10 3 2
## 8 least connected regions:
## 10 26 61 67 88 89 115 263 with 1 link
## 2 most connected regions:
## 51 227 with 11 links
# Convert to INLA format
nb2INLA("map.graph", map_nb)
map.adj <- paste(getwd(), "/map.graph", sep = "")
# Prep data (do I need to add PM2.5 here? - depends if I uses map_i later)
for (i in 2013:2019) {
d <- cases[cases$Year == i, ]
map_i <- left_join(map, d, by = join_by(lad19cd == Level))
p <- ggplot() +
geom_sf(data = map_i) +
aes(fill = SAR.cut) +
theme_bw() +
scale_fill_manual(values = colors) +
guides(fill = guide_legend(title = "SAR")) +
labs(title = paste0("SAR in ", i))
print(p)
}
Now we build the models
# Aggregate observed and expected cases by LA (independent of year)
cases_agg <- cases %>%
group_by(Level) %>%
summarise(Observed = sum(Observed), Expected = sum(Expected), Population = sum(Population)) %>%
mutate(SAR = Observed / Expected) %>%
mutate(log.Population = log(Population)) %>%
dplyr::rename(O = Observed, E = Expected)
#round up/down
cases_agg[cases_agg$Level == "E09000001",]$O <- cases_agg[cases_agg$Level == "E09000012",]$O %>% floor()
cases_agg[cases_agg$Level == "E09000012",]$O <- cases_agg[cases_agg$Level == "E09000012",]$O %>% ceiling()
pol <- P25[P25$Year >= 2013,] %>% group_by(lad19cd) %>% summarise(PM2.5 = mean(PM25))
#merge; 'data' is cases_agg combined with pm2.5
data<-merge(cases_agg, pol, by.x="Level", by.y="lad19cd")
Starting with the fixed environmental effects (with and without intercept)
library(INLA)
formula <- O ~ 1 + PM2.5
fixed.model1 <- inla(formula, family="poisson", data=data)
fixed.model1$summary.fixed
## mean sd 0.025quant 0.5quant 0.975quant
## (Intercept) 7.3272231 0.010352538 7.30693254 7.3272231 7.34751375
## PM2.5 -0.0319657 0.001067797 -0.03405855 -0.0319657 -0.02987286
## mode kld
## (Intercept) 7.3272231 2.431918e-11
## PM2.5 -0.0319657 6.114588e-14
formula2 <- O ~ PM2.5
fixed.model2 <- inla(formula2, family="poisson", data=data)
fixed.model2$summary.fixed
## mean sd 0.025quant 0.5quant 0.975quant
## (Intercept) 7.3272231 0.010352538 7.30693254 7.3272231 7.34751375
## PM2.5 -0.0319657 0.001067797 -0.03405855 -0.0319657 -0.02987286
## mode kld
## (Intercept) 7.3272231 2.431918e-11
## PM2.5 -0.0319657 6.114588e-14
Now map spatial effects
# Create colour pallette
colors <- c("#D0F0C0", brewer.pal(4, "Reds"))
# Create buckets for SAR
cases_agg$SAR.cut <- cut(
cases_agg$SAR,
breaks = c(min(cases_agg$SAR[!is.na(cases_agg$SAR)]), 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, max(cases_agg$SAR[!is.na(cases_agg$SAR)])),
)
# Produce spatial map of aggregatged SAR
# map_agg is map joined with cases_agg
map_agg <- left_join(map, cases_agg, by = join_by(lad19cd == Level))
# Plot SAR using ggplot
ggplot() +
geom_sf(data = map_agg, aes(fill = SAR.cut)) +
theme_bw() +
scale_fill_viridis_d() +
theme(legend.position = "bottom")
# Fit hierarchical Poisson log-linear model in INLA
LA <- seq(1, 316)
formula <- O ~ f(LA, model = "bym2", graph = map.adj, hyper = list(
prec = list(
prior = "pc.prec",
param = c(0.5 / 0.31, 0.01)
),
phi = list(
prior = "pc",
param = c(0.5, 2 / 3)
)
))
sBYM.model <- inla(formula = formula, family = "poisson", data = data, E = E, control.compute = list(waic = TRUE))
# Obtain the posterior summary statistics
summary(sBYM.model, summary.stat = c("mean", "p0.975", "p0.025", "p0.5in", "p0.5out", "waic"))
##
## Call:
## c("inla.core(formula = formula, family = family, contrasts = contrasts,
## ", " data = data, quantiles = quantiles, E = E, offset = offset, ", "
## scale = scale, weights = weights, Ntrials = Ntrials, strata = strata,
## ", " lp.scale = lp.scale, link.covariates = link.covariates, verbose =
## verbose, ", " lincomb = lincomb, selection = selection, control.compute
## = control.compute, ", " control.predictor = control.predictor,
## control.family = control.family, ", " control.inla = control.inla,
## control.fixed = control.fixed, ", " control.mode = control.mode,
## control.expert = control.expert, ", " control.hazard = control.hazard,
## control.lincomb = control.lincomb, ", " control.update =
## control.update, control.lp.scale = control.lp.scale, ", "
## control.pardiso = control.pardiso, only.hyperparam = only.hyperparam,
## ", " inla.call = inla.call, inla.arg = inla.arg, num.threads =
## num.threads, ", " keep = keep, working.directory = working.directory,
## silent = silent, ", " inla.mode = inla.mode, safe = FALSE, debug =
## debug, .parent.frame = .parent.frame)" )
## Time used:
## Pre = 4.61, Running = 0.247, Post = 0.0142, Total = 4.87
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 0.131 0.004 0.123 0.131 0.14 0.131 0
##
## Random effects:
## Name Model
## LA BYM2 model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant 0.975quant mode
## Precision for LA 9.801 0.952 7.999 9.776 11.742 8.95
## Phi for LA 0.957 0.036 0.862 0.967 0.995 0.96
##
## Watanabe-Akaike information criterion (WAIC) ...: 3262.58
## Effective number of parameters .................: 158.88
##
## Marginal log-Likelihood: -1968.79
## is computed
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
Now combine fixed with random spatial effects
LA <- seq(1, 316)
formula <- O ~ PM2.5 + f(LA, model = "bym2", graph = map.adj, hyper = list(
prec = list(
prior = "pc.prec",
param = c(0.6 / 0.31, 0.01)
),
phi = list(
prior = "pc",
param = c(0.6, 2 / 3)
)
))
fixed.sBYM.model <- inla(formula = formula, family = "poisson", data = data, E = E, control.compute = list(waic = TRUE))
summary(fixed.sBYM.model, summary.stat = c("mean", "p0.975", "p0.025", "p0.5in", "p0.5out", "waic"))
##
## Call:
## c("inla.core(formula = formula, family = family, contrasts = contrasts,
## ", " data = data, quantiles = quantiles, E = E, offset = offset, ", "
## scale = scale, weights = weights, Ntrials = Ntrials, strata = strata,
## ", " lp.scale = lp.scale, link.covariates = link.covariates, verbose =
## verbose, ", " lincomb = lincomb, selection = selection, control.compute
## = control.compute, ", " control.predictor = control.predictor,
## control.family = control.family, ", " control.inla = control.inla,
## control.fixed = control.fixed, ", " control.mode = control.mode,
## control.expert = control.expert, ", " control.hazard = control.hazard,
## control.lincomb = control.lincomb, ", " control.update =
## control.update, control.lp.scale = control.lp.scale, ", "
## control.pardiso = control.pardiso, only.hyperparam = only.hyperparam,
## ", " inla.call = inla.call, inla.arg = inla.arg, num.threads =
## num.threads, ", " keep = keep, working.directory = working.directory,
## silent = silent, ", " inla.mode = inla.mode, safe = FALSE, debug =
## debug, .parent.frame = .parent.frame)" )
## Time used:
## Pre = 5.58, Running = 0.288, Post = 0.015, Total = 5.88
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 0.528 0.221 0.093 0.529 0.960 0.529 0
## PM2.5 -0.041 0.023 -0.086 -0.041 0.004 -0.041 0
##
## Random effects:
## Name Model
## LA BYM2 model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant 0.975quant mode
## Precision for LA 10.048 1.055 8.024 10.027 12.170 8.990
## Phi for LA 0.942 0.044 0.829 0.954 0.993 0.943
##
## Watanabe-Akaike information criterion (WAIC) ...: 3263.55
## Effective number of parameters .................: 159.30
##
## Marginal log-Likelihood: -1974.31
## is computed
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')