Data Analysis

library(magrittr)
library(utils)
library(ggplot2)
library(sf)
library(sp)
library(rgdal)
library(dplyr)
library(kableExtra)

### Prepare data
load("cases.RData")

### Observed/expected table (for knitr)
kable(
  cases %>%
    group_by(Year) %>%
    summarise(`Observed` = format(sum(Observed), big.mark=","), `Expected` = format(sum(Expected), big.mark=",")),
  booktabs = TRUE,
  caption = "Aggregated Admissions"
) %>% kable_styling(., full_width = TRUE)
Aggregated Admissions
Year Observed Expected
2003 NA NA
2004 NA NA
2005 NA NA
2006 NA NA
2007 NA NA
2008 NA NA
2009 NA NA
2010 NA NA
2011 NA NA
2012 NA NA
2013 42,399 44,258.5
2014 45,018 43,732.9
2015 49,738 43,690.5
2016 52,879 43,984.1
2017 50,710 43,316.6
2018 56,345 42,555
2019 56,505 41,570
missing <- data.frame()
missing_LA <- list()
for (i in 2003:2019) {
  o <- cases[cases$Year == i, ]$Observed %>%
    is.na() %>%
    which() %>%
    length()
  e <- cases[cases$Year == i, ]$Expected %>%
    is.na() %>%
    which() %>%
    length()
  s <- cases[cases$Year == i, ]$SAR %>%
    is.na() %>%
    which() %>%
    length()
  p <- cases[cases$Year == i, ]$Population %>%
    is.na() %>%
    which() %>%
    length()
  row <- data.frame(Year = i, Observed = o, Expected = e, SAR = s, Population = p)
  missing <- rbind(missing, row)
  
  which <- cases[cases$Year == i, ]$SAR %>%
    is.na() %>%
    which()
  missing_LA <- c(missing_LA, list(cases[cases$Year == i, ][which, ]$Level.description))
}
kable(missing, caption = "Missing Observations") %>% kable_styling(., full_width = TRUE)
Missing Observations
Year Observed Expected SAR Population
2003 120 120 120 0
2004 99 99 99 0
2005 114 114 114 0
2006 127 127 127 0
2007 108 108 108 0
2008 77 77 77 0
2009 105 105 105 0
2010 129 129 129 0
2011 115 115 115 0
2012 73 73 73 0
2013 0 0 0 0
2014 0 0 0 0
2015 0 0 0 0
2016 0 0 0 0
2017 0 0 0 0
2018 0 0 0 0
2019 0 0 0 0
cases <- cases[cases$Year >= 2013, ]

temp<-cases %>% group_by(Year) %>% summarise(`Observed` = sum(Observed), `Expected` = sum(Expected))
ggplot(temp, aes(x=Year)) + 
  geom_line(aes(y = Observed), color = "darkred") + 
  geom_line(aes(y = Expected), color="steelblue")  + 
  scale_color_manual(name = "Admissions", values = c("Observed" = "darkred", "Expected" = "steelblue"))

Exploratory Analysis

load("pollution.RData")
P25<- P25[P25$Year >= 2013,]

mu = mean(NO2$NO2)
sd = sd(NO2$NO2)

hist(P25$PM25,
     main="Concentration of PM2.5",
     xlab="µg/m3",
     col="yellow")
abline(v=mean(P25$PM25), col="blue")

hist(NO2$NO2,
     main="Concentration of NO2",
     xlab="µg/m3",
     col="yellow")
abline(v=mu, col="red")

#correlation
# Calculate the correlation matrix
temp<-left_join(P25, NO2, by=c("lad19nm", "Year")) %>% select(lad19nm, Year, PM25, NO2)
corr_matrix <- cor(temp[,3:4])
print(corr_matrix)
##           PM25       NO2
## PM25 1.0000000 0.7452461
## NO2  0.7452461 1.0000000

Maps

Maps of SAR by LTLA by year

# Create colour pallette
library(RColorBrewer)
library(spdep)

colors <- c("#D0F0C0", brewer.pal(4, "Reds"))

#I think it's not divided by 100
cases$SAR.cut <- cut(
  cases$SAR,
  breaks = c(min(cases$SAR[!is.na(cases$SAR)]), 1, 2.5, 5, max(cases$SAR[!is.na(cases$SAR)])),
)

# Read shapefile
shapefile_path <- "../Boundaries (2019)/England/Local_Authority_Districts_December_2019_England.shp"
map <- st_read(shapefile_path)
## Reading layer `Local_Authority_Districts_December_2019_England' from data source `/Users/cam/Library/CloudStorage/OneDrive-ImperialCollegeLondon/LRTI project/Boundaries (2019)/England/Local_Authority_Districts_December_2019_England.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 317 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -714517.9 ymin: 6422874 xmax: 196334.8 ymax: 7520900
## Projected CRS: WGS 84 / Pseudo-Mercator
map <- map[map$lad19nm != "Isles of Scilly",]

map_nb <- poly2nb(map)
summary(map_nb)
## Neighbour list object:
## Number of regions: 316 
## Number of nonzero links: 1610 
## Percentage nonzero weights: 1.612322 
## Average number of links: 5.094937 
## 1 region with no links:
## 44
## Link number distribution:
## 
##  0  1  2  3  4  5  6  7  8  9 10 11 
##  1  8 25 39 54 58 46 49 21 10  3  2 
## 8 least connected regions:
## 10 26 61 67 88 89 115 263 with 1 link
## 2 most connected regions:
## 51 227 with 11 links
# Convert to INLA format
nb2INLA("map.graph", map_nb)
map.adj <- paste(getwd(), "/map.graph", sep = "")

# Prep data (do I need to add PM2.5 here? - depends if I uses map_i later)
for (i in 2013:2019) {
  d <- cases[cases$Year == i, ]
  map_i <- left_join(map, d, by = join_by(lad19cd == Level))
  
  p <- ggplot() +
    geom_sf(data = map_i) +
    aes(fill = SAR.cut) +
    theme_bw() +
    scale_fill_manual(values = colors) +
    guides(fill = guide_legend(title = "SAR")) +
    labs(title = paste0("SAR in ", i))
  
  print(p)
}

Analysis

Now we build the models

# Aggregate observed and expected cases by LA (independent of year)
cases_agg <- cases %>%
  group_by(Level) %>%
  summarise(Observed = sum(Observed), Expected = sum(Expected), Population = sum(Population)) %>%
  mutate(SAR = Observed / Expected) %>%
  mutate(log.Population = log(Population)) %>%
  dplyr::rename(O = Observed, E = Expected)

#round up/down
cases_agg[cases_agg$Level == "E09000001",]$O <- cases_agg[cases_agg$Level == "E09000012",]$O %>% floor()
cases_agg[cases_agg$Level == "E09000012",]$O <- cases_agg[cases_agg$Level == "E09000012",]$O %>% ceiling()

pol <- P25[P25$Year >= 2013,] %>% group_by(lad19cd) %>% summarise(PM2.5 = mean(PM25))

#merge; 'data' is cases_agg combined with pm2.5
data<-merge(cases_agg, pol, by.x="Level", by.y="lad19cd")

Starting with the fixed environmental effects (with and without intercept)

library(INLA)

formula <- O ~ 1 + PM2.5
fixed.model1 <- inla(formula, family="poisson", data=data)

fixed.model1$summary.fixed
##                   mean          sd  0.025quant   0.5quant  0.975quant
## (Intercept)  7.3272231 0.010352538  7.30693254  7.3272231  7.34751375
## PM2.5       -0.0319657 0.001067797 -0.03405855 -0.0319657 -0.02987286
##                   mode          kld
## (Intercept)  7.3272231 2.431918e-11
## PM2.5       -0.0319657 6.114588e-14
formula2 <- O ~ PM2.5
fixed.model2 <- inla(formula2, family="poisson", data=data)

fixed.model2$summary.fixed
##                   mean          sd  0.025quant   0.5quant  0.975quant
## (Intercept)  7.3272231 0.010352538  7.30693254  7.3272231  7.34751375
## PM2.5       -0.0319657 0.001067797 -0.03405855 -0.0319657 -0.02987286
##                   mode          kld
## (Intercept)  7.3272231 2.431918e-11
## PM2.5       -0.0319657 6.114588e-14

Now map spatial effects

# Create colour pallette
colors <- c("#D0F0C0", brewer.pal(4, "Reds"))

# Create buckets for SAR
cases_agg$SAR.cut <- cut(
  cases_agg$SAR,
  breaks = c(min(cases_agg$SAR[!is.na(cases_agg$SAR)]), 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, max(cases_agg$SAR[!is.na(cases_agg$SAR)])),
)

# Produce spatial map of aggregatged SAR
# map_agg is map joined with cases_agg
map_agg <- left_join(map, cases_agg, by = join_by(lad19cd == Level))

# Plot SAR using ggplot
ggplot() +
  geom_sf(data = map_agg, aes(fill = SAR.cut)) +
  theme_bw() +
  scale_fill_viridis_d() +
  theme(legend.position = "bottom")

# Fit hierarchical Poisson log-linear model in INLA
LA <- seq(1, 316)
formula <- O ~ f(LA, model = "bym2", graph = map.adj, hyper = list(
  prec = list(
    prior = "pc.prec",
    param = c(0.5 / 0.31, 0.01)
  ),
  phi = list(
    prior = "pc",
    param = c(0.5, 2 / 3)
  )
))

sBYM.model <- inla(formula = formula, family = "poisson", data = data, E = E, control.compute = list(waic = TRUE))

# Obtain the posterior summary statistics
summary(sBYM.model, summary.stat = c("mean", "p0.975", "p0.025", "p0.5in", "p0.5out", "waic"))
## 
## Call:
##    c("inla.core(formula = formula, family = family, contrasts = contrasts, 
##    ", " data = data, quantiles = quantiles, E = E, offset = offset, ", " 
##    scale = scale, weights = weights, Ntrials = Ntrials, strata = strata, 
##    ", " lp.scale = lp.scale, link.covariates = link.covariates, verbose = 
##    verbose, ", " lincomb = lincomb, selection = selection, control.compute 
##    = control.compute, ", " control.predictor = control.predictor, 
##    control.family = control.family, ", " control.inla = control.inla, 
##    control.fixed = control.fixed, ", " control.mode = control.mode, 
##    control.expert = control.expert, ", " control.hazard = control.hazard, 
##    control.lincomb = control.lincomb, ", " control.update = 
##    control.update, control.lp.scale = control.lp.scale, ", " 
##    control.pardiso = control.pardiso, only.hyperparam = only.hyperparam, 
##    ", " inla.call = inla.call, inla.arg = inla.arg, num.threads = 
##    num.threads, ", " keep = keep, working.directory = working.directory, 
##    silent = silent, ", " inla.mode = inla.mode, safe = FALSE, debug = 
##    debug, .parent.frame = .parent.frame)" ) 
## Time used:
##     Pre = 4.61, Running = 0.247, Post = 0.0142, Total = 4.87 
## Fixed effects:
##              mean    sd 0.025quant 0.5quant 0.975quant  mode kld
## (Intercept) 0.131 0.004      0.123    0.131       0.14 0.131   0
## 
## Random effects:
##   Name     Model
##     LA BYM2 model
## 
## Model hyperparameters:
##                   mean    sd 0.025quant 0.5quant 0.975quant mode
## Precision for LA 9.801 0.952      7.999    9.776     11.742 8.95
## Phi for LA       0.957 0.036      0.862    0.967      0.995 0.96
## 
## Watanabe-Akaike information criterion (WAIC) ...: 3262.58
## Effective number of parameters .................: 158.88
## 
## Marginal log-Likelihood:  -1968.79 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')

Now combine fixed with random spatial effects

LA <- seq(1, 316)
formula <- O ~ PM2.5 + f(LA, model = "bym2", graph = map.adj, hyper = list(
  prec = list(
    prior = "pc.prec",
    param = c(0.6 / 0.31, 0.01)
  ),
  phi = list(
    prior = "pc",
    param = c(0.6, 2 / 3)
  )
))

fixed.sBYM.model <- inla(formula = formula, family = "poisson", data = data, E = E, control.compute = list(waic = TRUE))
summary(fixed.sBYM.model, summary.stat = c("mean", "p0.975", "p0.025", "p0.5in", "p0.5out", "waic"))
## 
## Call:
##    c("inla.core(formula = formula, family = family, contrasts = contrasts, 
##    ", " data = data, quantiles = quantiles, E = E, offset = offset, ", " 
##    scale = scale, weights = weights, Ntrials = Ntrials, strata = strata, 
##    ", " lp.scale = lp.scale, link.covariates = link.covariates, verbose = 
##    verbose, ", " lincomb = lincomb, selection = selection, control.compute 
##    = control.compute, ", " control.predictor = control.predictor, 
##    control.family = control.family, ", " control.inla = control.inla, 
##    control.fixed = control.fixed, ", " control.mode = control.mode, 
##    control.expert = control.expert, ", " control.hazard = control.hazard, 
##    control.lincomb = control.lincomb, ", " control.update = 
##    control.update, control.lp.scale = control.lp.scale, ", " 
##    control.pardiso = control.pardiso, only.hyperparam = only.hyperparam, 
##    ", " inla.call = inla.call, inla.arg = inla.arg, num.threads = 
##    num.threads, ", " keep = keep, working.directory = working.directory, 
##    silent = silent, ", " inla.mode = inla.mode, safe = FALSE, debug = 
##    debug, .parent.frame = .parent.frame)" ) 
## Time used:
##     Pre = 5.58, Running = 0.288, Post = 0.015, Total = 5.88 
## Fixed effects:
##               mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept)  0.528 0.221      0.093    0.529      0.960  0.529   0
## PM2.5       -0.041 0.023     -0.086   -0.041      0.004 -0.041   0
## 
## Random effects:
##   Name     Model
##     LA BYM2 model
## 
## Model hyperparameters:
##                    mean    sd 0.025quant 0.5quant 0.975quant  mode
## Precision for LA 10.048 1.055      8.024   10.027     12.170 8.990
## Phi for LA        0.942 0.044      0.829    0.954      0.993 0.943
## 
## Watanabe-Akaike information criterion (WAIC) ...: 3263.55
## Effective number of parameters .................: 159.30
## 
## Marginal log-Likelihood:  -1974.31 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')