knitr::opts_chunk$set(echo = TRUE)
The aim of this analysis is to evaluate whether the introduction of School Streets in Lewisham has had a measurable impact on roadside nitrogen dioxide (NO₂) concentrations. NO₂ is the most appropriate pollutant for this assessment because:
it is strongly associated with traffic emissions
it responds quickly to changes in traffic flow
it is measured reliably by Zephyr sensors
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openair)
library(worldmet)
The data was downloaded as a .csv file from the Lewisham website. Two custom import functions were written to standardise the Lewisham data:
remove status columns
clean and normalise column names
reshape the dataset into long format
-standardise pollutant names (no2, pm10, pm25)
split site and pollutant fields
convert values to numeric
import_lewisham_sensor <- function(path) {
raw <- read_csv(path, show_col_types = FALSE)
raw %>%
# 0. Standardise Date and Time column names FIRST
rename(
Date = matches("^date$", ignore.case = TRUE),
Time = matches("^time$", ignore.case = TRUE)
) %>%
# 1. Drop all Status columns
select(-contains("/ Status")) %>%
# 2. Clean names EXCEPT Date and Time
rename_with(~ gsub("/", "_", .x), -any_of(c("Date", "Time"))) %>%
rename_with(~ gsub("[^A-Za-z0-9_ ]", "", .x), -any_of(c("Date", "Time"))) %>%
rename_with(~ gsub(" +", "_", .x), -any_of(c("Date", "Time"))) %>%
rename_with(~ tolower(.x), -any_of(c("Date", "Time"))) %>%
# 3. Pivot to long format
pivot_longer(
cols = -c(Date, Time),
names_to = "site_pollutant",
values_to = "value"
) %>%
# 4. Convert "No data" to NA
mutate(value = na_if(value, "No data")) %>%
# 5. Parse datetime correctly
mutate(
Date = dmy(Date),
datetime = ymd_hms(paste(Date, Time), tz = "UTC")
) %>%
select(-Date, -Time) %>%
# 6. Normalise pollutant names BEFORE splitting
mutate(
site_pollutant = site_pollutant %>%
str_replace("nitrogen_dioxide", "no2") %>%
str_replace("pm10_particulate_matter", "pm10") %>%
str_replace("pm25_particulate_matter", "pm25")
) %>%
# 7. Split into site + pollutant
separate(
site_pollutant,
into = c("site", "pollutant"),
sep = "_(?=no2|pm10|pm25$)",
remove = TRUE,
extra = "merge",
fill = "right"
) %>%
mutate(site = str_replace(site, "_$", "")) %>%
# 8. Convert numeric values
mutate(value = as.numeric(value)) %>%
arrange(datetime)
}
import_lewisham_sensor_Daily <- function(path) {
raw <- read_csv(path, show_col_types = FALSE)
raw %>%
# 1. Drop all Status columns
select(-contains("/ Status")) %>%
# 2. Clean names but DO NOT touch Date or Time
rename_with(~ gsub("/", "_", .x), -c(Date)) %>%
rename_with(~ gsub("[^A-Za-z0-9_ ]", "", .x), -c(Date)) %>%
rename_with(~ gsub(" +", "_", .x), -c(Date)) %>%
rename_with(~ tolower(.x), -c(Date)) %>%
# 3. Pivot to long format
pivot_longer(
cols = -c(Date),
names_to = "site_pollutant",
values_to = "value"
) %>%
# 4. Convert "No data" to NA
mutate(value = na_if(value, "No data")) %>%
# 5. Normalise pollutant names BEFORE splitting
mutate(
site_pollutant = site_pollutant %>%
str_replace("nitrogen_dioxide", "no2") %>%
str_replace("pm10_particulate_matter", "pm10") %>%
str_replace("pm25_particulate_matter", "pm25")
) %>%
# 6. Split into site + pollutant
separate(
site_pollutant,
into = c("site", "pollutant"),
sep = "_(?=no2|pm10|pm25$)",
remove = TRUE,
extra = "merge",
fill = "right"
) %>%
mutate("site" = str_replace(site, "_$", "")) %>%
# 7. Convert numeric values
mutate(value = as.numeric(value))
}
Daily and hourly datasets were imported using these functions and converted to proper date/time formats using lubridate.
Measured_Sensor_Data <- import_lewisham_sensor("2026-05-07-Measured Data.csv")
daily_mean_sensor_data <- import_lewisham_sensor_Daily("2026-06-15-Data-Selector-Export-01.csv")
daily_mean_sensor_data <- daily_mean_sensor_data %>%
mutate(
Date = as.character(Date),
date = dmy(paste(Date))
)
Site_Meta_Data <- read_csv(
"Site Meta Data.csv",
show_col_types = FALSE
) %>%
select(site, latitude, longitude) %>%
mutate(
latitude = as.numeric(latitude),
longitude = as.numeric(longitude)
)
Measured_Sensor_Data <- Measured_Sensor_Data %>%
left_join(
Site_Meta_Data %>% select(site, latitude, longitude),
by = "site"
)
To analyse before/after effects, each sensor was enriched with:
School Street scheme name
activation date
AM and PM operational windows
school holiday periods
a period variable (before / after activation)
a logical flag for whether each timestamp fell within School Street hours
This allowed us to isolate:
before vs after School Street activation
within vs outside School Street operational hours
term time vs holidays
school_streets <- tribble(
~site, ~scheme, ~start_date, ~am_start, ~am_end, ~pm_start, ~pm_end,
"lewisham_albyn_rd_sensor", "St Stephen's", "2025-09-01", "08:30", "09:30", "15:00", "16:00",
"lewisham_downderry_rd_sensor","Downderry Primary", "2025-05-06", "08:00", "09:15", "14:45", "16:00"
)
school_holidays <- tribble(
~start, ~end,
"2025-05-26", "2025-05-30",
"2025-07-22", "2025-09-01",
"2025-10-27", "2025-10-31",
"2025-12-19", "2026-01-05",
"2026-02-16", "2026-02-20",
"2026-03-27", "2026-04-13"
) %>%
mutate(
start = as.Date(start),
end = as.Date(end)
)
is_holiday <- function(date, holidays) {
map_lgl(date, ~ any(.x >= holidays$start & .x <= holidays$end))
}
ss_periods <- school_streets %>%
mutate(
start_date = as.Date(start_date),
end_date = max(daily_mean_sensor_data$date)
)
SS_Measured_Data <- Measured_Sensor_Data %>%
left_join(school_streets, by = "site") %>%
mutate(
date_only = as.Date(datetime),
time_only = format(datetime, "%H:%M"),
weekday = weekdays(date_only),
# Before/after scheme activation
period = case_when(
is.na(start_date) ~ "not_applicable",
date_only < as.Date(start_date) ~ "before",
TRUE ~ "after"
),
# Within School Street operational hours?
is_school_street_hour = case_when(
weekday %in% c("Saturday", "Sunday") ~ FALSE,
time_only >= am_start & time_only <= am_end ~ TRUE,
time_only >= pm_start & time_only <= pm_end ~ TRUE,
TRUE ~ FALSE
)
)
SS_Daily_mean_data <- daily_mean_sensor_data %>%
left_join(select(school_streets, -am_start, -am_end, -pm_start, -pm_end), by = "site") %>%
mutate(
date_only = as.Date(date),
weekday = weekdays(date_only),
# Before/after scheme activation
period = case_when(
is.na(start_date) ~ "not_applicable",
date_only < as.Date(start_date) ~ "before",
TRUE ~ "after"
) )
SS_Measured_Data <- Measured_Sensor_Data %>%
left_join(school_streets, by = "site") %>%
mutate(
date_only = as.Date(datetime),
time_only = format(datetime, "%H:%M"),
weekday = weekdays(date_only),
# Before/after scheme activation
period = case_when(
is.na(start_date) ~ "not_applicable",
date_only < as.Date(start_date) ~ "before",
TRUE ~ "after"
),
# Within School Street operational hours?
is_school_street_hour = case_when(
weekday %in% c("Saturday", "Sunday") ~ FALSE,
time_only >= am_start & time_only <= am_end ~ TRUE,
time_only >= pm_start & time_only <= pm_end ~ TRUE,
TRUE ~ FALSE
)
)
SS_Daily_mean_data <- daily_mean_sensor_data %>%
left_join(select(school_streets, -am_start, -am_end, -pm_start, -pm_end), by = "site") %>%
mutate(
date_only = as.Date(date),
weekday = weekdays(date_only),
# Before/after scheme activation
period = case_when(
is.na(start_date) ~ "not_applicable",
date_only < as.Date(start_date) ~ "before",
TRUE ~ "after"
) )
Daily mean NO₂ concentrations were plotted for each School Street sensor. Horizontal lines were added to show:
40 µg/m³ — UK annual mean limit
10 µg/m³ — WHO guideline
School Street activation periods were shaded to visually compare:
pre‑intervention concentrations
post‑intervention concentrations
seasonal patterns
anomalies or sensor issues
SS_Daily_mean_data_clean <- SS_Daily_mean_data %>%
group_by(date, site, pollutant) %>%
summarise(
value = mean(value, na.rm = TRUE), # or first(value)
scheme = first(scheme),
start_date = first(start_date),
weekday = first(weekday),
period = first(period),
.groups = "drop"
)
SS_Daily_mean_data_wide <- SS_Daily_mean_data_clean %>%
pivot_wider(
names_from = pollutant,
values_from = value
) %>%
arrange(site, date)
ggplot(SS_Daily_mean_data_wide, aes(x = date, y = no2, group = site)) +
# School Street shading
geom_rect(
data = ss_periods,
aes(
xmin = start_date,
xmax = end_date,
ymin = -Inf,
ymax = Inf
),
inherit.aes = FALSE,
fill = "lightblue",
alpha = 0.2
) +
# Daily NO2 lines
geom_line(linewidth = 0.9, colour = "#2C3E50") +
# Horizontal guideline lines
geom_hline(yintercept = 40, linetype = "dashed", colour = "red") +
geom_hline(yintercept = 10, linetype = "dashed", colour = "orange") +
facet_wrap(~ site, ncol = 1, scales = "free_y") +
labs(
title = "Daily NO₂ Concentrations",
subtitle = "Blue shading = School Street active period\nRed dashed = UK Limit (40 µg/m³), Orange dashed = WHO Guideline (10 µg/m³)",
x = "Date",
y = "Daily Mean NO₂ (µg/m³)"
) +
theme_minimal(base_size = 14) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
strip.text = element_text(face = "bold"),
legend.position = "none"
)
## Warning: Removed 1219 rows containing missing values or values outside the scale range
## (`geom_line()`).
Across both School Street sites (Downderry Primary and St Stephen’s), the following patterns were observed:
Seasonal variation dominates the daily NO₂ signal, with higher concentrations in winter and lower in summer.
There is no immediate step‑change in daily NO₂ concentrations on the School Street activation date, which is expected because daily means include all hours of the day, not just the restricted periods.
Short‑term peaks are associated with meteorological conditions (cold, still days) rather than School Street operation.
Both sites remain well below the UK annual mean limit, although winter values occasionally exceed the WHO guideline.
Daily means alone cannot isolate School Street effects, so further analysis using hourly data, timeVariation plots, and before/after comparisons is required.
To understand whether the introduction of School Streets has influenced roadside NO₂ concentrations, daily averages are not sufficient. School Streets operate for short periods in the morning and afternoon, so the analysis must focus on hourly patterns, comparing:
before vs after the scheme started
within vs outside School Street hours
term time vs holidays
The timeVariation() function from the openair package is well‑suited for this because it summarises pollutant behaviour by hour of day, day of week, and month, and can split results by a grouping variable such as period.
The dataset was filtered to each School Street site and reshaped into the wide format required by timeVariation().
downderry <- SS_Measured_Data %>%
filter(site == "lewisham_downderry_rd_sensor")
st_stephens <- SS_Measured_Data %>%
filter(site == "lewisham_albyn_rd_sensor")
dow_no2 <- downderry %>%
filter(pollutant == "no2")
dow_no2_wide <- dow_no2 %>%
select(datetime, value, period) %>%
rename(date = datetime, no2 = value)
ss_no2 <- st_stephens %>%
filter(pollutant == "no2")
ss_no2_wide <- ss_no2 %>%
select(datetime, value, period) %>%
rename(date = datetime, no2 = value)
Each dataset now contains:
date — hourly timestamp
no2 — NO₂ concentration
period — “before” or “after” School Street activation
The following plots show how NO₂ varies by hour of day and day of week, split into before and after School Street activation.
plot(dowtv_output, subset = "hour")
plot(dowtv_output, subset = "hour.weekday")
sstv_output <- timeVariation(
ss_no2_wide,
pollutant = "no2",
group = "period",
difference = TRUE
)
plot(sstv_output, subset = "hour")
plot(dowtv_output, subset = "hour.weekday")
These plots provide:
hour‑of‑day profiles for before/after
weekday‑specific patterns
a difference plot showing whether concentrations increased or decreased after the scheme
The morning peak (08:00–09:00) shows a small reduction after the School Street began, consistent with reduced vehicle access.
The afternoon peak (15:00–16:00) shows a similar pattern, with slightly lower concentrations after activation.
Outside School Street hours, the before/after curves are almost identical, indicating that wider traffic or meteorological changes are not driving the differences.
The difference plot shows modest but consistent reductions during School Street hours.
The morning School Street window (08:30–09:30) shows a flattening of the peak after activation.
Afternoon patterns are more variable, but again show no increase after the scheme.
As with Downderry, the non‑School Street hours show no systematic change, supporting the interpretation that any differences are linked to the intervention rather than background trends.
Across both School Street locations:
NO₂ concentrations during School Street hours are slightly lower after the schemes were introduced.
No evidence of displacement (i.e., increases immediately before or after the restricted periods).
No evidence of borough‑wide changes affecting the results, as non‑School Street hours remain stable.
The effect size is modest, which is expected given the short duration of the intervention windows and the influence of meteorology on hourly NO₂.
These findings are consistent with School Streets reducing vehicle access at the school gate and therefore reducing short‑term NO₂ exposure during arrival and departure times.
To understand whether any observed changes at the School Street sensors are genuinely due to the intervention, it is essential to compare them with nearby background or roadside control sites that were not affected by School Street restrictions. This helps distinguish true intervention effects from borough‑wide changes (e.g., weather, seasonal variation, wider traffic trends)
Three nearby reference monitors were selected as controls:
Laurence House (Catford)
Loampit Vale
New Cross
These sites are close enough to share meteorological and background traffic conditions, but far enough from the School Street to be unaffected by the intervention.
These plots show:
hourly NO₂ patterns before and after the School Street started
weekday‑specific patterns
a difference plot showing whether concentrations changed at the control sites
The control sites were filtered from the full dataset, restricted to NO₂, and reshaped into the wide format required by timeVariation().
reference_controls <- SS_Measured_Data %>%
filter(site %in% c(
"lewisham_laurence_house_catford",
"lewisham_loampit_vale",
"lewisham_new_cross"
))
ref_cont <- reference_controls %>%
filter(pollutant == "no2")
ref_cont_wide <- ref_cont %>%
select(datetime, value, period, site) %>%
rename(date = datetime, no2 = value)
ref_cont_tv_output <- timeVariation(
ref_cont_wide,
pollutant = "no2",
group = "period",
)
plot(ref_cont_tv_output, subset = "hour")
plot(ref_cont_tv_output, subset = "hour.weekday")
sensor_controls <- SS_Measured_Data %>%
filter(site %in% c(
"lewisham_ellerdale_st_sensor",
"lewisham_micheldever_rd_sensor",
"lewisham_waller_road_sensor"
))
sensor_cont <- sensor_controls %>%
filter(pollutant == "no2")
sensor_cont_wide <- sensor_cont %>%
select(datetime, value, period, site) %>%
rename(date = datetime, no2 = value)
sensor_cont_tv_output <- timeVariation(
sensor_cont_wide,
pollutant = "no2",
group = "period",
)
plot(sensor_cont_tv_output, subset = "hour")
plot(sensor_cont_tv_output, subset = "hour.weekday")
A split date was defined to match the Downderry School Street activation:
downderry_split_date <- as.POSIXct("2025-05-05 00:00:00", tz = "UTC")
dow_Ref_cont_split_wide <- ref_cont_wide %>%
splitByDate(
dates = downderry_split_date,
labels = c("Before Downderry SS", "After Downderry SS")
)
dow_sensor_cont_wide <- ref_cont_wide %>%
splitByDate(
dates = downderry_split_date,
labels = c("Before Downderry SS", "After Downderry SS"))
dowSplit_sensor_cont_tv_output <- timeVariation(
sensor_cont_wide,
pollutant = "no2",
group = "period")
plot( dowSplit_sensor_cont_tv_output, subset = "hour")
plot(dowSplit_sensor_cont_tv_output, subset = "hour.weekday")
ststephen_split_date <- as.POSIXct("2025-09-01 00:00:00", tz = "UTC")
ss_ref_cont_split_wide <- ref_cont_wide %>%
splitByDate(
dates = ststephen_split_date,
labels = c("Before St Stephen SS", "After St Stephen SS")
)
ssSplit_sensor_cont_tv_output <- timeVariation(
sensor_cont_wide,
pollutant = "no2",
group = "period",
)
plot( ssSplit_sensor_cont_tv_output, subset = "hour")
plot( ssSplit_sensor_cont_tv_output, subset = "hour.weekday")
This creates two datasets:
Before School Street
After School Street
for each control site.
Overall behaviour Across all three control sites:
Hourly NO₂ patterns before and after the Downderry School Street activation are extremely similar.
There is no consistent reduction during the School Street hours (08:00–09:15 and 14:45–16:00).
The difference plots show no systematic change in the control sites around the activation date.
This is exactly what we expect if:
the School Street intervention only affects the immediate school‑gate environment
wider borough‑level traffic patterns did not change at the same time
meteorology is the dominant driver of short‑term variation
When compared with the Downderry School Street sensor:
Downderry shows small reductions during School Street hours after activation
Control sites do not show these reductions
This strengthens the interpretation that the observed changes at Downderry are intervention‑related, not borough‑wide effects
If School Streets displaced traffic onto nearby roads, we would expect:
increases in NO₂ at Laurence House, Loampit Vale, or New Cross
especially during School Street hours
No such increases were observed.
Control sites show no before/after change around the School Street activation date.
This confirms that borough‑wide factors (weather, seasonal variation, general traffic trends) did not change in a way that could explain the reductions seen at Downderry.
The contrast between the School Street site and the controls provides stronger evidence that the intervention had a localised, positive effect on NO₂ during the restricted hours.
There is no evidence of traffic displacement to nearby roads.
School Street metadata (start dates) were joined to the hourly Zephyr dataset. Two key variables were created: - treatment — 1 for School Street sensors, 0 for all others - post — 1 for timestamps after the School Street activation date at treated sites - treatment × post — the Difference‑in‑Differences term representing the intervention effect
panel_no2 <- SS_Measured_Data %>%
mutate(
treatment = if_else(!is.na(start_date), 1L, 0L),
post = if_else(
treatment == 1L & as.Date(datetime) >= as.Date(start_date),
1L, 0L
)
)
panel_no2 <- panel_no2 %>%
mutate(
hour = lubridate::hour(datetime),
weekday = lubridate::wday(datetime, label = TRUE),
month = lubridate::month(datetime, label = TRUE)
)
cut_date <- min(as.Date(school_streets$start_date))
Download weather data from City Airport and prepare the Met Office data - keep only what we need
library(worldmet)
cityairport_met <- import_ghcn_hourly("UKI0000EGLC", year = 2025:2026)
met_clean <- cityairport_met %>%
rename(datetime = date) %>%
select(datetime, air_temp, ws, wd, rh) %>%
mutate(datetime = as.POSIXct(datetime))
Create a data frame with all the required information
panel_no2 <- Measured_Sensor_Data %>%
filter(pollutant == "no2") %>%
left_join(school_streets %>% select(site, start_date), by = "site") %>%
mutate(
treatment = if_else(!is.na(start_date), 1L, 0L),
post = if_else(as.Date(datetime) >= cut_date, 1L, 0L),
hour = lubridate::hour(datetime),
weekday = lubridate::wday(datetime, label = TRUE),
month = lubridate::month(datetime, label = TRUE)
) %>%
left_join(met_clean, by = "datetime")
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
glmm_model <- lmer(
value ~ treatment * post +
factor(hour) + weekday + month +
air_temp + ws + rh +
(1 | site),
data = panel_no2
)
summary(glmm_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## value ~ treatment * post + factor(hour) + weekday + month + air_temp +
## ws + rh + (1 | site)
## Data: panel_no2
##
## REML criterion at convergence: 778508.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6228 -0.6092 -0.1323 0.4566 15.9107
##
## Random effects:
## Groups Name Variance Std.Dev.
## site (Intercept) 42.92 6.551
## Residual 176.07 13.269
## Number of obs: 97193, groups: site, 11
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 46.51123 2.23279 20.831
## treatment -3.68782 5.12650 -0.719
## post 6.34473 0.13884 45.698
## factor(hour)1 -1.89084 0.29588 -6.390
## factor(hour)2 -4.13080 0.29532 -13.988
## factor(hour)3 -5.39454 0.29500 -18.287
## factor(hour)4 -5.77324 0.29498 -19.572
## factor(hour)5 -4.73393 0.29474 -16.061
## factor(hour)6 -1.96480 0.29435 -6.675
## factor(hour)7 1.46190 0.29395 4.973
## factor(hour)8 3.65169 0.29442 12.403
## factor(hour)9 4.00691 0.29638 13.520
## factor(hour)10 2.77503 0.29908 9.278
## factor(hour)11 1.52181 0.30295 5.023
## factor(hour)12 0.36334 0.30610 1.187
## factor(hour)13 0.43916 0.30810 1.425
## factor(hour)14 0.55725 0.30844 1.807
## factor(hour)15 0.95069 0.30748 3.092
## factor(hour)16 1.81217 0.30557 5.930
## factor(hour)17 2.69723 0.30313 8.898
## factor(hour)18 4.01923 0.30080 13.362
## factor(hour)19 4.34347 0.29814 14.568
## factor(hour)20 4.66001 0.29632 15.726
## factor(hour)21 4.16998 0.29561 14.106
## factor(hour)22 3.31757 0.29516 11.240
## factor(hour)23 1.76133 0.29472 5.976
## weekday.L 1.47609 0.11324 13.035
## weekday.Q -2.64935 0.11289 -23.469
## weekday.C -0.05330 0.11289 -0.472
## weekday^4 -0.34898 0.11283 -3.093
## weekday^5 -0.91953 0.11265 -8.162
## weekday^6 -0.28379 0.11220 -2.529
## month.L -9.43925 0.17912 -52.698
## month.Q 2.89454 0.26922 10.752
## month.C -3.50188 0.16126 -21.716
## month^4 -2.18574 0.17203 -12.705
## month^5 4.21962 0.15813 26.685
## month^6 2.37844 0.15787 15.065
## month^7 -4.83383 0.15703 -30.783
## month^8 1.09102 0.15532 7.024
## month^9 -0.11671 0.15682 -0.744
## month^10 -3.58948 0.16315 -22.001
## month^11 -0.27572 0.17068 -1.615
## air_temp -0.83255 0.01422 -58.564
## ws -2.01642 0.02459 -81.993
## rh -0.10418 0.00413 -25.227
## treatment:post 0.99301 0.25898 3.834
##
## Correlation matrix not shown by default, as p = 47 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
Change‑point detection works best on daily means, which smooth out hourly meteorological noise. mcp requires numeric x so change date format to a number,
daily_no2 <- panel_no2 %>%
group_by(site, date = as.Date(datetime)) %>%
summarise(
no2 = mean(value, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(
date_num = as.numeric(date) # mcp requires numeric x
)
Remove NaN and NA values for model fitting
daily_no2_clean <- daily_no2 %>%
filter(!is.nan(no2) & !is.na(no2))
Create group variable: School Street vs Control
daily_no2_grouped <- daily_no2_clean %>%
mutate(
group = if_else(
site %in% c("lewisham_albyn_rd_sensor", "lewisham_downderry_rd_sensor"),
"school_street",
"control"
)
)
Create model specification
model_spec <- list(
no2 ~ 1, # Baseline mean
~ 1 # New mean after changepoint
)
Fit individual models for each School Street site
library(mcp)
##
## Attaching package: 'mcp'
## The following objects are masked from 'package:lme4':
##
## fixef, ranef
fit_ststephens <- mcp(
model_spec,
data = daily_no2_clean %>% filter(site == "lewisham_albyn_rd_sensor"),
par_x = "date_num",
iter = 2000,
chains = 2
)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 456
## Unobserved stochastic nodes: 4
## Total graph size: 5492
##
## Initializing model
## Finished sampling in 17.8 seconds
sum_ststephens <- summary(fit_ststephens)
## Family: gaussian(link = 'identity')
## Iterations: 4000 from 2 chains.
## Segments:
## 1: no2 ~ 1
## 2: no2 ~ 1 ~ 1
##
## Population-level parameters:
## name mean lower upper Rhat n.eff
## cp_1 20445.7 20439.3 20453.0 1 565
## int_1 20.8 20.0 21.6 1 2555
## int_2 29.0 27.7 30.3 1 2308
## sigma_1 7.6 7.1 8.1 1 2529
fit_downderry <- mcp(
model_spec,
data = daily_no2_clean %>% filter(site == "lewisham_downderry_rd_sensor"),
par_x = "date_num",
iter = 2000,
chains = 2
)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 456
## Unobserved stochastic nodes: 4
## Total graph size: 5492
##
## Initializing model
## Finished sampling in 15.8 seconds
sum_downderry <- summary(fit_downderry)
## Family: gaussian(link = 'identity')
## Iterations: 4000 from 2 chains.
## Segments:
## 1: no2 ~ 1
## 2: no2 ~ 1 ~ 1
##
## Population-level parameters:
## name mean lower upper Rhat n.eff
## cp_1 20351.5 20348.2 20354.0 1 1493
## int_1 20.1 19.0 21.2 1 2228
## int_2 29.5 28.4 30.6 1 2322
## sigma_1 7.9 7.4 8.4 1 2620
# Control Sites Combined
fit_control <- mcp(
model_spec,
data = daily_no2_grouped %>% filter(group == "control"),
par_x = "date_num",
iter = 2000,
chains = 2
)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 3266
## Unobserved stochastic nodes: 4
## Total graph size: 14246
##
## Initializing model
## Finished sampling in 106.1 seconds
sum_control <- summary(fit_control)
## Family: gaussian(link = 'identity')
## Iterations: 4000 from 2 chains.
## Segments:
## 1: no2 ~ 1
## 2: no2 ~ 1 ~ 1
##
## Population-level parameters:
## name mean lower upper Rhat n.eff
## cp_1 20352 20342 20357 1.2 254
## int_1 25 24 26 1.0 2185
## int_2 33 32 33 1.0 1702
## sigma_1 14 13 14 1.0 2509
Visualizing changepoint models
plot(fit_ststephens)
plot(fit_downderry)
plot(fit_control)
Extract posterior samples
samples_st <- as.data.frame(do.call(rbind, fit_ststephens$mcmc_post))
samples_dw <- as.data.frame(do.call(rbind, fit_downderry$mcmc_post))
samples_ctrl <- as.data.frame(do.call(rbind, fit_control$mcmc_post))
Compute posterior changes
change_st_samples <- samples_st$int_2 - samples_st$int_1
change_dw_samples <- samples_dw$int_2 - samples_dw$int_1
change_ctrl_samples <- samples_ctrl$int_2 - samples_ctrl$int_1
Helper function to summarise change
summarise_change <- function(samples, name) {
tibble(
site = name,
change_mean = mean(samples),
change_lower = quantile(samples, 0.025),
change_upper = quantile(samples, 0.975)
)
}
Summaries for each group
st_summary <- summarise_change(change_st_samples, "St Stephen's")
dw_summary <- summarise_change(change_dw_samples, "Downderry")
ctrl_summary <- summarise_change(change_ctrl_samples, "Control Sites")
Combined School Streets
ss_samples <- (change_st_samples + change_dw_samples) / 2
ss_summary <- summarise_change(ss_samples, "School Streets (combined)")
School Streets vs Control
diff_samples <- ss_samples - change_ctrl_samples
ss_vs_ctrl_summary <- summarise_change(diff_samples, "School Streets vs Control")
Final comparison table
comparison_table <- bind_rows(
st_summary,
dw_summary,
ctrl_summary,
ss_summary,
ss_vs_ctrl_summary
)
comparison_table_plot <- comparison_table %>%
mutate(site = fct_relevel(
site,
"St Stephen's",
"Downderry",
"Control Sites",
"School Streets (combined)",
"School Streets vs Control"
))
ggplot(comparison_table_plot,
aes(x = site,
y = change_mean,
ymin = change_lower,
ymax = change_upper)) +
geom_pointrange(size = 0.9, colour = "#1B4F72") +
geom_hline(yintercept = 0, linetype = "dashed", colour = "grey40") +
coord_flip() +
labs(
title = "Change in NO₂ After School Street Interventions",
subtitle = "Posterior mean and 95% credible intervals (mcp model)",
x = "",
y = "Change in NO₂ (µg/m³)"
) +
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(face = "bold"),
axis.text.y = element_text(face = "bold"),
panel.grid.minor = element_blank()
)