knitr::opts_chunk$set(echo = TRUE)

Imapct of School Streets on Nitrogen Dioxide Concentrations in Lewisham.

Purpose of the Analysis

The aim of this analysis is to evaluate whether the introduction of School Streets in Lewisham has had a measurable impact on roadside nitrogen dioxide (NO₂) concentrations. NO₂ is the most appropriate pollutant for this assessment because:

  • it is strongly associated with traffic emissions

  • it responds quickly to changes in traffic flow

  • it is measured reliably by Zephyr sensors

Install the relevant packages

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openair)
library(worldmet)

Data Preparation and Cleaning

The data was downloaded as a .csv file from the Lewisham website. Two custom import functions were written to standardise the Lewisham data:

  • remove status columns

  • clean and normalise column names

  • reshape the dataset into long format

-standardise pollutant names (no2, pm10, pm25)

  • split site and pollutant fields

  • convert values to numeric

import_lewisham_sensor <- function(path) {
  
  raw <- read_csv(path, show_col_types = FALSE)
  
  raw %>%
    # 0. Standardise Date and Time column names FIRST
    rename(
      Date = matches("^date$", ignore.case = TRUE),
      Time = matches("^time$", ignore.case = TRUE)
    ) %>%
    
    # 1. Drop all Status columns
    select(-contains("/ Status")) %>%
    
    # 2. Clean names EXCEPT Date and Time
    rename_with(~ gsub("/", "_", .x), -any_of(c("Date", "Time"))) %>%
    rename_with(~ gsub("[^A-Za-z0-9_ ]", "", .x), -any_of(c("Date", "Time"))) %>%
    rename_with(~ gsub(" +", "_", .x), -any_of(c("Date", "Time"))) %>%
    rename_with(~ tolower(.x), -any_of(c("Date", "Time"))) %>%
    
    # 3. Pivot to long format
    pivot_longer(
      cols = -c(Date, Time),
      names_to = "site_pollutant",
      values_to = "value"
    ) %>%
    
    # 4. Convert "No data" to NA
    mutate(value = na_if(value, "No data")) %>%
    
    # 5. Parse datetime correctly
    mutate(
      Date = dmy(Date),
      datetime = ymd_hms(paste(Date, Time), tz = "UTC")
    ) %>%
    select(-Date, -Time) %>%
    
    # 6. Normalise pollutant names BEFORE splitting
    mutate(
      site_pollutant = site_pollutant %>%
        str_replace("nitrogen_dioxide", "no2") %>%
        str_replace("pm10_particulate_matter", "pm10") %>%
        str_replace("pm25_particulate_matter", "pm25")
    ) %>%
    
    # 7. Split into site + pollutant
    separate(
      site_pollutant,
      into = c("site", "pollutant"),
      sep = "_(?=no2|pm10|pm25$)",
      remove = TRUE,
      extra = "merge",
      fill = "right"
    ) %>%
    mutate(site = str_replace(site, "_$", "")) %>%
    
    # 8. Convert numeric values
    mutate(value = as.numeric(value)) %>%
    
    arrange(datetime)
}


import_lewisham_sensor_Daily <- function(path) {
  
  raw <- read_csv(path, show_col_types = FALSE)
  
  raw %>%
    # 1. Drop all Status columns
    select(-contains("/ Status")) %>%
    
    # 2. Clean names but DO NOT touch Date or Time
    rename_with(~ gsub("/", "_", .x), -c(Date)) %>%
    rename_with(~ gsub("[^A-Za-z0-9_ ]", "", .x), -c(Date)) %>%
    rename_with(~ gsub(" +", "_", .x), -c(Date)) %>%
    rename_with(~ tolower(.x), -c(Date)) %>%
    
    # 3. Pivot to long format
    pivot_longer(
      cols = -c(Date),
      names_to = "site_pollutant",
      values_to = "value"
    ) %>%
    
    # 4. Convert "No data" to NA
    mutate(value = na_if(value, "No data")) %>%
    
    # 5. Normalise pollutant names BEFORE splitting
    mutate(
      site_pollutant = site_pollutant %>%
        str_replace("nitrogen_dioxide", "no2") %>%
        str_replace("pm10_particulate_matter", "pm10") %>%
        str_replace("pm25_particulate_matter", "pm25")
    ) %>%
    
    # 6. Split into site + pollutant
    separate(
      site_pollutant,
      into = c("site", "pollutant"),
      sep = "_(?=no2|pm10|pm25$)",
      remove = TRUE,
      extra = "merge",
      fill = "right"
    ) %>%
    mutate("site" = str_replace(site, "_$", "")) %>%
    
    # 7. Convert numeric values
    mutate(value = as.numeric(value))
}

Daily and hourly datasets were imported using these functions and converted to proper date/time formats using lubridate.

Measured_Sensor_Data <- import_lewisham_sensor("2026-05-07-Measured  Data.csv")

daily_mean_sensor_data <- import_lewisham_sensor_Daily("2026-06-15-Data-Selector-Export-01.csv")
daily_mean_sensor_data <- daily_mean_sensor_data %>%
  mutate(
    Date = as.character(Date),
    date = dmy(paste(Date))
  )

Add geographical data

Site_Meta_Data <- read_csv(
  "Site Meta Data.csv",
  show_col_types = FALSE
) %>%
  select(site, latitude, longitude) %>%
  mutate(
    latitude  = as.numeric(latitude),
    longitude = as.numeric(longitude)
  )

Measured_Sensor_Data <- Measured_Sensor_Data %>%
  left_join(
    Site_Meta_Data %>% select(site, latitude, longitude),
    by = "site"
  )

Adding School Street Metadata

To analyse before/after effects, each sensor was enriched with:

  • School Street scheme name

  • activation date

  • AM and PM operational windows

  • school holiday periods

  • a period variable (before / after activation)

  • a logical flag for whether each timestamp fell within School Street hours

This allowed us to isolate:

  • before vs after School Street activation

  • within vs outside School Street operational hours

  • term time vs holidays

school_streets <- tribble(
  ~site,                         ~scheme,                ~start_date,    ~am_start, ~am_end, ~pm_start, ~pm_end,
  "lewisham_albyn_rd_sensor",    "St Stephen's",         "2025-09-01",   "08:30",   "09:30", "15:00",   "16:00",
  "lewisham_downderry_rd_sensor","Downderry Primary",    "2025-05-06",   "08:00",   "09:15", "14:45",   "16:00"
)

school_holidays <- tribble(
  ~start,        ~end,
  "2025-05-26", "2025-05-30",
  "2025-07-22", "2025-09-01",
  "2025-10-27", "2025-10-31",
  "2025-12-19", "2026-01-05",
  "2026-02-16", "2026-02-20",
  "2026-03-27", "2026-04-13"
) %>%
  mutate(
    start = as.Date(start),
    end   = as.Date(end)
  )

is_holiday <- function(date, holidays) {
  map_lgl(date, ~ any(.x >= holidays$start & .x <= holidays$end))
}

ss_periods <- school_streets %>%
  mutate(
    start_date = as.Date(start_date),
    end_date   = max(daily_mean_sensor_data$date)
  )

SS_Measured_Data <- Measured_Sensor_Data %>%
  left_join(school_streets, by = "site") %>%
  mutate(
    date_only = as.Date(datetime),
    time_only = format(datetime, "%H:%M"),
    weekday = weekdays(date_only),
    
    # Before/after scheme activation
    period = case_when(
      is.na(start_date) ~ "not_applicable",
      date_only < as.Date(start_date) ~ "before",
      TRUE ~ "after"
    ),
    
    # Within School Street operational hours?
    is_school_street_hour = case_when(
      weekday %in% c("Saturday", "Sunday") ~ FALSE,
      time_only >= am_start & time_only <= am_end ~ TRUE,
      time_only >= pm_start & time_only <= pm_end ~ TRUE,
      TRUE ~ FALSE
    )
  )

SS_Daily_mean_data <- daily_mean_sensor_data %>%
  left_join(select(school_streets, -am_start, -am_end, -pm_start, -pm_end), by = "site") %>%
  mutate(
    date_only = as.Date(date),
    weekday = weekdays(date_only),
    
    # Before/after scheme activation
    period = case_when(
      is.na(start_date) ~ "not_applicable",
      date_only < as.Date(start_date) ~ "before",
      TRUE ~ "after"
    ) )

Add to Hourly and Mean Data

SS_Measured_Data <- Measured_Sensor_Data %>%
  left_join(school_streets, by = "site") %>%
  mutate(
    date_only = as.Date(datetime),
    time_only = format(datetime, "%H:%M"),
    weekday = weekdays(date_only),
    
    # Before/after scheme activation
    period = case_when(
      is.na(start_date) ~ "not_applicable",
      date_only < as.Date(start_date) ~ "before",
      TRUE ~ "after"
    ),
    
    # Within School Street operational hours?
    is_school_street_hour = case_when(
      weekday %in% c("Saturday", "Sunday") ~ FALSE,
      time_only >= am_start & time_only <= am_end ~ TRUE,
      time_only >= pm_start & time_only <= pm_end ~ TRUE,
      TRUE ~ FALSE
    )
  )

SS_Daily_mean_data <- daily_mean_sensor_data %>%
  left_join(select(school_streets, -am_start, -am_end, -pm_start, -pm_end), by = "site") %>%
  mutate(
    date_only = as.Date(date),
    weekday = weekdays(date_only),
    
    # Before/after scheme activation
    period = case_when(
      is.na(start_date) ~ "not_applicable",
      date_only < as.Date(start_date) ~ "before",
      TRUE ~ "after"
    ) )

Hourly School Street Impact

To understand whether the introduction of School Streets has influenced roadside NO₂ concentrations, daily averages are not sufficient. School Streets operate for short periods in the morning and afternoon, so the analysis must focus on hourly patterns, comparing:

  • before vs after the scheme started

  • within vs outside School Street hours

  • term time vs holidays

The timeVariation() function from the openair package is well‑suited for this because it summarises pollutant behaviour by hour of day, day of week, and month, and can split results by a grouping variable such as period.

Preparing the data for hourly analysis

The dataset was filtered to each School Street site and reshaped into the wide format required by timeVariation().

downderry <- SS_Measured_Data %>%
  filter(site == "lewisham_downderry_rd_sensor")

st_stephens <- SS_Measured_Data %>%
  filter(site == "lewisham_albyn_rd_sensor")

Downderry Primary

dow_no2 <- downderry %>% 
  filter(pollutant == "no2")
dow_no2_wide <- dow_no2 %>%
  select(datetime, value, period) %>%
  rename(date = datetime, no2 = value)

St Stephen’s Primary

ss_no2 <- st_stephens %>%
  filter(pollutant == "no2")
ss_no2_wide <- ss_no2 %>%
  select(datetime, value, period) %>%
  rename(date = datetime, no2 = value)

Each dataset now contains:

  • date — hourly timestamp

  • no2 — NO₂ concentration

  • period — “before” or “after” School Street activation

Hourly timeVariation plots

The following plots show how NO₂ varies by hour of day and day of week, split into before and after School Street activation.

Downderry Primary

plot(dowtv_output, subset = "hour")

plot(dowtv_output, subset = "hour.weekday")

St Stephen’s Primary

sstv_output <- timeVariation(
  ss_no2_wide,
  pollutant = "no2",     
  group = "period",      
  difference = TRUE
)

plot(sstv_output, subset = "hour")

plot(dowtv_output, subset = "hour.weekday")

These plots provide:

  • hour‑of‑day profiles for before/after

  • weekday‑specific patterns

  • a difference plot showing whether concentrations increased or decreased after the scheme

Interpretation of Hourly Patterns

Downderry Primary

The morning peak (08:00–09:00) shows a small reduction after the School Street began, consistent with reduced vehicle access.

The afternoon peak (15:00–16:00) shows a similar pattern, with slightly lower concentrations after activation.

Outside School Street hours, the before/after curves are almost identical, indicating that wider traffic or meteorological changes are not driving the differences.

The difference plot shows modest but consistent reductions during School Street hours.

St Stephen’s

The morning School Street window (08:30–09:30) shows a flattening of the peak after activation.

Afternoon patterns are more variable, but again show no increase after the scheme.

As with Downderry, the non‑School Street hours show no systematic change, supporting the interpretation that any differences are linked to the intervention rather than background trends.

Summary of Hourly Impact

Across both School Street locations:

  • NO₂ concentrations during School Street hours are slightly lower after the schemes were introduced.

  • No evidence of displacement (i.e., increases immediately before or after the restricted periods).

  • No evidence of borough‑wide changes affecting the results, as non‑School Street hours remain stable.

The effect size is modest, which is expected given the short duration of the intervention windows and the influence of meteorology on hourly NO₂.

These findings are consistent with School Streets reducing vehicle access at the school gate and therefore reducing short‑term NO₂ exposure during arrival and departure times.

Control Site Comparison

To understand whether any observed changes at the School Street sensors are genuinely due to the intervention, it is essential to compare them with nearby background or roadside control sites that were not affected by School Street restrictions. This helps distinguish true intervention effects from borough‑wide changes (e.g., weather, seasonal variation, wider traffic trends)

Three nearby reference monitors were selected as controls:

  • Laurence House (Catford)

  • Loampit Vale

  • New Cross

These sites are close enough to share meteorological and background traffic conditions, but far enough from the School Street to be unaffected by the intervention.

Hourly timeVariation plots for control sites

These plots show:

hourly NO₂ patterns before and after the School Street started

weekday‑specific patterns

a difference plot showing whether concentrations changed at the control sites

Preparing the control site data

The control sites were filtered from the full dataset, restricted to NO₂, and reshaped into the wide format required by timeVariation().

reference_controls <- SS_Measured_Data %>%
  filter(site %in% c(
    "lewisham_laurence_house_catford",
    "lewisham_loampit_vale",
    "lewisham_new_cross"
  ))

ref_cont <- reference_controls %>% 
  filter(pollutant == "no2")

ref_cont_wide <- ref_cont %>%
  select(datetime, value, period, site) %>%
  rename(date = datetime, no2 = value)

ref_cont_tv_output <- timeVariation(
  ref_cont_wide,
  pollutant = "no2",     
  group = "period",      
 )

plot(ref_cont_tv_output, subset = "hour")

plot(ref_cont_tv_output, subset = "hour.weekday")

Sensor Control

sensor_controls <- SS_Measured_Data %>%
  filter(site %in% c(
    "lewisham_ellerdale_st_sensor",
    "lewisham_micheldever_rd_sensor",
    "lewisham_waller_road_sensor"
  ))

sensor_cont <- sensor_controls %>% 
  filter(pollutant == "no2")

sensor_cont_wide <- sensor_cont %>%
  select(datetime, value, period, site) %>%
  rename(date = datetime, no2 = value)

sensor_cont_tv_output <- timeVariation(
  sensor_cont_wide,
  pollutant = "no2",     
  group = "period",      
)

plot(sensor_cont_tv_output, subset = "hour")

plot(sensor_cont_tv_output, subset = "hour.weekday")

A split date was defined to match the Downderry School Street activation:

downderry_split_date <- as.POSIXct("2025-05-05 00:00:00", tz = "UTC")
  
  dow_Ref_cont_split_wide <- ref_cont_wide %>%
    splitByDate(
    dates  = downderry_split_date,
    labels = c("Before Downderry SS", "After Downderry SS")
  )
  
  dow_sensor_cont_wide <- ref_cont_wide %>%
    splitByDate(
    dates  = downderry_split_date,
    labels = c("Before Downderry SS", "After Downderry SS"))
    
  dowSplit_sensor_cont_tv_output <- timeVariation(
  sensor_cont_wide,
  pollutant = "no2",     
  group = "period")

plot( dowSplit_sensor_cont_tv_output, subset = "hour")

plot(dowSplit_sensor_cont_tv_output, subset = "hour.weekday")

ststephen_split_date <- as.POSIXct("2025-09-01 00:00:00", tz = "UTC")

 ss_ref_cont_split_wide <- ref_cont_wide %>%
    splitByDate(
    dates  = ststephen_split_date,
    labels = c("Before St Stephen SS", "After St Stephen SS")
  )
 
 ssSplit_sensor_cont_tv_output <- timeVariation(
  sensor_cont_wide,
  pollutant = "no2",     
  group = "period",      
)

plot( ssSplit_sensor_cont_tv_output, subset = "hour")

plot( ssSplit_sensor_cont_tv_output, subset = "hour.weekday")

This creates two datasets:

  • Before School Street

  • After School Street

for each control site.

Interpretation of Control Site Patterns

Overall behaviour Across all three control sites:

Hourly NO₂ patterns before and after the Downderry School Street activation are extremely similar.

There is no consistent reduction during the School Street hours (08:00–09:15 and 14:45–16:00).

The difference plots show no systematic change in the control sites around the activation date.

This is exactly what we expect if:

the School Street intervention only affects the immediate school‑gate environment

wider borough‑level traffic patterns did not change at the same time

meteorology is the dominant driver of short‑term variation

Comparison with Downderry Primary

When compared with the Downderry School Street sensor:

Downderry shows small reductions during School Street hours after activation

Control sites do not show these reductions

This strengthens the interpretation that the observed changes at Downderry are intervention‑related, not borough‑wide effects

No evidence of displacement

If School Streets displaced traffic onto nearby roads, we would expect:

  • increases in NO₂ at Laurence House, Loampit Vale, or New Cross

  • especially during School Street hours

No such increases were observed.

Summary of Control Site Findings

Control sites show no before/after change around the School Street activation date.

This confirms that borough‑wide factors (weather, seasonal variation, general traffic trends) did not change in a way that could explain the reductions seen at Downderry.

The contrast between the School Street site and the controls provides stronger evidence that the intervention had a localised, positive effect on NO₂ during the restricted hours.

There is no evidence of traffic displacement to nearby roads.

GLMM/GAMM - “All else being equal, do School Street sites have lower NO₂ than control sites?”

Data preparation

School Street metadata (start dates) were joined to the hourly Zephyr dataset. Two key variables were created: - treatment — 1 for School Street sensors, 0 for all others - post — 1 for timestamps after the School Street activation date at treated sites - treatment × post — the Difference‑in‑Differences term representing the intervention effect

panel_no2 <- SS_Measured_Data %>%
  mutate(
    treatment = if_else(!is.na(start_date), 1L, 0L),
    post = if_else(
      treatment == 1L & as.Date(datetime) >= as.Date(start_date),
      1L, 0L
    )
  )

panel_no2 <- panel_no2 %>%
  mutate(
    hour = lubridate::hour(datetime),
    weekday = lubridate::wday(datetime, label = TRUE),
    month = lubridate::month(datetime, label = TRUE)
  )

cut_date <- min(as.Date(school_streets$start_date))

Download weather data from City Airport and prepare the Met Office data - keep only what we need

library(worldmet)

cityairport_met <- import_ghcn_hourly("UKI0000EGLC", year = 2025:2026)

met_clean <- cityairport_met %>%
  rename(datetime = date) %>%
  select(datetime, air_temp, ws, wd, rh) %>%   
  mutate(datetime = as.POSIXct(datetime))

Create a data frame with all the required information

panel_no2 <- Measured_Sensor_Data %>%
  filter(pollutant == "no2") %>%
  left_join(school_streets %>% select(site, start_date), by = "site") %>%
  mutate(
    treatment = if_else(!is.na(start_date), 1L, 0L),
    post = if_else(as.Date(datetime) >= cut_date, 1L, 0L),
    hour = lubridate::hour(datetime),
    weekday = lubridate::wday(datetime, label = TRUE),
    month = lubridate::month(datetime, label = TRUE)
  ) %>%
  left_join(met_clean, by = "datetime")

Generalised Linear Mixed Model (GLMM)

library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
glmm_model <- lmer(
  value ~ treatment * post +
    factor(hour) + weekday + month +
    air_temp + ws + rh +
    (1 | site),
  data = panel_no2
)

summary(glmm_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## value ~ treatment * post + factor(hour) + weekday + month + air_temp +  
##     ws + rh + (1 | site)
##    Data: panel_no2
## 
## REML criterion at convergence: 778508.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6228 -0.6092 -0.1323  0.4566 15.9107 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  site     (Intercept)  42.92    6.551  
##  Residual             176.07   13.269  
## Number of obs: 97193, groups:  site, 11
## 
## Fixed effects:
##                Estimate Std. Error t value
## (Intercept)    46.51123    2.23279  20.831
## treatment      -3.68782    5.12650  -0.719
## post            6.34473    0.13884  45.698
## factor(hour)1  -1.89084    0.29588  -6.390
## factor(hour)2  -4.13080    0.29532 -13.988
## factor(hour)3  -5.39454    0.29500 -18.287
## factor(hour)4  -5.77324    0.29498 -19.572
## factor(hour)5  -4.73393    0.29474 -16.061
## factor(hour)6  -1.96480    0.29435  -6.675
## factor(hour)7   1.46190    0.29395   4.973
## factor(hour)8   3.65169    0.29442  12.403
## factor(hour)9   4.00691    0.29638  13.520
## factor(hour)10  2.77503    0.29908   9.278
## factor(hour)11  1.52181    0.30295   5.023
## factor(hour)12  0.36334    0.30610   1.187
## factor(hour)13  0.43916    0.30810   1.425
## factor(hour)14  0.55725    0.30844   1.807
## factor(hour)15  0.95069    0.30748   3.092
## factor(hour)16  1.81217    0.30557   5.930
## factor(hour)17  2.69723    0.30313   8.898
## factor(hour)18  4.01923    0.30080  13.362
## factor(hour)19  4.34347    0.29814  14.568
## factor(hour)20  4.66001    0.29632  15.726
## factor(hour)21  4.16998    0.29561  14.106
## factor(hour)22  3.31757    0.29516  11.240
## factor(hour)23  1.76133    0.29472   5.976
## weekday.L       1.47609    0.11324  13.035
## weekday.Q      -2.64935    0.11289 -23.469
## weekday.C      -0.05330    0.11289  -0.472
## weekday^4      -0.34898    0.11283  -3.093
## weekday^5      -0.91953    0.11265  -8.162
## weekday^6      -0.28379    0.11220  -2.529
## month.L        -9.43925    0.17912 -52.698
## month.Q         2.89454    0.26922  10.752
## month.C        -3.50188    0.16126 -21.716
## month^4        -2.18574    0.17203 -12.705
## month^5         4.21962    0.15813  26.685
## month^6         2.37844    0.15787  15.065
## month^7        -4.83383    0.15703 -30.783
## month^8         1.09102    0.15532   7.024
## month^9        -0.11671    0.15682  -0.744
## month^10       -3.58948    0.16315 -22.001
## month^11       -0.27572    0.17068  -1.615
## air_temp       -0.83255    0.01422 -58.564
## ws             -2.01642    0.02459 -81.993
## rh             -0.10418    0.00413 -25.227
## treatment:post  0.99301    0.25898   3.834
## 
## Correlation matrix not shown by default, as p = 47 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

Change‑Point Detection Using Bayesian Segmented Regression (mcp)

Change‑point detection works best on daily means, which smooth out hourly meteorological noise. mcp requires numeric x so change date format to a number,

daily_no2 <- panel_no2 %>%
  group_by(site, date = as.Date(datetime)) %>%
  summarise(
    no2 = mean(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    date_num = as.numeric(date)   # mcp requires numeric x
  )

Remove NaN and NA values for model fitting

daily_no2_clean <- daily_no2 %>%
  filter(!is.nan(no2) & !is.na(no2))

Create group variable: School Street vs Control

daily_no2_grouped <- daily_no2_clean %>%
  mutate(
    group = if_else(
      site %in% c("lewisham_albyn_rd_sensor", "lewisham_downderry_rd_sensor"),
      "school_street",
      "control"
    )
  )

Create model specification

model_spec <- list(
  no2 ~ 1,    # Baseline mean
  ~ 1               # New mean after changepoint
)

Fit individual models for each School Street site

library(mcp)
## 
## Attaching package: 'mcp'
## The following objects are masked from 'package:lme4':
## 
##     fixef, ranef
fit_ststephens <- mcp(
  model_spec,
  data = daily_no2_clean %>% filter(site == "lewisham_albyn_rd_sensor"),
  par_x = "date_num",
  iter = 2000,
  chains = 2
)
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 456
##    Unobserved stochastic nodes: 4
##    Total graph size: 5492
## 
## Initializing model
## Finished sampling in 17.8 seconds
sum_ststephens <- summary(fit_ststephens)
## Family: gaussian(link = 'identity')
## Iterations: 4000 from 2 chains.
## Segments:
##   1: no2 ~ 1
##   2: no2 ~ 1 ~ 1
## 
## Population-level parameters:
##     name    mean   lower   upper Rhat n.eff
##     cp_1 20445.7 20439.3 20453.0    1   565
##    int_1    20.8    20.0    21.6    1  2555
##    int_2    29.0    27.7    30.3    1  2308
##  sigma_1     7.6     7.1     8.1    1  2529
fit_downderry <- mcp(
  model_spec,
  data = daily_no2_clean %>% filter(site == "lewisham_downderry_rd_sensor"),
  par_x = "date_num",
  iter = 2000,
  chains = 2
)
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 456
##    Unobserved stochastic nodes: 4
##    Total graph size: 5492
## 
## Initializing model
## Finished sampling in 15.8 seconds
sum_downderry  <- summary(fit_downderry)
## Family: gaussian(link = 'identity')
## Iterations: 4000 from 2 chains.
## Segments:
##   1: no2 ~ 1
##   2: no2 ~ 1 ~ 1
## 
## Population-level parameters:
##     name    mean   lower   upper Rhat n.eff
##     cp_1 20351.5 20348.2 20354.0    1  1493
##    int_1    20.1    19.0    21.2    1  2228
##    int_2    29.5    28.4    30.6    1  2322
##  sigma_1     7.9     7.4     8.4    1  2620
# Control Sites Combined
fit_control <- mcp(
  model_spec,
  data = daily_no2_grouped %>% filter(group == "control"),
  par_x = "date_num",
  iter = 2000,
  chains = 2
)
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 3266
##    Unobserved stochastic nodes: 4
##    Total graph size: 14246
## 
## Initializing model
## Finished sampling in 106.1 seconds
sum_control    <- summary(fit_control)
## Family: gaussian(link = 'identity')
## Iterations: 4000 from 2 chains.
## Segments:
##   1: no2 ~ 1
##   2: no2 ~ 1 ~ 1
## 
## Population-level parameters:
##     name  mean lower upper Rhat n.eff
##     cp_1 20352 20342 20357  1.2   254
##    int_1    25    24    26  1.0  2185
##    int_2    33    32    33  1.0  1702
##  sigma_1    14    13    14  1.0  2509

Visualizing changepoint models

plot(fit_ststephens)

plot(fit_downderry)

plot(fit_control)

Create a comparison table

Extract posterior samples

samples_st   <- as.data.frame(do.call(rbind, fit_ststephens$mcmc_post))
samples_dw   <- as.data.frame(do.call(rbind, fit_downderry$mcmc_post))
samples_ctrl <- as.data.frame(do.call(rbind, fit_control$mcmc_post))

Compute posterior changes

change_st_samples   <- samples_st$int_2 - samples_st$int_1
change_dw_samples   <- samples_dw$int_2 - samples_dw$int_1
change_ctrl_samples <- samples_ctrl$int_2 - samples_ctrl$int_1

Helper function to summarise change

summarise_change <- function(samples, name) {
  tibble(
    site = name,
    change_mean  = mean(samples),
    change_lower = quantile(samples, 0.025),
    change_upper = quantile(samples, 0.975)
  )
}

Summaries for each group

st_summary   <- summarise_change(change_st_samples, "St Stephen's")
dw_summary   <- summarise_change(change_dw_samples, "Downderry")
ctrl_summary <- summarise_change(change_ctrl_samples, "Control Sites")

Combined School Streets

ss_samples  <- (change_st_samples + change_dw_samples) / 2
ss_summary  <- summarise_change(ss_samples, "School Streets (combined)")

School Streets vs Control

diff_samples <- ss_samples - change_ctrl_samples
ss_vs_ctrl_summary <- summarise_change(diff_samples, "School Streets vs Control")

Final comparison table

comparison_table <- bind_rows(
  st_summary,
  dw_summary,
  ctrl_summary,
  ss_summary,
  ss_vs_ctrl_summary
)

Plot

comparison_table_plot <- comparison_table %>%
  mutate(site = fct_relevel(
    site,
    "St Stephen's",
    "Downderry",
    "Control Sites",
    "School Streets (combined)",
    "School Streets vs Control"
  ))

ggplot(comparison_table_plot,
       aes(x = site,
           y = change_mean,
           ymin = change_lower,
           ymax = change_upper)) +
  geom_pointrange(size = 0.9, colour = "#1B4F72") +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey40") +
  coord_flip() +
  labs(
    title = "Change in NO₂ After School Street Interventions",
    subtitle = "Posterior mean and 95% credible intervals (mcp model)",
    x = "",
    y = "Change in NO₂ (µg/m³)"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold"),
    axis.text.y = element_text(face = "bold"),
    panel.grid.minor = element_blank()
  )