Traffic Accident Analysis

Load packages

library(sf)

## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1

library(tmap)
library(tmaptools)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

## The following object is masked from 'package:purrr':
## 
##     some

library(janitor)

## 
## Attaching package: 'janitor'

## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(tidymodels)

## -- Attaching packages -------------------------------------- tidymodels 0.1.2 --

## v broom     0.7.3      v recipes   0.1.15
## v dials     0.0.9      v rsample   0.0.8 
## v infer     0.5.3      v tune      0.1.2 
## v modeldata 0.1.0      v workflows 0.2.1 
## v parsnip   0.1.4      v yardstick 0.0.7

## -- Conflicts ----------------------------------------- tidymodels_conflicts() --
## x scales::discard() masks purrr::discard()
## x dplyr::filter()   masks stats::filter()
## x recipes::fixed()  masks stringr::fixed()
## x dplyr::lag()      masks stats::lag()
## x car::recode()     masks dplyr::recode()
## x car::some()       masks purrr::some()
## x yardstick::spec() masks readr::spec()
## x recipes::step()   masks stats::step()

library(spatialreg)

## Loading required package: spData

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Registered S3 methods overwritten by 'spatialreg':
##   method                   from 
##   residuals.stsls          spdep
##   deviance.stsls           spdep
##   coef.stsls               spdep
##   print.stsls              spdep
##   summary.stsls            spdep
##   print.summary.stsls      spdep
##   residuals.gmsar          spdep
##   deviance.gmsar           spdep
##   coef.gmsar               spdep
##   fitted.gmsar             spdep
##   print.gmsar              spdep
##   summary.gmsar            spdep
##   print.summary.gmsar      spdep
##   print.lagmess            spdep
##   summary.lagmess          spdep
##   print.summary.lagmess    spdep
##   residuals.lagmess        spdep
##   deviance.lagmess         spdep
##   coef.lagmess             spdep
##   fitted.lagmess           spdep
##   logLik.lagmess           spdep
##   fitted.SFResult          spdep
##   print.SFResult           spdep
##   fitted.ME_res            spdep
##   print.ME_res             spdep
##   print.lagImpact          spdep
##   plot.lagImpact           spdep
##   summary.lagImpact        spdep
##   HPDinterval.lagImpact    spdep
##   print.summary.lagImpact  spdep
##   print.sarlm              spdep
##   summary.sarlm            spdep
##   residuals.sarlm          spdep
##   deviance.sarlm           spdep
##   coef.sarlm               spdep
##   vcov.sarlm               spdep
##   fitted.sarlm             spdep
##   logLik.sarlm             spdep
##   anova.sarlm              spdep
##   predict.sarlm            spdep
##   print.summary.sarlm      spdep
##   print.sarlm.pred         spdep
##   as.data.frame.sarlm.pred spdep
##   residuals.spautolm       spdep
##   deviance.spautolm        spdep
##   coef.spautolm            spdep
##   fitted.spautolm          spdep
##   print.spautolm           spdep
##   summary.spautolm         spdep
##   logLik.spautolm          spdep
##   print.summary.spautolm   spdep
##   print.WXImpact           spdep
##   summary.WXImpact         spdep
##   print.summary.WXImpact   spdep
##   predict.SLX              spdep

library(spgwr)

## Loading required package: sp

## NOTE: This package does not constitute approval of GWR
## as a method of spatial analysis; see example(gwr)

library(spdep)

## 
## Attaching package: 'spdep'

## The following objects are masked from 'package:spatialreg':
## 
##     anova.sarlm, as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
##     as_dsCMatrix_IrW, as_dsTMatrix_listw, bptest.sarlm, can.be.simmed,
##     cheb_setup, coef.gmsar, coef.sarlm, coef.spautolm, coef.stsls,
##     create_WX, deviance.gmsar, deviance.sarlm, deviance.spautolm,
##     deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
##     fitted.SFResult, fitted.spautolm, get.ClusterOption,
##     get.coresOption, get.mcOption, get.VerboseOption,
##     get.ZeroPolicyOption, GMargminImage, GMerrorsar, griffith_sone,
##     gstsls, Hausman.test, HPDinterval.lagImpact, impacts, intImpacts,
##     Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
##     lextrS, lextrW, lmSLX, logLik.sarlm, logLik.spautolm, LR.sarlm,
##     LR1.sarlm, LR1.spautolm, LU_prepermutate_setup, LU_setup,
##     Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
##     predict.SLX, print.gmsar, print.ME_res, print.sarlm,
##     print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
##     print.summary.gmsar, print.summary.sarlm, print.summary.spautolm,
##     print.summary.stsls, residuals.gmsar, residuals.sarlm,
##     residuals.spautolm, residuals.stsls, sacsarlm, SE_classic_setup,
##     SE_interp_setup, SE_whichMin_setup, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption, similar.listw, spam_setup, spam_update_setup,
##     SpatialFiltering, spautolm, spBreg_err, spBreg_lag, spBreg_sac,
##     stsls, subgraph_eigenw, summary.gmsar, summary.sarlm,
##     summary.spautolm, summary.stsls, trW, vcov.sarlm, Wald1.sarlm

library(RColorBrewer)
library(here)

## here() starts at C:/Users/ruoxu/Desktop/CASA0005 GIS/Assessment/Final

1. Load map

Load parliamentary constituencies map uk

Shapefile on github url:https://github.com/RuoxuanHong/GISAssessment/tree/main/Parliamentary_Constituencies

parliamentaryconstituencies <- st_read(here::here("Westminster_Parliamentary_Constituencies", 
                                 "Westminster_Parliamentary_Constituencies__December_2015__Boundaries.shp"))%>%
  st_transform(., 27700)

## Reading layer `Westminster_Parliamentary_Constituencies__December_2015__Boundaries' from data source `C:\Users\ruoxu\Desktop\CASA0005 GIS\Assessment\Final\Westminster_Parliamentary_Constituencies\Westminster_Parliamentary_Constituencies__December_2015__Boundaries.shp' using driver `ESRI Shapefile'
## Simple feature collection with 626 features and 5 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -8.649996 ymin: 49.86474 xmax: 1.762942 ymax: 60.86078
## geographic CRS: WGS 84

qtm(parliamentaryconstituencies)

load parliamentary constituencies map london

londonparliamentaryconstituenciesmap <- parliamentaryconstituencies %>%
  dplyr::filter(str_detect(pcon15cd, "E14000540")|
                str_detect(pcon15cd, "E14000549")|
                str_detect(pcon15cd, "E14000551")|
                  str_detect(pcon15cd, "E14000553")|
                  str_detect(pcon15cd, "E14000555")|
                  str_detect(pcon15cd, "E14000558")|
                  str_detect(pcon15cd, "E14000591")|
                  str_detect(pcon15cd, "E14000592")|
                  str_detect(pcon15cd, "E14000593")|
                  str_detect(pcon15cd, "E14000604")|
                  str_detect(pcon15cd, "E14000615")|
                  str_detect(pcon15cd, "E14000621")|
                  str_detect(pcon15cd, "E14000629")|
                  str_detect(pcon15cd, "E14000634")|
                  str_detect(pcon15cd, "E14000636")|
                  str_detect(pcon15cd, "E14000639")|
                  str_detect(pcon15cd, "E14000654")|
                  str_detect(pcon15cd, "E14000655")|
                  str_detect(pcon15cd, "E14000656")|
                  str_detect(pcon15cd, "E14000657")|
                  str_detect(pcon15cd, "E14000673")|
                  str_detect(pcon15cd, "E14000674")|
                  str_detect(pcon15cd, "E14000675")|
                  str_detect(pcon15cd, "E14000676")|
                  str_detect(pcon15cd, "E14000679")|
                  str_detect(pcon15cd, "E14000687")|
                  str_detect(pcon15cd, "E14000690")|
                  str_detect(pcon15cd, "E14000691")|
                  str_detect(pcon15cd, "E14000692")|
                  str_detect(pcon15cd, "E14000696")|
                  str_detect(pcon15cd, "E14000701")|
                  str_detect(pcon15cd, "E14000703")|
                  str_detect(pcon15cd, "E14000718")|
                  str_detect(pcon15cd, "E14000720")|
                  str_detect(pcon15cd, "E14000721")|
                  str_detect(pcon15cd, "E14000726")|
                  str_detect(pcon15cd, "E14000727")|
                  str_detect(pcon15cd, "E14000731")|
                  str_detect(pcon15cd, "E14000732")|
                  str_detect(pcon15cd, "E14000737")|
                  str_detect(pcon15cd, "E14000741")|
                  str_detect(pcon15cd, "E14000750")|
                  str_detect(pcon15cd, "E14000751")|
                  str_detect(pcon15cd, "E14000752")|
                  str_detect(pcon15cd, "E14000759")|
                  str_detect(pcon15cd, "E14000760")|
                  str_detect(pcon15cd, "E14000763")|
                  str_detect(pcon15cd, "E14000764")|
                  str_detect(pcon15cd, "E14000768")|
                  str_detect(pcon15cd, "E14000770")|
                  str_detect(pcon15cd, "E14000787")|
                  str_detect(pcon15cd, "E14000788")|
                  str_detect(pcon15cd, "E14000789")|
                  str_detect(pcon15cd, "E14000790")|
                  str_detect(pcon15cd, "E14000823")|
                  str_detect(pcon15cd, "E14000869")|
                  str_detect(pcon15cd, "E14000872")|
                  str_detect(pcon15cd, "E14000882")|
                  str_detect(pcon15cd, "E14000887")|
                  str_detect(pcon15cd, "E14000896")|
                  str_detect(pcon15cd, "E14000900")|
                  str_detect(pcon15cd, "E14000906")|
                  str_detect(pcon15cd, "E14000978")|
                  str_detect(pcon15cd, "E14000984")|
                  str_detect(pcon15cd, "E14000998")|
                  str_detect(pcon15cd, "E14001002")|
                  str_detect(pcon15cd, "E14001005")|
                  str_detect(pcon15cd, "E14001007")|
                  str_detect(pcon15cd, "E14001008")|
                  str_detect(pcon15cd, "E14001013")|
                  str_detect(pcon15cd, "E14001032")|
                  str_detect(pcon15cd, "E14001036")|
                  str_detect(pcon15cd, "E14001040"))%>%
  st_transform(., 27700)

  qtm(londonparliamentaryconstituenciesmap)

2. Load traffic accident dataset

csv file on Github url:https://github.com/RuoxuanHong/GISAssessment/raw/main/traffic_accident_data.csv

  Trafficaccidentprofile <- read_csv("traffic_accident_data.csv", 
                                 col_names = TRUE, 
                                 locale = locale(encoding = 'Latin1'))

## 
## -- Column specification --------------------------------------------------------
## cols(
##   `Parliamentary constituency` = col_character(),
##   `Ons code` = col_character(),
##   all_accidents = col_double(),
##   cars_per_household = col_double(),
##   travel_by_bicycle_to_work = col_double(),
##   employment_rate = col_double(),
##   public_transport_accessibility = col_double(),
##   population_density = col_double(),
##   area_of_road = col_double()
## )

Map traffic accident

Trafficaccidentnumber <- londonparliamentaryconstituenciesmap%>%
    left_join(.,
              Trafficaccidentprofile, 
              by = c("pcon15cd" = "Ons code"))
  

tmap_mode("view")

## tmap mode set to interactive viewing

  qtm(Trafficaccidentnumber, 
      fill = "all_accidents", 
      borders = NULL,  
      fill.palette = "Blues")

3. Traffic accidents data transformation

Distribution of traddic accidents

Trafficaccidentnumber <- Trafficaccidentnumber %>%
    clean_names()

ggplot(Trafficaccidentnumber, aes(x=all_accidents)) + 
    geom_histogram(aes(y = ..density..),
                   binwidth = 100) + 
    geom_density(colour="red", 
                 size=1, 
                 adjust=1)

Tukey

symbox(~all_accidents, 
       Trafficaccidentnumber, 
       na.rm=T,
       powers=seq(-3,3,by=.25))

Plot transformed dependent variable distribution

ggplot(Trafficaccidentnumber, aes((x=all_accidents)^-0.75) )+ 
  geom_histogram(aes(y = ..density..),
                 binwidth = 0.003) + 
  geom_density(colour="red", 
               size=1, 
               adjust=1)

4. Spatial Weight matrix

###Calculate the centroids of all parliamentary constituencies

coordsW <- Trafficaccidentnumber%>%
  st_centroid()%>%
  st_geometry()

## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x

plot(coordsW)

Queen’s case and nerest neighbours comparison

LWard_nb <- Trafficaccidentnumber %>%
  poly2nb(., queen=T)


knn_wards <-coordsW %>%
  knearneigh(., k=4)

LWard_knn <- knn_wards %>%
  knn2nb()

plot(LWard_nb, st_geometry(coordsW), col="red")

plot(LWard_knn, st_geometry(coordsW), col="blue")

KNN4 spatial weight matrix

Lward.knn_4_weight <- LWard_knn %>%
  nb2listw(., style="C")

5. traffic accident spatial analysis

Moran’s I

Trafficaccidentnumbermoran <- Trafficaccidentnumber %>%
  st_drop_geometry()%>%
  dplyr::select(all_accidents)%>%
  pull()%>%
  moran.test(., Lward.knn_4_weight)
Trafficaccidentnumbermoran

## 
##  Moran I test under randomisation
## 
## data:  .  
## weights: Lward.knn_4_weight    
## 
## Moran I statistic standard deviate = 5.2859, p-value = 6.253e-08
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        0.30132933       -0.01388889        0.00355613

Traffic accident Geary’s C

TrafficaccidentnumberGeary <- Trafficaccidentnumber %>%
  st_drop_geometry()%>%
  dplyr::select(all_accidents)%>%
  pull()%>%
  geary.test(., Lward.knn_4_weight)
TrafficaccidentnumberGeary

## 
##  Geary C test under randomisation
## 
## data:  . 
## weights: Lward.knn_4_weight 
## 
## Geary C statistic standard deviate = 3.3484, p-value = 0.0004063
## alternative hypothesis: Expectation greater than statistic
## sample estimates:
## Geary C statistic       Expectation          Variance 
##        0.61692990        1.00000000        0.01308791

Getis Ord General G

TrafficaccidentnumberG <- Trafficaccidentnumber %>%
  st_drop_geometry()%>%
  dplyr::select(all_accidents)%>%
  pull()%>%
  globalG.test(., Lward.knn_4_weight)
TrafficaccidentnumberG

## 
##  Getis-Ord global G statistic
## 
## data:  . 
## weights: Lward.knn_4_weight 
## 
## standard deviate = 3.9577, p-value = 3.783e-05
## alternative hypothesis: greater
## sample estimates:
## Global G statistic        Expectation           Variance 
##       1.519722e-02       1.388889e-02       1.092809e-07

local Moran’s I

Trafficaccidentnumberlocalmoran <- Trafficaccidentnumber %>%
  st_drop_geometry()%>%
  dplyr::select(all_accidents)%>%
  pull()%>%
  localmoran(., Lward.knn_4_weight)%>%
  as_tibble()
Trafficaccidentnumberlocalmoran

## # A tibble: 73 x 5
##    Ii           E.Ii        Var.Ii     Z.Ii        `Pr(z > 0)`
##    <localmrn>   <localmrn>  <localmrn> <localmrn>  <localmrn> 
##  1 -0.001541294 -0.01388889 0.1499327   0.03188851 0.48728048 
##  2  0.092705836 -0.01388889 0.1499327   0.27528820 0.39154742 
##  3  0.444884871 -0.01388889 0.1499327   1.18481473 0.11804535 
##  4  0.679371386 -0.01388889 0.1499327   1.79039225 0.03669544 
##  5  0.665843338 -0.01388889 0.1499327   1.75545514 0.03959074 
##  6  0.624200456 -0.01388889 0.1499327   1.64790954 0.04968562 
##  7 -0.007604142 -0.01388889 0.1499327   0.01623079 0.49352514 
##  8  0.077168611 -0.01388889 0.1499327   0.23516224 0.40704140 
##  9 -0.151812323 -0.01388889 0.1499327  -0.35619674 0.63915338 
## 10  0.358180751 -0.01388889 0.1499327   0.96089539 0.16830238 
## # ... with 63 more rows

Map local Moran’s I

Trafficaccidentnumber <- Trafficaccidentnumber %>%
  mutate(all_accidents_I = as.numeric(Trafficaccidentnumberlocalmoran$Ii))%>%
  mutate(all_accidents_Iz =as.numeric(Trafficaccidentnumberlocalmoran$Z.Ii))

# set the breaks and colour
breaks1<-c(-1000,-2.58,-1.96,-1.65,1.65,1.96,2.58,1000)
MoranColours<- rev(brewer.pal(8, "RdGy"))

# Local Moran's I map
tm_shape(Trafficaccidentnumber) +
  tm_polygons("all_accidents_Iz",
              style="fixed",
              breaks=breaks1,
              palette=MoranColours,
              midpoint=NA,
              title="Local Moran's I, Traffic Accidents in London")

Local G map

TrafficaccidentnumberGi <- Trafficaccidentnumber %>%
  st_drop_geometry()%>%
  dplyr::select(all_accidents)%>%
  pull()%>%
  localG(., Lward.knn_4_weight)
TrafficaccidentnumberGi

##  [1] -0.59000355  1.15493570 -1.32805255  1.33353768  1.48580918 -1.43590428
##  [7] -0.04332678 -0.89280049 -0.83949865 -1.03943408  1.29543198 -0.74257624
## [13]  0.72218297 -0.53886926  0.08906777  3.81814115 -0.54828457 -0.56140640
## [19] -0.79675911 -0.99688441  0.52494398  0.49366998 -0.89479140  0.18332628
## [25]  0.35949705 -0.32491571 -0.72044494 -0.91397067 -0.52928968 -1.24416972
## [31] -0.14307863 -0.32047524  0.26487426  1.01824823  1.27875742  0.79595280
## [37]  1.31670081 -0.18125781 -1.26578962 -0.57854903 -0.57019500  0.28440344
## [43] -0.48606502  0.56225510 -0.88735666 -0.45834811  1.61680944  1.35095895
## [49]  4.11008758 -1.26941274 -0.11948853  0.35353086  0.78199116 -0.16349457
## [55] -0.51875738 -1.50425723 -1.43428590  1.38092041 -0.24065798 -0.66784274
## [61] -0.70795149 -1.34731721  0.68874691 -1.24634643 -0.52757749  0.06823690
## [67]  0.02780434 -0.82976260  4.17589330 -0.30128213  0.42715962  4.72618130
## [73] -0.84602827
## attr(,"gstari")
## [1] FALSE
## attr(,"call")
## localG(x = ., listw = Lward.knn_4_weight)
## attr(,"class")
## [1] "localG"

Trafficaccidentnumber <- Trafficaccidentnumber %>%
  mutate(all_accidents_G = as.numeric(TrafficaccidentnumberGi))

GIColours<- rev(brewer.pal(8, "RdBu"))

# plot
tm_shape(Trafficaccidentnumber) +
  tm_polygons("all_accidents_G",
              style="fixed",
              breaks=breaks1,
              palette=GIColours,
              midpoint=NA,
              title="Gi*, Traffic Accidents in London")

6. Multiple Linear regression

Model 1

Regressiondata<- Trafficaccidentnumber%>%
  clean_names()%>%
  dplyr::select(all_accidents, 
                cars_per_household,
                travel_by_bicycle_to_work,
                employment_rate,
                public_transport_accessibility,
                population_density,
                area_of_road)

model1 <- Regressiondata %>%
  lm(all_accidents^-0.75 ~
       cars_per_household+
       travel_by_bicycle_to_work+
       employment_rate+
       public_transport_accessibility+
       population_density+
       area_of_road,
     data=.)
summary(model1)

## 
## Call:
## lm(formula = all_accidents^-0.75 ~ cars_per_household + travel_by_bicycle_to_work + 
##     employment_rate + public_transport_accessibility + population_density + 
##     area_of_road, data = .)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0041941 -0.0012646 -0.0000427  0.0011093  0.0052693 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -3.974e-05  5.433e-03  -0.007 0.994186    
## cars_per_household              5.855e-03  2.570e-03   2.278 0.025966 *  
## travel_by_bicycle_to_work      -1.995e-04  1.516e-04  -1.316 0.192700    
## employment_rate                 2.123e-04  7.456e-05   2.847 0.005874 ** 
## public_transport_accessibility -5.073e-05  6.737e-05  -0.753 0.454072    
## population_density              9.034e-05  2.546e-05   3.548 0.000721 ***
## area_of_road                   -7.201e-04  2.534e-04  -2.842 0.005959 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.00211 on 66 degrees of freedom
## Multiple R-squared:  0.6848, Adjusted R-squared:  0.6562 
## F-statistic:  23.9 on 6 and 66 DF,  p-value: 8.134e-15

vif test

vif(model1)

##             cars_per_household      travel_by_bicycle_to_work 
##                       9.351064                       3.667648 
##                employment_rate public_transport_accessibility 
##                       1.992852                       5.995516 
##             population_density                   area_of_road 
##                      14.593310                      18.703014

Model2

model2 <- Regressiondata %>%
  lm(all_accidents^-0.75 ~
       cars_per_household+
       travel_by_bicycle_to_work+
       employment_rate+
       public_transport_accessibility,
     data=.)
summary(model2)

## 
## Call:
## lm(formula = all_accidents^-0.75 ~ cars_per_household + travel_by_bicycle_to_work + 
##     employment_rate + public_transport_accessibility, data = .)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0049322 -0.0011667 -0.0002152  0.0011793  0.0052347 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                     8.945e-04  4.284e-03   0.209   0.8352   
## cars_per_household              3.941e-03  2.308e-03   1.708   0.0922 . 
## travel_by_bicycle_to_work      -1.246e-06  1.518e-04  -0.008   0.9935   
## employment_rate                 1.679e-04  7.888e-05   2.129   0.0369 * 
## public_transport_accessibility -1.592e-04  5.069e-05  -3.140   0.0025 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.00227 on 68 degrees of freedom
## Multiple R-squared:  0.624,  Adjusted R-squared:  0.6019 
## F-statistic: 28.21 on 4 and 68 DF,  p-value: 8.009e-14

Model3

model3 <- Regressiondata %>%
  lm(all_accidents^-0.75 ~
       employment_rate+
       public_transport_accessibility,
     data=.)
summary(model3)

## 
## Call:
## lm(formula = all_accidents^-0.75 ~ employment_rate + public_transport_accessibility, 
##     data = .)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0046436 -0.0015206 -0.0002776  0.0015091  0.0051398 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     9.722e-05  4.254e-03   0.023 0.981830    
## employment_rate                 2.441e-04  5.959e-05   4.095 0.000112 ***
## public_transport_accessibility -2.532e-04  3.104e-05  -8.156 9.44e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002324 on 70 degrees of freedom
## Multiple R-squared:  0.5943, Adjusted R-squared:  0.5827 
## F-statistic: 51.27 on 2 and 70 DF,  p-value: 1.935e-14

Lagrange Multiplier Diagnostics

lm.LMtests(model3,Lward.knn_4_weight, zero.policy=NULL, test="all", spChk=NULL, naSubset=TRUE)

## Warning in lm.LMtests(model3, Lward.knn_4_weight, zero.policy = NULL, test =
## "all", : Spatial weights matrix not row standardized

## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = all_accidents^-0.75 ~ employment_rate +
## public_transport_accessibility, data = .)
## weights: Lward.knn_4_weight
## 
## LMerr = 8.1614, df = 1, p-value = 0.004279
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = all_accidents^-0.75 ~ employment_rate +
## public_transport_accessibility, data = .)
## weights: Lward.knn_4_weight
## 
## LMlag = 5.2838, df = 1, p-value = 0.02152
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = all_accidents^-0.75 ~ employment_rate +
## public_transport_accessibility, data = .)
## weights: Lward.knn_4_weight
## 
## RLMerr = 2.8996, df = 1, p-value = 0.0886
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = all_accidents^-0.75 ~ employment_rate +
## public_transport_accessibility, data = .)
## weights: Lward.knn_4_weight
## 
## RLMlag = 0.021988, df = 1, p-value = 0.8821
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = all_accidents^-0.75 ~ employment_rate +
## public_transport_accessibility, data = .)
## weights: Lward.knn_4_weight
## 
## SARMA = 8.1834, df = 2, p-value = 0.01671

Print some model diagnositcs.

par(mfrow=c(2,2))
plot(model3)

Durbin-Watson test

DW <- durbinWatsonTest(model3)
tidy(DW)

## # A tibble: 1 x 5
##   statistic p.value autocorrelation method             alternative
##       <dbl>   <dbl>           <dbl> <chr>              <chr>      
## 1      1.83   0.504          0.0792 Durbin-Watson Test two.sided

Write the residuals outand add them to the shapelayer

model_data3 <- model3 %>%
  augment(., Regressiondata)


Trafficaccidentnumber <- Trafficaccidentnumber %>%
  mutate(model3resids = residuals(model3))

7. SLM and SEM to include spatial elements

plot the residuals

tmap_mode("view")

## tmap mode set to interactive viewing

tm_shape(Trafficaccidentnumber) +
  tm_polygons("model3resids",
              palette = "RdYlBu")

## Variable(s) "model3resids" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

residual moran’s I

Nearest_neighbour <- Trafficaccidentnumber %>%
  st_drop_geometry()%>%
  dplyr::select(model3resids)%>%
  pull()%>%
  moran.test(., Lward.knn_4_weight)%>%
  tidy()

Nearest_neighbour

## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic  p.value method            alternative
##       <dbl>     <dbl>     <dbl>     <dbl>    <dbl> <chr>             <chr>      
## 1     0.224   -0.0139   0.00579      3.13 0.000868 Moran I test und~ greater

Spatially lagged model

slag_dv_model3_knn4 <- lagsarlm(all_accidents^-0.75 ~
                                  employment_rate+
                                  public_transport_accessibility,
                                data=Trafficaccidentnumber, 
                                nb2listw(LWard_knn, 
                                         style="C"), 
                                method = "eigen")

## Warning: Function lagsarlm moved to the spatialreg package

## Warning in spatialreg::lagsarlm(formula = formula, data = data, listw = listw, : inversion of asymptotic covariance matrix failed for tol.solve = 1e-10 
##   reciprocal condition number = 3.77775e-11 - using numerical Hessian.

summary(slag_dv_model3_knn4)

## 
## Call:spatialreg::lagsarlm(formula = formula, data = data, listw = listw, 
##     na.action = na.action, Durbin = Durbin, type = type, method = method, 
##     quiet = quiet, zero.policy = zero.policy, interval = interval, 
##     tol.solve = tol.solve, trs = trs, control = control)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0048335 -0.0011642 -0.0003059  0.0011164  0.0046558 
## 
## Type: lag 
## Coefficients: (numerical Hessian approximate standard errors) 
##                                   Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)                    -3.0695e-03  4.1907e-03 -0.7324 0.4638964
## employment_rate                 2.1762e-04  5.6799e-05  3.8314 0.0001274
## public_transport_accessibility -1.9385e-04  3.8307e-05 -5.0603 4.185e-07
## 
## Rho: 0.31118, LR test value: 5.1604, p-value: 0.023107
## Approximate (numerical Hessian) standard error: 0.13112
##     z-value: 2.3732, p-value: 0.017636
## Wald statistic: 5.6319, p-value: 0.017636
## 
## Log likelihood: 343.2385 for lag model
## ML residual variance (sigma squared): 4.7246e-06, (sigma: 0.0021736)
## Number of observations: 73 
## Number of parameters estimated: 5 
## AIC: -676.48, (AIC for lm: -673.32)

Residual Moran test on SLM

Trafficaccidentnumber <- Trafficaccidentnumber %>%
  mutate(slag_dv_model3_knn_resids = residuals(slag_dv_model3_knn4))

KNN4Moran <- Trafficaccidentnumber %>%
  st_drop_geometry()%>%
  dplyr::select(slag_dv_model3_knn_resids)%>%
  pull()%>%
  moran.test(., Lward.knn_4_weight)%>%
  tidy()
KNN4Moran

## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic p.value method             alternative
##       <dbl>     <dbl>     <dbl>     <dbl>   <dbl> <chr>              <chr>      
## 1    0.0775   -0.0139   0.00579      1.20   0.115 Moran I test unde~ greater

Spatial error model

sem_model1 <- errorsarlm(all_accidents^-0.75 ~
                           employment_rate+
                           public_transport_accessibility,
                         data=Trafficaccidentnumber,
                         nb2listw(LWard_knn, style="C"), 
                         method = "eigen")

## Warning: Function errorsarlm moved to the spatialreg package

## Warning in spatialreg::errorsarlm(formula = formula, data = data, listw = listw, : inversion of asymptotic covariance matrix failed for tol.solve = 1e-10 
##   reciprocal condition number = 3.07418e-11 - using numerical Hessian.

tidy(sem_model1)

## # A tibble: 4 x 5
##   term                            estimate std.error statistic       p.value
##   <chr>                              <dbl>     <dbl>     <dbl>         <dbl>
## 1 (Intercept)                    -0.00447  0.00463      -0.967 0.334        
## 2 employment_rate                 0.000307 0.0000648     4.74  0.00000215   
## 3 public_transport_accessibility -0.000240 0.0000404    -5.95  0.00000000262
## 4 lambda                          0.498    0.144         3.45  0.000555

summary(sem_model1)

## 
## Call:spatialreg::errorsarlm(formula = formula, data = data, listw = listw, 
##     na.action = na.action, Durbin = Durbin, etype = etype, method = method, 
##     quiet = quiet, zero.policy = zero.policy, interval = interval, 
##     tol.solve = tol.solve, trs = trs, control = control)
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -0.00458610 -0.00120734 -0.00019175  0.00117793  0.00492318 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                                   Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)                    -4.4747e-03  4.6276e-03 -0.9670    0.3336
## employment_rate                 3.0692e-04  6.4762e-05  4.7392 2.145e-06
## public_transport_accessibility -2.4039e-04  4.0376e-05 -5.9539 2.618e-09
## 
## Lambda: 0.49843, LR test value: 9.141, p-value: 0.0024994
## Approximate (numerical Hessian) standard error: 0.14436
##     z-value: 3.4526, p-value: 0.00055522
## Wald statistic: 11.92, p-value: 0.00055522
## 
## Log likelihood: 345.2288 for error model
## ML residual variance (sigma squared): 4.3101e-06, (sigma: 0.0020761)
## Number of observations: 73 
## Number of parameters estimated: 5 
## AIC: -680.46, (AIC for lm: -673.32)

Residual Moran test on SEM

Trafficaccidentnumber <- Trafficaccidentnumber %>%
  mutate(sem_model1 = residuals(sem_model1))

KNN4Moranerr <- Trafficaccidentnumber %>%
  st_drop_geometry()%>%
  dplyr::select(sem_model1)%>%
  pull()%>%
  moran.test(., Lward.knn_4_weight)%>%
  tidy()
KNN4Moranerr

## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic p.value method             alternative
##       <dbl>     <dbl>     <dbl>     <dbl>   <dbl> <chr>              <chr>      
## 1    0.0109   -0.0139   0.00578     0.327   0.372 Moran I test unde~ greater

8. GWR

Transformation

st_crs(Trafficaccidentnumber) = 27700

TrafficaccidentnumberSP <- Trafficaccidentnumber %>%
  as(., "Spatial")

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Airy 1830 ellipsoid in CRS
## definition

## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum OSGB 1936 in CRS definition

st_crs(coordsW) = 27700

coordsWSP <- coordsW %>%
  as(., "Spatial")

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Airy 1830 ellipsoid in CRS
## definition

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum OSGB 1936 in CRS definition

coordsWSP

## class       : SpatialPoints 
## features    : 73 
## extent      : 508137.9, 556089.6, 160777.1, 198396.4  (xmin, xmax, ymin, ymax)
## crs         : +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +units=m +no_defs

Calculate kernel bandwidth

GWRbandwidth <- gwr.sel(all_accidents^-0.75 ~
                          employment_rate+
                          public_transport_accessibility, 
                        data = TrafficaccidentnumberSP, 
                        coords=coordsWSP,
                        adapt=T)

## Warning in gwr.sel(all_accidents^-0.75 ~ employment_rate +
## public_transport_accessibility, : data is Spatial* object, ignoring coords
## argument

## Adaptive q: 0.381966 CV score: 0.0003925858 
## Adaptive q: 0.618034 CV score: 0.0003960714 
## Adaptive q: 0.236068 CV score: 0.0003920144 
## Adaptive q: 0.2400818 CV score: 0.0003919166 
## Adaptive q: 0.2992006 CV score: 0.0003918024 
## Adaptive q: 0.3308142 CV score: 0.0003931159 
## Adaptive q: 0.2723567 CV score: 0.00039193 
## Adaptive q: 0.3112759 CV score: 0.0003922441 
## Adaptive q: 0.2889471 CV score: 0.0003910816 
## Adaptive q: 0.2826101 CV score: 0.0003912733 
## Adaptive q: 0.2882744 CV score: 0.0003910338 
## Adaptive q: 0.2866232 CV score: 0.000391047 
## Adaptive q: 0.2876437 CV score: 0.0003909922 
## Adaptive q: 0.2875042 CV score: 0.0003909997 
## Adaptive q: 0.287746 CV score: 0.0003909961 
## Adaptive q: 0.2876844 CV score: 0.0003909917 
## Adaptive q: 0.2876844 CV score: 0.0003909917

Run the gwr model

gwr.model = gwr(all_accidents^-0.75 ~
                  employment_rate+
                  public_transport_accessibility, 
                data = TrafficaccidentnumberSP, 
                coords=coordsWSP, 
                adapt=GWRbandwidth, 
                hatmatrix=TRUE, 
                se.fit=TRUE)

## Warning in gwr(all_accidents^-0.75 ~ employment_rate +
## public_transport_accessibility, : data is Spatial* object, ignoring coords
## argument

## Warning in proj4string(data): CRS object has comment, which is lost in output

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Airy 1830 ellipsoid in CRS
## definition

gwr.model

## Call:
## gwr(formula = all_accidents^-0.75 ~ employment_rate + public_transport_accessibility, 
##     data = TrafficaccidentnumberSP, coords = coordsWSP, adapt = GWRbandwidth, 
##     hatmatrix = TRUE, se.fit = TRUE)
## Kernel function: gwr.Gauss 
## Adaptive quantile: 0.2876844 (about 21 of 73 data points)
## Summary of GWR coefficient estimates at data points:
##                                       Min.     1st Qu.      Median     3rd Qu.
## X.Intercept.                   -0.00090132  0.00062896  0.00154089  0.00283420
## employment_rate                 0.00016150  0.00019828  0.00022028  0.00023569
## public_transport_accessibility -0.00028049 -0.00025824 -0.00023712 -0.00022964
##                                       Max. Global
## X.Intercept.                    0.00504764  1e-04
## employment_rate                 0.00026711  2e-04
## public_transport_accessibility -0.00022070 -3e-04
## Number of data points: 73 
## Effective number of parameters (residual: 2traceS - traceS'S): 7.43329 
## Effective degrees of freedom (residual: 2traceS - traceS'S): 65.56671 
## Sigma (residual: 2traceS - traceS'S): 0.002283056 
## Effective number of parameters (model: traceS): 5.677823 
## Effective degrees of freedom (model: traceS): 67.32218 
## Sigma (model: traceS): 0.002253093 
## Sigma (ML): 0.002163698 
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): -673.7562 
## AIC (GWR p. 96, eq. 4.22): -683.0038 
## Residual sum of squares: 0.0003417562 
## Quasi-global R2: 0.6332458

results <- as.data.frame(gwr.model$SDF)
names(results)

##  [1] "sum.w"                                
##  [2] "X.Intercept."                         
##  [3] "employment_rate"                      
##  [4] "public_transport_accessibility"       
##  [5] "X.Intercept._se"                      
##  [6] "employment_rate_se"                   
##  [7] "public_transport_accessibility_se"    
##  [8] "gwr.e"                                
##  [9] "pred"                                 
## [10] "pred.se"                              
## [11] "localR2"                              
## [12] "X.Intercept._se_EDF"                  
## [13] "employment_rate_se_EDF"               
## [14] "public_transport_accessibility_se_EDF"
## [15] "pred.se.1"

Attach coefficients to original SF

Trafficaccidentnumber2 <- Trafficaccidentnumber %>%
  mutate(coefEmployment = results$employment_rate,
         coefTransport = results$public_transport_accessibility)

Employment rate coefficient plot

tm_shape(Trafficaccidentnumber2) +
  tm_polygons(col = "coefEmployment", 
              palette = "-RdBu", 
              alpha = 0.9)

Public transport accessbility coefficient plot

tm_shape(Trafficaccidentnumber2) +
  tm_polygons(col = "coefTransport", 
              palette = "RdBu", 
              alpha = 0.9)

Run the significance test

sigTest = abs(gwr.model$SDF$"public_transport_accessibility")-2 * gwr.model$SDF$"public_transport_accessibility_se"
sigTest1 = abs(gwr.model$SDF$"employment_rate")-2 * gwr.model$SDF$"employment_rate_se"

Store significance results

Trafficaccidentnumber2 <- Trafficaccidentnumber2 %>%
  mutate(GWREmploymentSig = sigTest)

tm_shape(Trafficaccidentnumber2) +
  tm_polygons(col = "GWREmploymentSig", 
              palette = "RdYlBu")