=============================================================================

COSA Severe Pedestrian Injury Areas (SPIA) - Methods Section R Script

Author: Javier Sandoval

Course: PAD 6833 - 902

Purpose: Hypothesis: Longer high-injury corridors expose pedestrians to more vehicle

conflicts and therefore accumulate more injuries, all else equal. Corridor

length is the simplest available exposure measure and the cleanest variable

in the COSA file, which is why it is tested first.

library(lmtest)
## Warning: package 'lmtest' was built under R version 4.5.3
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(car)
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
#Data
setwd("C:/Users/jalex/JS R- Homework")
COSA_Severe_Data <- read.csv("COSA Severe Pedestrian Injury Areas.csv") 
names(COSA_Severe_Data)
##  [1] "OBJECTID"               "CorridorID"             "StreetName"            
##  [4] "FromStreet"             "ToStreet"               "Incapacitated_Injuries"
##  [7] "Fata_Injuries"          "Total_Injuries"         "SPIA_Year"             
## [10] "Shape__Length"
str(COSA_Severe_Data)
## 'data.frame':    166 obs. of  10 variables:
##  $ OBJECTID              : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ CorridorID            : int  56 165 131 4 166 86 157 164 110 196 ...
##  $ StreetName            : chr  "MARBACH" "Military FM 1535" "Callaghan" "Bandera Spur 421" ...
##  $ FromStreet            : chr  "HARNESS" "Braesview" "Greensboro" "BLOOMFIELD" ...
##  $ ToStreet              : chr  "MEADOW WAY" "Wedgewood" "Fredericksburg" "SUTTON" ...
##  $ Incapacitated_Injuries: int  2 1 2 5 1 0 2 1 1 1 ...
##  $ Fata_Injuries         : int  0 1 0 0 3 2 0 1 1 1 ...
##  $ Total_Injuries        : int  2 2 2 5 4 2 2 2 2 2 ...
##  $ SPIA_Year             : int  2017 2020 2020 2017 2020 2017 2020 2020 2017 2020 ...
##  $ Shape__Length         : num  307 1134 1951 3111 4041 ...
summary(COSA_Severe_Data)
##     OBJECTID        CorridorID      StreetName         FromStreet       
##  Min.   :  1.00   Min.   :  1.00   Length:166         Length:166        
##  1st Qu.: 42.25   1st Qu.: 57.25   Class :character   Class :character  
##  Median : 83.50   Median :124.50   Mode  :character   Mode  :character  
##  Mean   : 83.50   Mean   :111.70                                        
##  3rd Qu.:124.75   3rd Qu.:165.75                                        
##  Max.   :166.00   Max.   :207.00                                        
##    ToStreet         Incapacitated_Injuries Fata_Injuries   Total_Injuries  
##  Length:166         Min.   : 0.000         Min.   :0.000   Min.   : 2.000  
##  Class :character   1st Qu.: 1.000         1st Qu.:0.000   1st Qu.: 2.000  
##  Mode  :character   Median : 2.000         Median :1.000   Median : 3.000  
##                     Mean   : 2.633         Mean   :1.283   Mean   : 3.916  
##                     3rd Qu.: 3.000         3rd Qu.:2.000   3rd Qu.: 5.000  
##                     Max.   :12.000         Max.   :8.000   Max.   :15.000  
##    SPIA_Year    Shape__Length     
##  Min.   :2017   Min.   :   30.05  
##  1st Qu.:2017   1st Qu.: 1071.38  
##  Median :2020   Median : 1942.81  
##  Mean   :2019   Mean   : 2884.05  
##  3rd Qu.:2020   3rd Qu.: 3852.77  
##  Max.   :2020   Max.   :13314.60
head(COSA_Severe_Data)
##   OBJECTID CorridorID       StreetName FromStreet       ToStreet
## 1        1         56          MARBACH    HARNESS     MEADOW WAY
## 2        2        165 Military FM 1535  Braesview      Wedgewood
## 3        3        131        Callaghan Greensboro Fredericksburg
## 4        4          4 Bandera Spur 421 BLOOMFIELD         SUTTON
## 5        5        166 Military Loop 13 Commercial        Boswell
## 6        6         86         RANDOLPH   CRESTWAY        CARELIN
##   Incapacitated_Injuries Fata_Injuries Total_Injuries SPIA_Year Shape__Length
## 1                      2             0              2      2017      307.2034
## 2                      1             1              2      2020     1133.7522
## 3                      2             0              2      2020     1950.9909
## 4                      5             0              5      2017     3111.0340
## 5                      1             3              4      2020     4040.9123
## 6                      0             2              2      2017     1354.6860
mean(COSA_Severe_Data$Total_Injuries)
## [1] 3.915663
median(COSA_Severe_Data$Total_Injuries)
## [1] 3
range(COSA_Severe_Data$Total_Injuries)
## [1]  2 15
mean(COSA_Severe_Data$Shape__Length)
## [1] 2884.049
median(COSA_Severe_Data$Shape__Length)
## [1] 1942.811
range(COSA_Severe_Data$Shape__Length)
## [1]    30.04678 13314.60291
nrow(COSA_Severe_Data)
## [1] 166
hist(COSA_Severe_Data$Shape__Length,
     main = "Distribution of Corridor Lengths (feet)",
     xlab = "Shape__Length")

model_linear <- lm(Total_Injuries ~ Shape__Length, data = COSA_Severe_Data)
summary(model_linear)
## 
## Call:
## lm(formula = Total_Injuries ~ Shape__Length, data = COSA_Severe_Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.4825 -0.7899 -0.1972  0.5890  5.3795 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.316e+00  1.588e-01   8.291 3.87e-14 ***
## Shape__Length 9.013e-04  4.021e-05  22.418  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.397 on 164 degrees of freedom
## Multiple R-squared:  0.754,  Adjusted R-squared:  0.7525 
## F-statistic: 502.6 on 1 and 164 DF,  p-value: < 2.2e-16
plot(model_linear, which = 1)

raintest(model_linear)
## 
##  Rainbow test
## 
## data:  model_linear
## Rain = 2.1343, df1 = 83, df2 = 81, p-value = 0.0003671
dwtest(model_linear)
## 
##  Durbin-Watson test
## 
## data:  model_linear
## DW = 1.825, p-value = 0.1278
## alternative hypothesis: true autocorrelation is greater than 0
plot(model_linear, which = 3)

bptest(model_linear)
## 
##  studentized Breusch-Pagan test
## 
## data:  model_linear
## BP = 29.437, df = 1, p-value = 5.777e-08
plot(model_linear, which = 2)

shapiro.test(residuals(model_linear))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model_linear)
## W = 0.93869, p-value = 1.46e-06
safe_vif <- function(model) {
  # Count predictors (terms minus the intercept)
  n_predictors <- length(attr(terms(model), "term.labels"))
 
  if (n_predictors < 2) {
    message("VIF skipped: model has ", n_predictors,
            " predictor(s). Multicollinearity requires at least 2.")
    return(invisible(NULL))
  }
 
  vif(model)
}
safe_vif(model_linear)
## VIF skipped: model has 1 predictor(s). Multicollinearity requires at least 2.
model_log <- lm(Total_Injuries ~ log(Shape__Length), data = COSA_Severe_Data)
summary(model_log)
## 
## Call:
## lm(formula = Total_Injuries ~ log(Shape__Length), data = COSA_Severe_Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5275 -1.5538 -0.7039  0.9853  8.3668 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -7.7773     1.0872  -7.153 2.67e-11 ***
## log(Shape__Length)   1.5659     0.1439  10.883  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.146 on 164 degrees of freedom
## Multiple R-squared:  0.4194, Adjusted R-squared:  0.4158 
## F-statistic: 118.4 on 1 and 164 DF,  p-value: < 2.2e-16
raintest(model_log)
## 
##  Rainbow test
## 
## data:  model_log
## Rain = 1.7981, df1 = 83, df2 = 81, p-value = 0.004324
model_poly <- lm(Total_Injuries ~ poly(Shape__Length, 2),
                 data = COSA_Severe_Data)
summary(model_poly)
## 
## Call:
## lm(formula = Total_Injuries ~ poly(Shape__Length, 2), data = COSA_Severe_Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.8322 -0.7419 -0.1451  0.3928  5.5867 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               3.9157     0.1078  36.339   <2e-16 ***
## poly(Shape__Length, 2)1  31.3172     1.3883  22.558   <2e-16 ***
## poly(Shape__Length, 2)2   2.4271     1.3883   1.748   0.0823 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.388 on 163 degrees of freedom
## Multiple R-squared:  0.7585, Adjusted R-squared:  0.7555 
## F-statistic:   256 on 2 and 163 DF,  p-value: < 2.2e-16
raintest(model_poly)
## 
##  Rainbow test
## 
## data:  model_poly
## Rain = 2.1313, df1 = 83, df2 = 80, p-value = 0.0003945
aic_linear <- AIC(model_linear)
aic_log    <- AIC(model_log)
aic_poly   <- AIC(model_poly)
aic_linear
## [1] 586.066
aic_log
## [1] 728.6019
aic_poly
## [1] 584.9823
aic_table <- data.frame(
  Model     = c("Linear (baseline)",
                "Log transformation",
                "2nd-degree polynomial"),
  Params    = c(length(coef(model_linear)),
                length(coef(model_log)),
                length(coef(model_poly))),
  R_squared = c(summary(model_linear)$r.squared,
                summary(model_log)$r.squared,
                summary(model_poly)$r.squared),
  AIC       = c(aic_linear, aic_log, aic_poly)
)
aic_table$delta_AIC <- aic_table$AIC - min(aic_table$AIC)
aic_table <- aic_table[order(aic_table$AIC), ]
 
print(aic_table, row.names = FALSE)
##                  Model Params R_squared      AIC  delta_AIC
##  2nd-degree polynomial      3 0.7584894 584.9823   0.000000
##      Linear (baseline)      2 0.7539610 586.0660   1.083705
##     Log transformation      2 0.4193547 728.6019 143.619588
bptest(model_poly)                       
## 
##  studentized Breusch-Pagan test
## 
## data:  model_poly
## BP = 30.922, df = 2, p-value = 1.929e-07
shapiro.test(residuals(model_poly))      
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model_poly)
## W = 0.92071, p-value = 7.099e-08
slope_per_foot <- coef(model_linear)["Shape__Length"]
slope_per_mile <- slope_per_foot * 5280
 
slope_per_foot
## Shape__Length 
##  0.0009013415
slope_per_mile
## Shape__Length 
##      4.759083
set.seed(2026) 
COSA_Severe_Data$Length_Sq <-
  (COSA_Severe_Data$Shape__Length^2) / 1e6 +
  rnorm(nrow(COSA_Severe_Data), mean = 0, sd = 0.1)
model_demo <- lm(Total_Injuries ~ Shape__Length + Length_Sq,
                 data = COSA_Severe_Data)
safe_vif(model_demo)
## Shape__Length     Length_Sq 
##      9.756969      9.756969

=============================================================================