library(lmtest)
## Warning: package 'lmtest' was built under R version 4.5.3
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
#Data
setwd("C:/Users/jalex/JS R- Homework")
COSA_Severe_Data <- read.csv("COSA Severe Pedestrian Injury Areas.csv")
names(COSA_Severe_Data)
## [1] "OBJECTID" "CorridorID" "StreetName"
## [4] "FromStreet" "ToStreet" "Incapacitated_Injuries"
## [7] "Fata_Injuries" "Total_Injuries" "SPIA_Year"
## [10] "Shape__Length"
str(COSA_Severe_Data)
## 'data.frame': 166 obs. of 10 variables:
## $ OBJECTID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ CorridorID : int 56 165 131 4 166 86 157 164 110 196 ...
## $ StreetName : chr "MARBACH" "Military FM 1535" "Callaghan" "Bandera Spur 421" ...
## $ FromStreet : chr "HARNESS" "Braesview" "Greensboro" "BLOOMFIELD" ...
## $ ToStreet : chr "MEADOW WAY" "Wedgewood" "Fredericksburg" "SUTTON" ...
## $ Incapacitated_Injuries: int 2 1 2 5 1 0 2 1 1 1 ...
## $ Fata_Injuries : int 0 1 0 0 3 2 0 1 1 1 ...
## $ Total_Injuries : int 2 2 2 5 4 2 2 2 2 2 ...
## $ SPIA_Year : int 2017 2020 2020 2017 2020 2017 2020 2020 2017 2020 ...
## $ Shape__Length : num 307 1134 1951 3111 4041 ...
summary(COSA_Severe_Data)
## OBJECTID CorridorID StreetName FromStreet
## Min. : 1.00 Min. : 1.00 Length:166 Length:166
## 1st Qu.: 42.25 1st Qu.: 57.25 Class :character Class :character
## Median : 83.50 Median :124.50 Mode :character Mode :character
## Mean : 83.50 Mean :111.70
## 3rd Qu.:124.75 3rd Qu.:165.75
## Max. :166.00 Max. :207.00
## ToStreet Incapacitated_Injuries Fata_Injuries Total_Injuries
## Length:166 Min. : 0.000 Min. :0.000 Min. : 2.000
## Class :character 1st Qu.: 1.000 1st Qu.:0.000 1st Qu.: 2.000
## Mode :character Median : 2.000 Median :1.000 Median : 3.000
## Mean : 2.633 Mean :1.283 Mean : 3.916
## 3rd Qu.: 3.000 3rd Qu.:2.000 3rd Qu.: 5.000
## Max. :12.000 Max. :8.000 Max. :15.000
## SPIA_Year Shape__Length
## Min. :2017 Min. : 30.05
## 1st Qu.:2017 1st Qu.: 1071.38
## Median :2020 Median : 1942.81
## Mean :2019 Mean : 2884.05
## 3rd Qu.:2020 3rd Qu.: 3852.77
## Max. :2020 Max. :13314.60
head(COSA_Severe_Data)
## OBJECTID CorridorID StreetName FromStreet ToStreet
## 1 1 56 MARBACH HARNESS MEADOW WAY
## 2 2 165 Military FM 1535 Braesview Wedgewood
## 3 3 131 Callaghan Greensboro Fredericksburg
## 4 4 4 Bandera Spur 421 BLOOMFIELD SUTTON
## 5 5 166 Military Loop 13 Commercial Boswell
## 6 6 86 RANDOLPH CRESTWAY CARELIN
## Incapacitated_Injuries Fata_Injuries Total_Injuries SPIA_Year Shape__Length
## 1 2 0 2 2017 307.2034
## 2 1 1 2 2020 1133.7522
## 3 2 0 2 2020 1950.9909
## 4 5 0 5 2017 3111.0340
## 5 1 3 4 2020 4040.9123
## 6 0 2 2 2017 1354.6860
mean(COSA_Severe_Data$Total_Injuries)
## [1] 3.915663
median(COSA_Severe_Data$Total_Injuries)
## [1] 3
range(COSA_Severe_Data$Total_Injuries)
## [1] 2 15
mean(COSA_Severe_Data$Shape__Length)
## [1] 2884.049
median(COSA_Severe_Data$Shape__Length)
## [1] 1942.811
range(COSA_Severe_Data$Shape__Length)
## [1] 30.04678 13314.60291
nrow(COSA_Severe_Data)
## [1] 166
hist(COSA_Severe_Data$Shape__Length,
main = "Distribution of Corridor Lengths (feet)",
xlab = "Shape__Length")

model_linear <- lm(Total_Injuries ~ Shape__Length, data = COSA_Severe_Data)
summary(model_linear)
##
## Call:
## lm(formula = Total_Injuries ~ Shape__Length, data = COSA_Severe_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4825 -0.7899 -0.1972 0.5890 5.3795
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.316e+00 1.588e-01 8.291 3.87e-14 ***
## Shape__Length 9.013e-04 4.021e-05 22.418 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.397 on 164 degrees of freedom
## Multiple R-squared: 0.754, Adjusted R-squared: 0.7525
## F-statistic: 502.6 on 1 and 164 DF, p-value: < 2.2e-16
plot(model_linear, which = 1)

raintest(model_linear)
##
## Rainbow test
##
## data: model_linear
## Rain = 2.1343, df1 = 83, df2 = 81, p-value = 0.0003671
dwtest(model_linear)
##
## Durbin-Watson test
##
## data: model_linear
## DW = 1.825, p-value = 0.1278
## alternative hypothesis: true autocorrelation is greater than 0
plot(model_linear, which = 3)

bptest(model_linear)
##
## studentized Breusch-Pagan test
##
## data: model_linear
## BP = 29.437, df = 1, p-value = 5.777e-08
plot(model_linear, which = 2)

shapiro.test(residuals(model_linear))
##
## Shapiro-Wilk normality test
##
## data: residuals(model_linear)
## W = 0.93869, p-value = 1.46e-06
safe_vif <- function(model) {
# Count predictors (terms minus the intercept)
n_predictors <- length(attr(terms(model), "term.labels"))
if (n_predictors < 2) {
message("VIF skipped: model has ", n_predictors,
" predictor(s). Multicollinearity requires at least 2.")
return(invisible(NULL))
}
vif(model)
}
safe_vif(model_linear)
## VIF skipped: model has 1 predictor(s). Multicollinearity requires at least 2.
model_log <- lm(Total_Injuries ~ log(Shape__Length), data = COSA_Severe_Data)
summary(model_log)
##
## Call:
## lm(formula = Total_Injuries ~ log(Shape__Length), data = COSA_Severe_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5275 -1.5538 -0.7039 0.9853 8.3668
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.7773 1.0872 -7.153 2.67e-11 ***
## log(Shape__Length) 1.5659 0.1439 10.883 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.146 on 164 degrees of freedom
## Multiple R-squared: 0.4194, Adjusted R-squared: 0.4158
## F-statistic: 118.4 on 1 and 164 DF, p-value: < 2.2e-16
raintest(model_log)
##
## Rainbow test
##
## data: model_log
## Rain = 1.7981, df1 = 83, df2 = 81, p-value = 0.004324
model_poly <- lm(Total_Injuries ~ poly(Shape__Length, 2),
data = COSA_Severe_Data)
summary(model_poly)
##
## Call:
## lm(formula = Total_Injuries ~ poly(Shape__Length, 2), data = COSA_Severe_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.8322 -0.7419 -0.1451 0.3928 5.5867
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.9157 0.1078 36.339 <2e-16 ***
## poly(Shape__Length, 2)1 31.3172 1.3883 22.558 <2e-16 ***
## poly(Shape__Length, 2)2 2.4271 1.3883 1.748 0.0823 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.388 on 163 degrees of freedom
## Multiple R-squared: 0.7585, Adjusted R-squared: 0.7555
## F-statistic: 256 on 2 and 163 DF, p-value: < 2.2e-16
raintest(model_poly)
##
## Rainbow test
##
## data: model_poly
## Rain = 2.1313, df1 = 83, df2 = 80, p-value = 0.0003945
aic_linear <- AIC(model_linear)
aic_log <- AIC(model_log)
aic_poly <- AIC(model_poly)
aic_linear
## [1] 586.066
aic_log
## [1] 728.6019
aic_poly
## [1] 584.9823
aic_table <- data.frame(
Model = c("Linear (baseline)",
"Log transformation",
"2nd-degree polynomial"),
Params = c(length(coef(model_linear)),
length(coef(model_log)),
length(coef(model_poly))),
R_squared = c(summary(model_linear)$r.squared,
summary(model_log)$r.squared,
summary(model_poly)$r.squared),
AIC = c(aic_linear, aic_log, aic_poly)
)
aic_table$delta_AIC <- aic_table$AIC - min(aic_table$AIC)
aic_table <- aic_table[order(aic_table$AIC), ]
print(aic_table, row.names = FALSE)
## Model Params R_squared AIC delta_AIC
## 2nd-degree polynomial 3 0.7584894 584.9823 0.000000
## Linear (baseline) 2 0.7539610 586.0660 1.083705
## Log transformation 2 0.4193547 728.6019 143.619588
bptest(model_poly)
##
## studentized Breusch-Pagan test
##
## data: model_poly
## BP = 30.922, df = 2, p-value = 1.929e-07
shapiro.test(residuals(model_poly))
##
## Shapiro-Wilk normality test
##
## data: residuals(model_poly)
## W = 0.92071, p-value = 7.099e-08
slope_per_foot <- coef(model_linear)["Shape__Length"]
slope_per_mile <- slope_per_foot * 5280
slope_per_foot
## Shape__Length
## 0.0009013415
slope_per_mile
## Shape__Length
## 4.759083
set.seed(2026)
COSA_Severe_Data$Length_Sq <-
(COSA_Severe_Data$Shape__Length^2) / 1e6 +
rnorm(nrow(COSA_Severe_Data), mean = 0, sd = 0.1)
model_demo <- lm(Total_Injuries ~ Shape__Length + Length_Sq,
data = COSA_Severe_Data)
safe_vif(model_demo)
## Shape__Length Length_Sq
## 9.756969 9.756969