library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
# Data ----
# Build the CSV path explicitly instead of calling setwd(): the script no
# longer changes the session's working directory as a side effect, and the
# only machine-specific value is `data_dir`.
data_dir <- "C:/Users/jalex/OneDrive/Desktop/JS R Homework/City of San Antonio -Severe Pedestrain Injury Data"
data_file <- file.path(data_dir, "COSA Severe Pedestrian Injury Areas.csv")
# Fail early with a clear message if the file is not where it is expected.
if (!file.exists(data_file)) {
  stop("Data file not found: ", data_file, call. = FALSE)
}
# read_csv() guesses the column types and returns a tibble.
COSA_Severe_Data <- read_csv(data_file)
## Rows: 166 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): StreetName, FromStreet, ToStreet
## dbl (7): OBJECTID, CorridorID, Incapacitated_Injuries, Fata_Injuries, Total_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the column names of the imported table.
names(COSA_Severe_Data)
## [1] "OBJECTID" "CorridorID" "StreetName"
## [4] "FromStreet" "ToStreet" "Incapacitated_Injuries"
## [7] "Fata_Injuries" "Total_Injuries" "SPIA_Year"
## [10] "Shape__Length"
# Print the class vector of the imported object.
class(COSA_Severe_Data)
## [1] "spec_tbl_df" "tbl_df" "tbl" "data.frame"
# Compact structure overview: dimensions, each column's type, and its first
# few values.
str(COSA_Severe_Data)
## spc_tbl_ [166 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ OBJECTID : num [1:166] 1 2 3 4 5 6 7 8 9 10 ...
## $ CorridorID : num [1:166] 56 165 131 4 166 86 157 164 110 196 ...
## $ StreetName : chr [1:166] "MARBACH" "Military FM 1535" "Callaghan" "Bandera Spur 421" ...
## $ FromStreet : chr [1:166] "HARNESS" "Braesview" "Greensboro" "BLOOMFIELD" ...
## $ ToStreet : chr [1:166] "MEADOW WAY" "Wedgewood" "Fredericksburg" "SUTTON" ...
## $ Incapacitated_Injuries: num [1:166] 2 1 2 5 1 0 2 1 1 1 ...
## $ Fata_Injuries : num [1:166] 0 1 0 0 3 2 0 1 1 1 ...
## $ Total_Injuries : num [1:166] 2 2 2 5 4 2 2 2 2 2 ...
## $ SPIA_Year : num [1:166] 2017 2020 2020 2017 2020 ...
## $ Shape__Length : num [1:166] 307 1134 1951 3111 4041 ...
## - attr(*, "spec")=
## .. cols(
## .. OBJECTID = col_double(),
## .. CorridorID = col_double(),
## .. StreetName = col_character(),
## .. FromStreet = col_character(),
## .. ToStreet = col_character(),
## .. Incapacitated_Injuries = col_double(),
## .. Fata_Injuries = col_double(),
## .. Total_Injuries = col_double(),
## .. SPIA_Year = col_double(),
## .. Shape__Length = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Preview the first rows of the table.
head(COSA_Severe_Data)
## # A tibble: 6 × 10
## OBJECTID CorridorID StreetName FromStreet ToStreet Incapacitated_Injuries
## <dbl> <dbl> <chr> <chr> <chr> <dbl>
## 1 1 56 MARBACH HARNESS MEADOW … 2
## 2 2 165 Military FM 15… Braesview Wedgewo… 1
## 3 3 131 Callaghan Greensboro Frederi… 2
## 4 4 4 Bandera Spur 4… BLOOMFIELD SUTTON 5
## 5 5 166 Military Loop … Commercial Boswell 1
## 6 6 86 RANDOLPH CRESTWAY CARELIN 0
## # ℹ 4 more variables: Fata_Injuries <dbl>, Total_Injuries <dbl>,
## # SPIA_Year <dbl>, Shape__Length <dbl>
# Print the column names again.
# NOTE(review): this prints the same result as the earlier
# names(COSA_Severe_Data) call, so one of the two is redundant.
colnames(COSA_Severe_Data)
## [1] "OBJECTID" "CorridorID" "StreetName"
## [4] "FromStreet" "ToStreet" "Incapacitated_Injuries"
## [7] "Fata_Injuries" "Total_Injuries" "SPIA_Year"
## [10] "Shape__Length"
# Fit a simple linear regression of Total_Injuries on Shape__Length and
# print the coefficient table together with the overall fit statistics.
model <- lm(
  formula = Total_Injuries ~ Shape__Length,
  data = COSA_Severe_Data
)
summary(model)
##
## Call:
## lm(formula = Total_Injuries ~ Shape__Length, data = COSA_Severe_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4825 -0.7899 -0.1972 0.5890 5.3795
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.316e+00 1.588e-01 8.291 3.87e-14 ***
## Shape__Length 9.013e-04 4.021e-05 22.418 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.397 on 164 degrees of freedom
## Multiple R-squared: 0.754, Adjusted R-squared: 0.7525
## F-statistic: 502.6 on 1 and 164 DF, p-value: < 2.2e-16
# Linearity check ----
# Scatter plot of the response against the predictor with the fitted
# regression line overlaid in red. The formula interface with `data =` avoids
# repeating the data-frame name, and the explicit labels give the axes plain
# column names instead of the raw `COSA_Severe_Data$...` expressions.
plot(
  Total_Injuries ~ Shape__Length,
  data = COSA_Severe_Data,
  xlab = "Shape__Length",
  ylab = "Total_Injuries"
)
abline(model, col = "red")
# Rainbow test (lmtest) on the fitted model; a small p-value is evidence
# against the linear specification.
raintest(model)
##
## Rainbow test
##
## data: model
## Rain = 2.1343, df1 = 83, df2 = 81, p-value = 0.0003671
# Durbin-Watson test (lmtest) for autocorrelation in the model residuals.
# With the default settings used here, the alternative hypothesis is that the
# true autocorrelation is greater than 0.
dwtest(model)
##
## Durbin-Watson test
##
## data: model
## DW = 1.825, p-value = 0.1278
## alternative hypothesis: true autocorrelation is greater than 0
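The Durbin-Watson test produced a p-value of 0.1278, which is above the conventional 0.05 significance level. We therefore fail to reject the null hypothesis: there is no significant evidence of positive autocorrelation in the residuals.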
# Homoskedasticity check ----
# Diagnostic plot 1 for the fitted model: residuals against fitted values.
plot(model, which = 1)
# Studentized Breusch-Pagan test (lmtest); a small p-value is evidence
# against constant residual variance.
bptest(model)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 29.437, df = 1, p-value = 5.777e-08
# Normality check ----
# Diagnostic plot 2 for the fitted model: normal Q-Q plot of the residuals.
plot(model, which = 2)
# Shapiro-Wilk test on the model residuals; a small p-value is evidence
# against normally distributed residuals.
shapiro.test(residuals(model))
##
## Shapiro-Wilk normality test
##
## data: residuals(model)
## W = 0.93869, p-value = 1.46e-06