library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
# Data ----
# Build the full path explicitly instead of calling setwd(), so that running
# this script does not change the session's working directory as a side
# effect. The directory name is reproduced exactly as in the original path.
data_dir <- file.path(
  "C:/Users/jalex/OneDrive/Desktop/JS R Homework",
  "City of San Antonio -Severe Pedestrain Injury Data"
)
# read_csv() guesses the column types and reports them in a message.
COSA_Severe_Data <- read_csv(
  file.path(data_dir, "COSA Severe Pedestrian Injury Areas.csv")
)
## Rows: 166 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): StreetName, FromStreet, ToStreet
## dbl (7): OBJECTID, CorridorID, Incapacitated_Injuries, Fata_Injuries, Total_...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List the column names of the imported data.
names(COSA_Severe_Data)
##  [1] "OBJECTID"               "CorridorID"             "StreetName"            
##  [4] "FromStreet"             "ToStreet"               "Incapacitated_Injuries"
##  [7] "Fata_Injuries"          "Total_Injuries"         "SPIA_Year"             
## [10] "Shape__Length"
# Show the class vector of the imported object.
class(COSA_Severe_Data)    
## [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame"
# Compact structure overview: dimensions, column types and leading values.
str(COSA_Severe_Data)
## spc_tbl_ [166 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ OBJECTID              : num [1:166] 1 2 3 4 5 6 7 8 9 10 ...
##  $ CorridorID            : num [1:166] 56 165 131 4 166 86 157 164 110 196 ...
##  $ StreetName            : chr [1:166] "MARBACH" "Military FM 1535" "Callaghan" "Bandera Spur 421" ...
##  $ FromStreet            : chr [1:166] "HARNESS" "Braesview" "Greensboro" "BLOOMFIELD" ...
##  $ ToStreet              : chr [1:166] "MEADOW WAY" "Wedgewood" "Fredericksburg" "SUTTON" ...
##  $ Incapacitated_Injuries: num [1:166] 2 1 2 5 1 0 2 1 1 1 ...
##  $ Fata_Injuries         : num [1:166] 0 1 0 0 3 2 0 1 1 1 ...
##  $ Total_Injuries        : num [1:166] 2 2 2 5 4 2 2 2 2 2 ...
##  $ SPIA_Year             : num [1:166] 2017 2020 2020 2017 2020 ...
##  $ Shape__Length         : num [1:166] 307 1134 1951 3111 4041 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   OBJECTID = col_double(),
##   ..   CorridorID = col_double(),
##   ..   StreetName = col_character(),
##   ..   FromStreet = col_character(),
##   ..   ToStreet = col_character(),
##   ..   Incapacitated_Injuries = col_double(),
##   ..   Fata_Injuries = col_double(),
##   ..   Total_Injuries = col_double(),
##   ..   SPIA_Year = col_double(),
##   ..   Shape__Length = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Preview the first rows of the data.
head(COSA_Severe_Data)
## # A tibble: 6 × 10
##   OBJECTID CorridorID StreetName      FromStreet ToStreet Incapacitated_Injuries
##      <dbl>      <dbl> <chr>           <chr>      <chr>                     <dbl>
## 1        1         56 MARBACH         HARNESS    MEADOW …                      2
## 2        2        165 Military FM 15… Braesview  Wedgewo…                      1
## 3        3        131 Callaghan       Greensboro Frederi…                      2
## 4        4          4 Bandera Spur 4… BLOOMFIELD SUTTON                        5
## 5        5        166 Military Loop … Commercial Boswell                       1
## 6        6         86 RANDOLPH        CRESTWAY   CARELIN                       0
## # ℹ 4 more variables: Fata_Injuries <dbl>, Total_Injuries <dbl>,
## #   SPIA_Year <dbl>, Shape__Length <dbl>
# Column names once more; for this data frame the result matches names().
colnames(COSA_Severe_Data)
##  [1] "OBJECTID"               "CorridorID"             "StreetName"            
##  [4] "FromStreet"             "ToStreet"               "Incapacitated_Injuries"
##  [7] "Fata_Injuries"          "Total_Injuries"         "SPIA_Year"             
## [10] "Shape__Length"
# Simple linear regression of Total_Injuries on Shape__Length.
# The diagnostic tests and plots further down all operate on this `model`.
model <- lm(Total_Injuries ~ Shape__Length, data = COSA_Severe_Data)
# Print the residual summary, coefficient table and overall fit statistics.
summary(model)
## 
## Call:
## lm(formula = Total_Injuries ~ Shape__Length, data = COSA_Severe_Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.4825 -0.7899 -0.1972  0.5890  5.3795 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.316e+00  1.588e-01   8.291 3.87e-14 ***
## Shape__Length 9.013e-04  4.021e-05  22.418  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.397 on 164 degrees of freedom
## Multiple R-squared:  0.754,  Adjusted R-squared:  0.7525 
## F-statistic: 502.6 on 1 and 164 DF,  p-value: < 2.2e-16
# Scatter plot of the raw data with the fitted regression line on top.
# Explicit labels replace the default axis titles, which would otherwise be
# the raw `COSA_Severe_Data$...` expressions.
plot(
  COSA_Severe_Data$Shape__Length, COSA_Severe_Data$Total_Injuries,
  xlab = "Shape__Length",
  ylab = "Total_Injuries",
  main = "Total_Injuries vs. Shape__Length"
)
# Overlay the least-squares line from `model`.
abline(model, col = "red")

# Rainbow test (lmtest) of the linear specification; a small p-value
# indicates that a straight-line fit is questionable.
raintest(model)
## 
##  Rainbow test
## 
## data:  model
## Rain = 2.1343, df1 = 83, df2 = 81, p-value = 0.0003671
# Durbin-Watson test for autocorrelation in the residuals.
# H0: no autocorrelation; default alternative: autocorrelation > 0.
dwtest(model)
## 
##  Durbin-Watson test
## 
## data:  model
## DW = 1.825, p-value = 0.1278
## alternative hypothesis: true autocorrelation is greater than 0

The DW test produced a p-value of 0.1278, which is above the conventional 0.05 significance level, so we fail to reject the null hypothesis: there is no evidence of positive autocorrelation in the residuals.

# Residuals-vs-fitted plot (which = 1) for a visual check of the residual
# spread across fitted values.
plot(model, which = 1)   

# Studentized Breusch-Pagan test; H0 is constant error variance.
bptest(model)
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 29.437, df = 1, p-value = 5.777e-08
# Normal Q-Q plot of the residuals (which = 2).
plot(model, which = 2)   

# Shapiro-Wilk test on the model residuals; H0 is that they are normal.
shapiro.test(residuals(model))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model)
## W = 0.93869, p-value = 1.46e-06