R Markdown
#Library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Data ----
# Read the CSV through an explicit relative path instead of calling setwd():
# the session's working directory is left untouched, so the script has no
# hidden dependency on global state. The directory and file names are kept
# verbatim from the original script.
data_dir <- "City-of-San-Antonio---Sever-Injury-Data_files"
COSA_Severe_Data <- read_csv(
  file.path(data_dir, "COSA Severe Pedestrian Injury Areas.csv")
)
## Rows: 166 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): StreetName, FromStreet, ToStreet
## dbl (7): OBJECTID, CorridorID, Incapacitated_Injuries, Fata_Injuries, Total_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List the column names of the imported data.
names(COSA_Severe_Data)
## [1] "OBJECTID" "CorridorID" "StreetName"
## [4] "FromStreet" "ToStreet" "Incapacitated_Injuries"
## [7] "Fata_Injuries" "Total_Injuries" "SPIA_Year"
## [10] "Shape__Length"
# Histogram of the Total_Injuries column.
hist(COSA_Severe_Data$Total_Injuries)

# Histogram of the Fata_Injuries column (name as spelled in the data).
hist(COSA_Severe_Data$Fata_Injuries)

# Scatter plot with Total_Injuries on the x-axis and Fata_Injuries on the
# y-axis.
plot(COSA_Severe_Data$Total_Injuries,COSA_Severe_Data$Fata_Injuries)

# Correlation between the two columns (cor() defaults to Pearson).
cor(COSA_Severe_Data$Total_Injuries,COSA_Severe_Data$Fata_Injuries)
## [1] 0.7022154
# Simple linear regression of Fata_Injuries on Total_Injuries.
# The formula uses bare column names that are resolved through `data =`.
# Writing COSA_Severe_Data$... inside the formula bypasses `data`, produces
# unwieldy coefficient names, and makes predict(model, newdata = ...) ignore
# the new data. The fitted estimates are unchanged; only the echoed call and
# the coefficient label differ.
model <- lm(Fata_Injuries ~ Total_Injuries, data = COSA_Severe_Data)
summary(model)
##
## Call:
## lm(formula = Fata_Injuries ~ Total_Injuries, data = COSA_Severe_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.2349 -0.5837 0.0512 0.4163 3.5907
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.14658 0.13914 -1.053 0.294
## Total_Injuries 0.36513 0.02891 12.631 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.043 on 164 degrees of freedom
## Multiple R-squared: 0.4931, Adjusted R-squared: 0.49
## F-statistic: 159.5 on 1 and 164 DF, p-value: < 2.2e-16
# Compare the mean of Total_Injuries with the mean of Fata_Injuries.
# With two vectors and no further arguments, t.test() runs an unpaired
# Welch two-sample test (see the test title in the output below).
# NOTE(review): both vectors are columns of the same data frame, so each row
# contributes one value to each sample; a paired test (paired = TRUE) may be
# the more appropriate design -- confirm with the analyst.
# NOTE(review): if Total_Injuries already includes the fatal counts, a
# difference in means is expected by construction -- confirm what this test
# is meant to show.
t.test(COSA_Severe_Data$Total_Injuries, COSA_Severe_Data$Fata_Injuries)
##
## Welch Two Sample t-test
##
## data: COSA_Severe_Data$Total_Injuries and COSA_Severe_Data$Fata_Injuries
## t = 10.718, df = 248.14, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.14875 3.11631
## sample estimates:
## mean of x mean of y
## 3.915663 1.283133