library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the Austin crash records from the local CSV export. No column types are
# supplied, so readr guesses them from the data.
Austin_crash_report <- read_csv(
  file = "C:/Users/desib/OneDrive/Documents/PAD 6833/Homework 4/Austin crash report.csv"
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 161088 Columns: 44
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): case_id, Primary address, Secondary address, rpt_street_name, rpt_...
## dbl (28): ID, Crash ID, fatal crash, rpt_block_num, crash_speed_limit, latit...
## lgl (7): crash_fatal_fl, road_constr_zone_fl, onsys_fl, private_dr_fl, Is d...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Min / quartiles / mean / max of the `fatal crash` column.
Austin_crash_report$`fatal crash` %>%
  summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 2.000 1.994 2.000 2.000
# Min / quartiles / mean / max of the speed limit recorded for each crash.
Austin_crash_report$crash_speed_limit %>%
  summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 35.00 45.00 45.95 55.00 85.00
# Narrow the crash table to the two columns analysed in the rest of the script:
# the `fatal crash` code and the speed limit.
speeddeath <- select(
  Austin_crash_report,
  `fatal crash`,
  crash_speed_limit
)
# Distribution of speed limits across crashes.
# An explicit bin width replaces ggplot2's default of 30 bins, which over the
# observed 5-85 range gives bins ~2.8 wide that do not line up with the data.
# NOTE(review): 5 assumes limits are recorded in steps of 5 (the summary
# quantiles are 5/35/45/55/85) -- confirm against the data.
ggplot(speeddeath, aes(x = crash_speed_limit)) +
  geom_histogram(binwidth = 5)

# Count of crashes per `fatal crash` code.
# The column's summary spans only 1 to 2, so a 30-bin histogram is mostly empty
# bins; a bar per distinct value shows the same counts without binning.
# NOTE(review): treats `fatal crash` as a categorical code -- confirm what the
# values 1 and 2 mean before labelling them.
ggplot(speeddeath, aes(x = factor(`fatal crash`))) +
  geom_bar() +
  labs(x = "fatal crash")

# Correlation between speed limit and the `fatal crash` code, using cor()'s
# default method and NA handling.
with(
  speeddeath,
  cor(crash_speed_limit, `fatal crash`)
)
## [1] -0.02207769