Research Paper Data Selection

library(readxl)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Read the Austin crash report CSV into a tibble; readr guesses column types.
Austin_crash_report <- read_csv(
  file = "C:/Users/desib/OneDrive/Documents/PAD 6833/Homework 4/Austin crash report.csv"
)

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

## Rows: 161088 Columns: 44
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (9): case_id, Primary address, Secondary address, rpt_street_name, rpt_...
## dbl (28): ID, Crash ID, fatal crash, rpt_block_num, crash_speed_limit, latit...
## lgl  (7): crash_fatal_fl, road_constr_zone_fl, onsys_fl, private_dr_fl, Is d...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Five-number summary (plus mean) of the `fatal crash` column.
summary(Austin_crash_report[["fatal crash"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   2.000   1.994   2.000   2.000

# Five-number summary (plus mean) of the posted speed limit column.
summary(Austin_crash_report[["crash_speed_limit"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00   35.00   45.00   45.95   55.00   85.00

# Keep only the two variables used in the plots and correlation below.
speeddeath <- select(
  Austin_crash_report,
  `fatal crash`,
  crash_speed_limit
)

# Distribution of speed limits across crashes (default stat_bin binning).
speeddeath %>%
  ggplot(mapping = aes(x = crash_speed_limit)) +
  geom_histogram()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# The summary of `fatal crash` above shows it spans only 1 to 2 (presumably a
# two-level code -- confirm against the data dictionary), so a 30-bin
# continuous histogram leaves almost every bin empty. Treat the variable as
# discrete and draw one bar per observed value instead.
ggplot(speeddeath, aes(x = factor(`fatal crash`))) +
  geom_bar() +
  labs(x = "fatal crash")

# Pearson correlation (cor()'s default method) between speed limit and the
# `fatal crash` code.
cor(
  x = speeddeath[["crash_speed_limit"]],
  y = speeddeath[["fatal crash"]]
)

## [1] -0.02207769

Research Paper Data Selection

Desiree Vitale

2024-09-26