R Markdown

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pastecs)
## 
## Attaching package: 'pastecs'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
# Data ----
# Read the severe pedestrian injury areas CSV (path is relative to the
# working directory); readr infers the column types from the file.
COSA_Severe_Data <- readr::read_csv(
  file = "COSA Severe Pedestrian Injury Areas.csv"
)
## Rows: 166 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): StreetName, FromStreet, ToStreet
## dbl (7): OBJECTID, CorridorID, Incapacitated_Injuries, Fata_Injuries, Total_...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List the column names of the imported table.
colnames(COSA_Severe_Data)
##  [1] "OBJECTID"               "CorridorID"             "StreetName"            
##  [4] "FromStreet"             "ToStreet"               "Incapacitated_Injuries"
##  [7] "Fata_Injuries"          "Total_Injuries"         "SPIA_Year"             
## [10] "Shape__Length"
# Descriptive statistics (pastecs) for the Incapacitated_Injuries column.
pastecs::stat.desc(COSA_Severe_Data[["Incapacitated_Injuries"]])
##      nbr.val     nbr.null       nbr.na          min          max        range 
##  166.0000000    8.0000000    0.0000000    0.0000000   12.0000000   12.0000000 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
##  437.0000000    2.0000000    2.6325301    0.1601597    0.3162266    4.2580869 
##      std.dev     coef.var 
##    2.0635132    0.7838517
# Keep only rows with a recorded Incapacitated_Injuries value so missing
# data cannot affect the plots below.
COSA_clean <- COSA_Severe_Data %>%
  filter(!is.na(Incapacitated_Injuries))

# Histogram of Incapacitated_Injuries. The summary statistics report a range
# of 0-12, so 30 equal-width bins would each be narrower than one unit and
# leave empty gaps between bars (assuming the values are whole-number
# counts). One-unit bins centred on the integers give every value its own
# bar instead.
ggplot(COSA_clean, aes(x = Incapacitated_Injuries)) +
  geom_histogram(binwidth = 1, center = 0, fill = "blue", color = "white") +
  labs(
    title = "Distribution of Incapacitated Injuries",
    x = "Incapacitated Injuries",
    y = "Count"
  )

# Add a log-transformed copy of the injury column. 1 is added before taking
# the log so that rows with zero injuries stay finite.
COSA_clean <- mutate(
  COSA_clean,
  log_injuries = log(Incapacitated_Injuries + 1)
)

# Histogram of the transformed values.
ggplot(data = COSA_clean, mapping = aes(x = log_injuries)) +
  geom_histogram(bins = 30, fill = "red", color = "white") +
  ggtitle("Log Transformed Incapacitated Injuries") +
  xlab("Log(Incapacitated Injuries + 1)") +
  ylab("Count")