Project 6 Final - Data 101

Author

Christopher Newman

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
library(leaflet)
Warning: package 'leaflet' was built under R version 4.3.3
# Load the crash-report export and derive calendar fields from its timestamp.
# NOTE(review): setwd() ties the script to one machine's directory layout; it
# is kept so that relative paths used by this script keep resolving as before.
setwd("C:/Users/chris/Downloads/Data Science R")
data <- read.csv("Crash_Reporting_-_Drivers_Data_20240407.csv")

# Parse the 12-hour "month/day/year hour:minute:second AM/PM" strings.
# The values are used purely as wall-clock readings, so the parse is pinned to
# UTC: a zone without daylight-saving transitions cannot turn a reading that
# falls in a DST gap into NA, and the result no longer depends on the time
# zone of the R session that runs the script.
data$Crash.Date.Time <- as.POSIXct(
  data$Crash.Date.Time,
  format = "%m/%d/%Y %I:%M:%S %p",
  tz = "UTC"
)

# Character fields taken from the parsed timestamp: four-digit year,
# two-digit month, two-digit hour on the 24-hour clock. Rows whose timestamp
# did not match the format are NA in all three.
data$Year <- format(data$Crash.Date.Time, "%Y")
data$Month <- format(data$Crash.Date.Time, "%m")
data$Hour <- format(data$Crash.Date.Time, "%H")
# Count crash records for every (hour of day, injury severity) pair and
# return the result without any grouping attached
severity_by_hour <-
  data |>
  group_by(Hour, Injury.Severity) |>
  summarise(Count = n()) |>
  ungroup()
`summarise()` has grouped output by 'Hour'. You can override using the
`.groups` argument.
# Stacked bar chart: one bar per hour, split into injury-severity segments
ggplot(
  data = severity_by_hour,
  mapping = aes(x = Hour, y = Count, fill = Injury.Severity)
) +
  geom_col(position = "stack") +
  labs(
    title = "Accident Severity by Time of Day",
    x = "Hour",
    y = "Count"
  ) +
  # Tilt the x-axis labels so neighbouring hours do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Tally crash records per collision type, most frequent type first
collision_types <-
  data |>
  group_by(Collision.Type) |>
  summarise(Count = n()) |>
  arrange(desc(Count))

# Bar chart of collision types; reorder() on the negated count places the
# most frequent type at the left
ggplot(
  data = collision_types,
  mapping = aes(
    x = reorder(Collision.Type, -Count),
    y = Count,
    fill = Collision.Type
  )
) +
  geom_col() +
  labs(
    title = "Collision Type Distribution",
    x = "Collision Type",
    y = "Count"
  ) +
  # Tilt the x-axis labels so long collision-type names stay readable
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Drop records whose substance-abuse field is an empty string, then count the
# remaining records for every (substance abuse, injury severity) pair
substance_abuse_impact <-
  data |>
  filter(Driver.Substance.Abuse != "") |>
  group_by(Driver.Substance.Abuse, Injury.Severity) |>
  summarise(Count = n()) |>
  ungroup()
`summarise()` has grouped output by 'Driver.Substance.Abuse'. You can override
using the `.groups` argument.
# Side-by-side bars: for each substance-abuse category, one bar per injury
# severity level
ggplot(
  data = substance_abuse_impact,
  mapping = aes(
    x = Driver.Substance.Abuse,
    y = Count,
    fill = Injury.Severity
  )
) +
  geom_col(position = "dodge") +
  labs(
    title = "Impact of Driver Substance Abuse on Accident Severity",
    x = "Substance Abuse",
    y = "Count"
  ) +
  # Tilt the x-axis labels so the category names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Keep only the crash records that have both coordinates, for mapping.
# The `data` frame loaded at the top of this script is reused here instead of
# reading the CSV a second time: a second read parses the whole file again and
# overwrites `data`, discarding the parsed timestamp and the Year/Month/Hour
# columns derived from it.
data_clean <- data %>%
  filter(!is.na(Latitude), !is.na(Longitude))

# Build the interactive crash map: default base tiles plus one red,
# borderless circle marker per record in data_clean
map <-
  leaflet(data = data_clean) |>
  addTiles() |>
  addCircleMarkers(
    lng = ~Longitude,
    lat = ~Latitude,
    radius = 5,
    color = "red",
    stroke = FALSE,
    fillOpacity = 0.7,
    # HTML shown in the popup when a marker is clicked
    popup = ~paste(
      "Location:", Location,
      "<br>Collision Type:", Collision.Type,
      "<br>Injury Severity:", Injury.Severity
    )
  ) |>
  # Centre the initial view on the mean coordinate of the plotted records
  setView(
    lng = mean(data_clean$Longitude, na.rm = TRUE),
    lat = mean(data_clean$Latitude, na.rm = TRUE),
    zoom = 10
  )

# Display the map
map

Paragraph

For this project I decided to keep things simpler than in Part 1 and clean the data a little less so that I could explore more of it. First, I made a graph, Accident Severity by Time of Day, to see at what times of day the most crashes happen. Then I made a Collision Type Distribution graph to see all of the different types of collisions in the dataset, and after that I made an Impact of Driver Substance Abuse on Accident Severity graph. I feel that these three graphs cover a lot of the dataset; from them we can really see what happened in it. Finally, I made an interactive map to see where all of the different crashes happened.