Team Member (1)

Introduction: Unveiling Patterns in Montgomery County’s Crashes: A Look at Driver Data

# Load the crash-report export from the working directory, then print a
# compact per-column overview (name, type, first few values).
data <- read.csv(file = "Crash_Reporting.csv")
glimpse(x = data)
## Rows: 172,105
## Columns: 43
## $ Report.Number                  <chr> "MCP3170003V", "MCP3254003K", "EJ788700…
## $ Local.Case.Number              <chr> "240000438", "230072050", "230074270", …
## $ Agency.Name                    <chr> "Montgomery County Police", "Montgomery…
## $ ACRS.Report.Type               <chr> "Property Damage Crash", "Injury Crash"…
## $ Crash.Date.Time                <chr> "01/03/2024 02:55:00 PM", "12/16/2023 1…
## $ Route.Type                     <chr> "", "Maryland (State)", "Maryland (Stat…
## $ Road.Name                      <chr> "", "GERMANTOWN RD", "GREAT SENECA HWY"…
## $ Cross.Street.Type              <chr> "", "County", "Municipality", "County",…
## $ Cross.Street.Name              <chr> "", "MIDDLEBROOK RD", "KENTLANDS BLVD",…
## $ Off.Road.Description           <chr> "IN FRONT OF 18900 BIRDSEYE DR", "", ""…
## $ Municipality                   <chr> "", "N/A", "GAITHERSBURG", "N/A", "N/A"…
## $ Related.Non.Motorist           <chr> "", "BICYCLIST", "", "", "", "", "PEDES…
## $ Collision.Type                 <chr> "OPPOSITE DIRECTION SIDESWIPE", "STRAIG…
## $ Weather                        <chr> "CLOUDY", "CLEAR", "CLEAR", "CLEAR", "R…
## $ Surface.Condition              <chr> "", "DRY", "DRY", "DRY", "WET", "DRY", …
## $ Light                          <chr> "DAYLIGHT", "DAYLIGHT", "DAYLIGHT", "DA…
## $ Traffic.Control                <chr> "NO CONTROLS", "TRAFFIC SIGNAL", "TRAFF…
## $ Driver.Substance.Abuse         <chr> "NONE DETECTED", "NONE DETECTED", "NONE…
## $ Non.Motorist.Substance.Abuse   <chr> "", "NONE DETECTED", "", "", "", "", "N…
## $ Person.ID                      <chr> "ACC015E9-08A4-4856-866E-0004005F986C",…
## $ Driver.At.Fault                <chr> "Yes", "No", "No", "No", "Yes", "Yes", …
## $ Injury.Severity                <chr> "NO APPARENT INJURY", "NO APPARENT INJU…
## $ Circumstance                   <chr> "N/A", "N/A", "N/A", "ANIMAL, N/A", "RA…
## $ Driver.Distracted.By           <chr> "LOOKED BUT DID NOT SEE", "NOT DISTRACT…
## $ Drivers.License.State          <chr> "MD", "MD", "MD", "MD", "MD", "MD", "MD…
## $ Vehicle.ID                     <chr> "4E492574-893B-4EB1-ADCA-53FDD633D6C4",…
## $ Vehicle.Damage.Extent          <chr> "FUNCTIONAL", "FUNCTIONAL", "FUNCTIONAL…
## $ Vehicle.First.Impact.Location  <chr> "SEVEN OCLOCK", "ELEVEN OCLOCK", "SIX O…
## $ Vehicle.Second.Impact.Location <chr> "SEVEN OCLOCK", "ELEVEN OCLOCK", "SIX O…
## $ Vehicle.Body.Type              <chr> "PASSENGER CAR", "PASSENGER CAR", "(SPO…
## $ Vehicle.Movement               <chr> "MOVING CONSTANT SPEED", "MOVING CONSTA…
## $ Vehicle.Continuing.Dir         <chr> "South", "North", "South", "South", "No…
## $ Vehicle.Going.Dir              <chr> "South", "West", "South", "South", "Nor…
## $ Speed.Limit                    <int> 0, 35, 35, 40, 20, 35, 35, 10, 35, 0, 2…
## $ Driverless.Vehicle             <chr> "No", "No", "No", "No", "No", "No", "No…
## $ Parked.Vehicle                 <chr> "No", "No", "No", "No", "No", "No", "No…
## $ Vehicle.Year                   <int> 2017, 2010, 2021, 2019, 2014, 1991, 201…
## $ Vehicle.Make                   <chr> "LEXUS", "TOYT", "SUBARU", "DODGE", "NI…
## $ Vehicle.Model                  <chr> "SUV", "PRIUS", "FORRESTER", "CHARGER",…
## $ Equipment.Problems             <chr> "NO MISUSE", "NO MISUSE", "NO MISUSE", …
## $ Latitude                       <dbl> 39.16500, 39.17878, 39.12357, 39.21174,…
## $ Longitude                      <dbl> -77.24931, -77.26719, -77.23177, -77.17…
## $ Location                       <chr> "(39.16500483, -77.24931)", "(39.178775…

Questions to answer

Driver Behavior:

  • Distraction Analysis: Understand how different types of distractions (phone use, eating, etc.) affect crash risk.
  • Impaired Driving: See if the data suggests a correlation between crashes and factors like driving under the influence of alcohol or drugs.

Data Visualization:

  • Create charts and graphs to effectively represent the relationships between different factors and crash occurrences.
  • Use a heat map to visualize high-risk locations or specific times of day with a higher crash frequency.

Outcome: Focus on Safety Improvements:

  • Use the analysis to identify areas for improvement in infrastructure, traffic management, and driver education programs.
  • Target interventions (e.g., public awareness campaigns) towards specific types of driver distractions or risky behaviors.

Future work

  • Based on the identified trends, you can potentially build a model to predict high-risk situations based on factors like weather, time of day, and location (possibly after finishing the DATA 4319 class).
  • External Data Integration: Consider incorporating external data sources like traffic volume or road closure information to enrich the analysis.
  • Data Quality: Ensure the data in the crash reports is accurate and complete for reliable analysis.
# Parse the "Crash.Date.Time" strings (e.g. "01/03/2024 02:55:00 PM") into
# POSIXct date-times. The time zone is passed explicitly so the result does not
# depend on the time zone of the machine running the script.
# NOTE(review): "America/New_York" assumes the timestamps are Maryland local
# time — confirm against the data source.
data$Crash_Date <- as.POSIXct(
  data$Crash.Date.Time,
  format = "%m/%d/%Y %I:%M:%S %p",
  tz = "America/New_York"
)

# Surface rows whose timestamp could not be parsed instead of letting them turn
# into silent NA years further down.
n_unparsed <- sum(is.na(data$Crash_Date))
if (n_unparsed > 0) {
  warning(n_unparsed, " crash timestamps could not be parsed", call. = FALSE)
}

# Extract the calendar year of each crash from the parsed date-time
data$Year <- lubridate::year(data$Crash_Date)

# distinct_years <- data %>%
#   distinct(Year)
# 
# distinct_years <- distinct_years$Year %>% sort()
# 
# print(distinct_years)

# Count records per (Year, Injury.Severity) pair.
# 2024 is excluded before aggregating, so no work is spent on rows that are
# thrown away afterwards. `.groups = "drop"` returns an ungrouped table, which
# avoids the "grouped output" message and keeps leftover grouping from leaking
# into later steps.
injury_counts <- data %>%
  filter(Year != 2024) %>%
  group_by(Year, Injury.Severity) %>%
  summarise(Count = n(), .groups = "drop")
# Bar chart with one bar per year; the per-severity counts of a year are
# stacked, so each bar's height is that year's summed Count.
ggplot(data = injury_counts, mapping = aes(x = factor(Year), y = Count)) +
  geom_col() +
  xlab("Year") +
  ylab("Total Number of Injuries") +
  ggtitle("Total Injuries by Year")

# # Plot the bar graph with a trend line
# ggplot(injury_counts, aes(x = Year, y = Count)) +
#   geom_bar(stat = "identity") +
#   geom_smooth(method = "loess", se = FALSE, color = "red") +  # Add a linear trend line
#   labs(x = "Year", y = "Total Number of Injuries", title = "Total Injuries by Year with Trend Line") +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Line chart: one line per injury-severity category, with an x-axis tick for
# every year between the first and last year present in injury_counts.
ggplot(data = injury_counts, mapping = aes(x = Year, y = Count)) +
  geom_line(mapping = aes(color = Injury.Severity)) +
  scale_x_continuous(
    breaks = seq(
      from = min(injury_counts$Year),
      to = max(injury_counts$Year),
      by = 1
    )
  ) +
  xlab("Year") +
  ylab("Number of Injuries") +
  ggtitle("Injuries by Year and Injury Severity")

library(ggplot2)
library(ggmap)
## Warning: package 'ggmap' was built under R version 4.3.3
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
##   Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
##   OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
# Read the Google Maps API key from the environment instead of hard-coding it:
# a key stored in the source is exposed to everyone who can read the file.
# Define GOOGLE_MAPS_API_KEY in ~/.Renviron or the shell before running.
api_key <- Sys.getenv("GOOGLE_MAPS_API_KEY")
if (!nzchar(api_key)) {
  stop(
    "Set the GOOGLE_MAPS_API_KEY environment variable before requesting maps.",
    call. = FALSE
  )
}

# Centre the base map on the mean crash coordinate; na.rm guards against
# missing coordinates turning the centre into NA.
poi <- c(
  lon = mean(data$Longitude, na.rm = TRUE),
  lat = mean(data$Latitude, na.rm = TRUE)
)
gap <- 0
center <- poi
# Zoom 13 showed only a small area around the centre, so most crash points fell
# outside the tile and were dropped when plotted; zoom 10 covers a
# county-sized area.
zoom <- 10
register_google(key = api_key)

map <- get_googlemap(center = center, zoom = zoom, maptype = "terrain", color = "bw")
## ℹ <https://maps.googleapis.com/maps/api/staticmap?center=39.083119,-77.112343&zoom=10&size=640x640&scale=2&maptype=terrain&key=xxx>
# Collect every row that lacks a longitude or a latitude and print them;
# an empty result means all records carry both coordinates.
has_both_coords <- complete.cases(data[, c("Longitude", "Latitude")])
missing_values <- data[!has_both_coords, ]
print(missing_values)
##  [1] Report.Number                  Local.Case.Number             
##  [3] Agency.Name                    ACRS.Report.Type              
##  [5] Crash.Date.Time                Route.Type                    
##  [7] Road.Name                      Cross.Street.Type             
##  [9] Cross.Street.Name              Off.Road.Description          
## [11] Municipality                   Related.Non.Motorist          
## [13] Collision.Type                 Weather                       
## [15] Surface.Condition              Light                         
## [17] Traffic.Control                Driver.Substance.Abuse        
## [19] Non.Motorist.Substance.Abuse   Person.ID                     
## [21] Driver.At.Fault                Injury.Severity               
## [23] Circumstance                   Driver.Distracted.By          
## [25] Drivers.License.State          Vehicle.ID                    
## [27] Vehicle.Damage.Extent          Vehicle.First.Impact.Location 
## [29] Vehicle.Second.Impact.Location Vehicle.Body.Type             
## [31] Vehicle.Movement               Vehicle.Continuing.Dir        
## [33] Vehicle.Going.Dir              Speed.Limit                   
## [35] Driverless.Vehicle             Parked.Vehicle                
## [37] Vehicle.Year                   Vehicle.Make                  
## [39] Vehicle.Model                  Equipment.Problems            
## [41] Latitude                       Longitude                     
## [43] Location                       Crash_Date                    
## [45] Year                          
## <0 rows> (or 0-length row.names)
# Overlay the crash locations on the base map, one panel per injury-severity
# category, with points coloured by the same category.
ggmap(map) +
  geom_point(
    mapping = aes(x = Longitude, y = Latitude, color = Injury.Severity),
    data = data,
    size = 0.1,
    alpha = 0.6
  ) +
  facet_wrap(vars(Injury.Severity)) +
  ggtitle("Injury Severity by Location") +
  theme(legend.position = "bottom")
## Warning: Removed 139034 rows containing missing values (`geom_point()`).

library(leaflet)
## Warning: package 'leaflet' was built under R version 4.3.3
# Keep only the two most severe outcomes for the interactive map.
severe_levels <- c("FATAL INJURY", "SUSPECTED SERIOUS INJURY")
fatal <- data %>%
  filter(Injury.Severity %in% severe_levels)

# Map each severity label to an actual colour. Passing the raw label as
# fillColor does not work: "FATAL INJURY" is not a colour a browser can render.
severity_pal <- colorFactor(
  palette = c("#b10026", "#fd8d3c"),
  domain = NULL,
  levels = severe_levels
)

# Create a leaflet map with its initial view centred on the filtered points
m <- leaflet(fatal) %>%
  addTiles() %>%
  setView(
    lng = mean(fatal$Longitude, na.rm = TRUE),
    lat = mean(fatal$Latitude, na.rm = TRUE),
    zoom = 10
  )

# Add one marker per record, coloured by injury severity. At radius 1 the
# outline dominates the marker, so the outline uses the palette as well.
m <- m %>%
  addCircleMarkers(
    ~Longitude, ~Latitude,
    color = ~severity_pal(Injury.Severity),
    fillColor = ~severity_pal(Injury.Severity),
    fillOpacity = 0.6,
    radius = 1
  ) %>%
  addLegend(
    position = "bottomright",
    pal = severity_pal,
    values = ~Injury.Severity,
    title = "Injury Severity"
  )

# Display the map
m