The “Crash Report Drivers Data of Montgomery County, Maryland” dataset offers a comprehensive view of motor vehicle operators involved in traffic collisions across the county’s roadways. Compiled through the Automated Crash Reporting System (ACRS) managed by the Maryland State Police and reported by various law enforcement agencies including the Montgomery County Police, Gaithersburg Police, Rockville Police, and the Maryland-National Capital Park Police, this dataset encapsulates detailed information pertaining to each recorded collision and the drivers implicated. The dataset can be downloaded at https://catalog.data.gov/dataset/crash-reporting-drivers-data.
Important features:
What’s interesting in this dataset, and some of its challenges:
# Load the crash-report CSV from the working directory into a data frame.
data <- read.csv(file = "Crash_Reporting.csv")
# Print a compact, column-by-column overview (name, type, first values).
glimpse(data)
## Rows: 172,105
## Columns: 43
## $ Report.Number <chr> "MCP3170003V", "MCP3254003K", "EJ788700…
## $ Local.Case.Number <chr> "240000438", "230072050", "230074270", …
## $ Agency.Name <chr> "Montgomery County Police", "Montgomery…
## $ ACRS.Report.Type <chr> "Property Damage Crash", "Injury Crash"…
## $ Crash.Date.Time <chr> "01/03/2024 02:55:00 PM", "12/16/2023 1…
## $ Route.Type <chr> "", "Maryland (State)", "Maryland (Stat…
## $ Road.Name <chr> "", "GERMANTOWN RD", "GREAT SENECA HWY"…
## $ Cross.Street.Type <chr> "", "County", "Municipality", "County",…
## $ Cross.Street.Name <chr> "", "MIDDLEBROOK RD", "KENTLANDS BLVD",…
## $ Off.Road.Description <chr> "IN FRONT OF 18900 BIRDSEYE DR", "", ""…
## $ Municipality <chr> "", "N/A", "GAITHERSBURG", "N/A", "N/A"…
## $ Related.Non.Motorist <chr> "", "BICYCLIST", "", "", "", "", "PEDES…
## $ Collision.Type <chr> "OPPOSITE DIRECTION SIDESWIPE", "STRAIG…
## $ Weather <chr> "CLOUDY", "CLEAR", "CLEAR", "CLEAR", "R…
## $ Surface.Condition <chr> "", "DRY", "DRY", "DRY", "WET", "DRY", …
## $ Light <chr> "DAYLIGHT", "DAYLIGHT", "DAYLIGHT", "DA…
## $ Traffic.Control <chr> "NO CONTROLS", "TRAFFIC SIGNAL", "TRAFF…
## $ Driver.Substance.Abuse <chr> "NONE DETECTED", "NONE DETECTED", "NONE…
## $ Non.Motorist.Substance.Abuse <chr> "", "NONE DETECTED", "", "", "", "", "N…
## $ Person.ID <chr> "ACC015E9-08A4-4856-866E-0004005F986C",…
## $ Driver.At.Fault <chr> "Yes", "No", "No", "No", "Yes", "Yes", …
## $ Injury.Severity <chr> "NO APPARENT INJURY", "NO APPARENT INJU…
## $ Circumstance <chr> "N/A", "N/A", "N/A", "ANIMAL, N/A", "RA…
## $ Driver.Distracted.By <chr> "LOOKED BUT DID NOT SEE", "NOT DISTRACT…
## $ Drivers.License.State <chr> "MD", "MD", "MD", "MD", "MD", "MD", "MD…
## $ Vehicle.ID <chr> "4E492574-893B-4EB1-ADCA-53FDD633D6C4",…
## $ Vehicle.Damage.Extent <chr> "FUNCTIONAL", "FUNCTIONAL", "FUNCTIONAL…
## $ Vehicle.First.Impact.Location <chr> "SEVEN OCLOCK", "ELEVEN OCLOCK", "SIX O…
## $ Vehicle.Second.Impact.Location <chr> "SEVEN OCLOCK", "ELEVEN OCLOCK", "SIX O…
## $ Vehicle.Body.Type <chr> "PASSENGER CAR", "PASSENGER CAR", "(SPO…
## $ Vehicle.Movement <chr> "MOVING CONSTANT SPEED", "MOVING CONSTA…
## $ Vehicle.Continuing.Dir <chr> "South", "North", "South", "South", "No…
## $ Vehicle.Going.Dir <chr> "South", "West", "South", "South", "Nor…
## $ Speed.Limit <int> 0, 35, 35, 40, 20, 35, 35, 10, 35, 0, 2…
## $ Driverless.Vehicle <chr> "No", "No", "No", "No", "No", "No", "No…
## $ Parked.Vehicle <chr> "No", "No", "No", "No", "No", "No", "No…
## $ Vehicle.Year <int> 2017, 2010, 2021, 2019, 2014, 1991, 201…
## $ Vehicle.Make <chr> "LEXUS", "TOYT", "SUBARU", "DODGE", "NI…
## $ Vehicle.Model <chr> "SUV", "PRIUS", "FORRESTER", "CHARGER",…
## $ Equipment.Problems <chr> "NO MISUSE", "NO MISUSE", "NO MISUSE", …
## $ Latitude <dbl> 39.16500, 39.17878, 39.12357, 39.21174,…
## $ Longitude <dbl> -77.24931, -77.26719, -77.23177, -77.17…
## $ Location <chr> "(39.16500483, -77.24931)", "(39.178775…
# Parse the crash timestamp strings (month/day/year, 12-hour clock with AM/PM)
# into POSIXct date-times stored in a new Crash_Date column.
crash_time_format <- "%m/%d/%Y %I:%M:%S %p"
data[["Crash_Date"]] <- as.POSIXct(
  data[["Crash.Date.Time"]],
  format = crash_time_format
)
# Derive the calendar year of each crash from the parsed timestamp.
data[["Year"]] <- lubridate::year(data[["Crash_Date"]])
# distinct_years <- data %>%
# distinct(Year)
#
# distinct_years <- distinct_years$Year %>% sort()
#
# print(distinct_years)
# Count driver records for every Year / Injury.Severity combination,
# leaving out 2024. The filter is on a grouping key, so applying it before
# the grouping yields the same rows as applying it after summarising.
injury_counts <- data %>%
  filter(Year != 2024) %>%
  group_by(Year, Injury.Severity) %>%
  summarise(Count = n())
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
# Stacked bar chart of driver records per year, split by injury severity.
# Count tallies every driver record in a Year / Injury.Severity group,
# including "NO APPARENT INJURY", so the axis and title describe driver
# records rather than injuries; the fill shows how each yearly total breaks
# down by severity.
ggplot(
  injury_counts,
  aes(x = factor(Year), y = Count, fill = Injury.Severity)
) +
  geom_col() +
  labs(
    x = "Year",
    y = "Number of Driver Records",
    fill = "Injury Severity",
    title = "Driver Records by Year and Injury Severity"
  )
# # Plot the bar graph with a trend line
# ggplot(injury_counts, aes(x = Year, y = Count)) +
# geom_bar(stat = "identity") +
# geom_smooth(method = "loess", se = FALSE, color = "red") + # Add a LOESS-smoothed trend line
# labs(x = "Year", y = "Total Number of Injuries", title = "Total Injuries by Year with Trend Line") +
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Line chart: one line per injury-severity category, with an x-axis tick
# for every year between the first and last year present in injury_counts.
year_span <- range(injury_counts$Year)
ggplot(data = injury_counts) +
  geom_line(mapping = aes(x = Year, y = Count, color = Injury.Severity)) +
  scale_x_continuous(breaks = seq(year_span[1], year_span[2], by = 1)) +
  labs(
    x = "Year",
    y = "Number of Injuries",
    title = "Injuries by Year and Injury Severity"
  )
library(ggplot2)
library(ggmap)
## Warning: package 'ggmap' was built under R version 4.3.3
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
## OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
# Read the Google Maps API key from the environment instead of embedding it in
# the source. GGMAP_GOOGLE_API_KEY is the variable ggmap itself uses when a key
# is saved with register_google(write = TRUE); set it in ~/.Renviron.
# NOTE(review): the key that was previously hard-coded here has been exposed
# and should be revoked and replaced.
api_key <- Sys.getenv("GGMAP_GOOGLE_API_KEY")
if (!nzchar(api_key)) {
  stop(
    "Google Maps API key not found: set the GGMAP_GOOGLE_API_KEY ",
    "environment variable before running this section.",
    call. = FALSE
  )
}

# Centre the basemap on the mean crash coordinates.
poi <- c(lon = mean(data$Longitude), lat = mean(data$Latitude))
gap <- 0
center <- poi
zoom <- 13

# Register the key with ggmap and download a black-and-white terrain basemap.
register_google(key = api_key)
map <- get_googlemap(
  center = center,
  zoom = zoom,
  maptype = "terrain",
  color = "bw"
)
## ℹ <https://maps.googleapis.com/maps/api/staticmap?center=39.083119,-77.112343&zoom=13&size=640x640&scale=2&maptype=terrain&key=xxx>
# Collect and print every row whose Longitude or Latitude is NA.
coords_complete <- complete.cases(data[, c("Longitude", "Latitude")])
missing_values <- data[!coords_complete, ]
print(missing_values)
## [1] Report.Number Local.Case.Number
## [3] Agency.Name ACRS.Report.Type
## [5] Crash.Date.Time Route.Type
## [7] Road.Name Cross.Street.Type
## [9] Cross.Street.Name Off.Road.Description
## [11] Municipality Related.Non.Motorist
## [13] Collision.Type Weather
## [15] Surface.Condition Light
## [17] Traffic.Control Driver.Substance.Abuse
## [19] Non.Motorist.Substance.Abuse Person.ID
## [21] Driver.At.Fault Injury.Severity
## [23] Circumstance Driver.Distracted.By
## [25] Drivers.License.State Vehicle.ID
## [27] Vehicle.Damage.Extent Vehicle.First.Impact.Location
## [29] Vehicle.Second.Impact.Location Vehicle.Body.Type
## [31] Vehicle.Movement Vehicle.Continuing.Dir
## [33] Vehicle.Going.Dir Speed.Limit
## [35] Driverless.Vehicle Parked.Vehicle
## [37] Vehicle.Year Vehicle.Make
## [39] Vehicle.Model Equipment.Problems
## [41] Latitude Longitude
## [43] Location Crash_Date
## [45] Year
## <0 rows> (or 0-length row.names)
# Overlay crash locations on the basemap, one facet per injury-severity
# category, with points coloured by that same category.
# NOTE(review): the warning emitted for this plot reports 139034 rows removed;
# presumably those points fall outside the extent of the downloaded basemap --
# confirm, and consider a lower zoom if the whole county should be shown.
ggmap(map) +
  geom_point(
    mapping = aes(x = Longitude, y = Latitude, color = Injury.Severity),
    data = data,
    size = 0.1,
    alpha = 0.6
  ) +
  facet_wrap(vars(Injury.Severity)) +
  labs(title = "Injury Severity by Location") +
  theme(legend.position = "bottom")
## Warning: Removed 139034 rows containing missing values (`geom_point()`).
library(leaflet)
## Warning: package 'leaflet' was built under R version 4.3.3
# Keep only the driver records with the two most severe injury categories.
severe_levels <- c("FATAL INJURY", "SUSPECTED SERIOUS INJURY")
fatal <- data %>%
  filter(Injury.Severity %in% severe_levels)

# Map each severity category to an actual colour. Passing the raw category
# labels as fillColor does not yield valid colours, so the markers were not
# really coloured by severity.
severity_pal <- colorFactor(
  palette = c("red", "orange"),
  domain = NULL,
  levels = severe_levels
)

# Create a leaflet map with the default tiles, centred on the mean
# coordinates of the selected records.
m <- leaflet(fatal) %>%
  addTiles() %>%
  setView(
    lng = mean(fatal$Longitude),
    lat = mean(fatal$Latitude),
    zoom = 10
  )

# Add one circle marker per record, coloured by injury severity, plus a legend
# explaining the colours.
m <- m %>%
  addCircleMarkers(
    lng = ~Longitude,
    lat = ~Latitude,
    color = ~severity_pal(Injury.Severity),
    fillColor = ~severity_pal(Injury.Severity),
    fillOpacity = 0.6,
    radius = 1
  ) %>%
  addLegend(
    position = "bottomright",
    pal = severity_pal,
    values = ~Injury.Severity,
    title = "Injury Severity"
  )

# Display the map
m