import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.ticker import FuncFormatter
import plotly.graph_objects as go
import folium
import warnings
# Suppress all warnings for the rest of the script.
# NOTE(review): this also hides pandas deprecation warnings; consider
# narrowing the filter.
warnings.filterwarnings('ignore')

# Local path of the accidents CSV export.
path = "C:/Users/jrodri57/Downloads/US_Accidents_March23.csv"

# Read only the listed columns.
df = pd.read_csv(
    path,
    usecols=['State', 'Severity', 'Weather_Condition', 'Start_Time', 'Start_Lat', 'Start_Lng'],
)

# Label missing weather observations explicitly. The result is assigned back
# to the column: `df.Weather_Condition.fillna(..., inplace=True)` is chained
# assignment and does not modify `df` under pandas copy-on-write.
df['Weather_Condition'] = df['Weather_Condition'].fillna("Not Available")

# Keep only the text before the first '.', (presumably dropping fractional
# seconds) so every timestamp matches the single explicit format below.
df['Start_Time'] = df['Start_Time'].str.split('.').str[0]
df['Start_Time'] = pd.to_datetime(df['Start_Time'], format='%Y-%m-%d %H:%M:%S')

# Calendar fields derived from the accident start time.
start_time = df['Start_Time'].dt
df['Month'] = start_time.month
df['MonthName'] = start_time.month_name()
df['Year'] = start_time.year
df['Day'] = start_time.day
df['Hour'] = start_time.hour
df['WeekDay'] = start_time.strftime('%a')  # abbreviated weekday name

# Human-readable labels for the numeric severity codes.
severity_mapping = {
    1: 'Low',
    2: 'Moderate',
    3: 'High',
    4: 'Critical',
}
df['Severity_Level'] = df['Severity'].map(severity_mapping)

# Severe accidents only (High and Critical); copied so later edits do not
# touch `df`.
filtered_df = df[df['Severity_Level'].isin(['High', 'Critical'])].copy()
Let’s look at traffic accident patterns in Maryland using information from several graphs covering 2016 to 2023.
This overview will cover:
We’ll also touch on related factors like weather and how
Maryland fits into the national picture. This summary helps to
understand the main risks associated with driving in Maryland based on
recent accident data.
This is a countrywide car accident dataset that covers 49 states of the USA. The accident data were collected from February 2016 to March 2023, using multiple APIs that provide streaming traffic incident (or event) data. These APIs broadcast traffic data captured by various entities, including the US and state departments of transportation, law enforcement agencies, traffic cameras, and traffic sensors within the road networks.
This dataset was collected in real time using multiple traffic APIs. It contains accident data collected from February 2016 to March 2023 for the contiguous United States. For more details about this dataset, see the dataset source listed below.
The dataset currently contains approximately 7.7 million accident records.
Dataset file size: 2.84 GB
Dataset Name: A Countrywide Traffic Accident Dataset (2016 - 2023)
Dataset Source: https://www.kaggle.com/datasets/sobhanmoosavi/us-accidents
This is some general text about my findings before I show the individual charts in tabs. If you add .tabset-pills inside the curly braces, it will generate orange tab buttons
This bar chart displays the Top 20 US States ranked by the number of severe traffic accidents (defined as ‘High’ and ‘Critical’ severity levels) that occurred between 2016 and 2023.
Highest Counts: California (CA) has significantly more severe accidents than any other state, with 285,316 incidents. Texas (TX) and Florida (FL) follow, with 127,652 and 117,214 accidents, respectively.
Average: The average (mean) number of severe accidents per state, computed across all states in the dataset rather than only the top 20, is 30,094 (indicated by the dashed line).
Maryland, highlighted in red, ranks 14th among these 20 states with 33,810 severe accidents, which is slightly above the all-state average.
# Bar chart: states with the most severe (High/Critical) accidents,
# with Maryland highlighted and the all-state mean drawn as a dashed line.

# Number of severe accidents per state, largest first.
x2 = (
    filtered_df.groupby('State')
    .size()
    .sort_values(ascending=False)
    .reset_index(name='Count')
)

# Exactly the 20 highest-ranked states. The previous `.loc[0:20]` slice is
# label-inclusive and therefore drew 21 bars under a "Top 20" title.
TOP_N_STATES = 20
top_states = x2.head(TOP_N_STATES)

fig2 = plt.figure(figsize=(15, 10))
ax2 = fig2.add_subplot(1, 1, 1)
ax2.spines['right'].set_visible(False)
ax2.spines['top'].set_visible(False)
ax2.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))

# One colour per displayed bar; Maryland is red when it is in the top group.
# Building the list from the displayed states avoids an IndexError when 'MD'
# is absent from the data.
bar_colors = ['red' if state == 'MD' else 'C0' for state in top_states['State']]

bars = ax2.bar(top_states['State'], top_states['Count'], color=bar_colors, label='State Count')
ax2.set_xlabel('States')
ax2.set_ylabel('Count of Accidents')
# `filtered_df` is not restricted by year, so the title states the same
# 2016 - 2023 span used by the other charts.
ax2.set_title('Top 20 States - Accident Severity Level (High and Critical)\nUS Accidents (2016 - 2023)')

# Print each bar's count just above it.
for bar in bars:
    yval = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width() / 2, yval, f'{yval:,}', ha='center', va='bottom')
plt.xticks(rotation=0, ha='center')

# The mean is taken over every state in `x2`, not only the displayed ones.
mean = x2['Count'].mean()
ax2.axhline(mean, color='black', linestyle='dashed')

# Anchor the mean label at 'OH' when that bar is displayed; otherwise fall
# back to the last displayed bar so no extra empty category is added to the
# x-axis.
displayed_states = list(top_states['State'])
mean_label_x = 'OH' if 'OH' in displayed_states else displayed_states[-1]
ax2.text(mean_label_x, mean - 3500, 'Mean = ' + str(int(mean)), rotation=0, size=10, va='center', ha='right')
plt.show()
This horizontal bar chart illustrates the top 15 weather conditions associated with severe traffic accidents (classified as ‘High’ and ‘Critical’ severity levels) in the United States. The data covers the period from 2016 to 2023.
Dominant Conditions: Contrary to what some might expect, the vast majority of severe accidents occur during seemingly non-hazardous weather. ‘Fair’ (approx. 300,000) and ‘Clear’ (approx. 271,000) conditions account for the highest number of severe accidents by a significant margin.
Cloudy Conditions: Various forms of cloudy weather (‘Mostly Cloudy’, ‘Partly Cloudy’, ‘Overcast’, ‘Cloudy’) also represent substantial portions of the severe accident counts, collectively contributing to hundreds of thousands of incidents.
Precipitation: Conditions involving precipitation generally rank lower. ‘Light Rain’ accounts for about 80,000 severe accidents, while ‘Light Snow’, ‘Rain’, ‘Heavy Rain’, and ‘Light Drizzle’ have progressively fewer counts within this top 15 list.
# Horizontal bar chart: weather conditions most often recorded for severe
# (High/Critical) accidents.

# Number of severe accidents per weather condition, largest first.
x1 = (
    filtered_df.groupby('Weather_Condition')
    .size()
    .sort_values(ascending=False)
    .reset_index(name='Count')
)

# Exactly the 15 most frequent conditions. The previous `.loc[0:15]` slice is
# label-inclusive and therefore drew 16 bars under a "Top 15" title.
TOP_N_WEATHER = 15
# Reversed so the most frequent condition ends up at the top of the chart.
top_weather = x1.head(TOP_N_WEATHER).iloc[::-1]

fig = plt.figure(figsize=(15, 10))
ax1 = fig.add_subplot(1, 1, 1)
ax1.spines['right'].set_visible(False)
ax1.spines['top'].set_visible(False)
# Show tick values of 1000 or more as "<n>k"; smaller values as plain integers.
ax1.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: f'{int(x/1000)}k' if x >= 1000 else f'{int(x)}'))
# One major tick every 10,000 accidents.
ax1.xaxis.set_major_locator(ticker.MultipleLocator(10000))

bars = ax1.barh(top_weather['Weather_Condition'], top_weather['Count'], label='Weather Condition Count')
ax1.set_ylabel('Weather Condition')
ax1.set_xlabel('Count of Accidents')
ax1.set_title('Top 15 Accident Weather Conditions - Severity Level (High and Critical)\nUS Accidents (2016 - 2023)')

# Print each bar's count just past its right end.
for bar in bars:
    xval = bar.get_width()
    ax1.text(xval, bar.get_y() + bar.get_height() / 2, f'{xval:,}', ha='left', va='center')
plt.xticks(rotation=45, ha='center', fontsize=10)
plt.show()
This map pinpoints the locations of severe traffic accidents within and around Maryland that occurred between 2016 and 2023.
Data Representation: Each dot on the map signifies the location of a reported accident.
Severity Coding: The color of the dot indicates the severity level of the accident: red for Critical and orange for High.
Purpose: The map visually represents the geographic distribution and concentration hot spots for these serious accidents across the state and its immediate vicinity during the specified timeframe.
The map reveals distinct patterns in the distribution of high and critical severity accidents in Maryland from 2016 to 2023:
Concentration along Highways: Accidents are heavily concentrated along major transportation corridors, particularly interstate highways like I-95, I-70, I-695 (Baltimore Beltway), I-495 (Capital Beltway), I-270, and US-50/301.
Urban/Suburban Density: The highest density of both critical (red) and high (orange) severity accidents occurs in the densely populated Baltimore-Washington metropolitan area. This includes significant clusters around Baltimore City, Washington D.C., and the surrounding suburban counties.
# Interactive folium map with one circle per severe Maryland accident,
# coloured by severity level.

# Maryland-only subsets: severe accidents (plotted below) and all accidents.
filtered_df_MD = filtered_df[filtered_df['State'].isin(['MD'])].copy().reset_index(drop=True)
df_MD = df[df['State'].isin(['MD'])].copy().reset_index(drop=True)

# NOTE(review): the original comment described this point as "Penn Station
# (Amtrak) Baltimore City", but these coordinates appear to lie in Northern
# Virginia, west of Washington, D.C. Confirm the intended map centre.
center_of_map = [38.842393, -77.390414]
my_map = folium.Map(location=center_of_map, zoom_start=9, width='90%',
                    height='100%', left='5%', right='5%', top='0%')

# Additional base maps the viewer can switch between via the layer control.
tiles = ['Cartodb Positron', 'OpenTopoMap', 'Cartodb dark_matter',
         'CyclOSM', 'OpenStreetMap.Mapnik']
for tile_name in tiles:
    folium.TileLayer(tile_name).add_to(my_map)
folium.LayerControl().add_to(my_map)

# Title and colour legend rendered as HTML with the map.
title_html = '<h3 align="center" style="font-size:20px">Maryland Accidents Locations (2016 - 2023)<br> Severity Level <span style="color:red;"> Critical = Red</span> , <span style="color:orange;">High = Orange</span></h3>'
my_map.get_root().html.add_child(folium.Element(title_html))

# Rows without coordinates cannot be drawn. Drop them explicitly rather than
# hiding every possible error behind a bare `except: pass`, so genuine
# problems still surface. `filtered_df_MD` itself is left untouched.
plottable = filtered_df_MD.dropna(subset=['Start_Lat', 'Start_Lng'])

for lat, lng, severity in zip(plottable['Start_Lat'],
                              plottable['Start_Lng'],
                              plottable['Severity_Level']):
    # Critical accidents are red; every other plotted row is orange.
    color = 'red' if severity == 'Critical' else 'orange'
    folium.Circle(location=[lat, lng],
                  radius=50,
                  color=color,
                  fill=True,
                  fill_color=color,
                  fill_opacity=0.5).add_to(my_map)

#my_map.save('C:/Users/jrodri57/Downloads/US_Accidents.html')
# Bare expression: displays the map when run as the last line of a notebook cell.
my_map