import os
# Point Qt at the platform-plugin directory of the local Anaconda install.
# NOTE(review): presumably needed so a Qt-based plotting backend can start
# while the report is knitted; the path is machine-specific -- confirm before
# running elsewhere.
os.environ.update(
    {'QT_QPA_PLATFORM_PLUGIN_PATH': 'C:/ProgramData/Anaconda3/Library/plugins/platforms'}
)
This report explores patterns in traffic accident data, analyzing when and under what conditions most accidents occur. Using Python in R Markdown, this analysis includes visualizations for monthly trends, traffic control devices, weather effects, and injury correlations.
The dataset used includes detailed records of traffic accidents, including date, time, weather, location, and injury outcomes. The goal is to visualize high-risk conditions and identify trends that can support public safety planning.
The scatter plot below visualizes how traffic accidents are distributed across days of the week and months. Larger points in lighter (yellow-green) colors mark the day-and-month combinations with more accidents, suggesting that specific days and months are riskier.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning for the rest of the script so that library warnings
# do not end up in the rendered report output.
warnings.filterwarnings(action="ignore")
# --- Accidents by day of week and month (bubble scatter) --------------------
# Read the accident records from the Documents folder on the network share.
path = r"\\apporto.com\\dfs\\LOYOLA\\Users\\cngutierrez_loyola\\Documents\\"
csv_file = path + "traffic_accidents.csv"
df = pd.read_csv(csv_file)

# Keep only the rows where both the weekday and the month are present.
has_day = df['crash_day_of_week'].notna()
has_month = df['crash_month'].notna()
df = df[has_day & has_month]

# One row per (month, weekday) pair, with the number of records in 'count'.
by_month_and_day = df.groupby(['crash_month', 'crash_day_of_week'])
day_month_counts = by_month_and_day['crash_day_of_week'].count().reset_index(name='count')

plt.figure(figsize=(12, 7))
# The count drives both the marker size and the marker colour.
plt.scatter(
    day_month_counts['crash_day_of_week'],
    day_month_counts['crash_month'],
    c=day_month_counts['count'],
    s=day_month_counts['count'],
    cmap='viridis',
    edgecolors='black',
    marker='o',
)
plt.title("Accidents by Day and Month", fontsize=16)
plt.xlabel("Day of the Week")
plt.ylabel("Month")
plt.colorbar(label="Accident Count")
plt.tight_layout()
plt.show()
This bar chart shows the five most common traffic control devices involved in accidents. These may represent design or behavioral issues.
# --- Top 5 traffic control devices (bar chart) ------------------------------
# value_counts() orders devices from most to least frequent; after
# reset_index() the rows are labelled 0, 1, 2, ... in that order.
device_counts = df['traffic_control_device'].value_counts().reset_index()
device_counts.columns = ['traffic_control_device', 'count']

# .loc slicing is label-based and inclusive, so 0:4 selects the first five rows.
top_five = device_counts.loc[0:4]

plt.figure(figsize=(12, 6))
plt.bar(
    top_five['traffic_control_device'],
    top_five['count'],
    color='skyblue',
)
plt.title("Top 5 Traffic Control Devices in Accidents")
plt.xlabel("Device Type")
plt.ylabel("Count")
plt.grid(axis='y')
plt.tight_layout()
plt.show()
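The line graph shows total injuries per month for four weather conditions: clear, rain, snow, and cloudy. Weather appears to have a strong impact on injury totals, although these totals also reflect how often each condition occurs, so they do not measure per-accident severity on their own.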
# --- Total injuries per month, one line per weather condition ---------------
# Draws a single figure with one line for each entry in weather_types.
weather_types = ['CLEAR', 'RAIN', 'SNOW', 'CLOUDY']
plt.figure(figsize=(12, 6))
for weather in weather_types:
    # These three statements form the loop body and must be indented;
    # without the indentation Python rejects the script with an
    # IndentationError before anything runs.
    # Restrict to the records reported under this weather condition.
    data = df[df['weather_condition'] == weather]
    # Sum 'injuries_total' within each month; only months that actually
    # occur for this condition appear in the result.
    monthly_injuries = data.groupby('crash_month')['injuries_total'].sum()
    plt.plot(monthly_injuries.index, monthly_injuries.values, marker='o', label=weather)
# Figure-level decoration is applied once, after all lines are drawn.
plt.title("Injuries per Month by Weather Condition")
plt.xlabel("Month")
plt.ylabel("Total Injuries")
plt.legend()
plt.grid(True)
# Fix the x ticks at 1..12 so every month position is labelled.
plt.xticks(range(1, 13))
plt.tight_layout()
plt.show()
The donut-style pie chart shows how accidents are distributed among the five most common traffic control devices.
# --- Share of the five most common traffic control devices (donut) ----------
filtered = df['traffic_control_device'].value_counts().head(5)

plt.figure(figsize=(8, 8))
# A wedge width of 0.4 leaves the centre of the pie empty (ring shape).
ring_style = {'width': 0.4, 'edgecolor': 'white'}
wedges, texts, autotexts = plt.pie(
    filtered,
    labels=filtered.index,
    autopct='%1.1f%%',
    startangle=90,
    wedgeprops=ring_style,
)
# Draw a white disc of radius 0.60 at the centre of the ring.
center_disc = plt.Circle((0, 0), 0.60, fc='white')
plt.gca().add_artist(center_disc)
plt.title("Traffic Control Device Distribution")
# Equal axis scaling keeps the ring circular.
plt.axis('equal')
plt.tight_layout()
plt.show()
This heatmap shows the correlation between different injury types reported in accidents. Strong correlations may indicate reporting patterns.
# --- Correlation heatmap of the injury-count columns ------------------------
correlation_columns = [
    'injuries_total',
    'injuries_fatal',
    'injuries_incapacitating',
    'injuries_non_incapacitating',
    'injuries_reported_not_evident',
    'injuries_no_indication',
]
# Pairwise correlation between the selected columns (DataFrame.corr defaults).
correlation_matrix = df[correlation_columns].corr()

plt.figure(figsize=(8, 6))
# Annotate every cell with its coefficient, formatted to two decimals.
heatmap_options = {'annot': True, 'cmap': 'coolwarm', 'fmt': ".2f"}
sns.heatmap(correlation_matrix, **heatmap_options)
plt.title("Correlation Heatmap of Injury Types")
plt.tight_layout()
plt.show()
The traffic accident data reveals critical patterns around time, weather, and traffic controls. These visualizations support better decision-making in traffic safety planning and risk prevention.