import os
# Set the Qt platform-plugin search path to a hard-coded Anaconda directory.
# NOTE(review): machine-specific path; presumably needed so a Qt-based plotting
# backend can start on the author's machine — confirm before running elsewhere.
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH']='D:/Anaconda3/Library/plugins/platforms'
Below you will find data on crimes in Los Angeles, California. I have gathered information on the locations of crimes, total crimes, types of crimes, crimes per year, victims' sex, and crimes per quarter, each presented in a different kind of visualization. This information covers January 1, 2020 to the present (March 2023). Do not let these numbers scare you! Los Angeles is a big and beautiful city with so much to explore and offer. I hope these visualizations will provide you with a bit more insight into crime in Los Angeles.
import warnings
import pandas as pd
import folium
import textwrap
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
import seaborn as sns
import matplotlib.ticker as mtick
import matplotlib.ticker as ticker
# Suppress every warning for the remainder of the session.
warnings.filterwarnings("ignore")

# Build the CSV location from the data directory; `filename` is reused later
# when the map section re-reads a subset of the same file.
path = "U:/"
filename = f"{path}LACrime2020.csv"

# Main working frame holding the full crime export.
df = pd.read_csv(filename)
This graph gives us more insight into the number of crimes in different areas of Los Angeles. To the right of each bar you can see the total number of crimes in that area, and the legend displays the area names and their assigned colors. We can gather that Central, 77th Street, and Pacific have the highest numbers of crimes, so these may be areas where you want to be more aware of your surroundings, whether you are visiting or are from the city.
# Horizontal bar chart: number of recorded crimes per area, largest first
# (value_counts() already returns the areas sorted by descending count).
crime_counts_by_area = df['AREA NAME'].value_counts()

# Navy -> light-blue colormap quantized to 7 levels; each bar is shaded
# according to its rank in the sorted counts.
start_color = 'navy'
end_color = 'lightblue'
colors = mcolors.LinearSegmentedColormap.from_list('navy_to_lightblue', [start_color, end_color], 7)

fig, ax = plt.subplots(figsize=(15, 9))
n_areas = len(crime_counts_by_area)
for i, (area, count) in enumerate(crime_counts_by_area.items()):
    # One barh() call per area so every bar gets its own colour and legend entry.
    ax.barh(area, count, color=colors(i / n_areas), label=area)
    # Print the exact count just past the end of the bar.
    ax.text(count + 50, i - 0.15, '{:,.0f}'.format(count), color='black', fontsize=10)

ax.set_xlabel('Number of Crimes', fontsize=15)
ax.set_ylabel('Area Name', fontsize=15)
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=12)

# Thousands separators belong on the numeric x-axis only. The y-axis is
# categorical: installing this formatter there replaces the area names with
# their tick positions (0, 1, 2, ...), so it is deliberately not applied to it.
formatter = ticker.FuncFormatter(lambda x, pos: '{:,.0f}'.format(x))
ax.xaxis.set_major_formatter(formatter)

# Put the area with the most crimes at the top of the chart.
ax.invert_yaxis()
ax.legend(loc='lower right', fontsize=12)
ax.set_title('Crimes in Los Angeles - By Area', fontsize=15)
plt.show()
This heatmap allows us to see the number of crimes by year and month. The darkest blue represents the highest number of crimes, while the darkest red represents the lowest. One thing that really stands out is that crime has been increasing each year since 2020, with May and October of 2022 being the months with the most crime. You may also notice that the months from March 2023 onward are all very light colors; that is because we have only just wrapped up March and need to update the database to get the latest data.
# Heatmap of crime counts with months as rows and years as columns.
df['DATE OCC'] = pd.to_datetime(df['DATE OCC'])
df['Year'] = df['DATE OCC'].dt.year

# Ordered categorical so the rows follow calendar order, not alphabetical order.
month_order = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']
df['Month'] = pd.Categorical(df['DATE OCC'].dt.month_name(), categories=month_order, ordered=True)

# Count report numbers per (Year, Month). observed=False is spelled out so that
# month categories with no rows stay in the result regardless of the pandas
# default for categorical groupers.
crime_counts = df.groupby(['Year', 'Month'], observed=False)['DR_NO'].count().reset_index()

# DataFrame.pivot() only accepts keyword arguments from pandas 2.0 onwards;
# the positional form raises TypeError there.
crime_matrix = crime_counts.pivot(index='Month', columns='Year', values='DR_NO')

fig, ax = plt.subplots(figsize=(12, 8))
heatmap = sns.heatmap(
    crime_matrix,
    cmap='RdBu',  # low counts are red, high counts are blue
    annot=True,
    fmt=',',
    linewidths=.5,
    ax=ax,
    cbar_kws={'format': mtick.StrMethodFormatter('{x:,.0f}')},
)
ax.set_title('Number of Crimes by Year and Month')
ax.set_xlabel('Year')
ax.set_ylabel('Month')
ax.yaxis.set_major_formatter(mtick.FixedFormatter(crime_matrix.index))
plt.show()
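This donut chart displays the quarterly crime distribution, showing the percentage and number of crimes that fall in each quarter. All in all, crimes are almost equally distributed across the four quarters, with quarter 1 having a slightly larger share. Keep in mind that the data runs from January 2020 through March 2023, so quarter 1 includes part of a fourth year, which contributes to its larger share.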
# Donut chart: share of all recorded crimes that fall in each calendar quarter.
# (infer_datetime_format is deprecated in pandas 2.0, so it is not passed.)
df["DATE OCC"] = pd.to_datetime(df["DATE OCC"])
df["Quarter"] = df["DATE OCC"].dt.quarter

grouped_df = df.groupby("Quarter")["Quarter"].count()
total_crimes = grouped_df.sum()
quarterly_crimes = grouped_df.values
percentages = grouped_df / total_crimes * 100

fig, ax = plt.subplots(figsize=(8, 8))
colors = ["#0072c6", "#ed1c24", "#ffd600", "#39b54a"]
overall_total = int(total_crimes)

wedges, texts, autotexts = ax.pie(
    percentages.values,
    startangle=90,
    pctdistance=0.85,
    colors=colors,
    autopct="%1.1f%%",
)

# Write the exact per-quarter count under each percentage. Rebuilding the count
# from the percentage and truncating it can land one below the true value.
for autotext, pct, count in zip(autotexts, percentages.values, quarterly_crimes):
    autotext.set_text(f"{pct:.1f}%\n({int(count):,})")

ax.text(0, 0.1, "Overall Total Crimes", ha='center', va='center', color='black', fontsize=14, fontweight='bold')
ax.text(0, -0.1, f"{overall_total:,}", ha='center', va='center', color='black', fontsize=14, fontweight='bold')

# Label each wedge with the quarter it actually represents (taken from the
# group index, so a quarter missing from the data cannot shift the labels).
for quarter, wedge in zip(grouped_df.index, wedges):
    angle = (wedge.theta2 - wedge.theta1) / 2 + wedge.theta1
    x = wedge.r * 0.7 * np.cos(np.deg2rad(angle))
    y = wedge.r * 0.7 * np.sin(np.deg2rad(angle))
    ax.text(x, y, f"Quarter {quarter}", ha='center', va='center', color='white', fontweight='bold', fontsize=12)

# A white disc over the centre turns the pie into a donut.
centre_circle = plt.Circle((0, 0), 0.50, fc='white')
ax.add_artist(centre_circle)

ax.set_title("Quarterly Crime Distribution", fontsize=16, fontweight='bold', pad=20)
ax.axis('equal')
ax.grid(False)
ax.set_xticks([])
ax.set_yticks([])
plt.show()
In this pie chart we can see an analysis of the frequency of crime by the victim's sex. “F” stands for female, “M” stands for male, and “X” and “H” both mean unknown sex. Males and females are almost equally likely to be victims of crime in Los Angeles, with males being at a slightly higher risk of becoming a victim. The unknown portion could raise either the male or the female frequency, but unfortunately there is no way to recover this information, for various reasons.
# Pie chart of crimes per victim-sex code; each slice is the number of rows
# in that group with a non-null 'Crm Cd 1'.
crime_analysis_by_vict_sex = df.groupby('Vict Sex')['Crm Cd 1'].count()

slice_colors = ['royalblue', 'crimson', 'darkorange', 'forestgreen', 'goldenrod', 'purple', 'deepskyblue']

fig, ax = plt.subplots(figsize=(8, 6))
ax.pie(
    crime_analysis_by_vict_sex.values,
    labels=crime_analysis_by_vict_sex.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=slice_colors,
)
ax.set_title('Frequency of Crime Analysis by Victim Sex')
plt.show()
This bar chart gives us insight into the most common crimes in Los Angeles. The top 3 most common crimes are stolen vehicles, battery - simple assault, and theft of identity. This graph is very interesting because we typically hear about violent crimes in the news and on our citizen apps, but we rarely hear about identity theft, intimate partner simple assault, or even the cost of vandalism, petty theft, and motor vehicle theft, all of which we could easily fall victim to but never really think about. At the top of each bar you can see the total count for that crime, with its description on the x-axis.
# Vertical bar chart of the ten most frequent crime descriptions.
crime_counts = df['Crm Cd Desc'].value_counts().head(10)

fig, ax = plt.subplots(figsize=(12, 8))

# Draw the bars at explicit integer positions and pin the ticks to them before
# assigning labels, so each wrapped label is tied to a fixed tick.
positions = list(range(len(crime_counts)))
ax.bar(positions, crime_counts.values, color='pink')

# Wrap long descriptions onto several short lines so they fit under the bars.
wrapped_labels = [textwrap.fill(label, width=12) for label in crime_counts.index]
ax.set_xticks(positions)
ax.set_xticklabels(wrapped_labels, rotation=0, fontsize=8)

# Thousands separators on the frequency axis, using the same FuncFormatter
# approach as the crimes-by-area chart.
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, p: format(int(x), ',')))

ax.set_xlabel('Crime Type', fontsize=15)
ax.set_ylabel('Frequency', fontsize=15)
ax.set_title('Top 10 Crime Types in Los Angeles', fontsize=15)

# Print the exact count just above each bar.
for position, value in zip(positions, crime_counts.values):
    ax.text(position, value + 50, format(int(value), ','), ha='center', va='bottom', fontsize=9)
plt.show()
Feel free to click and hover your mouse anywhere on the map, because it is an interactive one! This map puts into perspective where 5 of the most common crimes occur in LA. If you would like to change the background of the map, there is an option to switch it in the upper right-hand corner, where you can find various map layouts. If you hover your mouse over any dot on the map, a label will come up showing what crime occurred in that exact spot, and if you click on it, you will see more information on the date the crime occurred and the area in which it occurred. This map is great for seeing what crimes are occurring around you.
# Re-read only the columns the map needs, limited to the first 100,000 rows.
import_cols = ['LAT', 'LON', 'Crm Cd Desc', 'DATE OCC', 'AREA NAME']
map_df = pd.read_csv(filename, usecols=import_cols, skiprows=0, nrows=100000)

# Row count per area for that sample; the bare name below displays the table
# when this runs as a notebook cell.
neigh_df = map_df.groupby(['AREA NAME']).size().reset_index(name="Count")
neigh_df

# Base map centred on a fixed point, occupying 90% of the width with 5% margins.
center_of_map = [34.041818, -118.285046]
map_layout = dict(
    zoom_start=12,
    width='90%',
    height='100%',
    left='5%',
    right='5%',
    top='0%',
)
my_map = folium.Map(location=center_of_map, **map_layout)

# Extra background layers the reader can switch between via the layer control.
# NOTE(review): newer folium releases may no longer resolve the Stamen tile
# names — confirm against the installed version.
tiles = ['cartodbpositron', 'openstreetmap', 'stamenterrain', 'stamentoner']
for tile in tiles:
    my_map.add_child(folium.TileLayer(tile))
my_map.add_child(folium.LayerControl())
# Marker colour for each crime type drawn on the map. Rows with any other
# crime description are not plotted.
crime_colors = {
    'BATTERY - SIMPLE ASSAULT': 'green',
    'VEHICLE - STOLEN': 'red',
    'ATTEMPTED ROBBERY': 'blue',
    'BURGLARY': 'orange',
    'THEFT OF IDENTITY': 'purple',
}

# Select the rows to plot up front instead of testing each of the sampled rows
# inside the loop, and drop rows that have no coordinates to place a marker at.
plotted = map_df[map_df['Crm Cd Desc'].isin(list(crime_colors))].dropna(subset=['LAT', 'LON'])

for lat, lon, crime, date_occ, area in zip(
    plotted['LAT'],
    plotted['LON'],
    plotted['Crm Cd Desc'],
    plotted['DATE OCC'],
    plotted['AREA NAME'],
):
    color = crime_colors[crime]
    try:
        folium.Circle(
            location=[lat, lon],
            tooltip=crime,  # shown on hover
            popup='Date:{}:\n Area Name:{}'.format(date_occ, area),  # shown on click
            radius=50,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.3,
        ).add_to(my_map)
    except (TypeError, ValueError):
        # Best effort: skip a row whose values folium rejects instead of
        # aborting the whole map, without hiding unrelated errors.
        continue

# Bare name so the map renders when this runs as a notebook cell.
my_map