My data set for this assignment was a collection of crimes reported to and collected by the Baltimore Police Department over the years 1963-2020. The columns in this data set contain the date the crime was committed, the time of the crime, the location of the crime, the description of the crime, whether a weapon was used, the district and neighborhood of the crime, as well as the longitude and latitude of the crime.
The first chart is a scatter plot showing the total number of crimes reported for each month from 2014 to 2020. In this chart the darker red areas show months with a higher number of reported crimes, as opposed to the blue areas, which show a lower number of reported crimes. This chart shows that the year with the most crimes reported was 2017, and that the months with the most crimes reported were the summer months of June, July, August, and September.
import os
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'c:/ProgramData/Anaconda3/Library/plugins/platforms'
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
# Load the crime export a single time. `data` keeps every column (the map
# section further down reads it); `df` is an independent two-column working
# copy used by the charts, so adding derived columns never touches `data`.
filename = "BPD_Part_1_Victim_Based_Crime_Data.csv"
data = pd.read_csv(filename)
df = data[['Description', 'CrimeDate']].copy()

# CrimeDate is stored as text in month/day/four-digit-year form.
df['CrimeDate'] = pd.to_datetime(df['CrimeDate'], format = '%m/%d/%Y')

# Calendar features derived from CrimeDate, used by the group-bys below.
df['Year'] = df['CrimeDate'].dt.year
df['Month'] = df['CrimeDate'].dt.month
df['Day'] = df['CrimeDate'].dt.day
df['Quarter'] = df['CrimeDate'].dt.quarter
df['DayOfTheWeek'] = df['CrimeDate'].dt.dayofweek  # pandas numbering: Monday = 0
df['MonthName'] = df['CrimeDate'].dt.strftime('%b')  # abbreviated month name
df['DayName'] = df['CrimeDate'].dt.strftime('%a')  # abbreviated weekday name
# Number of reported crimes for every (month, year) pair. MonthName is part of
# the grouping only so the abbreviated label is available for the x axis.
x = df.groupby(['Month', 'MonthName', 'Year']).size().reset_index(name = 'Count')

# Keep only the month/year cells with more than 50 reports.
df_text = x[x['Count'] > 50]

# Scatter plot comparing the months of each year where > 50 crimes were reported.
# Each square is one month of one year; its color encodes the crime total.
plt.figure(figsize = (18,10))
plt.scatter(df_text['MonthName'], df_text['Year'], marker = 's', cmap = 'jet',
            c = df_text['Count'], s = 750, edgecolor = 'black')
plt.title('Crime Totals for Each Month Per Year (2014-2020)', fontsize = 18)
plt.xlabel('Months of the Year', fontsize = 15)
# The y axis carries the year; the crime totals are shown by the colorbar.
plt.ylabel('Year', fontsize = 15)

cbar = plt.colorbar()
cbar.set_label('Crime Totals', rotation = 270, fontsize = 15, color = 'black', labelpad = 30)

# Colorbar ticks every 500 reports, labelled with thousands separators.
my_colorbar_ticks = list(range(500, int(df_text['Count'].max()), 500))
cbar.set_ticks(my_colorbar_ticks)
my_colorbar_tick_labels = ['{:,}'.format(each) for each in my_colorbar_ticks]
cbar.set_ticklabels(my_colorbar_tick_labels)

my_x_ticks = df_text['MonthName']
plt.xticks(my_x_ticks, fontsize = 13)
plt.show()
My second graph for this data set was a line plot showing the crime rates per month. This graph shows that the month of February has a much lower number of reported crimes than any other month of the year. It also shows that August is the month with the highest number of reported crimes.
# Line chart of the number of reported crimes in each calendar month,
# with all years pooled together.
x2 = (
    df.groupby(['Month'])['Month']
    .count()
    .reset_index(name = 'Count')
)

fig = plt.figure(figsize = (16,10))
ax = fig.add_subplot(1,1,1)

# Thick black line with orange, black-edged square markers.
line_style = dict(
    color = 'black',
    linewidth = 5,
    marker = 's',
    markersize = 14,
    markerfacecolor = 'orange',
    markeredgecolor = 'black',
)
ax.plot(x2['Month'], x2['Count'], **line_style)

ax.set_title('Crime Rates Per Month', fontsize = 18)
ax.set_xlabel('Month', fontsize = 15)
ax.set_ylabel('Reported Crimes', fontsize = 15)
ax.grid(True)

# One tick per month number present in the data.
my_x_ticks = x2['Month']
plt.xticks(my_x_ticks, fontsize = 13)
plt.show()
My third graph for this data set is a horizontal bar graph showing the number of reports for each type of crime reported to the Baltimore Police Department. The crime with the highest number of reports is Larceny, with about 70,000 reports. The crime with the lowest number of reports is Arson, with fewer than 10,000 reports.
# Horizontal bar chart of the number of reports for each crime description.
x1 = df.groupby(['Description'])['Description'].count().reset_index(name = 'Count')

# Ascending sort: barh places the first row at the bottom, so the
# most-reported crime ends up at the top of the chart.
df_sorted = x1.sort_values('Count')

plt.figure(figsize = (16,10))
plt.barh('Description','Count', data = df_sorted, fc = 'red', ec = 'black', label = 'Crimes Reported')
plt.legend(loc='lower right', fontsize = 14)
plt.title('Count of Each Crime Reported(1963-2020)', fontsize = 18)
plt.ylabel('Crime Reported', fontsize = 15)
plt.xlabel('Number of Reports', fontsize = 15)
# Render the figure explicitly, as every other chart section in this file does.
plt.show()
My fourth visualization for this data set is a donut chart showing the crime rates by quarter and month, as well as the percentage of all reported crimes that each month and quarter accounts for. This chart shows that the third quarter of the year had the highest number of reported crimes, accounting for 27.34% of all reported crimes.
# Donut chart: the outer ring shows each quarter's share of reported crimes,
# the inner ring shows each month's share.

# One row per (quarter, month) with its number of reports, put into calendar
# order via the numeric month, which is dropped once the rows are ordered.
pie_df = (
    df.groupby(['Quarter', 'MonthName', 'Month'])['Description']
    .count()
    .reset_index(name = "CrimesReported")
)
pie_df = (
    pie_df.sort_values(by = ['Month'])
    .reset_index(drop = True)
    .drop(columns = 'Month')
)

# Color indices into tab20c: every 4th index goes to a quarter wedge, the
# remaining indices in between go to the month wedges.
number_outside_colors = pie_df['Quarter'].nunique()
number_inside_colors = pie_df['MonthName'].nunique()
outside_color_ref_number = 4 * np.arange(number_outside_colors)
all_color_ref_number = np.arange(number_outside_colors + number_inside_colors)
inside_color_ref_number = [
    each for each in all_color_ref_number
    if each not in outside_color_ref_number
]

fig = plt.figure(figsize = (12,12))
ax = fig.add_subplot(1, 1, 1)

colormap = plt.get_cmap("tab20c") #20 colors in tab20c
outer_colors = colormap(outside_color_ref_number)
inner_colors = colormap(inside_color_ref_number)

all_crimes = pie_df.CrimesReported.sum()


def _quarter_wedge_label(p):
    # Percentage of all crimes, with the matching number of reports underneath.
    return '{:.2f}%\n({:.1f})'.format(p, (p/100)*all_crimes)


# Outer ring: totals per quarter.
quarter_totals = pie_df.groupby(['Quarter'])['CrimesReported'].sum()
quarter_totals.plot.pie(
    radius = 1,
    colors = outer_colors,
    pctdistance = 0.85,
    labeldistance = 1.1,
    wedgeprops = {'edgecolor': 'White'},
    textprops = {'fontsize': 18},
    autopct = _quarter_wedge_label,
    startangle = 90,
)

# Inner ring: totals per month, labelled with the month abbreviation.
pie_df.CrimesReported.plot.pie(
    radius = 0.7,
    colors = inner_colors,
    pctdistance = 0.55,
    labeldistance = 0.8,
    wedgeprops = {'edgecolor': 'White'},
    textprops = {'fontsize': 13},
    labels = pie_df.MonthName,
    autopct = '%1.2f%%',
    startangle = 90,
)

# White disc over the center turns the nested pies into a donut.
hole = plt.Circle((0,0), 0.3, fc = 'white')
plt.gcf().gca().add_artist(hole)

ax.yaxis.set_visible(False)
plt.title('Crime Rates by Quarter and Month', fontsize = 18)
ax.text(0,0, 'Total Number of Crimes\n' + str(all_crimes),size = 18, ha = 'center', va = 'center')
ax.axis('equal')
plt.tight_layout()
plt.show()
My final visualization for this data set was a map of Baltimore showing the locations where violent crimes were reported. In this chart each color represents a different crime that was reported: Orange: Rape; Red: Homicide; Green: Shootings; Yellow: Arson; Blue: Robbery - Residence; Dark Blue: Robbery - Carjacking.
import folium
# Interactive map of selected violent crimes: one small colored circle per
# report, with the crime type as tooltip and the neighborhood as popup.
map_df = data

# Reports per (neighborhood, coordinate) combination; not used by the map below.
neigh_df = map_df.groupby(['Neighborhood', 'Longitude', 'Latitude']).size().reset_index(name = "Count")

center_of_map = [39.3024273, -76.6195023] #Penn Station Baltimore
my_map = folium.Map(location = center_of_map,
                    zoom_start = 12,
                    width = '90%',
                    height = '100%',
                    left = '5%',
                    right = '5%',
                    top = '0%',
                    )

# Offer several base maps and a control to switch between them.
tiles = ['cartodbpositron','openstreetmap','stamenterrain']
for tile in tiles:
    folium.TileLayer(tile).add_to(my_map)
folium.LayerControl().add_to(my_map)

# Marker color per crime description; any description not listed here is
# left off the map.
crime_colors = {
    'RAPE': 'orange',
    'HOMICIDE': 'red',
    'SHOOTING': 'green',
    'ARSON': 'yellow',
    'ROBBERY - RESIDENCE': 'blue',
    'ROBBERY - CARJACKING': 'darkblue',
}

# Restrict to the mapped crime types up front, and drop reports that have no
# coordinates since they cannot be placed on the map.
mapped_crimes = map_df[map_df['Description'].isin(list(crime_colors))]
mapped_crimes = mapped_crimes.dropna(subset = ['Latitude', 'Longitude'])
marker_columns = ['Description', 'Neighborhood', 'Latitude', 'Longitude']

for report in mapped_crimes[marker_columns].itertuples(index = False):
    color = crime_colors[report.Description]
    try:
        folium.CircleMarker(location = [report.Latitude, report.Longitude],
                            tooltip = report.Description,
                            popup = 'Neighborhood: {}:'.format(report.Neighborhood),
                            radius = .5,
                            color = color,
                            fill = True,
                            fill_color = color,
                            fill_opacity = 0.5).add_to(my_map)
    except (ValueError, TypeError):
        # Best effort: a report whose coordinates folium still rejects is
        # skipped so the remaining markers are drawn.
        continue
#my_map
This is a screenshot of my map, since the HTML output was not cooperating.
# Embed the image file bpd_map_crime.PNG (the map screenshot) in the knitted document.
knitr::include_graphics("bpd_map_crime.PNG")
This map was built using Folium and Python.
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.