Here is an intro section.
Here is something about my data.
This is some general text about my findings before I show the individual charts in tabs. If you add .tabset-pills inside the curly braces, it will generate orange tab buttons
Here is what I have under Tab 1
import pandas as pd
import numpy as np
import folium
# Folder and file holding the 911 call records; only the columns in import_cols
# are read, and at most the first 100,000 rows.
path = "C:/Users/pptallon/Dropbox/G/Teaching/Data Visualization Data Files/"
filename = "Baltimore911_Dec2020.csv"
import_cols = ['Location', 'Description', 'CallDateTime', 'Neighborhood']
map_df = pd.read_csv(path + filename, usecols=import_cols, skiprows=0, nrows=100000)


def _parse_lat_lon(location):
    """Return (lat, lon) parsed from the text after the last "(" in location.

    Both values are strings formatted to six decimals. If either number is
    missing or not numeric, (NaN, NaN) is returned so a row never ends up
    with only one of the two coordinates.
    """
    coords = str(location).replace(")", "").split("(")[-1].split(",")
    try:
        return '%.6f' % float(coords[0]), '%.6f' % float(coords[1])
    except (ValueError, IndexError):
        # ValueError: text is not a number; IndexError: fewer than two values.
        return np.nan, np.nan


# Parse every row once, then assign each column in a single step instead of
# writing cell by cell through .loc with a manually maintained row counter.
parsed_coords = [_parse_lat_lon(each) for each in map_df['Location']]
map_df['Lat'] = [lat for lat, _ in parsed_coords]
map_df['Lon'] = [lon for _, lon in parsed_coords]
# Number of rows in map_df per neighborhood; this is the value shaded on the map.
calls_by_neighborhood = map_df.groupby(['Neighborhood']).size()
neigh_df = calls_by_neighborhood.reset_index(name="Count")

# Base map centered on Penn Station, Baltimore (lat/lon).
# zoom_start: 0 is furthest out (shows the earth), 11 is city level, 18 is closest.
center_of_map = [39.3024273, -76.6195023]
my_map = folium.Map(
    location=center_of_map,
    zoom_start=12,
    tiles='cartodbpositron',  # alternative: 'OpenStreetMap'
    width='90%',
    height='100%',
    left='5%',
    top='0%',
)

# Choropleth layer: neigh_df['Neighborhood'] is matched against the GeoJSON
# property 'name', and 'Count' drives the fill color.
ch_map = folium.Choropleth(
    geo_data=path + 'baltimore.txt',
    name='choropleth',
    data=neigh_df,
    columns=['Neighborhood', 'Count'],
    key_on='feature.properties.name',
    fill_color='RdPu',
    fill_opacity=0.9,
    line_opacity=0.4,
    legend_name='Neighborhood Based on 911 Call Origination',
    highlight=True,
)
ch_map.add_to(my_map)

# Display the region label when hovering over a neighborhood.
neighborhood_tooltip = folium.features.GeoJsonTooltip(
    fields=['name'],
    aliases=['Neighborhood: '],
    labels=True,
    style='background-color: black; color: white;',
)
ch_map.geojson.add_child(neighborhood_tooltip)

# Write the finished map next to the data files as a standalone HTML page.
my_map.save(path + 'Chloropleth_911_Baltimore.html')
Dual Axis Bar Charts
def autolabel(these_bars, this_ax, place_of_decimals, symbol):
    """Write each bar's height as a text label just above the bar.

    these_bars: iterable of bars exposing get_x(), get_width() and get_height().
    this_ax: axes object whose text() method draws the labels.
    place_of_decimals: format spec applied to the height, e.g. '.0f' or '.2f'.
    symbol: text placed in front of the formatted height, e.g. '$' or ''.
    """
    for bar in these_bars:
        bar_height = bar.get_height()
        x_center = bar.get_x() + bar.get_width() / 2
        label = symbol + format(bar_height, place_of_decimals)
        # Anchor the text 1% above the bar top, centered horizontally on the bar.
        this_ax.text(x_center, bar_height * 1.01, label,
                     fontsize=11, color='black', ha='center', va='bottom')
# Dual-axis bar chart: citation counts on the left axis, average fines on the right.
# NOTE(review): plt, d2 and autolabel are expected to be defined before this point.
fig = plt.figure(figsize=(18, 10))
ax1 = fig.add_subplot(1, 1, 1)
ax2 = ax1.twinx()  # second y-axis sharing the same x-axis

bar_width = 0.4
# One slot per row of d2 instead of a fixed 10, so the bar positions always
# match the number of values passed to bar().
x_pos = np.arange(len(d2))

# Shift the two series half a bar width left/right so they sit side by side.
count_bars = ax1.bar(x_pos - (0.5 * bar_width), d2.Count, bar_width,
                     color='gray', edgecolor='black', label='Citation Count')
aver_fine_bars = ax2.bar(x_pos + (0.5 * bar_width), d2.AverFine, bar_width,
                         color='green', edgecolor='black', label='Average Fine')

ax1.set_xlabel('Vehicle Tag', fontsize=18)
ax1.set_ylabel('Count of Citations', fontsize=18, labelpad=20)
ax2.set_ylabel('Average Fine', fontsize=18, rotation=270, labelpad=20)
ax1.tick_params(axis='y', labelsize=14)
ax2.tick_params(axis='y', labelsize=14)
plt.title('Citation Count and Average Fine Analysis\n Top 10 Most Frequently Cited Tags', fontsize=18)
ax1.set_xticks(x_pos)
ax1.set_xticklabels(d2.Tag, fontsize=14)

# Merge the legend entries of both axes into a single legend drawn on ax1.
count_color, count_label = ax1.get_legend_handles_labels()
fine_color, fine_label = ax2.get_legend_handles_labels()
legend = ax1.legend(count_color + fine_color, count_label + fine_label, loc='upper left',
                    frameon=True, ncol=1, shadow=True, borderpad=1, fontsize=14)

# Extra headroom on the count axis (50% above the tallest bar).
ax1.set_ylim(0, d2.Count.max() * 1.50)

autolabel(count_bars, ax1, '.0f', '')
autolabel(aver_fine_bars, ax2, '.2f', '$')
plt.show()
Analysis of line plots.
from matplotlib.ticker import FuncFormatter

# Total fines for every (Hour, WeekDay) combination.
# NOTE(review): df and plt are expected to be defined before this point.
fine_df = df.groupby(['Hour', 'WeekDay'])['ViolFine'].sum().reset_index(name='TotalFines')

fig = plt.figure(figsize=(18, 10))
ax = fig.add_subplot(1, 1, 1)

# Line color per weekday; the key order (Mon..Sun) is also used for the legend.
my_colors = {'Mon': 'blue',
             'Tue': 'red',
             'Wed': 'green',
             'Thu': 'gray',
             'Fri': 'purple',
             'Sat': 'gold',
             'Sun': 'brown'}

# Group by the bare column name: grouping by a one-element list makes
# pandas >= 2.0 yield 1-tuples such as ('Mon',) as keys, which are not keys
# of my_colors.
for key, grp in fine_df.groupby('WeekDay'):
    grp.plot(ax=ax, kind='line', x='Hour', y='TotalFines',
             color=my_colors[key], label=key, marker='8')

plt.title('Total Fines by Hour', fontsize=18)
ax.set_xlabel('Hour (24 Hour Interval)', fontsize=18)
ax.set_ylabel('Total Fines ($M)', fontsize=18, labelpad=20)
ax.tick_params(axis='x', labelsize=14, rotation=0)
ax.tick_params(axis='y', labelsize=14, rotation=0)
ax.set_xticks(np.arange(24))

# The lines were drawn in the groupby's sorted (alphabetical) order; rebuild the
# legend in weekday order by looking handles up by label rather than by fixed
# position, so a weekday missing from the data does not raise IndexError.
handles, labels = ax.get_legend_handles_labels()
handle_by_label = dict(zip(labels, handles))
labels = [day for day in my_colors if day in handle_by_label]
handles = [handle_by_label[day] for day in labels]
plt.legend(handles, labels, loc='best', fontsize=14, ncol=1)

# Show y ticks in millions of dollars, e.g. 1500000 -> '$1.5M'.
ax.yaxis.set_major_formatter(FuncFormatter(lambda x, pos: ('$%1.1fM') % (x * 1e-6)))
plt.show()
Pie Charts
I had to shrink the text sizes and the figure size for this pie chart to get it to look this way.
# Label every row with its calendar quarter, e.g. 'Quarter 1'.
# NOTE(review): df and plt are expected to be defined before this point.
df['Quarter'] = 'Quarter ' + df.ViolDate.dt.quarter.astype('string')

# Total fines per month. Month is carried along only to sort the rows into
# calendar order and is dropped afterwards.
pie_df = df.groupby(['Quarter', 'MonthName', 'Month'])['ViolFine'].sum().reset_index(name='TotalFines')
pie_df.sort_values(by=['Month'], inplace=True)
pie_df.reset_index(inplace=True, drop=True)
del pie_df['Month']

# Colormap indices: the outer ring (quarters) takes every fourth index
# (0, 4, 8, ...); the inner ring (months) takes the indices in between.
number_outside_colors = len(pie_df.Quarter.unique())
outside_color_ref_number = np.arange(number_outside_colors) * 4
number_inside_colors = len(pie_df.MonthName.unique())
all_color_ref_number = np.arange(number_outside_colors + number_inside_colors)
inside_color_ref_number = [each for each in all_color_ref_number
                           if each not in outside_color_ref_number]

fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(1, 1, 1)

colormap = plt.get_cmap("tab20c")
outer_colors = colormap(outside_color_ref_number)
all_fines = pie_df.TotalFines.sum()

# Outer ring: one wedge per quarter, labeled with its share and dollar amount.
# edgecolor is lowercase 'w' because Matplotlib's single-letter color codes
# are case-sensitive; uppercase 'W' is rejected by current releases.
pie_df.groupby(['Quarter'])['TotalFines'].sum().plot(
    kind='pie', radius=1, colors=outer_colors, pctdistance=0.85, labeldistance=1.1,
    wedgeprops=dict(edgecolor='w'), textprops={'fontsize': 13},
    autopct=lambda p: '{:.2f}%\n(${:.1f}M)'.format(p, (p / 100) * all_fines / 1e+6),
    startangle=90)

# Inner ring: one wedge per month, in the same calendar order as the quarters.
inner_colors = colormap(inside_color_ref_number)
pie_df.TotalFines.plot(
    kind='pie', radius=0.7, colors=inner_colors, pctdistance=0.55, labeldistance=0.8,
    wedgeprops=dict(edgecolor='w'), textprops={'fontsize': 11},
    labels=pie_df.MonthName,
    autopct='%1.2f%%',
    startangle=90)

# White disc in the middle turns the nested pies into a donut with room for
# the grand total.
hole = plt.Circle((0, 0), 0.3, fc='white')
ax.add_artist(hole)

ax.yaxis.set_visible(False)
plt.title('Total Fines by Quarter and Month', fontsize=14)
ax.text(0, 0, 'Total Fines\n' + '$' + str(round(all_fines / 1e6, 2)) + 'M',
        size=13, ha='center', va='center')
ax.axis('equal')
plt.tight_layout()
plt.show()
Plotly waterfall chart
To include this plotly chart, I had to output it as an HTML file and then read it immediately back in. I commented out the line for fig.show() and added some extra lines below it to write the chart to an HTML file.
import plotly.graph_objects as go
import plotly.io as pio

# 2019 fines per month compared with a flat monthly budget of 4.6e6.
# NOTE(review): df and path are expected to be defined before this point.
wf_df = df[df['Year'] == 2019].groupby(['MonthName'])['ViolFine'].sum().reset_index(name='TotalFines')
wf_df['Budget'] = 4.6e6
wf_df['Deviation'] = wf_df.TotalFines - wf_df.Budget

# Append a 'Total' row holding the sums over all months.
wf_df.loc[wf_df.index.max() + 1] = ['Total',
                                    wf_df.TotalFines.sum(),
                                    wf_df.Budget.sum(),
                                    wf_df.TotalFines.sum() - wf_df.Budget.sum()]

# Sort into calendar order with 'Total' last.
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Total']
wf_df['MonthName'] = pd.Categorical(wf_df['MonthName'], categories=months, ordered=True)
wf_df.sort_values(by='MonthName', inplace=True)
wf_df.reset_index(inplace=True, drop=True)

# Look the total row up by name instead of by the fixed row label 12, so the
# chart still works when some months have no data.
total_deviation = wf_df.loc[wf_df['MonthName'] == 'Total', 'Deviation'].iloc[0]
if total_deviation > 0:
    end_color = 'black'
elif total_deviation < 0:
    end_color = 'red'
else:
    end_color = 'blue'

# Every month is a relative step; only the 'Total' row is drawn as a total bar.
measures = ['total' if month == 'Total' else 'relative' for month in wf_df['MonthName']]

fig = go.Figure(go.Waterfall(
    name='', orientation='v', x=wf_df['MonthName'], textposition='outside',
    measure=measures,
    y=wf_df['Deviation'] / 1e6,
    text=['${:.2f}M'.format(each / 1e6) for each in wf_df['TotalFines']],
    decreasing={'marker': {'color': 'red'}},
    increasing={'marker': {'color': 'green'}},
    totals={'marker': {'color': end_color}},
    hovertemplate='Cumulative Deviation to Date: ' + '$%{y:,.2f}M' + '<br>' +
                  'Total Fines in %{x}: %{text}'))

fig.layout = go.Layout(yaxis=dict(tickformat='.1f'))
fig.update_xaxes(title_text='Months', title_font={'size': 18})
fig.update_yaxes(title_text='Total Fines (Running Total $M)', title_font={'size': 18},
                 dtick=0.5, tickprefix='$', ticksuffix='M', zeroline=True)
fig.update_layout(
    title=dict(text='Deviation between Actual and Budgeted Monthly Fines in 2019 (Waterfall Diagram)<br>' +
                    'Surpluses appear in Green, Deficits appear in Red',
               font=dict(family='Arial', size=18, color='black')),
    template='simple_white',
    title_x=0.5,
    showlegend=False,
    autosize=True,
    margin=dict(l=30, r=30, t=60, b=30),
)

# The figure is written to an HTML file instead of being shown directly.
#fig.show()
pio.write_html(fig, path + "plotly_result.html", auto_open=False)
Bump chart.
# Total fines per (Year, MonthName), pivoted to one row per year and one
# column per month in calendar order.
# NOTE(review): df and plt are expected to be defined before this point.
bump_df = df.groupby(['Year', 'MonthName'])['ViolFine'].sum().reset_index(name='TotalFines')
bump_df = bump_df.pivot(index='Year', columns='MonthName', values='TotalFines')
month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
bump_df = bump_df.reindex(columns=month_order)
bump_df = bump_df.dropna()  # drop years that lack a value for any month

# Rank the years within each month (1 = highest total), then transpose so
# rows are months and columns are years: one plotted line per year.
bump_df_ranked = bump_df.rank(axis=0, ascending=False, method='min')
bump_df_ranked = bump_df_ranked.T

fig = plt.figure(figsize=(20, 12))
ax = fig.add_subplot(1, 1, 1)
# Large white-filled markers leave room for the dollar labels written below.
bump_df_ranked.plot(kind='line', ax=ax, marker='o', markeredgewidth=1, linewidth=6,
                    markersize=44,
                    markerfacecolor='white')
ax.invert_yaxis()  # rank 1 at the top

num_rows = bump_df_ranked.shape[0]  # months
num_cols = bump_df_ranked.shape[1]  # years

plt.ylabel('Monthly Ranking', fontsize=18, labelpad=10)
plt.title('Ranking of Total Fines by Month and by Year \n Bump Chart', fontsize=18, pad=15)
plt.xticks(np.arange(num_rows), month_order, fontsize=14)
plt.yticks(range(1, num_cols + 1, 1), fontsize=14)
ax.set_xlabel('Month', fontsize=18)

# List the legend in reverse plotting order. Reversing the whole list works
# for any number of years, unlike indexing positions 6..0 explicitly.
handles, labels = ax.get_legend_handles_labels()
handles = handles[::-1]
labels = labels[::-1]
ax.legend(handles, labels, bbox_to_anchor=(1.01, 1.01), fontsize=14,
          labelspacing=1,
          markerscale=.4,
          borderpad=1,
          handletextpad=0.8)

# Write the dollar amount (in millions) inside every marker. bump_df is
# years x months while bump_df_ranked is months x years, hence the swapped
# indices in the two lookups.
for j in range(num_cols):
    for i in range(num_rows):
        this_rank = bump_df_ranked.iloc[i, j]
        ax.text(i, this_rank, '$' + str(round(bump_df.iloc[j, i] / 1e6, 1)) + 'M',
                ha='center', va='center', fontsize=12)

plt.show()
Here is a caption at the end of my bump chart
We are now done with charts. Here are some general takeaways from my output.
You can add captions at the bottom of images. To add a caption, include fig.cap="blah blah" inside the {...} at the top of the RMarkdown code chunk you are using to include the image.
# Embed the image file at this path in the knitted document.
knitr::include_graphics("c:/Users/pptallon/Dropbox/G/Personal/Tallon005.jpg")
Courtesy of your favorite IT professor