Here is some general information on my data. Let’s now look at the results.
Here’s what I found in my bar chart.
import os
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'c:/users/pptallon/Anaconda3/Library/plugins/platforms'
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
# ---------------------------------------------------------------------------
# Load the citation data and build `x`: one row per registration tag with its
# citation count, total fines and average fine.
# ---------------------------------------------------------------------------
path = "C:/Users/pptallon/Dropbox/G/Teaching/Data Visualization Data Files/Python_datafiles/"
filename = path + 'Baltimore_Traffic_Citations.csv'
df = pd.read_csv(filename, usecols=['Tag', 'ViolFine', 'ViolDate'])

# Assign the filled column back rather than calling fillna(inplace=True) on
# the `df.Tag` accessor: that chained in-place call acts on a temporary under
# pandas Copy-on-Write, and the groupby below drops NaN keys, so the missing
# tags would silently disappear from the counts.
df['Tag'] = df['Tag'].fillna("Not Available")

# Parse the timestamp once and derive the calendar fields used by the charts.
df['ViolDate'] = pd.to_datetime(df['ViolDate'], format='%m/%d/%Y %I:%M:%S %p')
df['Hour'] = df.ViolDate.dt.hour
df['Day'] = df.ViolDate.dt.day
df['Month'] = df.ViolDate.dt.month
df['Year'] = df.ViolDate.dt.year
df['WeekDay'] = df.ViolDate.dt.strftime('%a')
df['MonthName'] = df.ViolDate.dt.strftime('%b')

# Per-tag summary, most frequently cited first.
x = df.groupby(['Tag']).agg({'Tag': ['count'], 'ViolFine': ['sum', 'mean']}).reset_index()
x.columns = ['Tag', 'Count', 'TotalFines', 'AverFine']
x = x.sort_values('Count', ascending=False)

# Tags matching this pattern are treated as placeholders unless they also match
# one of the KeepRows patterns.
PossibleBadTags = x['Tag'].str.contains('TAG|Not Available|Tag')
PossibleBadRows = x[PossibleBadTags]
KeepRows = 'SCRTAGT|LITAG8R|V1NTAGE|WTTAGW1|DATAGUY|TAG944|TAGDAT|TAG DAT|TAG ART'
# `~` is the boolean NOT for a mask; unary minus on booleans is not portable.
deleteRows = PossibleBadRows[~PossibleBadRows.Tag.str.contains(KeepRows)]

# Collapse every placeholder tag into a single 'Missing' row.
a = deleteRows.Count.sum()        # citations attributed to placeholder tags
b = deleteRows.TotalFines.sum()   # fines attributed to placeholder tags
c = b / a                         # their average fine
x = x[~x['Tag'].isin(deleteRows.Tag)]
x.loc[x.index.max() + 1] = ['Missing', a, b, c]

# Re-sort with the new row included and renumber the index from 0.
x = x.sort_values('Count', ascending=False)
x.reset_index(inplace=True, drop=True)
def pick_colors_according_to_mean_count(this_data, tolerance=0.01):
    """Return one bar color per row, based on how `Count` compares to its mean.

    Parameters
    ----------
    this_data : object with a ``Count`` attribute
        ``Count`` must be iterable and provide ``.mean()`` (for example a
        DataFrame with a ``Count`` column).
    tolerance : float, optional
        Relative half-width of the "at the average" band. The default of
        0.01 reproduces the original fixed +/-1% band.

    Returns
    -------
    list of str
        ``'lightcoral'`` for counts above ``mean * (1 + tolerance)``,
        ``'green'`` for counts below ``mean * (1 - tolerance)``, and
        ``'black'`` otherwise, in the same order as ``this_data.Count``.
    """
    avg = this_data.Count.mean()
    upper = avg * (1 + tolerance)
    lower = avg * (1 - tolerance)

    def _color_for(count):
        # The comparisons are strict, so values exactly on a band edge are 'black'.
        if count > upper:
            return 'lightcoral'
        if count < lower:
            return 'green'
        return 'black'

    return [_color_for(count) for count in this_data.Count]
import matplotlib.patches as mpatches
# ---------------------------------------------------------------------------
# Two stacked bar charts of citation counts per tag: a wide panel (top1 rows)
# above a narrow panel (top2 rows), each colored relative to its own mean.
# ---------------------------------------------------------------------------

# .loc slices by index label and includes both endpoints, so each slice starts
# at label 1 (label 0 is left out) and ends at `top` inclusive.
bottom1, top1 = 1, 250
d1 = x.loc[bottom1:top1]
my_colors1 = pick_colors_according_to_mean_count(d1)

bottom2, top2 = 1, 10
d2 = x.loc[bottom2:top2]
my_colors2 = pick_colors_according_to_mean_count(d2)

# Proxy patches so the legend explains the three bar colors.
Above = mpatches.Patch(color='lightcoral', label='Above Average')
At = mpatches.Patch(color='black', label='Within 1% of the Average')
Below = mpatches.Patch(color='green', label='Below Average')
legend_handles = [Above, At, Below]

mean1 = d1.Count.mean()
mean2 = d2.Count.mean()

fig = plt.figure(figsize=(18, 16))
fig.suptitle(f'Frequency of Citation Analysis by Registration Tag:\n Top {top1} and Top {top2}',
             fontsize=18, fontweight='bold')

# Upper panel: too many bars for readable tick labels, so the x axis is hidden.
ax1 = fig.add_subplot(2, 1, 1)
ax1.bar(d1.Tag, d1.Count, label='Count', color=my_colors1)
ax1.legend(handles=legend_handles, fontsize=14)
ax1.axhline(mean1, color='black', linestyle='dashed')
for side in ('right', 'top'):
    ax1.spines[side].set_visible(False)
ax1.xaxis.set_visible(False)
ax1.set_title(f'Top {top1} Citations', size=20)
ax1.text(top1 - 10, mean1 + 5, 'Mean = ' + str(mean1), rotation=0, fontsize=14)

# Lower panel: few enough bars that the tag labels stay visible.
ax2 = fig.add_subplot(2, 1, 2)
ax2.bar(d2.Tag, d2.Count, label='Count', color=my_colors2)
ax2.legend(handles=legend_handles, fontsize=14)
ax2.axhline(mean2, color='black', linestyle='solid')
for side in ('right', 'top'):
    ax2.spines[side].set_visible(False)
ax2.set_title(f'Top {top2} Citations', size=20)
ax2.text(top2 - 1, mean2 + 5, 'Mean = ' + str(mean2), rotation=0, fontsize=14)

fig.subplots_adjust(hspace=0.35)
plt.show()
Here’s what I found in my next bar chart.
def autolabel(these_bars, this_ax, place_of_decimals, symbol):
    """Write each bar's height as a text label just beyond the end of the bar.

    Parameters
    ----------
    these_bars : iterable
        Bar objects exposing ``get_height()``, ``get_x()`` and ``get_width()``.
    this_ax : object
        Axes-like object whose ``text(x, y, s, **kwargs)`` draws the label.
    place_of_decimals : str
        Format spec applied to the height, e.g. ``'.0f'`` or ``'.2f'``.
    symbol : str
        Prefix placed in front of the formatted number, e.g. ``'$'`` or ``''``.
    """
    for each_bar in these_bars:
        height = each_bar.get_height()
        x_center = each_bar.get_x() + each_bar.get_width() / 2
        # height * 1.01 lies just past the bar end on either side of zero;
        # anchor the text away from the bar so a negative bar's label is
        # drawn below it instead of on top of it.
        anchor = 'bottom' if height >= 0 else 'top'
        this_ax.text(x_center, height * 1.01, symbol + format(height, place_of_decimals),
                     fontsize=11, color='black', ha='center', va=anchor)
# ---------------------------------------------------------------------------
# Grouped bars for the rows in d2: citation count on the left y axis and
# average fine on a twin right y axis.
# ---------------------------------------------------------------------------
fig = plt.figure(figsize=(18, 10))
ax1 = fig.add_subplot(1, 1, 1)
ax2 = ax1.twinx()

bar_width = 0.4
# One slot per row of d2. Sizing this from the data (instead of a fixed 10)
# keeps the bar positions and heights the same length if d2 ever changes.
x_pos = np.arange(len(d2))

# Shift each series half a bar width off the tick so the pair sits side by side.
count_bars = ax1.bar(x_pos - (0.5 * bar_width), d2.Count, bar_width,
                     color='gray', edgecolor='black', label='Citation Count')
aver_fine_bars = ax2.bar(x_pos + (0.5 * bar_width), d2.AverFine, bar_width,
                         color='green', edgecolor='black', label='Average Fine')

ax1.set_xlabel('Vehicle Tag', fontsize=18)
ax1.set_ylabel('Count of Citations', fontsize=18, labelpad=20)
ax2.set_ylabel('Average Fine', fontsize=18, rotation=270, labelpad=20)
ax1.tick_params(axis='y', labelsize=14)
ax2.tick_params(axis='y', labelsize=14)
plt.title('Citation Count and Average Fine Analysis\n Top 10 Most Frequently Cited Tags', fontsize=18)
ax1.set_xticks(x_pos)
ax1.set_xticklabels(d2.Tag, fontsize=14)

# The two series live on different axes, so merge their legend entries by hand.
count_color, count_label = ax1.get_legend_handles_labels()
fine_color, fine_label = ax2.get_legend_handles_labels()
legend = ax1.legend(count_color + fine_color, count_label + fine_label, loc='upper left',
                    frameon=True, ncol=1, shadow=True, borderpad=1, fontsize=14)

# Extra headroom on the count axis (1.5x the tallest bar).
ax1.set_ylim(0, d2.Count.max() * 1.50)

autolabel(count_bars, ax1, '.0f', '')
autolabel(aver_fine_bars, ax2, '.2f', '$')
plt.show()
Here is what I found.
# ---------------------------------------------------------------------------
# Line chart: total fines per hour of day, one line per weekday.
# ---------------------------------------------------------------------------
from matplotlib.ticker import FuncFormatter

fine_df = df.groupby(['Hour', 'WeekDay'])['ViolFine'].sum().reset_index(name='TotalFines')

fig = plt.figure(figsize=(18, 10))
ax = fig.add_subplot(1, 1, 1)

# Insertion order (Mon..Sun) is also the order used for the legend below.
my_colors = {'Mon': 'blue',
             'Tue': 'red',
             'Wed': 'green',
             'Thu': 'gray',
             'Fri': 'purple',
             'Sat': 'gold',
             'Sun': 'brown'}

# Group by the bare column name: grouping by a one-element list makes pandas
# (2.0 and later) yield 1-tuple keys such as ('Mon',), which are not keys of
# my_colors and would also end up as the legend label.
for key, grp in fine_df.groupby('WeekDay'):
    grp.plot(ax=ax, kind='line', x='Hour', y='TotalFines', color=my_colors[key], label=key, marker='8')

plt.title('Total Fines by Hour', fontsize=18)
ax.set_xlabel('Hour (24 Hour Interval)', fontsize=18)
ax.set_ylabel('Total Fines ($M)', fontsize=18, labelpad=20)
ax.tick_params(axis='x', labelsize=14, rotation=0)
ax.tick_params(axis='y', labelsize=14, rotation=0)
ax.set_xticks(np.arange(24))

# Put the legend in calendar order by looking handles up by label. This gives
# the same Mon..Sun order as fixed positional indices, but does not depend on
# all seven weekdays being present in the data.
handles, labels = ax.get_legend_handles_labels()
handle_by_label = dict(zip(labels, handles))
labels = [day for day in my_colors if day in handle_by_label]
handles = [handle_by_label[day] for day in labels]
plt.legend(handles, labels, loc='best', fontsize=14, ncol=1)

# Show y ticks in millions of dollars.
ax.yaxis.set_major_formatter(FuncFormatter(lambda value, pos: '$%1.1fM' % (value * 1e-6)))
plt.show()
I had to shrink both the text sizes and the figure size of this pie chart to get it to look this way.
# ---------------------------------------------------------------------------
# Nested pie: outer ring = total fines per quarter, inner ring = per month,
# with the grand total written in the centre hole.
# ---------------------------------------------------------------------------
df['Quarter'] = 'Quarter ' + df.ViolDate.dt.quarter.astype('string')

# One row per (quarter, month); the numeric Month is carried along only so the
# rows can be put in calendar order, then dropped.
pie_df = (
    df.groupby(['Quarter', 'MonthName', 'Month'])['ViolFine']
    .sum()
    .reset_index(name='TotalFines')
)
pie_df = pie_df.sort_values(by=['Month']).reset_index(drop=True).drop(columns='Month')

# Colormap indices: every 4th index goes to the outer ring, and the remaining
# indices (in order) go to the inner ring.
number_outside_colors = len(pie_df.Quarter.unique())
outside_color_ref_number = 4 * np.arange(number_outside_colors)
number_inside_colors = len(pie_df.MonthName.unique())
all_color_ref_number = np.arange(number_outside_colors + number_inside_colors)
inside_color_ref_number = [idx for idx in all_color_ref_number
                           if idx not in outside_color_ref_number]

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(1, 1, 1)
colormap = plt.get_cmap("tab20c")
outer_colors = colormap(outside_color_ref_number)
inner_colors = colormap(inside_color_ref_number)
all_fines = pie_df.TotalFines.sum()

# Outer ring: each wedge shows its percentage and the matching amount in $M.
quarter_totals = pie_df.groupby(['Quarter'])['TotalFines'].sum()
quarter_totals.plot(
    kind='pie', radius=1, colors=outer_colors, pctdistance=0.85, labeldistance=1.1,
    wedgeprops=dict(edgecolor='W'), textprops={'fontsize': 14},
    autopct=lambda p: '{:.2f}%\n(${:.1f}M)'.format(p, (p / 100) * all_fines / 1e+6),
    startangle=90)

# Inner ring: one wedge per month, labelled with the month name.
pie_df.TotalFines.plot(
    kind='pie', radius=0.7, colors=inner_colors, pctdistance=0.55, labeldistance=0.8,
    wedgeprops=dict(edgecolor='W'), textprops={'fontsize': 12},
    labels=pie_df.MonthName,
    autopct='%1.2f%%',
    startangle=90)

# White disc over the centre turns the inner pie into a ring.
hole = plt.Circle((0, 0), 0.3, fc='white')
fig1 = plt.gcf()
fig1.gca().add_artist(hole)

ax.yaxis.set_visible(False)
plt.title('Total Fines by Quarter and Month', fontsize=14)
ax.text(0, 0, 'Total Fines\n$' + str(round(all_fines / 1e6, 2)) + 'M',
        size=14, ha='center', va='center')
ax.axis('equal')
plt.tight_layout()
plt.show()
To include this plotly chart, I had to write it out as an HTML file and then read it straight back in. I commented out the fig.show() line and added a few extra lines below it to write the chart to an HTML file.
# ---------------------------------------------------------------------------
# Plotly waterfall: monthly deviation of 2019 fines from a flat budget, with a
# closing 'Total' bar, written to an HTML file.
# ---------------------------------------------------------------------------
import plotly.graph_objects as go
import plotly.io as pio

wf_df = df[df['Year'] == 2019].groupby(['MonthName'])['ViolFine'].sum().reset_index(name='TotalFines')
wf_df['Budget'] = 4.6e6
wf_df['Deviation'] = wf_df.TotalFines - wf_df.Budget

# Append a 'Total' row holding the sums of the monthly rows.
wf_df.loc[wf_df.index.max() + 1] = ['Total',
                                    wf_df.TotalFines.sum(),
                                    wf_df.Budget.sum(),
                                    wf_df.TotalFines.sum() - wf_df.Budget.sum()]

# An ordered categorical sorts the months in calendar order with 'Total' last.
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Total']
wf_df['MonthName'] = pd.Categorical(wf_df['MonthName'], categories=months, ordered=True)
wf_df.sort_values(by='MonthName', inplace=True)
wf_df.reset_index(inplace=True, drop=True)

# 'Total' is the last row after sorting. Read it by position and size the
# measure list from the frame, so a year with fewer than 12 months of data
# still renders instead of failing on a fixed row label / list length.
total_deviation = wf_df['Deviation'].iloc[-1]
if total_deviation > 0:
    end_color = 'black'
elif total_deviation < 0:
    end_color = 'red'
else:
    end_color = 'blue'
measure = ['relative'] * (len(wf_df) - 1) + ['total']

fig = go.Figure(go.Waterfall(name='', orientation='v', x=wf_df['MonthName'], textposition='outside',
                             measure=measure,
                             y=wf_df['Deviation'] / 1e6,
                             text=['${:.2f}M'.format(each / 1e6) for each in wf_df['TotalFines']],
                             decreasing={'marker': {'color': 'red'}},
                             increasing={'marker': {'color': 'green'}},
                             totals={'marker': {'color': end_color}},
                             hovertemplate='Cumulative Deviation to Date: ' + '$%{y:,.2f}M' + '<br>' +
                                           'Total Fines in %{x}: %{text}'))
fig.layout = go.Layout(yaxis=dict(tickformat='.1f'))
fig.update_xaxes(title_text='Months', title_font={'size': 18})
fig.update_yaxes(title_text='Total Fines (Running Total $M)', title_font={'size': 18},
                 dtick=0.5, tickprefix='$', ticksuffix='M', zeroline=True)
fig.update_layout(title=dict(text='Deviation between Actual and Budgeted Monthly Fines in 2019 (Waterfall Diagram)<br>' +
                                  'Surpluses appear in Green, Deficits appear in Red',
                             font=dict(family='Arial', size=18, color='black')),
                  template='simple_white',
                  title_x=0.5,
                  showlegend=False,
                  autosize=True,
                  margin=dict(l=30, r=30, t=60, b=30)
                  )

# The figure is written to an HTML file instead of being shown interactively
# (fig.show() is intentionally not called here).
pio.write_html(fig, path + "plotly_result.html", auto_open=False)
This looks nice too.
# ---------------------------------------------------------------------------
# Bump chart: for every month, rank the years by total fines (1 = highest) and
# draw one line per year across the twelve months.
# ---------------------------------------------------------------------------
bump_df = df.groupby(['Year', 'MonthName'])['ViolFine'].sum().reset_index(name='TotalFines')
bump_df = bump_df.pivot(index='Year', columns='MonthName', values='TotalFines')
month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
bump_df = bump_df.reindex(columns=month_order)
# Keep only the years that have a value for every month.
bump_df = bump_df.dropna()

# Rank down each month column (across years), then transpose so that
# rows = months and columns = years, i.e. one plotted line per year.
bump_df_ranked = bump_df.rank(axis=0, ascending=False, method='min')
bump_df_ranked = bump_df_ranked.T

fig = plt.figure(figsize=(20, 12))
ax = fig.add_subplot(1, 1, 1)
bump_df_ranked.plot(kind='line', ax=ax, marker='o', markeredgewidth=1, linewidth=6,
                    markersize=44,
                    markerfacecolor='white')
# Rank 1 goes at the top of the chart.
ax.invert_yaxis()

num_rows = bump_df_ranked.shape[0]   # months
num_cols = bump_df_ranked.shape[1]   # years

plt.ylabel('Monthly Ranking', fontsize=18, labelpad=10)
plt.title('Ranking of Total Fines by Month and by Year \n Bump Chart', fontsize=18, pad=15)
plt.xticks(np.arange(num_rows), month_order, fontsize=14)
plt.yticks(range(1, num_cols + 1, 1), fontsize=14)
ax.set_xlabel('Month', fontsize=18)

# Reverse the legend entries. Slicing works for any number of years, whereas
# indexing positions 6..0 by hand only works when there are exactly seven.
handles, labels = ax.get_legend_handles_labels()
handles = handles[::-1]
labels = labels[::-1]
ax.legend(handles, labels, bbox_to_anchor=(1.01, 1.01), fontsize=14,
          labelspacing=1,
          markerscale=.4,
          borderpad=1,
          handletextpad=0.8)

# Write each month's total (in $M) at the centre of its marker.
# bump_df_ranked is indexed [month, year]; bump_df is indexed [year, month].
for j in range(num_cols):
    for i in range(num_rows):
        this_rank = bump_df_ranked.iloc[i, j]
        ax.text(i, this_rank, '$' + str(round(bump_df.iloc[j, i] / 1e6, 1)) + 'M',
                ha='center', va='center', fontsize=12)
plt.show()
Here is a caption at the end of my bump chart
You can add captions at the bottom of images. To add a caption, include fig.cap="blah blah" inside the {...} header at the top of the RMarkdown code chunk you are using to include the image.
# Insert the image file at this path into the rendered output via knitr.
knitr::include_graphics("c:/Users/pptallon/Dropbox/G/Personal/Tallon005.jpg")
Courtesy of your favorite IT professor