# Load the reticulate package to support Python codelibrary(reticulate)# Determine if the data should be refreshed and pass response to Python codeif (params$refresh_data) {py_run_string('refresh_data = True')} else {py_run_string('refresh_data = False')}
Code
# Load Python libraries used in this analysisimport calendarimport foliumimport requestsimport textwrapimport geopandas as gpdimport numpy as npimport pandas as pdimport seaborn as snsimport branca.colormap as cmimport matplotlib.pyplot as pltimport plotly.graph_objects as gofrom datetime import datefrom matplotlib.colors import LogNormfrom matplotlib.ticker import FuncFormatter
Overview
The Montgomery County, Maryland Government is involved in a variety of contracts with vendors, including for-profit and non-profit organizations. This analysis examines the procurement activity of the County Government from several different perspectives. The data used in this analysis is sourced from the Montgomery County Open Data Portal, which provides access to a variety of datasets related to County Government activities.
and includes information regarding the contracts issued by the County Government, including the date of issue, the department issuing the contract, the contract administrator, the vendor, and the contract type. However, no contract value information is provided in this dataset.
Four data analysis tabs are presented below, each focusing on a different aspect of the contract data:
Contract Volume: This tab examines the number of contracts issued by the County Government over time.
Department: This tab examines the number of contracts issued by the County Government by department and contract duration.
Administrator: This tab examines the number of contracts issued by the County Government by specific contract administrators and contract buyers.
Vendor: This tab examines the percentage of contracts issued by the County Government by vendor type (non-profit versus for-profit) for select contract types, and the quantity of contracts issued by states in which the vendors are located.
Code
# Decide where to get the datarefresh_data =False# If refresh is requested, download. Otherwise use stored fileif (refresh_data ==True):# Download the data from the API# Set the request parameters# Set the URL for the API endpoint url ='https://data.montgomerycountymd.gov/resource/mytb-swri.json'# There are more than 2,000 records, so we need to set a higher record limit using the $limit parameter dl_params = {'$limit': 5000 }# Make the HTTP request response = requests.get(url, params = dl_params)# Convert the response to JSON data = response.json()# Convert the JSON to a DataFrame df = pd.DataFrame(data)# Save the data to a CSV file df.to_csv('MC_Contracts.csv', index=False)else: df = pd.read_csv('MC_Contracts.csv')# Clean data# Keep only the columns we needdf = df[['buyerfirst','buyerlast', 'contracttype', 'vendorst','nonprofit', 'deptname', 'cadmin','execution','expiration']]# Shorten Department Name by removing "Department of"df['deptname'] = ( df['deptname'] .str .replace('Department of ', ''))# Expand the execution date parameter for additional analysisdf['execution'] = pd.to_datetime(df['execution'])# Create a new column for the yeardf['Year'] = df['execution'].dt.year# Create a new column for the monthdf['Month'] = df['execution'].dt.month# Create a new column for the quarterdf['Quarter'] = df['execution'].dt.quarter# Create a new column for the month abbreviationdf['Month_Abbr'] = df['Month'].apply(lambda x: calendar.month_abbr[x])# Convert the expiration date to a datetime objectdf['expiration'] = pd.to_datetime(df['expiration'])# Create a new column for the duration of the contracts in yearsdf['duration'] = ((df['expiration'] - df['execution']).dt.days) /365# Create a new column for the buyer's full name and then drop buyerfirst and buyerlastdf.loc[:, 'buyer'] = df['buyerfirst'] +' '+ df['buyerlast']df = df.drop(['buyerfirst', 'buyerlast'], axis=1)
The volume of contracts issued by the County Government is explored in this section.
The number of contracts issued per month for 2020 through the present is presented in Figure 1. Based on this graph, it appears that the number of contracts per month was lower in 2020 and 2021, perhaps due to COVID-19 restrictions limiting contracting activity, and higher in 2022 and beyond. Further, the number of contracts seemed to spike upward in the spring and summer of 2022, roughly corresponding to the easing of many COVID-19 related restrictions.
Code
# For the first graph, determine the number of contracts issued per selected timeframes# Select only the date columns for the graphdf_exec = df.loc[ : , ['execution', 'Year', 'Month']]# Find the number of contracts issued by year and month# Restrict analysis to contracts issued in 2020 and laterdf_exec = df_exec[df_exec['Year'] >=2020]# Filter out current partial monthdf_exec = df_exec[df_exec['execution'] < pd.to_datetime(date.today().replace(day =1))]# Count the number of contracts by year and monthdf_exec = df_exec.groupby(['Year', 'Month']).size().reset_index(name ='Count')# Pivot the data to prepare for plottingdf_pivot = df_exec.pivot( index ='Month', columns ='Year', values ='Count')# Sort by month to prepare for plottingdf_pivot = df_pivot.sort_values('Month', ascending =True)
Code
# Create the first plot# Set up the plotfig = plt.figure(figsize = (8, 6))ax = fig.add_subplot(1, 1, 1)# Plot the datadf_pivot.plot( kind ='line', ax = ax, marker ='8')# Set the subtitle, and axis labelsplt.title(f'(Data as of {date.today().strftime("%B %d, %Y")}, Partial Months Excluded)', fontsize =12, pad =0)ax.set_xlabel('Month', fontsize =14, labelpad =5)ax.set_ylabel('Contracts Issued', fontsize =14, labelpad =5)# Set the tick label formattingax.tick_params( axis ='x', labelsize =12, rotation =0)ax.tick_params( axis ='y', labelsize =12, rotation =0)# Show all 12 months numbered 1 through 12ax.set_xticks( np.arange(12) +1)# Set the x-axis labels to the month abbreviations instead of numbersax.xaxis.set_major_formatter( FuncFormatter(lambda x, pos: f'{calendar.month_abbr[x]}' ))handles, labels = ax.get_legend_handles_labels()handles = [ handles[4], handles[3], handles[2], handles[1], handles[0]]labels = [ labels[4], labels[3], labels[2], labels[1], labels[0]]# Format and add title to the legendax.legend( handles, labels, title ='Year', title_fontsize =12, fontsize =10, loc ='upper left')# # Add a reference line for the mean number of contracts issued per monthmean_contracts = df_exec['Count'].mean()ax.axhline( y = mean_contracts, color ='black', linestyle ='--')# Label the reference lineax.text(10, mean_contracts -5,f'Mean: {mean_contracts:.1f} Contracts', fontsize =8, color ='black')# Find the month and year for the maximum number of contracts issuedmax_contracts = df_exec[df_exec['Count'] == df_exec['Count'].max()]# Label the peak numer of contracts issuedax.annotate(f'{max_contracts.Count.values[0]} Contracts', xy = (max_contracts.Month.values[0], max_contracts.Count.values[0]), xytext = (max_contracts.Month.values[0] -3, max_contracts.Count.values[0] -10), arrowprops =dict(facecolor ='black', shrink =0.1), fontsize =8)# Show the plotplt.show();
Figure 1: Contracts by Month (2020 - 2024)
The analysis presented in Figure 1 is further supported by the analysis presented in Figure 2. This graph shows the deviation from the average number of contracts issued per quarter for the years 2020 through 2023. The average number of contracts issued per quarter is calculated and then subtracted from the actual number of contracts issued per quarter. The resulting deviation from the average is plotted in a waterfall chart, showing that the number of contracts issued per quarter was below average through Q1 of 2022, and has remained above average since Q2 of 2022.
Code
# Prepare the waterfall chart data - Keep only 2020 through 2023df_wf = df[(df.Year >=2020) & (df.Year <=2023)]# Count the number of contracts by year and quarterdf_wf = df_wf.groupby(['Year', 'Quarter']).size().reset_index(name ='Count')# Calculate the average number of contracts issued per quarteraverage_contracts = df_wf.Count.mean()# Calculate the deviation from the averagedf_wf['Average'] = average_contractsdf_wf['Difference'] = df_wf['Count'] - df_wf['Average']df_wf['Period'] = df_wf['Year'].astype(str) +'-Q'+ df_wf['Quarter'].astype(str)# Drop the Year and Quarter columns since they are no longer neededdf_wf = df_wf.drop(['Year', 'Quarter'], axis=1)# Reorder the columns by placing 'Period' at the startdf_wf = df_wf[['Period'] + [col for col in df_wf.columns if col !='Period']]
Code
# Create the waterfall chartfig = go.Figure( go.Waterfall( name ='Contracts Issued by Quarter', orientation ='v', x = df_wf['Period'], textposition ='auto', measure = ["relative", "relative","relative", "relative", "relative", "relative","relative", "relative", "relative","relative", "relative", "relative","relative", "relative", "relative","relative" ], y = df_wf['Difference'], text = ['{:,.0f}'.format(i) for i in df_wf['Count']], decreasing = {'marker': {'color': 'red'}}, increasing = {'marker': {'color': 'green'}}, hovertemplate ='Cumulative Deviation to Date: '+'%{y:,.0f}'+'<br>'+'Period Total for '+'%{x}: %{text}'+'<extra></extra>' ))# Format axes and other parts of graphfig.update_layout( xaxis =dict( title_text ='Period', title_font = {'size': 18}, title_standoff =10, tickangle =315 ), yaxis =dict( title_text ='Running Total Deviation from Average', title_font = {'size': 18}, title_standoff =10, dtick =50, tickformat ='.0f', tickfont = {'size': 14}, zeroline =True,range= [-350, 20] ), template ='simple_white', showlegend =False, autosize =True, margin =dict( l =50, r =50, b =50, t =50, pad =4 ))
Figure 2: Deviation from Average Contracts Issued by Quarter (2020 - 2023)
Code
# Display the plotfig.show();
This section analyzes information related to the department issuing the contract.
The first graph in this section, Figure 3, shows the number of contracts issued by the top 10 departments in the County Government. The graph shows that the Department of General Services issued the most contracts, followed by the Department of Health and Human Services and the Department of Transportation. The graph also shows the distribution of contract types issued by each department, with the majority of contracts being open solicitations.
Code
# Find the departments with the most contractstop_10_contracts = df.groupby(['deptname']).size().sort_values(ascending=False).reset_index(name='Count').head(10)# Find the most common contract typestop_5_types = ( df.groupby(['contracttype']) .size() .sort_values(ascending=False) .reset_index(name='Count') .head(5))# All remaining contract types will be grouped into the category 'Other'top_5_types = pd.concat([ top_5_types, pd.DataFrame([{'contracttype': 'Other', 'Count': df['contracttype'].size - top_5_types['Count'].sum() }]) ], ignore_index=True)# If a contract type is not in the top 5 types, change to 'Other' to prepare for plottingdf_contract = df.copy()df_contract['contracttype'] = np.where( df_contract['contracttype'].isin(top_5_types['contracttype']), df_contract['contracttype'], 'Other')# Find the number of contracts by department and contract typecontract_types = ( df_contract[df_contract['deptname'].isin(top_10_contracts['deptname'])] .groupby(['deptname', 'contracttype']) .size() .reset_index(name='Count'))# Determine the maximum number of contracts by Department for plottingmax_contracts = contract_types.groupby('deptname').Count.sum().max()# Pivot the data to prepare for plottingdf_pivot = ( contract_types .pivot(index ='deptname', columns='contracttype', values='Count') .fillna(0))df_pivot = df_pivot.reindex(columns=top_5_types['contracttype'].values)# Sort to prepare for plottingdf_pivot['Total'] = df_pivot.sum(axis=1)df_pivot = df_pivot.sort_values('Total', ascending=True)
Code
# Set up the plotfig = plt.figure(figsize = (6.75, 6))ax = fig.add_subplot(1, 1, 1)# Plot the data, excluding the Total column( df_pivot .loc[:, df_pivot.columns !='Total'] .plot( kind ='barh', stacked=True, ax=ax ))# Set the suptitle, title, and axis labels# plt.suptitle(# 'Contracts Issued by Department and Contract Type',# fontsize = 24# )plt.title('(Top 10 Departments by Contract Volume)', fontsize =12, pad =0)plt.xlabel('Contracts Issued', fontsize =14, labelpad =5)plt.xlim(0, max_contracts *1.1);plt.ylabel('Department', fontsize =14, labelpad =5)# Set the tick label formattingax.tick_params( axis ='x', labelsize =12, rotation =0)ax.tick_params( axis ='y', labelsize =10, rotation =0)# Wrap long labels on the y-axis to multiple linesax.set_yticklabels( [textwrap.fill(l.get_text(), 18) for l in ax.get_yticklabels()])# Format and add title to the legendlegend = ax.legend( prop = {'size': 10})legend.set_title('Contract Type', prop = {'size': 12})# Add the total number of contracts to the right of each barfor i inrange(len(df_pivot)): ax.text( df_pivot['Total'].iloc[i] +5, i, f'{df_pivot["Total"].iloc[i]:.0f}', va ='center', fontsize =10 )plt.tight_layout()# Show the plotplt.show();
Figure 3: Contracts Issued by Department and Type
The second graph in this section, Figure 4, shows the distribution of contract durations, with the top 5 departments based on contract volume pulled out individually. The graph shows that the Department of Health & Human Services has the longest median contract durations, and the Department of Recreation has the shortest median contract durations. The graph also shows many outliers in terms of duration, with some contracts lasting nearly 40 years.
Code
# Try a different graph than the ones covered in the video - boxplot# Find the departments with the longest total, mean, and median contract durationstop_contracts = df.groupby('deptname').duration.agg(['mean', 'median', 'count']).sort_values(by = ['median', 'mean'], ascending =False)# Keep only departments with more than 100 contracts in the data settop_contracts = top_contracts[top_contracts['count'] >100].reset_index(drop =False)# Keep only the columns we need for the graphdf_box = df[['deptname', 'duration']].copy()# Convert departments not in top_contracts to 'All Other' to prepare for plottingdf_box['deptname'] = np.where( df_box['deptname'].isin(top_contracts['deptname']), df_box['deptname'], 'All Other')# Rerun top_contracts analysis including department 'Other'top_contracts = df_box.groupby('deptname').duration.agg(['mean', 'median']).sort_values(by = ['median', 'mean'], ascending =False).reset_index(drop =False)# Move row for 'All Other' to the end of the DataFrame for plot orderingrow_to_move = top_contracts[top_contracts['deptname'] =='All Other']top_contracts = top_contracts[top_contracts['deptname'] !='All Other']top_contracts = pd.concat([top_contracts, row_to_move], ignore_index =True)
Code
# Set up the plotbox = sns.boxplot( x ='duration', y ='deptname', hue ='deptname', data = df_box, orient ='h', order = top_contracts['deptname'].tolist())# Set subtitle and axis labelsbox.set_title('(Top 5 Departments by Contract Volume,\nAll Other Departments Combined)', fontsize =12, pad =5)box.set_xlabel('Contract Duration (Years)', fontsize =14, labelpad =5)box.set_ylabel('Department', fontsize =14, labelpad =5)box.set_yticklabels( [textwrap.fill(l.get_text(), 18) for l in box.get_yticklabels()])# Show the plotplt.tight_layout()plt.show();
Figure 4: Distribution of Contract Durations by Department
The first graph in this section, Figure 5, explores with contract administrator issued the most contracts in 2023. The bump chart shows the rank of the cumulative total of contracts issued per contract administrator by month for the top 6 contract administrators by volume. The graph shows how the rank of each administrator changes from month to month. Based on this chart, it is clear that Allison Cohen, by far, issued the most contracts in 2023.
Code
# Find the top 6 contract administrators by volume for 2023top_admin = df[df.Year ==2023].groupby(['cadmin']).size().sort_values(ascending=False).reset_index(name='Count').head(6)# Find the number of contracts issued by month for the top administratorsbump_df = df[df.Year ==2023].groupby(['Month_Abbr', 'cadmin']).size().reset_index(name ='Count').sort_values(by ='Month_Abbr', ascending =True)# Keep only the top 6 administrators by volumebump_df = bump_df[bump_df['cadmin'].isin(top_admin['cadmin'])].reset_index(drop =True)# Reorder the months to prepare for plottingmonth_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']bump_df_pivot = bump_df.pivot_table(index='cadmin', columns='Month_Abbr', values='Count', fill_value=0)bump_df_pivot = bump_df_pivot.reindex(columns = month_order)# Find the cumulative total of contracts issued by month for each administratorbump_df_pivot_cumulative = bump_df_pivot.cumsum(axis=1)# Find the month-by-month rankings of cumulative totalsbump_df_rank = bump_df_pivot_cumulative.rank(axis =0, method ='first', ascending =False)bump_df_rank = bump_df_rank.T
Code
# Set up the plotfig = plt.figure(figsize = (8, 6))ax = fig.add_subplot(1, 1, 1)# Create a bump chart from the rank databump_df_rank.plot( kind ='line', ax = ax, marker ='o', markersize =24, markeredgewidth =3, linewidth =3, markerfacecolor ='white')ax.invert_yaxis()# Find the number of rows and columns in the datan_rows = bump_df_rank.shape[0]n_cols = bump_df_rank.shape[1]# Set the subtitle, title, and axis labelsplt.title('(Top Contract Administrators by Volume,\nCumulative Total Displayed on Each Data Point)', fontsize =12, pad =0)plt.xlabel('Month', fontsize =14, fontweight ='bold', labelpad =5)plt.ylabel('Monthly Rank', fontsize =14, fontweight ='bold', labelpad =5)plt.xticks( ticks = np.arange(n_rows), labels = month_order, fontsize =12);plt.yticks( ticks = np.arange(1, n_cols +1), labels = np.arange(1, n_cols +1), fontsize =12);# Order the legend by rank of contract administratori =0ca_rank =dict()for eachcol in bump_df_rank.columns: this_rank = bump_df_rank.iloc[11, i] ca_rank[this_rank] = i i +=1handles, labels = ax.get_legend_handles_labels()handles = [ handles[ca_rank[1]], handles[ca_rank[2]], handles[ca_rank[3]], handles[ca_rank[4]], handles[ca_rank[5]], handles[ca_rank[6]],]labels = [ labels[ca_rank[1]], labels[ca_rank[2]], labels[ca_rank[3]], labels[ca_rank[4]], labels[ca_rank[5]], labels[ca_rank[6]]]ax.legend( handles, labels, title ='Administrator', title_fontsize =10, fontsize =9, loc ='upper right', bbox_to_anchor = (1.35, 1),# labelspacing = 1, markerscale =0.3,# borderpad = 1,# handletextpad = 0.8)# Fill in the data points with the cumulative totalsi =0j =0for eachcol in bump_df_rank.columns:for eachrow in bump_df_rank.index: this_rank = bump_df_rank.iloc[i, j] ax.text( i, this_rank,f'{bump_df_pivot_cumulative.iloc[j, i]: .0f}', ha ='center', va ='center', fontsize =9 ) i +=1 j +=1 i =0# Show the plotplt.tight_layout()plt.show();
Figure 5: Rank of Cumulative Total of Contracts Issued per Contract Administrator for 2023
The second graph in this section, Figure 6, explores how contract administrators and buyers are paired. The graph includes the top administrators and the top buyers based on contract volume. Based on the graph, it is clear that the top administrators and buyers primarily work as pairs, with a few exceptions.
Code
# Find the top 10 contract administrators and buyers by volumetop_cadmin = df.groupby('cadmin').size().sort_values(ascending=False).reset_index(name='Count').head(10)top_buyer = df.groupby('buyer').size().sort_values(ascending=False).reset_index(name='Count').head(10)# Keep only the top administrators and buyers by volumedf_heat = df[df['cadmin'].isin(top_cadmin['cadmin'])]df_heat = df_heat[df_heat['buyer'].isin(top_buyer['buyer'])]# Find the number of contracts issued by each administrator and buyer combinationdf_heat = df_heat.groupby(['cadmin', 'buyer']).size().sort_values(ascending=False).reset_index(name='Count')# Pivot the data to prepare for plottingdf_heat_pivot = df_heat.pivot_table(index ='cadmin', columns ='buyer', values ='Count', fill_value =0)# Create column and row totals for sortingdf_heat_pivot['Total'] = df_heat_pivot.sum(axis =1)df_heat_pivot.loc['Total'] = df_heat_pivot.sum(axis =0)# Sort the data to prepare for plottingdf_heat_pivot = df_heat_pivot.sort_values(by ='Total', ascending =False)df_heat_pivot = df_heat_pivot.sort_values(by ='Total',axis =1, ascending =False)# Drop the total columns and rowsdf_heat_pivot = df_heat_pivot.drop('Total', axis =1)df_heat_pivot = df_heat_pivot.drop('Total', axis =0)
Code
# Set up the plotfig = plt.figure(figsize = (8, 6))ax = fig.add_subplot(1, 1, 1)# Create the heatmapsns.heatmap( df_heat_pivot, ax = ax, norm = LogNorm(vmin =0.1, vmax = df_heat_pivot.values.max(), clip =True), linewidth =0.2, annot =True, cmap ='coolwarm', fmt =',.0f', square =True, annot_kws = {'size': 11}, cbar_kws = {'orientation': 'vertical'})# Add subtitle and axis labelsplt.title('(Top Administrators and Top Buyers\nby Contract Volume)', fontsize =12, pad =5)plt.xlabel('Buyer', fontsize =12, labelpad =5)plt.ylabel('Contract Administrator', fontsize =12, labelpad =5)# Using a log color scale due to the range of values to be plottedcbar = ax.collections[0].colorbarcbar_ticks = [1, 10, 100]cbar.set_ticks(cbar_ticks)cbar_labels = [i for i in cbar_ticks]cbar.set_ticklabels(cbar_labels)cbar.set_label('Number of Contracts', rotation =270, fontsize =14, color ='black', labelpad =10)# Show the plotplt.tight_layout()plt.show();
Figure 6: Heatmap of Administrator and Buyer Pairings by Contract Volume
The first graph in this section, Figure 7, explores the distribution of non-profit versus for-profit vendors based on contract type. The top three contract types for non-profit vendors, based on percentage, are displayed, along with the overall distribution of contracts by vendor type. The graph shows that the majority of contracts are awarded to for-profit vendors. Non-profit vendors receive the most contracts by percentage for non-competitive council resolutions, followed by other non-competitive contracts.
Code
# Determine the top 3 contract types for non-profit vendors by total contractsnonprofit_ct = df.groupby(['contracttype', 'nonprofit']).size().reset_index(name ='Count')nonprofit_ct = nonprofit_ct[nonprofit_ct['nonprofit'] =='Yes'].sort_values(by ='Count', ascending =False).head(3)# Also determine the overall number of non-profit and for-profit contractsoverall = df.groupby(['nonprofit']).size().reset_index(name ='Count')# Group all non-profit contracts together and all for-profit contracts togetherdf_np = df[df['contracttype'].isin(nonprofit_ct['contracttype'])]df_np = df_np.groupby(['contracttype', 'nonprofit']).size().reset_index(name ='Count')# Add the overall number of non-profit and for-profit contracts to the DataFramedf_np.loc[df_np.index.max() +1] = ['Overall', 'No', overall[overall.nonprofit =='No'].Count.values[0]]df_np.loc[df_np.index.max() +1] = ['Overall', 'Yes', overall[overall.nonprofit =='Yes'].Count.values[0]]# Sort the data to prepare for plottingdf_np = df_np.sort_values(by = ['nonprofit', 'Count'], ascending = [False, False]).reset_index(drop =True)# Rename from Yes / No to Non-Profit / For-Profitdf_np['type'] = np.where(df_np['nonprofit'] =='Yes', 'Non-Profit', 'For-Profit')# Calculate total contracts for each type for use in the pie charttotal_contracts = df_np.groupby(['contracttype']).Count.sum().reset_index(name ='Count')total_contracts = total_contracts.sort_values(by ='Count', ascending =False).reset_index(drop =True)# Sort by non-profit status for ordering of pie chartstop_contracts = df_np.groupby(['contracttype', 'nonprofit']).Count.sum().reset_index(name ='Count')top_contracts = top_contracts[top_contracts['nonprofit'] =='Non-Profit']top_contracts = top_contracts.sort_values(by ='Count', ascending =False).reset_index(drop =True)
Code
# Set up the pie chart colorsouter_colors = ['blue', 'darkorange']# Create a generic function for pie chart configurationdef pie_config(fig, ax, fig_loc): patches, texts, pcts = ax.pie( x = df_np[df_np['contracttype'] == df_np.loc[fig_loc, 'contracttype']].Count, labels = df_np[df_np['contracttype'] == df_np.loc[fig_loc, 'contracttype']].nonprofit, radius =1, labeldistance =None, pctdistance =0.8, autopct =lambda p: f'{p:.1f}%\n'+f'{p * total_contracts[total_contracts['contracttype'] == df_np.loc[fig_loc, 'contracttype']].Count.values[0] /100:.0f}', startangle =0, colors = outer_colors, wedgeprops =dict( width =0.4, edgecolor ='w' ), textprops =dict( fontsize =14, weight ='bold', color ='black' ) ) ax.yaxis.set_visible(False) ax.axis('equal')# Place the contract type in the center of the pie chart in bold ax.text(0, 0, f'{textwrap.fill(total_contracts[total_contracts['contracttype'] == df_np.loc[fig_loc, 'contracttype']].contracttype.values[0], 20)}\n', ha ='center', va ='center', fontsize =12, color ='black', weight ='bold' )# Place the total number of contracts in the center of the pie chart.# This text is placed below the contract type to avoid overlap, and is not# bolded to differentiate from the contract type. ax.text(0, -0.2, f'{total_contracts[total_contracts['contracttype'] == df_np.loc[fig_loc, 'contracttype']].Count.values[0]:.0f} Contracts', ha ='center', va ='center', fontsize =12, color ='black',# weight = 'bold' ) plt.setp(pcts, fontsize =10, color ='white') plt.tight_layout()return fig, ax
Code
# Set up the plotfig = plt.figure(figsize=(7.5, 7.5))# Create the first pie chart using the generic functionax1 = fig.add_subplot(2, 2, 1)fig, ax1 = pie_config(fig, ax1, 0)# Create the second pie chart using the generic functionax2 = fig.add_subplot(2, 2, 2)fig, ax2 = pie_config(fig, ax2, 1)# Create the third pie chart using the generic functionax3 = fig.add_subplot(2, 2, 3)fig, ax3 = pie_config(fig, ax3, 2)# Create the fourth pie chart using the generic functionax4 = fig.add_subplot(2, 2, 4)fig, ax4 = pie_config(fig, ax4, 3)# Create a legend in the middle for the entire figure to distinguish Non-Profit and For-Profithandles_labels = [ax.get_legend_handles_labels() for ax in fig.axes]handles, labels = [sum(i, []) for i inzip(*handles_labels)]fig.legend( handles = [handles[0], handles[1]], labels = ['Non-Profit', 'For-Profit'], title ='Vendor Type', loc ='center', prop = {'size': 14})# Show the plotplt.show();
Figure 7: Non-Profit vs. For-Profit Contract Recipients Overall and by Top Contract Types
The second graph in this section, Figure 8, shows the number of contracts to vendors by state. The graph shows that the majority of contracts are issued in Maryland, but several other states capture a significant number of contracts, including the surrounding areas of DC, Virginia, and Pennsylvania.
Code
# Prepare the data for mappingdf_state = df.groupby('vendorst').size().reset_index(name ='Count')# Load the shapefile for the US map provided by the US Census Bureauusmap_gdf = gpd.read_file('./cb_2018_us_state_500k/cb_2018_us_state_500k.shp')usmap_gdf = usmap_gdf.merge(df_state, left_on='STUSPS', right_on='vendorst', how='left').fillna(0)usmap_json = usmap_gdf.to_json(drop_id=True)# Create the color scale used for the maptick_list = [*range(200, df_state['Count'].max(), 200)]tick_list = [str(element) for element in tick_list]# The map appears to have some limitations regaarding color bars, so this is a work-around.# The color scale is set to be non-linear to better represent the distribution of the# number of contracts by state. However, when using this non-linear scale, the color bar, # as presented, does not show the all colors effectively. Thus, I am creating a second,# alternate color bar that better shows the distribution of colors.tick_list2 = [0, df_state['Count'].max(),]tick_list2 = [str(element) for element in tick_list2]# Create the non-lineaer color scale for the mapnonlinear = cm.LinearColormap( ['darkblue', 'blue', 'cyan', 'yellow', 'orange', 'red'], index = [0, df_state['Count'].quantile(0.2), df_state['Count'].quantile(0.4), df_state['Count'].quantile(0.6), df_state['Count'].quantile(0.8), # df_state['Count'].quantile(0.1), df_state['Count'].max() ], tick_labels = tick_list, vmin = df_state['Count'].min(), vmax = df_state['Count'].max(),)# Also create the alternate color scale for display in the maplinear = cm.LinearColormap( ['darkblue', 'blue', 'cyan', 'yellow', 'orange', 'red'], tick_labels = tick_list2, vmin = df_state['Count'].min(), vmax = df_state['Count'].max(), caption ='Number of Contracts Issued (Non-Linear Scale)')
Code
# Set up the mapm = folium.Map( location = [39.5, -97.35], zoom_start =4, tiles ='cartodbpositron',)# Prepare the cloropleth mapcp_m = folium.GeoJson( usmap_json, style_function =lambda x: {'fillColor': nonlinear(x['properties']['Count']),'color': 'black','weight': 2,'fillOpacity': 0.7, }, highlight_function =lambda x: {'color': 'red','weight': 2,'fillOpacity': 1 }).add_to(m);# Add the alternate color scale to the maplinear.add_to(m);# Display State Name and Contract Count on Hovercp_m.add_child( folium.features.GeoJsonTooltip( fields = ['NAME', 'Count'], aliases = ['State: ', 'Contracts: '], labels =True, style = ('background-color: black; color: white;') ));# Add the title to the mapmap_title ="Figure 8: Number of Contracts Issued to Vendors by State"title_html =f'<h1 align="left" style="font-size:16px">{map_title}</h1>'m.get_root().html.add_child(folium.Element(title_html));# save the map to an HTML file and then reload below for viewingm.save('MC_Contracts_Choropleth.html');