This is publicly available information on actual loans issued through Lending Club’s peer to peer loan platform. In this model Lending Club acts as a broker between two private parties to help issue loans. This data looks at information on loans issued, including:
This chart shows the distribution of loans issued versus the
recipient’s annual income. There is a positive relationship between
income and loan amount, where as as income increases the average loan
amount increases as well.
The points on the graph are color
coded to display the monthly installment amount paid for each loan. As
expected, when the loan amount goes up, the installment amount is
generally higher as well.
import wget
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import math
path = "C:/Users/samco/OneDrive/Documents/Loyola/Data Visualization/Python Data/"
filename = "loan_data.csv"
df = pd.read_csv(path + filename, nrows=5)
df = pd.read_csv(path + filename, usecols = ['loan_amnt', 'funded_amnt_inv', 'term', 'int_rate', 'installment', 'grade', 'sub_grade', 'home_ownership', 'annual_inc', 'issue_d', 'addr_state', 'open_acc', 'revol_bal', 'revol_util', 'total_pymnt'])
df = df[df['annual_inc'].notna() & df['open_acc'].notna() & df['revol_util'].notna() ]
df2 = df[df['annual_inc'] < 250000]
df2['int_rate'] = df2['int_rate'].str.rstrip('%').astype('float')
plt.figure(figsize=(18,10))
plt.scatter(df2['loan_amnt'], df2['annual_inc'], marker='.', cmap='plasma', c=df2['installment'])
plt.title('Loan Amount by Annual Income', fontsize=20)
cbar = plt.colorbar()
cbar.set_label('Installment Amount', rotation=270, fontsize=15, color='black', labelpad=30)
plt.xlabel('Loan Amount', fontsize=15)
plt.ylabel('Annual Income', fontsize=15)
plt.show()
As seen in this bar chart the majority of loan recipients have a
credit rating of B, followed by A, C, and D respectively.
The
average number of loans issued across each Credit Rating is 5,986, which
is brought down significantly by the low ratings on the right side of
the graph.
The second chart below shows the inverse relationship
between the recipient’s credit rating the and rising interest rate they
recieve as the credit rating goes down.
x1 = df2.groupby(['sub_grade']).agg({'sub_grade':['count'], 'loan_amnt':['sum', 'mean']}).reset_index()
x1.columns = ['SubGrade','Count', 'TotalLoans', 'AverLoans']
x1 = x1.sort_values('Count', ascending=False)
x2 = df2.groupby(['grade']).agg({'sub_grade':['count'], 'loan_amnt':['sum', 'mean'],'int_rate':['mean']}).reset_index()
x3 = df2.groupby(['grade']).agg({'sub_grade':['count'], 'loan_amnt':['sum', 'mean'], 'int_rate':['mean']}).reset_index()
x2.columns = ['Grade','Count', 'TotalLoans', 'AverLoans', 'AverIntRate']
def colors_by_mean(this_data):
colors=[]
avg = this_data.Count.mean()
for each in this_data.Count:
if each > avg*1.01:
colors.append('seagreen')
elif each < avg*0.99:
colors.append('bisque')
else:
colors.append('seagreen')
return colors
import matplotlib.patches as mpatches
top1 = 6
my_colors1 = colors_by_mean(x2)
fig = plt.figure(figsize=(18,16))
Above = mpatches.Patch(color='seagreen', label='Above Average')
Below = mpatches.Patch(color='bisque', label='Below Average')
ax1 = fig.add_subplot(2,1,1)
ax1.bar(x2.Grade, x2.Count, label='Count', color=my_colors1)
#ax1.legend(fontsize=15)
ax1.legend(handles=[Above, Below], fontsize=15)
plt.axhline(x2.Count.mean(), color='black', linestyle='dashed')
ax1.spines['right'].set_visible(False)
ax1.spines['top'].set_visible(False)
ax1.set_title('Count of Loans by Credit Rating', size=20)
ax1.text(top1-1, x2.Count.mean()+300, 'Mean = ' + str(x2.Count.mean()))
ax1.set_xlabel('Customer Credit Rating', fontsize=18)
ax1.set_ylabel('Count of Loans', fontsize=18, labelpad=10)
ax1.yaxis.set_major_formatter(('{x:,.0f}'))
plt.show()
x3.columns = ['Grade','Count', 'TotalLoans', 'AverLoans', 'AverIntRate']
def autolabel(these_bars, this_ax, place_of_decimals, symbol):
for each_bar in these_bars:
height = each_bar.get_height()
this_ax.text(each_bar.get_x()+each_bar.get_width()/2, height*1.01, format(height, place_of_decimals)+symbol,
fontsize = 12, color='black', ha='center', va='bottom')
fig = plt.figure(figsize=(18,10))
ax1 = fig.add_subplot(1,1,1)
ax2 = ax1.twinx()
bar_width = .4
x_pos = np.arange(7)
count_bars = ax1.bar(x_pos-(0.5*bar_width), x2.Count, bar_width, color='seagreen', edgecolor='black', label='Loan Count')
aver_rate_bars = ax2.bar(x_pos+(0.5*bar_width), x2.AverIntRate, bar_width, color='bisque', edgecolor='black', label='Average Interest Rate')
ax1.set_xlabel('Customer Credit Rating', fontsize=18)
ax1.set_ylabel('Count of Loans', fontsize=18, labelpad=10)
ax2.set_ylabel('Average Interest Rate', fontsize=18, rotation=270, labelpad = 30)
ax1.tick_params(axis='y', labelsize=14)
ax2.tick_params(axis='y', labelsize=14)
plt.title('Loan Count and Average Interest Rate', fontsize=20)
ax1.set_xticks(x_pos)
ax1.set_xticklabels(x2.Grade, fontsize=18)
count_color, count_label = ax1.get_legend_handles_labels()
rate_color, rate_label = ax2.get_legend_handles_labels()
legend = ax1.legend(count_color + rate_color, count_label + rate_label, loc = 'upper center', frameon=True, ncol=1, shadow=True, borderpad=1, fontsize=14)
autolabel(count_bars, ax1, ',.0f', '')
autolabel(aver_rate_bars, ax2, '.2f', '%')
ax1.yaxis.set_major_formatter(('{x:,.0f}'))
ax2.yaxis.set_major_formatter(('{x:,.1f}' + "%"))
plt.show()
This line plot illustrates the total number of loans issued throughout the year, organized into the 7 different credit ratings.
df2['issue_d'] = pd.to_datetime(df2['issue_d'], format = '%m/%d/%Y')
df2['Year'] = df2['issue_d'].dt.year
df2['Quarter'] = df2['issue_d'].dt.quarter
df2['Month'] = df2['issue_d'].dt.month
df2['Day'] = df2['issue_d'].dt.day
df2['DayOfYear'] = df2['issue_d'].dt.dayofyear
df2['MonthName'] = df2['issue_d'].dt.strftime('%b')
df2['WeekDay'] = df2['issue_d'].dt.weekday
df2['DayName'] = df2['issue_d'].dt.strftime('%a')
loan_df = df2.groupby(['Month', 'DayName'])['loan_amnt'].sum().reset_index(name='TotalLoans')
month_df = df2.groupby(['MonthName', 'grade'])['loan_amnt'].count().reset_index(name='TotalLoans')
fig = plt.figure(figsize = (18,10))
ax = fig.add_subplot(1,1,1)
field = 'MonthName'
month_order = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
month_df.MonthName = pd.Categorical(month_df.MonthName, categories = month_order, ordered = True)
month_df.sort_values(by='MonthName', inplace=True)
month_df.reset_index(inplace = True, drop = True)
my_colors = {'A':'darkblue',
'B':'crimson',
'C':'darkgreen',
'D':'teal',
'E':'darkorange',
'F':'red',
'G':'black',}
for key, grp in month_df.groupby(['grade']):
grp.plot(ax=ax, kind='line', x ='MonthName', y='TotalLoans', color=my_colors[key], label=key, marker='8')
plt.title('Loans issued by Grade per Month', fontsize=18)
ax.set_xlabel('Month', fontsize=16)
ax.set_ylabel('Loans Issued', fontsize=16)
ax.tick_params(axis='x', labelsize=14, rotation=45)
ax.tick_params(axis='y', labelsize=14, rotation=0)
ax.yaxis.set_major_formatter(('{x:,.0f}'))
ax.set_xticks(np.arange(12))
plt.show()
month_sum_df = df2.groupby(['MonthName', 'grade'])['loan_amnt'].sum().reset_index(name='TotalLoans')
month_order = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
month_sum_df = month_sum_df.pivot(index='grade', columns='MonthName', values='TotalLoans')
month_sum_df = month_sum_df.reindex(columns=(month_order))
fig = plt.figure(figsize=(18,10))
ax = fig.add_subplot(1,1,1)
month_sum_df.T.plot(kind='bar', stacked=True, ax=ax)
plt.title('Total Loans Issued by Grade and Month \n Stacked Bar Plot', fontsize=16)
plt.ylabel('Total Loan Amount Issued', fontsize=14, labelpad=17)
plt.xticks(rotation=45, horizontalalignment = 'center', fontsize = 14)
plt.yticks(fontsize = 14)
ax.set_xlabel('Month Name', fontsize = 14, labelpad=5)
ax.yaxis.set_major_formatter(FuncFormatter(lambda x,pos: ('$%1.1fM')%(x*1e-6)))
plt.show()
The pie chart breaks down the living situation of the loan recipient, whether they rent, have a mortgage, or outright own their home.
pie_df = df2.groupby(['home_ownership'])['loan_amnt'].count().reset_index(name='TotalLoans')
number_colors = len(pie_df.home_ownership.unique())
color_ref_number = np.arange(number_colors)*3
print(color_ref_number)
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(1,1,1)
colormap = plt.get_cmap("tab20c")
mycolors = colormap(color_ref_number)
total_loans = pie_df.TotalLoans.sum()
pie_df.groupby(['home_ownership'])['TotalLoans'].sum().plot(
kind = 'pie', radius = 1, colors = mycolors, pctdistance = .70, labeldistance = 1.1,
wedgeprops = dict(edgecolor = 'white'),
textprops = {'fontsize':16},
autopct = lambda p: '{:.2f}%\n({:,.0f})'.format(p,(p/100)*total_loans),
startangle = 90)
hole = plt.Circle((0,0), 0.4, fc='white')
fig1 = plt.gcf()
fig1.gca().add_artist(hole)
ax.text(0,0, 'Total Loans by Home Type\n' + str(('{:,}'.format(total_loans))), ha='center', va='center', fontsize = 14)
ax.axis('equal')
plt.tight_layout()
ax.yaxis.set_visible(False)
plt.title("Loan Recipient Home Type", fontsize = 18, x=.5, y=.9)
plt.show()