Lending Club: A look into peer-to-peer lending

This is publicly available information on actual loans issued through Lending Club’s peer to peer loan platform. In this model Lending Club acts as a broker between two private parties to help issue loans. This data looks at information on loans issued, including:

Loans Issued by Annual Income

This chart shows the distribution of loans issued versus the recipient’s annual income. There is a positive relationship between income and loan amount, where as as income increases the average loan amount increases as well.

The points on the graph are color coded to display the monthly installment amount paid for each loan. As expected, when the loan amount goes up, the installment amount is generally higher as well.


import wget
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import math

path = "C:/Users/samco/OneDrive/Documents/Loyola/Data Visualization/Python Data/"

filename = "loan_data.csv"
df = pd.read_csv(path + filename, nrows=5)

df = pd.read_csv(path + filename, usecols = ['loan_amnt', 'funded_amnt_inv', 'term', 'int_rate', 'installment', 'grade', 'sub_grade', 'home_ownership', 'annual_inc', 'issue_d', 'addr_state', 'open_acc', 'revol_bal', 'revol_util', 'total_pymnt'])

df = df[df['annual_inc'].notna() & df['open_acc'].notna() & df['revol_util'].notna()    ]

df2 = df[df['annual_inc'] < 250000]

df2['int_rate'] = df2['int_rate'].str.rstrip('%').astype('float')
plt.figure(figsize=(18,10))
plt.scatter(df2['loan_amnt'], df2['annual_inc'], marker='.', cmap='plasma', c=df2['installment'])
plt.title('Loan Amount by Annual Income', fontsize=20)

cbar = plt.colorbar()
cbar.set_label('Installment Amount', rotation=270, fontsize=15, color='black', labelpad=30)

plt.xlabel('Loan Amount', fontsize=15)
plt.ylabel('Annual Income', fontsize=15)

plt.show()

Credit Rating Distribution

As seen in this bar chart the majority of loan recipients have a credit rating of B, followed by A, C, and D respectively.

The average number of loans issued across each Credit Rating is 5,986, which is brought down significantly by the low ratings on the right side of the graph.



The second chart below shows the inverse relationship between the recipient’s credit rating the and rising interest rate they recieve as the credit rating goes down.

x1 = df2.groupby(['sub_grade']).agg({'sub_grade':['count'], 'loan_amnt':['sum', 'mean']}).reset_index()
x1.columns = ['SubGrade','Count', 'TotalLoans', 'AverLoans']
x1 = x1.sort_values('Count', ascending=False)
x2 = df2.groupby(['grade']).agg({'sub_grade':['count'], 'loan_amnt':['sum', 'mean'],'int_rate':['mean']}).reset_index()
x3 = df2.groupby(['grade']).agg({'sub_grade':['count'], 'loan_amnt':['sum', 'mean'], 'int_rate':['mean']}).reset_index()
x2.columns = ['Grade','Count', 'TotalLoans', 'AverLoans', 'AverIntRate']

def colors_by_mean(this_data):
    colors=[]
    avg = this_data.Count.mean()
    for each in this_data.Count:
        if each > avg*1.01:
            colors.append('seagreen')
        elif each < avg*0.99:
            colors.append('bisque')
        else:
            colors.append('seagreen')
    return colors


import matplotlib.patches as mpatches

top1 = 6
my_colors1 = colors_by_mean(x2)
fig = plt.figure(figsize=(18,16))



Above = mpatches.Patch(color='seagreen', label='Above Average')
Below = mpatches.Patch(color='bisque', label='Below Average')

ax1 = fig.add_subplot(2,1,1)
ax1.bar(x2.Grade, x2.Count, label='Count', color=my_colors1)
#ax1.legend(fontsize=15)
ax1.legend(handles=[Above, Below], fontsize=15)
plt.axhline(x2.Count.mean(), color='black', linestyle='dashed')
ax1.spines['right'].set_visible(False)
ax1.spines['top'].set_visible(False)
ax1.set_title('Count of Loans by Credit Rating', size=20)
ax1.text(top1-1, x2.Count.mean()+300, 'Mean = ' + str(x2.Count.mean()))

ax1.set_xlabel('Customer Credit Rating', fontsize=18)
ax1.set_ylabel('Count of Loans', fontsize=18, labelpad=10)
ax1.yaxis.set_major_formatter(('{x:,.0f}'))

plt.show()

x3.columns = ['Grade','Count', 'TotalLoans', 'AverLoans', 'AverIntRate']

def autolabel(these_bars, this_ax, place_of_decimals, symbol):
    for each_bar in these_bars:
        height = each_bar.get_height()
        this_ax.text(each_bar.get_x()+each_bar.get_width()/2, height*1.01, format(height, place_of_decimals)+symbol,
                    fontsize = 12, color='black', ha='center', va='bottom')

fig = plt.figure(figsize=(18,10))
ax1 = fig.add_subplot(1,1,1)
ax2 = ax1.twinx()
bar_width = .4

x_pos = np.arange(7)

count_bars = ax1.bar(x_pos-(0.5*bar_width), x2.Count, bar_width, color='seagreen', edgecolor='black', label='Loan Count')
aver_rate_bars = ax2.bar(x_pos+(0.5*bar_width), x2.AverIntRate, bar_width, color='bisque', edgecolor='black', label='Average Interest Rate')

ax1.set_xlabel('Customer Credit Rating', fontsize=18)
ax1.set_ylabel('Count of Loans', fontsize=18, labelpad=10)
ax2.set_ylabel('Average Interest Rate', fontsize=18, rotation=270, labelpad = 30)
ax1.tick_params(axis='y', labelsize=14)
ax2.tick_params(axis='y', labelsize=14)

plt.title('Loan Count and Average Interest Rate', fontsize=20)
ax1.set_xticks(x_pos)
ax1.set_xticklabels(x2.Grade, fontsize=18)

count_color, count_label = ax1.get_legend_handles_labels()
rate_color, rate_label   = ax2.get_legend_handles_labels()
legend = ax1.legend(count_color + rate_color, count_label + rate_label, loc = 'upper center', frameon=True, ncol=1, shadow=True, borderpad=1, fontsize=14)

autolabel(count_bars, ax1, ',.0f', '')
autolabel(aver_rate_bars, ax2, '.2f', '%')

ax1.yaxis.set_major_formatter(('{x:,.0f}'))
ax2.yaxis.set_major_formatter(('{x:,.1f}' + "%"))

plt.show()

Loans Issued per Month

This line plot illustrates the total number of loans issued throughout the year, organized into the 7 different credit ratings.


df2['issue_d'] = pd.to_datetime(df2['issue_d'], format = '%m/%d/%Y')
df2['Year'] = df2['issue_d'].dt.year
df2['Quarter'] = df2['issue_d'].dt.quarter
df2['Month'] = df2['issue_d'].dt.month
df2['Day'] = df2['issue_d'].dt.day
df2['DayOfYear'] = df2['issue_d'].dt.dayofyear
df2['MonthName'] = df2['issue_d'].dt.strftime('%b')
df2['WeekDay'] = df2['issue_d'].dt.weekday
df2['DayName'] = df2['issue_d'].dt.strftime('%a')
loan_df = df2.groupby(['Month', 'DayName'])['loan_amnt'].sum().reset_index(name='TotalLoans')

month_df = df2.groupby(['MonthName', 'grade'])['loan_amnt'].count().reset_index(name='TotalLoans')

fig = plt.figure(figsize = (18,10))
ax = fig.add_subplot(1,1,1)

field = 'MonthName'
month_order = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

month_df.MonthName = pd.Categorical(month_df.MonthName, categories = month_order, ordered = True)

month_df.sort_values(by='MonthName', inplace=True)
month_df.reset_index(inplace = True, drop = True)

my_colors = {'A':'darkblue',
             'B':'crimson',
             'C':'darkgreen',
             'D':'teal',
             'E':'darkorange',
             'F':'red',
             'G':'black',}

for key, grp in month_df.groupby(['grade']):
    grp.plot(ax=ax, kind='line', x ='MonthName', y='TotalLoans', color=my_colors[key], label=key, marker='8')

plt.title('Loans issued by Grade per Month', fontsize=18)
ax.set_xlabel('Month', fontsize=16)
ax.set_ylabel('Loans Issued', fontsize=16)

ax.tick_params(axis='x', labelsize=14, rotation=45)
ax.tick_params(axis='y', labelsize=14, rotation=0)

ax.yaxis.set_major_formatter(('{x:,.0f}'))



ax.set_xticks(np.arange(12))


plt.show()

month_sum_df = df2.groupby(['MonthName', 'grade'])['loan_amnt'].sum().reset_index(name='TotalLoans')

month_order = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

month_sum_df = month_sum_df.pivot(index='grade', columns='MonthName', values='TotalLoans')

month_sum_df = month_sum_df.reindex(columns=(month_order))


fig = plt.figure(figsize=(18,10))
ax = fig.add_subplot(1,1,1)

month_sum_df.T.plot(kind='bar', stacked=True, ax=ax)


plt.title('Total Loans Issued by Grade and Month \n Stacked Bar Plot', fontsize=16)
plt.ylabel('Total Loan Amount Issued', fontsize=14, labelpad=17)
plt.xticks(rotation=45, horizontalalignment = 'center', fontsize = 14)
plt.yticks(fontsize = 14)
ax.set_xlabel('Month Name', fontsize = 14, labelpad=5)

ax.yaxis.set_major_formatter(FuncFormatter(lambda x,pos: ('$%1.1fM')%(x*1e-6)))


plt.show()

Home Ownership

The pie chart breaks down the living situation of the loan recipient, whether they rent, have a mortgage, or outright own their home.


pie_df = df2.groupby(['home_ownership'])['loan_amnt'].count().reset_index(name='TotalLoans')

number_colors = len(pie_df.home_ownership.unique())
color_ref_number = np.arange(number_colors)*3
print(color_ref_number)
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(1,1,1)

colormap = plt.get_cmap("tab20c")
mycolors = colormap(color_ref_number)

total_loans = pie_df.TotalLoans.sum()


pie_df.groupby(['home_ownership'])['TotalLoans'].sum().plot(
    kind = 'pie', radius = 1, colors = mycolors, pctdistance = .70, labeldistance = 1.1,
    wedgeprops = dict(edgecolor = 'white'),
    textprops = {'fontsize':16},
    autopct = lambda p: '{:.2f}%\n({:,.0f})'.format(p,(p/100)*total_loans),
    startangle = 90)

hole = plt.Circle((0,0), 0.4, fc='white')
fig1 = plt.gcf()
fig1.gca().add_artist(hole)

ax.text(0,0, 'Total Loans by Home Type\n' + str(('{:,}'.format(total_loans))), ha='center', va='center', fontsize = 14)

ax.axis('equal')
plt.tight_layout()

ax.yaxis.set_visible(False)
plt.title("Loan Recipient Home Type", fontsize = 18, x=.5, y=.9)
plt.show()