# Load the funded-loan sample used by every chunk below as a data.table.
# `path` is defined outside this section and is concatenated as-is, so it
# must already end with a path separator.
fundedloans <- read_fst(
  path = paste0(path, "fundedloans_trad_vs_upstart.fst"),
  as.data.table = TRUE
)

1 APR

This section compares the APR the borrower would have received under the traditional underwriting model with the actual APR. The sample consists of funded loans in the second half of 2019 (CFPB model outputs are only available for applications in the second half of 2019).

1.1 Univariate evidence

# Mean and standard deviation of the traditional-model APR (estimated_apr)
# and of the actual APR (apr) per FICO bin, restricted to bins in 620-800.
aprsummary <- fundedloans[
  ficobin %in% 620:800,
  .(
    estapr = mean(estimated_apr, na.rm = TRUE),
    estaprsd = sd(estimated_apr, na.rm = TRUE),
    actapr = mean(apr, na.rm = TRUE),
    actaprsd = sd(apr, na.rm = TRUE)
  ),
  by = ficobin
]

# Reshape to long format (one row per FICO bin and statistic) and give the
# two mean series their legend labels; the SD rows keep their raw names.
aprsummary <- melt(aprsummary, id.vars = "ficobin")
aprsummary[, variable := as.character(variable)]
aprlabels <- c(
  estapr = "APR based on traditional variables",
  actapr = "APR based on Upstart model"
)
aprsummary[, variable := ifelse(
  variable == "estapr",
  aprlabels[["estapr"]],
  ifelse(variable == "actapr", aprlabels[["actapr"]], variable)
)]

# Only the two mean series are plotted. They are selected by label rather
# than by label length (`nchar(variable) > 20`), which selected the same rows
# but would silently break if a label were shortened or a column renamed.
aprmeans <- aprsummary[variable %in% aprlabels]

ggplot(aprmeans, aes(x = ficobin, y = value, color = variable)) +
  geom_line(size = 1) +
  geom_point(aes(shape = variable), size = 3) +
  theme_minimal() +
  labs(x = "", y = "APR (%)") +
  scale_color_manual(values = c("dodgerblue4", "dodgerblue2")) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_x_continuous(breaks = unique(aprsummary$ficobin))

1.2 Regression

Dependent variable: (Traditional APR - Actual APR)*100
Fixed effects: zip code
Standard errors: clustered at zip code

The results suggest that younger, more highly educated, debt-consolidating, and computer-using applicants would have received a higher APR under the traditional model than the rate they actually received, particularly when their FICO score is lower.

# Regress aprdiff on borrower characteristics, each interacted with
# one_over_fico100. The formula parts after the covariates are, in order:
# fixed effects (zip.x), instruments (none, 0), and the clustering
# variable for the standard errors (zip.x).
r <- felm(
  aprdiff ~
    one_over_fico100 * incomelt50 +
    one_over_fico100 * agebelow30 +
    one_over_fico100 * factor(education) +
    one_over_fico100 * chlt10yr +
    one_over_fico100 * debtconsolidation +
    one_over_fico100 * computer +
    one_over_fico100 * loanaggrigator |
    zip.x | 0 | zip.x,
  data = fundedloans
)

# Print the fitted model as a one-column regression table.
.printtable(r, column.labels = "Traditional APR - Actual APR")
## 
## ================================================================
##                                         Dependent variable:     
##                                     ----------------------------
##                                     Traditional APR - Actual APR
## ----------------------------------------------------------------
## one_over_fico100                             421.255***         
##                                               (13.158)          
## incomelt50                                    7.585***          
##                                               (1.248)           
## agebelow30                                   -12.400***         
##                                               (1.553)           
## factor(education)1                           -9.028***          
##                                               (1.285)           
## factor(education)2                           -22.525***         
##                                               (1.969)           
## chlt10yr                                      3.340**           
##                                               (1.501)           
## debtconsolidation                            -6.217***          
##                                               (1.370)           
## computer                                     -9.677***          
##                                               (1.238)           
## loanaggrigator                               12.139***          
##                                               (1.213)           
## one_over_fico100:incomelt50                  -92.797***         
##                                               (8.648)           
## one_over_fico100:agebelow30                  92.954***          
##                                               (10.736)          
## one_over_fico100:factor(education)1          78.421***          
##                                               (8.915)           
## one_over_fico100:factor(education)2          188.293***         
##                                               (13.640)          
## one_over_fico100:chlt10yr                    -25.442**          
##                                               (10.358)          
## one_over_fico100:debtconsolidation           64.451***          
##                                               (9.588)           
## one_over_fico100:computer                    72.812***          
##                                               (8.584)           
## one_over_fico100:loanaggrigator              -99.150***         
##                                               (8.411)           
## ----------------------------------------------------------------
##                                                                 
## Observations                                   86,999           
## Adjusted R2                                    0.324            
## ================================================================
## Note:                                *p<0.1; **p<0.05; ***p<0.01
## 

1.3 Univariate evidence splits

1.3.1 By income

# Gap between the mean traditional-model APR (estimated_apr) and the mean
# actual APR (apr) for each FICO bin in 620-800, split by incomelt50.
aprsummary <- fundedloans[
  ficobin %in% 620:800,
  .(value = mean(estimated_apr, na.rm = TRUE) - mean(apr, na.rm = TRUE)),
  by = .(ficobin, incomelt50)
]
# Legend label for each income group.
aprsummary[, variable := ifelse(
  incomelt50 == 1,
  "Income less than $50,000",
  "Income greater than $50,000"
)]

# One line-and-point series per income group across FICO bins.
ggplot(aprsummary, aes(x = ficobin, y = value, color = variable)) +
  geom_line(size = 1) +
  geom_point(aes(shape = variable), size = 3) +
  theme_minimal() +
  labs(x = "", y = "Traditional APR - Actual APR (%)") +
  scale_color_manual(values = c("dodgerblue4", "dodgerblue2")) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_x_continuous(breaks = unique(aprsummary$ficobin))

1.3.2 By education

# Gap between the mean traditional-model APR (estimated_apr) and the mean
# actual APR (apr) for each FICO bin in 620-800, split by education.
aprsummary <- fundedloans[
  ficobin %in% 620:800,
  .(value = mean(estimated_apr, na.rm = TRUE) - mean(apr, na.rm = TRUE)),
  by = .(ficobin, education)
]
# Legend label per education code: 2 and 1 get their own labels, every
# other non-missing code falls into "No college degree".
aprsummary[, variable := ifelse(
  education == 2,
  "Advanced degree",
  ifelse(education == 1, "College degree", "No college degree")
)]

# One line-and-point series per education group across FICO bins; three
# colours are supplied because there are three groups.
ggplot(aprsummary, aes(x = ficobin, y = value, color = variable)) +
  geom_line(size = 1) +
  geom_point(aes(shape = variable), size = 3) +
  theme_minimal() +
  labs(x = "", y = "Traditional APR - Actual APR (%)") +
  scale_color_manual(values = c("dodgerblue4", "dodgerblue2", "skyblue")) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_x_continuous(breaks = unique(aprsummary$ficobin))

1.3.3 By age

# Gap between the mean traditional-model APR (estimated_apr) and the mean
# actual APR (apr) for each FICO bin in 620-800, split by agebelow30.
aprsummary <- fundedloans[
  ficobin %in% 620:800,
  .(value = mean(estimated_apr, na.rm = TRUE) - mean(apr, na.rm = TRUE)),
  by = .(ficobin, agebelow30)
]
# Legend label for each age group.
aprsummary[, variable := ifelse(
  agebelow30 == 1,
  "Age less than 30",
  "Age greater than 30"
)]

# One line-and-point series per age group across FICO bins.
ggplot(aprsummary, aes(x = ficobin, y = value, color = variable)) +
  geom_line(size = 1) +
  geom_point(aes(shape = variable), size = 3) +
  theme_minimal() +
  labs(x = "", y = "Traditional APR - Actual APR (%)") +
  scale_color_manual(values = c("dodgerblue4", "dodgerblue2")) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_x_continuous(breaks = unique(aprsummary$ficobin))

1.3.4 By credit history

# Gap between the mean traditional-model APR (estimated_apr) and the mean
# actual APR (apr) for each FICO bin in 620-800, split by chlt10yr.
aprsummary <- fundedloans[
  ficobin %in% 620:800,
  .(value = mean(estimated_apr, na.rm = TRUE) - mean(apr, na.rm = TRUE)),
  by = .(ficobin, chlt10yr)
]
# Legend label for each credit-history group.
aprsummary[, variable := ifelse(
  chlt10yr == 1,
  "Credit history less than 10 years",
  "Credit history greater than 10 years"
)]

# One line-and-point series per credit-history group across FICO bins.
ggplot(aprsummary, aes(x = ficobin, y = value, color = variable)) +
  geom_line(size = 1) +
  geom_point(aes(shape = variable), size = 3) +
  theme_minimal() +
  labs(x = "", y = "Traditional APR - Actual APR (%)") +
  scale_color_manual(values = c("dodgerblue4", "dodgerblue2")) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_x_continuous(breaks = unique(aprsummary$ficobin))

1.3.5 By employment type

# Gap between the mean traditional-model APR (estimated_apr) and the mean
# actual APR (apr) for each FICO bin in 620-800, split by employed_hourly.
aprsummary <- fundedloans[
  ficobin %in% 620:800,
  .(value = mean(estimated_apr, na.rm = TRUE) - mean(apr, na.rm = TRUE)),
  by = .(ficobin, employed_hourly)
]
# Legend label for each employment type.
aprsummary[, variable := ifelse(
  employed_hourly == 1,
  "Hourly worker",
  "Non-hourly worker"
)]

# One line-and-point series per employment type across FICO bins.
ggplot(aprsummary, aes(x = ficobin, y = value, color = variable)) +
  geom_line(size = 1) +
  geom_point(aes(shape = variable), size = 3) +
  theme_minimal() +
  labs(x = "", y = "Traditional APR - Actual APR (%)") +
  scale_color_manual(values = c("dodgerblue4", "dodgerblue2")) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_x_continuous(breaks = unique(aprsummary$ficobin))

1.3.6 By device type

# Gap between the mean traditional-model APR (estimated_apr) and the mean
# actual APR (apr) for each FICO bin in 620-800, split by computer.
aprsummary <- fundedloans[
  ficobin %in% 620:800,
  .(value = mean(estimated_apr, na.rm = TRUE) - mean(apr, na.rm = TRUE)),
  by = .(ficobin, computer)
]
# Legend label for each device type: every non-missing value other than 1
# is labelled "Smartphone".
aprsummary[, variable := ifelse(computer == 1, "Computer", "Smartphone")]

# One line-and-point series per device type across FICO bins.
ggplot(aprsummary, aes(x = ficobin, y = value, color = variable)) +
  geom_line(size = 1) +
  geom_point(aes(shape = variable), size = 3) +
  theme_minimal() +
  labs(x = "", y = "Traditional APR - Actual APR (%)") +
  scale_color_manual(values = c("dodgerblue4", "dodgerblue2")) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_x_continuous(breaks = unique(aprsummary$ficobin))

1.3.7 By channel

# Gap between the mean traditional-model APR (estimated_apr) and the mean
# actual APR (apr) for each FICO bin in 620-800, split by loanaggrigator
# (the column name is spelled this way in the data).
aprsummary <- fundedloans[
  ficobin %in% 620:800,
  .(value = mean(estimated_apr, na.rm = TRUE) - mean(apr, na.rm = TRUE)),
  by = .(ficobin, loanaggrigator)
]
# Legend label for each acquisition channel.
aprsummary[, variable := ifelse(
  loanaggrigator == 1,
  "Loan aggregator",
  "Other"
)]

# One line-and-point series per channel across FICO bins.
ggplot(aprsummary, aes(x = ficobin, y = value, color = variable)) +
  geom_line(size = 1) +
  geom_point(aes(shape = variable), size = 3) +
  theme_minimal() +
  labs(x = "", y = "Traditional APR - Actual APR (%)") +
  scale_color_manual(values = c("dodgerblue4", "dodgerblue2")) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_x_continuous(breaks = unique(aprsummary$ficobin))

2 Rejection rate

This section analyses how often the traditional model would have rejected the loans that Upstart funded. The sample consists of funded loans in the second half of 2019 (CFPB model outputs are only available for applications in the second half of 2019).

2.1 Univariate evidence

# Mean of rejecttrad per FICO bin in 620-800, scaled by 100 so it can be
# plotted as a percentage.
rejecsummary <- fundedloans[
  ficobin %in% 620:800,
  .(rejecttrad = mean(rejecttrad, na.rm = TRUE) * 100),
  by = ficobin
]

# Single line-and-point series across FICO bins.
ggplot(rejecsummary, aes(x = ficobin, y = rejecttrad)) +
  geom_line(size = 1, color = "dodgerblue4") +
  geom_point(size = 3, color = "dodgerblue4") +
  theme_minimal() +
  labs(x = "", y = "Pct of funded loans rejected by traditional model") +
  scale_x_continuous(breaks = unique(rejecsummary$ficobin))

2.2 Regression

Dependent variable: Rejected by the traditional model
Fixed effects: zip code
Standard errors: clustered at zip code

# Regress rejecttrad on borrower characteristics, each interacted with
# one_over_fico100. The formula parts after the covariates are, in order:
# fixed effects (zip.x), instruments (none, 0), and the clustering
# variable for the standard errors (zip.x).
r <- felm(
  rejecttrad ~
    one_over_fico100 * incomelt50 +
    one_over_fico100 * agebelow30 +
    one_over_fico100 * factor(education) +
    one_over_fico100 * chlt10yr +
    one_over_fico100 * debtconsolidation +
    one_over_fico100 * computer +
    one_over_fico100 * loanaggrigator |
    zip.x | 0 | zip.x,
  data = fundedloans
)

# Print the fitted model as a one-column regression table.
.printtable(r, column.labels = "Rejected by traditional model")
## 
## =================================================================
##                                          Dependent variable:     
##                                     -----------------------------
##                                     Rejected by traditional model
## -----------------------------------------------------------------
## one_over_fico100                              20.174***          
##                                                (0.580)           
## incomelt50                                    0.169***           
##                                                (0.056)           
## agebelow30                                    -0.720***          
##                                                (0.071)           
## factor(education)1                            -0.486***          
##                                                (0.057)           
## factor(education)2                            -0.753***          
##                                                (0.083)           
## chlt10yr                                       -0.118*           
##                                                (0.069)           
## debtconsolidation                             -0.597***          
##                                                (0.060)           
## computer                                      -0.217***          
##                                                (0.054)           
## loanaggrigator                                0.600***           
##                                                (0.053)           
## one_over_fico100:incomelt50                   -2.047***          
##                                                (0.387)           
## one_over_fico100:agebelow30                   5.045***           
##                                                (0.485)           
## one_over_fico100:factor(education)1           3.614***           
##                                                (0.393)           
## one_over_fico100:factor(education)2           5.827***           
##                                                (0.565)           
## one_over_fico100:chlt10yr                      0.866*            
##                                                (0.470)           
## one_over_fico100:debtconsolidation            4.489***           
##                                                (0.421)           
## one_over_fico100:computer                     1.542***           
##                                                (0.369)           
## one_over_fico100:loanaggrigator               -4.435***          
##                                                (0.366)           
## -----------------------------------------------------------------
##                                                                  
## Observations                                   86,999            
## Adjusted R2                                     0.219            
## =================================================================
## Note:                                 *p<0.1; **p<0.05; ***p<0.01
## 

2.3 Univariate splits

2.3.1 By income

# Mean of rejecttrad (scaled by 100) for each FICO bin in 620-800, split by
# incomelt50. The result reuses the name `aprsummary` even though it now
# holds rejection rates rather than APRs.
aprsummary <- fundedloans[
  ficobin %in% 620:800,
  .(value = mean(rejecttrad, na.rm = TRUE) * 100),
  by = .(ficobin, incomelt50)
]
# Legend label for each income group.
aprsummary[, variable := ifelse(
  incomelt50 == 1,
  "Income less than $50,000",
  "Income greater than $50,000"
)]

# One line-and-point series per income group across FICO bins.
ggplot(aprsummary, aes(x = ficobin, y = value, color = variable)) +
  geom_line(size = 1) +
  geom_point(aes(shape = variable), size = 3) +
  theme_minimal() +
  labs(x = "", y = "Pct of funded loans rejected by traditional model") +
  scale_color_manual(values = c("dodgerblue4", "dodgerblue2")) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_x_continuous(breaks = unique(aprsummary$ficobin))

2.3.2 By education

# Mean of rejecttrad (scaled by 100) for each FICO bin in 620-800, split by
# college. The result reuses the name `aprsummary` even though it now holds
# rejection rates rather than APRs.
# NOTE(review): the APR split in section 1.3.2 groups by the three-level
# `education` column, whereas this split uses `college` - confirm that the
# coarser grouping is intentional.
aprsummary <- fundedloans[
  ficobin %in% 620:800,
  .(value = mean(rejecttrad, na.rm = TRUE) * 100),
  by = .(ficobin, college)
]
# Legend label for each education group.
aprsummary[, variable := ifelse(
  college == 1,
  "With college degree",
  "Without college degree"
)]

# One line-and-point series per education group across FICO bins.
ggplot(aprsummary, aes(x = ficobin, y = value, color = variable)) +
  geom_line(size = 1) +
  geom_point(aes(shape = variable), size = 3) +
  theme_minimal() +
  labs(x = "", y = "Pct of funded loans rejected by traditional model") +
  scale_color_manual(values = c("dodgerblue4", "dodgerblue2")) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_x_continuous(breaks = unique(aprsummary$ficobin))

2.3.3 By age

# Mean of rejecttrad (scaled by 100) for each FICO bin in 620-800, split by
# agebelow30. The result reuses the name `aprsummary` even though it now
# holds rejection rates rather than APRs.
aprsummary <- fundedloans[
  ficobin %in% 620:800,
  .(value = mean(rejecttrad, na.rm = TRUE) * 100),
  by = .(ficobin, agebelow30)
]
# Legend label for each age group.
aprsummary[, variable := ifelse(
  agebelow30 == 1,
  "Age less than 30",
  "Age greater than 30"
)]

# One line-and-point series per age group across FICO bins.
ggplot(aprsummary, aes(x = ficobin, y = value, color = variable)) +
  geom_line(size = 1) +
  geom_point(aes(shape = variable), size = 3) +
  theme_minimal() +
  labs(x = "", y = "Pct of funded loans rejected by traditional model") +
  scale_color_manual(values = c("dodgerblue4", "dodgerblue2")) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_x_continuous(breaks = unique(aprsummary$ficobin))

2.3.4 By credit history

# Mean of rejecttrad (scaled by 100) for each FICO bin in 620-800, split by
# chlt10yr. The result reuses the name `aprsummary` even though it now holds
# rejection rates rather than APRs.
aprsummary <- fundedloans[
  ficobin %in% 620:800,
  .(value = mean(rejecttrad, na.rm = TRUE) * 100),
  by = .(ficobin, chlt10yr)
]
# Legend label for each credit-history group.
aprsummary[, variable := ifelse(
  chlt10yr == 1,
  "Credit history less than 10 years",
  "Credit history greater than 10 years"
)]

# One line-and-point series per credit-history group across FICO bins.
ggplot(aprsummary, aes(x = ficobin, y = value, color = variable)) +
  geom_line(size = 1) +
  geom_point(aes(shape = variable), size = 3) +
  theme_minimal() +
  labs(x = "", y = "Pct of funded loans rejected by traditional model") +
  scale_color_manual(values = c("dodgerblue4", "dodgerblue2")) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_x_continuous(breaks = unique(aprsummary$ficobin))

2.3.5 By employment type

# Mean of rejecttrad (scaled by 100) for each FICO bin in 620-800, split by
# hourly. The result reuses the name `aprsummary` even though it now holds
# rejection rates rather than APRs.
# NOTE(review): the APR split in section 1.3.5 groups by `employed_hourly`,
# whereas this split uses `hourly` - confirm both columns exist and that
# this is the intended one.
aprsummary <- fundedloans[
  ficobin %in% 620:800,
  .(value = mean(rejecttrad, na.rm = TRUE) * 100),
  by = .(ficobin, hourly)
]
# Legend label for each employment type.
aprsummary[, variable := ifelse(
  hourly == 1,
  "Hourly worker",
  "Non-hourly worker"
)]

# One line-and-point series per employment type across FICO bins.
ggplot(aprsummary, aes(x = ficobin, y = value, color = variable)) +
  geom_line(size = 1) +
  geom_point(aes(shape = variable), size = 3) +
  theme_minimal() +
  labs(x = "", y = "Pct of funded loans rejected by traditional model") +
  scale_color_manual(values = c("dodgerblue4", "dodgerblue2")) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_x_continuous(breaks = unique(aprsummary$ficobin))