# Remove every (non-hidden) object from the current environment so the script
# starts from an empty workspace.
# NOTE(review): this does not detach packages or reset options, so it is not
# equivalent to starting a fresh R session.
rm(list=ls())
# Attach order matters: a package attached later masks same-named functions
# from packages attached earlier, so do not reorder these calls casually.
# In the part of the script visible here only data.table (`:=`, setorder),
# lfe (felm) and stargazer are actually used; the remaining packages may be
# needed by code outside this view.
library(RPostgres)
library(data.table)
library(lfe)
library(stargazer)
library(dplyr)
library(ggplot2)
library(fst)
library(stringr)
library(stringi)
library(lubridate)

# Memory baseline before anything is loaded (the `##` lines are console
# output recorded from an earlier run).
gc()
##           used  (Mb) gc trigger (Mb) max used  (Mb)
## Ncells 2303637 123.1    4717485  252  2879160 153.8
## Vcells 3946913  30.2    8388608   64  6553565  50.0
# Read the serialized regression sample from disk; the data.table syntax used
# on `regsample` further down requires the stored object to be a data.table.
regsample <- readRDS(
  file = "C:/Users/dratnadiwakara2/Downloads/hmda_cleaned_all_mergers_10052023.rds"
)
# Memory footprint after the load.
gc()
##              used    (Mb)  gc trigger    (Mb)   max used    (Mb)
## Ncells    2863703   153.0     4717485   252.0    2879160   153.8
## Vcells 7093428629 54118.6 10217129105 77950.6 7093438937 54118.7
# ---- Sample restriction and derived regressors ------------------------------
# Restrict to asofdate >= 2000 FIRST. The derived columns and the sort below
# are then computed only on rows that are kept, instead of on the full table
# followed by a filter. The resulting table (values, column order, row order)
# is the same as filtering last, because setorder() is a stable sort.
regsample <- regsample[asofdate >= 2000]

# fifelse() is data.table's typed, lower-overhead counterpart of ifelse();
# like ifelse() it returns NA wherever the test is NA.

# refinance: 1 when purposeofloan == 3, 0 otherwise.
regsample[, refinance := fifelse(purposeofloan == 3, 1, 0)]

# race_eth: "1. black" / "0. other" from applicantrace1, overridden by
# "2. hispanic" when applicantethnicity == 1. A missing applicantethnicity
# yields NA directly from the test, so no separate is.na() branch is needed.
regsample[, race_eth := fifelse(applicantrace1 == 3, "1. black", "0. other")]
regsample[, race_eth := fifelse(applicantethnicity == 1, "2. hispanic", race_eth)]

# race: the same two-level label without the ethnicity override.
regsample[, race := fifelse(applicantrace1 == 3, "1. black", "0. other")]

# bank_county: identifier for each BHCRSSD x countycode pair; it is used as a
# fixed effect in the regressions below.
regsample[, bank_county := paste(BHCRSSD, countycode)]
setorder(regsample, bank_county)

gc()
##              used    (Mb)  gc trigger     (Mb)    max used     (Mb)
## Ncells    2906285   155.3     4717485    252.0     2994690    160.0
## Vcells 6821762624 52046.0 14960971469 114143.2 14860395258 113375.9
# ---- typeofloan == 1: baseline specification --------------------------------
# Regress `approved` on msinc13/msinc46 plus controls. Everything after `|` in
# a felm() formula is absorbed as fixed effects (here bank_county and
# asofdate). No cluster part is given in the formula.
# `suc_share` was listed twice in the original formula; R collapses repeated
# terms, so the single mention below gives the same fit and the same table.
# The `##` lines are console output recorded from an earlier run.
r <- list()
gc()
##              used    (Mb)  gc trigger     (Mb)    max used     (Mb)
## Ncells    2906816   155.3     4717485    252.0     3523758    188.2
## Vcells 6821785993 52046.1 14960971469 114143.2 14938539536 113972.1
r[[1]] <- felm(
  approved ~ msinc13 + msinc46 + suc_share +
    nationwide_msacq13 + nationwide_msacq46 +
    log(applicantincome) + log(amountofloan) +
    factor(race) + refinance |
    bank_county + asofdate,
  data = regsample[typeofloan == 1]
)
gc()
##               used    (Mb)  gc trigger     (Mb)    max used     (Mb)
## Ncells     2949686   157.6     4717485    252.0     3523758    188.2
## Vcells 11606124276 88547.8 21543974914 164367.5 21510692611 164113.6
stargazer(r, type = "text")
## 
## ================================================
##                          Dependent variable:    
##                      ---------------------------
##                               approved          
## ------------------------------------------------
## msinc13                       0.447***          
##                                (0.006)          
##                                                 
## msinc46                       0.969***          
##                                (0.007)          
##                                                 
## suc_share                     -0.171***         
##                                (0.002)          
##                                                 
## nationwide_msacq13           -0.0005***         
##                               (0.00001)         
##                                                 
## nationwide_msacq46            0.108***          
##                                (0.001)          
##                                                 
## log(applicantincome)          0.074***          
##                               (0.0001)          
##                                                 
## log(amountofloan)             0.019***          
##                               (0.0001)          
##                                                 
## factor(race)1. black          -0.102***         
##                               (0.0001)          
##                                                 
## refinance                     -0.100***         
##                               (0.0001)          
##                                                 
## ------------------------------------------------
## Observations                 167,573,916        
## R2                              0.080           
## Adjusted R2                     0.080           
## Residual Std. Error    0.451 (df = 167493809)   
## ================================================
## Note:                *p<0.1; **p<0.05; ***p<0.01
# ---- typeofloan == 1: categorical msinc specification -----------------------
# Same controls and fixed effects as the baseline model, but msinc13Q and
# msinc46Q enter as categorical terms via factor() instead of the continuous
# msinc13 / msinc46.
# `suc_share` was listed twice in the original formula; R collapses repeated
# terms, so the single mention below gives the same fit and the same table.
# The `##` lines are console output recorded from an earlier run.
r <- list()
gc()
##              used    (Mb)  gc trigger     (Mb)    max used     (Mb)
## Ncells    3068525   163.9     4717485    252.0     4717485    252.0
## Vcells 6914157090 52750.9 21543974914 164367.5 21510692611 164113.6
r[[1]] <- felm(
  approved ~ factor(msinc13Q) + factor(msinc46Q) + suc_share +
    nationwide_msacq13 + nationwide_msacq46 +
    log(applicantincome) + log(amountofloan) +
    factor(race) + refinance |
    bank_county + asofdate,
  data = regsample[typeofloan == 1]
)
gc()
##               used    (Mb)  gc trigger     (Mb)    max used     (Mb)
## Ncells     3069185   164.0     4717485    252.0     4717485    252.0
## Vcells 12444177970 94941.6 21543974914 164367.5 21510890467 164115.1
stargazer(r, type = "text")
## 
## ================================================
##                          Dependent variable:    
##                      ---------------------------
##                               approved          
## ------------------------------------------------
## factor(msinc13Q)1             0.028***          
##                               (0.0003)          
##                                                 
## factor(msinc13Q)2             0.018***          
##                               (0.0003)          
##                                                 
## factor(msinc13Q)3             0.014***          
##                               (0.0003)          
##                                                 
## factor(msinc13Q)4             0.023***          
##                               (0.0003)          
##                                                 
## factor(msinc46Q)1             0.014***          
##                               (0.0004)          
##                                                 
## factor(msinc46Q)2             0.023***          
##                               (0.0004)          
##                                                 
## factor(msinc46Q)3             0.051***          
##                               (0.0003)          
##                                                 
## factor(msinc46Q)4             0.043***          
##                               (0.0003)          
##                                                 
## suc_share                     -0.125***         
##                                (0.002)          
##                                                 
## nationwide_msacq13            -0.001***         
##                               (0.00001)         
##                                                 
## nationwide_msacq46            0.100***          
##                                (0.001)          
##                                                 
## log(applicantincome)          0.074***          
##                               (0.0001)          
##                                                 
## log(amountofloan)             0.019***          
##                               (0.0001)          
##                                                 
## factor(race)1. black          -0.102***         
##                               (0.0001)          
##                                                 
## refinance                     -0.100***         
##                               (0.0001)          
##                                                 
## ------------------------------------------------
## Observations                 167,573,916        
## R2                              0.080           
## Adjusted R2                     0.080           
## Residual Std. Error    0.451 (df = 167493803)   
## ================================================
## Note:                *p<0.1; **p<0.05; ***p<0.01
# ---- typeofloan == 1: interactions with race_eth ----------------------------
# msinc13 and msinc46 are each interacted with factor(race_eth); `a * b`
# expands to both main effects plus the a:b interaction.
# NOTE(review): race_eth is NA wherever applicantethnicity is missing, and the
# recorded table for this model reports fewer observations than the
# factor(race) models -- presumably those rows are dropped; confirm this is
# the intended sample.
# `suc_share` was listed twice in the original formula; R collapses repeated
# terms, so the single mention below gives the same fit and the same table.
# The `##` lines are console output recorded from an earlier run.
r <- list()
gc()
##              used    (Mb)  gc trigger     (Mb)    max used     (Mb)
## Ncells    3068558   163.9     4717485    252.0     4717485    252.0
## Vcells 6914157593 52750.9 21543974914 164367.5 21510890467 164115.1
r[[1]] <- felm(
  approved ~ msinc13 * factor(race_eth) + msinc46 * factor(race_eth) +
    suc_share + nationwide_msacq13 + nationwide_msacq46 +
    log(applicantincome) + log(amountofloan) + refinance |
    bank_county + asofdate,
  data = regsample[typeofloan == 1]
)
gc()
##               used    (Mb)  gc trigger     (Mb)    max used     (Mb)
## Ncells     3069246   164.0     4717485    252.0     4717485    252.0
## Vcells 10678751468 81472.5 21543974914 164367.5 21525095365 164223.5
stargazer(r, type = "text")
## 
## ===============================================================
##                                         Dependent variable:    
##                                     ---------------------------
##                                              approved          
## ---------------------------------------------------------------
## msinc13                                      0.975***          
##                                               (0.008)          
##                                                                
## factor(race_eth)1. black                     -0.118***         
##                                              (0.0002)          
##                                                                
## factor(race_eth)2. hispanic                  -0.062***         
##                                              (0.0002)          
##                                                                
## msinc46                                      1.121***          
##                                               (0.013)          
##                                                                
## suc_share                                    -0.068***         
##                                               (0.002)          
##                                                                
## nationwide_msacq13                          -0.0003***         
##                                              (0.00001)         
##                                                                
## nationwide_msacq46                           0.284***          
##                                               (0.003)          
##                                                                
## log(applicantincome)                         0.068***          
##                                              (0.0001)          
##                                                                
## log(amountofloan)                            0.003***          
##                                              (0.0001)          
##                                                                
## refinance                                    -0.110***         
##                                              (0.0001)          
##                                                                
## msinc13:factor(race_eth)1. black              -0.021           
##                                               (0.021)          
##                                                                
## msinc13:factor(race_eth)2. hispanic          -0.574***         
##                                               (0.022)          
##                                                                
## factor(race_eth)1. black:msinc46              -0.040           
##                                               (0.039)          
##                                                                
## factor(race_eth)2. hispanic:msinc46          -0.095**          
##                                               (0.038)          
##                                                                
## ---------------------------------------------------------------
## Observations                                112,404,872        
## R2                                             0.074           
## Adjusted R2                                    0.073           
## Residual Std. Error                   0.453 (df = 112342276)   
## ===============================================================
## Note:                               *p<0.1; **p<0.05; ***p<0.01
# ---- typeofloan == 1: interactions with low_income --------------------------
# msinc13 and msinc46 are each interacted with low_income; this specification
# uses low_income in place of log(applicantincome).
# The original formula also listed `low_income` on its own and `suc_share`
# twice. `msinc13 * low_income` already contributes the low_income main
# effect and R collapses repeated terms, so dropping the duplicates leaves
# the fit and the printed table unchanged.
# The `##` lines are console output recorded from an earlier run.
r <- list()
gc()
##              used    (Mb)  gc trigger     (Mb)    max used     (Mb)
## Ncells    3068579   163.9     4717485    252.0     4717485    252.0
## Vcells 6914158149 52750.9 21543974914 164367.5 21525095365 164223.5
r[[1]] <- felm(
  approved ~ msinc13 * low_income + msinc46 * low_income +
    suc_share + nationwide_msacq13 + nationwide_msacq46 +
    log(amountofloan) + factor(race) + refinance |
    bank_county + asofdate,
  data = regsample[typeofloan == 1]
)
gc()
##               used    (Mb)  gc trigger     (Mb)    max used     (Mb)
## Ncells     3069235   164.0     4717485    252.0     4717485    252.0
## Vcells 11941456932 91106.1 21543974914 164367.5 21527274282 164240.1
stargazer(r, type = "text")
## 
## ================================================
##                          Dependent variable:    
##                      ---------------------------
##                               approved          
## ------------------------------------------------
## msinc13                       0.533***          
##                                (0.008)          
##                                                 
## low_income                    -0.078***         
##                               (0.0001)          
##                                                 
## msinc46                       1.031***          
##                                (0.008)          
##                                                 
## suc_share                     -0.183***         
##                                (0.002)          
##                                                 
## nationwide_msacq13           -0.0005***         
##                               (0.00001)         
##                                                 
## nationwide_msacq46            0.106***          
##                                (0.001)          
##                                                 
## log(amountofloan)             0.030***          
##                               (0.0001)          
##                                                 
## factor(race)1. black          -0.106***         
##                               (0.0001)          
##                                                 
## refinance                     -0.102***         
##                               (0.0001)          
##                                                 
## msinc13:low_income            -0.216***         
##                                (0.011)          
##                                                 
## low_income:msinc46            -0.154***         
##                                (0.011)          
##                                                 
## ------------------------------------------------
## Observations                 167,573,916        
## R2                              0.078           
## Adjusted R2                     0.077           
## Residual Std. Error    0.452 (df = 167493807)   
## ================================================
## Note:                *p<0.1; **p<0.05; ***p<0.01
# ---- typeofloan == 2: three specifications side by side ---------------------
# (1) baseline, (2) msinc x race_eth interactions, (3) msinc x low_income
# interactions, all with bank_county and asofdate fixed effects and all using
# factor(race_eth) as the race control.
#
# Changes relative to the original statements:
#   * `rm(r)` directly before `r <- list()` was redundant and is dropped.
#   * The typeofloan == 2 subset is built once and reused by all three models
#     instead of being re-filtered from `regsample` for every felm() call.
#     It stays in the workspace afterwards; rm(loans_type2) frees it.
#   * `suc_share` was listed twice in each formula; R collapses repeated
#     terms, so one mention gives the same fit and the same table.
# The `##` lines are console output recorded from an earlier run (before the
# subset was cached), so the memory figures are indicative only.
r <- list()
loans_type2 <- regsample[typeofloan == 2]
gc()
##              used    (Mb)  gc trigger     (Mb)    max used     (Mb)
## Ncells    3068755   163.9     4717485    252.0     4717485    252.0
## Vcells 6914160002 52750.9 21543974914 164367.5 21527274282 164240.1
r[[1]] <- felm(
  approved ~ msinc13 + msinc46 + suc_share +
    nationwide_msacq13 + nationwide_msacq46 +
    log(applicantincome) + log(amountofloan) +
    factor(race_eth) + refinance |
    bank_county + asofdate,
  data = loans_type2
)
gc()
##              used    (Mb)  gc trigger     (Mb)    max used     (Mb)
## Ncells    3069385   164.0     4717485    252.0     4717485    252.0
## Vcells 7302123197 55710.8 21543974914 164367.5 21527274282 164240.1
r[[2]] <- felm(
  approved ~ msinc13 * factor(race_eth) + msinc46 * factor(race_eth) +
    suc_share + nationwide_msacq13 + nationwide_msacq46 +
    log(applicantincome) + log(amountofloan) + refinance |
    bank_county + asofdate,
  data = loans_type2
)
gc()
##              used    (Mb)  gc trigger     (Mb)    max used     (Mb)
## Ncells    3070054   164.0     4717485    252.0     4717485    252.0
## Vcells 7743116954 59075.3 21543974914 164367.5 21527274282 164240.1
r[[3]] <- felm(
  approved ~ msinc13 * low_income + msinc46 * low_income +
    suc_share + nationwide_msacq13 + nationwide_msacq46 +
    log(amountofloan) + factor(race_eth) + refinance |
    bank_county + asofdate,
  data = loans_type2
)
# Disabled earlier specifications, kept for reference.
# gc()
# r[[2]] <- felm(approved~msinc13+msinc46|bank_cbsa+asofdate,data=regsample[pred_share_1>0])
# gc()
# r[[3]] <- felm(approved~msinc13+msinc46+log(applicantincome)|bank_cbsa+asofdate+purposeofloan,data=regsample[pred_share_1>0 & purposeofloan==1 & asofdate>2010])
gc()
##              used    (Mb)  gc trigger     (Mb)    max used     (Mb)
## Ncells    3070715   164.0     4717485    252.0     4717485    252.0
## Vcells 8157590204 62237.5 21543974914 164367.5 21527274282 164240.1
stargazer(r, type = "text")
## 
## =====================================================================================================
##                                                            Dependent variable:                       
##                                     -----------------------------------------------------------------
##                                                                 approved                             
##                                              (1)                   (2)                   (3)         
## -----------------------------------------------------------------------------------------------------
## msinc13                                   -0.652***             -0.552***             -0.250***      
##                                            (0.032)               (0.036)               (0.053)       
##                                                                                                      
## low_income                                                                            -0.040***      
##                                                                                       (0.0003)       
##                                                                                                      
## msinc46                                   -1.047***             -0.964***             -0.739***      
##                                            (0.050)               (0.056)               (0.082)       
##                                                                                                      
## suc_share                                 -0.033***             -0.034***             -0.028***      
##                                            (0.006)               (0.006)               (0.006)       
##                                                                                                      
## nationwide_msacq13                        0.002***              0.002***              0.002***       
##                                           (0.00003)             (0.00003)             (0.00003)      
##                                                                                                      
## nationwide_msacq46                        -0.159***             -0.157***             -0.167***      
##                                            (0.014)               (0.014)               (0.014)       
##                                                                                                      
## log(applicantincome)                      0.082***              0.082***                             
##                                           (0.0003)              (0.0003)                             
##                                                                                                      
## log(amountofloan)                         0.005***              0.005***              0.042***       
##                                           (0.0003)              (0.0003)              (0.0003)       
##                                                                                                      
## factor(race_eth)1. black                  -0.067***             -0.067***             -0.070***      
##                                           (0.0004)              (0.0004)              (0.0004)       
##                                                                                                      
## factor(race_eth)2. hispanic               -0.022***             -0.021***             -0.029***      
##                                           (0.0004)              (0.0004)              (0.0004)       
##                                                                                                      
## refinance                                 -0.275***             -0.275***             -0.275***      
##                                           (0.0003)              (0.0003)              (0.0003)       
##                                                                                                      
## msinc13:factor(race_eth)1. black                                  0.035                              
##                                                                  (0.071)                             
##                                                                                                      
## msinc13:factor(race_eth)2. hispanic                             -1.147***                            
##                                                                  (0.096)                             
##                                                                                                      
## factor(race_eth)1. black:msinc46                                  0.099                              
##                                                                  (0.120)                             
##                                                                                                      
## factor(race_eth)2. hispanic:msinc46                             -0.980***                            
##                                                                  (0.148)                             
##                                                                                                      
## msinc13:low_income                                                                    -0.568***      
##                                                                                        (0.060)       
##                                                                                                      
## low_income:msinc46                                                                    -0.429***      
##                                                                                        (0.092)       
##                                                                                                      
## -----------------------------------------------------------------------------------------------------
## Observations                             13,260,180            13,260,180            13,260,180      
## R2                                          0.123                 0.123                 0.119        
## Adjusted R2                                 0.121                 0.121                 0.116        
## Residual Std. Error                 0.428 (df = 13223661) 0.428 (df = 13223657) 0.429 (df = 13223659)
## =====================================================================================================
## Note:                                                                     *p<0.1; **p<0.05; ***p<0.01