rm(list=ls())
library(data.table)
library(RSQLite)
library(DBI)

Documentation

Connecting to Database

dbdir = "/data/hmda.db"
con <- dbConnect(RSQLite::SQLite(),dbdir)

List tables

Table lar contains all the mortgage applications.

Table ts contains information on the lender

Tables lar and ts are linked by the variables respondentid and asofdate in both tables. respondentid is the lender’s unique id and asofdate is the year of reporting.

dbListTables(con)
## [1] "lar" "ts"

First 10 rows of lar

df <- data.table(dbGetQuery(con,"SELECT * FROM lar LIMIT 10"))

str(df)
## Classes 'data.table' and 'data.frame':   10 obs. of  100 variables:
##  $ asofdate                                : int  2018 2018 2018 2018 2018 2018 2018 2018 2018 2018
##  $ respondentid                            : chr  "549300CUF3Q2PQGM9256" "549300CUF3Q2PQGM9256" "549300CUF3Q2PQGM9256" "549300CUF3Q2PQGM9256" ...
##  $ msa                                     : int  31140 30460 30460 30460 99999 99999 30460 30460 21060 31140
##  $ state                                   : chr  "21" "21" "21" "21" ...
##  $ countycode                              : chr  "211" "239" "209" "067" ...
##  $ censustract                             : chr  "21211040101" "21239050103" "21209040601" "21067002800" ...
##  $ conforming_loan_limit                   : chr  "C" "C" "C" "C" ...
##  $ derived_loan_product_type               : chr  "Conventional:Subordinate Lien" "Conventional:Subordinate Lien" "Conventional:Subordinate Lien" "Conventional:Subordinate Lien" ...
##  $ propertytype                            : chr  "1" "1" "1" "1" ...
##  $ derived_ethnicity                       : chr  "Not Hispanic or Latino" "Not Hispanic or Latino" "Not Hispanic or Latino" "Not Hispanic or Latino" ...
##  $ derived_race                            : chr  "White" "White" "White" "White" ...
##  $ derived_sex                             : chr  "Male" "Joint" "Joint" "Joint" ...
##  $ actiontaken                             : int  1 3 1 3 3 1 3 1 1 1
##  $ typeofpurchaser                         : int  0 0 0 0 0 0 0 0 0 0
##  $ preapprovals                            : int  2 2 2 2 2 2 2 2 2 2
##  $ typeofloan                              : int  1 1 1 1 1 1 1 1 1 1
##  $ purposeofloan                           : int  31 31 2 4 31 4 2 2 2 31
##  $ lienstatus                              : int  2 2 2 2 2 1 2 2 2 2
##  $ reverse_mortgage                        : int  2 2 2 2 2 2 2 2 2 2
##  $ open_end_line_of_credit                 : int  2 2 2 2 2 2 2 2 2 2
##  $ business_or_commercial_purpose          : int  2 2 2 2 2 2 2 2 2 2
##  $ amountofloan                            : num  75 25 105 35 35 205 35 35 55 55
##  $ combined_loan_to_value_ratio            : chr  "79.806" NA "29.067" "99.54" ...
##  $ interest_rate                           : chr  "5.0" NA "4.5" NA ...
##  $ ratespread                              : chr  "-0.01" NA "-0.5" NA ...
##  $ hoepastatus                             : int  2 3 2 3 3 2 3 2 2 2
##  $ total_loan_costs                        : chr  NA NA NA NA ...
##  $ total_points_and_fees                   : chr  NA NA NA NA ...
##  $ origination_charges                     : chr  NA NA NA NA ...
##  $ discount_points                         : chr  NA NA NA NA ...
##  $ lender_credits                          : chr  NA NA NA NA ...
##  $ loan_term                               : chr  NA NA NA NA ...
##  $ prepayment_penalty_term                 : chr  NA NA NA NA ...
##  $ intro_rate_period                       : chr  "3" "3" "3" "3" ...
##  $ negative_amortization                   : int  2 2 2 2 2 2 2 2 2 2
##  $ interest_only_payment                   : int  2 2 2 2 2 2 2 2 2 2
##  $ balloon_payment                         : int  2 2 2 2 2 2 2 2 2 2
##  $ other_nonamortizing_features            : int  2 2 2 2 2 2 2 2 2 2
##  $ property_value                          : chr  "495000" NA "555000" NA ...
##  $ construction_method                     : int  1 1 1 1 1 1 1 1 1 1
##  $ occupancy                               : int  1 1 1 1 1 1 1 1 1 1
##  $ manufactured_home_secured_property_type : int  3 3 3 3 3 3 3 3 3 3
##  $ manufactured_home_land_property_interest: int  5 5 5 5 5 5 5 5 5 5
##  $ total_units                             : chr  "1" "1" "1" "1" ...
##  $ multifamily_affordable_units            : chr  NA NA NA NA ...
##  $ applicantincome                         : int  152 168 190 62 89 91 117 83 105 87
##  $ debt_to_income_ratio                    : chr  "37" "<20%" "<20%" "43" ...
##  $ applicant_credit_score_type             : int  3 3 3 3 3 3 3 3 3 3
##  $ co_applicant_credit_score_type          : int  3 3 3 3 3 3 3 3 3 3
##  $ applicantethnicity                      : int  2 2 2 2 2 2 2 2 2 2
##  $ applicant_ethnicity_2                   : int  NA NA NA NA NA NA NA NA NA NA
##  $ applicant_ethnicity_3                   : int  NA NA NA NA NA NA NA NA NA NA
##  $ applicant_ethnicity_4                   : int  NA NA NA NA NA NA NA NA NA NA
##  $ applicant_ethnicity_5                   : int  NA NA NA NA NA NA NA NA NA NA
##  $ co_applicant_ethnicity_1                : int  2 2 2 2 2 2 2 2 2 2
##  $ co_applicant_ethnicity_2                : int  NA NA NA NA NA NA NA NA NA NA
##  $ co_applicant_ethnicity_3                : int  NA NA NA NA NA NA NA NA NA NA
##  $ co_applicant_ethnicity_4                : int  NA NA NA NA NA NA NA NA NA NA
##  $ co_applicant_ethnicity_5                : int  NA NA NA NA NA NA NA NA NA NA
##  $ applicant_ethnicity_observed            : int  2 2 2 2 2 2 2 2 2 2
##  $ co_applicant_ethnicity_observed         : int  2 2 2 2 2 2 2 2 2 2
##  $ applicantrace1                          : int  5 5 5 5 5 5 5 5 5 5
##  $ applicant_race_2                        : int  NA NA NA NA NA NA NA NA NA NA
##  $ applicant_race_3                        : int  NA NA NA NA NA NA NA NA NA NA
##  $ applicant_race_4                        : int  NA NA NA NA NA NA NA NA NA NA
##  $ applicant_race_5                        : int  NA NA NA NA NA NA NA NA NA NA
##  $ co_applicant_race_1                     : int  5 5 5 5 5 5 5 5 5 5
##  $ co_applicant_race_2                     : int  NA NA NA NA NA NA NA NA NA NA
##  $ co_applicant_race_3                     : int  NA NA NA NA NA NA NA NA NA NA
##  $ co_applicant_race_4                     : int  NA NA NA NA NA NA NA NA NA NA
##  $ co_applicant_race_5                     : int  NA NA NA NA NA NA NA NA NA NA
##  $ applicant_race_observed                 : int  2 2 2 2 2 2 2 2 2 2
##  $ co_applicant_race_observed              : int  2 2 2 2 2 3 2 2 2 2
##  $ applicantsex                            : int  1 2 1 1 1 1 1 1 1 2
##  $ co_applicant_sex                        : int  1 1 2 2 2 2 2 2 2 1
##  $ applicant_sex_observed                  : int  2 2 2 2 2 2 2 2 2 2
##  $ co_applicant_sex_observed               : int  2 2 2 2 2 2 2 2 2 2
##  $ applicant_age                           : chr  "55-64" "45-54" "45-54" "25-34" ...
##  $ co_applicant_age                        : chr  "55-64" "45-54" "35-44" "25-34" ...
##  $ applicant_age_above_62                  : chr  "No" "No" "No" "No" ...
##  $ co_applicant_age_above_62               : chr  "No" "No" "No" "No" ...
##  $ submission_of_application               : int  1 1 1 1 1 1 1 1 1 1
##  $ initially_payable_to_institution        : int  1 1 1 1 1 1 1 1 1 1
##  $ aus_1                                   : int  6 6 6 6 6 6 6 6 6 6
##  $ aus_2                                   : int  NA NA NA NA NA NA NA NA NA NA
##  $ aus_3                                   : int  NA NA NA NA NA NA NA NA NA NA
##  $ aus_4                                   : int  NA NA NA NA NA NA NA NA NA NA
##  $ aus_5                                   : int  NA NA NA NA NA NA NA NA NA NA
##  $ denialreason1                           : int  10 7 10 7 1 10 7 10 10 10
##  $ denialreason2                           : int  NA NA NA NA NA NA NA NA NA NA
##  $ denialreason3                           : int  NA NA NA NA NA NA NA NA NA NA
##  $ denial_reason_4                         : int  NA NA NA NA NA NA NA NA NA NA
##  $ tract_population                        : int  3981 3173 2963 3699 4343 2083 4933 3835 8827 6675
##  $ tract_minority_population_percent       : num  2.96 34.57 5.74 15.76 4.77 ...
##  $ ffiec_msa_md_median_family_income       : int  70400 70100 70100 70100 49400 49400 70100 70100 61300 70400
##  $ tract_to_msa_income_percentage          : int  113 77 163 109 98 126 123 136 109 96
##  $ tract_owner_occupied_units              : int  1168 457 1013 1293 1259 804 1471 1074 2842 1426
##  $ tract_one_to_four_family_homes          : int  1470 1098 1164 1720 1877 1495 1816 1534 3487 2499
##  $ tract_median_age_of_housing_units       : int  23 28 32 57 45 25 11 28 27 32
##   [list output truncated]
##  - attr(*, ".internal.selfref")=<externalptr>

Select respondentid,censustract,amountofloan,property_value, amountofloan, and actiontaken for all applications in Louisiana (state=22) in year 2019

laapps <- data.table(dbGetQuery(con,"SELECT respondentid,censustract,amountofloan,property_value,amountofloan,actiontaken FROM lar where state='22' and asofdate=2019"))


head(laapps)
##            respondentid censustract amountofloan property_value amountofloan
## 1: 549300VORTI31GZTJL53 22071000904          105           <NA>          105
## 2: 549300VORTI31GZTJL53 22103040708          185         475000          185
## 3: 549300VORTI31GZTJL53 22033003801          105         175000          105
## 4: 549300VORTI31GZTJL53 22057021700           65           <NA>           65
## 5: 549300VORTI31GZTJL53 22051027809          135           <NA>          135
## 6: 549300VORTI31GZTJL53 22063040804          175         175000          175
##    actiontaken
## 1:           4
## 2:           3
## 3:           1
## 4:           4
## 5:           4
## 6:           1

First 10 rows of ts

df <- data.table(dbGetQuery(con,"SELECT * FROM ts "))

str(df)
## Classes 'data.table' and 'data.frame':   11265 obs. of  10 variables:
##  $ asofdate           : int  2018 2018 2018 2018 2018 2018 2018 2018 2018 2018 ...
##  $ calendar_quarter   : int  4 4 4 4 4 4 4 4 4 4 ...
##  $ respondentid       : chr  "549300S0KDQNIC08M074" "5493001SXWZ4OFP8Z903" "549300YHR41F57LS0R28" "549300SX6THT96LFD245" ...
##  $ tax_id             : chr  "04-2430894" "74-2853239" "39-0273630" "26-1866970" ...
##  $ agency_code        : int  5 7 3 3 1 3 5 5 2 5 ...
##  $ respondent_name    : chr  "UMassFive College Federal Credit Union" "DHI Mortgage Company Limited" "Farmers and Merchants Union Bank" "Oakworth Capital Bank" ...
##  $ respondent_state   : chr  "MA" "TX" "WI" "AL" ...
##  $ respondent_city    : chr  "HADLEY" "Austin" "Columbus" "Birmingham" ...
##  $ respondent_zip_code: chr  "01035" "78750" "53925" "35209" ...
##  $ lar_count          : int  357 45691 131 29 97 109 89 53 312 74 ...
##  - attr(*, ".internal.selfref")=<externalptr>

Find the respondentid of Bank of America

df <- data.table(dbGetQuery(con,"SELECT respondentid,respondent_name,asofdate FROM ts where respondent_name like '%Bank of America%'"))

df
##            respondentid                respondent_name asofdate
## 1: 254900WTZC5SSKIN2M11 FIRST NATIONAL BANK OF AMERICA     2018
## 2: B4TYDEB6GKMZO031MB27             Bank of America NA     2018
## 3: 254900WTZC5SSKIN2M11 FIRST NATIONAL BANK OF AMERICA     2019
## 4: B4TYDEB6GKMZO031MB27             Bank of America NA     2019

Read respondentid,censustract,amountofloan,property_value, amountofloan, and actiontaken for all applications submitted to Bank of America in year 2019

bofa <- data.table(dbGetQuery(con,"SELECT respondentid,censustract,amountofloan,property_value,amountofloan,actiontaken FROM lar where respondentid='B4TYDEB6GKMZO031MB27' and asofdate=2019"))

head(bofa)
##            respondentid censustract amountofloan property_value amountofloan
## 1: B4TYDEB6GKMZO031MB27 36047052300           85           <NA>           85
## 2: B4TYDEB6GKMZO031MB27 36111950600          105           <NA>          105
## 3: B4TYDEB6GKMZO031MB27 36047028100          205           <NA>          205
## 4: B4TYDEB6GKMZO031MB27 12115001503           55           <NA>           55
## 5: B4TYDEB6GKMZO031MB27 45075011600           55         215000           55
## 6: B4TYDEB6GKMZO031MB27 13121008902           35           <NA>           35
##    actiontaken
## 1:           5
## 2:           5
## 3:           5
## 4:           5
## 5:           3
## 6:           5

Read respondentid,censustract,amountofloan,property_value, amountofloan, and actiontaken for all APPROVED applications submitted to Bank of America in year 2019

bofa <- data.table(dbGetQuery(con,"SELECT respondentid,censustract,amountofloan,property_value,amountofloan,actiontaken FROM lar where respondentid='B4TYDEB6GKMZO031MB27' and asofdate=2019 and actiontaken=1"))

head(bofa)
##            respondentid censustract amountofloan property_value amountofloan
## 1: B4TYDEB6GKMZO031MB27 09001020300          205        1205000          205
## 2: B4TYDEB6GKMZO031MB27 12057013319           35         255000           35
## 3: B4TYDEB6GKMZO031MB27 32003004925          115         145000          115
## 4: B4TYDEB6GKMZO031MB27 41005022710          405         425000          405
## 5: B4TYDEB6GKMZO031MB27 06059042331         1005        1255000         1005
## 6: B4TYDEB6GKMZO031MB27 36071013800          295         345000          295
##    actiontaken
## 1:           1
## 2:           1
## 3:           1
## 4:           1
## 5:           1
## 6:           1

Disconnect

dbDisconnect(con)