rm(list=ls())
library(data.table)
library(RSQLite)
library(DBI)
dbdir = "/data/hmda.db"
con <- dbConnect(RSQLite::SQLite(),dbdir)
Table lar contains all the mortgage applications.
Table ts contains information on the lender
Tables lar and ts are linked by the variables respondentid and asofdate in both tables. respondentid is the lender’s unique id and asofdate is the year of reporting.
dbListTables(con)
## [1] "lar" "ts"
lardf <- data.table(dbGetQuery(con,"SELECT * FROM lar LIMIT 10"))
str(df)
## Classes 'data.table' and 'data.frame': 10 obs. of 100 variables:
## $ asofdate : int 2018 2018 2018 2018 2018 2018 2018 2018 2018 2018
## $ respondentid : chr "549300CUF3Q2PQGM9256" "549300CUF3Q2PQGM9256" "549300CUF3Q2PQGM9256" "549300CUF3Q2PQGM9256" ...
## $ msa : int 31140 30460 30460 30460 99999 99999 30460 30460 21060 31140
## $ state : chr "21" "21" "21" "21" ...
## $ countycode : chr "211" "239" "209" "067" ...
## $ censustract : chr "21211040101" "21239050103" "21209040601" "21067002800" ...
## $ conforming_loan_limit : chr "C" "C" "C" "C" ...
## $ derived_loan_product_type : chr "Conventional:Subordinate Lien" "Conventional:Subordinate Lien" "Conventional:Subordinate Lien" "Conventional:Subordinate Lien" ...
## $ propertytype : chr "1" "1" "1" "1" ...
## $ derived_ethnicity : chr "Not Hispanic or Latino" "Not Hispanic or Latino" "Not Hispanic or Latino" "Not Hispanic or Latino" ...
## $ derived_race : chr "White" "White" "White" "White" ...
## $ derived_sex : chr "Male" "Joint" "Joint" "Joint" ...
## $ actiontaken : int 1 3 1 3 3 1 3 1 1 1
## $ typeofpurchaser : int 0 0 0 0 0 0 0 0 0 0
## $ preapprovals : int 2 2 2 2 2 2 2 2 2 2
## $ typeofloan : int 1 1 1 1 1 1 1 1 1 1
## $ purposeofloan : int 31 31 2 4 31 4 2 2 2 31
## $ lienstatus : int 2 2 2 2 2 1 2 2 2 2
## $ reverse_mortgage : int 2 2 2 2 2 2 2 2 2 2
## $ open_end_line_of_credit : int 2 2 2 2 2 2 2 2 2 2
## $ business_or_commercial_purpose : int 2 2 2 2 2 2 2 2 2 2
## $ amountofloan : num 75 25 105 35 35 205 35 35 55 55
## $ combined_loan_to_value_ratio : chr "79.806" NA "29.067" "99.54" ...
## $ interest_rate : chr "5.0" NA "4.5" NA ...
## $ ratespread : chr "-0.01" NA "-0.5" NA ...
## $ hoepastatus : int 2 3 2 3 3 2 3 2 2 2
## $ total_loan_costs : chr NA NA NA NA ...
## $ total_points_and_fees : chr NA NA NA NA ...
## $ origination_charges : chr NA NA NA NA ...
## $ discount_points : chr NA NA NA NA ...
## $ lender_credits : chr NA NA NA NA ...
## $ loan_term : chr NA NA NA NA ...
## $ prepayment_penalty_term : chr NA NA NA NA ...
## $ intro_rate_period : chr "3" "3" "3" "3" ...
## $ negative_amortization : int 2 2 2 2 2 2 2 2 2 2
## $ interest_only_payment : int 2 2 2 2 2 2 2 2 2 2
## $ balloon_payment : int 2 2 2 2 2 2 2 2 2 2
## $ other_nonamortizing_features : int 2 2 2 2 2 2 2 2 2 2
## $ property_value : chr "495000" NA "555000" NA ...
## $ construction_method : int 1 1 1 1 1 1 1 1 1 1
## $ occupancy : int 1 1 1 1 1 1 1 1 1 1
## $ manufactured_home_secured_property_type : int 3 3 3 3 3 3 3 3 3 3
## $ manufactured_home_land_property_interest: int 5 5 5 5 5 5 5 5 5 5
## $ total_units : chr "1" "1" "1" "1" ...
## $ multifamily_affordable_units : chr NA NA NA NA ...
## $ applicantincome : int 152 168 190 62 89 91 117 83 105 87
## $ debt_to_income_ratio : chr "37" "<20%" "<20%" "43" ...
## $ applicant_credit_score_type : int 3 3 3 3 3 3 3 3 3 3
## $ co_applicant_credit_score_type : int 3 3 3 3 3 3 3 3 3 3
## $ applicantethnicity : int 2 2 2 2 2 2 2 2 2 2
## $ applicant_ethnicity_2 : int NA NA NA NA NA NA NA NA NA NA
## $ applicant_ethnicity_3 : int NA NA NA NA NA NA NA NA NA NA
## $ applicant_ethnicity_4 : int NA NA NA NA NA NA NA NA NA NA
## $ applicant_ethnicity_5 : int NA NA NA NA NA NA NA NA NA NA
## $ co_applicant_ethnicity_1 : int 2 2 2 2 2 2 2 2 2 2
## $ co_applicant_ethnicity_2 : int NA NA NA NA NA NA NA NA NA NA
## $ co_applicant_ethnicity_3 : int NA NA NA NA NA NA NA NA NA NA
## $ co_applicant_ethnicity_4 : int NA NA NA NA NA NA NA NA NA NA
## $ co_applicant_ethnicity_5 : int NA NA NA NA NA NA NA NA NA NA
## $ applicant_ethnicity_observed : int 2 2 2 2 2 2 2 2 2 2
## $ co_applicant_ethnicity_observed : int 2 2 2 2 2 2 2 2 2 2
## $ applicantrace1 : int 5 5 5 5 5 5 5 5 5 5
## $ applicant_race_2 : int NA NA NA NA NA NA NA NA NA NA
## $ applicant_race_3 : int NA NA NA NA NA NA NA NA NA NA
## $ applicant_race_4 : int NA NA NA NA NA NA NA NA NA NA
## $ applicant_race_5 : int NA NA NA NA NA NA NA NA NA NA
## $ co_applicant_race_1 : int 5 5 5 5 5 5 5 5 5 5
## $ co_applicant_race_2 : int NA NA NA NA NA NA NA NA NA NA
## $ co_applicant_race_3 : int NA NA NA NA NA NA NA NA NA NA
## $ co_applicant_race_4 : int NA NA NA NA NA NA NA NA NA NA
## $ co_applicant_race_5 : int NA NA NA NA NA NA NA NA NA NA
## $ applicant_race_observed : int 2 2 2 2 2 2 2 2 2 2
## $ co_applicant_race_observed : int 2 2 2 2 2 3 2 2 2 2
## $ applicantsex : int 1 2 1 1 1 1 1 1 1 2
## $ co_applicant_sex : int 1 1 2 2 2 2 2 2 2 1
## $ applicant_sex_observed : int 2 2 2 2 2 2 2 2 2 2
## $ co_applicant_sex_observed : int 2 2 2 2 2 2 2 2 2 2
## $ applicant_age : chr "55-64" "45-54" "45-54" "25-34" ...
## $ co_applicant_age : chr "55-64" "45-54" "35-44" "25-34" ...
## $ applicant_age_above_62 : chr "No" "No" "No" "No" ...
## $ co_applicant_age_above_62 : chr "No" "No" "No" "No" ...
## $ submission_of_application : int 1 1 1 1 1 1 1 1 1 1
## $ initially_payable_to_institution : int 1 1 1 1 1 1 1 1 1 1
## $ aus_1 : int 6 6 6 6 6 6 6 6 6 6
## $ aus_2 : int NA NA NA NA NA NA NA NA NA NA
## $ aus_3 : int NA NA NA NA NA NA NA NA NA NA
## $ aus_4 : int NA NA NA NA NA NA NA NA NA NA
## $ aus_5 : int NA NA NA NA NA NA NA NA NA NA
## $ denialreason1 : int 10 7 10 7 1 10 7 10 10 10
## $ denialreason2 : int NA NA NA NA NA NA NA NA NA NA
## $ denialreason3 : int NA NA NA NA NA NA NA NA NA NA
## $ denial_reason_4 : int NA NA NA NA NA NA NA NA NA NA
## $ tract_population : int 3981 3173 2963 3699 4343 2083 4933 3835 8827 6675
## $ tract_minority_population_percent : num 2.96 34.57 5.74 15.76 4.77 ...
## $ ffiec_msa_md_median_family_income : int 70400 70100 70100 70100 49400 49400 70100 70100 61300 70400
## $ tract_to_msa_income_percentage : int 113 77 163 109 98 126 123 136 109 96
## $ tract_owner_occupied_units : int 1168 457 1013 1293 1259 804 1471 1074 2842 1426
## $ tract_one_to_four_family_homes : int 1470 1098 1164 1720 1877 1495 1816 1534 3487 2499
## $ tract_median_age_of_housing_units : int 23 28 32 57 45 25 11 28 27 32
## [list output truncated]
## - attr(*, ".internal.selfref")=<externalptr>
respondentid,censustract,amountofloan,property_value, amountofloan, and actiontaken for all applications in Louisiana (state=22) in year 2019laapps <- data.table(dbGetQuery(con,"SELECT respondentid,censustract,amountofloan,property_value,amountofloan,actiontaken FROM lar where state='22' and asofdate=2019"))
head(laapps)
## respondentid censustract amountofloan property_value amountofloan
## 1: 549300VORTI31GZTJL53 22071000904 105 <NA> 105
## 2: 549300VORTI31GZTJL53 22103040708 185 475000 185
## 3: 549300VORTI31GZTJL53 22033003801 105 175000 105
## 4: 549300VORTI31GZTJL53 22057021700 65 <NA> 65
## 5: 549300VORTI31GZTJL53 22051027809 135 <NA> 135
## 6: 549300VORTI31GZTJL53 22063040804 175 175000 175
## actiontaken
## 1: 4
## 2: 3
## 3: 1
## 4: 4
## 5: 4
## 6: 1
tsdf <- data.table(dbGetQuery(con,"SELECT * FROM ts "))
str(df)
## Classes 'data.table' and 'data.frame': 11265 obs. of 10 variables:
## $ asofdate : int 2018 2018 2018 2018 2018 2018 2018 2018 2018 2018 ...
## $ calendar_quarter : int 4 4 4 4 4 4 4 4 4 4 ...
## $ respondentid : chr "549300S0KDQNIC08M074" "5493001SXWZ4OFP8Z903" "549300YHR41F57LS0R28" "549300SX6THT96LFD245" ...
## $ tax_id : chr "04-2430894" "74-2853239" "39-0273630" "26-1866970" ...
## $ agency_code : int 5 7 3 3 1 3 5 5 2 5 ...
## $ respondent_name : chr "UMassFive College Federal Credit Union" "DHI Mortgage Company Limited" "Farmers and Merchants Union Bank" "Oakworth Capital Bank" ...
## $ respondent_state : chr "MA" "TX" "WI" "AL" ...
## $ respondent_city : chr "HADLEY" "Austin" "Columbus" "Birmingham" ...
## $ respondent_zip_code: chr "01035" "78750" "53925" "35209" ...
## $ lar_count : int 357 45691 131 29 97 109 89 53 312 74 ...
## - attr(*, ".internal.selfref")=<externalptr>
respondentid of Bank of Americadf <- data.table(dbGetQuery(con,"SELECT respondentid,respondent_name,asofdate FROM ts where respondent_name like '%Bank of America%'"))
df
## respondentid respondent_name asofdate
## 1: 254900WTZC5SSKIN2M11 FIRST NATIONAL BANK OF AMERICA 2018
## 2: B4TYDEB6GKMZO031MB27 Bank of America NA 2018
## 3: 254900WTZC5SSKIN2M11 FIRST NATIONAL BANK OF AMERICA 2019
## 4: B4TYDEB6GKMZO031MB27 Bank of America NA 2019
respondentid,censustract,amountofloan,property_value, amountofloan, and actiontaken for all applications submitted to Bank of America in year 2019bofa <- data.table(dbGetQuery(con,"SELECT respondentid,censustract,amountofloan,property_value,amountofloan,actiontaken FROM lar where respondentid='B4TYDEB6GKMZO031MB27' and asofdate=2019"))
head(bofa)
## respondentid censustract amountofloan property_value amountofloan
## 1: B4TYDEB6GKMZO031MB27 36047052300 85 <NA> 85
## 2: B4TYDEB6GKMZO031MB27 36111950600 105 <NA> 105
## 3: B4TYDEB6GKMZO031MB27 36047028100 205 <NA> 205
## 4: B4TYDEB6GKMZO031MB27 12115001503 55 <NA> 55
## 5: B4TYDEB6GKMZO031MB27 45075011600 55 215000 55
## 6: B4TYDEB6GKMZO031MB27 13121008902 35 <NA> 35
## actiontaken
## 1: 5
## 2: 5
## 3: 5
## 4: 5
## 5: 3
## 6: 5
respondentid,censustract,amountofloan,property_value, amountofloan, and actiontaken for all APPROVED applications submitted to Bank of America in year 2019bofa <- data.table(dbGetQuery(con,"SELECT respondentid,censustract,amountofloan,property_value,amountofloan,actiontaken FROM lar where respondentid='B4TYDEB6GKMZO031MB27' and asofdate=2019 and actiontaken=1"))
head(bofa)
## respondentid censustract amountofloan property_value amountofloan
## 1: B4TYDEB6GKMZO031MB27 09001020300 205 1205000 205
## 2: B4TYDEB6GKMZO031MB27 12057013319 35 255000 35
## 3: B4TYDEB6GKMZO031MB27 32003004925 115 145000 115
## 4: B4TYDEB6GKMZO031MB27 41005022710 405 425000 405
## 5: B4TYDEB6GKMZO031MB27 06059042331 1005 1255000 1005
## 6: B4TYDEB6GKMZO031MB27 36071013800 295 345000 295
## actiontaken
## 1: 1
## 2: 1
## 3: 1
## 4: 1
## 5: 1
## 6: 1
dbDisconnect(con)