rm(list=ls())
library(data.table)
library(RSQLite)
library(DBI)

Documentation

Connecting to Database

dbdir = "/data/freddie/freddie.db"
con <- dbConnect(RSQLite::SQLite(),dbdir)

List tables

dbListTables(con)
## [1] "acq"

First 10 rows of acq

Remove LIMIT 10 from the query to retrieve all the records from 1999.

df <- data.table(dbGetQuery(con,"SELECT * FROM acq LIMIT 10"))

str(df)
## Classes 'data.table' and 'data.frame':   10 obs. of  31 variables:
##  $ Credit_Score                 : int  796 805 730 762 777 660 784 792 791 712
##  $ First_Payment_Date           : int  201505 201504 201503 201505 201505 201505 201503 201504 201503 201504
##  $ First_Time_Homebuyer_Flag    : chr  "9" "9" "9" "9" ...
##  $ Maturity_Date                : int  203004 204503 203002 204504 204504 204504 204502 204503 204502 203003
##  $ MSA                          : int  43580 30700 NA NA 37900 NA NA 43580 35840 27740
##  $ Mortgage_Insurance_Percentage: int  0 0 0 0 0 0 0 0 0 0
##  $ Number_of_Units              : chr  "1" "1" "1" "1" ...
##  $ Occupancy_Status             : chr  "P" "P" "P" "P" ...
##  $ Original_Combined_LTV        : int  72 79 73 70 80 98 76 66 50 69
##  $ Original_DTI_Ratio           : int  21 20 32 14 40 999 29 17 30 43
##  $ Original_UPB                 : int  320000 110000 376000 295000 192000 178000 265000 230000 80000 81000
##  $ Original_LTV                 : int  72 79 73 70 80 72 76 66 50 69
##  $ Original_Interest_Rate       : num  2.62 3.88 2.88 3.38 3.62 ...
##  $ Channel                      : chr  "R" "B" "R" "R" ...
##  $ Prepayment_Penalty_Flag      : chr  "N" "N" "N" "N" ...
##  $ Amortization_Type            : chr  "FRM" "FRM" "FRM" "FRM" ...
##  $ Property_State               : chr  "IA" "NE" "KY" "CO" ...
##  $ Property_Type                : chr  "SF" "SF" "SF" "SF" ...
##  $ Postal_Code                  : int  51000 68500 40400 81200 61700 53000 81200 51000 34200 37600
##  $ Loan_Sequence_Number         : chr  "F15Q10000001" "F15Q10000002" "F15Q10000003" "F15Q10000004" ...
##  $ Loan_Purpose                 : chr  "C" "N" "N" "N" ...
##  $ Original_Loan_Term           : int  180 360 180 360 360 360 360 360 360 180
##  $ Number_of_Borrowers          : int  1 1 2 2 1 1 2 2 2 1
##  $ Seller_Name                  : chr  "Other sellers" "Other sellers" "Other sellers" "Other sellers" ...
##  $ Servicer_Name                : chr  "Other servicers" "Other servicers" "MATRIX FINANCIAL SERVICES CORPORATION" "U.S. BANK N.A." ...
##  $ Super_Conforming_Flag        : chr  "" "" "" "" ...
##  $ Pre_HARP_Loan_Sequence_Number: chr  "" "" "" "" ...
##  $ Program_Indicator            : chr  "9" "9" "9" "9" ...
##  $ HARP_Indicator               : chr  "" "" "" "" ...
##  $ Property_Valuation_Method    : int  9 9 9 9 9 9 9 9 9 9
##  $ Interest_Only_Indicator      : chr  "N" "N" "N" "N" ...
##  - attr(*, ".internal.selfref")=<externalptr>

Calculating Loan Origination Month

df[,orgmonth:=as.Date(paste0(substr(First_Payment_Date,1,4),"-",substr(First_Payment_Date,5,6),"-01"))]
df[,originationyear:=year(orgmonth)]