The following set of commands install R packages ‘fst’ and ‘data.table’. The data files are ‘fst’ files and once read the data is converted R ‘data.table’.
list.of.packages <- c("fst", "data.table")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
library(fst)
library(data.table)
Policy data set is split in to 5 smaller datasets. These files can be downloaded by clicking on the following links
Claims data set is available to download here
LA_policies <- read_fst("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/NFIP/policies/policy_data_LA.fst",from=1, to= 1000,as.data.table = TRUE)
str(LA_policies)
## Classes 'data.table' and 'data.frame': 1000 obs. of 62 variables:
## $ reportedzipcode : chr "03102" "07002" "07007" "07037" ...
## $ effectivemonth : num 2018 2018 2018 2018 2019 ...
## $ agriculturestructureindicator : chr "N" "N" "N" "N" ...
## $ basefloodelevation : num NA -3.5 NA 13 NA NA NA NA NA NA ...
## $ basementenclosurecrawlspacetype : int 0 0 0 0 0 0 0 0 0 0 ...
## $ cancellationdateoffloodpolicy : chr "" "" "" "" ...
## $ censustract : chr "221139501" "220510218" "220890628" "220050302" ...
## $ condominiumindicator : chr "N" "N" "N" "N" ...
## $ construction : chr "N" "N" "N" "N" ...
## $ countycode : int 22113 22051 22089 22005 22103 22051 22077 NA 22009 22099 ...
## $ crsdiscount : num 0 0.2 0 0.1 0 0.2 0 0.15 0 0 ...
## $ deductibleamountinbuildingcoverage: chr "F" "F" "1" "A" ...
## $ deductibleamountincontentscoverage: chr "F" "F" "1" "0" ...
## $ elevatedbuildingindicator : chr "N" "N" "N" "N" ...
## $ elevationcertificateindicator : chr "" "3" "" "3" ...
## $ elevationdifference : int 999 0 999 1 999 999 999 999 999 999 ...
## $ federalpolicyfee : int 25 50 25 50 25 50 35 35 35 35 ...
## $ floodzone : chr "X" "AE" "X" "A" ...
## $ hfiaasurcharge : int 25 250 250 25 25 250 0 0 0 0 ...
## $ houseofworshipindicator : chr "N" "N" "N" "N" ...
## $ latitude : num 30.1 30 29.9 30.3 30.3 29.9 NA NA NA NA ...
## $ locationofcontents : chr "Lowest floor only above ground level (No basement/enclosure/crawlspace/subgrade crawlspace)" "Lowest floor above ground level and higher floors (No basement/enclosure/crawlspace/subgrade crawlspace)" "Lowest floor only above ground level (No basement/enclosure/crawlspace/subgrade crawlspace)" "" ...
## $ longitude : num -92.2 -90.2 -90.4 -91 -89.7 -90.2 NA NA NA NA ...
## $ lowestadjacentgrade : num NA -3.9 NA 13.3 NA NA NA NA NA NA ...
## $ lowestfloorelevation : num NA -3.9 NA 14.1 NA NA NA NA NA NA ...
## $ nonprofitindicator : chr "N" "N" "N" "N" ...
## $ numberoffloorsininsuredbuilding : int 1 2 1 1 1 1 1 1 1 1 ...
## $ obstructiontype : chr "" "" "" "" ...
## $ occupancytype : int 1 2 1 1 1 1 1 1 1 1 ...
## $ originalconstructiondate : Date, format: "2016-03-10" "1973-07-01" ...
## $ originalnbdate : Date, format: "2017-11-04" "1997-07-30" ...
## $ policycost : int 450 1421 574 1192 450 904 1054 1071 1395 612 ...
## $ policycount : int 1 1 1 1 1 1 1 1 1 1 ...
## $ policyeffectivedate : Date, format: "2017-11-04" "2018-07-30" ...
## $ policyterminationdate : Date, format: "2018-11-04" "2019-07-30" ...
## $ policytermindicator : int 1 1 1 1 1 1 1 1 1 1 ...
## $ postfirmconstructionindicator : num 1 0 0 1 1 0 0 0 1 0 ...
## $ primaryresidenceindicator : num 1 0 0 1 1 0 1 1 0 1 ...
## $ propertystate : chr "LA" "LA" "LA" "LA" ...
## $ ratemethod : chr "7" "B" "7" "1" ...
## $ regularemergencyprogramindicator : chr "R" "R" "R" "R" ...
## $ reportedcity : chr "KAPLAN" "METAIRIE" "LULING" "GONZALES" ...
## $ smallbusinessindicatorbuilding : chr "N" "N" "N" "N" ...
## $ totalbuildinginsurancecoverage : int 250000 250000 100000 150000 250000 55000 120900 138000 31100 60600 ...
## $ totalcontentsinsurancecoverage : int 100000 21000 40000 0 100000 0 18900 30000 2700 6800 ...
## $ totalinsurancepremiumofthepolicy : int 348 975 260 971 348 525 1019 1036 1360 577 ...
## $ censustract11 : chr "22113950100" "22051021804" "22089062800" "22005030205" ...
## $ policyduration : num 365 365 365 365 365 365 365 365 365 365 ...
## $ totalcoverage : int 350000 271000 140000 150000 350000 55000 139800 168000 33800 67400 ...
## $ premiumfor1dollarcov : num 99.4 359.8 185.7 647.3 99.4 ...
## $ policyCost1dollarcov : num 129 524 410 795 129 ...
## $ floodzone_cat : chr "X" "A" "X" "A" ...
## $ SFHA : num 0 1 0 1 0 1 1 1 1 1 ...
## $ low_risk : num 1 0 1 0 1 0 0 0 0 0 ...
## $ construction_year : num 2016 1973 1969 2002 2004 ...
## $ house_age : num NA NA NA NA NA NA NA NA NA NA ...
## $ nobasement : num 1 1 1 1 1 1 1 1 1 1 ...
## $ coastal_state : num 1 1 1 1 1 1 1 1 1 1 ...
## $ PRP : num 1 0 1 0 1 0 0 0 0 0 ...
## $ premium_cat : chr "C,B,X & PRP" "A" "C,B,X & PRP" "A" ...
## $ zhvi : int 221800 372600 NA NA 771500 NA NA NA NA NA ...
## $ effectiveyear : num 2018 2019 2018 2018 2019 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "reportedzipcode" "effectivemonth"
LA_policies <- read_fst("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/NFIP/policies/policy_data_LA.fst",columns = c("reportedzipcode","SFHA","effectiveyear","houseofworshipindicator","nonprofitindicator"),as.data.table = TRUE)
str(LA_policies)
## Classes 'data.table' and 'data.frame': 5461768 obs. of 5 variables:
## $ reportedzipcode : chr "03102" "07002" "07007" "07037" ...
## $ SFHA : num 0 1 0 1 0 1 1 1 1 1 ...
## $ effectiveyear : num 2018 2019 2018 2018 2019 ...
## $ houseofworshipindicator: chr "N" "N" "N" "N" ...
## $ nonprofitindicator : chr "N" "N" "N" "N" ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "reportedzipcode"
no_of_policies <- LA_policies[,.(number_of_policies=.N),by=.(SFHA,effectiveyear)]
no_of_policies <- no_of_policies[order(no_of_policies$SFHA,no_of_policies$effectiveyear)]
print.data.frame(no_of_policies)
## SFHA effectiveyear number_of_policies
## 1 0 2009 115421
## 2 0 2010 215245
## 3 0 2011 223490
## 4 0 2012 218774
## 5 0 2013 214244
## 6 0 2014 207679
## 7 0 2015 198401
## 8 0 2016 227781
## 9 0 2017 286336
## 10 0 2018 315403
## 11 0 2019 301757
## 12 0 2020 21428
## 13 1 2009 173422
## 14 1 2010 303299
## 15 1 2011 303745
## 16 1 2012 303228
## 17 1 2013 300012
## 18 1 2014 296621
## 19 1 2015 293181
## 20 1 2016 270899
## 21 1 2017 243040
## 22 1 2018 222157
## 23 1 2019 197740
## 24 1 2020 8465