1 Installing and Loading Packages

The following set of commands install R packages ‘fst’ and ‘data.table’. The data files are ‘fst’ files and once read the data is converted R ‘data.table’.

list.of.packages <- c("fst", "data.table","ggplot2","scales","rgdal")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

library(fst)
library(data.table)
library(ggplot2)
library(scales)
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.4-4, (SVN revision 833)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/dratnadiwakara2/Documents/R/R-3.6.1/library/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/dratnadiwakara2/Documents/R/R-3.6.1/library/rgdal/proj
##  Linking to sp version: 1.3-1
path = "Enter your folder path here"

2 Data Files

Policy data set is split in to 5 smaller datasets. These files can be downloaded by clicking on the following links

Claims data set is available to download here

3 Reading Policy Data

3.1 Reading First 1000 Rows

LA_policies <- read_fst(paste0(path,"policy_data_LA.fst"),from=1, to= 1000,as.data.table = TRUE)

3.2 Describing the data set

str(LA_policies)
## Classes 'data.table' and 'data.frame':   1000 obs. of  62 variables:
##  $ reportedzipcode                   : chr  "03102" "07002" "07007" "07037" ...
##  $ effectivemonth                    : num  2018 2018 2018 2018 2019 ...
##  $ agriculturestructureindicator     : chr  "N" "N" "N" "N" ...
##  $ basefloodelevation                : num  NA -3.5 NA 13 NA NA NA NA NA NA ...
##  $ basementenclosurecrawlspacetype   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ cancellationdateoffloodpolicy     : chr  "" "" "" "" ...
##  $ censustract                       : chr  "221139501" "220510218" "220890628" "220050302" ...
##  $ condominiumindicator              : chr  "N" "N" "N" "N" ...
##  $ construction                      : chr  "N" "N" "N" "N" ...
##  $ countycode                        : int  22113 22051 22089 22005 22103 22051 22077 NA 22009 22099 ...
##  $ crsdiscount                       : num  0 0.2 0 0.1 0 0.2 0 0.15 0 0 ...
##  $ deductibleamountinbuildingcoverage: chr  "F" "F" "1" "A" ...
##  $ deductibleamountincontentscoverage: chr  "F" "F" "1" "0" ...
##  $ elevatedbuildingindicator         : chr  "N" "N" "N" "N" ...
##  $ elevationcertificateindicator     : chr  "" "3" "" "3" ...
##  $ elevationdifference               : int  999 0 999 1 999 999 999 999 999 999 ...
##  $ federalpolicyfee                  : int  25 50 25 50 25 50 35 35 35 35 ...
##  $ floodzone                         : chr  "X" "AE" "X" "A" ...
##  $ hfiaasurcharge                    : int  25 250 250 25 25 250 0 0 0 0 ...
##  $ houseofworshipindicator           : chr  "N" "N" "N" "N" ...
##  $ latitude                          : num  30.1 30 29.9 30.3 30.3 29.9 NA NA NA NA ...
##  $ locationofcontents                : chr  "Lowest floor only above ground level (No basement/enclosure/crawlspace/subgrade crawlspace)" "Lowest floor above ground level and higher floors (No basement/enclosure/crawlspace/subgrade crawlspace)" "Lowest floor only above ground level (No basement/enclosure/crawlspace/subgrade crawlspace)" "" ...
##  $ longitude                         : num  -92.2 -90.2 -90.4 -91 -89.7 -90.2 NA NA NA NA ...
##  $ lowestadjacentgrade               : num  NA -3.9 NA 13.3 NA NA NA NA NA NA ...
##  $ lowestfloorelevation              : num  NA -3.9 NA 14.1 NA NA NA NA NA NA ...
##  $ nonprofitindicator                : chr  "N" "N" "N" "N" ...
##  $ numberoffloorsininsuredbuilding   : int  1 2 1 1 1 1 1 1 1 1 ...
##  $ obstructiontype                   : chr  "" "" "" "" ...
##  $ occupancytype                     : int  1 2 1 1 1 1 1 1 1 1 ...
##  $ originalconstructiondate          : Date, format: "2016-03-10" "1973-07-01" ...
##  $ originalnbdate                    : Date, format: "2017-11-04" "1997-07-30" ...
##  $ policycost                        : int  450 1421 574 1192 450 904 1054 1071 1395 612 ...
##  $ policycount                       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ policyeffectivedate               : Date, format: "2017-11-04" "2018-07-30" ...
##  $ policyterminationdate             : Date, format: "2018-11-04" "2019-07-30" ...
##  $ policytermindicator               : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ postfirmconstructionindicator     : num  1 0 0 1 1 0 0 0 1 0 ...
##  $ primaryresidenceindicator         : num  1 0 0 1 1 0 1 1 0 1 ...
##  $ propertystate                     : chr  "LA" "LA" "LA" "LA" ...
##  $ ratemethod                        : chr  "7" "B" "7" "1" ...
##  $ regularemergencyprogramindicator  : chr  "R" "R" "R" "R" ...
##  $ reportedcity                      : chr  "KAPLAN" "METAIRIE" "LULING" "GONZALES" ...
##  $ smallbusinessindicatorbuilding    : chr  "N" "N" "N" "N" ...
##  $ totalbuildinginsurancecoverage    : int  250000 250000 100000 150000 250000 55000 120900 138000 31100 60600 ...
##  $ totalcontentsinsurancecoverage    : int  100000 21000 40000 0 100000 0 18900 30000 2700 6800 ...
##  $ totalinsurancepremiumofthepolicy  : int  348 975 260 971 348 525 1019 1036 1360 577 ...
##  $ censustract11                     : chr  "22113950100" "22051021804" "22089062800" "22005030205" ...
##  $ policyduration                    : num  365 365 365 365 365 365 365 365 365 365 ...
##  $ totalcoverage                     : int  350000 271000 140000 150000 350000 55000 139800 168000 33800 67400 ...
##  $ premiumfor1dollarcov              : num  99.4 359.8 185.7 647.3 99.4 ...
##  $ policyCost1dollarcov              : num  129 524 410 795 129 ...
##  $ floodzone_cat                     : chr  "X" "A" "X" "A" ...
##  $ SFHA                              : num  0 1 0 1 0 1 1 1 1 1 ...
##  $ low_risk                          : num  1 0 1 0 1 0 0 0 0 0 ...
##  $ construction_year                 : num  2016 1973 1969 2002 2004 ...
##  $ house_age                         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ nobasement                        : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ coastal_state                     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ PRP                               : num  1 0 1 0 1 0 0 0 0 0 ...
##  $ premium_cat                       : chr  "C,B,X & PRP" "A" "C,B,X & PRP" "A" ...
##  $ zhvi                              : int  221800 372600 NA NA 771500 NA NA NA NA NA ...
##  $ effectiveyear                     : num  2018 2019 2018 2018 2019 ...
##  - attr(*, ".internal.selfref")=<externalptr> 
##  - attr(*, "sorted")= chr  "reportedzipcode" "effectivemonth"

3.3 Reading Selected Columns

LA_policies <- read_fst(paste0(path,"policy_data_LA.fst"),columns = c("reportedzipcode","SFHA","effectiveyear","houseofworshipindicator","nonprofitindicator"),as.data.table = TRUE)

str(LA_policies)
## Classes 'data.table' and 'data.frame':   5461768 obs. of  5 variables:
##  $ reportedzipcode        : chr  "03102" "07002" "07007" "07037" ...
##  $ SFHA                   : num  0 1 0 1 0 1 1 1 1 1 ...
##  $ effectiveyear          : num  2018 2019 2018 2018 2019 ...
##  $ houseofworshipindicator: chr  "N" "N" "N" "N" ...
##  $ nonprofitindicator     : chr  "N" "N" "N" "N" ...
##  - attr(*, ".internal.selfref")=<externalptr> 
##  - attr(*, "sorted")= chr "reportedzipcode"

4 Number of Policies by Year and Flood Zone

no_of_policies <- LA_policies[,.(number_of_policies=.N),by=.(SFHA,effectiveyear)]

no_of_policies <- no_of_policies[order(no_of_policies$SFHA,no_of_policies$effectiveyear)]

print.data.frame(no_of_policies)
##    SFHA effectiveyear number_of_policies
## 1     0          2009             115421
## 2     0          2010             215245
## 3     0          2011             223490
## 4     0          2012             218774
## 5     0          2013             214244
## 6     0          2014             207679
## 7     0          2015             198401
## 8     0          2016             227781
## 9     0          2017             286336
## 10    0          2018             315403
## 11    0          2019             301757
## 12    0          2020              21428
## 13    1          2009             173422
## 14    1          2010             303299
## 15    1          2011             303745
## 16    1          2012             303228
## 17    1          2013             300012
## 18    1          2014             296621
## 19    1          2015             293181
## 20    1          2016             270899
## 21    1          2017             243040
## 22    1          2018             222157
## 23    1          2019             197740
## 24    1          2020               8465

5 Reading Complete Dataset

policies <- read_fst(paste0(path,"Policies_Nov2019.fst"),as.data.table = TRUE)

6 Descriptive Statistics

6.1 Plot Number of Policies

policies_ts <- policies[policies$effectiveyear %in% 2010:2019,.(count=.N/1e6),by=.(effectiveyear,premium_cat)]
policies_ts <- dcast(policies_ts,effectiveyear~premium_cat)
## Using 'count' as value column. Use 'value.var' to override
policies_ts[,c("NA"):=list(NULL)]
names(policies_ts) <- c("year","SFHA - A", "Non-SFHA and PRP","Non-SFHA and non-PRP","SFHA - V")

hr <- reshape2::melt(policies_ts,id.var="year")
hr$variable <- as.character(hr$variable)
hr <- hr[order(-hr$variable),]


ggplot(hr, aes(x = year, y = value, fill = variable)) + geom_area(position = 'stack')+theme_minimal()+xlab("")+ylab("Number of Policies (millions) ")+   scale_fill_manual(values=c("gray90","gray70","gray40","black")) +scale_y_continuous(labels=comma_format())+theme(legend.title = element_blank(),legend.position = "bottom")+scale_x_continuous(breaks = 2010:2019)

6.2 Total Strucuture Coverage

policies_ts <- policies[policies$effectiveyear %in% 2010:2019,.(coverage=sum(totalbuildinginsurancecoverage,na.rm = TRUE)/1e9),by=.(effectiveyear,premium_cat)]
policies_ts <- dcast(policies_ts,effectiveyear~premium_cat)
## Using 'coverage' as value column. Use 'value.var' to override
policies_ts[,c("NA"):=list(NULL)]
names(policies_ts) <- c("year","SFHA - A", "Non-SFHA and PRP","Non-SFHA and non-PRP","SFHA - V")

hr <- reshape2::melt(policies_ts,id.var="year")
hr$variable <- as.character(hr$variable)
hr <- hr[order(-hr$variable),]

ggplot(hr, aes(x = year, y = value, fill = variable)) + geom_area(position = 'stack')+theme_minimal()+xlab("")+ylab("Structure Coverage $ Billions")+   scale_fill_manual(values=c("gray90","gray70","gray40","black")) +scale_y_continuous(labels=comma_format())+theme(legend.title = element_blank(),legend.position = "bottom")+scale_x_continuous(breaks = 2010:2019)

6.3 Map of Policies by State

temp <- policies[policies$effectiveyear==2018]
temp[,state:=substr(temp$countycode,1,2)]


temp<-temp[,.(count=.N/100000),by=.(state)]
temp[,frac:=temp$count*100/sum(temp$count)]


us_state <- readOGR(paste0(path,"US States"),"cb_2014_us_state_20m")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\Shapefiles\US States", layer: "cb_2014_us_state_20m"
## with 52 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND AWATER
## Warning in readOGR(paste0(path, "US States"), "cb_2014_us_state_20m"): Z-
## dimension discarded
us_state <- fortify(us_state[!us_state$STATEFP %in% c("02","15","72"),],region="STATEFP")


t <- data.frame(us_state[,"id"],stringsAsFactors = FALSE)
names(t) <- "state"
t2 <- merge(t,temp,by="state",all.x=TRUE)

us_state <- cbind(us_state,t2)


gr <- ggplot()+
  geom_polygon(data = us_state,aes(x = long, y = lat, group = group,fill=frac),color="black")+
  theme_minimal()+
  theme(legend.position = "bottom",panel.grid = element_blank(),axis.text = element_blank())+
  xlab("")+
  ylab("")+
  labs(fill="Pct of Policies in 2018")+
  ggtitle("")+
  scale_fill_gradient(low = "gray90", high = "gray10")
gr