The following set of commands install R packages ‘fst’ and ‘data.table’. The data files are ‘fst’ files and once read the data is converted R ‘data.table’.
list.of.packages <- c("fst", "data.table","ggplot2","scales","rgdal")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
library(fst)
library(data.table)
library(ggplot2)
library(scales)
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.4-4, (SVN revision 833)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: C:/Users/dratnadiwakara2/Documents/R/R-3.6.1/library/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: C:/Users/dratnadiwakara2/Documents/R/R-3.6.1/library/rgdal/proj
## Linking to sp version: 1.3-1
path = "Enter your folder path here"
Policy data set is split in to 5 smaller datasets. These files can be downloaded by clicking on the following links
Claims data set is available to download here
LA_policies <- read_fst(paste0(path,"policy_data_LA.fst"),from=1, to= 1000,as.data.table = TRUE)
str(LA_policies)
## Classes 'data.table' and 'data.frame': 1000 obs. of 62 variables:
## $ reportedzipcode : chr "03102" "07002" "07007" "07037" ...
## $ effectivemonth : num 2018 2018 2018 2018 2019 ...
## $ agriculturestructureindicator : chr "N" "N" "N" "N" ...
## $ basefloodelevation : num NA -3.5 NA 13 NA NA NA NA NA NA ...
## $ basementenclosurecrawlspacetype : int 0 0 0 0 0 0 0 0 0 0 ...
## $ cancellationdateoffloodpolicy : chr "" "" "" "" ...
## $ censustract : chr "221139501" "220510218" "220890628" "220050302" ...
## $ condominiumindicator : chr "N" "N" "N" "N" ...
## $ construction : chr "N" "N" "N" "N" ...
## $ countycode : int 22113 22051 22089 22005 22103 22051 22077 NA 22009 22099 ...
## $ crsdiscount : num 0 0.2 0 0.1 0 0.2 0 0.15 0 0 ...
## $ deductibleamountinbuildingcoverage: chr "F" "F" "1" "A" ...
## $ deductibleamountincontentscoverage: chr "F" "F" "1" "0" ...
## $ elevatedbuildingindicator : chr "N" "N" "N" "N" ...
## $ elevationcertificateindicator : chr "" "3" "" "3" ...
## $ elevationdifference : int 999 0 999 1 999 999 999 999 999 999 ...
## $ federalpolicyfee : int 25 50 25 50 25 50 35 35 35 35 ...
## $ floodzone : chr "X" "AE" "X" "A" ...
## $ hfiaasurcharge : int 25 250 250 25 25 250 0 0 0 0 ...
## $ houseofworshipindicator : chr "N" "N" "N" "N" ...
## $ latitude : num 30.1 30 29.9 30.3 30.3 29.9 NA NA NA NA ...
## $ locationofcontents : chr "Lowest floor only above ground level (No basement/enclosure/crawlspace/subgrade crawlspace)" "Lowest floor above ground level and higher floors (No basement/enclosure/crawlspace/subgrade crawlspace)" "Lowest floor only above ground level (No basement/enclosure/crawlspace/subgrade crawlspace)" "" ...
## $ longitude : num -92.2 -90.2 -90.4 -91 -89.7 -90.2 NA NA NA NA ...
## $ lowestadjacentgrade : num NA -3.9 NA 13.3 NA NA NA NA NA NA ...
## $ lowestfloorelevation : num NA -3.9 NA 14.1 NA NA NA NA NA NA ...
## $ nonprofitindicator : chr "N" "N" "N" "N" ...
## $ numberoffloorsininsuredbuilding : int 1 2 1 1 1 1 1 1 1 1 ...
## $ obstructiontype : chr "" "" "" "" ...
## $ occupancytype : int 1 2 1 1 1 1 1 1 1 1 ...
## $ originalconstructiondate : Date, format: "2016-03-10" "1973-07-01" ...
## $ originalnbdate : Date, format: "2017-11-04" "1997-07-30" ...
## $ policycost : int 450 1421 574 1192 450 904 1054 1071 1395 612 ...
## $ policycount : int 1 1 1 1 1 1 1 1 1 1 ...
## $ policyeffectivedate : Date, format: "2017-11-04" "2018-07-30" ...
## $ policyterminationdate : Date, format: "2018-11-04" "2019-07-30" ...
## $ policytermindicator : int 1 1 1 1 1 1 1 1 1 1 ...
## $ postfirmconstructionindicator : num 1 0 0 1 1 0 0 0 1 0 ...
## $ primaryresidenceindicator : num 1 0 0 1 1 0 1 1 0 1 ...
## $ propertystate : chr "LA" "LA" "LA" "LA" ...
## $ ratemethod : chr "7" "B" "7" "1" ...
## $ regularemergencyprogramindicator : chr "R" "R" "R" "R" ...
## $ reportedcity : chr "KAPLAN" "METAIRIE" "LULING" "GONZALES" ...
## $ smallbusinessindicatorbuilding : chr "N" "N" "N" "N" ...
## $ totalbuildinginsurancecoverage : int 250000 250000 100000 150000 250000 55000 120900 138000 31100 60600 ...
## $ totalcontentsinsurancecoverage : int 100000 21000 40000 0 100000 0 18900 30000 2700 6800 ...
## $ totalinsurancepremiumofthepolicy : int 348 975 260 971 348 525 1019 1036 1360 577 ...
## $ censustract11 : chr "22113950100" "22051021804" "22089062800" "22005030205" ...
## $ policyduration : num 365 365 365 365 365 365 365 365 365 365 ...
## $ totalcoverage : int 350000 271000 140000 150000 350000 55000 139800 168000 33800 67400 ...
## $ premiumfor1dollarcov : num 99.4 359.8 185.7 647.3 99.4 ...
## $ policyCost1dollarcov : num 129 524 410 795 129 ...
## $ floodzone_cat : chr "X" "A" "X" "A" ...
## $ SFHA : num 0 1 0 1 0 1 1 1 1 1 ...
## $ low_risk : num 1 0 1 0 1 0 0 0 0 0 ...
## $ construction_year : num 2016 1973 1969 2002 2004 ...
## $ house_age : num NA NA NA NA NA NA NA NA NA NA ...
## $ nobasement : num 1 1 1 1 1 1 1 1 1 1 ...
## $ coastal_state : num 1 1 1 1 1 1 1 1 1 1 ...
## $ PRP : num 1 0 1 0 1 0 0 0 0 0 ...
## $ premium_cat : chr "C,B,X & PRP" "A" "C,B,X & PRP" "A" ...
## $ zhvi : int 221800 372600 NA NA 771500 NA NA NA NA NA ...
## $ effectiveyear : num 2018 2019 2018 2018 2019 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "reportedzipcode" "effectivemonth"
LA_policies <- read_fst(paste0(path,"policy_data_LA.fst"),columns = c("reportedzipcode","SFHA","effectiveyear","houseofworshipindicator","nonprofitindicator"),as.data.table = TRUE)
str(LA_policies)
## Classes 'data.table' and 'data.frame': 5461768 obs. of 5 variables:
## $ reportedzipcode : chr "03102" "07002" "07007" "07037" ...
## $ SFHA : num 0 1 0 1 0 1 1 1 1 1 ...
## $ effectiveyear : num 2018 2019 2018 2018 2019 ...
## $ houseofworshipindicator: chr "N" "N" "N" "N" ...
## $ nonprofitindicator : chr "N" "N" "N" "N" ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "reportedzipcode"
no_of_policies <- LA_policies[,.(number_of_policies=.N),by=.(SFHA,effectiveyear)]
no_of_policies <- no_of_policies[order(no_of_policies$SFHA,no_of_policies$effectiveyear)]
print.data.frame(no_of_policies)
## SFHA effectiveyear number_of_policies
## 1 0 2009 115421
## 2 0 2010 215245
## 3 0 2011 223490
## 4 0 2012 218774
## 5 0 2013 214244
## 6 0 2014 207679
## 7 0 2015 198401
## 8 0 2016 227781
## 9 0 2017 286336
## 10 0 2018 315403
## 11 0 2019 301757
## 12 0 2020 21428
## 13 1 2009 173422
## 14 1 2010 303299
## 15 1 2011 303745
## 16 1 2012 303228
## 17 1 2013 300012
## 18 1 2014 296621
## 19 1 2015 293181
## 20 1 2016 270899
## 21 1 2017 243040
## 22 1 2018 222157
## 23 1 2019 197740
## 24 1 2020 8465
policies <- read_fst(paste0(path,"Policies_Nov2019.fst"),as.data.table = TRUE)
policies_ts <- policies[policies$effectiveyear %in% 2010:2019,.(count=.N/1e6),by=.(effectiveyear,premium_cat)]
policies_ts <- dcast(policies_ts,effectiveyear~premium_cat)
## Using 'count' as value column. Use 'value.var' to override
policies_ts[,c("NA"):=list(NULL)]
names(policies_ts) <- c("year","SFHA - A", "Non-SFHA and PRP","Non-SFHA and non-PRP","SFHA - V")
hr <- reshape2::melt(policies_ts,id.var="year")
hr$variable <- as.character(hr$variable)
hr <- hr[order(-hr$variable),]
ggplot(hr, aes(x = year, y = value, fill = variable)) + geom_area(position = 'stack')+theme_minimal()+xlab("")+ylab("Number of Policies (millions) ")+ scale_fill_manual(values=c("gray90","gray70","gray40","black")) +scale_y_continuous(labels=comma_format())+theme(legend.title = element_blank(),legend.position = "bottom")+scale_x_continuous(breaks = 2010:2019)
policies_ts <- policies[policies$effectiveyear %in% 2010:2019,.(coverage=sum(totalbuildinginsurancecoverage,na.rm = TRUE)/1e9),by=.(effectiveyear,premium_cat)]
policies_ts <- dcast(policies_ts,effectiveyear~premium_cat)
## Using 'coverage' as value column. Use 'value.var' to override
policies_ts[,c("NA"):=list(NULL)]
names(policies_ts) <- c("year","SFHA - A", "Non-SFHA and PRP","Non-SFHA and non-PRP","SFHA - V")
hr <- reshape2::melt(policies_ts,id.var="year")
hr$variable <- as.character(hr$variable)
hr <- hr[order(-hr$variable),]
ggplot(hr, aes(x = year, y = value, fill = variable)) + geom_area(position = 'stack')+theme_minimal()+xlab("")+ylab("Structure Coverage $ Billions")+ scale_fill_manual(values=c("gray90","gray70","gray40","black")) +scale_y_continuous(labels=comma_format())+theme(legend.title = element_blank(),legend.position = "bottom")+scale_x_continuous(breaks = 2010:2019)
temp <- policies[policies$effectiveyear==2018]
temp[,state:=substr(temp$countycode,1,2)]
temp<-temp[,.(count=.N/100000),by=.(state)]
temp[,frac:=temp$count*100/sum(temp$count)]
us_state <- readOGR(paste0(path,"US States"),"cb_2014_us_state_20m")
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\Shapefiles\US States", layer: "cb_2014_us_state_20m"
## with 52 features
## It has 9 fields
## Integer64 fields read as strings: ALAND AWATER
## Warning in readOGR(paste0(path, "US States"), "cb_2014_us_state_20m"): Z-
## dimension discarded
us_state <- fortify(us_state[!us_state$STATEFP %in% c("02","15","72"),],region="STATEFP")
t <- data.frame(us_state[,"id"],stringsAsFactors = FALSE)
names(t) <- "state"
t2 <- merge(t,temp,by="state",all.x=TRUE)
us_state <- cbind(us_state,t2)
gr <- ggplot()+
geom_polygon(data = us_state,aes(x = long, y = lat, group = group,fill=frac),color="black")+
theme_minimal()+
theme(legend.position = "bottom",panel.grid = element_blank(),axis.text = element_blank())+
xlab("")+
ylab("")+
labs(fill="Pct of Policies in 2018")+
ggtitle("")+
scale_fill_gradient(low = "gray90", high = "gray10")
gr