Introduction

The BARI Building Permits dataset is actually a few different distinct items. First, it’s a

We wanted to explore some of the temporal dynamics in the building permits data. For this we’re going to start with the individual permit-level dataset (Permits.Records.Geocoded.20180122.csv) rather than one of the geographic aggregated datasets.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(tidyr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# importing the data ####
# permit_data <- read.csv("C:/Users/ux305/Desktop/School/Semester 4/BARI/Automation/Building permit/Permits.Records.Geocoded.20180122 (1).csv")
BARIdrive_path <- "/Users/justin/Google Drive/BARI/BARI Research Team Data Library/"
permit_data <- read.csv(paste0(BARIdrive_path,"Permits/Data/Permits.Records.Geocoded.20180122.csv"))


# tidy work
names(permit_data) <- tolower(names(permit_data))
permit_data <- permit_data %>%
      mutate(year_issued = lubridate::year(issued_date))
## Warning in strptime(xx, f <- "%Y-%m-%d %H:%M:%OS", tz = tz): unknown
## timezone 'zone/tz/2018c.1.0/zoneinfo/America/New_York'

What percent of building permits in 2017 were residential vs. commercial vs. anything else?

Our first question was what types of buildings are getting renovated/constructed/demolished in Boston - basically, what types of development are most common? And are those types changing over time?

#Q1
#different types of occupancy
table(permit_data$occupancy)
## 
##        1-2FAM 1-3FAM 1-4FAM 1-7FAM  1Unit  2unit  3unit  4unit  4Unit 
##   7996 109903  36255   9815   2061   6120    618    717    374      7 
##  5unit  6unit  6Unit  7More  7unit   Comm   COMM  Mixed  MIXED  Multi 
##    277    299      4   4781     76 108984    184  13540      2  35872 
##  Other  VacLd 
##  13389   3408

Looks like some odd categorizations of this occupancy variable that distinguishes between commercial and mixed, but then subdivides residential into the specific number of units within the building. Let’s clean that up a bit into broader categories of development.

permit_data$occupancy_old <- permit_data$occupancy
permit_data <- permit_data %>%
  # drop_na(occupancy) %>% #removing NAs of the occupancy column
  mutate(occupancy = gsub("Comm", "Commercial", occupancy)) %>% #changing the categories to commercial, residential, mixed and other
  mutate(occupancy = gsub("COMM", "Commercial", occupancy)) %>%
  mutate(occupancy = gsub("Mixed", "Mixed-used", occupancy)) %>%
  mutate(occupancy = gsub("MIXED", "Mixed-used", occupancy)) %>%
  mutate(occupancy = gsub("1-3FAM|1-4FAM|1-7FAM|1Unit|2unit|3unit|4unit|4Unit|5unit|6unit|7More|7unit|residential|VacLd|6Unit|Multi|1-2FAM", "Residential", occupancy)) #considering Multi, xFAM and xUnit categories also residential
  
permit_data$occupancy[permit_data$occupancy==""] <- "Missing" # this seems problematic but we're going to hold onto it for now
permit_data$occupancy[permit_data$occupancy==" "] <- "Missing" # this seems problematic but we're going to hold onto it for now

permit_yearsumm <- permit_data %>%
      group_by(year_issued, occupancy) %>%
      summarize(total=n())

(yearsumm_occupancy_plot <- ggplot(permit_yearsumm) + 
            geom_bar(aes(x=year_issued,weight=total,fill=occupancy)) + 
            scale_x_continuous("Year Permit Issued",breaks=c(2005:2017),limits=c(2010,2018)) + 
            scale_fill_brewer("Occupancy type",type="qual") + 
            scale_y_continuous("Count") +
            theme_bw() + 
            theme(axis.text.y = element_text(angle = 45))
)
## Warning: Removed 7 rows containing non-finite values (stat_count).

ggsave(plot = yearsumm_occupancy_plot,filename = "figures/yearsumm_occupancy.pdf",height=7,width=10)
## Warning: Removed 7 rows containing non-finite values (stat_count).

This shows that the blank occupancy type drastically increases in 2015 whereas it was negligible in 2014 and previously. That’s odd. Let’s look at it in percentage terms.

#percentage of commercial permits
(length(which(permit_data$occupancy == "Commercial"))/nrow(permit_data))*100
## [1] 30.77912
#percentage of Mixed-use permits
(length(which(permit_data$occupancy == "Mixed-used"))/nrow(permit_data))*100
## [1] 3.818068
#percentage of residential permits
(length(which(permit_data$occupancy == "Residential"))/nrow(permit_data))*100
## [1] 59.37347
#percentage of Other permits
(length(which(permit_data$occupancy == "Other"))/nrow(permit_data))*100
## [1] 3.774931
#percentage of unknown permits
(length(which(permit_data$occupancy != "Other" &  permit_data$occupancy != "Residential" & permit_data$occupancy != "Mixed-used" & permit_data$occupancy != "Commercial"))/nrow(permit_data))*100
## [1] 2.254414
permit_yearsumm_grand <- permit_yearsumm %>%
      group_by(year_issued) %>%
      summarize(grandtotal = sum(total))
permit_yearsumm <- merge(permit_yearsumm,permit_yearsumm_grand, by="year_issued",all=T)
permit_yearsumm$perc <- permit_yearsumm$total/permit_yearsumm$grandtotal

subset(permit_yearsumm,year_issued>2009 & year_issued<2018)
##    year_issued   occupancy total grandtotal        perc
## 8         2010  Commercial 10309      34688 0.297192113
## 9         2010     Missing   157      34688 0.004526061
## 10        2010  Mixed-used  1008      34688 0.029059041
## 11        2010       Other  1298      34688 0.037419280
## 12        2010 Residential 21916      34688 0.631803506
## 13        2011  Commercial 12845      38691 0.331989352
## 14        2011     Missing   246      38691 0.006358068
## 15        2011  Mixed-used  1191      38691 0.030782352
## 16        2011       Other  1299      38691 0.033573699
## 17        2011 Residential 23110      38691 0.597296529
## 18        2012  Commercial 12986      40572 0.320072957
## 19        2012     Missing   370      40572 0.009119590
## 20        2012  Mixed-used  1492      40572 0.036774130
## 21        2012       Other  1565      40572 0.038573400
## 22        2012 Residential 24159      40572 0.595459923
## 23        2013  Commercial 12980      42845 0.302952503
## 24        2013     Missing   243      42845 0.005671607
## 25        2013  Mixed-used  1811      42845 0.042268643
## 26        2013       Other  1809      42845 0.042221963
## 27        2013 Residential 26002      42845 0.606885284
## 28        2014  Commercial 14027      46107 0.304227124
## 29        2014     Missing   203      46107 0.004402802
## 30        2014  Mixed-used  1986      46107 0.043073720
## 31        2014       Other  1801      46107 0.039061314
## 32        2014 Residential 28090      46107 0.609235040
## 33        2015  Commercial 15156      48248 0.314127010
## 34        2015     Missing  1815      48248 0.037618140
## 35        2015  Mixed-used  1874      48248 0.038840988
## 36        2015       Other  1886      48248 0.039089703
## 37        2015 Residential 27517      48248 0.570324159
## 38        2016  Commercial 15076      49766 0.302937749
## 39        2016     Missing  2183      49766 0.043865290
## 40        2016  Mixed-used  2024      49766 0.040670337
## 41        2016       Other  1840      49766 0.036973034
## 42        2016 Residential 28643      49766 0.575553591
## 43        2017  Commercial 14707      50528 0.291066339
## 44        2017     Missing  2582      50528 0.051100380
## 45        2017  Mixed-used  2012      50528 0.039819506
## 46        2017       Other  1799      50528 0.035604022
## 47        2017 Residential 29428      50528 0.582409753
reshape2::dcast(subset(permit_yearsumm,year_issued>2009 & year_issued<2018),formula = occupancy ~ year_issued,value.var = "perc")
##     occupancy        2010        2011       2012        2013        2014
## 1  Commercial 0.297192113 0.331989352 0.32007296 0.302952503 0.304227124
## 2     Missing 0.004526061 0.006358068 0.00911959 0.005671607 0.004402802
## 3  Mixed-used 0.029059041 0.030782352 0.03677413 0.042268643 0.043073720
## 4       Other 0.037419280 0.033573699 0.03857340 0.042221963 0.039061314
## 5 Residential 0.631803506 0.597296529 0.59545992 0.606885284 0.609235040
##         2015       2016       2017
## 1 0.31412701 0.30293775 0.29106634
## 2 0.03761814 0.04386529 0.05110038
## 3 0.03884099 0.04067034 0.03981951
## 4 0.03908970 0.03697303 0.03560402
## 5 0.57032416 0.57555359 0.58240975

Confirming that proportion missing increased tenfold in 2015. Residential seed a 3pp decrease, others pretty even across the board.

#percentage plot for different types of permits
(prop.all <- ggplot(permit_data) + 
    geom_bar(aes(x = occupancy, fill = occupancy,y = (..count..)/sum(..count..))) + 
    scale_y_continuous('Proportion') + 
       scale_x_discrete("Occupancy Type") + 
       # scale_fill_discrete("Occupancy Type") +
      theme_bw() + 
       theme(legend.position="none")
 )

ggsave(plot=prop.all,"figures/prop_occupancytype.pdf",height=7,width=10)
(prop.all.yearly <- ggplot(permit_yearsumm) + 
    geom_bar(aes(x = year_issued, fill = occupancy,y = perc),stat="identity") + 
    scale_y_continuous('Proportion') + 
       scale_x_continuous("Year Issued",limits=c(2009.5,2017.5),breaks=c(2010:2017)) + 
       scale_fill_discrete("Occupancy Type") +
      theme_bw()
 )
## Warning: Removed 12 rows containing missing values (position_stack).

ggsave(plot=prop.all.yearly,"figures/prop_occupancytype_year.pdf",height=7,width=10)
## Warning: Removed 12 rows containing missing values (position_stack).

Definitely some issues with this occupancy type. But going to save those for later.

Development type

Next we’re going to look into the type of development that each building permit allows. The main categories for these that the BARI data team created are new construction, demolition, addition, and renovation. Let’s see how the data divides into these categories - and how this breakdown varies over time.

table(permit_data$newcon)
## 
##      0      1 
## 344820   9862
table(permit_data$addition)
## 
##      0      1 
## 328209  26473
table(permit_data$demo)
## 
##      0      1 
## 349095   5587
table(permit_data$reno)
## 
##      0      1 
##  53261 301421
table(permit_data$specialevents)
## 
##      0      1 
## 353741    941
permit_data$dev_type <- NA
permit_data$dev_type[permit_data$newcon==1] <- "New Construction"
permit_data$dev_type[permit_data$addition==1] <- "Addition"
permit_data$dev_type[permit_data$demo==1] <- "Demolition"
permit_data$dev_type[permit_data$reno==1] <- "Renovation"
permit_data$dev_type[permit_data$specialevents==1] <- "Special Events"

table(permit_data$dev_type)
## 
##         Addition       Demolition New Construction       Renovation 
##            26472             5587             9858           300485 
##   Special Events 
##              941

Okay, but lets see how that varies by year:

permit_type_yearsumm <- subset(permit_data,year_issued>2009) %>%
      group_by(year_issued, dev_type) %>%
      summarize(total=n())
      
(yearsumm_devtype_plot <- ggplot(permit_type_yearsumm) + 
            geom_bar(aes(x=year_issued,weight=total,fill=dev_type)) + 
            scale_x_continuous("Year Permit Issued",breaks=c(2005:2017),limits=c(2010,2018)) + 
            scale_fill_brewer("Development type",type="qual") + 
            scale_y_continuous("Count") +
            theme_bw() + 
            theme(axis.text.y = element_text(angle = 45))
)

ggsave(plot = yearsumm_devtype_plot,filename = "figures/yearsumm_devtype.pdf",height=7,width=10)

Notice some definitely weird dynamics with the New Construction category starting in 2015, which is odd.

permit_type_yearsumm_grand <- permit_type_yearsumm %>%
      group_by(year_issued) %>%
      summarize(grandtotal = sum(total))
permit_type_yearsumm <- merge(permit_type_yearsumm,permit_type_yearsumm_grand, by="year_issued",all=T)
permit_type_yearsumm$perc <- permit_type_yearsumm$total/permit_type_yearsumm$grandtotal

reshape2::dcast(subset(permit_type_yearsumm,year_issued>2009 & year_issued<2018),formula = dev_type ~ year_issued,value.var = "perc")
##           dev_type        2010        2011        2012        2013
## 1         Addition 0.060539668 0.072445788 0.074484866 0.086637881
## 2       Demolition 0.017152906 0.015869324 0.014887114 0.015521064
## 3 New Construction 0.005563884 0.007314363 0.007147787 0.010479636
## 4       Renovation 0.878257611 0.867695330 0.865794144 0.855409033
## 5   Special Events 0.002911670 0.002196893 0.003007000 0.003921111
## 6             <NA> 0.035574262 0.034478302 0.034679089 0.028031276
##         2014        2015        2016        2017
## 1 0.07538985 0.077951418 0.077763935 0.069901837
## 2 0.01674366 0.016995523 0.015050436 0.014249525
## 3 0.01110460 0.047069309 0.051320178 0.060817764
## 4 0.86531329 0.827060189 0.822067275 0.817942527
## 5 0.00247251 0.002445697 0.002210344 0.002275966
## 6 0.02897608 0.028477864 0.031587831 0.034812381

Definitely a large increase from ~1% to ~5% of permits going to new construction starting in 2015.

(yearsumm_devprop_plot <- ggplot(permit_type_yearsumm) + 
    geom_bar(aes(x = year_issued, fill = dev_type,y = perc),stat="identity") + 
    scale_y_continuous('Proportion') + 
       scale_x_continuous("Year Issued",limits=c(2009.5,2017.5),breaks=c(2010:2017)) + 
       scale_fill_discrete("Development Type") +
      theme_bw()
 )
## Warning: Removed 6 rows containing missing values (position_stack).

ggsave(plot=yearsumm_devprop_plot,"figures/prop_devtype_year.pdf",height=7,width=10)
## Warning: Removed 6 rows containing missing values (position_stack).

Definitely weird. Is this coming from how the variable is constructed? Right now, using the BARI team’s code, it looks like the newcon variable is created from a complicated logical statement with multiple {r} | statements:

# NEWCON = (!is.na(permits$DESCRIPTION) & 
#             (permits$DESCRIPTION=="Excavation Borings Test Pits" | 
#                permits$DESCRIPTION == "New construction" | 
#                permits$DESCRIPTION == "Erect" | 
#                permits$DESCRIPTION == "New construction")) | 
#   (!is.na(permits$PermitTypeDescr) & 
#      (permits$PermitTypeDescr=="Erect/New Construction" |
#         permits$PermitTypeDescr == "Driveway Excavation Permit" | 
#         permits$PermitTypeDescr == "Emergency Excavation Permit" | 
#         permits$PermitTypeDescr == "Excavation Permit" | 
#         permits$PermitTypeDescr == "Foundation Permit"))

That’s not great coding practice — would be better to have an {r} %in% statement with a vector of values instead.

# checking NEWCON variable with a different way of creating it:
permit_data$newcon2 <- NA
permit_data$newcon2[which(permit_data$description %in% c("Excavation Borings Test Pits","New construction","Erect"))] <- 1
permit_data$newcon2[which(permit_data$permittypedescr %in% c("Erect/New Construction","Excavation Permit","Foundation Permit"))] <- 1
sum(permit_data$newcon!=permit_data$newcon2,na.rm=T) # 0 discrepancies, so this isn't why there's an issue...
## [1] 0
# fixing to put excavation with addition:
permit_data$addition2 <- permit_data$addition
permit_data$addition2[which(permit_data$description %in% c("Excavation Borings Test Pits"))] <- 1
permit_data$addition2[which(permit_data$permittypedescr %in% c("Excavation Permit","Foundation Permit"))] <- 1

# let's look at underlying variables:
permittypedescr_yearly <- permit_data %>%
      group_by(year_issued,permittypedescr) %>%
      summarize(total=n())

permittypedescr_yearly_grand <- permittypedescr_yearly %>%
      group_by(year_issued) %>%
      summarize(grandtotal = sum(total))
permittypedescr_yearly <- merge(permittypedescr_yearly,permittypedescr_yearly_grand, by="year_issued",all=T)
permittypedescr_yearly$perc <- permittypedescr_yearly$total/permittypedescr_yearly$grandtotal

reshape2::dcast(subset(permittypedescr_yearly,year_issued>2009 & year_issued<2018),formula = permittypedescr ~ year_issued,value.var = "perc")
##                 permittypedescr         2010         2011         2012
## 1      Amendment to a Long Form 0.0042089483 0.0058928433 0.0073942621
## 2      Certificate of Occupancy 0.0356319188 0.0345558399 0.0366015972
## 3        Electrical Fire Alarms 0.0368138838 0.0499599390 0.0533372769
## 4        Electrical Low Voltage 0.0618657749 0.0804838334 0.0928472838
## 5             Electrical Permit 0.2273985240 0.2117546716 0.2042295179
## 6  Electrical Temporary Service 0.0144430351 0.0157917862 0.0149117618
## 7        Erect/New Construction 0.0020756458 0.0040060996 0.0042640245
## 8             Excavation Permit           NA           NA           NA
## 9             Foundation Permit 0.0001729705 0.0002067664 0.0001232377
## 10                   Gas Permit 0.1193496310 0.1081905353 0.1056886523
## 11  Long Form/Alteration Permit 0.0559847786 0.0653898840 0.0666962437
## 12              Plumbing Permit 0.1404808579 0.1399808741 0.1403923888
## 13       Short Form Bldg Permit 0.2986623616 0.2815900339 0.2705067534
## 14              Use of Premises 0.0029116697 0.0021968933 0.0030069999
##            2013         2014         2015         2016         2017
## 1  0.0077488622 0.0075693496 0.0068811142 0.0091829763 0.0073622546
## 2  0.0330960439 0.0336825211 0.0338874150 0.0373749146 0.0409871754
## 3  0.0545454545 0.0562821264 0.0522923230 0.0587549733 0.0594917669
## 4  0.0905823317 0.0941722515 0.0890606865 0.0907446851 0.0795598480
## 5  0.2029641732 0.2075172967 0.1989927044 0.2016838806 0.1968413553
## 6  0.0132104096 0.0191077277 0.0122699387 0.0116344492 0.0137547498
## 7  0.0044345898 0.0051185286 0.0047463107 0.0057468955 0.0056998100
## 8            NA           NA 0.0357942298 0.0401077041 0.0497348005
## 9  0.0002100595 0.0002602642 0.0002072625 0.0001406583 0.0001583281
## 10 0.1023223247 0.1070336391 0.0905944288 0.0905638388 0.0978665294
## 11 0.0782821800 0.0671698441 0.0701168960 0.0678575734 0.0615302407
## 12 0.1358618275 0.1382653393 0.1251658100 0.1293654302 0.1337674161
## 13 0.2728206325 0.2613486022 0.2775451832 0.2546316762 0.2509697593
## 14 0.0039211110 0.0024725096 0.0024456972 0.0022103444 0.0022759658

Well, this looks like it could be one reason why. Excavation permits don’t exist before 2015, but then are 3.5% of the data in 2015!

(yearsumm_typedescrprop_plot <- ggplot(permittypedescr_yearly) + 
    geom_bar(aes(x = year_issued, fill = permittypedescr,y = perc),stat="identity") + 
    scale_y_continuous('Proportion') + 
       scale_x_continuous("Year Issued",limits=c(2009.5,2017.5),breaks=c(2010:2017)) + 
       scale_fill_discrete("Type Description") +
      theme_bw()
 )
## Warning: Removed 23 rows containing missing values (position_stack).

ggsave(plot=yearsumm_typedescrprop_plot,"figures/prop_typedescr_year.pdf",height=7,width=10)
## Warning: Removed 23 rows containing missing values (position_stack).
# could it be because this was something else in the description field pre-2015?
permit_data$boring <- ifelse(permit_data$description=="Excavation  Borings Test Pits",1,0)
subset(permit_data,year_issued>2009) %>%
      group_by(year_issued,boring) %>%
      summarize(total = n()) %>%
      reshape2::dcast(formula = boring ~ year_issued,value.var = "total")
##   boring  2010  2011  2012  2013  2014  2015  2016  2017 2018
## 1      0 34559 38544 40410 42704 45950 48089 49610 50375 2179
## 2      1   129   147   162   141   157   159   156   153   12

Not that, since it looks pretty consistent across 2014-15, so it must be the PermitTypeDescr value of Excavation!

Residential Development: temporal variation

What percent of residential building permits were renovation/new building/demolition/etc.?

#looking only at the residential building permits
residential_data <- permit_data %>%
  filter(occupancy == "Residential")

res_yearly_summ <- subset(residential_data,year_issued>2009) %>%
      group_by(year_issued,occupancy_old,dev_type) %>%
      summarize(total=n())

res_reno_yearly_summ <- subset(res_yearly_summ,dev_type=="Renovation")

res_reno_yearly_grand <- res_reno_yearly_summ %>%
      group_by(year_issued) %>%
      summarize(grandtotal = sum(total))
res_reno_yearly_summ <- merge(res_reno_yearly_summ,res_reno_yearly_grand, by="year_issued",all=T)
res_reno_yearly_summ$perc <- res_reno_yearly_summ$total/res_reno_yearly_summ$grandtotal

reshape2::dcast(subset(res_reno_yearly_summ,year_issued>2014 & year_issued<2018),formula = occupancy_old ~ year_issued,value.var = "perc")
##   occupancy_old         2015         2016       2017
## 1        1-2FAM 5.713423e-01 5.612771e-01 0.54160431
## 2        1-3FAM 1.727024e-01 1.805184e-01 0.17925022
## 3        1-4FAM 6.168518e-02 4.730908e-02 0.04560760
## 4         7More 4.023821e-05 3.874617e-05         NA
## 5         Multi 1.834862e-01 1.961719e-01 0.21228674
## 6         VacLd 1.074360e-02 1.468480e-02 0.02125112

Well, that’s a mess over the years. Let’s see how this looks graphically:

(res_reno_summ_prop_plot <- ggplot(res_reno_yearly_summ) + 
    geom_bar(aes(x = year_issued, fill = occupancy_old,y = perc),stat="identity") + 
    scale_y_continuous('Proportion') + 
       scale_x_continuous("Year Issued",limits=c(2009.5,2017.5),breaks=c(2010:2017)) + 
       scale_fill_discrete("Occupancy Type") +
      theme_bw()
 )
## Warning: Removed 5 rows containing missing values (position_stack).

Pretty consistent in the last 3 years, but definitely a big shift in categories between 2012 and 2013.

ggsave(plot=res_reno_summ_prop_plot,filename="figures/res_reno_summ_prop_plot.pdf",height=7,width=10)
## Warning: Removed 5 rows containing missing values (position_stack).
(res_reno_summ_prop_plot2 <- ggplot(res_reno_yearly_summ) + 
    geom_bar(aes(x = year_issued, fill = occupancy_old,y = perc),stat="identity") + 
    scale_y_continuous('Proportion') + 
       scale_x_continuous("Year Issued",limits=c(2014.5,2017.5),breaks=c(2010:2017)) + 
       scale_fill_discrete("Occupancy Type") +
      theme_bw()
 )
## Warning: Removed 65 rows containing missing values (position_stack).

ggsave(plot=res_reno_summ_prop_plot2,filename="figures/res_reno_summ_prop_plot_1517.pdf",height=7,width=10)
## Warning: Removed 65 rows containing missing values (position_stack).

Residential Development: geographic variation

Now let’s see where this residential development has been over the last few years. To do this we’re going to use the geography-aggregated files from BARI.

library(tidycensus)
## Warning: package 'tidycensus' was built under R version 3.4.2
library(tigris) # to pull census shapefiles from the web API
## As of version 0.5.1, tigris does not cache downloaded data by default. To enable caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
## 
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
## 
##     plot
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.1.3, proj.4 4.9.3
library(leaflet)
library(RColorBrewer) # color scales
library(classInt) # color scales
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
source('~/Dropbox (MIT)/Teaching/PS Methods Workshop/2014 Geospatial/functions.R')

permits_ct <- read.csv(paste0(BARIdrive_path,"Permits/Data/CT/Long/Permits.Ecometrics.CT.Long.csv"))
options(tigris_class = "sp",tigris_use_cache = T)
tracts <- tigris::tracts("MA","Suffolk",year = "2016")
towns <- tigris::county_subdivisions("MA","Suffolk",year = "2016")
boston <- towns[which(towns@data$NAME == "Boston"),]
boston_tracts <- tracts[boston,]

# add CT-level building data to CT shapefiles:
boston_tracts@data$newcon_dv_pp_2017 <- permits_ct$NEWCON_DV_PP.2017[match(boston_tracts@data$GEOID,permits_ct$CT_ID_10)]
boston_tracts@data$newcon_pp_2017 <- permits_ct$NEWCON_PP.2017[match(boston_tracts@data$GEOID,permits_ct$CT_ID_10)]
boston_tracts@data$newcon_count_2017 <- permits_ct$NEWCON_count.2017[match(boston_tracts@data$GEOID,permits_ct$CT_ID_10)]
boston_tracts@data$newcon_dv_pp_2016 <- permits_ct$NEWCON_DV_PP.2016[match(boston_tracts@data$GEOID,permits_ct$CT_ID_10)]
boston_tracts@data$newcon_pp_2016 <- permits_ct$NEWCON_PP.2016[match(boston_tracts@data$GEOID,permits_ct$CT_ID_10)]
boston_tracts@data$newcon_count_2016 <- permits_ct$NEWCON_count.2016[match(boston_tracts@data$GEOID,permits_ct$CT_ID_10)]
boston_tracts <- boston_tracts[!is.na(boston_tracts@data$newcon_pp_2017),]

Now that the data is combined with geographic shapefiles, let’s make some simple visuals

var <- boston_tracts$newcon_pp_2017
nclr <- 5
plotclr <- brewer.pal(nclr,"Blues")
class <- classIntervals(var,nclr,style="quantile")
colcode <- findColours(class,plotclr)

# pdf("figures/newcon_pp_ct_2017.pdf")
# par(mar=c(2,2,4,2))
plot(boston_tracts,col=colcode,border="grey")
# title(main="Demographic change, pre-2015")
legend(x="bottomright",cex=1, fill=attr(colcode,"palette"), bty="n", legend=round.intervals(colcode,rnd=2), title="Tract Prop. of parcels\nwith new construction, quantiles",ncol=2,xpd=T)

# dev.off()

Now lets make some better maps:

## Checking rgeos availability: TRUE
## Warning: package 'rgdal' was built under R version 3.4.2
## rgdal: version: 1.2-13, (SVN revision 686)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/proj
##  Linking to sp version: 1.2-5
## 
## Attaching package: 'rgdal'
## The following object is masked from 'package:proj4':
## 
##     project
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
## 
##     wind
## 49 tiles needed, this may take a while (try a smaller zoom).
## Map from URL : http://tile.stamen.com/toner/13/2475/3028.png
## Map from URL : http://tile.stamen.com/toner/13/2476/3028.png
## Map from URL : http://tile.stamen.com/toner/13/2477/3028.png
## Map from URL : http://tile.stamen.com/toner/13/2478/3028.png
## Map from URL : http://tile.stamen.com/toner/13/2479/3028.png
## Map from URL : http://tile.stamen.com/toner/13/2480/3028.png
## Map from URL : http://tile.stamen.com/toner/13/2481/3028.png
## Map from URL : http://tile.stamen.com/toner/13/2475/3029.png
## Map from URL : http://tile.stamen.com/toner/13/2476/3029.png
## Map from URL : http://tile.stamen.com/toner/13/2477/3029.png
## Map from URL : http://tile.stamen.com/toner/13/2478/3029.png
## Map from URL : http://tile.stamen.com/toner/13/2479/3029.png
## Map from URL : http://tile.stamen.com/toner/13/2480/3029.png
## Map from URL : http://tile.stamen.com/toner/13/2481/3029.png
## Map from URL : http://tile.stamen.com/toner/13/2475/3030.png
## Map from URL : http://tile.stamen.com/toner/13/2476/3030.png
## Map from URL : http://tile.stamen.com/toner/13/2477/3030.png
## Map from URL : http://tile.stamen.com/toner/13/2478/3030.png
## Map from URL : http://tile.stamen.com/toner/13/2479/3030.png
## Map from URL : http://tile.stamen.com/toner/13/2480/3030.png
## Map from URL : http://tile.stamen.com/toner/13/2481/3030.png
## Map from URL : http://tile.stamen.com/toner/13/2475/3031.png
## Map from URL : http://tile.stamen.com/toner/13/2476/3031.png
## Map from URL : http://tile.stamen.com/toner/13/2477/3031.png
## Map from URL : http://tile.stamen.com/toner/13/2478/3031.png
## Map from URL : http://tile.stamen.com/toner/13/2479/3031.png
## Map from URL : http://tile.stamen.com/toner/13/2480/3031.png
## Map from URL : http://tile.stamen.com/toner/13/2481/3031.png
## Map from URL : http://tile.stamen.com/toner/13/2475/3032.png
## Map from URL : http://tile.stamen.com/toner/13/2476/3032.png
## Map from URL : http://tile.stamen.com/toner/13/2477/3032.png
## Map from URL : http://tile.stamen.com/toner/13/2478/3032.png
## Map from URL : http://tile.stamen.com/toner/13/2479/3032.png
## Map from URL : http://tile.stamen.com/toner/13/2480/3032.png
## Map from URL : http://tile.stamen.com/toner/13/2481/3032.png
## Map from URL : http://tile.stamen.com/toner/13/2475/3033.png
## Map from URL : http://tile.stamen.com/toner/13/2476/3033.png
## Map from URL : http://tile.stamen.com/toner/13/2477/3033.png
## Map from URL : http://tile.stamen.com/toner/13/2478/3033.png
## Map from URL : http://tile.stamen.com/toner/13/2479/3033.png
## Map from URL : http://tile.stamen.com/toner/13/2480/3033.png
## Map from URL : http://tile.stamen.com/toner/13/2481/3033.png
## Map from URL : http://tile.stamen.com/toner/13/2475/3034.png
## Map from URL : http://tile.stamen.com/toner/13/2476/3034.png
## Map from URL : http://tile.stamen.com/toner/13/2477/3034.png
## Map from URL : http://tile.stamen.com/toner/13/2478/3034.png
## Map from URL : http://tile.stamen.com/toner/13/2479/3034.png
## Map from URL : http://tile.stamen.com/toner/13/2480/3034.png
## Map from URL : http://tile.stamen.com/toner/13/2481/3034.png
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead
(b1 <- b0 + 
      geom_polygon(data=tracts_merged2,aes(x=long,y=lat,group=group,fill=cut_number(newcon_pp_2017,5)),color = "black",alpha=0.4) 
      + labs(fill="Tract prop. of parcels\nwith new construction, 2017") 
      + scale_fill_brewer(type="div",palette="RdBu")
      + theme(legend.position = c(0.85,0.15),plot.margin = unit(c(0.1,0.1,0.1,0.1),"cm"))
      )

ggsave(b1,filename = "figures/newcon_pp_ct_2017_gg.pdf",height=7,width=7)

Compare to 2016:

(b16 <- b0 + 
      geom_polygon(data=tracts_merged2,aes(x=long,y=lat,group=group,fill=cut_number(newcon_pp_2016,5)),color = "black",alpha=0.4) 
      + labs(fill="Tract prop. of parcels\nwith new construction, 2016") 
      + scale_fill_brewer(type="div",palette="RdBu")
      + theme(legend.position = c(0.85,0.15),plot.margin = unit(c(0.1,0.1,0.1,0.1),"cm"))
      )

ggsave(b16,filename = "figures/newcon_pp_ct_2016_gg.pdf",height=7,width=7)

Saina additional work here:

table(residential_data$description)
## 
##                                                      Addition 
##                           1467                           1298 
##             Annual Maintenance    Application to Correct a Vi 
##                            310                            409 
##                         Awning                 Awning Renewal 
##                             36                             10 
##                         Canopy                 Canopy Renewal 
##                            174                             12 
##            Capital Improvement                 Cellular Tower 
##                              0                             35 
##               Change Occupancy                 City of Boston 
##                           1700                           2171 
##             Commercial Parking          Demolition - Exterior 
##                             28                            562 
##          Demolition - Interior          Driveway Installation 
##                           2146                            309 
##                      Dumpsters                     Electrical 
##                              0                          37768 
##                          Erect  Excavation  Borings Test Pits 
##                           1801                            438 
##         Fast Track Application                        Fencing 
##                             53                              1 
##       Fencing Renovation < 6Ft        Fencing Renovation >6ft 
##                            679                             42 
##                     Fire Alarm   Fire Protection Sprinkler >9 
##                           7404                             39 
##      Fire Protection/Sprinkler     Flammable and/or Explosive 
##                            283                              1 
## From TimeMatters-PZ Conversion                         Garage 
##                            246                             95 
##                            Gas                        General 
##                          30471                             11 
##                     Generators                 Holiday Vendor 
##                            147                             33 
##              Industrial Boiler             Industrial Furnace 
##                             28                             12 
## Installation of Decorative Mat Installation of Floor Covering 
##                              1                              2 
## Installation of InteriorFinish Installation/Evaluation of Mat 
##                             13                              1 
##                     Insulation         Interior/Exterior Work 
##                           5354                          14540 
##                    Low Voltage                    Maintenance 
##                          10029                              3 
##                            New               New construction 
##                              1                            691 
##         No Record of Occupancy                          Other 
##                           1521                           6345 
##                Outside Seating                       Plumbing 
##                             14                          33691 
##  Prinvate Contractor-Emergency           Removal of structure 
##                              0                            198 
##        Renewal of Signs Permit         Renovations - Exterior 
##                             16                          10567 
##     Renovations -Interior  NSC                         Repair 
##                          17447                              1 
##            Residential Parking             Retractable Awning 
##                            221                             10 
##                        Roofing                        Service 
##                          10085                              3 
##                 Service Change                         Siding 
##                           1652                           2339 
##                          Signs                   Solar Panels 
##                             39                           4390 
##                  Special Event                 Special Events 
##                              0                            164 
##                        Staging      Subdivision combining lot 
##                              0                            302 
##      Summer Program Fast Track               Television Truck 
##                             63                              1 
##                       Temp COO  Temporary Change of Use & Occ 
##                              8                              6 
##           Temporary Enclosures              Temporary Service 
##                              0                            606 
##        Temporary Signs/Banners             Temporary Trailers 
##                              1                             37 
##                           Tent                         Trench 
##                              3                              3
#tidy work
residential_data <- residential_data %>%
  # drop_na(description) %>% #removing NAs of the description column
  mutate(description = gsub("Awning Renewal", "Awning", description)) %>%
  mutate(description = gsub("Retractable Awning", "Awning", description)) %>%
  mutate(description = gsub("Canopy Renewal", "Canopy", description)) %>%
  mutate(description = gsub("Demolition - Exterior", "Demolition", description)) %>%
  mutate(description = gsub("Demolition - Interior", "Demolition", description)) %>%
  mutate(description = gsub("Fencing Renovation < 6Ft", "Renovation", description)) %>%
  mutate(description = gsub("Fencing Renovation >6ft", "Renovation", description)) %>%
  mutate(description = gsub("Fire Protection Sprinkler >9", "Fire Protection/Sprinkler", description)) %>%
  mutate(description = gsub("New construction", "New", description)) %>%
  mutate(description = gsub("New", "New-construction", description)) %>%
  mutate(description = gsub("Renovations - Exterior", "Renovation", description)) %>%
  mutate(description = gsub("Service Change", "Service", description)) %>%
  mutate(description = gsub("Renovations -Interior  NSC", "Renovation", description))

#exploring a bit
#percentage of Renovation permits
(length(which(residential_data$description == "Renovation"))/nrow(residential_data))*100
## [1] 13.64519
#percentage of Demolition permits
(length(which(residential_data$description == "Demolition"))/nrow(residential_data))*100
## [1] 1.285929
#percentage of New construction permits
(length(which(residential_data$description == "New-construction"))/nrow(residential_data))*100
## [1] 0.3286053
residential_freq <- residential_data %>%
  group_by(description) %>%
  count %>%
  arrange(desc(n)) %>%
  filter(n > 600)


# ggplot(residential_freq , aes(x = description, y = n, fill = description)) +
#   geom_bar(stat = "identity") +
#   ylab("") +
#   xlab("") +
#   theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
#   scale_fill_manual(values=c("grey50", "grey50", "grey50", "grey50", "red", "grey50", "grey50", "grey50", "grey50", "grey50", "grey50", "grey50", "green", "grey50", "grey50", "grey50", "blue", "grey50", "grey50", "grey50", "grey50", "grey50"))

Q3. Where were the new housing units in Boston in 2017? Could map the geographic concentration of housing development

new_const <- residential_data %>%
  filter(description == "New-construction")

# bos_nhoods <- sf::read_sf("C:/Users/ux305/Desktop/School/Semester 3/Big-Data-for-Cities-fall17/data/Boston_Neighborhoods.geojson")

#aes(fill = newcon)


# ggplot(bos_nhoods) +
#   geom_sf() +
#   geom_count(data = new_const, aes(x = x, y = y, color = description))