The BARI Building Permits dataset is actually a few different distinct items. First, it’s a
We wanted to explore some of the temporal dynamics in the building permits data. For this we’re going to start with the individual permit-level dataset (Permits.Records.Geocoded.20180122.csv) rather than one of the geographic aggregated datasets.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(tidyr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# importing the data ####
# permit_data <- read.csv("C:/Users/ux305/Desktop/School/Semester 4/BARI/Automation/Building permit/Permits.Records.Geocoded.20180122 (1).csv")
BARIdrive_path <- "/Users/justin/Google Drive/BARI/BARI Research Team Data Library/"
permit_data <- read.csv(paste0(BARIdrive_path,"Permits/Data/Permits.Records.Geocoded.20180122.csv"))
# tidy work
names(permit_data) <- tolower(names(permit_data))
permit_data <- permit_data %>%
mutate(year_issued = lubridate::year(issued_date))
## Warning in strptime(xx, f <- "%Y-%m-%d %H:%M:%OS", tz = tz): unknown
## timezone 'zone/tz/2018c.1.0/zoneinfo/America/New_York'
Our first question was what types of buildings are getting renovated/constructed/demolished in Boston - basically, what types of development are most common? And are those types changing over time?
#Q1
#different types of occupancy
table(permit_data$occupancy)
##
## 1-2FAM 1-3FAM 1-4FAM 1-7FAM 1Unit 2unit 3unit 4unit 4Unit
## 7996 109903 36255 9815 2061 6120 618 717 374 7
## 5unit 6unit 6Unit 7More 7unit Comm COMM Mixed MIXED Multi
## 277 299 4 4781 76 108984 184 13540 2 35872
## Other VacLd
## 13389 3408
Looks like some odd categorizations of this occupancy variable that distinguishes between commercial and mixed, but then subdivides residential into the specific number of units within the building. Let’s clean that up a bit into broader categories of development.
permit_data$occupancy_old <- permit_data$occupancy
permit_data <- permit_data %>%
# drop_na(occupancy) %>% #removing NAs of the occupancy column
mutate(occupancy = gsub("Comm", "Commercial", occupancy)) %>% #changing the categories to commercial, residential, mixed and other
mutate(occupancy = gsub("COMM", "Commercial", occupancy)) %>%
mutate(occupancy = gsub("Mixed", "Mixed-used", occupancy)) %>%
mutate(occupancy = gsub("MIXED", "Mixed-used", occupancy)) %>%
mutate(occupancy = gsub("1-3FAM|1-4FAM|1-7FAM|1Unit|2unit|3unit|4unit|4Unit|5unit|6unit|7More|7unit|residential|VacLd|6Unit|Multi|1-2FAM", "Residential", occupancy)) #considering Multi, xFAM and xUnit categories also residential
permit_data$occupancy[permit_data$occupancy==""] <- "Missing" # this seems problematic but we're going to hold onto it for now
permit_data$occupancy[permit_data$occupancy==" "] <- "Missing" # this seems problematic but we're going to hold onto it for now
permit_yearsumm <- permit_data %>%
group_by(year_issued, occupancy) %>%
summarize(total=n())
(yearsumm_occupancy_plot <- ggplot(permit_yearsumm) +
geom_bar(aes(x=year_issued,weight=total,fill=occupancy)) +
scale_x_continuous("Year Permit Issued",breaks=c(2005:2017),limits=c(2010,2018)) +
scale_fill_brewer("Occupancy type",type="qual") +
scale_y_continuous("Count") +
theme_bw() +
theme(axis.text.y = element_text(angle = 45))
)
## Warning: Removed 7 rows containing non-finite values (stat_count).
ggsave(plot = yearsumm_occupancy_plot,filename = "figures/yearsumm_occupancy.pdf",height=7,width=10)
## Warning: Removed 7 rows containing non-finite values (stat_count).
This shows that the blank occupancy type drastically increases in 2015 whereas it was negligible in 2014 and previously. That’s odd. Let’s look at it in percentage terms.
#percentage of commercial permits
(length(which(permit_data$occupancy == "Commercial"))/nrow(permit_data))*100
## [1] 30.77912
#percentage of Mixed-use permits
(length(which(permit_data$occupancy == "Mixed-used"))/nrow(permit_data))*100
## [1] 3.818068
#percentage of residential permits
(length(which(permit_data$occupancy == "Residential"))/nrow(permit_data))*100
## [1] 59.37347
#percentage of Other permits
(length(which(permit_data$occupancy == "Other"))/nrow(permit_data))*100
## [1] 3.774931
#percentage of unknown permits
(length(which(permit_data$occupancy != "Other" & permit_data$occupancy != "Residential" & permit_data$occupancy != "Mixed-used" & permit_data$occupancy != "Commercial"))/nrow(permit_data))*100
## [1] 2.254414
permit_yearsumm_grand <- permit_yearsumm %>%
group_by(year_issued) %>%
summarize(grandtotal = sum(total))
permit_yearsumm <- merge(permit_yearsumm,permit_yearsumm_grand, by="year_issued",all=T)
permit_yearsumm$perc <- permit_yearsumm$total/permit_yearsumm$grandtotal
subset(permit_yearsumm,year_issued>2009 & year_issued<2018)
## year_issued occupancy total grandtotal perc
## 8 2010 Commercial 10309 34688 0.297192113
## 9 2010 Missing 157 34688 0.004526061
## 10 2010 Mixed-used 1008 34688 0.029059041
## 11 2010 Other 1298 34688 0.037419280
## 12 2010 Residential 21916 34688 0.631803506
## 13 2011 Commercial 12845 38691 0.331989352
## 14 2011 Missing 246 38691 0.006358068
## 15 2011 Mixed-used 1191 38691 0.030782352
## 16 2011 Other 1299 38691 0.033573699
## 17 2011 Residential 23110 38691 0.597296529
## 18 2012 Commercial 12986 40572 0.320072957
## 19 2012 Missing 370 40572 0.009119590
## 20 2012 Mixed-used 1492 40572 0.036774130
## 21 2012 Other 1565 40572 0.038573400
## 22 2012 Residential 24159 40572 0.595459923
## 23 2013 Commercial 12980 42845 0.302952503
## 24 2013 Missing 243 42845 0.005671607
## 25 2013 Mixed-used 1811 42845 0.042268643
## 26 2013 Other 1809 42845 0.042221963
## 27 2013 Residential 26002 42845 0.606885284
## 28 2014 Commercial 14027 46107 0.304227124
## 29 2014 Missing 203 46107 0.004402802
## 30 2014 Mixed-used 1986 46107 0.043073720
## 31 2014 Other 1801 46107 0.039061314
## 32 2014 Residential 28090 46107 0.609235040
## 33 2015 Commercial 15156 48248 0.314127010
## 34 2015 Missing 1815 48248 0.037618140
## 35 2015 Mixed-used 1874 48248 0.038840988
## 36 2015 Other 1886 48248 0.039089703
## 37 2015 Residential 27517 48248 0.570324159
## 38 2016 Commercial 15076 49766 0.302937749
## 39 2016 Missing 2183 49766 0.043865290
## 40 2016 Mixed-used 2024 49766 0.040670337
## 41 2016 Other 1840 49766 0.036973034
## 42 2016 Residential 28643 49766 0.575553591
## 43 2017 Commercial 14707 50528 0.291066339
## 44 2017 Missing 2582 50528 0.051100380
## 45 2017 Mixed-used 2012 50528 0.039819506
## 46 2017 Other 1799 50528 0.035604022
## 47 2017 Residential 29428 50528 0.582409753
reshape2::dcast(subset(permit_yearsumm,year_issued>2009 & year_issued<2018),formula = occupancy ~ year_issued,value.var = "perc")
## occupancy 2010 2011 2012 2013 2014
## 1 Commercial 0.297192113 0.331989352 0.32007296 0.302952503 0.304227124
## 2 Missing 0.004526061 0.006358068 0.00911959 0.005671607 0.004402802
## 3 Mixed-used 0.029059041 0.030782352 0.03677413 0.042268643 0.043073720
## 4 Other 0.037419280 0.033573699 0.03857340 0.042221963 0.039061314
## 5 Residential 0.631803506 0.597296529 0.59545992 0.606885284 0.609235040
## 2015 2016 2017
## 1 0.31412701 0.30293775 0.29106634
## 2 0.03761814 0.04386529 0.05110038
## 3 0.03884099 0.04067034 0.03981951
## 4 0.03908970 0.03697303 0.03560402
## 5 0.57032416 0.57555359 0.58240975
Confirming that proportion missing increased tenfold in 2015. Residential seed a 3pp decrease, others pretty even across the board.
#percentage plot for different types of permits
(prop.all <- ggplot(permit_data) +
geom_bar(aes(x = occupancy, fill = occupancy,y = (..count..)/sum(..count..))) +
scale_y_continuous('Proportion') +
scale_x_discrete("Occupancy Type") +
# scale_fill_discrete("Occupancy Type") +
theme_bw() +
theme(legend.position="none")
)
ggsave(plot=prop.all,"figures/prop_occupancytype.pdf",height=7,width=10)
(prop.all.yearly <- ggplot(permit_yearsumm) +
geom_bar(aes(x = year_issued, fill = occupancy,y = perc),stat="identity") +
scale_y_continuous('Proportion') +
scale_x_continuous("Year Issued",limits=c(2009.5,2017.5),breaks=c(2010:2017)) +
scale_fill_discrete("Occupancy Type") +
theme_bw()
)
## Warning: Removed 12 rows containing missing values (position_stack).
ggsave(plot=prop.all.yearly,"figures/prop_occupancytype_year.pdf",height=7,width=10)
## Warning: Removed 12 rows containing missing values (position_stack).
Definitely some issues with this occupancy type. But going to save those for later.
Next we’re going to look into the type of development that each building permit allows. The main categories for these that the BARI data team created are new construction, demolition, addition, and renovation. Let’s see how the data divides into these categories - and how this breakdown varies over time.
table(permit_data$newcon)
##
## 0 1
## 344820 9862
table(permit_data$addition)
##
## 0 1
## 328209 26473
table(permit_data$demo)
##
## 0 1
## 349095 5587
table(permit_data$reno)
##
## 0 1
## 53261 301421
table(permit_data$specialevents)
##
## 0 1
## 353741 941
permit_data$dev_type <- NA
permit_data$dev_type[permit_data$newcon==1] <- "New Construction"
permit_data$dev_type[permit_data$addition==1] <- "Addition"
permit_data$dev_type[permit_data$demo==1] <- "Demolition"
permit_data$dev_type[permit_data$reno==1] <- "Renovation"
permit_data$dev_type[permit_data$specialevents==1] <- "Special Events"
table(permit_data$dev_type)
##
## Addition Demolition New Construction Renovation
## 26472 5587 9858 300485
## Special Events
## 941
Okay, but lets see how that varies by year:
permit_type_yearsumm <- subset(permit_data,year_issued>2009) %>%
group_by(year_issued, dev_type) %>%
summarize(total=n())
(yearsumm_devtype_plot <- ggplot(permit_type_yearsumm) +
geom_bar(aes(x=year_issued,weight=total,fill=dev_type)) +
scale_x_continuous("Year Permit Issued",breaks=c(2005:2017),limits=c(2010,2018)) +
scale_fill_brewer("Development type",type="qual") +
scale_y_continuous("Count") +
theme_bw() +
theme(axis.text.y = element_text(angle = 45))
)
ggsave(plot = yearsumm_devtype_plot,filename = "figures/yearsumm_devtype.pdf",height=7,width=10)
Notice some definitely weird dynamics with the New Construction category starting in 2015, which is odd.
permit_type_yearsumm_grand <- permit_type_yearsumm %>%
group_by(year_issued) %>%
summarize(grandtotal = sum(total))
permit_type_yearsumm <- merge(permit_type_yearsumm,permit_type_yearsumm_grand, by="year_issued",all=T)
permit_type_yearsumm$perc <- permit_type_yearsumm$total/permit_type_yearsumm$grandtotal
reshape2::dcast(subset(permit_type_yearsumm,year_issued>2009 & year_issued<2018),formula = dev_type ~ year_issued,value.var = "perc")
## dev_type 2010 2011 2012 2013
## 1 Addition 0.060539668 0.072445788 0.074484866 0.086637881
## 2 Demolition 0.017152906 0.015869324 0.014887114 0.015521064
## 3 New Construction 0.005563884 0.007314363 0.007147787 0.010479636
## 4 Renovation 0.878257611 0.867695330 0.865794144 0.855409033
## 5 Special Events 0.002911670 0.002196893 0.003007000 0.003921111
## 6 <NA> 0.035574262 0.034478302 0.034679089 0.028031276
## 2014 2015 2016 2017
## 1 0.07538985 0.077951418 0.077763935 0.069901837
## 2 0.01674366 0.016995523 0.015050436 0.014249525
## 3 0.01110460 0.047069309 0.051320178 0.060817764
## 4 0.86531329 0.827060189 0.822067275 0.817942527
## 5 0.00247251 0.002445697 0.002210344 0.002275966
## 6 0.02897608 0.028477864 0.031587831 0.034812381
Definitely a large increase from ~1% to ~5% of permits going to new construction starting in 2015.
(yearsumm_devprop_plot <- ggplot(permit_type_yearsumm) +
geom_bar(aes(x = year_issued, fill = dev_type,y = perc),stat="identity") +
scale_y_continuous('Proportion') +
scale_x_continuous("Year Issued",limits=c(2009.5,2017.5),breaks=c(2010:2017)) +
scale_fill_discrete("Development Type") +
theme_bw()
)
## Warning: Removed 6 rows containing missing values (position_stack).
ggsave(plot=yearsumm_devprop_plot,"figures/prop_devtype_year.pdf",height=7,width=10)
## Warning: Removed 6 rows containing missing values (position_stack).
Definitely weird. Is this coming from how the variable is constructed? Right now, using the BARI team’s code, it looks like the newcon variable is created from a complicated logical statement with multiple {r} | statements:
# NEWCON = (!is.na(permits$DESCRIPTION) &
# (permits$DESCRIPTION=="Excavation Borings Test Pits" |
# permits$DESCRIPTION == "New construction" |
# permits$DESCRIPTION == "Erect" |
# permits$DESCRIPTION == "New construction")) |
# (!is.na(permits$PermitTypeDescr) &
# (permits$PermitTypeDescr=="Erect/New Construction" |
# permits$PermitTypeDescr == "Driveway Excavation Permit" |
# permits$PermitTypeDescr == "Emergency Excavation Permit" |
# permits$PermitTypeDescr == "Excavation Permit" |
# permits$PermitTypeDescr == "Foundation Permit"))
That’s not great coding practice — would be better to have an {r} %in% statement with a vector of values instead.
# checking NEWCON variable with a different way of creating it:
permit_data$newcon2 <- NA
permit_data$newcon2[which(permit_data$description %in% c("Excavation Borings Test Pits","New construction","Erect"))] <- 1
permit_data$newcon2[which(permit_data$permittypedescr %in% c("Erect/New Construction","Excavation Permit","Foundation Permit"))] <- 1
sum(permit_data$newcon!=permit_data$newcon2,na.rm=T) # 0 discrepancies, so this isn't why there's an issue...
## [1] 0
# fixing to put excavation with addition:
permit_data$addition2 <- permit_data$addition
permit_data$addition2[which(permit_data$description %in% c("Excavation Borings Test Pits"))] <- 1
permit_data$addition2[which(permit_data$permittypedescr %in% c("Excavation Permit","Foundation Permit"))] <- 1
# let's look at underlying variables:
permittypedescr_yearly <- permit_data %>%
group_by(year_issued,permittypedescr) %>%
summarize(total=n())
permittypedescr_yearly_grand <- permittypedescr_yearly %>%
group_by(year_issued) %>%
summarize(grandtotal = sum(total))
permittypedescr_yearly <- merge(permittypedescr_yearly,permittypedescr_yearly_grand, by="year_issued",all=T)
permittypedescr_yearly$perc <- permittypedescr_yearly$total/permittypedescr_yearly$grandtotal
reshape2::dcast(subset(permittypedescr_yearly,year_issued>2009 & year_issued<2018),formula = permittypedescr ~ year_issued,value.var = "perc")
## permittypedescr 2010 2011 2012
## 1 Amendment to a Long Form 0.0042089483 0.0058928433 0.0073942621
## 2 Certificate of Occupancy 0.0356319188 0.0345558399 0.0366015972
## 3 Electrical Fire Alarms 0.0368138838 0.0499599390 0.0533372769
## 4 Electrical Low Voltage 0.0618657749 0.0804838334 0.0928472838
## 5 Electrical Permit 0.2273985240 0.2117546716 0.2042295179
## 6 Electrical Temporary Service 0.0144430351 0.0157917862 0.0149117618
## 7 Erect/New Construction 0.0020756458 0.0040060996 0.0042640245
## 8 Excavation Permit NA NA NA
## 9 Foundation Permit 0.0001729705 0.0002067664 0.0001232377
## 10 Gas Permit 0.1193496310 0.1081905353 0.1056886523
## 11 Long Form/Alteration Permit 0.0559847786 0.0653898840 0.0666962437
## 12 Plumbing Permit 0.1404808579 0.1399808741 0.1403923888
## 13 Short Form Bldg Permit 0.2986623616 0.2815900339 0.2705067534
## 14 Use of Premises 0.0029116697 0.0021968933 0.0030069999
## 2013 2014 2015 2016 2017
## 1 0.0077488622 0.0075693496 0.0068811142 0.0091829763 0.0073622546
## 2 0.0330960439 0.0336825211 0.0338874150 0.0373749146 0.0409871754
## 3 0.0545454545 0.0562821264 0.0522923230 0.0587549733 0.0594917669
## 4 0.0905823317 0.0941722515 0.0890606865 0.0907446851 0.0795598480
## 5 0.2029641732 0.2075172967 0.1989927044 0.2016838806 0.1968413553
## 6 0.0132104096 0.0191077277 0.0122699387 0.0116344492 0.0137547498
## 7 0.0044345898 0.0051185286 0.0047463107 0.0057468955 0.0056998100
## 8 NA NA 0.0357942298 0.0401077041 0.0497348005
## 9 0.0002100595 0.0002602642 0.0002072625 0.0001406583 0.0001583281
## 10 0.1023223247 0.1070336391 0.0905944288 0.0905638388 0.0978665294
## 11 0.0782821800 0.0671698441 0.0701168960 0.0678575734 0.0615302407
## 12 0.1358618275 0.1382653393 0.1251658100 0.1293654302 0.1337674161
## 13 0.2728206325 0.2613486022 0.2775451832 0.2546316762 0.2509697593
## 14 0.0039211110 0.0024725096 0.0024456972 0.0022103444 0.0022759658
Well, this looks like it could be one reason why. Excavation permits don’t exist before 2015, but then are 3.5% of the data in 2015!
(yearsumm_typedescrprop_plot <- ggplot(permittypedescr_yearly) +
geom_bar(aes(x = year_issued, fill = permittypedescr,y = perc),stat="identity") +
scale_y_continuous('Proportion') +
scale_x_continuous("Year Issued",limits=c(2009.5,2017.5),breaks=c(2010:2017)) +
scale_fill_discrete("Type Description") +
theme_bw()
)
## Warning: Removed 23 rows containing missing values (position_stack).
ggsave(plot=yearsumm_typedescrprop_plot,"figures/prop_typedescr_year.pdf",height=7,width=10)
## Warning: Removed 23 rows containing missing values (position_stack).
# could it be because this was something else in the description field pre-2015?
permit_data$boring <- ifelse(permit_data$description=="Excavation Borings Test Pits",1,0)
subset(permit_data,year_issued>2009) %>%
group_by(year_issued,boring) %>%
summarize(total = n()) %>%
reshape2::dcast(formula = boring ~ year_issued,value.var = "total")
## boring 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 1 0 34559 38544 40410 42704 45950 48089 49610 50375 2179
## 2 1 129 147 162 141 157 159 156 153 12
Not that, since it looks pretty consistent across 2014-15, so it must be the PermitTypeDescr value of Excavation!
What percent of residential building permits were renovation/new building/demolition/etc.?
#looking only at the residential building permits
residential_data <- permit_data %>%
filter(occupancy == "Residential")
res_yearly_summ <- subset(residential_data,year_issued>2009) %>%
group_by(year_issued,occupancy_old,dev_type) %>%
summarize(total=n())
res_reno_yearly_summ <- subset(res_yearly_summ,dev_type=="Renovation")
res_reno_yearly_grand <- res_reno_yearly_summ %>%
group_by(year_issued) %>%
summarize(grandtotal = sum(total))
res_reno_yearly_summ <- merge(res_reno_yearly_summ,res_reno_yearly_grand, by="year_issued",all=T)
res_reno_yearly_summ$perc <- res_reno_yearly_summ$total/res_reno_yearly_summ$grandtotal
reshape2::dcast(subset(res_reno_yearly_summ,year_issued>2014 & year_issued<2018),formula = occupancy_old ~ year_issued,value.var = "perc")
## occupancy_old 2015 2016 2017
## 1 1-2FAM 5.713423e-01 5.612771e-01 0.54160431
## 2 1-3FAM 1.727024e-01 1.805184e-01 0.17925022
## 3 1-4FAM 6.168518e-02 4.730908e-02 0.04560760
## 4 7More 4.023821e-05 3.874617e-05 NA
## 5 Multi 1.834862e-01 1.961719e-01 0.21228674
## 6 VacLd 1.074360e-02 1.468480e-02 0.02125112
Well, that’s a mess over the years. Let’s see how this looks graphically:
(res_reno_summ_prop_plot <- ggplot(res_reno_yearly_summ) +
geom_bar(aes(x = year_issued, fill = occupancy_old,y = perc),stat="identity") +
scale_y_continuous('Proportion') +
scale_x_continuous("Year Issued",limits=c(2009.5,2017.5),breaks=c(2010:2017)) +
scale_fill_discrete("Occupancy Type") +
theme_bw()
)
## Warning: Removed 5 rows containing missing values (position_stack).
Pretty consistent in the last 3 years, but definitely a big shift in categories between 2012 and 2013.
ggsave(plot=res_reno_summ_prop_plot,filename="figures/res_reno_summ_prop_plot.pdf",height=7,width=10)
## Warning: Removed 5 rows containing missing values (position_stack).
(res_reno_summ_prop_plot2 <- ggplot(res_reno_yearly_summ) +
geom_bar(aes(x = year_issued, fill = occupancy_old,y = perc),stat="identity") +
scale_y_continuous('Proportion') +
scale_x_continuous("Year Issued",limits=c(2014.5,2017.5),breaks=c(2010:2017)) +
scale_fill_discrete("Occupancy Type") +
theme_bw()
)
## Warning: Removed 65 rows containing missing values (position_stack).
ggsave(plot=res_reno_summ_prop_plot2,filename="figures/res_reno_summ_prop_plot_1517.pdf",height=7,width=10)
## Warning: Removed 65 rows containing missing values (position_stack).
Now let’s see where this residential development has been over the last few years. To do this we’re going to use the geography-aggregated files from BARI.
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 3.4.2
library(tigris) # to pull census shapefiles from the web API
## As of version 0.5.1, tigris does not cache downloaded data by default. To enable caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
##
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
##
## plot
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.1.3, proj.4 4.9.3
library(leaflet)
library(RColorBrewer) # color scales
library(classInt) # color scales
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
source('~/Dropbox (MIT)/Teaching/PS Methods Workshop/2014 Geospatial/functions.R')
permits_ct <- read.csv(paste0(BARIdrive_path,"Permits/Data/CT/Long/Permits.Ecometrics.CT.Long.csv"))
options(tigris_class = "sp",tigris_use_cache = T)
tracts <- tigris::tracts("MA","Suffolk",year = "2016")
towns <- tigris::county_subdivisions("MA","Suffolk",year = "2016")
boston <- towns[which(towns@data$NAME == "Boston"),]
boston_tracts <- tracts[boston,]
# add CT-level building data to CT shapefiles:
boston_tracts@data$newcon_dv_pp_2017 <- permits_ct$NEWCON_DV_PP.2017[match(boston_tracts@data$GEOID,permits_ct$CT_ID_10)]
boston_tracts@data$newcon_pp_2017 <- permits_ct$NEWCON_PP.2017[match(boston_tracts@data$GEOID,permits_ct$CT_ID_10)]
boston_tracts@data$newcon_count_2017 <- permits_ct$NEWCON_count.2017[match(boston_tracts@data$GEOID,permits_ct$CT_ID_10)]
boston_tracts@data$newcon_dv_pp_2016 <- permits_ct$NEWCON_DV_PP.2016[match(boston_tracts@data$GEOID,permits_ct$CT_ID_10)]
boston_tracts@data$newcon_pp_2016 <- permits_ct$NEWCON_PP.2016[match(boston_tracts@data$GEOID,permits_ct$CT_ID_10)]
boston_tracts@data$newcon_count_2016 <- permits_ct$NEWCON_count.2016[match(boston_tracts@data$GEOID,permits_ct$CT_ID_10)]
boston_tracts <- boston_tracts[!is.na(boston_tracts@data$newcon_pp_2017),]
Now that the data is combined with geographic shapefiles, let’s make some simple visuals
var <- boston_tracts$newcon_pp_2017
nclr <- 5
plotclr <- brewer.pal(nclr,"Blues")
class <- classIntervals(var,nclr,style="quantile")
colcode <- findColours(class,plotclr)
# pdf("figures/newcon_pp_ct_2017.pdf")
# par(mar=c(2,2,4,2))
plot(boston_tracts,col=colcode,border="grey")
# title(main="Demographic change, pre-2015")
legend(x="bottomright",cex=1, fill=attr(colcode,"palette"), bty="n", legend=round.intervals(colcode,rnd=2), title="Tract Prop. of parcels\nwith new construction, quantiles",ncol=2,xpd=T)
# dev.off()
Now lets make some better maps:
## Checking rgeos availability: TRUE
## Warning: package 'rgdal' was built under R version 3.4.2
## rgdal: version: 1.2-13, (SVN revision 686)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/proj
## Linking to sp version: 1.2-5
##
## Attaching package: 'rgdal'
## The following object is masked from 'package:proj4':
##
## project
##
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
##
## wind
## 49 tiles needed, this may take a while (try a smaller zoom).
## Map from URL : http://tile.stamen.com/toner/13/2475/3028.png
## Map from URL : http://tile.stamen.com/toner/13/2476/3028.png
## Map from URL : http://tile.stamen.com/toner/13/2477/3028.png
## Map from URL : http://tile.stamen.com/toner/13/2478/3028.png
## Map from URL : http://tile.stamen.com/toner/13/2479/3028.png
## Map from URL : http://tile.stamen.com/toner/13/2480/3028.png
## Map from URL : http://tile.stamen.com/toner/13/2481/3028.png
## Map from URL : http://tile.stamen.com/toner/13/2475/3029.png
## Map from URL : http://tile.stamen.com/toner/13/2476/3029.png
## Map from URL : http://tile.stamen.com/toner/13/2477/3029.png
## Map from URL : http://tile.stamen.com/toner/13/2478/3029.png
## Map from URL : http://tile.stamen.com/toner/13/2479/3029.png
## Map from URL : http://tile.stamen.com/toner/13/2480/3029.png
## Map from URL : http://tile.stamen.com/toner/13/2481/3029.png
## Map from URL : http://tile.stamen.com/toner/13/2475/3030.png
## Map from URL : http://tile.stamen.com/toner/13/2476/3030.png
## Map from URL : http://tile.stamen.com/toner/13/2477/3030.png
## Map from URL : http://tile.stamen.com/toner/13/2478/3030.png
## Map from URL : http://tile.stamen.com/toner/13/2479/3030.png
## Map from URL : http://tile.stamen.com/toner/13/2480/3030.png
## Map from URL : http://tile.stamen.com/toner/13/2481/3030.png
## Map from URL : http://tile.stamen.com/toner/13/2475/3031.png
## Map from URL : http://tile.stamen.com/toner/13/2476/3031.png
## Map from URL : http://tile.stamen.com/toner/13/2477/3031.png
## Map from URL : http://tile.stamen.com/toner/13/2478/3031.png
## Map from URL : http://tile.stamen.com/toner/13/2479/3031.png
## Map from URL : http://tile.stamen.com/toner/13/2480/3031.png
## Map from URL : http://tile.stamen.com/toner/13/2481/3031.png
## Map from URL : http://tile.stamen.com/toner/13/2475/3032.png
## Map from URL : http://tile.stamen.com/toner/13/2476/3032.png
## Map from URL : http://tile.stamen.com/toner/13/2477/3032.png
## Map from URL : http://tile.stamen.com/toner/13/2478/3032.png
## Map from URL : http://tile.stamen.com/toner/13/2479/3032.png
## Map from URL : http://tile.stamen.com/toner/13/2480/3032.png
## Map from URL : http://tile.stamen.com/toner/13/2481/3032.png
## Map from URL : http://tile.stamen.com/toner/13/2475/3033.png
## Map from URL : http://tile.stamen.com/toner/13/2476/3033.png
## Map from URL : http://tile.stamen.com/toner/13/2477/3033.png
## Map from URL : http://tile.stamen.com/toner/13/2478/3033.png
## Map from URL : http://tile.stamen.com/toner/13/2479/3033.png
## Map from URL : http://tile.stamen.com/toner/13/2480/3033.png
## Map from URL : http://tile.stamen.com/toner/13/2481/3033.png
## Map from URL : http://tile.stamen.com/toner/13/2475/3034.png
## Map from URL : http://tile.stamen.com/toner/13/2476/3034.png
## Map from URL : http://tile.stamen.com/toner/13/2477/3034.png
## Map from URL : http://tile.stamen.com/toner/13/2478/3034.png
## Map from URL : http://tile.stamen.com/toner/13/2479/3034.png
## Map from URL : http://tile.stamen.com/toner/13/2480/3034.png
## Map from URL : http://tile.stamen.com/toner/13/2481/3034.png
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead
(b1 <- b0 +
geom_polygon(data=tracts_merged2,aes(x=long,y=lat,group=group,fill=cut_number(newcon_pp_2017,5)),color = "black",alpha=0.4)
+ labs(fill="Tract prop. of parcels\nwith new construction, 2017")
+ scale_fill_brewer(type="div",palette="RdBu")
+ theme(legend.position = c(0.85,0.15),plot.margin = unit(c(0.1,0.1,0.1,0.1),"cm"))
)
ggsave(b1,filename = "figures/newcon_pp_ct_2017_gg.pdf",height=7,width=7)
Compare to 2016:
(b16 <- b0 +
geom_polygon(data=tracts_merged2,aes(x=long,y=lat,group=group,fill=cut_number(newcon_pp_2016,5)),color = "black",alpha=0.4)
+ labs(fill="Tract prop. of parcels\nwith new construction, 2016")
+ scale_fill_brewer(type="div",palette="RdBu")
+ theme(legend.position = c(0.85,0.15),plot.margin = unit(c(0.1,0.1,0.1,0.1),"cm"))
)
ggsave(b16,filename = "figures/newcon_pp_ct_2016_gg.pdf",height=7,width=7)
Saina additional work here:
table(residential_data$description)
##
## Addition
## 1467 1298
## Annual Maintenance Application to Correct a Vi
## 310 409
## Awning Awning Renewal
## 36 10
## Canopy Canopy Renewal
## 174 12
## Capital Improvement Cellular Tower
## 0 35
## Change Occupancy City of Boston
## 1700 2171
## Commercial Parking Demolition - Exterior
## 28 562
## Demolition - Interior Driveway Installation
## 2146 309
## Dumpsters Electrical
## 0 37768
## Erect Excavation Borings Test Pits
## 1801 438
## Fast Track Application Fencing
## 53 1
## Fencing Renovation < 6Ft Fencing Renovation >6ft
## 679 42
## Fire Alarm Fire Protection Sprinkler >9
## 7404 39
## Fire Protection/Sprinkler Flammable and/or Explosive
## 283 1
## From TimeMatters-PZ Conversion Garage
## 246 95
## Gas General
## 30471 11
## Generators Holiday Vendor
## 147 33
## Industrial Boiler Industrial Furnace
## 28 12
## Installation of Decorative Mat Installation of Floor Covering
## 1 2
## Installation of InteriorFinish Installation/Evaluation of Mat
## 13 1
## Insulation Interior/Exterior Work
## 5354 14540
## Low Voltage Maintenance
## 10029 3
## New New construction
## 1 691
## No Record of Occupancy Other
## 1521 6345
## Outside Seating Plumbing
## 14 33691
## Prinvate Contractor-Emergency Removal of structure
## 0 198
## Renewal of Signs Permit Renovations - Exterior
## 16 10567
## Renovations -Interior NSC Repair
## 17447 1
## Residential Parking Retractable Awning
## 221 10
## Roofing Service
## 10085 3
## Service Change Siding
## 1652 2339
## Signs Solar Panels
## 39 4390
## Special Event Special Events
## 0 164
## Staging Subdivision combining lot
## 0 302
## Summer Program Fast Track Television Truck
## 63 1
## Temp COO Temporary Change of Use & Occ
## 8 6
## Temporary Enclosures Temporary Service
## 0 606
## Temporary Signs/Banners Temporary Trailers
## 1 37
## Tent Trench
## 3 3
#tidy work
residential_data <- residential_data %>%
# drop_na(description) %>% #removing NAs of the description column
mutate(description = gsub("Awning Renewal", "Awning", description)) %>%
mutate(description = gsub("Retractable Awning", "Awning", description)) %>%
mutate(description = gsub("Canopy Renewal", "Canopy", description)) %>%
mutate(description = gsub("Demolition - Exterior", "Demolition", description)) %>%
mutate(description = gsub("Demolition - Interior", "Demolition", description)) %>%
mutate(description = gsub("Fencing Renovation < 6Ft", "Renovation", description)) %>%
mutate(description = gsub("Fencing Renovation >6ft", "Renovation", description)) %>%
mutate(description = gsub("Fire Protection Sprinkler >9", "Fire Protection/Sprinkler", description)) %>%
mutate(description = gsub("New construction", "New", description)) %>%
mutate(description = gsub("New", "New-construction", description)) %>%
mutate(description = gsub("Renovations - Exterior", "Renovation", description)) %>%
mutate(description = gsub("Service Change", "Service", description)) %>%
mutate(description = gsub("Renovations -Interior NSC", "Renovation", description))
#exploring a bit
#percentage of Renovation permits
(length(which(residential_data$description == "Renovation"))/nrow(residential_data))*100
## [1] 13.64519
#percentage of Demolition permits
(length(which(residential_data$description == "Demolition"))/nrow(residential_data))*100
## [1] 1.285929
#percentage of New construction permits
(length(which(residential_data$description == "New-construction"))/nrow(residential_data))*100
## [1] 0.3286053
residential_freq <- residential_data %>%
group_by(description) %>%
count %>%
arrange(desc(n)) %>%
filter(n > 600)
# ggplot(residential_freq , aes(x = description, y = n, fill = description)) +
# geom_bar(stat = "identity") +
# ylab("") +
# xlab("") +
# theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
# scale_fill_manual(values=c("grey50", "grey50", "grey50", "grey50", "red", "grey50", "grey50", "grey50", "grey50", "grey50", "grey50", "grey50", "green", "grey50", "grey50", "grey50", "blue", "grey50", "grey50", "grey50", "grey50", "grey50"))
Q3. Where were the new housing units in Boston in 2017? Could map the geographic concentration of housing development
new_const <- residential_data %>%
filter(description == "New-construction")
# bos_nhoods <- sf::read_sf("C:/Users/ux305/Desktop/School/Semester 3/Big-Data-for-Cities-fall17/data/Boston_Neighborhoods.geojson")
#aes(fill = newcon)
# ggplot(bos_nhoods) +
# geom_sf() +
# geom_count(data = new_const, aes(x = x, y = y, color = description))