# Install ggplot2 only when it is not already available, so re-running the
# script does not reinstall the package on every execution.
# (The package name must be in plain ASCII quotes; typographic quotes are a
# parse error in R.)
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Read the five PLUTO 17v1.1 borough files from local disk
# (read locally due to large file size). Each CSV has a header row.
bk <- read.csv("~/Project 2 Dataset/PLUTO17v1.1/BK2017V11.csv", header = TRUE)
br <- read.csv("~/Project 2 Dataset/PLUTO17v1.1/BX2017V11.csv", header = TRUE)
mn <- read.csv("~/Project 2 Dataset/PLUTO17v1.1/MN2017V11.csv", header = TRUE)
qn <- read.csv("~/Project 2 Dataset/PLUTO17v1.1/QN2017V11.csv", header = TRUE)
si <- read.csv("~/Project 2 Dataset/PLUTO17v1.1/SI2017V11.csv", header = TRUE)
# Preview the first six rows of the BK table to confirm the file loaded
head(bk, n = 6L)
## Borough Block Lot CD CT2010 CB2010 SchoolDist Council ZipCode FireComp
## 1 BK 1 1 302 21 NA 13 33 11201 L118
## 2 BK 1 50 302 21 2000 13 33 11201 L118
## 3 BK 1 7501 302 21 2000 13 33 11201 L118
## 4 BK 3 1 302 21 3002 13 33 11201 L118
## 5 BK 3 5 302 21 NA 13 33 11201 L118
## 6 BK 3 35 302 21 NA 13 33 11201 L118
## PolicePrct HealthCenterDistrict HealthArea SanitBoro SanitDistrict
## 1 84 36 1000 3 2
## 2 84 36 1000 3 2
## 3 84 36 1000 1 2
## 4 84 36 1000 3 2
## 5 84 36 1000 3 2
## 6 84 36 1000 3 2
## SanitSub Address ZoneDist1 ZoneDist2 ZoneDist3 ZoneDist4 Overlay1
## 1 1B JOHN STREET M3-1 M1-4/R8A
## 2 1B 10 JAY STREET M1-4/R8A M3-1
## 3 1 JOHN STREET M3-1
## 4 1B JAY STREET M3-1
## 5 1B JAY STREET M3-1
## 6 1B GOLD STREET M3-1
## Overlay2 SPDist1 SPDist2 SPDist3 LtdHeight SplitZone BldgClass LandUse
## 1 MX-2 Y V1 11
## 2 MX-2 Y O6 5
## 3 N RM 4
## 4 N U4 7
## 5 N T2 7
## 6 N T2 7
## Easements OwnerType OwnerName LotArea BldgArea ComArea
## 1 1 P BROOKLYN BRIDGE PARK 151930 0 0
## 2 0 SAFDI PLAZA REALTY IN 19682 163894 163894
## 3 1 0 102534 4686
## 4 1 CONSOLIDED EDISON CO/ 387060 0 0
## 5 0 C NYC DSBS 6384 0 0
## 6 0 C NYC DSBS 0 0 0
## ResArea OfficeArea RetailArea GarageArea StrgeArea FactryArea OtherArea
## 1 0 0 0 0 0 0 0
## 2 0 147505 16389 0 0 0 0
## 3 97848 0 4686 0 0 0 0
## 4 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## AreaSource NumBldgs NumFloors UnitsRes UnitsTotal LotFront LotDepth
## 1 4 0 0 0 0 206.25 337.42
## 2 2 1 10 0 2 109.08 207.25
## 3 7 1 12 42 44 0.00 0.00
## 4 7 15 0 0 0 1051.00 469.00
## 5 7 0 0 0 0 0.00 0.00
## 6 7 0 0 0 0 0.00 0.00
## BldgFront BldgDepth Ext ProxCode IrrLotCode LotType BsmtCode AssessLand
## 1 0 0 0 Y 5 5 1371105
## 2 88 195 1 Y 3 0 834300
## 3 0 0 0 N 0 5 389694
## 4 0 0 0 Y 5 5 0
## 5 0 0 0 N 8 5 21600
## 6 0 0 0 N 5 5 1854
## AssessTot ExemptLand ExemptTot YearBuilt YearAlter1 YearAlter2
## 1 1371105 0 0 0 0 0
## 2 10156950 0 0 1920 1994 2015
## 3 6603514 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 21600 21600 21600 0 0 0
## 6 1854 1854 1854 0 0 0
## HistDist Landmark BuiltFAR ResidFAR CommFAR FacilFAR
## 1 0.00 0.0 2 0.0
## 2 DUMBO Historic District 8.33 5.4 2 6.5
## 3 0.00 0.0 2 0.0
## 4 0.00 0.0 2 0.0
## 5 0.00 0.0 2 0.0
## 6 0.00 0.0 2 0.0
## BoroCode BBL CondoNo Tract2010 XCoord YCoord ZoneMap ZMCode
## 1 3 3000010001 0 21 NA NA 12d
## 2 3 3000010050 0 21 987838 195989 12d
## 3 3 3000017501 3819 21 987624 195992 12d
## 4 3 3000030001 0 21 988568 196151 12d
## 5 3 3000030005 0 21 NA NA 12d
## 6 3 3000030035 0 21 NA NA
## Sanborn TaxMap EDesigNum APPBBL APPDate PLUTOMapID FIRM07_FLAG
## 1 302 007 30101 3000010001 11/26/2013 1 1
## 2 302 007 30101 E-231 0 1 1
## 3 302 007 30101 3000010002 3/4/2016 1 1
## 4 302 007 30101 0 1 1
## 5 302 007 30101 0 4 1
## 6 302 007 30101 0 2 NA
## PFIRM15_FLAG Version
## 1 1 17v1.1
## 2 1 17v1.1
## 3 1 17v1.1
## 4 1 17v1.1
## 5 1 17v1.1
## 6 NA 17v1.1
# The question only concerns the number of floors, so keep just the
# "Borough" and "NumFloors" columns from each borough table.
floor_cols <- c("Borough", "NumFloors")
sub_bk <- bk[, floor_cols]
sub_br <- br[, floor_cols]
sub_mn <- mn[, floor_cols]
sub_qn <- qn[, floor_cols]
sub_si <- si[, floor_cols]
# Stack the five borough subsets row-wise into one city-wide data frame
nyc <- rbind(
  sub_bk,
  sub_br,
  sub_mn,
  sub_qn,
  sub_si
)
head(nyc, n = 6L)
## Borough NumFloors
## 1 BK 0
## 2 BK 10
## 3 BK 12
## 4 BK 0
## 5 BK 0
## 6 BK 0
#View(nyc)
# Inspect the spread of floor counts before plotting.
# NOTE(review): the stated intent was to remove the rows with 0 for number of
# floors, but no filtering is performed here -- zero-floor rows are still
# present in the range and in the histogram below. Confirm whether they
# should be dropped before plotting.
#complete.cases(nyc$NumFloors)
range(nyc$NumFloors)
## [1] 0 119
# Histogram of NumFloors across all boroughs (20 bins). The column is mapped
# by name instead of `nyc$NumFloors`: ggplot2 discourages `$` inside aes()
# because it bypasses the `data` argument and mislabels the axis.
ggplot(nyc, aes(x = NumFloors)) +
  geom_histogram(bins = 20)