# Install ggplot2 only when it is not already available, so re-running the
# script does not reinstall the package each time.
# The package name must be in straight quotes; typographic quotes are not
# valid string delimiters in R and make the line fail to parse.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}

Question 1
After a few building collapses, the City of New York is going to begin investigating older buildings for safety. The city is particularly worried about buildings that were unusually tall when they were built, since best practices for safety hadn’t yet been determined. Create a graph that shows how many buildings of a certain number of floors were built in each year (note: you may want to use a log scale for the number of buildings). Find a strategy to bin buildings (it should be clear when 20-29-story buildings, 30-39-story buildings, and 40-49-story buildings were first built in large numbers, but does it make sense to continue in this way as you get taller?)
library(ggplot2)

# Read the five per-borough PLUTO 17v1.1 CSV files from a local copy (the
# files are large). The directory is named once so that relocating the data
# only requires editing a single line.
pluto_dir <- "~/Project 2 Dataset/PLUTO17v1.1"

# Read one borough file from `pluto_dir`.
#   file_name: name of a CSV file inside `pluto_dir`, e.g. "BK2017V11.csv".
# Returns the data frame produced by read.csv(). Stops with an explicit
# message when the file does not exist, instead of the generic
# "cannot open file" error from read.csv().
read_pluto <- function(file_name) {
  path <- file.path(pluto_dir, file_name)
  if (!file.exists(path)) {
    stop("PLUTO file not found: ", path, call. = FALSE)
  }
  read.csv(path, header = TRUE)
}

bk <- read_pluto("BK2017V11.csv")
br <- read_pluto("BX2017V11.csv")
mn <- read_pluto("MN2017V11.csv")
qn <- read_pluto("QN2017V11.csv")
si <- read_pluto("SI2017V11.csv")

# Preview the first six rows of `bk` to check that the data set was read in
head(bk, n = 6L)
##   Borough Block  Lot  CD CT2010 CB2010 SchoolDist Council ZipCode FireComp
## 1      BK     1    1 302     21     NA         13      33   11201     L118
## 2      BK     1   50 302     21   2000         13      33   11201     L118
## 3      BK     1 7501 302     21   2000         13      33   11201     L118
## 4      BK     3    1 302     21   3002         13      33   11201     L118
## 5      BK     3    5 302     21     NA         13      33   11201     L118
## 6      BK     3   35 302     21     NA         13      33   11201     L118
##   PolicePrct HealthCenterDistrict HealthArea SanitBoro SanitDistrict
## 1         84                   36       1000         3             2
## 2         84                   36       1000         3             2
## 3         84                   36       1000         1             2
## 4         84                   36       1000         3             2
## 5         84                   36       1000         3             2
## 6         84                   36       1000         3             2
##   SanitSub       Address ZoneDist1 ZoneDist2 ZoneDist3 ZoneDist4 Overlay1
## 1       1B   JOHN STREET      M3-1  M1-4/R8A                             
## 2       1B 10 JAY STREET  M1-4/R8A      M3-1                             
## 3          1 JOHN STREET      M3-1                                       
## 4       1B    JAY STREET      M3-1                                       
## 5       1B    JAY STREET      M3-1                                       
## 6       1B   GOLD STREET      M3-1                                       
##   Overlay2 SPDist1 SPDist2 SPDist3 LtdHeight SplitZone BldgClass LandUse
## 1             MX-2                                   Y        V1      11
## 2             MX-2                                   Y        O6       5
## 3                                                    N        RM       4
## 4                                                    N        U4       7
## 5                                                    N        T2       7
## 6                                                    N        T2       7
##   Easements OwnerType             OwnerName LotArea BldgArea ComArea
## 1         1         P  BROOKLYN BRIDGE PARK  151930        0       0
## 2         0           SAFDI PLAZA REALTY IN   19682   163894  163894
## 3         1                                       0   102534    4686
## 4         1           CONSOLIDED EDISON CO/  387060        0       0
## 5         0         C              NYC DSBS    6384        0       0
## 6         0         C              NYC DSBS       0        0       0
##   ResArea OfficeArea RetailArea GarageArea StrgeArea FactryArea OtherArea
## 1       0          0          0          0         0          0         0
## 2       0     147505      16389          0         0          0         0
## 3   97848          0       4686          0         0          0         0
## 4       0          0          0          0         0          0         0
## 5       0          0          0          0         0          0         0
## 6       0          0          0          0         0          0         0
##   AreaSource NumBldgs NumFloors UnitsRes UnitsTotal LotFront LotDepth
## 1          4        0         0        0          0   206.25   337.42
## 2          2        1        10        0          2   109.08   207.25
## 3          7        1        12       42         44     0.00     0.00
## 4          7       15         0        0          0  1051.00   469.00
## 5          7        0         0        0          0     0.00     0.00
## 6          7        0         0        0          0     0.00     0.00
##   BldgFront BldgDepth Ext ProxCode IrrLotCode LotType BsmtCode AssessLand
## 1         0         0            0          Y       5        5    1371105
## 2        88       195            1          Y       3        0     834300
## 3         0         0            0          N       0        5     389694
## 4         0         0            0          Y       5        5          0
## 5         0         0            0          N       8        5      21600
## 6         0         0            0          N       5        5       1854
##   AssessTot ExemptLand ExemptTot YearBuilt YearAlter1 YearAlter2
## 1   1371105          0         0         0          0          0
## 2  10156950          0         0      1920       1994       2015
## 3   6603514          0         0         0          0          0
## 4         0          0         0         0          0          0
## 5     21600      21600     21600         0          0          0
## 6      1854       1854      1854         0          0          0
##                  HistDist Landmark BuiltFAR ResidFAR CommFAR FacilFAR
## 1                                      0.00      0.0       2      0.0
## 2 DUMBO Historic District              8.33      5.4       2      6.5
## 3                                      0.00      0.0       2      0.0
## 4                                      0.00      0.0       2      0.0
## 5                                      0.00      0.0       2      0.0
## 6                                      0.00      0.0       2      0.0
##   BoroCode        BBL CondoNo Tract2010 XCoord YCoord ZoneMap ZMCode
## 1        3 3000010001       0        21     NA     NA     12d       
## 2        3 3000010050       0        21 987838 195989     12d       
## 3        3 3000017501    3819        21 987624 195992     12d       
## 4        3 3000030001       0        21 988568 196151     12d       
## 5        3 3000030005       0        21     NA     NA     12d       
## 6        3 3000030035       0        21     NA     NA               
##   Sanborn TaxMap EDesigNum     APPBBL    APPDate PLUTOMapID FIRM07_FLAG
## 1 302 007  30101           3000010001 11/26/2013          1           1
## 2 302 007  30101     E-231          0                     1           1
## 3 302 007  30101           3000010002   3/4/2016          1           1
## 4 302 007  30101                    0                     1           1
## 5 302 007  30101                    0                     4           1
## 6 302 007  30101                    0                     2          NA
##   PFIRM15_FLAG Version
## 1            1  17v1.1
## 2            1  17v1.1
## 3            1  17v1.1
## 4            1  17v1.1
## 5            1  17v1.1
## 6           NA  17v1.1
# The question asks how many buildings of each height were built in each
# year, so keep the year built alongside the borough and the number of floors.
# NOTE(review): `YearBuilt` is visible in the printed head of `bk`; the other
# four borough files are assumed to share the same columns - confirm.
keep_cols <- c("Borough", "NumFloors", "YearBuilt")
sub_bk <- bk[keep_cols]
sub_br <- br[keep_cols]
sub_mn <- mn[keep_cols]
sub_qn <- qn[keep_cols]
sub_si <- si[keep_cols]

# Stack the five borough subsets row-wise into a single city-wide data frame
nyc <- do.call(rbind, list(sub_bk, sub_br, sub_mn, sub_qn, sub_si))
head(nyc, n = 6L)
##   Borough NumFloors
## 1      BK         0
## 2      BK        10
## 3      BK        12
## 4      BK         0
## 5      BK         0
## 6      BK         0
#View(nyc)

# Inspect the spread of floor counts across all rows before filtering.
range(nyc$NumFloors)
## [1]   0 119

# Remove the rows with a missing or zero number of floors before plotting.
# The filtered rows go into a new data frame so `nyc` itself is left unchanged.
nyc_floors <- nyc[!is.na(nyc$NumFloors) & nyc$NumFloors > 0, , drop = FALSE]

# Histogram of the number of floors. The column is referenced by its bare name
# inside aes() so ggplot2 looks it up in the `data` argument; writing
# `nyc$NumFloors` there bypasses the plot data and is discouraged by ggplot2.
ggplot(nyc_floors, aes(x = NumFloors)) +
  geom_histogram(bins = 20)