library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(bit64)
## Loading required package: bit
## Attaching package bit
## package:bit (c) 2008-2012 Jens Oehlschlaegel (GPL-2)
## creators: bit bitwhich
## coercion: as.logical as.integer as.bit as.bitwhich which
## operator: ! & | xor != ==
## querying: print length any all min max range sum summary
## bit access: length<- [ [<- [[ [[<-
## for more help type ?bit
##
## Attaching package: 'bit'
## The following object is masked from 'package:data.table':
##
## setattr
## The following object is masked from 'package:base':
##
## xor
## Attaching package bit64
## package:bit64 (c) 2011-2012 Jens Oehlschlaegel
## creators: integer64 seq :
## coercion: as.integer64 as.vector as.logical as.integer as.double as.character as.bin
## logical operator: ! & | xor != == < <= >= >
## arithmetic operator: + - * / %/% %% ^
## math: sign abs sqrt log log2 log10
## math: floor ceiling trunc round
## querying: is.integer64 is.vector [is.atomic} [length] format print str
## values: is.na is.nan is.finite is.infinite
## aggregation: any all min max range sum prod
## cumulation: diff cummin cummax cumsum cumprod
## access: length<- [ [<- [[ [[<-
## combine: c rep cbind rbind as.data.frame
## WARNING don't use as subscripts
## WARNING semantics differ from integer
## for more help type ?bit64
##
## Attaching package: 'bit64'
## The following object is masked from 'package:bit':
##
## still.identical
## The following objects are masked from 'package:base':
##
## %in%, :, is.double, match, order, rank
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
library(class)
## Warning: package 'class' was built under R version 3.5.3
library(OneR)
## Warning: package 'OneR' was built under R version 3.5.3
library(plotly)
## Warning: package 'plotly' was built under R version 3.5.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(proj4)
## Warning: package 'proj4' was built under R version 3.5.3
library(sf)
## Warning: package 'sf' was built under R version 3.5.3
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.5.3
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
##
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
##
## wind
library(reticulate)
## Warning: package 'reticulate' was built under R version 3.5.3
# Directory the per-borough CSV files are read from.
path <- "C:/Users/Gurpreet/Documents/DATA608/nyc_pluto/PLUTO17v1.1/"
borough_list <- c("BK", "BX", "MN", "QN", "SI")

# Read <path><code>2017V11.csv for every code in borough_list and store the
# resulting table in a variable named pluto_<code> (pluto_BK, pluto_BX, ...).
for (i in borough_list) {
  assign(paste0("pluto_", i), fread(paste0(path, i, "2017V11.csv")))
}
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:plotly':
##
## arrange, mutate, rename, summarise
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
# Stack the per-borough tables into a single data frame.
# The tables are fetched by their exact names (pluto_BK, pluto_BX, ...) instead
# of ls(pattern = "pluto_"): that pattern also matches pluto_all itself, so
# re-running the line would bind the combined table onto itself and duplicate
# every row. rbind.fill() accepts a list of data frames and fills columns that
# are missing from a table with NA.
pluto_all <- plyr::rbind.fill(mget(paste0("pluto_", borough_list)))
#unloadNamespace("plyr")
# Dimensions (rows, columns) of the combined table.
dim(pluto_all)
## [1] 859223 89
# Frequency of every distinct YearBuilt value, to spot implausible years.
table(pluto_all$YearBuilt)
##
## 0 1661 1665 1706 1729 1765 1779 1780 1785 1798 1799 1800
## 43699 1 1 1 1 1 1 1 2 1 1 165
## 1801 1802 1804 1805 1807 1812 1814 1816 1821 1822 1823 1824
## 1 2 1 1 1 1 1 2 2 3 1 6
## 1825 1826 1827 1829 1830 1831 1832 1833 1834 1835 1836 1837
## 4 6 4 20 14 6 5 1 7 6 7 2
## 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849
## 5 7 21 10 11 11 19 19 9 9 15 20
## 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861
## 45 8 14 12 6 13 11 8 7 9 36 3
## 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873
## 2 3 3 8 6 5 8 12 58 20 5 5
## 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885
## 4 15 2 1 3 3 179 49 11 8 12 25
## 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897
## 18 16 18 17 440 49 12 14 13 50 42 19
## 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909
## 33 24870 9586 25619 135 181 273 7620 752 647 351 877
## 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921
## 46675 704 722 578 598 16398 646 515 254 309 91340 1158
## 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933
## 1160 1632 2132 70699 3146 3274 4352 2127 77128 32530 1236 964
## 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945
## 371 25433 641 667 761 964 38322 745 323 198 177 24952
## 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957
## 345 379 603 754 47803 763 692 570 646 26804 764 944
## 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969
## 811 931 37181 819 957 1089 947 19641 611 617 629 508
## 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981
## 17139 498 631 852 854 9739 660 728 1009 625 5454 551
## 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993
## 778 1161 1333 3473 3738 3800 3597 3607 3507 2261 3169 2458
## 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005
## 2130 2557 3464 3162 2761 3724 4190 4534 4180 4431 6529 6012
## 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017
## 4857 4085 2907 1809 1198 1403 1488 1362 1637 1675 1443 17
## 2040
## 1
# Smallest and largest values of YearBuilt and NumFloors in the combined table.
min(pluto_all$YearBuilt)
## [1] 0
max(pluto_all$YearBuilt)
## [1] 2040
min(pluto_all$NumFloors)
## [1] 0
max(pluto_all$NumFloors)
## [1] 119
YearBuilt values of 0 and 2040 are data errors or missing data, so we will remove those records.
# Keep only the rows whose YearBuilt is not 0, 2040 or missing.
pluto_all <- filter(pluto_all, !(YearBuilt %in% c(0, 2040, NA)))
As stated in data dictionary: If the NUMBER OF FLOORS is zero and the NUMBER OF BUILDINGS is greater than zero, then NUMBER OF FLOORS is not available for the tax lot. If the NUMBER OF FLOORS is zero and the NUMBER OF BUILDINGS is zero, then NUMBER OF FLOORS is not applicable for the tax lot.
Since we are focusing on the number of floors, for this part of the analysis we filter out the tax lots whose number of floors is not available or not applicable.
# Count buildings per (YearBuilt, NumFloors) pair, keeping only lots that
# report at least one building and a non-zero floor count.
# The previous condition OR-ed four tests together, so any row with a
# non-missing value passed and the zero-floor lots were never removed.
# A comparison against NA yields NA, which filter() treats as FALSE, so no
# separate is.na() checks are required.
# The dplyr:: prefixes are explicit because plyr is attached after dplyr and
# masks mutate() and summarise().
q1 <- pluto_all %>%
  dplyr::filter(NumBldgs > 0, NumFloors > 0) %>%
  dplyr::group_by(YearBuilt, NumFloors) %>%
  dplyr::summarise(num_build = dplyr::n()) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(l_numbuild = log(num_build))
# Line chart of num_build against YearBuilt with a log10 y-axis, rendered as
# an interactive plotly widget.
p0 <- ggplot(data = q1, mapping = aes(x = YearBuilt, y = num_build)) +
  geom_line() +
  scale_y_log10()
ggplotly(p0)
A look at the plot reveals that from around 1830 and earlier there are not many buildings reported. That might be because the data is missing information due to data entry errors, because only some buildings were recorded in the system, or because the buildings may have been demolished and replaced by newer ones. Based on that inference, we will look at the data from 1830 and forward.
We will bin the years into decades and the floors into intervals of 10, and plot the result to find the number of buildings in each decade within each range of floors.
# Map a year to the first year of its decade (e.g. 1835 -> 1830, 1849 -> 1840).
#
# value: numeric vector of years.
# Returns a numeric vector of the same length.
#
# The earlier version used round(), which sent 1835-1839 to 1840 and, because
# R rounds halves to even, produced decade bins of unequal width.
#https://stackoverflow.com/questions/35352914/floor-a-year-to-the-decade-in-r
round_to_decade <- function(value) {
  floor(value / 10) * 10
}
# Restrict to buildings from 1830 onward and add the two binning columns:
#   floorbin - NumFloors cut into left-closed intervals [0,10), [10,20), ...
#   yrbin    - decade of YearBuilt, via round_to_decade()
# The comparison is >= so that 1830 itself is kept, matching the stated
# "from 1830 and forward" cut-off (the original > dropped that year).
# dplyr:: is explicit because plyr, attached later, masks mutate().
q1 <- q1 %>%
  dplyr::filter(YearBuilt >= 1830) %>%
  dplyr::mutate(
    floorbin = cut(NumFloors, breaks = seq(0, 120, 10), right = FALSE),
    yrbin = round_to_decade(YearBuilt)
  )
# Stacked bars of l_numbuild per decade, one segment colour per floor bin.
# fill (not color) is mapped to floorbin: color only affects the bar outline,
# leaving every segment the same default fill. geom_col() is the direct
# equivalent of geom_bar(stat = "identity").
# The plot object is passed to ggplotly() explicitly instead of relying on
# the implicit "last plot".
p1 <- ggplot(q1, aes(x = yrbin, y = l_numbuild, fill = floorbin)) +
  geom_col() +
  theme_minimal()
ggplotly(p1)
The early decades of the 1900s seem to be the start of those buildings, with a small dip in the middle.
We will work with the columns AssessTot, AssessLand, Latitude and Longitude for this part. To analyze developed and underdeveloped areas, a number of factors should be considered, including the facilities available in the neighbourhood: school districts, shopping areas and crime statistics. In our situation we can look at the value of land and property. In addition to this, the number of floors can also be considered. However, we will drop the number-of-floors column, as the total value of the land is a more important factor than the number of floors. For example, in a specific area a 20-floor building will have more value than a 5-floor building. We will focus on the assessed land value and the assessed total value only for this part. In addition, NYC has different property classes based on tax and property assessment. We will not go into those details and will focus on the visualization. We make the assumption that if the building value is less than 70% of the total assessment, the lot is flagged as underdeveloped.
# Load the extract for this part; every column is read as character and the
# two assessment columns are converted explicitly below.
ny <- fread("C:/Users/Gurpreet/Documents/DATA608/q2.csv", colClasses = "character")

# Keep the lots flagged as underdeveloped: building value (total assessment
# minus land assessment) below 70% of the total assessment.
# The assessment columns are converted with as.numeric() rather than
# as.integer(): values above the 32-bit integer range were turned into NA by
# as.integer() and those lots were then silently dropped by the filter.
# Rows where the comparison is NA are still dropped by filter(), as before.
q2 <- ny %>%
  dplyr::mutate(
    AssessTot = as.numeric(AssessTot),
    AssessLand = as.numeric(AssessLand),
    build_val = AssessTot - AssessLand
  ) %>%
  dplyr::filter(build_val < AssessTot * 0.7)
## Warning: NAs introduced by coercion to integer range
## Warning: NAs introduced by coercion to integer range
import datashader as ds
## C:\Users\Gurpreet\AppData\Local\Programs\Python\Python37\lib\site-packages\datashader\transfer_functions.py:21: FutureWarning: xarray subclass Image should explicitly define __slots__
## class Image(xr.DataArray):
import datashader.glyphs
#mport plotly.plotly as py
#import plotly.graph_objs as go
import datashader.transfer_functions as tf
from plotly import tools
from functools import partial
from datashader import reductions
from pyproj import Proj, transform
from datashader.core import bypixel
References: https://community.rstudio.com/t/how-to-add-my-api-key-into-get-map/15992/5 https://rpubs.com/jhofman/nycmaps http://rforpublichealth.blogspot.com/2015/10/mapping-with-ggplot-create-nice.html