# Install 'foreign' only when it is missing. An unconditional
# install.packages() call stops with "trying to use CRAN without setting a
# mirror" when no repository is configured, so the mirror is given explicitly.
if (!requireNamespace("foreign", quietly = TRUE)) {
  install.packages("foreign", repos = "https://cloud.r-project.org")
}
library(foreign) # Provides read.dbf(), used below to load the DBF table
## Warning: package 'foreign' was built under R version 2.15.3
# Read the mnmappluto DBF table into the data frame MN.
MN <- read.dbf(file = "/Users/xiwang/Dropbox/GEOG 5023 - offline/Data/mnmappluto.dbf")
# First look at the coordinates: the scatter plot looks odd, so inspect the
# range of both coordinate columns with summary().
plot(MN$YCoord ~ MN$XCoord)
summary(MN[c("YCoord", "XCoord")])
## YCoord XCoord
## Min. : 0 Min. : 0
## 1st Qu.:207645 1st Qu.: 986617
## Median :219274 Median : 991591
## Mean :217078 Mean : 977235
## 3rd Qu.:231006 3rd Qu.: 998354
## Max. :259301 Max. :1009761
# Keep only the rows in which both coordinates are strictly positive.
MN <- MN[with(MN, XCoord > 0 & YCoord > 0), ]
dim(MN)
## [1] 43318 78
# Re-draw the scatter plot on the filtered data; it looks better now.
plot(MN$YCoord ~ MN$XCoord)
## Explore
# Row indices of records whose OwnerName contains "trump" (any letter case).
trumpBldgs <- grep(pattern = "trump", x = MN$OwnerName, ignore.case = TRUE)
# Print the address, AssessTot value and owner name for those rows.
MN[trumpBldgs, c("Address", "AssessTot", "OwnerName")]
## Address AssessTot OwnerName
## 13619 200 EAST 69 STREET 53710629 TRUMP PALACE COMPANY
## 39891 1030 3 AVENUE 23940000 TRUMP PLAZA OWNERS IN
# Number of records per historic district; records without one show as NA's.
summary(MN[["HistDist"]])
## African Burial Ground & The Commons
## 9
## Audubon Terrace
## 7
## Carnegie Hill
## 94
## Charlton-King-Vandam
## 72
## Chelsea
## 142
## Chelsea Historic District Extension
## 108
## East 17th Street/Irving Place
## 10
## Ellis Island
## 1
## Expanded Carnegie Hill
## 267
## Fraunces Tavern Block
## 11
## Gansevoort Market
## 77
## Governors Island
## 1
## Gramercy Park
## 64
## Gramercy Park Extension
## 1
## Greenwich Village
## 1864
## Greenwich Village Extension
## 37
## Hamilton Heights
## 203
## Hamilton Heights Extension
## 51
## Hamilton Heights/Sugar Hill
## 186
## Hamilton Heights/Sugar Hill District Ext
## 15
## Hamilton Heights/Sugar Hill Northeast
## 32
## Hamilton Heights/Sugar Hill Northwest
## 108
## Hardenbergh / Rhinelander
## 7
## Henderson Place
## 21
## Jumel Terrace
## 56
## Ladies' Mile
## 348
## MacDougal-Sullivan Gardens
## 21
## Madison Square North
## 93
## Metropolitan Museum
## 131
## Mount Morris Park
## 257
## Murray Hill
## 76
## Murray Hill Historic District Extension
## 12
## NoHo
## 102
## Noho East
## 39
## Riverside - West 105th Street
## 30
## Riverside Drive-West 80th-81 Street
## 36
## Riverside-West End
## 260
## Sniffen Court
## 9
## SoHo-Cast Iron
## 445
## South Street Seaport
## 70
## South Street Seaport Extension
## 12
## St. Mark's
## 31
## St. Mark's Extension
## 2
## St. Nicholas
## 146
## Stone Street
## 14
## Stuyvesant Square
## 55
## Treadwell Farm
## 76
## Tribeca East
## 188
## Tribeca North
## 62
## Tribeca South
## 62
## Tribeca South Extension
## 23
## Tribeca West
## 179
## Tudor City
## 21
## Turtle Bay Gardens
## 19
## Upper East Side
## 980
## Upper West Side/Central Park West
## 1864
## Weehawken Street
## 9
## West 71st Street
## 34
## West End - Collegiate
## 144
## NA's
## 34024
# is.na() returns TRUE wherever a value is missing (NA); shown here for the
# first 100 values of HistDist.
is.na(MN$HistDist[1:100])
## [1] FALSE TRUE FALSE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
## [12] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [23] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [34] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [45] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [56] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [67] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [78] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE
## [89] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [100] TRUE
# Dummy variable for buildings in a historic district:
# 1 when HistDist is present, 0 when it is missing (NA).
MN$HD <- as.numeric(!is.na(MN$HistDist))
summary(MN$HD)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 0.215 0.000 1.000
# A factor is R's type for a column that holds categorical data.
MN$HD <- factor(MN$HD) # Convert MN$HD from numeric to a factor
# Once HD is a factor, summary() reports a count per level instead of quantiles.
summary(MN$HD)
## 0 1
## 34024 9294
# Map historic districts.
# 'col' colours each dot according to its value in the 'HD' column, 'pch = 16'
# draws a solid dot, and 'cex = 0.5' makes the dot half the normal size.
plot(x = MN$XCoord, y = MN$YCoord, col = MN$HD, pch = 16, cex = 0.5)
# Split MN into two tables: buildings inside a historic district (inHD) and
# buildings outside any historic district (outHD).
inHD <- MN[MN$HD == "1", ]
outHD <- MN[MN$HD == "0", ]
# Hypothesis Test 1. Null hypothesis: there is no difference in the average
# value of historic buildings compared to those not in historic districts.
t.test(x = inHD$AssessTot, y = outHD$AssessTot)
##
## Welch Two Sample t-test
##
## data: inHD$AssessTot and outHD$AssessTot
## t = -15.05, df = 43286, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1724546 -1327117
## sample estimates:
## mean of x mean of y
## 1233743 2759574
# The t statistic is large in magnitude and the p-value is far below 0.05, so
# we reject the null: the mean assessed value of historic properties differs
# significantly from that of non-historic properties
Hypothesis test 1 compares the average value of all historic buildings with the average value of all non-historic buildings.
# Hypothesis Test 2. Null hypothesis: the average size (BldgArea) of historic
# buildings is the same as that of buildings in non-historic areas.
# The call text is kept as is because t.test() builds the "data:" line of its
# printed output from the argument expressions.
t.test(x = inHD$BldgArea, y = outHD$BldgArea)
##
## Welch Two Sample t-test
##
## data: inHD$BldgArea and outHD$BldgArea
## t = -9.037, df = 15819, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -25103 -16154
## sample estimates:
## mean of x mean of y
## 22050 42678
The average sizes of buildings inside and outside the historic districts are significantly different. This helps explain why the average values of historic and non-historic buildings differ: part of the gap is due to the difference in square footage.
# Select buildings on the same block as a historic district.
# 'blocks' holds the Block value of every historic building (with repeats).
blocks <- inHD$Block
head(blocks) # Display the first 6 values of blocks
## [1] 1 1 7 7 7 7
# All buildings (from MN) whose block also contains a historic building.
HDB <- MN[MN$Block %in% blocks, ]
# Within those blocks, separate the historic from the non-historic properties.
HDB_in <- HDB[HDB$HD == "1", ]
HDB_out <- HDB[HDB$HD == "0", ]
# Hypothesis Test 3. Null hypothesis: there is no difference between the
# average value of historic and non-historic properties on the same block.
t.test(x = HDB_in$AssessTot, y = HDB_out$AssessTot)
##
## Welch Two Sample t-test
##
## data: HDB_in$AssessTot and HDB_out$AssessTot
## t = -9.728, df = 4349, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1507426 -1001727
## sample estimates:
## mean of x mean of y
## 1233743 2488319
Historic designation is associated with a significantly lower property value, even after controlling for location by comparing only properties on blocks that contain historic buildings.
# Check the distribution of building area: some buildings have an area of 0.
summary(HDB_in$BldgArea)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 4160 6370 22100 13000 17600000
# Price per square foot (AssessTot / BldgArea), computed only for buildings
# with an area greater than 0 so that there is no division by zero.
HDB_in_sqft <- with(HDB_in[HDB_in$BldgArea > 0, ], AssessTot / BldgArea)
HDB_out_sqft <- with(HDB_out[HDB_out$BldgArea > 0, ], AssessTot / BldgArea)
# Null hypothesis: there is no difference in the price per square foot for
# historic and non-historic properties.
t.test(x = HDB_in_sqft, y = HDB_out_sqft)
##
## Welch Two Sample t-test
##
## data: HDB_in_sqft and HDB_out_sqft
## t = -1.664, df = 4521, p-value = 0.09614
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -36.413 2.976
## sample estimates:
## mean of x mean of y
## 66.76 83.48
Since the p-value is 0.09614 (greater than 0.05), we fail to reject the null hypothesis: there is no statistically significant difference in the price per square foot of historic and non-historic buildings. In this test we have controlled for location by comparing buildings on the same blocks, and for building size by comparing price per square foot rather than overall price.
# Correlation between YCoord and assessed value.
# The null hypothesis is that the true correlation is 0.
cor.test(x = MN$YCoord, y = MN$AssessTot)
##
## Pearson's product-moment correlation
##
## data: MN$YCoord and MN$AssessTot
## t = -12.43, df = 43316, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.06902 -0.05025
## sample estimates:
## cor
## -0.05964
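The cor.test() result is statistically significant, so we reject the null hypothesis that the correlation is 0. However, the correlation of -0.05964 is very weak, and because it is negative it indicates that properties farther uptown (larger YCoord, i.e. farther north) tend to have slightly lower values than downtown ones.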