Stauffer Homework 2

# Homework 2: Hypothesis Testing Done by: Andy Staffer

# install.packages('foreign') #Only need to do this once per computer.
library("foreign")

# Load the Manhattan MapPLUTO attribute table and take a first look at the
# lot coordinates.
MN <- read.dbf(file = "G:/Quant/Assignments/Homework2/mnmappluto.dbf")
plot(x = MN$XCoord, y = MN$YCoord)  # Misleading: some records sit at (0,0).

plot of chunk unnamed-chunk-1

# Clean up the dbf data...
# The row index before the comma is left empty, which selects every row; the
# column index keeps only the two coordinate columns.
summary(MN[, c("YCoord", "XCoord")])
##      YCoord           XCoord       
##  Min.   :     0   Min.   :      0  
##  1st Qu.:207645   1st Qu.: 986617  
##  Median :219274   Median : 991591  
##  Mean   :217078   Mean   : 977235  
##  3rd Qu.:231006   3rd Qu.: 998354  
##  Max.   :259301   Max.   :1009761
# Keep only the records whose X and Y coordinates are both positive, i.e.
# drop the records plotted at (0,0).
MN_fix <- MN[(MN$XCoord > 0) & (MN$YCoord > 0), ]
dim(MN)  # rows and columns before the data was fixed
## [1] 43992    78
dim(MN_fix)  # rows and columns after the data was fixed
## [1] 43318    78
plot(x = MN_fix$XCoord, y = MN_fix$YCoord)

plot of chunk unnamed-chunk-1


# For fun: case-insensitive string search on the owner name with grep().
# grep() returns the row positions of the matches, which then index MN_fix.
trumpBldgs <- grep(pattern = "trump", x = MN_fix$OwnerName, ignore.case = TRUE)
MN_fix[trumpBldgs, c("Address", "AssessTot", "OwnerName")]
##                  Address AssessTot             OwnerName
## 13619 200 EAST 69 STREET  53710629  TRUMP PALACE COMPANY
## 39891      1030 3 AVENUE  23940000 TRUMP PLAZA OWNERS IN

# Identify historical districts: count the records at each level of HistDist.
# The final "NA's" entry counts records with no district recorded.
summary(MN_fix$HistDist)
##      African Burial Ground & The Commons 
##                                        9 
##                          Audubon Terrace 
##                                        7 
##                            Carnegie Hill 
##                                       94 
##                     Charlton-King-Vandam 
##                                       72 
##                                  Chelsea 
##                                      142 
##      Chelsea Historic District Extension 
##                                      108 
##            East 17th Street/Irving Place 
##                                       10 
##                             Ellis Island 
##                                        1 
##                   Expanded Carnegie Hill 
##                                      267 
##                    Fraunces Tavern Block 
##                                       11 
##                        Gansevoort Market 
##                                       77 
##                         Governors Island 
##                                        1 
##                            Gramercy Park 
##                                       64 
##                  Gramercy Park Extension 
##                                        1 
##                        Greenwich Village 
##                                     1864 
##              Greenwich Village Extension 
##                                       37 
##                         Hamilton Heights 
##                                      203 
##               Hamilton Heights Extension 
##                                       51 
##              Hamilton Heights/Sugar Hill 
##                                      186 
## Hamilton Heights/Sugar Hill District Ext 
##                                       15 
##    Hamilton Heights/Sugar Hill Northeast 
##                                       32 
##    Hamilton Heights/Sugar Hill Northwest 
##                                      108 
##                Hardenbergh / Rhinelander 
##                                        7 
##                          Henderson Place 
##                                       21 
##                            Jumel Terrace 
##                                       56 
##                             Ladies' Mile 
##                                      348 
##               MacDougal-Sullivan Gardens 
##                                       21 
##                     Madison Square North 
##                                       93 
##                      Metropolitan Museum 
##                                      131 
##                        Mount Morris Park 
##                                      257 
##                              Murray Hill 
##                                       76 
##  Murray Hill Historic District Extension 
##                                       12 
##                                     NoHo 
##                                      102 
##                                Noho East 
##                                       39 
##                       Riverside-West End 
##                                      260 
##            Riverside - West 105th Street 
##                                       30 
##      Riverside Drive-West 80th-81 Street 
##                                       36 
##                            Sniffen Court 
##                                        9 
##                           SoHo-Cast Iron 
##                                      445 
##                     South Street Seaport 
##                                       70 
##           South Street Seaport Extension 
##                                       12 
##                               St. Mark's 
##                                       31 
##                     St. Mark's Extension 
##                                        2 
##                             St. Nicholas 
##                                      146 
##                             Stone Street 
##                                       14 
##                        Stuyvesant Square 
##                                       55 
##                           Treadwell Farm 
##                                       76 
##                             Tribeca East 
##                                      188 
##                            Tribeca North 
##                                       62 
##                            Tribeca South 
##                                       62 
##                  Tribeca South Extension 
##                                       23 
##                             Tribeca West 
##                                      179 
##                               Tudor City 
##                                       21 
##                       Turtle Bay Gardens 
##                                       19 
##                          Upper East Side 
##                                      980 
##        Upper West Side/Central Park West 
##                                     1864 
##                         Weehawken Street 
##                                        9 
##                         West 71st Street 
##                                       34 
##                    West End - Collegiate 
##                                      144 
##                                     NA's 
##                                    34024
# Massage the HistDist field using is.na() and ifelse(): HD is 1 when a
# historic district is recorded for the row and 0 when HistDist is NA.
# The column is tested directly. Indexing rows with 1:dim(MN_fix) only worked
# by accident: dim() returns c(rows, cols) and `:` uses just the first element
# (with a warning), so the result is the same but the intent is now explicit.
MN_fix$HD <- ifelse(is.na(MN_fix$HistDist), 0, 1)
# HD is numeric at this point, so summary() gives quantiles rather than counts;
# table(MN_fix$HD) would count the 0's and 1's directly.
summary(MN_fix$HD)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.215   0.000   1.000
# Once HD is a factor, summary() reports the number of rows per level.
MN_fix$HD <- as.factor(MN_fix$HD)
summary(MN_fix$HD)
##     0     1 
## 34024  9294
# Create a categorical map of the records, coloured by historic-district flag.
# HD is a factor, so col receives its integer codes: level "0" is drawn in
# palette colour 1 and level "1" in palette colour 2.
# pch = 16 selects a filled circle. The argument was previously misspelled
# "psh", which is not a graphical parameter, so R warned repeatedly and fell
# back to the default plotting symbol.
plot(y = MN_fix$YCoord, x = MN_fix$XCoord, col = MN_fix$HD, pch = 16, cex = 0.5)

plot of chunk unnamed-chunk-1

# Split the cleaned data by the HD flag: level "1" marks buildings that are IN
# a historical district, level "0" buildings that are not.
inHD <- MN_fix[MN_fix$HD == "1", ]
outHD <- MN_fix[MN_fix$HD == "0", ]

######################################################## Hypothesis Testing
# H_o = historic districts have no effect on property values,
# aka. H_o = inHD_propertyValues = outHD_propertyValues
t.test(x = inHD$AssessTot, y = outHD$AssessTot)
## 
##  Welch Two Sample t-test
## 
## data:  inHD$AssessTot and outHD$AssessTot 
## t = -15.05, df = 43286, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0 
## 95 percent confidence interval:
##  -1724546 -1327117 
## sample estimates:
## mean of x mean of y 
##   1233743   2759574

######## QUESTION 1: What does this hypothesis test tell you? ########
# We can reject the null hypothesis at the 95% confidence level: there is a
# significant difference between the two groups of property values.

# H_o = inHD_size = outHD_size: mean building area is the same inside and
# outside historic districts.
t.test(x = inHD$BldgArea, y = outHD$BldgArea)
## 
##  Welch Two Sample t-test
## 
## data:  inHD$BldgArea and outHD$BldgArea 
## t = -9.037, df = 15819, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0 
## 95 percent confidence interval:
##  -25103 -16154 
## sample estimates:
## mean of x mean of y 
##     22050     42678
######## QUESTION 2: What does hypothesis test 2 tell you about the size of
######## buildings inside and outside of historic districts? ########
# We can reject the null hypothesis at the 95% confidence level: there is a
# significant difference between the two groups of area/size.

# Refine HD_out to include some spatial relationships:
# blocks holds the Block value of every row that is in a historical district
# (one entry per building, so values repeat).
blocks <- inHD$Block
# head() shows the first six entries of blocks. The repeated values mean that
# several historic buildings share the same Block value (presumably the tax
# block number -- confirm against the PLUTO data dictionary).
head(blocks)
## [1] 1 1 7 7 7 7
# x %in% y is TRUE for each element of x that also occurs in y, so HDB keeps
# every row of MN_fix whose Block contains at least one historic building.
HDB <- MN_fix[MN_fix$Block %in% blocks, ]
# Refine definitions of historical districts based on spatial
# relationships: HDB_in is all of the historical buildings; HDB_out is only
# the non-historical buildings that are on the same block as a historical
# building.
HDB_out <- HDB[HDB$HD == 0, ]
HDB_in <- HDB[HDB$HD == 1, ]

# H_o = inHDB_propertyValues = outHDB_propertyValues: mean assessed value is
# the same for HDB_in and HDB_out.
t.test(x = HDB_in$AssessTot, y = HDB_out$AssessTot)
## 
##  Welch Two Sample t-test
## 
## data:  HDB_in$AssessTot and HDB_out$AssessTot 
## t = -9.728, df = 4349, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0 
## 95 percent confidence interval:
##  -1507426 -1001727 
## sample estimates:
## mean of x mean of y 
##   1233743   2488319
######## QUESTION 3: After controlling for location, is the historic
######## district designation associated with a difference in property
######## values? Are the buildings in the historic district different from
######## their non-historic neighbors?  Use the p-value from hypothesis
######## test 3 to support your conclusions. ########
# After controlling for location, there is a significant difference in
# property values between buildings that are in versus out of the historic
# district boundaries.  Since the p-value was less than 0.05, we can reject
# the null hypothesis with a confidence of 95%.

# Control for building size as well as location by comparing assessed value
# per square foot.
summary(HDB_in$BldgArea)  # The minimum area is 0, so filter before dividing.
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0     4160     6370    22100    13000 17600000
# Value per square foot for historic buildings with a positive area.
HDB_in_sqft <- with(HDB_in[HDB_in$BldgArea > 0, ], AssessTot / BldgArea)
# Value per square foot for non-historic buildings with a positive area.
HDB_out_sqft <- with(HDB_out[HDB_out$BldgArea > 0, ], AssessTot / BldgArea)
# H_o = HDB_insqft_PropertyValue = HDB_outsqft_PropertyValue
t.test(x = HDB_in_sqft, y = HDB_out_sqft)
## 
##  Welch Two Sample t-test
## 
## data:  HDB_in_sqft and HDB_out_sqft 
## t = -1.664, df = 4521, p-value = 0.09614
## alternative hypothesis: true difference in means is not equal to 0 
## 95 percent confidence interval:
##  -36.413   2.976 
## sample estimates:
## mean of x mean of y 
##     66.76     83.48
######## QUESTION 4: After controlling for location and building size, do
######## historic and non-historic buildings have significantly different
######## values?  If your conclusion has changed between the first and
######## fourth hypothesis tests, explain why. ########
# At the 95% confidence level we fail to reject the null hypothesis
# (p-value = 0.096): there is no significant difference between the property
# values of historical and non-historical buildings.  This result has changed
# since the first hypothesis test, which just compared building values between
# all historical and all non-historical buildings.  Here we have refined our
# search radius of non-historical buildings to only include those that
# directly neighbor historical districts (Tobler's First Law).  We also
# normalized the property values by building area.  This removes the bias of
# very large buildings being more expensive simply because they are larger.




######################################################## Correlation
######## QUESTION 5: Factors other than historic designation affect the price
######## of real estate in MN.  Is there a significant correlation between a
######## building's N-S position on the island and its total assessed value?
######## Are downtown buildings worth more than uptown buildings? ########
# H_o = p = 0 between N-S location and property Value
cor.test(MN_fix$YCoord, MN_fix$AssessTot)
## 
##  Pearson's product-moment correlation
## 
## data:  MN_fix$YCoord and MN_fix$AssessTot 
## t = -12.43, df = 43316, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0 
## 95 percent confidence interval:
##  -0.06902 -0.05025 
## sample estimates:
##      cor 
## -0.05964
# At the 95% confidence level we can reject the null hypothesis: the
# correlation between the two variables is negative (cor = -0.05964).
plot(x = MN_fix$AssessTot, y = MN_fix$YCoord)

plot of chunk unnamed-chunk-1


# Normalize the property values by building area.
summary(MN_fix$BldgArea)  # Some areas are 0; those rows are dropped below.
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0     4100     8360    38300    20200 22400000
MN_bigBldg <- MN_fix[MN_fix[["BldgArea"]] > 0, ]
MN_bigBldg$normVal <- with(MN_bigBldg, AssessTot / BldgArea)
# H_o = p = 0 between N-S location and property value per square foot.
cor.test(x = MN_bigBldg$YCoord, y = MN_bigBldg$normVal)
## 
##  Pearson's product-moment correlation
## 
## data:  MN_bigBldg$YCoord and MN_bigBldg$normVal 
## t = 0.2044, df = 40826, p-value = 0.8381
## alternative hypothesis: true correlation is not equal to 0 
## 95 percent confidence interval:
##  -0.008689  0.010711 
## sample estimates:
##      cor 
## 0.001011
# Scatter plot of N-S position against value per square foot.
plot(x = MN_bigBldg$normVal, y = MN_bigBldg$YCoord)

plot of chunk unnamed-chunk-1

# Answer: At the 95% confidence level we fail to reject the null hypothesis
# (p-value = 0.8381).  There is no significant correlation between N-S
# position and property value per square foot.


######## QUESTION 6: Use the layout() function to make a chart showing
######## buildings on the island of Manhattan in one panel and a scatter
######## plot of the correlation between Ycoord and AssessTot in the other
######## panel. ########
# Answer: a one-row, two-column layout; the map goes in panel 1 and the
# scatter plot in panel 2.
myLayout <- layout(mat = matrix(c(1, 2), nrow = 1, ncol = 2), respect = TRUE)
plot(x = MN_fix$XCoord, y = MN_fix$YCoord, xlab = "xCoords", ylab = "yCoords",
    main = "Manhattan Buildings")
plot(x = MN_fix$AssessTot, y = MN_fix$YCoord, xlab = "Property Value",
    ylab = "YCoord(N-S directionality)",
    main = "Correlation between N-S position and Property Value")

plot of chunk unnamed-chunk-1