The research and findings in the following project have been completed on behalf of StreetEasy.

Required Packages

library(xts)
library(lattice)
library(ggplot2)
library(tidyr)
library(reshape)
library(car)
library(dplyr)
library(lubridate)
library(quantmod)
library(lmtest)
library(plm)
library(stargazer, quietly = T)
library(caret)
library(nnet)
library(hts)
library(knitr)

Preprocessing Data

url1 <- 'https://s3.amazonaws.com/streeteasy-market-
data-api/data_repository/E1_rentalInventory_All.zip'

url2 <- 'https://s3.amazonaws.com/streeteasy-market-
data-api/data_repository/A9_daysOnMarket_All.zip'

url3 <- 'https://s3.amazonaws.com/streeteasy-market-
data-api/data_repository/A6_medianSalePrice_All.zip'

url4 <- 'https://streeteasy-market-data-api.s3.amazonaws.com/
data_repository/E2_medianAskingRent_All.zip'

temp = tempfile()

download.file(url4, temp)
med_AskRent <- read.csv(unz(temp, "E2_medianAskingRent_All.csv"))

download.file(url3, temp)
medSalePrice  <- read.csv(unz(temp, "A6_medianSalePrice_All.csv"))

download.file(url2, temp)
daysOnMarket <- read.csv(unz(temp, "A9_daysOnMarket_All.csv"))

download.file(url1, temp)
totalRentInv <- read.csv(unz(temp, "E1_rentalInventory_All.csv"))

unlink(temp)

Remove NAs

totalRentInv <- na.exclude(totalRentInv)
med_AskRent <-na.exclude(med_AskRent)
daysOnMarket <-na.exclude(daysOnMarket)
medSalePrice <- na.exclude(medSalePrice)

Median Sale Price

medSalePriceSub <- subset(medSalePrice, as.character(medSalePrice$Area) %in% as.character(daysOnMarket$Area) & medSalePrice$AreaType == "neighborhood")
medSalePriceLong <- gather(medSalePriceSub, Date, "Median Sale Price", 4:ncol(medSalePrice), factor_key = T)
medSalePriceLong <- medSalePriceLong[order(medSalePriceLong$Area, medSalePriceLong$Date),]
medSalePriceLong$Date <- sub("X", "", medSalePriceLong[,4])
medSalePriceLong$Date <- sub(".", "-", medSalePriceLong[,4], fixed = T)
medSalePriceLong$Date <- as.Date(paste(medSalePriceLong$Date,1, sep = "-"), "%Y-%m-%d")
medSalePrice_zoo <- as.zooreg(medSalePriceLong[,c(4,1:3,5)])

medSalePriceCast <- cast(medSalePriceLong, Date ~ Area)
medSalePrice_ts <- xts(medSalePriceCast, order.by = medSalePriceCast$Date)

Median Asking Rent

medAskRent <-subset(med_AskRent, as.character(med_AskRent$Area) %in% as.character(daysOnMarket$Area) & med_AskRent$AreaType == "neighborhood")
medAskRentLong <- gather(medAskRent, Date, "Median Asking Rent", 4:ncol(med_AskRent), factor_key = T)
medAskRentLong <- medAskRentLong[order(medAskRentLong$Area, medAskRentLong$Date),]
row.names(medAskRentLong) <- 1:NROW(medAskRentLong)
medAskRentLong$Date <- sub("X", "", medAskRentLong[,4])
medAskRentLong$Date <- sub(".", "-", medAskRentLong[,4], fixed = T)
medAskRentLong$Date <- as.Date(paste(medAskRentLong$Date,1,sep="-"),"%Y-%m-%d")
med_AskRent_zoo <- as.zooreg(medAskRentLong[,c(4,1:3,5)])
medAskRentCast <- cast(medAskRentLong, Date ~ Area)
medAskRent_ts = xts(medAskRentCast, order.by = medAskRentCast$Date)

Days on Market i.e. Housing supply

daysOnMktSub <- subset(daysOnMarket, as.character(daysOnMarket$Area) %in% as.character(medAskRent$Area))
daysOnMarketLong <- gather(daysOnMktSub, Date, "Days on Market", 4:ncol(daysOnMarket), factor_key = T)
daysOnMarketLong <- daysOnMarketLong[order(daysOnMarketLong$Area, daysOnMarketLong$Date),]
row.names(daysOnMarket) <- 1:NROW(daysOnMarket)
daysOnMarketLong$Date <- sub("X", "", daysOnMarketLong[,4])
daysOnMarketLong$Date <- sub(".", "-", daysOnMarketLong[,4], fixed = T)
daysOnMarketLong$Date <- as.Date(paste(daysOnMarketLong$Date,1,sep="-"),"%Y-%m-%d")
daysOnMarket_zoo <- as.zooreg(daysOnMarketLong[,c(4,1,2,3,5)])
daysOnMarketCast <- cast(daysOnMarketLong, Date ~ Area)
daysOnMarketCast$Date <- as.yearmon(as.character(daysOnMarketCast$Date))
daysOnMarket_ts <- as.xts(daysOnMarketCast, order.by = as.yearmon(daysOnMarketCast$Date))

Total Rental Inventory

ttlRentInv <- subset(totalRentInv, as.character(totalRentInv$Area) %in% as.character(daysOnMarket$Area) & totalRentInv$AreaType == "neighborhood")
ttlRentIvLong <- gather(ttlRentInv, Date, " Total Rent Inventory", 4:ncol(totalRentInv), factor_key = T)
ttlRentIvLong <- ttlRentIvLong[order(ttlRentIvLong$Area,ttlRentIvLong$Date),]
ttlRentIvLong$Date <- sub("X", "", ttlRentIvLong[,4])
ttlRentIvLong$Date <- sub(".", "-", ttlRentIvLong[,4], fixed =  T)
ttlRentIvLong$Date <- as.Date(paste(ttlRentIvLong$Date,1, sep = "-"), "%Y-%m-%d")
ttlRentInv_zoo <-as.zooreg(ttlRentIvLong[,c(4,1:3,5)])
ttlRentIvCast <- cast(ttlRentIvLong, Date ~ Area)
ttlRentInv_ts <- as.xts(ttlRentIvCast, order.by = ttlRentIvCast$Date)

Price-Rent Ratio calculation

m1 <- apply(as.matrix.noquote(medAskRent_ts), 2, as.numeric)
m2 <- apply(as.matrix.noquote(medSalePrice_ts), 2, as.numeric)
m <- t(m2)/t(m1*12)
priceRentRatio <- t(m)
priceRentRatio <- xts(priceRentRatio, order.by = medAskRentCast$Date)
priceRentIndx <- gather(as.data.frame(priceRentRatio), Date, "Price Rent Index", 1:9, factor_key = T )

Monthly growth rate for housing supply

diffDaysOnMkt_1 <- diff(daysOnMarket_ts)/lag.xts(daysOnMarket_ts,1)

diffDaysOnMkt <- gather(as.data.frame(diffDaysOnMkt_1), Date, "Month Over Month Homes Supply", 1:9,factor_key = T)

diffDaysOnMkt[is.na(diffDaysOnMkt)] <- 0

Panel data

panelData = daysOnMarketLong[,c(4,1:3,5)]

panelData<- cbind.data.frame(panelData, medAskRentLong$`Median Asking Rent`, medSalePriceLong$`Median Sale Price`, ttlRentIvLong$` Total Rent Inventory`)

colnames(panelData) <- c("Date", "Area", "Boro", "Area Type", "Days on Market", "Median Asking Rent", "Median Sale Price", "Total Rent Inventory")

panelData$Date <- as.yearmon(as.character(daysOnMarketLong$Date))

panelData$'Price Rent Index'<- priceRentIndx[,2]

panelData$'Home Supply Growth' <- diffDaysOnMkt$`Month Over Month Homes Supply`

panelData$Area <- factor(panelData$Area)

rownames(panelData) <- 1:NROW(panelData)

panelData$`Median Asking Rent` <-panelData$`Median Asking Rent`
panelData$indexTRI <- cut(panelData$`Total Rent Inventory`, breaks = 100, labels = F)
panelData$indexPI <- cut(panelData$`Price Rent Index`, breaks = 30, labels = F) 
head(panelData)
##       Date           Area      Boro    Area Type Days on Market
## 1 Jan 2010 Central Harlem Manhattan neighborhood          207.0
## 2 Feb 2010 Central Harlem Manhattan neighborhood          210.0
## 3 Mar 2010 Central Harlem Manhattan neighborhood          193.0
## 4 Apr 2010 Central Harlem Manhattan neighborhood           79.0
## 5 May 2010 Central Harlem Manhattan neighborhood          105.0
## 6 Jun 2010 Central Harlem Manhattan neighborhood          206.5
##   Median Asking Rent Median Sale Price Total Rent Inventory
## 1             1992.5            366022                  412
## 2             1995.0            394887                  382
## 3             1895.0            393025                  333
## 4             1897.5            332085                  298
## 5             1900.0            308763                  272
## 6             1950.0            531000                  309
##   Price Rent Index Home Supply Growth indexTRI indexPI
## 1         15.30832         0.00000000        8       5
## 2         16.49486         0.01449275        7       6
## 3         17.28342        -0.08095238        6       6
## 4         14.58432        -0.59067358        5       5
## 5         13.54224         0.32911392        4       4
## 6         22.69231         0.96666667        5       9
scatterplot(panelData$`Median Asking Rent` ~panelData$`Days on Market` + panelData$Area-1, reg.line=F, xlab = "Days on Market", ylab = "Median Asking Rent", legend.title = "Neighborhoods", grid =F )

Covariance tests: Fixed Effects vs. Random Effects vs. Partial Pooling

olS <-lm(panelData$`Price Rent Index`~ panelData$`Days on Market`)
kable(summary(olS)$coef, digits=3)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 25.963 0.466 55.769 0
panelData$Days on Market -0.022 0.005 -4.008 0
fixed<- plm(panelData$`Price Rent Index`~  panelData$`Days on Market`,index = c('Area','Date'), data = panelData, model = 'within')
kable(summary(fixed)$coef, digits=3)
Estimate Std. Error t-value Pr(>|t|)
panelData$Days on Market -0.008 0.005 -1.635 0.102
pFtest(fixed, olS, lower.tail=T) #p-value < .05, choose FE
## 
##  F test for individual effects
## 
## data:  panelData$`Price Rent Index` ~ panelData$`Days on Market`
## F = 51.45, df1 = 8, df2 = 863, p-value < 2.2e-16
## alternative hypothesis: significant effects
random <- plm(panelData$`Price Rent Index`~ panelData$`Days on Market`, index = c('Area', 'Date'), data = panelData, model = 'random')

kable(summary(random)$coef, digits=3)
Estimate Std. Error t-value Pr(>|t|)
(Intercept) 24.926 1.277 19.525 0.000
panelData$Days on Market -0.008 0.005 -1.703 0.089
plot(random$residuals, within = F, pooling = F, random = T, 
    xlab = "Days on Market", ylab = "Residuals", sub = "Panel effect")

phtest(fixed,random) #p-value > .05, choose RE
## 
##  Hausman Test
## 
## data:  panelData$`Price Rent Index` ~ panelData$`Days on Market`
## chisq = 1.3684, df = 1, p-value = 0.2421
## alternative hypothesis: one model is inconsistent
pool<- plm(panelData$`Price Rent Index`~ panelData$`Days on Market`, index = c('Area','Date'), data = panelData, model = 'pooling')

kable(summary(pool)$coef, digits=3)
Estimate Std. Error t-value Pr(>|t|)
(Intercept) 25.963 0.466 55.769 0
panelData$Days on Market -0.022 0.005 -4.008 0
plmtest(pool, type = c("bp")) #p-value < .05, H0 of no panel effect rejected, choose RE
## 
##  Lagrange Multiplier Test - (Breusch-Pagan) for balanced panels
## 
## data:  panelData$`Price Rent Index` ~ panelData$`Days on Market`
## chisq = 4013.4, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
pcdtest(fixed, test = "lm") #p-value <.05, cross-sectional dependence 
## 
##  Breusch-Pagan LM test for cross-sectional dependence in panels
## 
## data:  panelData$`Price Rent Index` ~ panelData$`Days on Market`
## chisq = 408.13, df = 36, p-value < 2.2e-16
## alternative hypothesis: cross-sectional dependence
coeftest(random, vcovHC(random, type = 'HC0', method = 'white1'))
## 
## t test of coefficients:
## 
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                24.9262562  1.2691316 19.6404  < 2e-16 ***
## panelData$`Days on Market` -0.0083689  0.0043462 -1.9256  0.05448 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Construct IV using random effects model

iv1 <- predict(random)

panelData$iv1 <- iv1 

Plots for exploration

tsRainbow <- rainbow(ncol(medAskRent_ts))
xyplot.ts(medAskRent_ts, col = tsRainbow, ylab = "Median Asking Rent")

diffDaysOnMkt_1 %>% as.data.frame() %>% na.omit() %>% GGally::ggpairs(axisLabels = "none", title = "Days on Market Monthly Growth Rate")

Sampling for softmax regression

set.seed(3214)
trainingRows <- sample(1:nrow(panelData), .7*nrow(panelData))
trainS <- panelData[sort(trainingRows),]
testS <- panelData[sort(-trainingRows),]
table(trainS$Area)
## 
##    Central Harlem           Chelsea Greenwich Village      Midtown East 
##                68                72                69                68 
##      Midtown West        Park Slope   Upper East Side   Upper West Side 
##                71                69                61                66 
##      Williamsburg 
##                67

Standardize variables for classification algorithm

stdZ01 <- function(x){(x-min(x))/(max(x)-min(x))
  }
train <- apply(trainS[,c(5:10,13)], 2, stdZ01) %>%
  cbind.data.frame(trainS[,c(1:2,11:12)]) 


test <- apply(testS[,c(5:10,13)], 2, stdZ01) %>%
  cbind.data.frame(testS[,c(1:2,11:12)]) 

Multinomial log-linear models via neural networks

options(contrasts = c("indexTRI", "indexPI"))

mod1<- multinom(Area ~`Days on Market` +  `Days on Market`*indexTRI +indexTRI, data = train, maxit=500) 

summary(mod1)
varImp(mod1)

mod2<- multinom(Area ~  indexTRI  , data = train) %>%
  update(.~.  + iv1*indexTRI + iv1, maxit=500, Hess= T)

summary(mod2)
varImp(mod2)
stargazer(mod1, type = "html", out = "mod1.htm",
           intercept.bottom = F, model.numbers = F,
          align = T)
Dependent variable:
Chelsea Greenwich Village Midtown East Midtown West Park Slope Upper East Side Upper West Side Williamsburg
Constant 1.685** 5.886*** -5.951*** -2.070*** 3.380*** -20.775*** -11.864*** -0.127
(0.683) (0.827) (1.109) (0.745) (0.715) (2.929) (2.265) (0.733)
Days on Market -7.224*** -19.096*** 1.112 -1.351 -8.180*** 8.973 -0.701 -5.926**
(2.338) (3.489) (2.830) (1.966) (2.871) (9.706) (9.495) (2.558)
indexTRI -0.040 -0.336*** 0.170*** 0.071** -0.072* 0.371*** 0.286*** 0.036
(0.036) (0.062) (0.038) (0.033) (0.040) (0.057) (0.053) (0.034)
Days on Market:indexTRI 0.224 0.839*** 0.234* 0.253** -0.529* 0.425* 0.278 0.226
(0.156) (0.295) (0.140) (0.127) (0.271) (0.230) (0.234) (0.146)
Akaike Inf. Crit. 1,754.090 1,754.090 1,754.090 1,754.090 1,754.090 1,754.090 1,754.090 1,754.090
Note: p<0.1; p<0.05; p<0.01
stargazer(mod2, type = "html", out = "mod2.htm",
           intercept.bottom = F, model.numbers = F,
          align = T) 
Dependent variable:
Chelsea Greenwich Village Midtown East Midtown West Park Slope Upper East Side Upper West Side Williamsburg
Constant -4.903*** -10.225*** -3.830** -2.752** -4.349*** -7.803* -15.790** -5.277***
(1.522) (2.099) (1.800) (1.376) (1.545) (4.720) (6.967) (1.704)
indexTRI 0.490*** 0.790*** 0.549*** 0.450*** 0.321** 0.850*** 0.917*** 0.526***
(0.127) (0.188) (0.123) (0.117) (0.144) (0.158) (0.187) (0.125)
iv1 6.113*** 16.639*** -3.379 -0.154 6.806*** -14.580** 1.910 4.439*
(2.175) (2.998) (2.710) (2.021) (2.209) (6.880) (9.409) (2.398)
indexTRI:iv1 -0.588*** -1.278*** -0.389** -0.387*** -0.470** -0.496** -0.670*** -0.534***
(0.161) (0.258) (0.154) (0.146) (0.183) (0.199) (0.239) (0.156)
Akaike Inf. Crit. 1,853.530 1,853.530 1,853.530 1,853.530 1,853.530 1,853.530 1,853.530 1,853.530
Note: p<0.1; p<0.05; p<0.01
oddsRatioTable1 <- exp(coef(mod1))
kable(oddsRatioTable1, digits = 5, align = "c")#log-odds
(Intercept) Days on Market indexTRI Days on Market:indexTRI
Chelsea 5.39237 0.00073 0.96083 1.25101
Greenwich Village 359.83946 0.00000 0.71441 2.31483
Midtown East 0.00260 3.03910 1.18535 1.26317
Midtown West 0.12618 0.25892 1.07317 1.28728
Park Slope 29.38536 0.00028 0.93064 0.58925
Upper East Side 0.00000 7887.04534 1.44874 1.52988
Upper West Side 0.00001 0.49622 1.33067 1.32015
Williamsburg 0.88064 0.00267 1.03625 1.25363
oddsRatioTable2 <- exp(coef(mod2))
kable(oddsRatioTable2, digits = 3, align = "c")#log-odds
(Intercept) indexTRI iv1 indexTRI:iv1
Chelsea 0.007 1.632 451.639 0.555
Greenwich Village 0.000 2.204 16838453.373 0.279
Midtown East 0.022 1.732 0.034 0.678
Midtown West 0.064 1.568 0.857 0.679
Park Slope 0.013 1.379 903.449 0.625
Upper East Side 0.000 2.339 0.000 0.609
Upper West Side 0.000 2.501 6.752 0.512
Williamsburg 0.005 1.691 84.649 0.586

For this m x m confusion matrix, negative recall (specificity) is high, a good sign in terms of our prediction goals. Additionally, balanced accuracy is the preferred evaluation metric here to decipher algorithm performance

t1 <- predict(mod1,test, type = "class")
cfMat <- confusionMatrix(t1, test$Area)
print(cfMat)
## Confusion Matrix and Statistics
## 
##                    Reference
## Prediction          Central Harlem Chelsea Greenwich Village Midtown East
##   Central Harlem                 9       3                 5            0
##   Chelsea                       10       6                 1            0
##   Greenwich Village              1       1                17            0
##   Midtown East                   0       0                 0           15
##   Midtown West                   2       5                 0            6
##   Park Slope                     5       4                 5            0
##   Upper East Side                0       0                 0            2
##   Upper West Side                0       0                 0            6
##   Williamsburg                   2       6                 0            0
##                    Reference
## Prediction          Midtown West Park Slope Upper East Side
##   Central Harlem               3          1               0
##   Chelsea                      3          3               0
##   Greenwich Village            0         12               0
##   Midtown East                 4          0               1
##   Midtown West                 7          0               0
##   Park Slope                   0         10               0
##   Upper East Side              0          0              28
##   Upper West Side              0          0               7
##   Williamsburg                 9          2               0
##                    Reference
## Prediction          Upper West Side Williamsburg
##   Central Harlem                  0            5
##   Chelsea                         0            0
##   Greenwich Village               0            5
##   Midtown East                   10            5
##   Midtown West                    0            4
##   Park Slope                      0            4
##   Upper East Side                 3            0
##   Upper West Side                18            0
##   Williamsburg                    0            7
## 
## Overall Statistics
##                                          
##                Accuracy : 0.4466         
##                  95% CI : (0.3854, 0.509)
##     No Information Rate : 0.1374         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.3768         
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: Central Harlem Class: Chelsea
## Sensitivity                        0.31034        0.24000
## Specificity                        0.92704        0.92827
## Pos Pred Value                     0.34615        0.26087
## Neg Pred Value                     0.91525        0.92050
## Prevalence                         0.11069        0.09542
## Detection Rate                     0.03435        0.02290
## Detection Prevalence               0.09924        0.08779
## Balanced Accuracy                  0.61869        0.58414
##                      Class: Greenwich Village Class: Midtown East
## Sensitivity                           0.60714             0.51724
## Specificity                           0.91880             0.91416
## Pos Pred Value                        0.47222             0.42857
## Neg Pred Value                        0.95133             0.93833
## Prevalence                            0.10687             0.11069
## Detection Rate                        0.06489             0.05725
## Detection Prevalence                  0.13740             0.13359
## Balanced Accuracy                     0.76297             0.71570
##                      Class: Midtown West Class: Park Slope
## Sensitivity                      0.26923           0.35714
## Specificity                      0.92797           0.92308
## Pos Pred Value                   0.29167           0.35714
## Neg Pred Value                   0.92017           0.92308
## Prevalence                       0.09924           0.10687
## Detection Rate                   0.02672           0.03817
## Detection Prevalence             0.09160           0.10687
## Balanced Accuracy                0.59860           0.64011
##                      Class: Upper East Side Class: Upper West Side
## Sensitivity                          0.7778                 0.5806
## Specificity                          0.9779                 0.9437
## Pos Pred Value                       0.8485                 0.5806
## Neg Pred Value                       0.9651                 0.9437
## Prevalence                           0.1374                 0.1183
## Detection Rate                       0.1069                 0.0687
## Detection Prevalence                 0.1260                 0.1183
## Balanced Accuracy                    0.8778                 0.7622
##                      Class: Williamsburg
## Sensitivity                      0.23333
## Specificity                      0.91810
## Pos Pred Value                   0.26923
## Neg Pred Value                   0.90254
## Prevalence                       0.11450
## Detection Rate                   0.02672
## Detection Prevalence             0.09924
## Balanced Accuracy                0.57572
kable(cfMat$table, align = "c")
Central Harlem Chelsea Greenwich Village Midtown East Midtown West Park Slope Upper East Side Upper West Side Williamsburg
Central Harlem 9 3 5 0 3 1 0 0 5
Chelsea 10 6 1 0 3 3 0 0 0
Greenwich Village 1 1 17 0 0 12 0 0 5
Midtown East 0 0 0 15 4 0 1 10 5
Midtown West 2 5 0 6 7 0 0 0 4
Park Slope 5 4 5 0 0 10 0 0 4
Upper East Side 0 0 0 2 0 0 28 3 0
Upper West Side 0 0 0 6 0 0 7 18 0
Williamsburg 2 6 0 0 9 2 0 0 7
t2 <- predict(mod2,test, type = "class")
cfMat2 <- confusionMatrix(t2, test$Area)
print(cfMat2)
## Confusion Matrix and Statistics
## 
##                    Reference
## Prediction          Central Harlem Chelsea Greenwich Village Midtown East
##   Central Harlem                14       0                 4            0
##   Chelsea                        3      10                 1            0
##   Greenwich Village              3       1                18            0
##   Midtown East                   0       0                 0           21
##   Midtown West                   1       5                 0            3
##   Park Slope                     7       3                 5            0
##   Upper East Side                0       0                 0            0
##   Upper West Side                0       0                 0            5
##   Williamsburg                   1       6                 0            0
##                    Reference
## Prediction          Midtown West Park Slope Upper East Side
##   Central Harlem               6          3               0
##   Chelsea                      7          6               0
##   Greenwich Village            0         13               0
##   Midtown East                 2          0               2
##   Midtown West                10          0               0
##   Park Slope                   0          2               0
##   Upper East Side              0          0              26
##   Upper West Side              0          0               8
##   Williamsburg                 1          4               0
##                    Reference
## Prediction          Upper West Side Williamsburg
##   Central Harlem                  0            6
##   Chelsea                         0            0
##   Greenwich Village               0            6
##   Midtown East                    8            7
##   Midtown West                    0            8
##   Park Slope                      0            1
##   Upper East Side                 9            0
##   Upper West Side                14            1
##   Williamsburg                    0            1
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4427          
##                  95% CI : (0.3816, 0.5052)
##     No Information Rate : 0.1374          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3727          
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: Central Harlem Class: Chelsea
## Sensitivity                        0.48276        0.40000
## Specificity                        0.91845        0.92827
## Pos Pred Value                     0.42424        0.37037
## Neg Pred Value                     0.93450        0.93617
## Prevalence                         0.11069        0.09542
## Detection Rate                     0.05344        0.03817
## Detection Prevalence               0.12595        0.10305
## Balanced Accuracy                  0.70061        0.66414
##                      Class: Greenwich Village Class: Midtown East
## Sensitivity                            0.6429             0.72414
## Specificity                            0.9017             0.91845
## Pos Pred Value                         0.4390             0.52500
## Neg Pred Value                         0.9548             0.96396
## Prevalence                             0.1069             0.11069
## Detection Rate                         0.0687             0.08015
## Detection Prevalence                   0.1565             0.15267
## Balanced Accuracy                      0.7723             0.82130
##                      Class: Midtown West Class: Park Slope
## Sensitivity                      0.38462          0.071429
## Specificity                      0.92797          0.931624
## Pos Pred Value                   0.37037          0.111111
## Neg Pred Value                   0.93191          0.893443
## Prevalence                       0.09924          0.106870
## Detection Rate                   0.03817          0.007634
## Detection Prevalence             0.10305          0.068702
## Balanced Accuracy                0.65629          0.501526
##                      Class: Upper East Side Class: Upper West Side
## Sensitivity                         0.72222                0.45161
## Specificity                         0.96018                0.93939
## Pos Pred Value                      0.74286                0.50000
## Neg Pred Value                      0.95595                0.92735
## Prevalence                          0.13740                0.11832
## Detection Rate                      0.09924                0.05344
## Detection Prevalence                0.13359                0.10687
## Balanced Accuracy                   0.84120                0.69550
##                      Class: Williamsburg
## Sensitivity                     0.033333
## Specificity                     0.948276
## Pos Pred Value                  0.076923
## Neg Pred Value                  0.883534
## Prevalence                      0.114504
## Detection Rate                  0.003817
## Detection Prevalence            0.049618
## Balanced Accuracy               0.490805
kable(cfMat2$table, align = "c")
Central Harlem Chelsea Greenwich Village Midtown East Midtown West Park Slope Upper East Side Upper West Side Williamsburg
Central Harlem 14 0 4 0 6 3 0 0 6
Chelsea 3 10 1 0 7 6 0 0 0
Greenwich Village 3 1 18 0 0 13 0 0 6
Midtown East 0 0 0 21 2 0 2 8 7
Midtown West 1 5 0 3 10 0 0 0 8
Park Slope 7 3 5 0 0 2 0 0 1
Upper East Side 0 0 0 0 0 0 26 9 0
Upper West Side 0 0 0 5 0 0 8 14 1
Williamsburg 1 6 0 0 1 4 0 0 1

Price-to-Rent Ratio plot

priceRentTs <- ts(priceRentRatio, start = 2010, frequency = 12)
priceRent_hts <- hts(priceRentTs)
plot1 <- aggts(priceRent_hts, levels = 1)
plot0<- aggts(priceRent_hts, levels = 0)
p1 <- autoplot(plot1[,c(1:5)])+xlab("Year") + ylab("Price to Rent Index") +scale_color_discrete(guide= guide_legend("Neighborhood"))
p2 <- autoplot(plot1[,c(6:9)])+xlab("Year") + ylab("Price to Rent Index") +scale_color_discrete(guide= guide_legend("Neighborhood"))
p0<- autoplot(plot0)+xlab("Year") + ylab("Price to Rent Index") + ggtitle("Total")

lay=rbind(c(1,1),c(2,2), c(3,3))
gridExtra::grid.arrange(p0,p1,p2, layout_matrix=lay)