The research and findings in the following project have been completed on behalf of StreetEasy.
Required Packages
library(xts)
library(lattice)
library(ggplot2)
library(tidyr)
library(reshape)
library(car)
library(dplyr)
library(lubridate)
library(quantmod)
library(lmtest)
library(plm)
library(stargazer, quietly = T)
library(caret)
library(nnet)
library(hts)
library(knitr)
url1 <- 'https://s3.amazonaws.com/streeteasy-market-
data-api/data_repository/E1_rentalInventory_All.zip'
url2 <- 'https://s3.amazonaws.com/streeteasy-market-
data-api/data_repository/A9_daysOnMarket_All.zip'
url3 <- 'https://s3.amazonaws.com/streeteasy-market-
data-api/data_repository/A6_medianSalePrice_All.zip'
url4 <- 'https://streeteasy-market-data-api.s3.amazonaws.com/
data_repository/E2_medianAskingRent_All.zip'
temp = tempfile()
download.file(url4, temp)
med_AskRent <- read.csv(unz(temp, "E2_medianAskingRent_All.csv"))
download.file(url3, temp)
medSalePrice <- read.csv(unz(temp, "A6_medianSalePrice_All.csv"))
download.file(url2, temp)
daysOnMarket <- read.csv(unz(temp, "A9_daysOnMarket_All.csv"))
download.file(url1, temp)
totalRentInv <- read.csv(unz(temp, "E1_rentalInventory_All.csv"))
unlink(temp)
Remove NAs
totalRentInv <- na.exclude(totalRentInv)
med_AskRent <-na.exclude(med_AskRent)
daysOnMarket <-na.exclude(daysOnMarket)
medSalePrice <- na.exclude(medSalePrice)
Median Sale Price
medSalePriceSub <- subset(medSalePrice, as.character(medSalePrice$Area) %in% as.character(daysOnMarket$Area) & medSalePrice$AreaType == "neighborhood")
medSalePriceLong <- gather(medSalePriceSub, Date, "Median Sale Price", 4:ncol(medSalePrice), factor_key = T)
medSalePriceLong <- medSalePriceLong[order(medSalePriceLong$Area, medSalePriceLong$Date),]
medSalePriceLong$Date <- sub("X", "", medSalePriceLong[,4])
medSalePriceLong$Date <- sub(".", "-", medSalePriceLong[,4], fixed = T)
medSalePriceLong$Date <- as.Date(paste(medSalePriceLong$Date,1, sep = "-"), "%Y-%m-%d")
medSalePrice_zoo <- as.zooreg(medSalePriceLong[,c(4,1:3,5)])
medSalePriceCast <- cast(medSalePriceLong, Date ~ Area)
medSalePrice_ts <- xts(medSalePriceCast, order.by = medSalePriceCast$Date)
Median Asking Rent
medAskRent <-subset(med_AskRent, as.character(med_AskRent$Area) %in% as.character(daysOnMarket$Area) & med_AskRent$AreaType == "neighborhood")
medAskRentLong <- gather(medAskRent, Date, "Median Asking Rent", 4:ncol(med_AskRent), factor_key = T)
medAskRentLong <- medAskRentLong[order(medAskRentLong$Area, medAskRentLong$Date),]
row.names(medAskRentLong) <- 1:NROW(medAskRentLong)
medAskRentLong$Date <- sub("X", "", medAskRentLong[,4])
medAskRentLong$Date <- sub(".", "-", medAskRentLong[,4], fixed = T)
medAskRentLong$Date <- as.Date(paste(medAskRentLong$Date,1,sep="-"),"%Y-%m-%d")
med_AskRent_zoo <- as.zooreg(medAskRentLong[,c(4,1:3,5)])
medAskRentCast <- cast(medAskRentLong, Date ~ Area)
medAskRent_ts = xts(medAskRentCast, order.by = medAskRentCast$Date)
Days on Market i.e. Housing supply
daysOnMktSub <- subset(daysOnMarket, as.character(daysOnMarket$Area) %in% as.character(medAskRent$Area))
daysOnMarketLong <- gather(daysOnMktSub, Date, "Days on Market", 4:ncol(daysOnMarket), factor_key = T)
daysOnMarketLong <- daysOnMarketLong[order(daysOnMarketLong$Area, daysOnMarketLong$Date),]
row.names(daysOnMarket) <- 1:NROW(daysOnMarket)
daysOnMarketLong$Date <- sub("X", "", daysOnMarketLong[,4])
daysOnMarketLong$Date <- sub(".", "-", daysOnMarketLong[,4], fixed = T)
daysOnMarketLong$Date <- as.Date(paste(daysOnMarketLong$Date,1,sep="-"),"%Y-%m-%d")
daysOnMarket_zoo <- as.zooreg(daysOnMarketLong[,c(4,1,2,3,5)])
daysOnMarketCast <- cast(daysOnMarketLong, Date ~ Area)
daysOnMarketCast$Date <- as.yearmon(as.character(daysOnMarketCast$Date))
daysOnMarket_ts <- as.xts(daysOnMarketCast, order.by = as.yearmon(daysOnMarketCast$Date))
Total Rental Inventory
ttlRentInv <- subset(totalRentInv, as.character(totalRentInv$Area) %in% as.character(daysOnMarket$Area) & totalRentInv$AreaType == "neighborhood")
ttlRentIvLong <- gather(ttlRentInv, Date, " Total Rent Inventory", 4:ncol(totalRentInv), factor_key = T)
ttlRentIvLong <- ttlRentIvLong[order(ttlRentIvLong$Area,ttlRentIvLong$Date),]
ttlRentIvLong$Date <- sub("X", "", ttlRentIvLong[,4])
ttlRentIvLong$Date <- sub(".", "-", ttlRentIvLong[,4], fixed = T)
ttlRentIvLong$Date <- as.Date(paste(ttlRentIvLong$Date,1, sep = "-"), "%Y-%m-%d")
ttlRentInv_zoo <-as.zooreg(ttlRentIvLong[,c(4,1:3,5)])
ttlRentIvCast <- cast(ttlRentIvLong, Date ~ Area)
ttlRentInv_ts <- as.xts(ttlRentIvCast, order.by = ttlRentIvCast$Date)
Price-Rent Ratio calculation
m1 <- apply(as.matrix.noquote(medAskRent_ts), 2, as.numeric)
m2 <- apply(as.matrix.noquote(medSalePrice_ts), 2, as.numeric)
m <- t(m2)/t(m1*12)
priceRentRatio <- t(m)
priceRentRatio <- xts(priceRentRatio, order.by = medAskRentCast$Date)
priceRentIndx <- gather(as.data.frame(priceRentRatio), Date, "Price Rent Index", 1:9, factor_key = T )
Monthly growth rate for housing supply
diffDaysOnMkt_1 <- diff(daysOnMarket_ts)/lag.xts(daysOnMarket_ts,1)
diffDaysOnMkt <- gather(as.data.frame(diffDaysOnMkt_1), Date, "Month Over Month Homes Supply", 1:9,factor_key = T)
diffDaysOnMkt[is.na(diffDaysOnMkt)] <- 0
Panel data
panelData = daysOnMarketLong[,c(4,1:3,5)]
panelData<- cbind.data.frame(panelData, medAskRentLong$`Median Asking Rent`, medSalePriceLong$`Median Sale Price`, ttlRentIvLong$` Total Rent Inventory`)
colnames(panelData) <- c("Date", "Area", "Boro", "Area Type", "Days on Market", "Median Asking Rent", "Median Sale Price", "Total Rent Inventory")
panelData$Date <- as.yearmon(as.character(daysOnMarketLong$Date))
panelData$'Price Rent Index'<- priceRentIndx[,2]
panelData$'Home Supply Growth' <- diffDaysOnMkt$`Month Over Month Homes Supply`
panelData$Area <- factor(panelData$Area)
rownames(panelData) <- 1:NROW(panelData)
panelData$`Median Asking Rent` <-panelData$`Median Asking Rent`
panelData$indexTRI <- cut(panelData$`Total Rent Inventory`, breaks = 100, labels = F)
panelData$indexPI <- cut(panelData$`Price Rent Index`, breaks = 30, labels = F)
head(panelData)
## Date Area Boro Area Type Days on Market
## 1 Jan 2010 Central Harlem Manhattan neighborhood 207.0
## 2 Feb 2010 Central Harlem Manhattan neighborhood 210.0
## 3 Mar 2010 Central Harlem Manhattan neighborhood 193.0
## 4 Apr 2010 Central Harlem Manhattan neighborhood 79.0
## 5 May 2010 Central Harlem Manhattan neighborhood 105.0
## 6 Jun 2010 Central Harlem Manhattan neighborhood 206.5
## Median Asking Rent Median Sale Price Total Rent Inventory
## 1 1992.5 366022 412
## 2 1995.0 394887 382
## 3 1895.0 393025 333
## 4 1897.5 332085 298
## 5 1900.0 308763 272
## 6 1950.0 531000 309
## Price Rent Index Home Supply Growth indexTRI indexPI
## 1 15.30832 0.00000000 8 5
## 2 16.49486 0.01449275 7 6
## 3 17.28342 -0.08095238 6 6
## 4 14.58432 -0.59067358 5 5
## 5 13.54224 0.32911392 4 4
## 6 22.69231 0.96666667 5 9
scatterplot(panelData$`Median Asking Rent` ~panelData$`Days on Market` + panelData$Area-1, reg.line=F, xlab = "Days on Market", ylab = "Median Asking Rent", legend.title = "Neighborhoods", grid =F )
Covariance tests: Fixed Effects vs. Random Effects vs. Partial Pooling
olS <-lm(panelData$`Price Rent Index`~ panelData$`Days on Market`)
kable(summary(olS)$coef, digits=3)
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 25.963 | 0.466 | 55.769 | 0 |
panelData$Days on Market |
-0.022 | 0.005 | -4.008 | 0 |
fixed<- plm(panelData$`Price Rent Index`~ panelData$`Days on Market`,index = c('Area','Date'), data = panelData, model = 'within')
kable(summary(fixed)$coef, digits=3)
| Estimate | Std. Error | t-value | Pr(>|t|) | |
|---|---|---|---|---|
panelData$Days on Market |
-0.008 | 0.005 | -1.635 | 0.102 |
pFtest(fixed, olS, lower.tail=T) #p-value < .05, choose FE
##
## F test for individual effects
##
## data: panelData$`Price Rent Index` ~ panelData$`Days on Market`
## F = 51.45, df1 = 8, df2 = 863, p-value < 2.2e-16
## alternative hypothesis: significant effects
random <- plm(panelData$`Price Rent Index`~ panelData$`Days on Market`, index = c('Area', 'Date'), data = panelData, model = 'random')
kable(summary(random)$coef, digits=3)
| Estimate | Std. Error | t-value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 24.926 | 1.277 | 19.525 | 0.000 |
panelData$Days on Market |
-0.008 | 0.005 | -1.703 | 0.089 |
plot(random$residuals, within = F, pooling = F, random = T,
xlab = "Days on Market", ylab = "Residuals", sub = "Panel effect")
phtest(fixed,random) #p-value > .05, choose RE
##
## Hausman Test
##
## data: panelData$`Price Rent Index` ~ panelData$`Days on Market`
## chisq = 1.3684, df = 1, p-value = 0.2421
## alternative hypothesis: one model is inconsistent
pool<- plm(panelData$`Price Rent Index`~ panelData$`Days on Market`, index = c('Area','Date'), data = panelData, model = 'pooling')
kable(summary(pool)$coef, digits=3)
| Estimate | Std. Error | t-value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 25.963 | 0.466 | 55.769 | 0 |
panelData$Days on Market |
-0.022 | 0.005 | -4.008 | 0 |
plmtest(pool, type = c("bp")) #p-value < .05, H0 of no panel effect rejected, choose RE
##
## Lagrange Multiplier Test - (Breusch-Pagan) for balanced panels
##
## data: panelData$`Price Rent Index` ~ panelData$`Days on Market`
## chisq = 4013.4, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
pcdtest(fixed, test = "lm") #p-value <.05, cross-sectional dependence
##
## Breusch-Pagan LM test for cross-sectional dependence in panels
##
## data: panelData$`Price Rent Index` ~ panelData$`Days on Market`
## chisq = 408.13, df = 36, p-value < 2.2e-16
## alternative hypothesis: cross-sectional dependence
coeftest(random, vcovHC(random, type = 'HC0', method = 'white1'))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.9262562 1.2691316 19.6404 < 2e-16 ***
## panelData$`Days on Market` -0.0083689 0.0043462 -1.9256 0.05448 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Construct IV using random effects model
iv1 <- predict(random)
panelData$iv1 <- iv1
Plots for exploration
tsRainbow <- rainbow(ncol(medAskRent_ts))
xyplot.ts(medAskRent_ts, col = tsRainbow, ylab = "Median Asking Rent")
diffDaysOnMkt_1 %>% as.data.frame() %>% na.omit() %>% GGally::ggpairs(axisLabels = "none", title = "Days on Market Monthly Growth Rate")
Sampling for softmax regression
set.seed(3214)
trainingRows <- sample(1:nrow(panelData), .7*nrow(panelData))
trainS <- panelData[sort(trainingRows),]
testS <- panelData[sort(-trainingRows),]
table(trainS$Area)
##
## Central Harlem Chelsea Greenwich Village Midtown East
## 68 72 69 68
## Midtown West Park Slope Upper East Side Upper West Side
## 71 69 61 66
## Williamsburg
## 67
Standardize variables for classification algorithm
stdZ01 <- function(x){(x-min(x))/(max(x)-min(x))
}
train <- apply(trainS[,c(5:10,13)], 2, stdZ01) %>%
cbind.data.frame(trainS[,c(1:2,11:12)])
test <- apply(testS[,c(5:10,13)], 2, stdZ01) %>%
cbind.data.frame(testS[,c(1:2,11:12)])
Multinomial log-linear models via neural networks
options(contrasts = c("indexTRI", "indexPI"))
mod1<- multinom(Area ~`Days on Market` + `Days on Market`*indexTRI +indexTRI, data = train, maxit=500)
summary(mod1)
varImp(mod1)
mod2<- multinom(Area ~ indexTRI , data = train) %>%
update(.~. + iv1*indexTRI + iv1, maxit=500, Hess= T)
summary(mod2)
varImp(mod2)
stargazer(mod1, type = "html", out = "mod1.htm",
intercept.bottom = F, model.numbers = F,
align = T)
| Dependent variable: | ||||||||
| Chelsea | Greenwich Village | Midtown East | Midtown West | Park Slope | Upper East Side | Upper West Side | Williamsburg | |
| Constant | 1.685** | 5.886*** | -5.951*** | -2.070*** | 3.380*** | -20.775*** | -11.864*** | -0.127 |
| (0.683) | (0.827) | (1.109) | (0.745) | (0.715) | (2.929) | (2.265) | (0.733) | |
Days on Market
|
-7.224*** | -19.096*** | 1.112 | -1.351 | -8.180*** | 8.973 | -0.701 | -5.926** |
| (2.338) | (3.489) | (2.830) | (1.966) | (2.871) | (9.706) | (9.495) | (2.558) | |
| indexTRI | -0.040 | -0.336*** | 0.170*** | 0.071** | -0.072* | 0.371*** | 0.286*** | 0.036 |
| (0.036) | (0.062) | (0.038) | (0.033) | (0.040) | (0.057) | (0.053) | (0.034) | |
Days on Market:indexTRI
|
0.224 | 0.839*** | 0.234* | 0.253** | -0.529* | 0.425* | 0.278 | 0.226 |
| (0.156) | (0.295) | (0.140) | (0.127) | (0.271) | (0.230) | (0.234) | (0.146) | |
| Akaike Inf. Crit. | 1,754.090 | 1,754.090 | 1,754.090 | 1,754.090 | 1,754.090 | 1,754.090 | 1,754.090 | 1,754.090 |
| Note: | p<0.1; p<0.05; p<0.01 | |||||||
stargazer(mod2, type = "html", out = "mod2.htm",
intercept.bottom = F, model.numbers = F,
align = T)
| Dependent variable: | ||||||||
| Chelsea | Greenwich Village | Midtown East | Midtown West | Park Slope | Upper East Side | Upper West Side | Williamsburg | |
| Constant | -4.903*** | -10.225*** | -3.830** | -2.752** | -4.349*** | -7.803* | -15.790** | -5.277*** |
| (1.522) | (2.099) | (1.800) | (1.376) | (1.545) | (4.720) | (6.967) | (1.704) | |
| indexTRI | 0.490*** | 0.790*** | 0.549*** | 0.450*** | 0.321** | 0.850*** | 0.917*** | 0.526*** |
| (0.127) | (0.188) | (0.123) | (0.117) | (0.144) | (0.158) | (0.187) | (0.125) | |
| iv1 | 6.113*** | 16.639*** | -3.379 | -0.154 | 6.806*** | -14.580** | 1.910 | 4.439* |
| (2.175) | (2.998) | (2.710) | (2.021) | (2.209) | (6.880) | (9.409) | (2.398) | |
| indexTRI:iv1 | -0.588*** | -1.278*** | -0.389** | -0.387*** | -0.470** | -0.496** | -0.670*** | -0.534*** |
| (0.161) | (0.258) | (0.154) | (0.146) | (0.183) | (0.199) | (0.239) | (0.156) | |
| Akaike Inf. Crit. | 1,853.530 | 1,853.530 | 1,853.530 | 1,853.530 | 1,853.530 | 1,853.530 | 1,853.530 | 1,853.530 |
| Note: | p<0.1; p<0.05; p<0.01 | |||||||
oddsRatioTable1 <- exp(coef(mod1))
kable(oddsRatioTable1, digits = 5, align = "c")#log-odds
| (Intercept) | Days on Market |
indexTRI | Days on Market:indexTRI |
|
|---|---|---|---|---|
| Chelsea | 5.39237 | 0.00073 | 0.96083 | 1.25101 |
| Greenwich Village | 359.83946 | 0.00000 | 0.71441 | 2.31483 |
| Midtown East | 0.00260 | 3.03910 | 1.18535 | 1.26317 |
| Midtown West | 0.12618 | 0.25892 | 1.07317 | 1.28728 |
| Park Slope | 29.38536 | 0.00028 | 0.93064 | 0.58925 |
| Upper East Side | 0.00000 | 7887.04534 | 1.44874 | 1.52988 |
| Upper West Side | 0.00001 | 0.49622 | 1.33067 | 1.32015 |
| Williamsburg | 0.88064 | 0.00267 | 1.03625 | 1.25363 |
oddsRatioTable2 <- exp(coef(mod2))
kable(oddsRatioTable2, digits = 3, align = "c")#log-odds
| (Intercept) | indexTRI | iv1 | indexTRI:iv1 | |
|---|---|---|---|---|
| Chelsea | 0.007 | 1.632 | 451.639 | 0.555 |
| Greenwich Village | 0.000 | 2.204 | 16838453.373 | 0.279 |
| Midtown East | 0.022 | 1.732 | 0.034 | 0.678 |
| Midtown West | 0.064 | 1.568 | 0.857 | 0.679 |
| Park Slope | 0.013 | 1.379 | 903.449 | 0.625 |
| Upper East Side | 0.000 | 2.339 | 0.000 | 0.609 |
| Upper West Side | 0.000 | 2.501 | 6.752 | 0.512 |
| Williamsburg | 0.005 | 1.691 | 84.649 | 0.586 |
For this m x m confusion matrix, negative recall (specificity) is high, a good sign in terms of our prediction goals. Additionally, balanced accuracy is the preferred evaluation metric here to decipher algorithm performance
t1 <- predict(mod1,test, type = "class")
cfMat <- confusionMatrix(t1, test$Area)
print(cfMat)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Central Harlem Chelsea Greenwich Village Midtown East
## Central Harlem 9 3 5 0
## Chelsea 10 6 1 0
## Greenwich Village 1 1 17 0
## Midtown East 0 0 0 15
## Midtown West 2 5 0 6
## Park Slope 5 4 5 0
## Upper East Side 0 0 0 2
## Upper West Side 0 0 0 6
## Williamsburg 2 6 0 0
## Reference
## Prediction Midtown West Park Slope Upper East Side
## Central Harlem 3 1 0
## Chelsea 3 3 0
## Greenwich Village 0 12 0
## Midtown East 4 0 1
## Midtown West 7 0 0
## Park Slope 0 10 0
## Upper East Side 0 0 28
## Upper West Side 0 0 7
## Williamsburg 9 2 0
## Reference
## Prediction Upper West Side Williamsburg
## Central Harlem 0 5
## Chelsea 0 0
## Greenwich Village 0 5
## Midtown East 10 5
## Midtown West 0 4
## Park Slope 0 4
## Upper East Side 3 0
## Upper West Side 18 0
## Williamsburg 0 7
##
## Overall Statistics
##
## Accuracy : 0.4466
## 95% CI : (0.3854, 0.509)
## No Information Rate : 0.1374
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3768
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Central Harlem Class: Chelsea
## Sensitivity 0.31034 0.24000
## Specificity 0.92704 0.92827
## Pos Pred Value 0.34615 0.26087
## Neg Pred Value 0.91525 0.92050
## Prevalence 0.11069 0.09542
## Detection Rate 0.03435 0.02290
## Detection Prevalence 0.09924 0.08779
## Balanced Accuracy 0.61869 0.58414
## Class: Greenwich Village Class: Midtown East
## Sensitivity 0.60714 0.51724
## Specificity 0.91880 0.91416
## Pos Pred Value 0.47222 0.42857
## Neg Pred Value 0.95133 0.93833
## Prevalence 0.10687 0.11069
## Detection Rate 0.06489 0.05725
## Detection Prevalence 0.13740 0.13359
## Balanced Accuracy 0.76297 0.71570
## Class: Midtown West Class: Park Slope
## Sensitivity 0.26923 0.35714
## Specificity 0.92797 0.92308
## Pos Pred Value 0.29167 0.35714
## Neg Pred Value 0.92017 0.92308
## Prevalence 0.09924 0.10687
## Detection Rate 0.02672 0.03817
## Detection Prevalence 0.09160 0.10687
## Balanced Accuracy 0.59860 0.64011
## Class: Upper East Side Class: Upper West Side
## Sensitivity 0.7778 0.5806
## Specificity 0.9779 0.9437
## Pos Pred Value 0.8485 0.5806
## Neg Pred Value 0.9651 0.9437
## Prevalence 0.1374 0.1183
## Detection Rate 0.1069 0.0687
## Detection Prevalence 0.1260 0.1183
## Balanced Accuracy 0.8778 0.7622
## Class: Williamsburg
## Sensitivity 0.23333
## Specificity 0.91810
## Pos Pred Value 0.26923
## Neg Pred Value 0.90254
## Prevalence 0.11450
## Detection Rate 0.02672
## Detection Prevalence 0.09924
## Balanced Accuracy 0.57572
kable(cfMat$table, align = "c")
| Central Harlem | Chelsea | Greenwich Village | Midtown East | Midtown West | Park Slope | Upper East Side | Upper West Side | Williamsburg | |
|---|---|---|---|---|---|---|---|---|---|
| Central Harlem | 9 | 3 | 5 | 0 | 3 | 1 | 0 | 0 | 5 |
| Chelsea | 10 | 6 | 1 | 0 | 3 | 3 | 0 | 0 | 0 |
| Greenwich Village | 1 | 1 | 17 | 0 | 0 | 12 | 0 | 0 | 5 |
| Midtown East | 0 | 0 | 0 | 15 | 4 | 0 | 1 | 10 | 5 |
| Midtown West | 2 | 5 | 0 | 6 | 7 | 0 | 0 | 0 | 4 |
| Park Slope | 5 | 4 | 5 | 0 | 0 | 10 | 0 | 0 | 4 |
| Upper East Side | 0 | 0 | 0 | 2 | 0 | 0 | 28 | 3 | 0 |
| Upper West Side | 0 | 0 | 0 | 6 | 0 | 0 | 7 | 18 | 0 |
| Williamsburg | 2 | 6 | 0 | 0 | 9 | 2 | 0 | 0 | 7 |
t2 <- predict(mod2,test, type = "class")
cfMat2 <- confusionMatrix(t2, test$Area)
print(cfMat2)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Central Harlem Chelsea Greenwich Village Midtown East
## Central Harlem 14 0 4 0
## Chelsea 3 10 1 0
## Greenwich Village 3 1 18 0
## Midtown East 0 0 0 21
## Midtown West 1 5 0 3
## Park Slope 7 3 5 0
## Upper East Side 0 0 0 0
## Upper West Side 0 0 0 5
## Williamsburg 1 6 0 0
## Reference
## Prediction Midtown West Park Slope Upper East Side
## Central Harlem 6 3 0
## Chelsea 7 6 0
## Greenwich Village 0 13 0
## Midtown East 2 0 2
## Midtown West 10 0 0
## Park Slope 0 2 0
## Upper East Side 0 0 26
## Upper West Side 0 0 8
## Williamsburg 1 4 0
## Reference
## Prediction Upper West Side Williamsburg
## Central Harlem 0 6
## Chelsea 0 0
## Greenwich Village 0 6
## Midtown East 8 7
## Midtown West 0 8
## Park Slope 0 1
## Upper East Side 9 0
## Upper West Side 14 1
## Williamsburg 0 1
##
## Overall Statistics
##
## Accuracy : 0.4427
## 95% CI : (0.3816, 0.5052)
## No Information Rate : 0.1374
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3727
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Central Harlem Class: Chelsea
## Sensitivity 0.48276 0.40000
## Specificity 0.91845 0.92827
## Pos Pred Value 0.42424 0.37037
## Neg Pred Value 0.93450 0.93617
## Prevalence 0.11069 0.09542
## Detection Rate 0.05344 0.03817
## Detection Prevalence 0.12595 0.10305
## Balanced Accuracy 0.70061 0.66414
## Class: Greenwich Village Class: Midtown East
## Sensitivity 0.6429 0.72414
## Specificity 0.9017 0.91845
## Pos Pred Value 0.4390 0.52500
## Neg Pred Value 0.9548 0.96396
## Prevalence 0.1069 0.11069
## Detection Rate 0.0687 0.08015
## Detection Prevalence 0.1565 0.15267
## Balanced Accuracy 0.7723 0.82130
## Class: Midtown West Class: Park Slope
## Sensitivity 0.38462 0.071429
## Specificity 0.92797 0.931624
## Pos Pred Value 0.37037 0.111111
## Neg Pred Value 0.93191 0.893443
## Prevalence 0.09924 0.106870
## Detection Rate 0.03817 0.007634
## Detection Prevalence 0.10305 0.068702
## Balanced Accuracy 0.65629 0.501526
## Class: Upper East Side Class: Upper West Side
## Sensitivity 0.72222 0.45161
## Specificity 0.96018 0.93939
## Pos Pred Value 0.74286 0.50000
## Neg Pred Value 0.95595 0.92735
## Prevalence 0.13740 0.11832
## Detection Rate 0.09924 0.05344
## Detection Prevalence 0.13359 0.10687
## Balanced Accuracy 0.84120 0.69550
## Class: Williamsburg
## Sensitivity 0.033333
## Specificity 0.948276
## Pos Pred Value 0.076923
## Neg Pred Value 0.883534
## Prevalence 0.114504
## Detection Rate 0.003817
## Detection Prevalence 0.049618
## Balanced Accuracy 0.490805
kable(cfMat2$table, align = "c")
| Central Harlem | Chelsea | Greenwich Village | Midtown East | Midtown West | Park Slope | Upper East Side | Upper West Side | Williamsburg | |
|---|---|---|---|---|---|---|---|---|---|
| Central Harlem | 14 | 0 | 4 | 0 | 6 | 3 | 0 | 0 | 6 |
| Chelsea | 3 | 10 | 1 | 0 | 7 | 6 | 0 | 0 | 0 |
| Greenwich Village | 3 | 1 | 18 | 0 | 0 | 13 | 0 | 0 | 6 |
| Midtown East | 0 | 0 | 0 | 21 | 2 | 0 | 2 | 8 | 7 |
| Midtown West | 1 | 5 | 0 | 3 | 10 | 0 | 0 | 0 | 8 |
| Park Slope | 7 | 3 | 5 | 0 | 0 | 2 | 0 | 0 | 1 |
| Upper East Side | 0 | 0 | 0 | 0 | 0 | 0 | 26 | 9 | 0 |
| Upper West Side | 0 | 0 | 0 | 5 | 0 | 0 | 8 | 14 | 1 |
| Williamsburg | 1 | 6 | 0 | 0 | 1 | 4 | 0 | 0 | 1 |
Price-to-Rent Ratio plot
priceRentTs <- ts(priceRentRatio, start = 2010, frequency = 12)
priceRent_hts <- hts(priceRentTs)
plot1 <- aggts(priceRent_hts, levels = 1)
plot0<- aggts(priceRent_hts, levels = 0)
p1 <- autoplot(plot1[,c(1:5)])+xlab("Year") + ylab("Price to Rent Index") +scale_color_discrete(guide= guide_legend("Neighborhood"))
p2 <- autoplot(plot1[,c(6:9)])+xlab("Year") + ylab("Price to Rent Index") +scale_color_discrete(guide= guide_legend("Neighborhood"))
p0<- autoplot(plot0)+xlab("Year") + ylab("Price to Rent Index") + ggtitle("Total")
lay=rbind(c(1,1),c(2,2), c(3,3))
gridExtra::grid.arrange(p0,p1,p2, layout_matrix=lay)