# Instructions
# . The research question is can we predict which three zip codes provide the best
# investment opportunity for the Syracuse Real Estate Investment Trust (SREIT)?
# . Using the base data available from Zillow
# (files.zillowstatic.com/research/public/Zip/Zip_Zhvi_SingleFamilyResidence.csv)
# o Review the data - clean as appropriate
# o Provide an initial data analysis
# . Develop a thematic map(s) of the United States:
# o By state
# o Median housing for Dec (state average)
# o Present years from 2003-2011
# . Develop time series plots for the following Arkansas metro areas:
# o Hot Springs, Little Rock, Fayetteville, Searcy
# o Present all values from 1997 to 2013
# o Average at the metro area level
# . Using data from Zillow and other sources (think Bureau of Labor Statistics and Census data):
# o Develop model for forecasting average median housing value for 2014
# o Use the average of 2014-01, 2014-02, 2014-03 as your test set
# o Consolidate monthly data into an annual average
# . Answer the following questions:
# o What three zip codes provide the best investment opportunity for the SREIT?
# o Why?
# Well, I tried many many options, so I installed a ton of packages.
# Install packages from the class lecture
library(PBSmapping)
library(maptools)
library(RCurl)
library(RJSONIO)
library(foreign)
# Additional packages
# devtools::install_github("twitter/AnomalyDetection") # AnomalyDetection
library(AnomalyDetection)
library(ggplot2)
library(Rcpp)
library(timeDate)
library(data.table)
library(tseries)
library(lubridate)
library(forecast)
# devtools::install_github("ellisp/forecastxgb-r-package/pkg") # forecastxgb
library(forecastxgb)
library(caret)
library(qlcMatrix)
library(xgboost)
library(dplyr)
library(plotly)
library(ggthemes)
library(USAboundaries)
library(sp)
library(maptools)
library(ggplot2)
library(rgeos)
library(rgdal)
library(reshape)
library(gpclib)
library(dplyr)
library(data.table)
library(USAboundaries)
library(raster)
library(RCurl)
library(tidyr)
library(stringr)
library(maps)
library(ggmap)
# Time Series packages
require(astsa, quietly=TRUE, warn.conflicts=FALSE)
require(knitr)
library(astsa)
library(knitr)
# Set the working directory to the folder that holds the lab's CSV files.
# The path is machine-specific, so only switch to it when it exists; otherwise
# keep the current directory and warn instead of aborting the whole script
# with a setwd() error.
labs.dir <- "C:/DC/Advanced Information Analytics/Labs"
if (dir.exists(labs.dir)) {
  setwd(labs.dir)
} else {
  warning("Data directory not found: ", labs.dir,
          "; reading files relative to ", getwd(), call. = FALSE)
}
# Remove scientific notation
options(scipen = 999)
# 1. Review the data - clean as appropriate.
real.estate <- read.csv("Zillo BB data.csv", header = TRUE)
str(real.estate) # 11279 obs. of 228 variables.
summary(real.estate)
glimpse(real.estate)
# Remove NAs: na.omit() drops every row that has an NA in any column.
real.estate.cleaned <- na.omit(real.estate)
str(real.estate.cleaned) # 9283 obs. of 228 variables.
# Save this new data frame as .csv file.
write.csv(real.estate.cleaned, file = "real.estate.cleaned.csv")
# Report how many rows the cleaning removed. The figure is computed from the
# data so it cannot go stale; in the run recorded above it was
# 11279 - 9283 = 1996 rows, i.e. about 18% of the input.
rows.before <- nrow(real.estate)
rows.dropped <- rows.before - nrow(real.estate.cleaned)
message(sprintf("na.omit() removed %d of %d rows (%.1f%%).",
                rows.dropped, rows.before, 100 * rows.dropped / rows.before))
# However, some columns still have empty spaces, for instance "Metro" column has 219 empty cells.
# (Empty strings are not NA, so na.omit() does not remove them.)
# But it is not crucial for our research.
summary(real.estate.cleaned)
glimpse(real.estate.cleaned)
# Question 2. Develop a thematic map(s) of the United States:
# o By state
# o Median housing for Dec (state average)
# Aggregate data by state: the mean of every numeric column within each state.
# Only numeric columns are handed to aggregate(). Calling mean() on the
# non-numeric columns (e.g. State itself) only produces NA plus a warning for
# every state, so those all-NA columns are no longer written to the CSV.
# The grouping column keeps aggregate()'s default name "Group.1".
numeric.columns <- vapply(real.estate.cleaned, is.numeric, logical(1))
real.estate.cleaned.by.state <- aggregate(real.estate.cleaned[numeric.columns],
                                          by = list(real.estate.cleaned$State),
                                          FUN = mean)
write.csv(real.estate.cleaned.by.state, file = "real.estate.cleaned.by.state.csv")
# It gave us summarize data by state and mean of median housing.
# Correctness of this code was double checked in Excel.
# But because of removed data I now have only ... 40 states. 10 states were lost.
# Data in December was cleaned in Excel.
# I deleted everything besides December data and calculated average for each state.
real.estate.cleaned.by.state.Dec <- read.csv("real.estate.cleaned.by.state2.csv", header = TRUE)
# Ten states are absent due to na.omit()
# Alaska, Idaho, Kansas, Maine, Montana, New Mexico, North Dakota, South Dakota, Vermont, West Virginia
# I put 0 value there
head(real.estate.cleaned.by.state.Dec)
## State Mean.of.median.housing
## 1 Alabama 116777.7
## 2 Arkansas 104362.0
## 3 Arizona 188516.7
## 4 California 416353.8
## 5 Colorado 217096.9
## 6 Connecticut 285076.4
# State Mean.of.median.housing
# 1 Alabama 116777.7
# 2 Arkansas 104362.0
# Map with "fiftystater" package
# devtools::install_github("wmurphyrd/fiftystater")
library(fiftystater)
data("fifty_states") # this line is optional due to lazy data loading
december <- real.estate.cleaned.by.state.Dec # shorten the title
names(december) # "State" "Mean.of.median.housing"
## [1] "State" "Mean.of.median.housing"
# Well, it was difficult, so I used two sources:
# First, this package "fiftystater"
# https://github.com/wmurphyrd/fiftystater
# Second, I asked a question on stackoverflow, because I faced some problems
# https://stackoverflow.com/questions/45107302/error-in-seq-lennrowdata-1-argument-must-be-coercible-to-non-negative-in/45110592#45110592
# fifty_states identifies each polygon by the lower-case state name, so the
# join key has to be lower-cased as well.
december$statelower <- tolower(december$State)
# Create a map: one polygon per state, filled by the December mean.
# - Columns are referenced by bare name inside aes(); `december$...` inside
#   aes() bypasses the plot's data argument.
# - The legend title is passed to guide_legend(). The first positional
#   argument of element_text() is the font family, so
#   element_text("Mean of median housing") never set a title.
# NOTE(review): `Map` masks base::Map() for the rest of the session.
# NOTE(review): states stored with a 0 placeholder take part in the fill
#   scale as real values; consider NA for them instead.
Map <- ggplot(december, aes(map_id = statelower)) +
  geom_map(aes(fill = Mean.of.median.housing), color = "#ffffff", map = fifty_states) +
  expand_limits(x = fifty_states$long, y = fifty_states$lat) +
  coord_map() +
  scale_x_continuous(breaks = NULL) +
  scale_y_continuous(breaks = NULL) +
  labs(x = "", y = "",
       title = "US states by the mean of median housing price in December",
       subtitle = "Source: zillowstatic.com") +
  guides(fill = guide_legend(title = "Mean of median housing")) +
  theme(panel.background = element_blank(),
        axis.line.x = element_line(size = .5, colour = "black"),
        axis.title = element_text(size = 14),
        legend.position = "bottom",
        legend.direction = "horizontal",
        legend.box = "vertical",
        legend.key.size = unit(0.7, "cm"),
        text = element_text(family = "OfficinaSanITC-Book"),
        plot.title = element_text(family = "OfficinaSanITC-Book"))
Map

# So, now we can see that the highest median housing is obviously in California and Hawaii.
# Ten states have a dark blue color because their data was missing and I put zero there.
# Question 3.
# . Develop a thematic map(s) of the United States:
# o By state
# o Present years from 2003-2011
# Now I need to summarize data by year
# Again, data was cleaned in Excel.
# I also filled in all ten absent states with 0.
housing.by.year <- read.csv("real.estate.cleaned.by.state03-11.csv", header = TRUE)
housing.by.year$State
## [1] Alabama Arkansas Arizona California
## [5] Colorado Connecticut Delaware Florida
## [9] Georgia Hawaii Iowa Illinois
## [13] Indiana Kentucky Louisiana Massachusetts
## [17] Maryland Michigan Minnesota Missouri
## [21] Mississippi North Carolina Nebraska New Hampshire
## [25] New Jersey Nevada New York Ohio
## [29] Oklahoma Oregon Pennsylvania Rhode Island
## [33] South Carolina Tennessee Texas Utah
## [37] Virginia Washington Wisconsin Wyoming
## [41] Alaska Idaho Kansas Maine
## [45] Montana New Mexico NorthDakota South Dakota
## [49] Vermont West Virginia
## 50 Levels: Alabama Alaska Arizona Arkansas California ... Wyoming
# Now I need to map all years from 2003 to 2011.
# fifty_states identifies each polygon by the lower-case state name.
housing.by.year$statelower <- tolower(housing.by.year$State) # tolower
# The listing above shows "NorthDakota" without a space; lower-cased it would
# not match the map id "north dakota" and the state would stay unfilled.
# This is a no-op if the CSV already spells the name correctly.
housing.by.year$statelower[housing.by.year$statelower == "northdakota"] <- "north dakota"

# Build the state-level choropleth for one year.
#   year: four-digit year; values are read from the column "Year.<year>".
#   data: data frame with a `statelower` column and the Year.* columns
#         (defaults to housing.by.year).
# Returns a ggplot object. Stops if the requested year column is missing.
# The nine maps used to be nine copies of the same code; they differed only
# in the year, so the shared part lives here.
build_state_year_map <- function(year, data = housing.by.year) {
  year.column <- paste0("Year.", year)
  stopifnot(year.column %in% names(data))
  # Copy the requested year into a fixed column so aes() can refer to it by
  # a bare name instead of `data$...`.
  data$fill.value <- data[[year.column]]
  ggplot(data, aes(map_id = statelower)) +
    geom_map(aes(fill = fill.value), color = "#ffffff", map = fifty_states) +
    expand_limits(x = fifty_states$long, y = fifty_states$lat) +
    coord_map() +
    scale_x_continuous(breaks = NULL) +
    scale_y_continuous(breaks = NULL) +
    labs(x = "", y = "",
         title = paste("US states median housing price in", year),
         subtitle = "Source: zillowstatic.com") +
    # The legend title is set here; element_text("...") in theme() would set
    # the font family, not the title text.
    guides(fill = guide_legend(title = "Mean of median housing")) +
    theme(panel.background = element_blank(),
          axis.line.x = element_line(size = .5, colour = "black"),
          axis.title = element_text(size = 14),
          legend.position = "bottom",
          legend.direction = "horizontal",
          legend.box = "vertical",
          legend.key.size = unit(0.7, "cm"),
          text = element_text(family = "OfficinaSanITC-Book"),
          plot.title = element_text(family = "OfficinaSanITC-Book"))
}

# Map 2003
m2003 <- build_state_year_map(2003)
m2003

# Map 2004
m2004 <- build_state_year_map(2004)
m2004

# Map 2005
m2005 <- build_state_year_map(2005)
m2005

# Map 2006
m2006 <- build_state_year_map(2006)
m2006

# Map 2007
m2007 <- build_state_year_map(2007)
m2007

# Map 2008
m2008 <- build_state_year_map(2008)
m2008

# Map 2009
m2009 <- build_state_year_map(2009)
m2009

# Map 2010
m2010 <- build_state_year_map(2010)
m2010

# Map 2011
m2011 <- build_state_year_map(2011)
m2011

# So, what can we see? Distribution varies by year. Different scales are used.
# One of the reasons may be the 2008 financial crisis in the US.
# Question 4.
# . Develop time series plots for the following Arkansas metro areas:
# o Hot Springs, Little Rock, Fayetteville, Searcy
# o Present all values from 1997 to 2013
# o Average at the metro area level
# I cleaned data in Excel. I have data for four cities and separately mean of each city.
arkansas <- read.csv("Arkansas metro1.csv", header = TRUE)
str(arkansas)
## 'data.frame': 204 obs. of 5 variables:
## $ Month : Factor w/ 204 levels "1-Apr","1-Aug",..: 174 170 186 158 190 182 178 162 202 198 ...
## $ Hot.Springs : int 70400 70150 70500 71450 72550 73350 74100 74700 75250 75550 ...
## $ Little.Rock : num 94708 95383 96108 96483 96700 ...
## $ Fayetteville: num 100133 100300 100467 100367 100533 ...
## $ Searcy : int 78600 78800 78800 78400 78100 78200 78600 79000 79100 79100 ...
glimpse(arkansas)
## Observations: 204
## Variables: 5
## $ Month <fctr> Jan-97, Feb-97, Mar-97, Apr-97, May-97, Jun-97, ...
## $ Hot.Springs <int> 70400, 70150, 70500, 71450, 72550, 73350, 74100, ...
## $ Little.Rock <dbl> 94708.33, 95383.33, 96108.33, 96483.33, 96700.00,...
## $ Fayetteville <dbl> 100133.3, 100300.0, 100466.7, 100366.7, 100533.3,...
## $ Searcy <int> 78600, 78800, 78800, 78400, 78100, 78200, 78600, ...
colnames(arkansas) # "Month" "Hot.Springs" "Little.Rock" "Fayetteville" "Searcy"
## [1] "Month" "Hot.Springs" "Little.Rock" "Fayetteville"
## [5] "Searcy"
# Month is a text label whose default ordering is alphabetical (see the factor
# levels "1-Apr", "1-Aug", ... in the str() output above). A plot that puts
# Month on the x axis would therefore draw the months out of time order.
# Re-level it in order of first appearance so the x axis follows the rows,
# which the glimpse() output shows starting Jan-97, Feb-97, Mar-97, ...
# (assumes the remaining rows are chronological too - TODO confirm).
# This is done after the inspection calls so the recorded output still applies.
if (is.factor(arkansas$Month) || is.character(arkansas$Month)) {
  month.labels <- as.character(arkansas$Month)
  arkansas$Month <- factor(month.labels, levels = unique(month.labels))
}
# Trend for Arkansas cities

# Draw the monthly price line for one Arkansas metro area.
#   metro.column: name of the column in `data` holding the metro's prices.
#   line.color:   colour of the line.
#   metro.name:   metro name as it should appear in the plot title.
#   data:         data frame with a Month column (defaults to arkansas).
# Returns a ggplot object. Stops if `metro.column` is not a column of `data`.
# The four plots differed only in column, colour and title, so the shared
# part lives here.
plot_metro_trend <- function(metro.column, line.color, metro.name, data = arkansas) {
  stopifnot(metro.column %in% names(data))
  # Copy the requested metro into a fixed column so aes() can use a bare name.
  data$metro.value <- data[[metro.column]]
  ggplot(data, aes(Month, metro.value, group = 1)) +
    geom_line(color = line.color) +
    labs(title = paste0("Median housing price in ", metro.name, ", AR (1997-2013)"),
         subtitle = "Source: zillowstatic.com",
         x = "Year",
         y = "Median housing price") +
    scale_color_fivethirtyeight() +
    theme_fivethirtyeight() +
    # Customisations come after theme_fivethirtyeight(): it is a complete
    # theme, so anything set before it (the title centering used to be) is
    # discarded.
    theme(axis.line.x = element_line(size = .5, colour = "black"),
          axis.title = element_text(size = 14),
          legend.position = "right",
          legend.direction = "vertical",
          legend.box = "vertical",
          legend.key.size = unit(0.7, "cm"),
          legend.text = element_text(size = 10),
          text = element_text(family = "OfficinaSanITC-Book"),
          plot.title = element_text(family = "OfficinaSanITC-Book", hjust = 0.5),
          # One tick per month would be unreadable, so the x labels are hidden.
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank())
}

# Hot Springs
hot.springs <- plot_metro_trend("Hot.Springs", "red", "Hot Springs")
hot.springs

# Little Rock
little.rock <- plot_metro_trend("Little.Rock", "blue", "Little Rock")
little.rock

# Searcy
searcy <- plot_metro_trend("Searcy", "green", "Searcy")
searcy

# Fayetteville
fayetteville <- plot_metro_trend("Fayetteville", "orange", "Fayetteville")
fayetteville

# Mean for each city: print the overall mean of every metro column, in the
# order Hot Springs, Little Rock, Fayetteville, Searcy.
invisible(lapply(
  c("Hot.Springs", "Little.Rock", "Fayetteville", "Searcy"),
  function(metro) print(mean(arkansas[[metro]]))
))
## [1] 107042.4
## [1] 134420.6
## [1] 144649.7
## [1] 100102.9
# Question 5.
# . Using data from Zillow and other sources (think Bureau of Labor Statistics and Census data):
# o Develop model for forecasting average median housing value for 2014
# o Use the average of 2014-01, 2014-02, 2014-03 as your test set
# o Consolidate monthly data into an annual average
# I created in Excel means for each month for forecasting average median housing value for 2014.
# I suppose that means of three months are not enough to make precise forecasting,
# so, I use the whole 2014 year (10 months)
# The CSV starts with a UTF-8 byte-order mark. Read with the default encoding,
# the mark ends up inside the first column name ("ï..Month"). Declaring the
# encoding strips it, so the column is simply "Month".
forecast.mean <- read.csv("forecast.mean.csv", header = TRUE,
                          fileEncoding = "UTF-8-BOM")
colnames(forecast.mean)
## [1] "Month" "Mean.of.median.housing"
# Keep text month labels in file order; the default discrete ordering is
# alphabetical. (Assumes the rows are chronological - TODO confirm.)
if (is.factor(forecast.mean$Month) || is.character(forecast.mean$Month)) {
  month.labels <- as.character(forecast.mean$Month)
  forecast.mean$Month <- factor(month.labels, levels = unique(month.labels))
}
# Plot the trend: monthly mean as a line plus a linear fit (geom_smooth).
# Columns are referenced by bare name; `forecast.mean$...` inside aes()
# bypasses the plot's data argument.
forecast.mean.plot <- ggplot(forecast.mean, aes(x = Month,
                                                y = Mean.of.median.housing,
                                                group = 1,
                                                color = Mean.of.median.housing)) +
  geom_line() +
  geom_smooth(method = "lm") +
  labs(title = "Trend in means of median housing prices in 2014",
       subtitle = "Source: zillowstatic.com",
       x = "Month",
       y = "Median housing price") +
  theme_fivethirtyeight() +
  # Customisations come after theme_fivethirtyeight(): it is a complete theme,
  # so the title centering that used to be set before it was discarded.
  theme(axis.line.x = element_line(size = .5, colour = "black"),
        axis.title = element_text(size = 14),
        legend.position = "none",
        legend.direction = "vertical",
        legend.box = "vertical",
        legend.key.size = unit(0.7, "cm"),
        legend.text = element_text(size = 10),
        text = element_text(family = "OfficinaSanITC-Book"),
        plot.title = element_text(family = "OfficinaSanITC-Book", hjust = 0.5))
forecast.mean.plot

# What can we see here? Blue line is a general positive trend, it is increasing.
# Another line shows us changes in average price per month. Fluctuation is within $10,000.
# Besides I want to use all months for more precise estimation.
forecast.all.data <- read.csv("forecast.all.data.csv", header = TRUE)
colnames(forecast.all.data) #"Mean.of.median.housing" "Month"
## [1] "Mean.of.median.housing" "Month"
# Keep text month labels in file order; the default discrete ordering is
# alphabetical. (Assumes the rows are chronological - TODO confirm.)
if (is.factor(forecast.all.data$Month) || is.character(forecast.all.data$Month)) {
  month.labels <- as.character(forecast.all.data$Month)
  forecast.all.data$Month <- factor(month.labels, levels = unique(month.labels))
}
# Plot the trend: monthly mean as a line plus a linear fit (geom_smooth).
# Columns are referenced by bare name; `forecast.all.data$...` inside aes()
# bypasses the plot's data argument.
forecast.all.data.plot <- ggplot(forecast.all.data, aes(x = Month,
                                                        y = Mean.of.median.housing,
                                                        group = 1,
                                                        color = Mean.of.median.housing)) +
  geom_line() +
  geom_smooth(method = "lm") +
  labs(title = "Trend in the US (1996-2014)",
       subtitle = "Source: zillowstatic.com",
       x = "Month",
       y = "Median housing price") +
  theme_fivethirtyeight() +
  # Customisations come after theme_fivethirtyeight(): it is a complete theme,
  # so the title centering that used to be set before it was discarded.
  theme(axis.line.x = element_line(size = .5, colour = "black"),
        axis.title = element_text(size = 14),
        legend.position = "none",
        legend.direction = "vertical",
        legend.box = "vertical",
        legend.key.size = unit(0.7, "cm"),
        legend.text = element_text(size = 10),
        text = element_text(family = "OfficinaSanITC-Book"),
        plot.title = element_text(family = "OfficinaSanITC-Book", hjust = 0.5),
        # One tick per month would be unreadable, so the x labels are hidden.
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())
forecast.all.data.plot

# I deleted names of months and years. But this is actually much cooler.
# We vividly see the trend. As a result, we can predict the approximate price
# based on this trend.
# Trend Decomposition
# We next decompose the monthly series of mean housing prices into its
# underlying components: the observed monthly values, a generalized trend,
# a seasonal component, and the remainder (noise) which is unexplained by
# either the general or seasonal trends.
# The series is built from the price column. It used to be built from the
# Month labels, which decomposes the labels rather than the prices.
# frequency = 12 because the observations are monthly and the seasonal cycle
# is one year; 30 would only suit daily data with a monthly cycle.
# NOTE(review): no start date is given, so the time axis is in cycle numbers,
# not calendar years.
monthly.ARIMA <- ts(na.omit(forecast.all.data$Mean.of.median.housing), frequency = 12)
decomp <- stl(monthly.ARIMA, s.window = "periodic")
# Seasonally adjusted series (seasonal component removed).
deseasonal_cnt <- seasadj(decomp)
plot(decomp, main = 'Trends')

# We can see interesting tendencies.
# The generalized trend serves to further smooth monthly ARIMA.
# Again, our previous seasonal trend is confirmed with this model.
# Seasonal trend has the similar pattern with the general trend.
# Question 6.
# . Answer the following questions:
# o What three zip codes provide the best investment opportunity for the SREIT?
# o Why?
# Analysis of Syracuse data
# For this I created a subset in Excel.
syracuse <- read.csv("Syracuse.real.estate.csv", header = TRUE)
colnames(syracuse)
## [1] "Month" "Zip13210" "Zip13208" "Zip13204" "Zip13205" "Zip13206"
## [7] "Zip13203" "Zip13207" "Zip13224"
# [1] "Month" "Zip13210" "Zip13208" "Zip13204" "Zip13205" "Zip13206" "Zip13203" "Zip13207"
# [9] "Zip13224"
# If Month holds text labels, keep them in file order. The default ordering
# of a discrete x axis is alphabetical, which would draw the months out of
# time order. Numeric or date columns are left untouched.
# (Assumes the rows are chronological - TODO confirm.)
if (is.factor(syracuse$Month) || is.character(syracuse$Month)) {
  month.labels <- as.character(syracuse$Month)
  syracuse$Month <- factor(month.labels, levels = unique(month.labels))
}
# Here we have 8 zipcodes and I will show the trend for each of them.

# Draw the monthly price line for one Syracuse zip code.
#   zip.code: five-digit zip code; values are read from column "Zip<zip.code>".
#   data:     data frame with a Month column and the Zip* columns
#             (defaults to syracuse).
# Returns a ggplot object. Stops if the zip column is missing.
# The eight plots differed only in the zip code, so the shared part lives here.
plot_zip_trend <- function(zip.code, data = syracuse) {
  zip.column <- paste0("Zip", zip.code)
  stopifnot(zip.column %in% names(data))
  # Copy the requested zip into a fixed column so aes() can use a bare name.
  data$zip.value <- data[[zip.column]]
  ggplot(data, aes(Month, zip.value, group = 1)) +
    geom_line(color = "darkorange") +
    labs(title = paste0("Zip ", zip.code, " trend (1996-2014) in Syracuse, NY"),
         subtitle = "Source: zillowstatic.com",
         x = "Year",
         y = "Median housing price") +
    scale_color_fivethirtyeight() +
    theme_fivethirtyeight() +
    # Customisations come after theme_fivethirtyeight(): it is a complete
    # theme, so anything set before it (the title centering used to be) is
    # discarded.
    theme(axis.line.x = element_line(size = .5, colour = "black"),
          axis.title = element_text(size = 14),
          legend.position = "right",
          legend.direction = "vertical",
          legend.box = "vertical",
          legend.key.size = unit(0.7, "cm"),
          legend.text = element_text(size = 10),
          text = element_text(family = "OfficinaSanITC-Book"),
          plot.title = element_text(family = "OfficinaSanITC-Book", hjust = 0.5),
          # One tick per month would be unreadable, so the x labels are hidden.
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank())
}

# Zip 13210 (I lived with this zip two years on Westcott area :))
zip13210 <- plot_zip_trend(13210)
zip13210 # We can see that current price is about $70,000.

# Zip 13208
zip13208 <- plot_zip_trend(13208)
zip13208 # We can see that the current price is about $55,000.

# Zip 13204
zip13204 <- plot_zip_trend(13204)
zip13204 # We can see that the current price is about $45,000.

# Zip 13205
zip13205 <- plot_zip_trend(13205)
zip13205 # We can see that the current price is about $50,000.

# Zip 13206
zip13206 <- plot_zip_trend(13206)
zip13206 # We can see that the current price is about $60,000+.

# Zip 13203
zip13203 <- plot_zip_trend(13203)
zip13203 # We can see that the current price is about $55,000.

# Zip 13207
zip13207 <- plot_zip_trend(13207)
zip13207 # We can see that the current price is about $55,000.

# Zip 13224
zip13224 <- plot_zip_trend(13224)
zip13224 # We can see that the current price is about $85,000.

# Well, I firmly believe that investing based on charts is not a good idea, but...
# 13210 (especially Westcott area), 13224, and 13206 are my three choices.
# Additional sources for mapping data in US states
# http://eriqande.github.io/rep-res-web/lectures/making-maps-with-R.html
# http://rpubs.com/jfbratt/basic-mapping
# https://rpubs.com/alyssafahringer/165330
# https://github.com/wmurphyrd/fiftystater
# http://api.rpubs.com/jbrnbrg/project2_607
# http://eriqande.github.io/rep-res-web/lectures/making-maps-with-R.html#maps-package-and-ggplot
# https://rpubs.com/jfbratt/basic-mapping
# https://uchicagoconsulting.wordpress.com/
# http://www.kevjohnson.org/making-maps-in-r/
# https://stackoverflow.com/questions/29614972/ggplot-us-state-map-colors-are-fine-polygons-jagged-r
# http://adamolson.org/2015/07/15/post_about_maps/
# http://rforpublichealth.blogspot.com/2015/10/mapping-with-ggplot-create-nice.html
# http://stat405.had.co.nz/ggmap.pdf