Median Sale Price comes from zillow http://files.zillowstatic.com/research/public/City/Sale_Prices_City.csv
library(readr)
library(ggplot2)
library(reshape2)
library(plotly)
library(dplyr)
## Read data
d = read_csv("Sale_Prices_City.csv")
bay.area.cities = c("Fremont","Union City","Newark","Milpitas",
"Sunnyvale","Santa Clara","San Jose","Cupertino",
'East Palo Alto',"Palo Alto","Menlo Park","Mountain View")
## Bay area data
subd = d %>%
filter(RegionName %in% bay.area.cities) %>%
select(RegionName,`2008-03`:`2018-01`)
md = melt(data = subd,id.vars = "RegionName",variable.name = "time",value.name = "price")
p = ggplot(data = md,aes(x = `time`,y = price,group=RegionName,color=RegionName))+
geom_line()+
geom_point()+
theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggplotly(p)
md$time=as.Date(paste(as.character(md$time),"-01",sep=""))
## data since 2014
md2 = md %>%
filter(time > as.Date("2013-12-01"))
md2$year = format(md2$time,"%Y")
md2$month = format(md2$time,"%m")
### calculate the the average of each year's increasement
dd = md2 %>%
group_by(RegionName,year) %>%
summarise(year.increase = (price[12]-price[1])/price[1]) %>%
filter(year != 2018)
p1 = ggplot(data = dd,aes(x = year,y = year.increase,group=RegionName,,color=RegionName,linetype=RegionName))+
geom_line()+
geom_point()+
geom_hline(yintercept = 0,linetype=2)+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
ggtitle("Increase for each year")
ggplotly(p1)
## Average increase over 2014 to 2018
dd2 = dd %>%
group_by(RegionName) %>%
summarise(avg.year.increase = mean(year.increase,na.rm = T)) %>%
mutate(four.year.total.increase=(1+avg.year.increase)^4 - 1)
## Order by increasement
dd2 = dd2[order(dd2$avg.year.increase,decreasing = TRUE),]
dd2$RegionName = factor(dd2$RegionName,levels = dd2$RegionName)
print(as.data.frame(dd2))
## RegionName avg.year.increase four.year.total.increase
## 1 Santa Clara 0.18525807 0.9735662
## 2 Sunnyvale 0.14992291 0.7485373
## 3 Newark 0.11369413 0.5383804
## 4 Cupertino 0.11150941 0.5263446
## 5 San Jose 0.10638225 0.4983760
## 6 Union City 0.10575060 0.4949571
## 7 Mountain View 0.09884709 0.4579715
## 8 Milpitas 0.09631613 0.4445854
## 9 Palo Alto 0.07732427 0.3470564
## 10 Fremont 0.07716310 0.3462505
## 11 Menlo Park 0.06356391 0.2795415
## 12 East Palo Alto 0.05984143 0.2617217
md.dd2 = melt(dd2,id.vars = "RegionName",value.name = "Increase",variable.name = "type")
p2 =
ggplot(data = md.dd2,aes(x = RegionName,y = Increase,fill=type))+
geom_bar(stat = "identity",position = "dodge")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
ggtitle("Average increase and total increase over 4 years")
ggplotly(p2)