Data

Median Sale Price comes from zillow http://files.zillowstatic.com/research/public/City/Sale_Prices_City.csv

Median Sale price trend (2008 - 2018)

library(readr)
library(ggplot2)
library(reshape2)
library(plotly)
library(dplyr)

## Read data
d = read_csv("Sale_Prices_City.csv")

bay.area.cities = c("Fremont","Union City","Newark","Milpitas",
                    "Sunnyvale","Santa Clara","San Jose","Cupertino",
                    'East Palo Alto',"Palo Alto","Menlo Park","Mountain View")

## Bay area data
subd = d %>% 
        filter(RegionName %in% bay.area.cities) %>%
        select(RegionName,`2008-03`:`2018-01`)

md = melt(data = subd,id.vars = "RegionName",variable.name = "time",value.name = "price")

p = ggplot(data = md,aes(x = `time`,y = price,group=RegionName,color=RegionName))+
  geom_line()+
  geom_point()+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

ggplotly(p)

Which city increases the most from 2014 to 2018

md$time=as.Date(paste(as.character(md$time),"-01",sep=""))

## data since 2014
md2 = md %>%
      filter(time > as.Date("2013-12-01"))

md2$year = format(md2$time,"%Y")
md2$month = format(md2$time,"%m")

### calculate the the average of each year's increasement
dd = md2 %>%
  group_by(RegionName,year) %>%
  summarise(year.increase = (price[12]-price[1])/price[1]) %>%
  filter(year != 2018)

p1 = ggplot(data = dd,aes(x = year,y = year.increase,group=RegionName,,color=RegionName,linetype=RegionName))+
   geom_line()+
  geom_point()+
  geom_hline(yintercept = 0,linetype=2)+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  ggtitle("Increase for each year")
  
ggplotly(p1)
## Average increase over 2014 to 2018
dd2 = dd %>% 
    group_by(RegionName) %>%
    summarise(avg.year.increase = mean(year.increase,na.rm = T)) %>%
    mutate(four.year.total.increase=(1+avg.year.increase)^4 - 1)

## Order by increasement
dd2 = dd2[order(dd2$avg.year.increase,decreasing = TRUE),]
dd2$RegionName = factor(dd2$RegionName,levels = dd2$RegionName)
print(as.data.frame(dd2))
##        RegionName avg.year.increase four.year.total.increase
## 1     Santa Clara        0.18525807                0.9735662
## 2       Sunnyvale        0.14992291                0.7485373
## 3          Newark        0.11369413                0.5383804
## 4       Cupertino        0.11150941                0.5263446
## 5        San Jose        0.10638225                0.4983760
## 6      Union City        0.10575060                0.4949571
## 7   Mountain View        0.09884709                0.4579715
## 8        Milpitas        0.09631613                0.4445854
## 9       Palo Alto        0.07732427                0.3470564
## 10        Fremont        0.07716310                0.3462505
## 11     Menlo Park        0.06356391                0.2795415
## 12 East Palo Alto        0.05984143                0.2617217
md.dd2 = melt(dd2,id.vars = "RegionName",value.name = "Increase",variable.name = "type")


p2 = 
  ggplot(data = md.dd2,aes(x = RegionName,y = Increase,fill=type))+
  geom_bar(stat = "identity",position = "dodge")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  ggtitle("Average increase and total increase over 4 years")
  
 ggplotly(p2)