# Environment setup ----
# Every input file below is read relative to this directory.
# NOTE(review): the path is machine-specific; change it (or launch the script
# from the data directory) before running on another machine.
setwd("D:/Jobs/Capital One")

# Packages used by the analysis; any that are not installed yet are installed.
packages <- c("readr", "dplyr", "stringr", "ggplot2", "plotly", "tidyr")
new.packages <- packages[!(packages %in% installed.packages()[, "Package"])]
if (length(new.packages) > 0) {
  install.packages(new.packages)
}

# Attach each package. lapply() always returns a list (sapply() may simplify
# unpredictably), and invisible() keeps the list of attached packages from
# being printed when the script is run.
invisible(lapply(packages, library, character.only = TRUE))
# Load and clean the inputs ----
# Both files are read from the working directory; strings stay character.
airbnb <- read.csv("listings.csv", stringsAsFactors = FALSE)
zillow <- read.csv("Zip_Zhvi_2bedroom.csv", stringsAsFactors = FALSE)

# Zillow: keep the New York rows and expose RegionName as a zipcode factor so
# it can be joined with the Airbnb listings.
zillow_NYC <- filter(zillow, City == "New York")
zillow_NYC <- rename(zillow_NYC, zipcode = RegionName)
zillow_NYC$zipcode <- as.factor(zillow_NYC$zipcode)

# Airbnb: two-bedroom listings only, restricted to the columns used later.
airbnb_2br <- airbnb %>%
  filter(bedrooms == 2) %>%
  select(
    zipcode, neighbourhood_cleansed, neighbourhood_group_cleansed,
    price, property_type, room_type,
    availability_30, availability_60, availability_90, availability_365
  )

# Zip codes are identifiers, so they are validated as text instead of being
# converted to numbers: keep the first five characters and require five
# digits. Missing or malformed values fail the pattern and are dropped without
# coercion warnings. A leading zero is rejected because such a value cannot
# survive a numeric zip-code representation.
# NOTE(review): Zillow's RegionName is presumably parsed as a number by
# read.csv -- confirm before relaxing the pattern.
airbnb_2br$zipcode <- substr(as.character(airbnb_2br$zipcode), 1, 5)
airbnb_2br <- filter(airbnb_2br, grepl("^[1-9][0-9]{4}$", zipcode))
airbnb_2br$zipcode <- as.factor(airbnb_2br$zipcode)

# Strip "$" and "," from the price text, convert to numeric, then drop the
# listings above the 99.9th percentile. na.rm = TRUE keeps quantile() from
# failing when a price could not be parsed; filter() discards those NA rows.
airbnb_2br$price <- as.numeric(str_remove_all(airbnb_2br$price, "[\\$\\,]"))
price_cap <- quantile(airbnb_2br$price, probs = 0.999, na.rm = TRUE)
airbnb_2br <- filter(airbnb_2br, price <= price_cap)

airbnb_2br <- rename(airbnb_2br, Borough = neighbourhood_group_cleansed)
# Forecast the property cost per zip code ----
# Columns 226:262 of zillow_NYC are treated as a monthly series starting in
# June 2014. An ARIMA(1,1,1) model with a seasonal (1,0,1) component is fitted
# to each row and the value predicted 11 steps ahead becomes CostofProperty.
history_cols <- 226:262

# Transpose once (one column per zip code) instead of on every iteration.
price_history <- t(zillow_NYC[, history_cols])

# seq_len() yields no iterations for an empty table, and vapply() guarantees
# one numeric forecast per zip code.
zillow_NYC$CostofProperty <- vapply(
  seq_len(ncol(price_history)),
  function(i) {
    time_series <- ts(
      as.vector(price_history[, i]),
      start = c(2014, 6),
      frequency = 12
    )
    fit <- arima(
      time_series,
      order = c(1, 1, 1),
      optim.control = list(maxit = 1000),
      # period = NA lets arima() fall back to the frequency of the series
      seasonal = list(order = c(1, 0, 1), period = NA),
      method = "ML"
    )
    forecast_values <- predict(fit, n.ahead = 11)$pred
    # Only the last forecast point is used as the purchase cost
    as.numeric(forecast_values[length(forecast_values)])
  },
  numeric(1)
)

# Lookup table used for the join with the Airbnb listings
zillow_final <- select(zillow_NYC, zipcode, CostofProperty)
# Occupancy score, room-type adjustment and join ----
# For each window N (30, 60, 90, 365) the share (N - availability_N) / N is
# computed and the four shares are blended with weights 0.4, 0.3, 0.2 and 0.1.
# NOTE(review): availability_N presumably counts the days still open in the
# next N days, which makes this column a booked-share estimate -- confirm.
airbnb_2br <- mutate(
  airbnb_2br,
  availability = ((30 - availability_30) / 30) * 0.4 +
    ((60 - availability_60) / 60) * 0.3 +
    ((90 - availability_90) / 90) * 0.2 +
    ((365 - availability_365) / 365) * 0.1
)

# "Private room" listings get their price doubled.
# NOTE(review): presumably to approximate the price of the whole two-bedroom
# unit -- confirm the intent.
airbnb_2br$price <- ifelse(
  airbnb_2br$room_type == "Private room",
  airbnb_2br$price * 2,
  airbnb_2br$price
)

# Inner join on zip code. merge() has no `id` argument (it would be silently
# ignored), so the key is named explicitly through `by`.
data <- merge(airbnb_2br, zillow_final, by = "zipcode")
# Plot theme: theme_light() with custom title/subtitle text, no panel grid,
# no panel border, no axis ticks, and the legend placed below the plot.
my_theme <- function() {
  title_text <- element_text(size = 13, color = "gray20")
  subtitle_text <- element_text(size = 11, color = "black")
  hidden <- element_blank()

  overrides <- theme(
    plot.title = title_text,
    plot.subtitle = subtitle_text,
    panel.grid = hidden,
    panel.border = hidden,
    axis.ticks = hidden,
    legend.position = "bottom"
  )

  theme_light() + overrides
}
# Break-even analysis ----
# One row per Borough / zipcode / CostofProperty combination.
#   cost            = median price * mean availability score * 365
#   breakeven_years = CostofProperty / cost, truncated to an integer
# summarise() drops the last grouping level, so data1 stays grouped by
# Borough and zipcode; the slice() below relies on that grouping.
data1 <- data %>%
  group_by(Borough, zipcode, CostofProperty) %>%
  summarise(cost = median(price) * mean(availability) * 365) %>%
  mutate(breakeven_years = as.integer(CostofProperty / cost))

# Repeat each group's row once per year of the horizon, number the copies
# 1..no_of_years, and compute the cumulative profit in millions.
no_of_years <- 100
data2 <- data1 %>%
  slice(rep(row_number(), no_of_years)) %>%
  mutate(
    year = seq_len(no_of_years),
    profit = ((year * cost) - CostofProperty) / 1000000
  )

# Cumulative profit per zip code over time; the y scale is limited to
# 0..6.1, so values outside that range are not drawn.
g <- ggplot(data2, aes(x = year, y = profit, color = zipcode)) +
  geom_smooth(se = FALSE) +
  ylim(0, 6.1) +
  xlab("Years of investment") +
  ylab("Cumulative Profits (in million)")
ggplotly(g)