# Project working directory; the relative CSV paths used later resolve
# against it.
# NOTE(review): this is a hard-coded, machine-specific absolute path. It is
# kept so existing behaviour does not change, but the script only runs on a
# machine where this directory exists.
setwd("D:/Jobs/Capital One")

# Packages this script attaches.
packages <- c("readr", "dplyr", "stringr", "ggplot2", "plotly", "tidyr")

# Install whatever is missing. system.file(package = ) returns "" for a
# package that is not installed, which avoids scanning the entire library
# with installed.packages() just to test a handful of names.
is_installed <- vapply(
  packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
new.packages <- packages[!is_installed]
if (length(new.packages) > 0) {
  install.packages(new.packages)
}

# Attach every package. library() is called for its side effect only, so the
# returned list of attached packages is discarded rather than printed.
invisible(lapply(packages, library, character.only = TRUE))

# Raw inputs, read relative to the working directory. Strings are kept as
# character (not factor) so they can be cleaned with string functions later.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
airbnb <- read.csv("listings.csv", stringsAsFactors = FALSE)
zillow <- read.csv("Zip_Zhvi_2bedroom.csv", stringsAsFactors = FALSE)

# Keep only the Zillow rows whose City value is exactly "New York".
zillow_NYC <- filter(zillow, City == "New York")

# Two-bedroom Airbnb listings, reduced to the columns the analysis uses.
airbnb_2br <- filter(airbnb, bedrooms == 2) %>%
  select(
    "zipcode", "neighbourhood_cleansed",
    "neighbourhood_group_cleansed", "price", "property_type",
    "room_type", "availability_30", "availability_60", "availability_90",
    "availability_365"
  )

# Zip codes: keep the first five characters and convert to a number, so any
# value that is not purely numeric becomes NA (as.numeric() warns about
# those). Only values that print as exactly five characters are kept; rows
# where the test is NA are dropped by filter() as well.
airbnb_2br$zipcode <- as.numeric(strtrim(airbnb_2br$zipcode, width = 5))
airbnb_2br <- filter(airbnb_2br, nchar(zipcode) == 5)
airbnb_2br$zipcode <- as.factor(airbnb_2br$zipcode)

# Prices arrive as text; strip "$" and "," before converting to numeric.
airbnb_2br$price <- as.numeric(str_remove_all(airbnb_2br$price, "[\\$\\,]"))

# Drop the most extreme prices (above the 99.9th percentile). na.rm = TRUE
# keeps quantile() from stopping with an error when a price failed to parse;
# rows with an NA price are still removed because filter() drops NA
# comparisons.
airbnb_2br <- filter(
  airbnb_2br,
  price <= quantile(price, probs = 0.999, na.rm = TRUE)
)

colnames(airbnb_2br)[colnames(airbnb_2br) == "neighbourhood_group_cleansed"] <-
  "Borough"

# Use the same key name and type as the Airbnb table so the two can be joined.
colnames(zillow_NYC)[colnames(zillow_NYC) == "RegionName"] <- "zipcode"
zillow_NYC$zipcode <- as.factor(zillow_NYC$zipcode)

# Price history used for forecasting: columns 226 to 262 (37 values per row),
# treated below as a monthly series starting in June 2014.
# NOTE(review): the positional column indices assume a fixed layout of the
# Zillow file - confirm they still point at the intended months.
# The conversion to a matrix is done once here instead of re-transposing the
# whole slice on every loop iteration.
price_history <- as.matrix(zillow_NYC[, 226:262])

# Pre-allocate with one slot per row; unlike assigning a scalar 0, this also
# works when zillow_NYC has no rows.
zillow_NYC$CostofProperty <- numeric(nrow(zillow_NYC))

# For each zip code, fit a seasonal ARIMA(1,1,1)(1,0,1) model to its price
# history and store the forecast 11 months past the end of the series as the
# purchase cost. seq_len() skips the loop entirely for an empty table,
# whereas 1:nrow() would iterate over c(1, 0).
for (i in seq_len(nrow(zillow_NYC))) {
  time_series <- ts(
    as.vector(price_history[i, ]),
    start = c(2014, 6),
    frequency = 12
  )

  # period = NA lets arima() take the seasonal period from the series
  # frequency.
  fit <- arima(
    time_series,
    order = c(1, 1, 1),
    optim.control = list(maxit = 1000),
    seasonal = list(order = c(1, 0, 1), period = NA),
    method = "ML"
  )

  # Named `forecast` rather than `predict` so the stats::predict() generic is
  # not shadowed by a data object.
  forecast <- predict(fit, n.ahead = 11)
  prediction <- forecast$pred
  zillow_NYC$CostofProperty[i] <- prediction[length(prediction)]
}

# Only the join key and the forecast cost are needed downstream.
zillow_final <- select(zillow_NYC, zipcode, CostofProperty)

# Weighted occupancy score per listing. Despite the column name, each term is
# the share of days NOT available in that look-ahead window, so a higher
# value means a more heavily booked (or blocked) listing. Nearer windows get
# larger weights (0.4, 0.3, 0.2, 0.1; they sum to 1).
airbnb_2br <- mutate(
  airbnb_2br,
  availability = ((30 - availability_30) / 30) * 0.4 +
    ((60 - availability_60) / 60) * 0.3 +
    ((90 - availability_90) / 90) * 0.2 +
    ((365 - availability_365) / 365) * 0.1
)

# Listings offered as a private room have their price doubled.
# NOTE(review): presumably to approximate the price of the whole two-bedroom
# unit - confirm the intent.
airbnb_2br$price <- ifelse(
  airbnb_2br$room_type == "Private room",
  airbnb_2br$price * 2,
  airbnb_2br$price
)

# Inner join of listings with the forecast property cost. merge() has no `id`
# argument (it was silently swallowed by `...`), so the join key is now given
# explicitly through `by`.
data <- merge(airbnb_2br, zillow_final, by = "zipcode")

# Shared ggplot2 theme: theme_light() with grid lines, panel border and axis
# ticks removed, custom title/subtitle text, and the legend at the bottom.
# Takes no arguments and returns a theme object to add to a plot with `+`.
my_theme <- function() {
  overrides <- theme(
    plot.title = element_text(size = 13, color = "gray20"),
    plot.subtitle = element_text(size = 11, color = "black"),
    panel.grid = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "bottom"
  )
  theme_light() + overrides
}

# Yearly revenue per (Borough, zipcode): median nightly price x mean
# occupancy score x 365, stored in `cost`. breakeven_years is the property
# cost divided by that revenue, truncated to whole years.
# A previous assignment to data1 using a flat 0.75 occupancy was overwritten
# by this one before being used, so it has been removed.
data1 <- data %>%
  group_by(Borough, zipcode, CostofProperty) %>%
  summarise(cost = median(price) * mean(availability) * 365) %>%
  mutate(breakeven_years = as.integer(CostofProperty / cost))

# Investment horizon, in years, for the cumulative-profit projection.
no_of_years <- 100

# One row per group per year. After summarise() the table is still grouped by
# Borough and zipcode, so slice() and mutate() run per group: each group's row
# is repeated no_of_years times and numbered 1..no_of_years.
# NOTE(review): this relies on every group holding exactly one row (one
# CostofProperty per zip code); otherwise `year` would not match the group
# size.
data2 <- data1 %>%
  slice(rep(row_number(), no_of_years)) %>%
  mutate(
    year = seq_len(no_of_years),
    profit = ((year * cost) - CostofProperty) / 1000000
  )

# Cumulative profit (in millions) over time, one smoothed line per zip code.
# ylim() discards points outside 0..6.1 before smoothing, so years with
# negative profit are not drawn.
g <- ggplot(data2, aes(x = year, y = profit, color = zipcode)) +
  geom_smooth(se = FALSE) +
  ylim(0, 6.1) +
  xlab("Years of investment") +
  ylab("Cumulative Profits (in million)")

# Render the chart as an interactive plotly widget.
ggplotly(g)