Rent <- read_csv("SeattleRent.csv", col_types = cols(Date = col_date(format = "%m/%d/%Y")))
RentCol <- colnames(Rent)[-1]

Visualization

Raw Data

ts_plot(Rent, title="Monthly Rent over Time in Seattle - Raw Data")

Create Time Series from Raw Data

Assign Values

## data start time May 15
starttime <- 2015+5/12
## data end time Mar 23
endtime <- 2023+3/12

## start time of the forecast horizon Mar 2020
starth <- 2020+3/12

## interested time period from Jul 2023 to Sep 2023
chsn1 <- 2023+7/12
chsn2 <- 2023+9/12
## growth rate time interval (exclude starting month)
Date <- subset(Rent[1], Date >= as.Date("2015-05-31")+30 & Date <= as.Date("2023-03-31"))

#predict how many months ahead
predtime <- 12
for (i in RentCol){
     X_name <- paste0("Rent$", i)
     assign(i, ts(eval(parse(text=X_name)), start=starttime,end=endtime,frequency=12))
}

Decompose of All Monthly Rent Data

#long data
Rentlong <- melt(Rent, id.vars="Date")
Rentlong <- Rentlong %>%
  group_by(Date) %>%
  summarize(mean_rent = mean(value))
Rentlong <- ts(Rentlong$mean_rent, start=starttime,end=endtime,frequency=12)

dec.Rentlong <- decompose(Rentlong, type="additive")
plot(dec.Rentlong)

Decompose for each data

par(mfrow=c(5,2))
for (i in RentCol) {
  plot(decompose(eval(parse(text=i)),type="additive"),xlab=i)
}

Seasonally-Adjusted Time Series

log_list <- list()
adj_list <- list()
for (i in RentCol){
     adj_name <- paste0("adj", i)
     adj_list <- append(adj_list, adj_name)
     #create ts for seasonally-adjusted monthly rent
     ssn <- decompose(eval(parse(text=i)),type="additive")[["seasonal"]]
     assign(adj_name, eval(parse(text=i)) - ssn)

     #create ts for growth rate of monthly rent
     log_name <- paste0("log", i)
     log_list <- append(log_list, log_name)
     for (n in (1:length(adj_list))){
       assign(log_name, ts(diff(log(eval(parse(text=adj_list[[n]]))),lag=1), 
           start=starttime+1/12, end=endtime, frequency=12))
     }
}

ACF PACF

#for (i in (1:length(log_list))) {
  
#  data <- eval(parse(text=log_list[[i]]))
#  par(mfrow=c(2,1))
#  acf2(ts(data), 48, main=log_list[[i]])
#}

Growth rate of rent

## create a data frame to store all rent growth rate
logRent <- data.frame(matrix(ncol = 0, nrow = 12*(endtime-starttime)+1))

for (i in 1:length(log_list)) {
  data <- as.numeric(eval(parse(text=log_list[[i]])))
  logRent <- cbind.data.frame(logRent,data)
}

logRent <- cbind.data.frame(logRent,Date)
ts_plot(logRent[1:(length(RentCol)+1)])

ADF Test

for (i in 1:length(log_list)) {
  data <- eval(parse(text=log_list[[i]]))
  if (adf.test(data)$p.value <= 0.05){
    cat("For", log_list[[i]], "model, reject H_0\n")
  }
  else {
    cat("For", log_list[[i]], "model, do not reject H_0\n")
  }
}
## For logRedmond model, reject H_0
## For logDist4 model, do not reject H_0
## For logDist6 model, do not reject H_0
## For logKirkland model, reject H_0
## For logPuyallup model, reject H_0
## For logDist3 model, do not reject H_0
## For logBellevue model, reject H_0
## For logDist7 model, reject H_0
## For logTacoma model, reject H_0

Model Creating

Forecast Horizon

for (n in RentCol){
      samp <- paste0(RentCol,"_s")
      horz <- paste0(RentCol,"_h")
      best <- paste0("best",RentCol)
}   
for (i in 1:length(log_list)) {
      data <- eval(parse(text=log_list[i]))
      assign(samp[i], window(data,start=NULL,end=starth-1/12))
      assign(horz[i], window(data,start=starth,end=NULL))
}

ARIMA Model Identification

pqd <- expand.grid(p=1:3, q=1:3, d=0)
pqd1 <- expand.grid(p=1:3, q=1:3, d=1:2)
#print out the best model of all time series in a nice way
bestmodel <- data.frame(matrix(ncol = 5, nrow = 0, dimnames=list(NULL, c("p", "q", "d","V1", "AIC"))))

for (i in 1:length(samp)) {
  data <- eval(parse(text=log_list[[i]]))
  ## if already stationary then d=0
  if (adf.test(data)$p.value <= 0.05){
    ## create a general function that can fit arima models + get rmse + AIC
    log_arima <- function(order) {
      arima <- Arima(data, order = order, method="ML")
      ## accuracy(fit)[2] = rmse
      c(accuracy(arima)[2],  AIC = AIC(arima))
    }
    ## apply our pqd combos to previously created function
    return <- apply(pqd, 1, log_arima)
    ## create data frame with pqd combos and corresponding rmse and AIC
    result <- cbind(pqd, t(return))
    ## only print out model with lowest AIC
    assign(best[[i]],head(result %>% arrange(AIC), n=1))
    bestmodel <- full_join(bestmodel, eval(parse(text=best[[i]])), by=colnames(bestmodel))
  }
  
  ## if not stationary then test d >= 1
  else {
      log_arima <- function(order) {
      arima2 <- Arima(data, order = order, method="ML")
      c(accuracy(arima2)[2],  AIC = AIC(arima2))
    }
    return <- apply(pqd1, 1, log_arima)
    result <- cbind(pqd1, t(return))
    assign(best[[i]],head(result %>% arrange(AIC), n=1))
    bestmodel <- full_join(bestmodel,eval(parse(text=best[[i]])), by=colnames(bestmodel))
  }
}
logcol <- data.frame()
#best model data name column
for (i in 1:length(log_list)){
  logcol <- rbind(logcol, log_list[i])
}
#rename column
logcol <- logcol %>% rename("Data"=colnames(logcol[1]))
cbind(logcol, bestmodel)

One-step ahead Rolling Forecast

for (n in RentCol){
  #store growth rate in horizon - forecast data
      fore <- paste0("fore.", RentCol)
      updt <- paste0(RentCol,"_updt")
}

for (n in 1:length(fore)){
      assign((fore[n]),zoo())
}
for (n in 1:length(RentCol)){
  for (i in 1:((endtime-starth)*12+1)){
      data <- eval(parse(text=log_list[n]))
      data2 <- window(data, start=NULL, end=starth-1/12+i/12)
      assign(updt[n], Arima(data2, 
                            order=c(as.numeric(eval(parse(text=best[n]))[1]),
                                    as.numeric(eval(parse(text=best[n]))[3]),
                                    as.numeric(eval(parse(text=best[n]))[2]))))
      assign(fore[n], ts(c(eval(parse(text=fore[n])),
                        forecast(eval(parse(text=updt[n])),h=1)$mean),
             start=starth,frequency=12))
  }
}

Forecast Accuracy - rmse

rmse <- data.frame()

for (n in 1:length(RentCol)){
      rmse <- rbind(rmse,
                    accuracy(eval(parse(text=fore[n])),
                             eval(parse(text=horz[n])))
                    [2])
}

#rename col name to rmse
rmse <- rmse %>% rename("rmse"=colnames(rmse[1]))

cbind(logcol,rmse)
for (n in 1:length(RentCol)){
  ts.plot(eval(parse(text=log_list[n])),
          eval(parse(text=fore[n])),
          eval(parse(text=horz[n])),
          gpars=list(col=c(1,2,1),main=log_list[n]))
}

Monthly Rent in Horizon

for (n in 1:length(RentCol)){
  for (i in 1:((endtime-starth)*12+1)){
      data <- eval(parse(text=log_list[n]))
      data2 <- window(data, start=NULL, end=starth-1/12+i/12)
      assign(updt[n], Arima(data2, 
                            order=c(as.numeric(eval(parse(text=best[n]))[1]),
                                    as.numeric(eval(parse(text=best[n]))[3]),
                                    as.numeric(eval(parse(text=best[n]))[2]))))
      assign(fore[n], ts(c(eval(parse(text=fore[n])),
                        forecast(eval(parse(text=updt[n])),h=1)$mean),
             start=starth,frequency=12))
  }
}

Future Prediction

Growth Rate Forecast in One Year

for (n in RentCol){
  pred <- paste0("pred", RentCol)
  #store growth rate in two years - forecast data
  ftr <- paste0("g.ftr", RentCol)
  #store growth rate in chosen interval
  chosen <- paste0("chsn", RentCol)
  #store rent in chosen interval
  r.chsn <- paste0("r.chsn", RentCol)
}
for (n in RentCol){
  #store arima function - forecast data
      updt2 <- paste0(RentCol,"_updt2")
}
for (n in 1:length(ftr)){
      assign((ftr[n]),zoo())
}

In Sample Multistep Forecast

for (n in 1:length(RentCol)){
  datah <- eval(parse(text=log_list[n]))
  assign(updt2[n], Arima(datah, 
                            order=c(as.numeric(eval(parse(text=best[n]))[1]),
                                    as.numeric(eval(parse(text=best[n]))[3]),
                                    as.numeric(eval(parse(text=best[n]))[2])), 
                         method="ML"))
  assign(ftr[n], forecast(eval(parse(text=updt2[n])),h=predtime)$mean)
  ts.plot(datah,eval(parse(text=ftr[n])), xlim=c(endtime-1,endtime+predtime/12),
          gpars=list(main=log_list[n],
                     col=1:2))
}

Rent Forecast in One Year

for (n in RentCol){
  #store monthly rent in the forecast horizon interval - forecast data
  r.horz <- paste0("r.horz", RentCol)
  #store the monthly rent right before the forecast horizon interval to start with- real data
  temp <- paste0("temp", RentCol)
  temp2 <- paste0("temp2", RentCol)
  #store monthly rent in the future - forecast data
  r.pred <- paste0("r.pred", RentCol)
}

In Forecast Horizon

for (i in 1:length(RentCol)) {
  #get the last value in in-sample data to calculate monthly rent for first month in forecast horizon
  assign(temp[i], window(eval(parse(text=RentCol[i])),
                     start=starth-1/12,end=starth-1/12, frequency=12))
}
for (n in RentCol) {
    #store ts of forecast horizon forecast
    fh.f <- paste0("fh.f", RentCol)
}
for (i in 1:length(RentCol)){
  assign(r.horz[i],data.frame())
  
  #calculate monthly rent in the forecast horizon interval - forecast data
  for (n in 1:((endtime-starth)*12+1)) {
    #a temporary next month rent holder
    assign(r.horz[i], rbind(eval(parse(text=r.horz[i])),eval(parse(text=temp[i]))))
    #convert back to next month's rent by multiplying this month's rent by exponential of this month's growth rate + average seasonality of that month based on previous seasonality data of that month
    ##seasonality
    ssn <- decompose(eval(parse(text=RentCol[i])),
                     type="additive")[["seasonal"]]
    ##find out which month that is
    month <- NULL
    if ((starth%%1*12+1+n)%%12 == 0) {
      month <- 12
    }
    else {
      month <- (starth%%1*12+1+n)%%12
    }
    
    assign(temp[i], eval(parse(text=temp[i])) * (exp(eval(parse(text=fore[i]))[n]))
                         + mean(ssn[cycle(ssn)==month]))
  }
  #store forecast horizon forecast to corresponding fh.f
  assign(fh.f[i], ts(as.vector(unlist(t(eval(parse(text=r.horz[i]))[,1]))),
                    start=starth-1/12,end=endtime-1/12,frequency=12))
}

In one year

for (i in 1:length(RentCol)) {
  #get the last value in the whole data to calculate monthly rent for first month in out of sample forecast
  assign(temp2[i], window(eval(parse(text=RentCol[i])),
                     start=endtime,end=endtime, frequency=12))
}
for (n in RentCol) {
    #store ts of forecast horizon forecast
    oos.f <- paste0("oos.f", RentCol)
}
for (i in 1:length(RentCol)){
  assign(r.pred[i],data.frame())
  #calculate monthly rent in the future - forecast data
  for (n in 1:(predtime)) {
    #a temporary next month rent holder
    assign(r.pred[i], rbind(eval(parse(text=r.pred[i])),eval(parse(text=temp2[i]))))
    
    #convert back to next month's rent by multiplying this month's rent by exponential of this month's growth rate + average seasonality of that month based on previous seasonality data of that month
    ssn <- decompose(eval(parse(text=RentCol[i])),
                     type="additive")[["seasonal"]]
    ##find out which month that is to match the correct monthly seasonality
    month <- NULL
    if ((endtime%%1*12+1+n)%%12 == 0) {
      month <- 12
    }
    else {
      month <- (endtime%%1*12+1+n)%%12
    }
    assign(temp2[i],eval(parse(text=temp2[i])) * (exp(eval(parse(text=ftr[i]))[n])) +
                          mean(ssn[cycle(ssn)==month]))
  }
  #store out-of-sample forecast to corresponding oos.f
  assign(oos.f[i], ts(as.vector(unlist(t(eval(parse(text=r.pred[i]))[,1]))),
             start=endtime,end=endtime+predtime/12-1/12,frequency=12))
}

Graph for Both + Original Data

for (i in 1:length(RentCol)){
  #plot all monthly rent data, real and forecast
  ts.plot(eval(parse(text=RentCol[i])),
          
          #plot horizon forecast
          eval(parse(text=fh.f[i])),
          
          #plot out of sample forecast
          eval(parse(text=oos.f[i])),
          
          col=1:3,xlim=c(starttime,endtime+predtime/12),
          gpars=list(main=RentCol[i]))
}

Forecast Growth Rate for July 2023-Sep 2023

rate <- data.frame()
#extract growth rate for the chosen period from the above prediction
for (n in 1:length(RentCol)){
  assign(chosen[n], window(eval(parse(text=ftr[n])),start=chsn1-1/12,end=chsn2-1/12))
  rate <- rbind(rate,eval(parse(text=chosen[n])))
}
#change col names
rate <- rate %>% rename("Jul"=colnames(rate[1]),
                          "Aug"=colnames(rate[2]),
                          "Sep"=colnames(rate[3]))
sortedrate <- cbind(logcol,rate*100)
sortedrate[order(sortedrate$Jul),]

Forecast Rent for July 2023-Oct 2023

rent.ftr <- data.frame()
#extract growth rate for the chosen period from the above prediction
for (n in 1:length(RentCol)){
  assign(r.chsn[n], window(eval(parse(text=oos.f[n])),start=chsn1-1/12,end=chsn2-1/12, frequency=12))
}
for (n in 1:length(RentCol)) {
  rent.ftr <- rbind(rent.ftr,eval(parse(text=r.chsn[n][1])))
}
#change col names
rent.ftr <- rent.ftr %>% rename("Jul"=colnames(rent.ftr[1]),
                          "Aug"=colnames(rent.ftr[2]),
                          "Sep"=colnames(rent.ftr[3]))
sortedrate <- cbind(RentCol,rent.ftr)
sortedrate[order(sortedrate$Jul),]