1. Libraries

library(readxl)
library(dplyr)
library(seasonal)
library(ggplot2)
library(forecast)
library(tidyverse)
library(tsibble)
library(feasts)
library(lubridate)
library(tsibbledata)
library(fable)
library(zoo)
library(fst)

 
 

2. Data Exploration

#=
d <- "data"
rData <- file.path(d)
data <- read_fst(file.path(rData, "bumble_updated.fst"))
last.1 <- tail(data, 365)
last.3 <- tail(data, 365*3)
str(data)
'data.frame':   2311 obs. of  6 variables:
 $ ID        : num  1 2 3 4 5 6 7 8 9 10 ...
 $ NumReviews: num  1 1 10 11 8 8 5 3 2 3 ...
 $ Day       : num  29 30 1 2 3 4 5 6 7 8 ...
 $ Month     : num  11 11 12 12 12 12 12 12 12 12 ...
 $ Year      : num  2015 2015 2015 2015 2015 ...
 $ Date      : Date, format: "2015-11-29" "2015-11-30" ...

 

2.1 All Data

myts.all <- ts(data$NumReviews, frequency=365, start=2015)

tsdisplay(myts.all)

data.ts.1 <- data %>%
  mutate(Month = data$Date) %>%
  as_tsibble(index = Month)

data.ts.1 %>%
  model(
    classical_decomposition(NumReviews, type="additive")
  ) %>%
  components() %>%
  autoplot()

 

This does not suffice. The data must be split into monthly bins. The frequency set above was 365 to ensure accurate Dates on the x-axis.

 
 

3. Data Prep

# need to convert data to monthly bins

nyr <- function(yr, n.yr, l.yr){ 
  if((n.yr != yr) && (n.yr != l.yr)){
    return(yr)
  }else{
    return(n.yr)
  }
}

d.m <- data.frame(matrix(ncol=4))
names(d.m)[1:4] <- c("ID", "NumReviews", "Month", "Year")

id = 1
r.t = data[1,2] # review tally
yr = data[1,5]
mo = data[1,4]
yr.c = tail(data[,5],1)
for(i in 1:nrow(data)){
  
  if((data[i+1,4] == mo) || (i == nrow(data))){
    if(i == nrow(data)){
      d.m[id,1:4] = c(id, r.t, mo, yr)
    }
    r.t = r.t + data[i+1,2]
  }else{
    d.m[id,1:4] = c(id, r.t, mo, yr)
    
    r.t = data[i+1,2]
    id = id + 1
    mo = data[i+1,4]
    yr = nyr(data[i+1,5], data[i,5], yr.c)
  }
}
d.m$Date <- with(d.m, paste(d.m$Year, d.m$Month, sep="-"))
c.d <- d.m[,c(1,2,5)] # condensed data

 

3.1 Checking Data and Type Comparison

myts.3 <- ts(c.d$NumReviews, frequency=12, start=2015.83)
tsdisplay(myts.3)

# much much better
data.ts.4 <- c.d %>%
  mutate(Month = yearmonth(c.d$Date)) %>%
  as_tsibble(index = Month)

data.ts.4 %>%
  model(
    classical_decomposition(NumReviews, type="additive")
  ) %>%
  components() %>%
  autoplot()

data.ts.4 %>%
  model(
    classical_decomposition(NumReviews, type="multiplicative")
  ) %>%
  components() %>%
  autoplot()

3. Models

 

m.stats <- function(m.n, m.s){ # model.type, model.stats
  aic <- round(m.s$aic, 2)
  aicc <- round(m.s$aicc, 2)
  bic <- round(m.s$bic, 2)
  
  return(c(m.n, aic,aicc,bic))
}

 
 

3.1 Model 1 – ANN

mye.1=ets(myts.3)
mye.1
ETS(A,N,N) 

Call:
 ets(y = myts.3) 

  Smoothing parameters:
    alpha = 0.7469 

  Initial states:
    l = 50.27 

  sigma:  298.8154

     AIC     AICc      BIC 
1216.220 1216.549 1223.251 
plot(mye.1)

3.2 Model 2 – ANA

mye.2=ets(myts.3, model="ANA")
mye.2
ETS(A,N,A) 

Call:
 ets(y = myts.3, model = "ANA") 

  Smoothing parameters:
    alpha = 0.7605 
    gamma = 1e-04 

  Initial states:
    l = 621.4159 
    s = 85.1771 -55.879 66.6563 147.9391 -49.9818 -60.1897
           47.3424 77.679 24.4539 31.4468 -132.3066 -182.3375

  sigma:  314.659

     AIC     AICc      BIC 
1234.751 1242.620 1269.908 
plot(mye.2)

3.3 Model 3 – AAN

mye.3=ets(myts.3, model="AAN")
mye.3
ETS(A,A,N) 

Call:
 ets(y = myts.3, model = "AAN") 

  Smoothing parameters:
    alpha = 0.7026 
    beta  = 1e-04 

  Initial states:
    l = 15.6175 
    b = 34.6958 

  sigma:  298.9927

     AIC     AICc      BIC 
1218.230 1219.075 1229.949 
plot(mye.3)

3.4 Model 4 – AAA

mye.4=ets(myts.3, model="AAA")
mye.4
ETS(A,A,A) 

Call:
 ets(y = myts.3, model = "AAA") 

  Smoothing parameters:
    alpha = 0.7493 
    beta  = 2e-04 
    gamma = 1e-04 

  Initial states:
    l = 71.5819 
    b = 46.3498 
    s = 113.0274 16.8848 88.5558 161.5107 -28.7733 -66.5659
           14.9479 87.8106 -22.9684 -77.8626 -144.2723 -142.2947

  sigma:  315.2491

     AIC     AICc      BIC 
1236.555 1246.928 1276.400 
plot(mye.4)

3.5 Model 5 – MAM

mye.5=ets(myts.3, model="MAM")
mye.5
ETS(M,A,M) 

Call:
 ets(y = myts.3, model = "MAM") 

  Smoothing parameters:
    alpha = 0.4057 
    beta  = 0.0147 
    gamma = 1e-04 

  Initial states:
    l = 40.0439 
    b = 49.1157 
    s = 1.073 1.0834 1.2005 1.1612 0.7789 0.811
           0.8845 1.0254 0.9928 1.0774 1.0282 0.8836

  sigma:  0.338

     AIC     AICc      BIC 
1265.414 1275.787 1305.259 
plot(mye.5)

3.6 Model Evaluation

m.e <- data.frame(matrix(ncol=4)) # model.evaluation
names(m.e)[1:4] <- c("Model", "aic", "aicc", "bic")

m.e[1,] <- m.stats("ANN", mye.1)
m.e[2,] <- m.stats("ANA", mye.2)
m.e[3,] <- m.stats("AAN", mye.3)
m.e[4,] <- m.stats("AAA", mye.4)
m.e[5,] <- m.stats("MAM", mye.5)

m.e
  Model     aic    aicc     bic
1   ANN 1216.22 1216.55 1223.25
2   ANA 1234.75 1242.62 1269.91
3   AAN 1218.23 1219.08 1229.95
4   AAA 1236.56 1246.93  1276.4
5   MAM 1265.41 1275.79 1305.26
plot(mye.1)

ANN is the best model on all metrics. If you do not specify a type, the model will choose ANN.

 
 

4. Forecast

myts.3 <- ts(c.d$NumReviews, frequency=13, start=2015.83)

test <- forecast( 
  myts.3,
  method=c("ets")
)
plot(test)