library(readxl)
library(dplyr)
library(seasonal)
library(ggplot2)
library(forecast)
library(tidyverse)
library(tsibble)
library(feasts)
library(lubridate)
library(tsibbledata)
library(fable)
library(zoo)
library(fst)
#=
d <- "data"
rData <- file.path(d)
data <- read_fst(file.path(rData, "bumble_updated.fst"))
last.1 <- tail(data, 365)
last.3 <- tail(data, 365*3)
str(data)
'data.frame': 2311 obs. of 6 variables:
$ ID : num 1 2 3 4 5 6 7 8 9 10 ...
$ NumReviews: num 1 1 10 11 8 8 5 3 2 3 ...
$ Day : num 29 30 1 2 3 4 5 6 7 8 ...
$ Month : num 11 11 12 12 12 12 12 12 12 12 ...
$ Year : num 2015 2015 2015 2015 2015 ...
$ Date : Date, format: "2015-11-29" "2015-11-30" ...
myts.all <- ts(data$NumReviews, frequency=365, start=2015)
tsdisplay(myts.all)
data.ts.1 <- data %>%
mutate(Month = data$Date) %>%
as_tsibble(index = Month)
data.ts.1 %>%
model(
classical_decomposition(NumReviews, type="additive")
) %>%
components() %>%
autoplot()
This does not suffice. The data must be split into monthly bins. The frequency set above was 365 to ensure accurate Dates on the x-axis.
# need to convert data to monthly bins
nyr <- function(yr, n.yr, l.yr){
if((n.yr != yr) && (n.yr != l.yr)){
return(yr)
}else{
return(n.yr)
}
}
d.m <- data.frame(matrix(ncol=4))
names(d.m)[1:4] <- c("ID", "NumReviews", "Month", "Year")
id = 1
r.t = data[1,2] # review tally
yr = data[1,5]
mo = data[1,4]
yr.c = tail(data[,5],1)
for(i in 1:nrow(data)){
if((data[i+1,4] == mo) || (i == nrow(data))){
if(i == nrow(data)){
d.m[id,1:4] = c(id, r.t, mo, yr)
}
r.t = r.t + data[i+1,2]
}else{
d.m[id,1:4] = c(id, r.t, mo, yr)
r.t = data[i+1,2]
id = id + 1
mo = data[i+1,4]
yr = nyr(data[i+1,5], data[i,5], yr.c)
}
}
d.m$Date <- with(d.m, paste(d.m$Year, d.m$Month, sep="-"))
c.d <- d.m[,c(1,2,5)] # condensed data
myts.3 <- ts(c.d$NumReviews, frequency=12, start=2015.83)
tsdisplay(myts.3)
# much much better
data.ts.4 <- c.d %>%
mutate(Month = yearmonth(c.d$Date)) %>%
as_tsibble(index = Month)
data.ts.4 %>%
model(
classical_decomposition(NumReviews, type="additive")
) %>%
components() %>%
autoplot()
data.ts.4 %>%
model(
classical_decomposition(NumReviews, type="multiplicative")
) %>%
components() %>%
autoplot()
m.stats <- function(m.n, m.s){ # model.type, model.stats
aic <- round(m.s$aic, 2)
aicc <- round(m.s$aicc, 2)
bic <- round(m.s$bic, 2)
return(c(m.n, aic,aicc,bic))
}
mye.1=ets(myts.3)
mye.1
ETS(A,N,N)
Call:
ets(y = myts.3)
Smoothing parameters:
alpha = 0.7469
Initial states:
l = 50.27
sigma: 298.8154
AIC AICc BIC
1216.220 1216.549 1223.251
plot(mye.1)
mye.2=ets(myts.3, model="ANA")
mye.2
ETS(A,N,A)
Call:
ets(y = myts.3, model = "ANA")
Smoothing parameters:
alpha = 0.7605
gamma = 1e-04
Initial states:
l = 621.4159
s = 85.1771 -55.879 66.6563 147.9391 -49.9818 -60.1897
47.3424 77.679 24.4539 31.4468 -132.3066 -182.3375
sigma: 314.659
AIC AICc BIC
1234.751 1242.620 1269.908
plot(mye.2)
mye.3=ets(myts.3, model="AAN")
mye.3
ETS(A,A,N)
Call:
ets(y = myts.3, model = "AAN")
Smoothing parameters:
alpha = 0.7026
beta = 1e-04
Initial states:
l = 15.6175
b = 34.6958
sigma: 298.9927
AIC AICc BIC
1218.230 1219.075 1229.949
plot(mye.3)
mye.4=ets(myts.3, model="AAA")
mye.4
ETS(A,A,A)
Call:
ets(y = myts.3, model = "AAA")
Smoothing parameters:
alpha = 0.7493
beta = 2e-04
gamma = 1e-04
Initial states:
l = 71.5819
b = 46.3498
s = 113.0274 16.8848 88.5558 161.5107 -28.7733 -66.5659
14.9479 87.8106 -22.9684 -77.8626 -144.2723 -142.2947
sigma: 315.2491
AIC AICc BIC
1236.555 1246.928 1276.400
plot(mye.4)
mye.5=ets(myts.3, model="MAM")
mye.5
ETS(M,A,M)
Call:
ets(y = myts.3, model = "MAM")
Smoothing parameters:
alpha = 0.4057
beta = 0.0147
gamma = 1e-04
Initial states:
l = 40.0439
b = 49.1157
s = 1.073 1.0834 1.2005 1.1612 0.7789 0.811
0.8845 1.0254 0.9928 1.0774 1.0282 0.8836
sigma: 0.338
AIC AICc BIC
1265.414 1275.787 1305.259
plot(mye.5)
m.e <- data.frame(matrix(ncol=4)) # model.evaluation
names(m.e)[1:4] <- c("Model", "aic", "aicc", "bic")
m.e[1,] <- m.stats("ANN", mye.1)
m.e[2,] <- m.stats("ANA", mye.2)
m.e[3,] <- m.stats("AAN", mye.3)
m.e[4,] <- m.stats("AAA", mye.4)
m.e[5,] <- m.stats("MAM", mye.5)
m.e
Model aic aicc bic
1 ANN 1216.22 1216.55 1223.25
2 ANA 1234.75 1242.62 1269.91
3 AAN 1218.23 1219.08 1229.95
4 AAA 1236.56 1246.93 1276.4
5 MAM 1265.41 1275.79 1305.26
plot(mye.1)
ANN is the best model on all metrics. If you do not specify a type, the model will choose ANN.
myts.3 <- ts(c.d$NumReviews, frequency=13, start=2015.83)
test <- forecast(
myts.3,
method=c("ets")
)
plot(test)