library(doParallel)
library(forecast)
library(TSA)
library(AnomalyDetection)
library(TimeProjection)
library(timeDate)
library(ggplot2)
set.seed(1234)

# Nécessaire pour éviter les problèmes avec diverses fonctions.
Sys.setlocale("LC_TIME", "English")
## [1] "English_United States.1252"
# Utiliser plusieurs core.
registerDoParallel(cores=4); 

Commençons par lire et inspecter nos données.

timeserie <- read.csv("C:/sb-waq/R/timeseries/sessions.csv", sep="\t", colClasses=c("Date", "integer"))
summary(timeserie)
##       Date               Sessions     
##  Min.   :2015-01-01   Min.   :187723  
##  1st Qu.:2015-07-02   1st Qu.:395620  
##  Median :2016-01-01   Median :541033  
##  Mean   :2016-01-01   Mean   :523087  
##  3rd Qu.:2016-07-01   3rd Qu.:613652  
##  Max.   :2016-12-31   Max.   :874993
ggplot( data = timeserie, aes( Date, Sessions )) + geom_line()

Extraire des features depuis les dates.

timeserie.pd <- projectDate(timeserie$Date, holidays=holidayTSX(2015:2017))
timeserie.pd[is.na(timeserie.pd)] <- 2
head(timeserie.pd)
##   year month yweek mweek yday mday  weekday bizday season
## 1 2015     1     1     1    1    1 Thursday  FALSE Winter
## 2 2015     1     1     1    2    2   Friday   TRUE Winter
## 3 2015     1     1     1    3    3 Saturday  FALSE Winter
## 4 2015     1     1     1    4    4   Sunday  FALSE Winter
## 5 2015     1     1     1    5    5   Monday   TRUE Winter
## 6 2015     1     1     1    6    6  Tuesday   TRUE Winter

Détecter automatiquement des saisonalités en utilisant une transformation de fourier.

(https://anomaly.io/detect-seasonality-using-fourier-transform-r/)

period <- periodogram(timeserie[,2], plot=FALSE)
dd = data.frame(freq=period$freq, spec=period$spec)
order = dd[order(-dd$spec),]
top_Period <- head(order, 1)
time = 1 / top_Period$f
time
## [1] 7.009346

Décomposer la serie temporelle.

timeserie.ts <- ts(timeserie[,2], frequency=7)
fitSTL <- stl(timeserie.ts, t.window=c(7, 365.25), s.window="periodic", robust=TRUE)
plot(fitSTL)

Détection des anomalies.

timeserie.anomaly <- data.frame(timestamp = as.numeric(as.POSIXct(timeserie$Date, format="%Y-%m-%d")), data = timeserie$Sessions) # Transformation nécessaire pour la fonction AnomalyDetectionTs qui nécessite un data frame avec2 colonnes (timestamps et un integer)
anomaly <- AnomalyDetectionTs(timeserie.anomaly, direction='both', plot=TRUE, ylabel="Sessions")
anomaly$plot

anomaly$anoms$timestamp
##  [1] "2015-01-01 UTC" "2015-01-05 UTC" "2015-04-01 UTC" "2015-04-30 UTC"
##  [5] "2015-05-01 UTC" "2015-06-01 UTC" "2015-07-02 UTC" "2015-09-07 UTC"
##  [9] "2015-10-01 UTC" "2015-10-02 UTC" "2015-10-05 UTC" "2015-10-12 UTC"
## [13] "2015-12-24 UTC" "2015-12-25 UTC" "2016-01-01 UTC" "2016-01-05 UTC"
## [17] "2016-04-01 UTC" "2016-05-23 UTC" "2016-06-24 UTC" "2016-09-01 UTC"
## [21] "2016-09-05 UTC" "2016-10-10 UTC" "2016-12-25 UTC" "2016-12-26 UTC"

Première prédiction avec un modèle Holt-Winter.

fitHW <- hw(timeserie.ts, h=365, seasonal="additive", level=c(95), frequency=7)
plot(fitHW)

head(fitHW$mean, 31)
## Time Series:
## Start = c(105, 4) 
## End = c(109, 6) 
## Frequency = 7 
##  [1] 335866.1 548638.9 533330.2 547606.0 646190.2 515324.5 322784.8
##  [8] 339598.2 552371.0 537062.3 551338.1 649922.2 519056.5 326516.9
## [15] 343330.3 556103.1 540794.3 555070.1 653654.3 522788.6 330248.9
## [22] 347062.3 559835.2 544526.4 558802.2 657386.4 526520.7 333981.0
## [29] 350794.4 563567.2 548258.5

Une seconde prédiction avec un modèle TBATS.

timeserie.msts <- msts(timeserie[,2], start=c(2015,1,1), seasonal.periods=c(7, 365.25))
fitBATS <- tbats(timeserie.msts)
fcBATS <- forecast(fitBATS, h=365, level=95)
plot(fcBATS)

head(fcBATS$mean, 31)
## Time Series:
## Start = c(2017, 2) 
## End = c(2017, 32) 
## Frequency = 365 
##  [1] 338673.0 542012.3 552397.1 566608.3 642358.6 514209.0 341620.0
##  [8] 358008.3 547246.3 535192.7 555974.3 661570.8 525642.8 338067.0
## [15] 351409.5 553271.0 545684.2 557273.5 654171.3 526315.0 342670.6
## [22] 354122.6 550751.0 544660.2 561301.4 659321.8 526473.2 342095.3
## [29] 355411.2 554349.6 546220.7