# Author Irina Max, Lead Principal Data Scientist
# This code is based on a time series TBATS model for daily claims prediction. The model was deployed to production and includes part of the pipeline.
# This script is my individual contribution and research for the XXX company.
# The script also includes an ARIMA model with seasonal frequency using a Fourier function, which did not deliver the best result in this particular case study.
# I commented out some code that does not need to be processed for this RPub.
getwd()
## [1] "/Users/irinamax/Documents/R/CHC"
#setwd("/home/irina/R_model" )
#
#library(RJDBC)
#library(aws.s3)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape2)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
library(ggplot2)
library(anytime)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x data.table::between() masks dplyr::between()
## x dplyr::filter() masks stats::filter()
## x data.table::first() masks dplyr::first()
## x dplyr::lag() masks stats::lag()
## x data.table::last() masks dplyr::last()
## x purrr::transpose() masks data.table::transpose()
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(mltools)
##
## Attaching package: 'mltools'
## The following object is masked from 'package:tidyr':
##
## replace_na
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✓ fma 2.4 ✓ expsmooth 2.3
## ── Conflicts ───────────────────────────────────────────────── fpp2_conflicts ──
## x magrittr::extract() masks tidyr::extract()
## x magrittr::set_names() masks purrr::set_names()
## x purrr::transpose() masks data.table::transpose()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(pryr)
##
## Attaching package: 'pryr'
## The following objects are masked from 'package:purrr':
##
## compose, partial
## The following object is masked from 'package:data.table':
##
## address
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
## method from
## autoplot.Arima forecast
## autoplot.acf forecast
## autoplot.ar forecast
## autoplot.bats forecast
## autoplot.decomposed.ts forecast
## autoplot.ets forecast
## autoplot.forecast forecast
## autoplot.stl forecast
## autoplot.ts forecast
## fitted.ar forecast
## fortify.ts forecast
## residuals.ar forecast
library(rJava)
# Sys.setenv("AWS_ACCESS_KEY_ID" = "...",
# "AWS_SECRET_ACCESS_KEY" = "...",
# "AWS_DEFAULT_REGION" = "us-east-1")
# download and use the Amazon Redshift JDBC driver
# download.file('http://s3.amazonaws.com/redshift-downloads/drivers/RedshiftJDBC41-1.1.9.1009.jar','RedshiftJDBC41-1.1.9.1009.jar')
# Helper to write a CSV without row names
mywrite <- function(x, file) {
  write.csv(x, file, row.names = FALSE)
}
# connect to Amazon Redshift
# driver <- JDBC("com.amazon.redshift.jdbc41.Driver", "RedshiftJDBC41-1.1.9.1009.jar", identifier.quote="`")
# url <- "jdbc:redshift://...."
# conn <- dbConnect(driver, url)
# query3 = "select the_given_date, sum
# from prod_metrics.claims_exec_report_data"
#
# cl_data = dbGetQuery(conn, query3)
# write.csv(cl_data, "cl_data.csv")
#=============================================================== work with the file up to 05-07 ====================
# feature engineering with the claims data
#cl <- read.csv("claims_Daily_data.txt", header = T, sep = "|", stringsAsFactors = F)
cl <- read.csv("cl_data.csv")
cl %>% summary
## X the_given_date sum
## Min. : 1.0 Length:498 Min. : 521261
## 1st Qu.:125.2 Class :character 1st Qu.:1663746
## Median :249.5 Mode :character Median :7356952
## Mean :249.5 Mean :5588034
## 3rd Qu.:373.8 3rd Qu.:7841872
## Max. :498.0 Max. :9438760
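# aggregate to one total per date (summing any duplicate rows for the same date)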
cl <- cl%>% group_by(the_given_date) %>% summarise(sum = sum(sum))
cl %>% head
## # A tibble: 6 x 2
## the_given_date sum
## <chr> <int>
## 1 2018-12-16 684635
## 2 2018-12-17 8394740
## 3 2018-12-18 8378415
## 4 2018-12-19 7836314
## 5 2018-12-20 7639615
## 6 2018-12-21 7575768
cl %>% tail
## # A tibble: 6 x 2
## the_given_date sum
## <chr> <int>
## 1 2020-04-21 5255620
## 2 2020-04-22 5076559
## 3 2020-04-23 4906584
## 4 2020-04-24 4981335
## 5 2020-04-25 940347
## 6 2020-04-26 570651
cl %>% dim # 498 2
## [1] 498 2
cl %>% summary
## the_given_date sum
## Length:498 Min. : 521261
## Class :character 1st Qu.:1663746
## Mode :character Median :7356952
## Mean :5588034
## 3rd Qu.:7841872
## Max. :9438760
cl %>% head
## # A tibble: 6 x 2
## the_given_date sum
## <chr> <int>
## 1 2018-12-16 684635
## 2 2018-12-17 8394740
## 3 2018-12-18 8378415
## 4 2018-12-19 7836314
## 5 2018-12-20 7639615
## 6 2018-12-21 7575768
cl %>% tail
## # A tibble: 6 x 2
## the_given_date sum
## <chr> <int>
## 1 2020-04-21 5255620
## 2 2020-04-22 5076559
## 3 2020-04-23 4906584
## 4 2020-04-24 4981335
## 5 2020-04-25 940347
## 6 2020-04-26 570651
cl %>% dim # 498 2
## [1] 498 2
#cl <- cl[-c(1:16),] # 470
names(cl) <- c("date", "CVol")
#cl$date<-as.POSIXct(cl$date,format="%m/%d/%y ")
cl %>% head
## # A tibble: 6 x 2
## date CVol
## <chr> <int>
## 1 2018-12-16 684635
## 2 2018-12-17 8394740
## 3 2018-12-18 8378415
## 4 2018-12-19 7836314
## 5 2018-12-20 7639615
## 6 2018-12-21 7575768
cl %>% tail
## # A tibble: 6 x 2
## date CVol
## <chr> <int>
## 1 2020-04-21 5255620
## 2 2020-04-22 5076559
## 3 2020-04-23 4906584
## 4 2020-04-24 4981335
## 5 2020-04-25 940347
## 6 2020-04-26 570651
# look at the weekly decomposition
ts_w <- ts(cl[,2], frequency = 7, start = c(1))
# Plot the data with facetting
autoplot(ts_w, facets = F)

decomp_w <- decompose(ts_w)
plot(decomp_w, col = "dark red")

# monthly decomposition
ts_c <- ts(cl[,2], frequency = 30, start = c(1))
autoplot(ts_c, facets = F)

decomp <- decompose(ts_c)
plot(decomp, col = "dark blue")

ts_c <- ts(cl[,2])
y <- msts(ts_c, seasonal.periods=c(7,30.5))
fit_tbat <- tbats(y)
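# Optional inspection (a sketch, not part of the original run): 30.5 approximates
# the average month length in days, so the msts object carries both a weekly and a
# roughly monthly cycle. tbats.components() exposes the fitted level/seasonal
# states for a quick visual sanity check.
# fit_tbat$seasonal.periods        # expected to report 7 and 30.5
# plot(tbats.components(fit_tbat)) # level, slope and the two seasonal components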
# tbat <- forecast::forecast(fit_tbat)
# plot(tbat)
tbat <- forecast::forecast(fit_tbat, h=30) ## for next 30 days
accuracy(tbat)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 23712.3 733689.7 406427.7 -2.176473 10.54369 0.1012882 0.04453563
plot(tbat)

# plot of residuals
plot(tbat$residuals, ylab= "residuals")

tbat
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 17.60000 5767748.4 4764525.5 6890645.0 4280192.64 7535177.7
## 17.63333 5546349.1 4506668.6 6720980.6 4008841.83 7399450.5
## 17.66667 5642183.6 4588456.7 6832110.5 4083688.89 7519193.6
## 17.70000 5558242.2 4510620.7 6742691.5 4009308.51 7427158.5
## 17.73333 5425108.0 4388708.6 6598957.3 3893548.01 7278104.1
## 17.76667 875960.3 557061.9 1287153.8 422598.04 1545043.2
## 17.80000 317138.8 156285.6 552192.3 97502.76 710047.5
## 17.83333 5670346.5 4589699.8 6893920.1 4073252.59 7601684.9
## 17.86667 5871613.3 4761573.2 7127132.5 4230578.99 7852861.6
## 17.90000 5895637.1 4777647.3 7160656.8 4243040.83 7892073.6
## 17.93333 5736784.6 4633332.6 6987721.0 4106562.98 7711907.4
## 17.96667 5586216.6 4496505.1 6823928.9 3977174.11 7541366.3
## 18.00000 943020.6 598251.6 1388128.4 453069.99 1667490.9
## 18.03333 360840.4 180231.7 623102.2 113709.80 798657.8
## 18.06667 5872724.3 4732801.4 7166592.9 4189214.28 7916244.7
## 18.10000 5919383.9 4768674.8 7225762.0 4220043.68 7982764.4
## 18.13333 5747586.4 4613460.9 7037775.1 4073720.86 7786411.8
## 18.16667 5475033.1 4371038.2 6734756.5 3847057.87 7467182.5
## 18.20000 5345770.5 4253907.3 6593937.2 3736535.33 7320522.4
## 18.23333 898670.0 555478.4 1347676.8 412982.40 1631657.7
## 18.26667 354459.8 171546.2 624012.7 105405.44 805819.2
## 18.30000 5857071.2 4685350.2 7192582.7 4128670.20 7968500.7
## 18.33333 5864153.0 4686814.5 7206742.4 4127716.95 7987031.9
## 18.36667 5679510.0 4521382.7 7003114.7 3972495.97 7773494.1
## 18.40000 5467201.1 4332546.3 6767297.5 3796017.11 7525268.7
## 18.43333 5422431.5 4289206.7 6722219.7 3753847.35 7480521.4
## 18.46667 927150.6 567662.8 1399754.8 419170.97 1699486.2
## 18.50000 345490.2 161631.5 620708.6 96465.77 807811.6
## 18.53333 5576535.9 4410009.3 6914706.5 3858986.36 7695471.9
## 18.56667 5404240.2 4256885.0 6723329.5 3715996.88 7494077.8
# model summary: parameters, error measures and forecasts
tbat %>% summary
##
## Forecast method: TBATS(0.388, {0,3}, 0.8, {<7,3>, <30.5,5>})
##
## Model Information:
## TBATS(0.388, {0,3}, 0.8, {<7,3>, <30.5,5>})
##
## Call: tbats(y = y)
##
## Parameters
## Lambda: 0.388205
## Alpha: 0.07741392
## Beta: 0.008708724
## Damping Parameter: 0.800032
## Gamma-1 Values: 0.0009744367 6.369535e-05
## Gamma-2 Values: -2.136673e-05 0.001364328
## MA coefficients: 0.289193 -0.029751 -0.010675
##
## Seed States:
## [,1]
## [1,] 1031.2009448
## [2,] -26.6113697
## [3,] -291.8043947
## [4,] -169.2487962
## [5,] -65.5639624
## [6,] 133.5986366
## [7,] 147.8119551
## [8,] 66.3299465
## [9,] 2.0707887
## [10,] 1.5236682
## [11,] -0.1360861
## [12,] -6.9076830
## [13,] 5.6470044
## [14,] -10.5938211
## [15,] 5.4815555
## [16,] 5.5350740
## [17,] -5.1366407
## [18,] 5.7436719
## [19,] 0.0000000
## [20,] 0.0000000
## [21,] 0.0000000
## attr(,"lambda")
## [1] 0.3882048
##
## Sigma: 60.55284
## AIC: 16532.53
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 23712.3 733689.7 406427.7 -2.176473 10.54369 0.1012882 0.04453563
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 17.60000 5767748.4 4764525.5 6890645.0 4280192.64 7535177.7
## 17.63333 5546349.1 4506668.6 6720980.6 4008841.83 7399450.5
## 17.66667 5642183.6 4588456.7 6832110.5 4083688.89 7519193.6
## 17.70000 5558242.2 4510620.7 6742691.5 4009308.51 7427158.5
## 17.73333 5425108.0 4388708.6 6598957.3 3893548.01 7278104.1
## 17.76667 875960.3 557061.9 1287153.8 422598.04 1545043.2
## 17.80000 317138.8 156285.6 552192.3 97502.76 710047.5
## 17.83333 5670346.5 4589699.8 6893920.1 4073252.59 7601684.9
## 17.86667 5871613.3 4761573.2 7127132.5 4230578.99 7852861.6
## 17.90000 5895637.1 4777647.3 7160656.8 4243040.83 7892073.6
## 17.93333 5736784.6 4633332.6 6987721.0 4106562.98 7711907.4
## 17.96667 5586216.6 4496505.1 6823928.9 3977174.11 7541366.3
## 18.00000 943020.6 598251.6 1388128.4 453069.99 1667490.9
## 18.03333 360840.4 180231.7 623102.2 113709.80 798657.8
## 18.06667 5872724.3 4732801.4 7166592.9 4189214.28 7916244.7
## 18.10000 5919383.9 4768674.8 7225762.0 4220043.68 7982764.4
## 18.13333 5747586.4 4613460.9 7037775.1 4073720.86 7786411.8
## 18.16667 5475033.1 4371038.2 6734756.5 3847057.87 7467182.5
## 18.20000 5345770.5 4253907.3 6593937.2 3736535.33 7320522.4
## 18.23333 898670.0 555478.4 1347676.8 412982.40 1631657.7
## 18.26667 354459.8 171546.2 624012.7 105405.44 805819.2
## 18.30000 5857071.2 4685350.2 7192582.7 4128670.20 7968500.7
## 18.33333 5864153.0 4686814.5 7206742.4 4127716.95 7987031.9
## 18.36667 5679510.0 4521382.7 7003114.7 3972495.97 7773494.1
## 18.40000 5467201.1 4332546.3 6767297.5 3796017.11 7525268.7
## 18.43333 5422431.5 4289206.7 6722219.7 3753847.35 7480521.4
## 18.46667 927150.6 567662.8 1399754.8 419170.97 1699486.2
## 18.50000 345490.2 161631.5 620708.6 96465.77 807811.6
## 18.53333 5576535.9 4410009.3 6914706.5 3858986.36 7695471.9
## 18.56667 5404240.2 4256885.0 6723329.5 3715996.88 7494077.8
tbat_claims <- data.frame(tbat)
tbat_claims %>% str
## 'data.frame': 30 obs. of 5 variables:
## $ Point.Forecast: num 5767748 5546349 5642184 5558242 5425108 ...
## $ Lo.80 : num 4764525 4506669 4588457 4510621 4388709 ...
## $ Hi.80 : num 6890645 6720981 6832110 6742691 6598957 ...
## $ Lo.95 : num 4280193 4008842 4083689 4009309 3893548 ...
## $ Hi.95 : num 7535178 7399450 7519194 7427158 7278104 ...
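# claim volumes cannot be negative, so clip any negative point forecasts to zero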
tbat_claims$Point.Forecast[tbat_claims$Point.Forecast <0] <- 0
# Create a data frame of results by day to write out for the dashboard
startDate <- as.Date("2020-05-11")
dnew <- seq(startDate, by="1 day", length.out=30)
tbat_claims <- cbind(dnew,tbat_claims)
# tbat_claims$date <- dnew
# claimsent <- cbind(claimsent[,5], claimsent[,1:4])
tbat_claims %>% head
## dnew Point.Forecast Lo.80 Hi.80 Lo.95 Hi.95
## 17.60000 2020-05-11 5767748.4 4764525.5 6890645 4280193 7535178
## 17.63333 2020-05-12 5546349.1 4506668.6 6720981 4008842 7399450
## 17.66667 2020-05-13 5642183.6 4588456.7 6832110 4083689 7519194
## 17.70000 2020-05-14 5558242.2 4510620.7 6742691 4009309 7427158
## 17.73333 2020-05-15 5425108.0 4388708.6 6598957 3893548 7278104
## 17.76667 2020-05-16 875960.3 557061.9 1287154 422598 1545043
plot(tbat_claims$Point.Forecast) # weekend days with low volume are clearly visible

#write.csv(tbat_claims,"claims_TBAT_30days_after05-07.csv" )
#write.csv(tbat_claims,"claims_TBAT_30days_after05-11.csv" )
# Check the residual error distribution
tbat$residuals %>% head(10)
## Multi-Seasonal Time Series:
## Start: 1 1
## Seasonal Periods: 7 30.5
## Data:
## CVol
## [1,] -13.506292
## [2,] 67.245091
## [3,] 52.224061
## [4,] 44.979924
## [5,] 53.720549
## [6,] 50.298470
## [7,] 4.704759
## [8,] 19.838816
## [9,] -65.328704
## [10,] -380.063651
plot(tbat$residuals)

acf(tbat$residuals, lag.max = 30, main = "Correlogram")

pacf(tbat$residuals, main = "Partial Correlogram")

# The ACF and PACF confirm significant autocorrelation at lags 7 and 14.
# The Ljung-Box test returns a significant p-value of 0.003106, and the histogram
# shows the residuals roughly follow a normal distribution centred close to zero,
# which supports statistically valid inference from the model.
Box.test(tbat$residuals, lag = 7, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: tbat$residuals
## X-squared = 21.492, df = 7, p-value = 0.003106
hist(tbat$residuals,
     col = "red",
     xlab = "Error",
     main = "Histogram of residuals",
     freq = FALSE)
lines(density(tbat$residuals))

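# A more compact alternative (a sketch, not part of the original pipeline):
# forecast::checkresiduals() bundles the residual time plot, ACF, histogram and a
# Ljung-Box test into a single call, reproducing the diagnostics above.
# checkresiduals(tbat, lag = 14)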
#=============================================END======================================
# ______________________________ auto.arima with seasonal frequency using Fourier terms
y <- ts(ts_c, frequency=7)
z <- fourier(ts(cl[,2], frequency=30.5/4), K=3)
zf <- forecast::fourier(ts(ts_c, frequency=30.5/4), K=3, h=30)
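# Quick illustration (not part of the original run): fourier() returns 2*K regressor
# columns (paired sine/cosine terms named S1-.., C1-.., ...) that auto.arima() treats
# as external regressors through xreg.
# colnames(z)  # 2 * 3 = 6 columns
# head(z, 3)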
fit <- auto.arima(y, xreg=cbind(z), seasonal=T)
accuracy(fit)
## ME RMSE MAE MPE MAPE MASE
## Training set -1668.783 779778.5 430380.8 -0.8494264 12.87863 0.8682568
## ACF1
## Training set 0.0004663419
fc <- forecast::forecast(fit, xreg=cbind(zf), h=30)
plot(fc)

autoplot(fc) + xlab("week")

# retrieving the results and adding 30 days of dates
# model summary: parameters, error measures and forecasts
fc %>% summary
##
## Forecast method: Regression with ARIMA(2,0,2)(0,1,1)[7] errors
##
## Model Information:
## Series: y
## Regression with ARIMA(2,0,2)(0,1,1)[7] errors
##
## Coefficients:
## ar1 ar2 ma1 ma2 sma1 drift S1-8
## 0.9476 0.0154 -0.5125 -0.3162 -0.8862 -1995.248 -92635.17
## s.e. 0.1537 0.1410 0.1471 0.1070 0.0521 2937.171 58880.65
## C1-8 S2-8 C2-8 S3-8 C3-8
## -13547.88 -2289.627 63138.44 21909.20 4776.366
## s.e. 59330.60 46896.345 46954.15 34178.41 34183.118
##
## sigma^2 estimated as 6.322e+11: log likelihood=-7366.4
## AIC=14758.79 AICc=14759.55 BIC=14813.34
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -1668.783 779778.5 430380.8 -0.8494264 12.87863 0.8682568
## ACF1
## Training set 0.0004663419
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 72.14286 6321888.43 5302935.2 7340841.7 4763534 7880243
## 72.28571 6110437.94 4999202.0 7221673.9 4410949 7809927
## 72.42857 5807447.58 4690412.3 6924482.9 4099090 7515806
## 72.57143 5667174.86 4544280.2 6790069.6 3949856 7384494
## 72.71429 5509417.03 4381116.7 6637717.4 3783831 7235003
## 72.85714 561738.71 -571558.0 1695035.5 -1171489 2294967
## 73.00000 -147573.21 -1285489.7 990343.3 -1887866 1592720
## 73.14286 6116162.68 4958178.0 7274147.4 4345178 7887147
## 73.28571 6185155.97 5018058.1 7352253.8 4400234 7970078
## 73.42857 5877631.31 4705856.6 7049406.0 4085557 7669706
## 73.57143 5741827.16 4565677.3 6917977.0 3943061 7540593
## 73.71429 5554229.03 4374032.0 6734426.1 3749273 7359185
## 73.85714 694988.78 -488953.9 1878931.5 -1115695 2505673
## 74.00000 -14365.21 -1201775.3 1173044.9 -1830352 1801622
## 74.14286 6061612.35 4856920.2 7266304.5 4219195 7904030
## 74.28571 6177369.47 4965175.8 7389563.2 4323479 8031260
## 74.42857 5936025.85 4720187.6 7151864.1 4076562 7795490
## 74.57143 5794242.06 4574986.8 7013497.4 3929552 7658932
## 74.71429 5599464.97 4377046.0 6821883.9 3729937 7468993
## 74.85714 736556.57 -488792.7 1961905.8 -1137453 2610567
## 75.00000 137393.51 -1090670.4 1365457.4 -1740768 2015555
## 75.14286 6103313.70 4859929.5 7346697.9 4201722 8004906
## 75.28571 6093430.93 4843655.7 7343206.1 4182065 8004797
## 75.42857 5955096.43 4702368.8 7207824.0 4039215 7870978
## 75.57143 5832666.71 4577166.0 7088167.4 3912544 7752789
## 75.71429 5641729.20 4383659.6 6899798.7 3717678 7565781
## 75.85714 753174.32 -507275.9 2013624.5 -1174518 2680867
## 76.00000 206527.30 -1056129.4 1469184.0 -1724539 2137594
## 76.14286 6237392.67 4960839.8 7513945.5 4285074 8189712
## 76.28571 6045308.00 4763152.3 7327463.7 4084420 8006196
fc$model$variance
## NULL
fc$upper
## Time Series:
## Start = c(72, 2)
## End = c(76, 3)
## Frequency = 7
## 80% 95%
## 72.14286 7340841.7 7880243
## 72.28571 7221673.9 7809927
## 72.42857 6924482.9 7515806
## 72.57143 6790069.6 7384494
## 72.71429 6637717.4 7235003
## 72.85714 1695035.5 2294967
## 73.00000 990343.3 1592720
## 73.14286 7274147.4 7887147
## 73.28571 7352253.8 7970078
## 73.42857 7049406.0 7669706
## 73.57143 6917977.0 7540593
## 73.71429 6734426.1 7359185
## 73.85714 1878931.5 2505673
## 74.00000 1173044.9 1801622
## 74.14286 7266304.5 7904030
## 74.28571 7389563.2 8031260
## 74.42857 7151864.1 7795490
## 74.57143 7013497.4 7658932
## 74.71429 6821883.9 7468993
## 74.85714 1961905.8 2610567
## 75.00000 1365457.4 2015555
## 75.14286 7346697.9 8004906
## 75.28571 7343206.1 8004797
## 75.42857 7207824.0 7870978
## 75.57143 7088167.4 7752789
## 75.71429 6899798.7 7565781
## 75.85714 2013624.5 2680867
## 76.00000 1469184.0 2137594
## 76.14286 7513945.5 8189712
## 76.28571 7327463.7 8006196
fc_claims <- data.frame(fc)
fc_claims %>% str
## 'data.frame': 30 obs. of 5 variables:
## $ Point.Forecast: num 6321888 6110438 5807448 5667175 5509417 ...
## $ Lo.80 : num 5302935 4999202 4690412 4544280 4381117 ...
## $ Hi.80 : num 7340842 7221674 6924483 6790070 6637717 ...
## $ Lo.95 : num 4763534 4410949 4099090 3949856 3783831 ...
## $ Hi.95 : num 7880243 7809927 7515806 7384494 7235003 ...
# claimsent <- fc_df[,1:4]
# claimsent <- t(claimsent) # transpose matrix long format
# claimsent
# for deployment, this hard-coded date can be replaced with the actual Sys.Date()
startDate <- as.Date("2020-05-07")
dnew <- seq(startDate, by="1 day", length.out=30)
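# Hypothetical deployment variant (a sketch): anchor the horizon to the system
# clock instead of a fixed date.
# startDate <- Sys.Date() + 1   # start the 30-day horizon tomorrow
# dnew      <- seq(startDate, by = "1 day", length.out = 30)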
fc_claims <- cbind(dnew,fc_claims)
# fc_claims$date <- dnew
# claimsent <- cbind(claimsent[,5], claimsent[,1:4])
fc_claims$Point.Forecast[fc_claims$Point.Forecast <0] <- 0
fc_claims %>% tail
## dnew Point.Forecast Lo.80 Hi.80 Lo.95 Hi.95
## 75.57143 2020-05-31 5832666.7 4577166.0 7088167 3912544 7752789
## 75.71429 2020-06-01 5641729.2 4383659.6 6899799 3717678 7565781
## 75.85714 2020-06-02 753174.3 -507275.9 2013625 -1174518 2680867
## 76.00000 2020-06-03 206527.3 -1056129.4 1469184 -1724539 2137594
## 76.14286 2020-06-04 6237392.7 4960839.8 7513946 4285074 8189712
## 76.28571 2020-06-05 6045308.0 4763152.3 7327464 4084420 8006196
# write.csv(fc_claims,"claimsent_FourierArima_30_days_predAfter05_07.csv" )
# check <-read.csv("claimsent_FourierArima_30_days_predAfter05_07.csv")
# check
#analysis of residuals
acf(fc$residuals, main = "Correlogram")

pacf(fc$residuals, main = "Partial Correlogram")

# The Ljung-Box test is not significant (p-value = 0.3166), so there is no evidence of remaining autocorrelation in the residuals
Box.test(fc$residuals, lag = 20, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: fc$residuals
## X-squared = 22.449, df = 20, p-value = 0.3166
hist(fc$residuals,
     col = "red",
     xlab = "Error",
     main = "Histogram of residuals",
     freq = FALSE)
lines(density(fc$residuals))

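# ================== optional hold-out comparison (sketch) =============================
# A minimal back-test, assuming a 30-day hold-out is acceptable; this was not part of
# the original deployment code. accuracy(forecast, actuals) reports test-set errors,
# which is one way to substantiate that TBATS outperformed the Fourier-ARIMA fit here.
# n     <- nrow(cl)
# test  <- cl$CVol[(n - 29):n]
# # TBATS on the training window
# tr_ms <- msts(cl$CVol[1:(n - 30)], seasonal.periods = c(7, 30.5))
# tb_cv <- forecast::forecast(tbats(tr_ms), h = 30)
# accuracy(tb_cv, test)
# # Fourier-ARIMA on the same training window
# ytr   <- ts(cl$CVol[1:(n - 30)], frequency = 7)
# ztr   <- fourier(ts(cl$CVol[1:(n - 30)], frequency = 30.5/4), K = 3)
# zte   <- fourier(ts(cl$CVol[1:(n - 30)], frequency = 30.5/4), K = 3, h = 30)
# ar_cv <- forecast::forecast(auto.arima(ytr, xreg = ztr, seasonal = TRUE),
#                             xreg = zte, h = 30)
# accuracy(ar_cv, test)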