# Author Irina Max, Lead Principal Data Scientist
# This code is based on a time series TBATS model for daily claims prediction. The model was deployed to production and includes part of the pipeline.
# This script is my individual contribution and research for the XXX company.
# The script also includes an ARIMA model with seasonal frequency using a Fourier function, which did not deliver the best result in this particular case study.
# I commented out some code that does not need to be processed for this RPub.
getwd()
## [1] "/Users/irinamax/Documents/R/CHC"
#setwd("/home/irina/R_model" )
#
#library(RJDBC)
#library(aws.s3)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape2)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
library(ggplot2)
library(anytime)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x data.table::between() masks dplyr::between()
## x dplyr::filter() masks stats::filter()
## x data.table::first() masks dplyr::first()
## x dplyr::lag() masks stats::lag()
## x data.table::last() masks dplyr::last()
## x purrr::transpose() masks data.table::transpose()
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(mltools)
##
## Attaching package: 'mltools'
## The following object is masked from 'package:tidyr':
##
## replace_na
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✓ fma 2.4 ✓ expsmooth 2.3
## ── Conflicts ───────────────────────────────────────────────── fpp2_conflicts ──
## x magrittr::extract() masks tidyr::extract()
## x magrittr::set_names() masks purrr::set_names()
## x purrr::transpose() masks data.table::transpose()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(pryr)
##
## Attaching package: 'pryr'
## The following objects are masked from 'package:purrr':
##
## compose, partial
## The following object is masked from 'package:data.table':
##
## address
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
## method from
## autoplot.Arima forecast
## autoplot.acf forecast
## autoplot.ar forecast
## autoplot.bats forecast
## autoplot.decomposed.ts forecast
## autoplot.ets forecast
## autoplot.forecast forecast
## autoplot.stl forecast
## autoplot.ts forecast
## fitted.ar forecast
## fortify.ts forecast
## residuals.ar forecast
library(rJava)
# Sys.setenv("AWS_ACCESS_KEY_ID" = "...",
# "AWS_SECRET_ACCESS_KEY" = "...",
# "AWS_DEFAULT_REGION" = "us-east-1")
# download and use the Amazon Redshift JDBC driver
# download.file('http://s3.amazonaws.com/redshift-downloads/drivers/RedshiftJDBC41-1.1.9.1009.jar','RedshiftJDBC41-1.1.9.1009.jar')
# Helper to write a CSV without row names
mywrite <- function(x, file) {
  write.csv(x, file, row.names = FALSE)
}
# connect to Amazon Redshift
# driver <- JDBC("com.amazon.redshift.jdbc41.Driver", "RedshiftJDBC41-1.1.9.1009.jar", identifier.quote="`")
# url <- "jdbc:redshift://...."
# conn <- dbConnect(driver, url)
# query3 = "select the_given_date, sum
# from prod_metrics.claims_exec_report_data"
#
# cl_data = dbGetQuery(conn, query3)
# write.csv(cl_data, "cl_data.csv")
#=============================================================== work with the file up to 05-07 ====================
# feature engineering with the claims data
#cl <- read.csv("claims_Daily_data.txt", header = T, sep = "|", stringsAsFactors = F)
cl <- read.csv("cl_data.csv")
cl %>% summary
## X the_given_date sum
## Min. : 1.0 Length:498 Min. : 521261
## 1st Qu.:125.2 Class :character 1st Qu.:1663746
## Median :249.5 Mode :character Median :7356952
## Mean :249.5 Mean :5588034
## 3rd Qu.:373.8 3rd Qu.:7841872
## Max. :498.0 Max. :9438760
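# aggregate to one total per date (summing any duplicate rows for the same date)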
cl <- cl%>% group_by(the_given_date) %>% summarise(sum = sum(sum))
cl %>% head
## # A tibble: 6 x 2
## the_given_date sum
## <chr> <int>
## 1 2018-12-16 684635
## 2 2018-12-17 8394740
## 3 2018-12-18 8378415
## 4 2018-12-19 7836314
## 5 2018-12-20 7639615
## 6 2018-12-21 7575768
cl %>% tail
## # A tibble: 6 x 2
## the_given_date sum
## <chr> <int>
## 1 2020-04-21 5255620
## 2 2020-04-22 5076559
## 3 2020-04-23 4906584
## 4 2020-04-24 4981335
## 5 2020-04-25 940347
## 6 2020-04-26 570651
cl %>% dim # 498 2
## [1] 498 2
cl %>% summary
## the_given_date sum
## Length:498 Min. : 521261
## Class :character 1st Qu.:1663746
## Mode :character Median :7356952
## Mean :5588034
## 3rd Qu.:7841872
## Max. :9438760
cl %>% head
## # A tibble: 6 x 2
## the_given_date sum
## <chr> <int>
## 1 2018-12-16 684635
## 2 2018-12-17 8394740
## 3 2018-12-18 8378415
## 4 2018-12-19 7836314
## 5 2018-12-20 7639615
## 6 2018-12-21 7575768
cl %>% tail
## # A tibble: 6 x 2
## the_given_date sum
## <chr> <int>
## 1 2020-04-21 5255620
## 2 2020-04-22 5076559
## 3 2020-04-23 4906584
## 4 2020-04-24 4981335
## 5 2020-04-25 940347
## 6 2020-04-26 570651
cl %>% dim # 498 2
## [1] 498 2
#cl <- cl[-c(1:16),] # 470
names(cl) <- c("date", "CVol")
#cl$date<-as.POSIXct(cl$date,format="%m/%d/%y ")
cl %>% head
## # A tibble: 6 x 2
## date CVol
## <chr> <int>
## 1 2018-12-16 684635
## 2 2018-12-17 8394740
## 3 2018-12-18 8378415
## 4 2018-12-19 7836314
## 5 2018-12-20 7639615
## 6 2018-12-21 7575768
cl %>% tail
## # A tibble: 6 x 2
## date CVol
## <chr> <int>
## 1 2020-04-21 5255620
## 2 2020-04-22 5076559
## 3 2020-04-23 4906584
## 4 2020-04-24 4981335
## 5 2020-04-25 940347
## 6 2020-04-26 570651
# look at the weekly decomposition
ts_w <- ts(cl[,2], frequency = 7, start = c(1))
# Plot the data with facetting
autoplot(ts_w, facets = F)

decomp_w <- decompose(ts_w)
plot(decomp_w, col = "dark red")

# monthly decomposition
ts_c <- ts(cl[,2], frequency = 30, start = c(1))
autoplot(ts_c, facets = F)

decomp <- decompose(ts_c)
plot(decomp, col = "dark blue")

ts_c <- ts(cl[,2])
y <- msts(ts_c, seasonal.periods=c(7,30.5))
fit_tbat <- tbats(y)
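# Optional inspection (a sketch, not part of the original run): 30.5 approximates
# the average month length in days, so the msts object carries both a weekly and a
# roughly monthly cycle. tbats.components() exposes the fitted level/seasonal
# states for a quick visual sanity check.
# fit_tbat$seasonal.periods        # expected to report 7 and 30.5
# plot(tbats.components(fit_tbat)) # level, slope and the two seasonal components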
# tbat <- forecast::forecast(fit_tbat)
# plot(tbat)
tbat <- forecast::forecast(fit_tbat, h=30) ## for next 30 days
accuracy(tbat)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 23712.3 733689.7 406427.7 -2.176473 10.54369 0.1012882 0.04453563
plot(tbat)

# plot of residuals
plot(tbat$residuals, ylab= "residuals")

tbat
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 17.60000 5767748.4 4764525.5 6890645.0 4280192.64 7535177.7
## 17.63333 5546349.1 4506668.6 6720980.6 4008841.83 7399450.5
## 17.66667 5642183.6 4588456.7 6832110.5 4083688.89 7519193.6
## 17.70000 5558242.2 4510620.7 6742691.5 4009308.51 7427158.5
## 17.73333 5425108.0 4388708.6 6598957.3 3893548.01 7278104.1
## 17.76667 875960.3 557061.9 1287153.8 422598.04 1545043.2
## 17.80000 317138.8 156285.6 552192.3 97502.76 710047.5
## 17.83333 5670346.5 4589699.8 6893920.1 4073252.59 7601684.9
## 17.86667 5871613.3 4761573.2 7127132.5 4230578.99 7852861.6
## 17.90000 5895637.1 4777647.3 7160656.8 4243040.83 7892073.6
## 17.93333 5736784.6 4633332.6 6987721.0 4106562.98 7711907.4
## 17.96667 5586216.6 4496505.1 6823928.9 3977174.11 7541366.3
## 18.00000 943020.6 598251.6 1388128.4 453069.99 1667490.9
## 18.03333 360840.4 180231.7 623102.2 113709.80 798657.8
## 18.06667 5872724.3 4732801.4 7166592.9 4189214.28 7916244.7
## 18.10000 5919383.9 4768674.8 7225762.0 4220043.68 7982764.4
## 18.13333 5747586.4 4613460.9 7037775.1 4073720.86 7786411.8
## 18.16667 5475033.1 4371038.2 6734756.5 3847057.87 7467182.5
## 18.20000 5345770.5 4253907.3 6593937.2 3736535.33 7320522.4
## 18.23333 898670.0 555478.4 1347676.8 412982.40 1631657.7
## 18.26667 354459.8 171546.2 624012.7 105405.44 805819.2
## 18.30000 5857071.2 4685350.2 7192582.7 4128670.20 7968500.7
## 18.33333 5864153.0 4686814.5 7206742.4 4127716.95 7987031.9
## 18.36667 5679510.0 4521382.7 7003114.7 3972495.97 7773494.1
## 18.40000 5467201.1 4332546.3 6767297.5 3796017.11 7525268.7
## 18.43333 5422431.5 4289206.7 6722219.7 3753847.35 7480521.4
## 18.46667 927150.6 567662.8 1399754.8 419170.97 1699486.2
## 18.50000 345490.2 161631.5 620708.6 96465.77 807811.6
## 18.53333 5576535.9 4410009.3 6914706.5 3858986.36 7695471.9
## 18.56667 5404240.2 4256885.0 6723329.5 3715996.88 7494077.8
# model summary: parameters, error measures and forecasts
tbat %>% summary
##
## Forecast method: TBATS(0.388, {0,3}, 0.8, {<7,3>, <30.5,5>})
##
## Model Information:
## TBATS(0.388, {0,3}, 0.8, {<7,3>, <30.5,5>})
##
## Call: tbats(y = y)
##
## Parameters
## Lambda: 0.388205
## Alpha: 0.07741392
## Beta: 0.008708724
## Damping Parameter: 0.800032
## Gamma-1 Values: 0.0009744367 6.369535e-05
## Gamma-2 Values: -2.136673e-05 0.001364328
## MA coefficients: 0.289193 -0.029751 -0.010675
##
## Seed States:
## [,1]
## [1,] 1031.2009448
## [2,] -26.6113697
## [3,] -291.8043947
## [4,] -169.2487962
## [5,] -65.5639624
## [6,] 133.5986366
## [7,] 147.8119551
## [8,] 66.3299465
## [9,] 2.0707887
## [10,] 1.5236682
## [11,] -0.1360861
## [12,] -6.9076830
## [13,] 5.6470044
## [14,] -10.5938211
## [15,] 5.4815555
## [16,] 5.5350740
## [17,] -5.1366407
## [18,] 5.7436719
## [19,] 0.0000000
## [20,] 0.0000000
## [21,] 0.0000000
## attr(,"lambda")
## [1] 0.3882048
##
## Sigma: 60.55284
## AIC: 16532.53
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 23712.3 733689.7 406427.7 -2.176473 10.54369 0.1012882 0.04453563
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 17.60000 5767748.4 4764525.5 6890645.0 4280192.64 7535177.7
## 17.63333 5546349.1 4506668.6 6720980.6 4008841.83 7399450.5
## 17.66667 5642183.6 4588456.7 6832110.5 4083688.89 7519193.6
## 17.70000 5558242.2 4510620.7 6742691.5 4009308.51 7427158.5
## 17.73333 5425108.0 4388708.6 6598957.3 3893548.01 7278104.1
## 17.76667 875960.3 557061.9 1287153.8 422598.04 1545043.2
## 17.80000 317138.8 156285.6 552192.3 97502.76 710047.5
## 17.83333 5670346.5 4589699.8 6893920.1 4073252.59 7601684.9
## 17.86667 5871613.3 4761573.2 7127132.5 4230578.99 7852861.6
## 17.90000 5895637.1 4777647.3 7160656.8 4243040.83 7892073.6
## 17.93333 5736784.6 4633332.6 6987721.0 4106562.98 7711907.4
## 17.96667 5586216.6 4496505.1 6823928.9 3977174.11 7541366.3
## 18.00000 943020.6 598251.6 1388128.4 453069.99 1667490.9
## 18.03333 360840.4 180231.7 623102.2 113709.80 798657.8
## 18.06667 5872724.3 4732801.4 7166592.9 4189214.28 7916244.7
## 18.10000 5919383.9 4768674.8 7225762.0 4220043.68 7982764.4
## 18.13333 5747586.4 4613460.9 7037775.1 4073720.86 7786411.8
## 18.16667 5475033.1 4371038.2 6734756.5 3847057.87 7467182.5
## 18.20000 5345770.5 4253907.3 6593937.2 3736535.33 7320522.4
## 18.23333 898670.0 555478.4 1347676.8 412982.40 1631657.7
## 18.26667 354459.8 171546.2 624012.7 105405.44 805819.2
## 18.30000 5857071.2 4685350.2 7192582.7 4128670.20 7968500.7
## 18.33333 5864153.0 4686814.5 7206742.4 4127716.95 7987031.9
## 18.36667 5679510.0 4521382.7 7003114.7 3972495.97 7773494.1
## 18.40000 5467201.1 4332546.3 6767297.5 3796017.11 7525268.7
## 18.43333 5422431.5 4289206.7 6722219.7 3753847.35 7480521.4
## 18.46667 927150.6 567662.8 1399754.8 419170.97 1699486.2
## 18.50000 345490.2 161631.5 620708.6 96465.77 807811.6
## 18.53333 5576535.9 4410009.3 6914706.5 3858986.36 7695471.9
## 18.56667 5404240.2 4256885.0 6723329.5 3715996.88 7494077.8
tbat_claims <- data.frame(tbat)
tbat_claims %>% str
## 'data.frame': 30 obs. of 5 variables:
## $ Point.Forecast: num 5767748 5546349 5642184 5558242 5425108 ...
## $ Lo.80 : num 4764525 4506669 4588457 4510621 4388709 ...
## $ Hi.80 : num 6890645 6720981 6832110 6742691 6598957 ...
## $ Lo.95 : num 4280193 4008842 4083689 4009309 3893548 ...
## $ Hi.95 : num 7535178 7399450 7519194 7427158 7278104 ...
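# claim volumes cannot be negative, so clip any negative point forecasts to zero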
tbat_claims$Point.Forecast[tbat_claims$Point.Forecast <0] <- 0
# Create a data frame of results by day to write out for the dashboard
startDate <- as.Date("2020-05-11")
dnew <- seq(startDate, by="1 day", length.out=30)
tbat_claims <- cbind(dnew,tbat_claims)
# tbat_claims$date <- dnew
# claimsent <- cbind(claimsent[,5], claimsent[,1:4])
tbat_claims %>% head
## dnew Point.Forecast Lo.80 Hi.80 Lo.95 Hi.95
## 17.60000 2020-05-11 5767748.4 4764525.5 6890645 4280193 7535178
## 17.63333 2020-05-12 5546349.1 4506668.6 6720981 4008842 7399450
## 17.66667 2020-05-13 5642183.6 4588456.7 6832110 4083689 7519194
## 17.70000 2020-05-14 5558242.2 4510620.7 6742691 4009309 7427158
## 17.73333 2020-05-15 5425108.0 4388708.6 6598957 3893548 7278104
## 17.76667 2020-05-16 875960.3 557061.9 1287154 422598 1545043
plot(tbat_claims$Point.Forecast) # weekend days with low volume are clearly visible

#write.csv(tbat_claims,"claims_TBAT_30days_after05-07.csv" )
#write.csv(tbat_claims,"claims_TBAT_30days_after05-11.csv" )
# Check the residual error distribution
tbat$residuals %>% head(10)
## Multi-Seasonal Time Series:
## Start: 1 1
## Seasonal Periods: 7 30.5
## Data:
## CVol
## [1,] -13.506292
## [2,] 67.245091
## [3,] 52.224061
## [4,] 44.979924
## [5,] 53.720549
## [6,] 50.298470
## [7,] 4.704759
## [8,] 19.838816
## [9,] -65.328704
## [10,] -380.063651
plot(tbat$residuals)

acf(tbat$residuals, lag.max = 30, main = "Correlogram")

pacf(tbat$residuals, main = "Partial Correlogram")

# The ACF and PACF confirm significant autocorrelation at lags 7 and 14.
# The Ljung-Box test returns a significant p-value of 0.003106, and the histogram
# shows the residuals roughly follow a normal distribution centred close to zero,
# which supports statistically valid inference from the model.
Box.test(tbat$residuals, lag = 7, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: tbat$residuals
## X-squared = 21.492, df = 7, p-value = 0.003106
hist(tbat$residuals,
     col = "red",
     xlab = "Error",
     main = "Histogram of residuals",
     freq = FALSE)
lines(density(tbat$residuals))

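# A more compact alternative (a sketch, not part of the original pipeline):
# forecast::checkresiduals() bundles the residual time plot, ACF, histogram and a
# Ljung-Box test into a single call, reproducing the diagnostics above.
# checkresiduals(tbat, lag = 14)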
#=============================================END======================================
# ______________________________ auto.arima with seasonal frequency using Fourier terms
y <- ts(ts_c, frequency=7)
z <- fourier(ts(cl[,2], frequency=30.5/4), K=3)
zf <- forecast::fourier(ts(ts_c, frequency=30.5/4), K=3, h=30)
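# Quick illustration (not part of the original run): fourier() returns 2*K regressor
# columns (paired sine/cosine terms named S1-.., C1-.., ...) that auto.arima() treats
# as external regressors through xreg.
# colnames(z)  # 2 * 3 = 6 columns
# head(z, 3)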
fit <- auto.arima(y, xreg=cbind(z), seasonal=T)
accuracy(fit)
## ME RMSE MAE MPE MAPE MASE
## Training set -1668.783 779778.5 430380.8 -0.8494264 12.87863 0.8682568
## ACF1
## Training set 0.0004663419
fc <- forecast::forecast(fit, xreg=cbind(zf), h=30)
plot(fc)

autoplot(fc) + xlab("week")

# retrieving the results and adding 30 days of dates
# model summary: parameters, error measures and forecasts
fc %>% summary
##
## Forecast method: Regression with ARIMA(2,0,2)(0,1,1)[7] errors
##
## Model Information:
## Series: y
## Regression with ARIMA(2,0,2)(0,1,1)[7] errors
##
## Coefficients:
## ar1 ar2 ma1 ma2 sma1 drift S1-8
## 0.9476 0.0154 -0.5125 -0.3162 -0.8862 -1995.248 -92635.17
## s.e. 0.1537 0.1410 0.1471 0.1070 0.0521 2937.171 58880.65
## C1-8 S2-8 C2-8 S3-8 C3-8
## -13547.88 -2289.627 63138.44 21909.20 4776.366
## s.e. 59330.60 46896.345 46954.15 34178.41 34183.118
##
## sigma^2 estimated as 6.322e+11: log likelihood=-7366.4
## AIC=14758.79 AICc=14759.55 BIC=14813.34
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -1668.783 779778.5 430380.8 -0.8494264 12.87863 0.8682568
## ACF1
## Training set 0.0004663419
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 72.14286 6321888.43 5302935.2 7340841.7 4763534 7880243
## 72.28571 6110437.94 4999202.0 7221673.9 4410949 7809927
## 72.42857 5807447.58 4690412.3 6924482.9 4099090 7515806
## 72.57143 5667174.86 4544280.2 6790069.6 3949856 7384494
## 72.71429 5509417.03 4381116.7 6637717.4 3783831 7235003
## 72.85714 561738.71 -571558.0 1695035.5 -1171489 2294967
## 73.00000 -147573.21 -1285489.7 990343.3 -1887866 1592720
## 73.14286 6116162.68 4958178.0 7274147.4 4345178 7887147
## 73.28571 6185155.97 5018058.1 7352253.8 4400234 7970078
## 73.42857 5877631.31 4705856.6 7049406.0 4085557 7669706
## 73.57143 5741827.16 4565677.3 6917977.0 3943061 7540593
## 73.71429 5554229.03 4374032.0 6734426.1 3749273 7359185
## 73.85714 694988.78 -488953.9 1878931.5 -1115695 2505673
## 74.00000 -14365.21 -1201775.3 1173044.9 -1830352 1801622
## 74.14286 6061612.35 4856920.2 7266304.5 4219195 7904030
## 74.28571 6177369.47 4965175.8 7389563.2 4323479 8031260
## 74.42857 5936025.85 4720187.6 7151864.1 4076562 7795490
## 74.57143 5794242.06 4574986.8 7013497.4 3929552 7658932
## 74.71429 5599464.97 4377046.0 6821883.9 3729937 7468993
## 74.85714 736556.57 -488792.7 1961905.8 -1137453 2610567
## 75.00000 137393.51 -1090670.4 1365457.4 -1740768 2015555
## 75.14286 6103313.70 4859929.5 7346697.9 4201722 8004906
## 75.28571 6093430.93 4843655.7 7343206.1 4182065 8004797
## 75.42857 5955096.43 4702368.8 7207824.0 4039215 7870978
## 75.57143 5832666.71 4577166.0 7088167.4 3912544 7752789
## 75.71429 5641729.20 4383659.6 6899798.7 3717678 7565781
## 75.85714 753174.32 -507275.9 2013624.5 -1174518 2680867
## 76.00000 206527.30 -1056129.4 1469184.0 -1724539 2137594
## 76.14286 6237392.67 4960839.8 7513945.5 4285074 8189712
## 76.28571 6045308.00 4763152.3 7327463.7 4084420 8006196
fc$model$variance
## NULL
fc$upper
## Time Series:
## Start = c(72, 2)
## End = c(76, 3)
## Frequency = 7
## 80% 95%
## 72.14286 7340841.7 7880243
## 72.28571 7221673.9 7809927
## 72.42857 6924482.9 7515806
## 72.57143 6790069.6 7384494
## 72.71429 6637717.4 7235003
## 72.85714 1695035.5 2294967
## 73.00000 990343.3 1592720
## 73.14286 7274147.4 7887147
## 73.28571 7352253.8 7970078
## 73.42857 7049406.0 7669706
## 73.57143 6917977.0 7540593
## 73.71429 6734426.1 7359185
## 73.85714 1878931.5 2505673
## 74.00000 1173044.9 1801622
## 74.14286 7266304.5 7904030
## 74.28571 7389563.2 8031260
## 74.42857 7151864.1 7795490
## 74.57143 7013497.4 7658932
## 74.71429 6821883.9 7468993
## 74.85714 1961905.8 2610567
## 75.00000 1365457.4 2015555
## 75.14286 7346697.9 8004906
## 75.28571 7343206.1 8004797
## 75.42857 7207824.0 7870978
## 75.57143 7088167.4 7752789
## 75.71429 6899798.7 7565781
## 75.85714 2013624.5 2680867
## 76.00000 1469184.0 2137594
## 76.14286 7513945.5 8189712
## 76.28571 7327463.7 8006196
fc_claims <- data.frame(fc)
fc_claims %>% str
## 'data.frame': 30 obs. of 5 variables:
## $ Point.Forecast: num 6321888 6110438 5807448 5667175 5509417 ...
## $ Lo.80 : num 5302935 4999202 4690412 4544280 4381117 ...
## $ Hi.80 : num 7340842 7221674 6924483 6790070 6637717 ...
## $ Lo.95 : num 4763534 4410949 4099090 3949856 3783831 ...
## $ Hi.95 : num 7880243 7809927 7515806 7384494 7235003 ...
# claimsent <- fc_df[,1:4]
# claimsent <- t(claimsent) # transpose matrix long format
# claimsent
# for deployment, this hard-coded date can be replaced with the actual Sys.Date()
startDate <- as.Date("2020-05-07")
dnew <- seq(startDate, by="1 day", length.out=30)
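# Hypothetical deployment variant (a sketch): anchor the horizon to the system
# clock instead of a fixed date.
# startDate <- Sys.Date() + 1   # start the 30-day horizon tomorrow
# dnew      <- seq(startDate, by = "1 day", length.out = 30)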
fc_claims <- cbind(dnew,fc_claims)
# fc_claims$date <- dnew
# claimsent <- cbind(claimsent[,5], claimsent[,1:4])
fc_claims$Point.Forecast[fc_claims$Point.Forecast <0] <- 0
fc_claims %>% tail
## dnew Point.Forecast Lo.80 Hi.80 Lo.95 Hi.95
## 75.57143 2020-05-31 5832666.7 4577166.0 7088167 3912544 7752789
## 75.71429 2020-06-01 5641729.2 4383659.6 6899799 3717678 7565781
## 75.85714 2020-06-02 753174.3 -507275.9 2013625 -1174518 2680867
## 76.00000 2020-06-03 206527.3 -1056129.4 1469184 -1724539 2137594
## 76.14286 2020-06-04 6237392.7 4960839.8 7513946 4285074 8189712
## 76.28571 2020-06-05 6045308.0 4763152.3 7327464 4084420 8006196
# write.csv(fc_claims,"claimsent_FourierArima_30_days_predAfter05_07.csv" )
# check <-read.csv("claimsent_FourierArima_30_days_predAfter05_07.csv")
# check
#analysis of residuals
acf(fc$residuals, main = "Correlogram")

pacf(fc$residuals, main = "Partial Correlogram")

# The Ljung-Box test is not significant (p-value = 0.3166), so there is no evidence of remaining autocorrelation in the residuals
Box.test(fc$residuals, lag = 20, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: fc$residuals
## X-squared = 22.449, df = 20, p-value = 0.3166
hist(fc$residuals,
     col = "red",
     xlab = "Error",
     main = "Histogram of residuals",
     freq = FALSE)
lines(density(fc$residuals))

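# ================== optional hold-out comparison (sketch) =============================
# A minimal back-test, assuming a 30-day hold-out is acceptable; this was not part of
# the original deployment code. accuracy(forecast, actuals) reports test-set errors,
# which is one way to substantiate that TBATS outperformed the Fourier-ARIMA fit here.
# n     <- nrow(cl)
# test  <- cl$CVol[(n - 29):n]
# # TBATS on the training window
# tr_ms <- msts(cl$CVol[1:(n - 30)], seasonal.periods = c(7, 30.5))
# tb_cv <- forecast::forecast(tbats(tr_ms), h = 30)
# accuracy(tb_cv, test)
# # Fourier-ARIMA on the same training window
# ytr   <- ts(cl$CVol[1:(n - 30)], frequency = 7)
# ztr   <- fourier(ts(cl$CVol[1:(n - 30)], frequency = 30.5/4), K = 3)
# zte   <- fourier(ts(cl$CVol[1:(n - 30)], frequency = 30.5/4), K = 3, h = 30)
# ar_cv <- forecast::forecast(auto.arima(ytr, xreg = ztr, seasonal = TRUE),
#                             xreg = zte, h = 30)
# accuracy(ar_cv, test)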