Description

This report was produced for the Final Project of the Data Science Course-net. It predicts kas kantor and kas echannel for the next 30 days using an Exponential Smoothing (ETS) time series model. The dataset comes from the internal data of Bank B.

Report Outline
1. Data Extraction
2. Exploratory Data Analysis
3. Data Preparation
4. Modelling
5. Evaluation
6. Recommendation

Before running this report for the first time, make sure to clear all variables in the workspace, then install and load all required packages.

1. Data Extraction

Extract data in csv format into dataframe in R.

cash_df <- read.csv("Data/train.csv")

Add a date column to the data frame, converted from the periode column

cash_df$date <- as.Date(cash_df$periode)

date is the new variable. It is used as the time index for the time series prediction.

2. Exploratory Data Analysis

Time Plot

library(ggplot2)
library(gridExtra)
# One line chart per cash series, both plotted against the date column.
plot1 <- ggplot(data = cash_df, mapping = aes(x = date, y = kas_kantor)) +
  geom_line()
plot2 <- ggplot(data = cash_df, mapping = aes(x = date, y = kas_echannel)) +
  geom_line()

# Stack the two charts vertically so they can be compared on the same dates.
grid.arrange(plot1, plot2, nrow = 2)

There is a trend in the kas kantor plot (plot1), but the kas echannel plot (plot2) is irregular.

3. Data Preparation

Declare the data as time series objects

Data 1: Kas Kantor

# Attach xts with library() rather than require(): library() stops with an
# error right here if the package is not installed, whereas require() only
# returns FALSE and lets the script fail later at the xts() call.
library(xts)

# Build the kas kantor series, indexed by the date column of cash_df.
kas_kantor_ts <- xts(x = cash_df$kas_kantor,
                     order.by = cash_df$date)
plot(kas_kantor_ts)

Data 2: Kas eChannel

kas_echannel_ts <- xts(x = cash_df$kas_echannel,
                       order.by = cash_df$date)
plot(kas_echannel_ts)

4. Modelling (Exponential Smoothing - ETS)

Kas Kantor

library(fpp2)
fit_kas_kantor <- ets(kas_kantor_ts)

Check Residual Kas Kantor

checkresiduals(fit_kas_kantor)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,A,N)
## Q* = 74.775, df = 6, p-value = 4.274e-14
## 
## Model df: 4.   Total lags used: 10

Plot Kas Kantor

plot(fit_kas_kantor)

Kas eChannel

fit_kas_echannel <- ets(kas_echannel_ts)

Check Residual Kas eChannel

checkresiduals(fit_kas_echannel)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,N,N)
## Q* = 140.84, df = 8, p-value < 2.2e-16
## 
## Model df: 2.   Total lags used: 10

Plot Kas eChannel

plot(fit_kas_echannel)

5. Evaluation

Predict Kas Kantor

# Forecast 30 steps ahead from the fitted kas kantor model and plot the result.
predict_kas_kantor <- forecast(fit_kas_kantor, h = 30)
plot(
  predict_kas_kantor,
  main = "Prediksi Kas Kantor BANK",
  xlab = "Tanggal",
  ylab = "Kas (Rupiah)"
)

Predict Kas eChannel

# Forecast 30 steps ahead from the fitted kas echannel model and plot the result.
predict_kas_echannel <- forecast(fit_kas_echannel, h = 30)
plot(
  predict_kas_echannel,
  main = "Prediksi Kas E-Channel BANK",
  xlab = "Tanggal",
  ylab = "Kas (Rupiah)"
)

Extract test data

test <- read.csv("Data/test.csv")

Filter test data

actual_kas_kantor <- test$kas_kantor
print(summary(actual_kas_kantor))
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 1.057e+13 1.086e+13 1.100e+13 1.103e+13 1.112e+13 1.141e+13
actual_kas_echannel <- test$kas_echannel
print(summary(actual_kas_echannel))
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 4.667e+09 5.293e+09 5.691e+09 5.629e+09 5.899e+09 6.923e+09

Predictions for the test period, based on the forecast mean (point forecast)

prediction_kas_kantor <- predict_kas_kantor$mean
print(summary(prediction_kas_kantor))
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 1.055e+13 1.075e+13 1.095e+13 1.095e+13 1.115e+13 1.135e+13
prediction_kas_echannel <- predict_kas_echannel$mean
print(summary(prediction_kas_echannel))
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 5.209e+09 5.209e+09 5.209e+09 5.209e+09 5.209e+09 5.209e+09

New data frame for test data

# Combine actual and predicted values side by side, one row per test day.
# The predictions come from forecast()$mean, which is a `ts` object; they are
# converted with as.numeric() so that every column is a plain numeric vector.
# Column names are spelled out so they stay identical to the variable names
# used by the code that reads baru_df afterwards.
baru_df <- data.frame(
  actual_kas_echannel = actual_kas_echannel,
  actual_kas_kantor = actual_kas_kantor,
  prediction_kas_echannel = as.numeric(prediction_kas_echannel),
  prediction_kas_kantor = as.numeric(prediction_kas_kantor)
)

Performance function for the test data

#' Print and return accuracy metrics for a set of predictions
#'
#' Computes the root mean square error (RMSE) and the Pearson correlation
#' coefficient (R) between `prediction` and `actual`, prints a short report
#' with `cat()`, and returns the unrounded metrics invisibly.
#'
#' @param prediction Predicted values (numeric vector or an object that
#'   `as.numeric()` can convert, e.g. a `ts`).
#' @param actual Observed values, same length as `prediction`.
#' @param method Label printed in the report header.
#' @return Invisibly, a list with elements `method`, `rmse` and `r`. `r` is
#'   `NA` when either input is constant, because the correlation is undefined.
performance <- function(prediction, actual, method) {
  # Drop time-series attributes so the comparison is purely positional.
  prediction <- as.numeric(prediction)
  actual <- as.numeric(actual)

  if (length(prediction) == 0L || length(prediction) != length(actual)) {
    stop(
      "`prediction` and `actual` must be non-empty and of equal length.",
      call. = FALSE
    )
  }

  rmse <- sqrt(mean((prediction - actual)^2))

  # cor() warns "the standard deviation is zero" and returns NA for a constant
  # input (e.g. a flat forecast); report NA directly without the warning.
  is_constant <- function(x) length(unique(x)) <= 1L
  r <- if (is_constant(prediction) || is_constant(actual)) {
    NA_real_
  } else {
    cor(prediction, actual)
  }

  result <- paste("==Method Name:", method,
                  "\nRoot Mean Square Error (RMSE) = ", round(rmse, 2),
                  "\nCorrelation Coefficient (R) =", round(r, 2))
  cat(result)

  invisible(list(method = method, rmse = rmse, r = r))
}

Performance kas kantor

performance(baru_df$prediction_kas_kantor, baru_df$actual_kas_kantor, 'ETS')
## ==Method Name: ETS 
## Root Mean Square Error (RMSE) =  111719530147.26 
## Correlation Coefficient (R) = 0.95

Performance kas echannel

performance(baru_df$prediction_kas_echannel, baru_df$actual_kas_echannel, 'ETS')
## Warning in cor(prediction, actual): the standard deviation is zero
## ==Method Name: ETS 
## Root Mean Square Error (RMSE) =  665225246.28 
## Correlation Coefficient (R) = NA

Visualization: Actual vs Prediction

ggplot | x = actual, y = prediction, geom_point

Kas Kantor

# Scatter plot of predicted against actual kas kantor values.
p1 <- ggplot(
  data = baru_df,
  mapping = aes(x = actual_kas_kantor, y = prediction_kas_kantor)
) +
  geom_point() +
  labs(
    title = "ETS Kas Kantor",
    x = "Actual Kas Kantor",
    y = "Prediction Kas Kantor"
  )
plot(p1)

Kas echannel

# Scatter plot of predicted against actual kas echannel values.
p2 <- ggplot(
  data = baru_df,
  mapping = aes(x = actual_kas_echannel, y = prediction_kas_echannel)
) +
  geom_point() +
  labs(
    title = "ETS Kas echannel",
    x = "Actual Kas echannel",
    y = "Prediction Kas echannel"
  )
plot(p2)

6. Recommendation

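Further analysis with other time series methods is needed to obtain a more meaningful prediction of kas echannel: the fitted ETS(A,N,N) model produces a constant forecast, so its correlation with the actual values cannot be computed (NA).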

R Markdown

Final Project

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.