This report was produced for the Final Project of the Data Science course at Course-net. It predicts kas kantor and kas echannel for the next 30 days using an exponential smoothing (ETS) time series model. The dataset comes from the internal data of Bank B.
Report Outline
1. Data Extraction
2. Exploratory Data Analysis
3. Data Preparation
4. Modelling
5. Evaluation
6. Recommendation
Before the first run, make sure to clear all variables in the workspace, then install and load all required packages.
Extract the data from the CSV file into a data frame in R.
# Load the training data from the CSV file into a data frame.
cash_df <- read.csv("Data/train.csv")
Create a date column in the data frame from the periode column.
# Convert the `periode` column to Date and store it as a new `date` column.
# NOTE(review): no `format` is given, so this relies on as.Date()'s default
# parsing — confirm `periode` is stored in a format it recognises.
cash_df$date <- as.Date(cash_df$periode)
date is the new variable. It is used as the time index for the time series prediction.
library(ggplot2)
library(gridExtra)
# Line chart of kas kantor over time.
plot1 <- ggplot(data = cash_df, mapping = aes(x = date, y = kas_kantor)) +
  geom_line()

# Line chart of kas echannel over time.
plot2 <- ggplot(data = cash_df, mapping = aes(x = date, y = kas_echannel)) +
  geom_line()

# Stack the two charts in two rows so they can be compared.
grid.arrange(plot1, plot2, nrow = 2)
There is a trend in the kas kantor plot (plot1), but the kas echannel plot (plot2) is irregular.
# Attach xts with library() so that a missing package stops the script here;
# require() would only return FALSE and the failure would surface later at
# the first xts() call.
library(xts)

# Build a series of kas kantor ordered by date and plot it.
kas_kantor_ts <- xts(x = cash_df$kas_kantor, order.by = cash_df$date)
plot(kas_kantor_ts)

# Build a series of kas echannel ordered by date and plot it.
kas_echannel_ts <- xts(x = cash_df$kas_echannel, order.by = cash_df$date)
plot(kas_echannel_ts)
library(fpp2)
# Fit an ETS model to the kas kantor series using the ets() defaults.
fit_kas_kantor <- ets(kas_kantor_ts)

# Report the fitted model and its training-set error measures. summary() is
# called on its own: wrapping it in print() echoed the error-measure table a
# second time.
summary(fit_kas_kantor)
## ETS(A,A,N)
##
## Call:
## ets(y = kas_kantor_ts)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.0055
##
## Initial states:
## l = 81472379855.4238
## b = 37377673234.1305
##
## sigma: 72630360475
##
## AIC AICc BIC
## 22124.47 22124.63 22144.37
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -4521598053 72261676061 50625123630 -15.59676 17.51278 1.212806
## ACF1
## Training set 0.03729902
## ME RMSE MAE MPE MAPE MASE
## Training set -4521598053 72261676061 50625123630 -15.59676 17.51278 1.212806
## ACF1
## Training set 0.03729902
# Residual diagnostics for the kas kantor ETS fit.
checkresiduals(fit_kas_kantor)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,A,N)
## Q* = 74.775, df = 6, p-value = 4.274e-14
##
## Model df: 4. Total lags used: 10
# Plot the fitted kas kantor ETS model object.
plot(fit_kas_kantor)
# Fit an ETS model to the kas echannel series using the ets() defaults.
fit_kas_echannel <- ets(kas_echannel_ts)

# Report the fitted model and its training-set error measures. summary() is
# called on its own: wrapping it in print() echoed the error-measure table a
# second time.
summary(fit_kas_echannel)
## ETS(A,N,N)
##
## Call:
## ets(y = kas_echannel_ts)
##
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l = 3391844996.5115
##
## sigma: 446604475
##
## AIC AICc BIC
## 18100.23 18100.29 18112.17
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4601727 445472396 309886535 -0.614285 8.548342 1.00119 0.1355351
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4601727 445472396 309886535 -0.614285 8.548342 1.00119 0.1355351
# Residual diagnostics for the kas echannel ETS fit.
checkresiduals(fit_kas_echannel)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,N)
## Q* = 140.84, df = 8, p-value < 2.2e-16
##
## Model df: 2. Total lags used: 10
# Plot the fitted kas echannel ETS model object.
plot(fit_kas_echannel)
# Forecast 30 steps ahead from the kas kantor model and plot the forecast.
predict_kas_kantor <- forecast(fit_kas_kantor, h = 30)
plot(
  predict_kas_kantor,
  main = "Prediksi Kas Kantor BANK",
  xlab = "Tanggal",
  ylab = "Kas (Rupiah)"
)

# Forecast 30 steps ahead from the kas echannel model and plot the forecast.
predict_kas_echannel <- forecast(fit_kas_echannel, h = 30)
plot(
  predict_kas_echannel,
  main = "Prediksi Kas E-Channel BANK",
  xlab = "Tanggal",
  ylab = "Kas (Rupiah)"
)
# Load the test data used to evaluate the forecasts.
test <- read.csv("Data/test.csv")
# Actual kas kantor values from the test data, with summary statistics.
actual_kas_kantor <- test$kas_kantor
print(summary(actual_kas_kantor))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.057e+13 1.086e+13 1.100e+13 1.103e+13 1.112e+13 1.141e+13
# Actual kas echannel values from the test data, with summary statistics.
actual_kas_echannel <- test$kas_echannel
print(summary(actual_kas_echannel))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.667e+09 5.293e+09 5.691e+09 5.629e+09 5.899e+09 6.923e+09
# Forecast values for kas kantor (the `mean` element of the forecast object),
# with summary statistics.
prediction_kas_kantor <- predict_kas_kantor$mean
print(summary(prediction_kas_kantor))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.055e+13 1.075e+13 1.095e+13 1.095e+13 1.115e+13 1.135e+13
# Forecast values for kas echannel (the `mean` element of the forecast object),
# with summary statistics.
prediction_kas_echannel <- predict_kas_echannel$mean
print(summary(prediction_kas_echannel))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.209e+09 5.209e+09 5.209e+09 5.209e+09 5.209e+09 5.209e+09
# Collect actual and predicted values side by side for evaluation; column
# names are taken from the variable names.
baru_df <- data.frame(
  actual_kas_echannel,
  actual_kas_kantor,
  prediction_kas_echannel,
  prediction_kas_kantor
)
#' Print accuracy metrics for a set of predictions
#'
#' Computes the root mean square error (RMSE) and the correlation coefficient
#' between `prediction` and `actual`, then prints both with `cat()`, labelled
#' with `method`.
#'
#' @param prediction Numeric vector of predicted values.
#' @param actual Numeric vector of observed values, the same length as
#'   `prediction`.
#' @param method Label for the forecasting method shown in the printed report.
#' @return `NULL`, invisibly; the function is called for its printed output.
performance <- function(prediction, actual, method) {
  # Fail early with a clear message instead of relying on recycling in the
  # subtraction and a later dimension error from cor().
  if (length(prediction) != length(actual)) {
    stop("`prediction` and `actual` must have the same length.", call. = FALSE)
  }

  rmse <- sqrt(mean((prediction - actual)^2))

  # The correlation is undefined when either input is constant (for example a
  # flat forecast). cor() would return NA with a "standard deviation is zero"
  # warning; report NA directly so the report prints without the warning.
  is_constant <- function(x) length(unique(x)) < 2L
  r <- if (is_constant(prediction) || is_constant(actual)) {
    NA_real_
  } else {
    cor(prediction, actual)
  }

  result <- paste("==Method Name:", method,
                  "\nRoot Mean Square Error (RMSE) = ", round(rmse, 2),
                  "\nCorrelation Coefficient (R) =", round(r, 2))
  cat(result)
  invisible(NULL)
}
# Evaluate the kas kantor forecast against the actual values.
performance(
  prediction = baru_df$prediction_kas_kantor,
  actual = baru_df$actual_kas_kantor,
  method = "ETS"
)
## ==Method Name: ETS
## Root Mean Square Error (RMSE) = 111719530147.26
## Correlation Coefficient (R) = 0.95
# Evaluate the kas echannel forecast against the actual values.
performance(
  prediction = baru_df$prediction_kas_echannel,
  actual = baru_df$actual_kas_echannel,
  method = "ETS"
)
## Warning in cor(prediction, actual): the standard deviation is zero
## ==Method Name: ETS
## Root Mean Square Error (RMSE) = 665225246.28
## Correlation Coefficient (R) = NA
# Scatter plot of predicted against actual kas kantor.
p1 <- ggplot(baru_df, aes(x = actual_kas_kantor, y = prediction_kas_kantor)) +
  geom_point() +
  labs(
    title = "ETS Kas Kantor",
    x = "Actual Kas Kantor",
    y = "Prediction Kas Kantor"
  )
plot(p1)

# Scatter plot of predicted against actual kas echannel.
p2 <- ggplot(baru_df, aes(x = actual_kas_echannel, y = prediction_kas_echannel)) +
  geom_point() +
  labs(
    title = "ETS Kas echannel",
    x = "Actual Kas echannel",
    y = "Prediction Kas echannel"
  )
plot(p2)
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.