library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v forecast 8.15 v expsmooth 2.3
## v fma 2.4
##
library(mice)
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(timetk)
## Registered S3 method overwritten by 'tune':
## method from
## required_pkgs.model_spec parsnip
library(imputeTS)
library(TSstudio)
data3<- readxl::read_excel("C:\\Users\\malia\\Downloads\\Data Set for Class (1).xls")
datas03 <- data3 %>% filter(category == "S03")
datas03<- data3 %>% select(c(SeriesInd, Var05, Var07))
datas03 %>%
ggplot(aes(x = SeriesInd, y = Var05)) +
geom_line() +
ggtitle("S03Var05")
datas03 %>%
ggplot(aes(x = SeriesInd, y = Var07)) +
geom_line() +
ggtitle("S03Var07")
### The plot for var05 and var07 looks identical with only one outlier value. Next I am going to be explore missing values.
colSums(is.na(datas03))
## SeriesInd Var05 Var07
## 0 26 26
na_mean(datas03)
## # A tibble: 9,732 x 3
## SeriesInd Var05 Var07
## <dbl> <dbl> <dbl>
## 1 40669 30.5 30.6
## 2 40669 10.2 10.3
## 3 40669 26.2 26.0
## 4 40669 27.0 27.3
## 5 40669 68.7 69.2
## 6 40669 16.9 17.1
## 7 40670 30.7 30.6
## 8 40670 10.4 11.0
## 9 40670 26.0 25.9
## 10 40670 27.3 28.1
## # ... with 9,722 more rows
#datas03 <- log(datas03)
tsdata5<-tk_ts(datas03$Var05)
tsdata6<-tk_ts(datas03$Var07)
gglagplot(tsdata5)
gglagplot(tsdata6)
ggAcf(tsdata5)
ggAcf(tsdata6)
ggPacf(tsdata5)
ggPacf(tsdata6)
### Lag plot and correlogram show that data is strongly autocorelated. Some seasonality is observed in the data. ### Next we are going to split our dataset and will apply ARIMA and Exponential Smooth Modeling.
tsdata5split <- ts_split((tsdata5), sample.out = 200)
tsdata6split <- ts_split((tsdata6), sample.out = 200)
train5 <- tsdata5split$train
test5 <- tsdata5split $test
train6 <- tsdata6split$train
test6 <- tsdata6split$test
fcses5 <- ses(train5, h = 140)
## Warning in ets(x, "ANN", alpha = alpha, opt.crit = "mse", lambda = lambda, :
## Missing values encountered. Using longest contiguous portion of time series
accuracy(fcses5,tsdata5)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.57402777 24.75081 20.43633 -50.39051 79.02222 0.5594048
## Test set 0.07614851 25.65711 21.80439 -46.19999 73.73257 0.5968526
## ACF1 Theil's U
## Training set -0.5514336 NA
## Test set -0.4837395 0.5787201
autoplot(fcses5)
### Exponential Smoothing model for variable 7
fcses6 <- ses(train6, h = 140)
## Warning in ets(x, "ANN", alpha = alpha, opt.crit = "mse", lambda = lambda, :
## Missing values encountered. Using longest contiguous portion of time series
accuracy(fcses6,tsdata6)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.5720487 24.74892 20.43505 -50.44913 79.07348 0.5591979
## Test set 0.1043134 25.64183 21.76768 -46.03134 73.55556 0.5956649
## ACF1 Theil's U
## Training set -0.5516164 NA
## Test set -0.4833601 0.5780955
autoplot(fcses6)
### I am going to explore ARIMA model next.
arimav5 <- auto.arima(train5)
fcarima5 <- forecast(arimav5 , h=140)
autoplot(fcarima5)
arimav6 <- auto.arima(train6)
fcarima6 <- forecast(arimav6 , h=140)
autoplot(fcarima6)
accuracy(fcarima5,tsdata5)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.002135023 15.52192 12.67084 -4.776052 36.58413 0.3128434
## Test set -1.841830102 27.74783 23.41991 -55.561995 79.02190 0.5782382
## ACF1 Theil's U
## Training set -0.1849434 NA
## Test set -0.6853825 0.3233927
accuracy(fcses5,tsdata5)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.57402777 24.75081 20.43633 -50.39051 79.02222 0.5594048
## Test set 0.07614851 25.65711 21.80439 -46.19999 73.73257 0.5968526
## ACF1 Theil's U
## Training set -0.5514336 NA
## Test set -0.4837395 0.5787201
accuracy(fcarima6,tsdata6)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.00239195 15.46666 12.65244 -4.67112 36.62513 0.3122971
## Test set -1.98810286 27.76886 23.48834 -56.01707 79.40023 0.5797568
## ACF1 Theil's U
## Training set -0.1871857 NA
## Test set -0.6850078 0.321962
accuracy(fcses6,tsdata6)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.5720487 24.74892 20.43505 -50.44913 79.07348 0.5591979
## Test set 0.1043134 25.64183 21.76768 -46.03134 73.55556 0.5956649
## ACF1 Theil's U
## Training set -0.5516164 NA
## Test set -0.4833601 0.5780955
#write.csv(fcses5, 's03_v05_forecast_Maliat.csv')
#write.csv(fcses6, 's03_v07_forecast_Maliat.csv')