library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v forecast 8.15 v expsmooth 2.3
## v fma 2.4
##
library(mice)
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(timetk)
## Registered S3 method overwritten by 'tune':
## method from
## required_pkgs.model_spec parsnip
library(imputeTS)
data<- readxl::read_excel("C:\\Users\\malia\\Downloads\\Data Set for Class.xls")
datas01 <- data %>% filter(category == "S01")
datas01<- data %>% select(c(SeriesInd, Var01, Var02))
datas01 %>%
ggplot(aes(x = SeriesInd, y = Var01)) +
geom_line() +
ggtitle("S01Var01")
datas01 %>%
ggplot(aes(x = SeriesInd, y = Var02)) +
geom_line() +
ggtitle("S01Var02")
### We can observe a upward trend in Var01 and a downward trend in Var02.Furthermore we will explore missing values and impute our missing data.Seasonality is not observed in the data.
colSums(is.na(datas01))
## SeriesInd Var01 Var02
## 0 14 2
na_mean(datas01)
## # A tibble: 9,732 x 3
## SeriesInd Var01 Var02
## <dttm> <dbl> <dbl>
## 1 2011-05-06 00:00:00 30.6 123432400
## 2 2011-05-06 00:00:00 10.3 60855800
## 3 2011-05-06 00:00:00 26.6 10369300
## 4 2011-05-06 00:00:00 27.5 39335700
## 5 2011-05-06 00:00:00 69.3 27809100
## 6 2011-05-06 00:00:00 17.2 16587400
## 7 2011-05-07 00:00:00 30.8 150476200
## 8 2011-05-07 00:00:00 11.2 215620200
## 9 2011-05-07 00:00:00 26.3 10943800
## 10 2011-05-07 00:00:00 28.2 55416000
## # ... with 9,722 more rows
tsdata1<-tk_ts(datas01$Var01)
tsdata2<-tk_ts(datas01$Var02)
autoplot(tsdata1) + ggtitle("S01Var01") +
xlab("Time") + ylab("Variables")
autoplot(tsdata2) + ggtitle("S01Var02") +
xlab("Time") + ylab("Variables")
### Lag Plots
gglagplot(tsdata1)
gglagplot(tsdata2)
ggAcf(tsdata1)
ggAcf(tsdata2)
### The lag plot and correlogram from v01 shows some seasonality. However, seasonality is not visible in lag plot and in correlogram for variable 2
# Create the training data as train
train1 <- subset(tsdata1, end = 2015)
# Compute naive forecasts and save to naive_fc
naive_fc1 <- naive(train1, h = 140)
# Compute mean forecasts and save to mean_fc
mean_fc1 <- meanf(train1, h = 140)
# Use accuracy() to compute RMSE statistics
accuracy(naive_fc1, tsdata1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0278873 32.57380 27.36840 -59.4440 117.9316 1.000290
## Test set -48.9826113 54.10544 48.98261 -221.2453 221.2453 1.790269
## ACF1 Theil's U
## Training set -0.7334223 NA
## Test set -0.5507803 1.48193
accuracy(mean_fc1, tsdata1)
## ME RMSE MAE MPE MAPE MASE
## Training set -3.679629e-16 19.73002 15.19516 -36.10575 59.72449 0.5553692
## Test set 4.637308e+00 23.44370 17.71282 -22.75315 55.30648 0.6473871
## ACF1 Theil's U
## Training set -0.3637759 NA
## Test set -0.5507803 0.6355887
# Create the training data as train
train2 <- subset(tsdata2, end = 2015)
# Compute naive forecasts and save to naive_fc
naive_fc2 <- naive(train2, h = 140)
# Compute mean forecasts and save to mean_fc
mean_fc2 <- meanf(train2, h = 140)
# Use accuracy() to compute RMSE statistics
accuracy(naive_fc2, tsdata2)
## ME RMSE MAE MPE MAPE MASE
## Training set -52168.55 79719685 53822436 -114.12255 167.80715 0.9997293
## Test set 21416134.29 37625116 25284019 12.40691 61.05844 0.4696401
## ACF1 Theil's U
## Training set -0.2995095 NA
## Test set 0.1479567 0.8451807
accuracy(mean_fc2, tsdata2)
## ME RMSE MAE MPE MAPE MASE
## Training set 2.413029e-09 59341260 43257890 -123.7513 152.6358 0.8034973
## Test set -1.610240e+07 34875290 30731508 -166.0340 180.9195 0.5708250
## ACF1 Theil's U
## Training set 0.09759004 NA
## Test set 0.14795674 0.9964229
fcses1 <- ses(train1, h = 140)
## Warning in ets(x, "ANN", alpha = alpha, opt.crit = "mse", lambda = lambda, :
## Missing values encountered. Using longest contiguous portion of time series
accuracy(fcses1,tsdata1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.7224665 19.50435 14.87799 -32.04563 56.78346 0.5389065
## Test set -1.7481373 23.04688 19.20306 -46.39102 70.22276 0.6955681
## ACF1 Theil's U
## Training set -0.4265213 NA
## Test set -0.5507803 0.5531524
autoplot(fcses1)
### Exponential Smoothing Method for Variable 2
fcses2 <- ses(train2, h = 140)
## Warning in ets(x, "ANN", alpha = alpha, opt.crit = "mse", lambda = lambda, :
## Missing values encountered. Using longest contiguous portion of time series
accuracy(fcses2,tsdata2)
## ME RMSE MAE MPE MAPE MASE
## Training set -890607.1 56828040 42008410 -117.2648 146.0612 0.8010105
## Test set -5310036.0 31387816 26262183 -114.7047 138.1582 0.5007636
## ACF1 Theil's U
## Training set 0.06212235 NA
## Test set 0.14795674 0.7725387
autoplot(fcses2)
### ARIMA Modelling: ### ARIMA modeling is applied to variable 1
arimav1 <- auto.arima(train1)
fcarima1 <- forecast(arimav1 , h=140)
autoplot(fcarima1)
accuracy(fcarima1,tsdata1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01184683 12.59879 9.570145 -7.410071 33.05340 0.3497800
## Test set -2.04214650 22.34719 18.745518 -45.356899 68.64091 0.6851314
## ACF1 Theil's U
## Training set -0.1096318 NA
## Test set -0.5544047 0.5387595
arimav2 <- auto.arima(train2)
fcarima2 <- forecast(arimav2 , h=140)
autoplot(fcarima2)
accuracy(fcarima2,tsdata2)
## ME RMSE MAE MPE MAPE MASE
## Training set -81637.22 67656455 46615997 -97.629114 132.95304 0.8658727
## Test set 20727196.88 37186514 24984596 9.270364 61.72749 0.4640785
## ACF1 Theil's U
## Training set -0.01773849 NA
## Test set 0.15069241 0.8307772
#write.csv(fcarima1, 's01_v051_forecast.csv')
#write.csv(fcses2, 's01_v02_forecast.csv')