rm(list = ls())
# load necessary libraries
library(ggplot2)
library(tseries)
library(forecast)
library(kableExtra)
library(dplyr)
https://www.kaggle.com/syahminormokles/tour-de-france-1930-2020-data
# chose where you store the data
riders=read.csv(file.choose())
# convert it into dataframe
as.data.frame(riders)
my_ts= ts(riders$Hours, start=c(1903), end = c(2019), frequency=12)
ts.plot(my_ts,xlab = "Year", ylab= "Number of hours",
main = "Bicyle Forecasting")
## Split data into train and test set
train.my_ts = window(my_ts, start=c(1903), end = c(2018), frequency=12)
test.my_ts = window(my_ts, start=c(2019), frequency=12)
## Plotting the train and Test set
autoplot(train.my_ts, series="Train") +
autolayer(test.my_ts, series="Test") +
ggtitle("number of hours Traning and Test data") +
xlab("Year") + ylab("Hours") +
guides(colour=guide_legend(title="Forecast"))
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
par(mfrow=c(1,2))
man.arima = arima(train.my_ts, order = c(1,1,1), seasonal = c(1,1,1), method = 'ML')
man.arima
##
## Call:
## arima(x = train.my_ts, order = c(1, 1, 1), seasonal = c(1, 1, 1), method = "ML")
##
## Coefficients:
## ar1 ma1 sar1 sma1
## 0.6056 -0.7543 -0.0404 -0.9569
## s.e. 0.1087 0.0909 0.0320 0.0138
##
## sigma^2 estimated as 75.47: log likelihood = -4213.18, aic = 8436.37
plot(forecast(man.arima, h=12), shadecols = "oldstyle")
accuracy(forecast(man.arima, 24), test.my_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1543339 8.639480 2.971727 NaN Inf 0.2738655
## Test set 1.1665138 1.166514 1.166514 0.7725257 0.7725257 0.1075024
## ACF1
## Training set -0.03052486
## Test set NA
auto.fit = auto.arima(train.my_ts, trace = F, seasonal = T)
auto.fit
## Series: train.my_ts
## ARIMA(2,1,0)
##
## Coefficients:
## ar1 ar2
## -0.1572 -0.0731
## s.e. 0.0290 0.0289
##
## sigma^2 estimated as 75.06: log likelihood=-4235.86
## AIC=8477.72 AICc=8477.74 BIC=8493.41
plot(forecast(auto.fit, h=12), ylab = "Number of Hours", xlab = "Year")
accuracy(forecast(auto.fit, 24), test.my_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1302523 8.652955 1.940939 NaN Inf 0.17887114
## Test set 1.0000000 1.000000 1.000000 0.6622517 0.6622517 0.09215702
## ACF1
## Training set -0.005076334
## Test set NA