Remove current environment and load the packages required

rm(list = ls())

# load necessary libraries
library(ggplot2)
library(tseries)
library(forecast)
library(kableExtra)
library(dplyr)

1.0 Data Set

Read data in R using the below function

# chose where you store the data
riders=read.csv(file.choose())


# convert it into dataframe
as.data.frame(riders)

Assign it to a variable name

my_ts= ts(riders$Hours, start=c(1903), end = c(2019), frequency=12)

2.0 Plot

Plot time series plot to check the pattern in data. For this part, we use the time completed (number of hours)

ts.plot(my_ts,xlab = "Year", ylab= "Number of hours",
        main = "Bicyle Forecasting")

## Split data into train and test set
train.my_ts = window(my_ts, start=c(1903), end = c(2018), frequency=12)
test.my_ts = window(my_ts, start=c(2019), frequency=12)
## Plotting the train and Test set 
autoplot(train.my_ts, series="Train") +
  autolayer(test.my_ts, series="Test") +
  ggtitle("number of hours  Traning and Test data") +
  xlab("Year") + ylab("Hours") +
  guides(colour=guide_legend(title="Forecast"))
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

par(mfrow=c(1,2))

3.0 Use ARIMA

Fit arimamodel by using machine learning method

man.arima = arima(train.my_ts, order = c(1,1,1), seasonal = c(1,1,1), method = 'ML')
man.arima
## 
## Call:
## arima(x = train.my_ts, order = c(1, 1, 1), seasonal = c(1, 1, 1), method = "ML")
## 
## Coefficients:
##          ar1      ma1     sar1     sma1
##       0.6056  -0.7543  -0.0404  -0.9569
## s.e.  0.1087   0.0909   0.0320   0.0138
## 
## sigma^2 estimated as 75.47:  log likelihood = -4213.18,  aic = 8436.37

Plotting the forecast of manual arima for 12 advance periods

plot(forecast(man.arima, h=12), shadecols = "oldstyle")

Now we find the accuracy of the manual arima model

accuracy(forecast(man.arima, 24), test.my_ts)
##                      ME     RMSE      MAE       MPE      MAPE      MASE
## Training set -0.1543339 8.639480 2.971727       NaN       Inf 0.2738655
## Test set      1.1665138 1.166514 1.166514 0.7725257 0.7725257 0.1075024
##                     ACF1
## Training set -0.03052486
## Test set              NA

Fit auto arima for comparison

auto.fit = auto.arima(train.my_ts, trace = F, seasonal = T)
auto.fit
## Series: train.my_ts 
## ARIMA(2,1,0) 
## 
## Coefficients:
##           ar1      ar2
##       -0.1572  -0.0731
## s.e.   0.0290   0.0289
## 
## sigma^2 estimated as 75.06:  log likelihood=-4235.86
## AIC=8477.72   AICc=8477.74   BIC=8493.41
plot(forecast(auto.fit, h=12), ylab = "Number of Hours", xlab = "Year")

Accuracy of the Auto arima model, pick the best one

accuracy(forecast(auto.fit, 24), test.my_ts) 
##                      ME     RMSE      MAE       MPE      MAPE       MASE
## Training set -0.1302523 8.652955 1.940939       NaN       Inf 0.17887114
## Test set      1.0000000 1.000000 1.000000 0.6622517 0.6622517 0.09215702
##                      ACF1
## Training set -0.005076334
## Test set               NA

Tadaaa you can now choose which one is better.