Download Data

library(knitr)
# Load the BABA price history and turn one column into a time series with
# roughly 250 (trading-day) observations per year.
baba_raw <- read.csv(
  "C:/Users/jfbia/OneDrive/Documents/predictive analytics/discussions/BABA.csv",
  header = TRUE
)

# ts() reads a vector `start` as c(time unit, position within that unit).
# The earlier c(2018, 10, 27) therefore meant "10th observation of 2018" and
# the 27 was silently ignored. Convert the calendar date 2018-10-27 to its
# approximate trading-day slot within the year instead.
obs_per_year <- 250
first_day <- as.Date("2018-10-27")
day_of_year <- as.integer(format(first_day, "%j"))
start_slot <- floor((day_of_year - 1) / 365 * obs_per_year) + 1

# `end` is deliberately omitted: supplying both ends pins the series length,
# so ts() truncates or recycles the data when the file has a different number
# of rows. Letting ts() derive the end keeps every observation exactly once.
# NOTE(review): column 2 is assumed to hold the price to model -- confirm
# against the CSV header.
baba <- ts(baba_raw[, 2], start = c(2018, start_slot), frequency = obs_per_year)

plot(baba)

Trend Fit And Plot

library(fpp2)
## Warning: package 'fpp2' was built under R version 4.0.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3
## Warning: package 'forecast' was built under R version 4.0.3
## Warning: package 'fma' was built under R version 4.0.3
## Warning: package 'expsmooth' was built under R version 4.0.3
## 
# Linear trend: regress the series on a time index, then forecast ahead with
# forecast()'s default settings.
trend_model <- tslm(baba ~ trend)
fcast <- forecast(trend_model)

# Overlay the observed series, the in-sample fit and the forecast.
autoplot(baba, series = "Data") +
  autolayer(fitted(trend_model), series = "Fitted") +
  autolayer(fcast, series = "Forecast") +
  labs(x = "Year", y = "Price ($)", title = "Baba Linear Prediction") +
  guides(colour = guide_legend(title = " "))

Nonlinear Trend Fit and Plot: the cubic trend fits the data more closely than the linear one, but it risks overfitting

# Cubic trend: same regression as the linear model, with squared and cubed
# time-index terms added.
trend_nonlinear_model <- tslm(
  baba ~ trend + I(trend^2) + I(trend^3),
  data = baba
)
fcast <- forecast(trend_nonlinear_model)

# Overlay the observed series, the in-sample fit and the forecast.
autoplot(baba, series = "Data") +
  autolayer(fitted(trend_nonlinear_model), series = "Fitted") +
  autolayer(fcast, series = "Forecast") +
  labs(x = "Year", y = "Price", title = "Baba Non-Linear Prediction") +
  guides(colour = guide_legend(title = " "))

#head(baba.xts)

Check Residuals: the nonlinear model gets rid of the increasing residuals at the end of the plot. However, there is clearly autocorrelation in both models.

# Residual diagnostics for the linear trend model.
checkresiduals(trend_model)

## 
##  Breusch-Godfrey test for serial correlation of order up to 100
## 
## data:  Residuals from Linear regression model
## LM test = 483.05, df = 100, p-value < 2.2e-16
# Residual diagnostics for the cubic trend model.
checkresiduals(trend_nonlinear_model)

## 
##  Breusch-Godfrey test for serial correlation of order up to 100
## 
## data:  Residuals from Linear regression model
## LM test = 463.76, df = 100, p-value < 2.2e-16

Check Accuracy: the nonlinear model outperforms the linear model on training-set RMSE

# Training-set accuracy measures (ME, RMSE, MAE, ...) for the linear trend model.
accuracy(trend_model)
##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -2.092042e-15 19.88282 16.71296 -0.7724323 8.444423 0.2808105
##                   ACF1
## Training set 0.9698372
# Training-set accuracy measures (ME, RMSE, MAE, ...) for the cubic trend model.
accuracy(trend_nonlinear_model)
##                         ME     RMSE     MAE        MPE     MAPE      MASE
## Training set -1.700147e-15 13.64912 11.1408 -0.5047621 5.853373 0.1871873
##                   ACF1
## Training set 0.9509385