Chapter 1. Dealing with the Dataset

1.1. Importing Libraries

library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(tseries)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr   1.1.2     ✔ readr   2.1.4
## ✔ forcats 1.0.0     ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2     ✔ tibble  3.2.1
## ✔ purrr   1.0.1     ✔ tidyr   1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(imputeTS)
## 
## Attaching package: 'imputeTS'
## 
## The following object is masked from 'package:tseries':
## 
##     na.remove

1.2. Loading the Dataset

# Load the raw Amazon price history from CSV.
# NOTE(review): this path is machine-specific; point it at your own copy of
# Amazon.csv (ideally a project-relative path) before running elsewhere.
# stringsAsFactors = FALSE keeps the Date column as plain text so it can be
# parsed into a Date later, instead of first building a factor with one level
# per distinct date string.
amazon <- read.csv(
  "C:/Users/nazir ali khan/Downloads/Amazon.csv",
  stringsAsFactors = FALSE
)
# View() opens a GUI data viewer, so only call it in an interactive session;
# this keeps the script runnable under Rscript / headless environments.
if (interactive()) {
  View(amazon)
}

1.3. Removing unnecessary columns

# Keep only the trading date and the closing price; no other column is used
# in the steps below.
amazon <- select(amazon, Date, Close)

# Peek at the first six rows of the trimmed data.
head(amazon, n = 6)
##         Date    Close
## 1 1997-05-15 1.958333
## 2 1997-05-16 1.729167
## 3 1997-05-19 1.708333
## 4 1997-05-20 1.635417
## 5 1997-05-21 1.427083
## 6 1997-05-22 1.395833
# Peek at the last six rows of the trimmed data.
tail(amazon, n = 6)
##            Date   Close
## 6150 2021-10-20 3415.06
## 6151 2021-10-21 3435.01
## 6152 2021-10-22 3335.55
## 6153 2021-10-25 3320.37
## 6154 2021-10-26 3376.07
## 6155 2021-10-27 3396.19

1.4. Dealing with Data types

# Parse the year-month-day text in Date into a proper Date vector.
amazon[["Date"]] <- ymd(amazon[["Date"]])
# Confirm the column types after the conversion.
glimpse(amazon)
## Rows: 6,155
## Columns: 2
## $ Date  <date> 1997-05-15, 1997-05-16, 1997-05-19, 1997-05-20, 1997-05-21, 199…
## $ Close <dbl> 1.958333, 1.729167, 1.708333, 1.635417, 1.427083, 1.395833, 1.50…

Chapter 2. Dealing with Time Series

2.1. Dealing with Missing Dates in the Time Series

# The raw data skips calendar days (e.g. 1997-05-16 is followed by
# 1997-05-19), so build a gap-free daily calendar spanning the observed range.
complete_dates <- seq(min(amazon$Date), max(amazon$Date), by = "days")

# Outer-join the observations onto the full calendar. The join key is spelled
# out so the merge cannot silently pick up another shared column; days without
# an observation get Close = NA.
complete_amazon <- merge(
  amazon,
  data.frame(Date = complete_dates),
  by = "Date",
  all = TRUE
)

# Sort by date so the rows are in chronological order for interpolation.
amazon <- complete_amazon[order(complete_amazon$Date), ]
# View() opens a GUI data viewer, so only call it in an interactive session.
if (interactive()) {
  View(amazon)
}

# Fill the NA closing prices by linear interpolation between the neighbouring
# observed values.
amazon_na <- na_interpolation(amazon, option = "linear")
if (interactive()) {
  View(amazon_na)
}

2.2. Converting Dataframe into Time Series

class(amazon_na)
## [1] "data.frame"
# Build a daily series with 365 observations per "year" period.
# ts() interprets `start` as c(period, position within period) and only reads
# the first two elements, so a c(year, month, day) triple would be taken as
# "observation 5 of 1997" and the day silently ignored. Express the first date
# as c(year, day-of-year) instead (1997-05-15 is day 135).
series_start <- amazon_na$Date[1]
ts_amazon <- ts(
  amazon_na$Close,
  start = c(year(series_start), yday(series_start)),
  frequency = 365
)
class(ts_amazon)
## [1] "ts"

2.3. Plotting the Time Series to Check for Stationarity

# Visual check: plot the closing-price series over time.
plot(ts_amazon)

# Formal check: Augmented Dickey-Fuller test, whose alternative hypothesis is
# that the series is stationary (a large p-value means non-stationarity
# cannot be rejected).
adf.test(ts_amazon)
## Warning in adf.test(ts_amazon): p-value greater than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_amazon
## Dickey-Fuller = 0.76791, Lag order = 20, p-value = 0.99
## alternative hypothesis: stationary

2.4. Making the Time Series stationary

# Take logs, then first differences (lag 1, one round of differencing), to
# remove the trend in the price level.
ld_close <- diff(log(ts_amazon), lag = 1, differences = 1)
# Plot the transformed series.
plot(ld_close)

2.5. Plotting the ACF and PACF to Check for Stationarity Again

# Autocorrelation function of the log-differenced series; the plot is drawn
# and the computed values are kept in acf_plot.
acf_plot <- acf(ld_close, plot = TRUE)

# Partial autocorrelation function of the same series, kept in pacf_plot.
pacf_plot <- pacf(ld_close, plot = TRUE)

Chapter 3. ARIMA Model

3.1. Creating the ARIMA Model

# Let auto.arima() search for the best-fitting specification and print it.
auto.arima(ld_close)
## Series: ld_close 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1   mean
##       0.0743  8e-04
## s.e.  0.0106  3e-04
## 
## sigma^2 = 0.0007366:  log likelihood = 19539.9
## AIC=-39073.81   AICc=-39073.81   BIC=-39052.52
# Fit the order reported above, ARIMA(1,0,0), and keep the fitted model.
arima_model <- arima(
  x = ld_close,
  order = c(1, 0, 0)
)

3.2. Forecasting for 5 years (365 days * 5)

# Forecast 5 years ahead at 365 daily steps per year (1825 steps in total).
# Note that the model was fitted on ld_close, so the forecasts are on the
# log-differenced scale rather than in price units.
forecast_amazon <- forecast(arima_model, h = 5 * 365)
# Plot the fitted series together with the forecast.
plot(forecast_amazon)

3.3. Checking the Accuracy of the Model

# Report training-set error measures for the fitted model (no hold-out set is
# passed, so these are in-sample figures on the log-differenced scale).
# NOTE(review): the NaN/Inf in MPE/MAPE below presumably come from dividing by
# observations equal to zero in ld_close -- confirm before quoting them.
accuracy(arima_model)
##                         ME      RMSE        MAE MPE MAPE      MASE         ACF1
## Training set -1.917987e-07 0.0271382 0.01550335 NaN  Inf 0.7908129 -0.001232615