Chapter 1. Dealing with the Dataset
1.1. Importing Libraries
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tseries)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(imputeTS)
##
## Attaching package: 'imputeTS'
##
## The following object is masked from 'package:tseries':
##
## na.remove
1.2. Loading the Dataset
# Read the raw Amazon price history. Strings are kept as plain character
# vectors (not factors) so the Date column can be parsed directly later on.
# NOTE(review): this absolute path only exists on the original author's
# machine; point it at your local copy of Amazon.csv before running.
amazon <- read.csv(
  "C:/Users/nazir ali khan/Downloads/Amazon.csv",
  stringsAsFactors = FALSE
)
# View() opens a spreadsheet-style viewer, so only call it in an interactive
# session; it is skipped when the script is knitted or run in batch mode.
if (interactive()) {
  View(amazon)
}
1.3. Removing unnecessary columns
# Keep only the date and the closing price; no other column is used by the
# analysis that follows.
amazon <- select(amazon, Date, Close)
# Print the first six rows as a quick sanity check of the trimmed table
head(amazon, n = 6L)
## Date Close
## 1 1997-05-15 1.958333
## 2 1997-05-16 1.729167
## 3 1997-05-19 1.708333
## 4 1997-05-20 1.635417
## 5 1997-05-21 1.427083
## 6 1997-05-22 1.395833
# Print the last rows to see where the series ends
tail(amazon)
## Date Close
## 6150 2021-10-20 3415.06
## 6151 2021-10-21 3435.01
## 6152 2021-10-22 3335.55
## 6153 2021-10-25 3320.37
## 6154 2021-10-26 3376.07
## 6155 2021-10-27 3396.19
1.4. Dealing with Data Types
# Parse the year-month-day values of the Date column into proper Date objects
amazon <- mutate(amazon, Date = ymd(Date))
# Confirm the resulting column types
glimpse(amazon)
## Rows: 6,155
## Columns: 2
## $ Date <date> 1997-05-15, 1997-05-16, 1997-05-19, 1997-05-20, 1997-05-21, 199…
## $ Close <dbl> 1.958333, 1.729167, 1.708333, 1.635417, 1.427083, 1.395833, 1.50…
Chapter 2. Dealing with Time Series
2.1. Dealing with Missing Dates in the Time Series
# Build a calendar containing every day between the first and the last
# observed date
complete_dates <- seq(min(amazon$Date), max(amazon$Date), by = "days")
# Outer-join the observed prices onto that calendar: dates that have no
# observation (presumably weekends and market holidays) get Close = NA
complete_amazon <- merge(
  amazon,
  data.frame(Date = complete_dates),
  by = "Date",
  all = TRUE
)
# Sort chronologically so the interpolation below runs in date order
amazon <- complete_amazon[order(complete_amazon$Date), ]
# View() is interactive-only; skip it when knitting or running in batch mode
if (interactive()) {
  View(amazon)
}
# Fill the gaps by linear interpolation. Only Close is imputed: Date comes from
# the complete calendar and should not go through a numeric imputation routine.
amazon_na <- amazon
amazon_na$Close <- na_interpolation(amazon$Close, option = "linear")
if (interactive()) {
  View(amazon_na)
}
2.2. Converting Dataframe into Time Series
class(amazon_na)
## [1] "data.frame"
# ts() interprets a multi-element `start` as c(period, position within period)
# and uses only the first two elements, so c(1997, 5, 15) anchored the series
# at position 5 of 1997 (early January) instead of 15 May. With
# frequency = 365 the position is the day of the year, so derive both parts
# from the first date in the data.
# NOTE(review): frequency = 365 ignores leap days, so the time axis drifts by
# roughly one day every four years.
series_start <- min(amazon_na$Date)
ts_amazon <- ts(
  amazon_na$Close,
  start = c(year(series_start), yday(series_start)),
  frequency = 365
)
class(ts_amazon)
## [1] "ts"
2.3. Plotting the Time Series to Check for Stationarity
# Visual inspection of the raw price series before testing it formally
plot.ts(x = ts_amazon)

# Augmented Dickey-Fuller test: the alternative hypothesis is stationarity, so
# a large p-value means non-stationarity cannot be rejected
adf.test(x = ts_amazon)
## Warning in adf.test(ts_amazon): p-value greater than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: ts_amazon
## Dickey-Fuller = 0.76791, Lag order = 20, p-value = 0.99
## alternative hypothesis: stationary
2.4. Making the Time Series stationary
# Log-transform the prices, then take first differences; the result is the
# series of day-over-day log returns
ld_close <- ts_amazon %>%
  log() %>%
  diff()
# Plot the transformed series to inspect it visually
plot.ts(x = ld_close)

2.5. Plotting the ACF and PACF of the Differenced Series to Check for Stationarity
# Autocorrelation function of the differenced log series; the call draws the
# plot and the returned object is kept for later inspection
acf_plot <- acf(x = ld_close)

# Partial autocorrelation function of the same series
pacf_plot <- pacf(x = ld_close)

Chapter 3. ARIMA Model
3.1. Creating the ARIMA Model
# Let auto.arima() search the candidate orders and print the selected model
auto.arima(y = ld_close)
## Series: ld_close
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.0743 8e-04
## s.e. 0.0106 3e-04
##
## sigma^2 = 0.0007366: log likelihood = 19539.9
## AIC=-39073.81 AICc=-39073.81 BIC=-39052.52
# Fit the order reported by the automatic search above: one autoregressive
# term, no differencing, no moving-average term
arima_model <- arima(x = ld_close, order = c(1, 0, 0))
3.2. Forecasting for 5 years (365 days * 5)
# Forecast 5 years ahead at 365 steps per year. The model was fitted on
# ld_close, so the forecasts are log differences, not price levels.
forecast_amazon <- forecast(object = arima_model, h = 5 * 365)
# Plot the history together with the forecast and its prediction intervals
plot(x = forecast_amazon)

3.3. Checking the Accuracy of the Model
# Training-set error measures for the fitted model.
# NOTE(review): MPE and MAPE are reported as NaN/Inf in the output below,
# presumably because the differenced series contains zeros and percentage
# errors divide by the actual values. Prefer RMSE/MAE/MASE here; confirm.
accuracy(arima_model)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -1.917987e-07 0.0271382 0.01550335 NaN Inf 0.7908129 -0.001232615