Chapter 1. Dealing with Dataset

1.1. Importing Libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(prophet)
## Loading required package: Rcpp
## Loading required package: rlang
## 
## Attaching package: 'rlang'
## 
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice
library(imputeTS)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

1.2. Loading the Dataset

# Load the raw Amazon price history from CSV into a data frame.
# Text columns are kept as character (not factor) so the Date column can be
# parsed straight into Date values without a factor round-trip.
# NOTE(review): the absolute path is machine-specific; adjust it to where your
# copy of Amazon.csv lives.
amazon <- read.csv(
  "C:/Users/nazir ali khan/Downloads/Amazon.csv",
  stringsAsFactors = FALSE
)

# View() opens a data viewer window, so only call it in a live session; this
# keeps non-interactive runs (e.g. rendering the report) from depending on it.
if (interactive()) {
  View(amazon)
}

1.3. Removing Unnecessary Columns

# Keep only the date and the closing price; every other column is dropped.
amazon <- dplyr::select(amazon, Date, Close)
# Preview the first six rows of the reduced data.
head(amazon, n = 6L)
##         Date    Close
## 1 1997-05-15 1.958333
## 2 1997-05-16 1.729167
## 3 1997-05-19 1.708333
## 4 1997-05-20 1.635417
## 5 1997-05-21 1.427083
## 6 1997-05-22 1.395833
# Preview the last six rows to see where the series ends.
tail(amazon, n = 6L)
##            Date   Close
## 6150 2021-10-20 3415.06
## 6151 2021-10-21 3435.01
## 6152 2021-10-22 3335.55
## 6153 2021-10-25 3320.37
## 6154 2021-10-26 3376.07
## 6155 2021-10-27 3396.19

1.4. Dealing with Data Types

# Parse the year-month-day text in the Date column into Date values.
amazon[["Date"]] <- ymd(amazon[["Date"]])
# Confirm the resulting column types.
glimpse(amazon)
## Rows: 6,155
## Columns: 2
## $ Date  <date> 1997-05-15, 1997-05-16, 1997-05-19, 1997-05-20, 1997-05-21, 199…
## $ Close <dbl> 1.958333, 1.729167, 1.708333, 1.635417, 1.427083, 1.395833, 1.50…

Chapter 2. Dealing with Time Series

2.1. Dealing with Missing Dates in the Time Series

# Build one entry per calendar day between the first and last observed dates.
complete_dates <- seq(
  from = min(amazon$Date),
  to = max(amazon$Date),
  by = "day"
)

# Full outer join on Date: days absent from the original data get Close = NA.
complete_amazon <- merge(
  x = amazon,
  y = data.frame(Date = complete_dates),
  by = "Date",
  all = TRUE
)

# Re-order rows chronologically before interpolating.
amazon <- complete_amazon[order(complete_amazon[["Date"]]), ]

# Fill the NA closing prices by linear interpolation between known values.
amazon_na <- na_interpolation(x = amazon, option = "linear")
sum(is.na(amazon_na)) # should be 0 once every gap has been filled
## [1] 0

2.2. Preparing the Columns for Modelling

# Prophet expects its input data frame to have a date column named "ds" and a
# value column named "y", so rename Date -> ds and Close -> y.
names(amazon_na) <- c("ds", "y")

Chapter 3. Prophet Model & Forecasting

3.1. Creating the Model

# Fit a Prophet model on the gap-filled series using the package defaults.
model_prophet <- prophet(df = amazon_na)
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

3.2. Forecasting the Stock Price for the Next 5 Years

# Build the data frame of dates to predict on, extending the fitted model's
# dates by five years of daily steps (5 * 365 = 1825 days). The previous value
# of 1875 did not match the five-year horizon this section describes.
future_data <- make_future_dataframe(model_prophet, periods = 5 * 365)

# Predict for every date in future_data.
# NOTE: the object name "forecase_future" (sic) is kept unchanged because the
# plotting calls further down refer to it by this name.
forecase_future <- predict(model_prophet, future_data)

3.3. Plotting the Outcomes

# Interactive plot of the forecast produced by the fitted model.
dyplot.prophet(x = model_prophet, fcst = forecase_future)
## Warning: `select_()` was deprecated in dplyr 0.7.0.
## ℹ Please use `select()` instead.
## ℹ The deprecated feature was likely used in the prophet package.
##   Please report the issue at <https://github.com/facebook/prophet/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Plot the individual components of the forecast.
prophet_plot_components(m = model_prophet, fcst = forecase_future)