Chapter 1. Dealing with the Dataset
1.1. Importing Libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(prophet)
## Loading required package: Rcpp
## Loading required package: rlang
##
## Attaching package: 'rlang'
##
## The following objects are masked from 'package:purrr':
##
## %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
## flatten_raw, invoke, splice
library(imputeTS)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
1.2. Loading the Dataset
# Load the raw Amazon stock-price CSV into a data frame.
# NOTE(review): this is a machine-specific absolute path; point it at your own
# copy of Amazon.csv (ideally a project-relative path) before running.
# Text columns are kept as character rather than factor; the Date column is
# parsed into a proper Date with ymd() later in the script.
amazon <- read.csv(
  "C:/Users/nazir ali khan/Downloads/Amazon.csv",
  stringsAsFactors = FALSE
)
# View() opens a GUI data viewer, so only call it in an interactive session;
# it is skipped when the script runs non-interactively (e.g. via Rscript).
if (interactive()) {
  View(amazon)
}
1.3. Removing unnecessary columns
# Keep only the trading date and the closing price; the other columns are
# not used in the rest of the analysis
amazon <- select(amazon, Date, Close)
# Preview the first six rows
head(amazon, n = 6L)
## Date Close
## 1 1997-05-15 1.958333
## 2 1997-05-16 1.729167
## 3 1997-05-19 1.708333
## 4 1997-05-20 1.635417
## 5 1997-05-21 1.427083
## 6 1997-05-22 1.395833
# Preview the last six rows to see where the series ends
tail(amazon, n = 6L)
## Date Close
## 6150 2021-10-20 3415.06
## 6151 2021-10-21 3435.01
## 6152 2021-10-22 3335.55
## 6153 2021-10-25 3320.37
## 6154 2021-10-26 3376.07
## 6155 2021-10-27 3396.19
1.4. Dealing with Data Types
# Parse the year-month-day Date column into a proper Date vector
amazon[["Date"]] <- ymd(amazon[["Date"]])
# Confirm the resulting column types
glimpse(amazon)
## Rows: 6,155
## Columns: 2
## $ Date <date> 1997-05-15, 1997-05-16, 1997-05-19, 1997-05-20, 1997-05-21, 199…
## $ Close <dbl> 1.958333, 1.729167, 1.708333, 1.635417, 1.427083, 1.395833, 1.50…
Chapter 2. Dealing with Time Series
2.1. Dealing with Missing Dates in the Time Series
# Build the full daily calendar spanning the first to the last observed date
complete_dates <- seq(
  from = min(amazon$Date),
  to = max(amazon$Date),
  by = "days"
)
# Outer-join the observations onto the calendar; dates that have no
# observation come through with Close = NA
complete_amazon <- merge(
  x = amazon,
  y = data.frame(Date = complete_dates),
  by = "Date",
  all = TRUE
)
# Put the rows in chronological order
amazon <- complete_amazon[order(complete_amazon[["Date"]]), ]
# Fill the NA values by linear interpolation
amazon_na <- na_interpolation(amazon, option = "linear")
sum(is.na(amazon_na)) # number of NAs remaining after interpolation
## [1] 0
2.2. Preparing the Columns for Modelling
# Prophet expects its input data frame to have a date column named "ds" and
# a value column named "y", so rename the two columns accordingly
names(amazon_na) <- c("ds", "y")
Chapter 3. Prophet Model & Forecasting
3.1. Creating the Model
# Fit a Prophet model with default settings to the prepared ds/y data frame
model_prophet <- prophet(df = amazon_na)
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
3.2. Forecasting the Stock Price for the Next 5 Years
# Forecast horizon: 5 years of daily steps (5 * 365 = 1825 days). The previous
# hard-coded value of 1875 did not correspond to the stated 5-year horizon.
forecast_horizon_days <- 5 * 365
# Build the data frame of dates to forecast over
future_data <- make_future_dataframe(
  model_prophet,
  periods = forecast_horizon_days
)
# Generate predictions for every date in `future_data`.
# NOTE: the name `forecase_future` (sic) is kept because the plotting calls
# later in the file refer to it.
forecase_future <- predict(model_prophet, future_data)
3.3. Plotting the Outcomes
# Interactive plot of the fitted model together with its forecast
dyplot.prophet(x = model_prophet, fcst = forecase_future)
## Warning: `select_()` was deprecated in dplyr 0.7.0.
## ℹ Please use `select()` instead.
## ℹ The deprecated feature was likely used in the prophet package.
## Please report the issue at <https://github.com/facebook/prophet/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Plot the individual components of the forecast
prophet_plot_components(m = model_prophet, fcst = forecase_future)
