# loading required libraries
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(zoo)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(stats)
library(forecast)
library(astsa)
##
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
##
## gas
library(corrplot)
## corrplot 0.92 loaded
library(AER)
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## Loading required package: lmtest
## Loading required package: sandwich
## Loading required package: survival
library(vars)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: strucchange
## Loading required package: urca
library(dynlm)
library(vars)
library(TSstudio)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.4.3 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ stringr::boundary() masks strucchange::boundary()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks xts::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks xts::last()
## ✖ car::recode() masks dplyr::recode()
## ✖ MASS::select() masks dplyr::select()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sarima)
## Loading required package: stats4
##
## Attaching package: 'sarima'
##
## The following object is masked from 'package:astsa':
##
## sarima
##
## The following object is masked from 'package:stats':
##
## spectrum
library(dygraphs)
# import dataset
df<-read.csv("/Users/valeriacantulobo/Downloads/entretainment_stocks.csv")
# Transform it to a date format:
df$Date <- as.Date(df$Date,"%m/%d/%Y")
head(df)
## Date Disney_Adj_Close Netflix_Adj_Close Nintendo_Adj_Close
## 1 2007-01-01 29.12 3.26 37.10
## 2 2007-02-01 28.36 3.22 33.10
## 3 2007-03-01 28.51 3.31 36.30
## 4 2007-04-01 28.96 3.17 40.05
## 5 2007-05-01 29.34 3.13 43.65
## 6 2007-06-01 28.65 2.77 45.85
## WBD_Adj_Close EA_Adj_Close Paramount_Adj_Close
## 1 8.47 49.48 22.05
## 2 8.21 49.90 21.48
## 3 9.78 49.84 21.64
## 4 11.11 49.89 22.64
## 5 11.95 48.36 23.70
## 6 11.75 46.83 23.90
colnames(df)
## [1] "Date" "Disney_Adj_Close" "Netflix_Adj_Close"
## [4] "Nintendo_Adj_Close" "WBD_Adj_Close" "EA_Adj_Close"
## [7] "Paramount_Adj_Close"
### VISUALIZATION
#Plot the stock price / variable using a time series format.
# time series plot 1
plot(df$Date,df$Disney_Adj_Close,type="l",col="blue", lwd=2, xlab ="Date",ylab ="Adjusted Close Price", main = "Disney Stock Price")

#Decompose the time series data in observed, trend, seasonality, and random.
# 2) Decompose a time series
# Decomposition: observed + trend + seasonal + random
# observed: data observations
# trend: increasing / decreasing value of data observations
# seasonality: repeating short-term cycle in time series
# noise: random variation in time series
DSts<-ts(df$Disney_Adj_Close,frequency=4,start=c(2007,1),end=c(2022,4))
DS_ts_decompose<-decompose(DSts)
plot(DS_ts_decompose)

#Briefly comment on the following components:
#Do the time series data show a trend?
#it shows some positive trend as it shows the plot because from aproximatly year 2014 to the following the trend line goes up
#ii. Do the time series data show seasonality? How is the change of the seasonal component over time?
#the graph also shows seasonality as we can see how every year the graph behaves the same, in the begging of the years we can see how it is the lowest point and by the middle-end of the year it comes to its highest.
#### ESTIMATION
#Detect if the time series data is stationary.
# checking if variables are stationary or non-stationary
adf.test(DSts) # non-stationary (p-value > 0.05)
##
## Augmented Dickey-Fuller Test
##
## data: DSts
## Dickey-Fuller = -2.1834, Lag order = 3, p-value = 0.5006
## alternative hypothesis: stationary
acf(df$Disney_Adj_Close) #there is autocorrelation

### ARMA
# time series modeling
summary(DS_ARMA<-arma(log(df$Disney_Adj_Close),order=c(1,1)))
##
## Call:
## arma(x = log(df$Disney_Adj_Close), order = c(1, 1))
##
## Model:
## ARMA(1,1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2224570 -0.0384418 0.0002564 0.0444826 0.2104628
##
## Coefficient(s):
## Estimate Std. Error t value Pr(>|t|)
## ar1 0.99033 0.00831 119.176 <2e-16 ***
## ma1 0.05651 0.07998 0.706 0.48
## intercept 0.04591 0.03502 1.311 0.19
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Fit:
## sigma^2 estimated as 0.005385, Conditional Sum-of-Squares = 1.02, AIC = -452.16
plot(DS_ARMA)



DS_estimated_stock_price<-exp(DS_ARMA$fitted.values)
plot(DS_estimated_stock_price)

# model evaluation
# Testing serial autocorrelation in regression residuals
# Ho: There is no serial autocorrelation
# Ha: There is serial autocorrelation
DS_ARMA_residuals<-DS_ARMA$residuals
Box.test(DS_ARMA_residuals,lag=5,type="Ljung-Box") # Reject the Ho. P-value is < 0.05 indicating that ARMA Model does show residual serial autocorrelation.
##
## Box-Ljung test
##
## data: DS_ARMA_residuals
## X-squared = 4.0425, df = 5, p-value = 0.5433
adf.test(na.omit(DS_estimated_stock_price))
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(DS_estimated_stock_price)
## Dickey-Fuller = -2.9519, Lag order = 5, p-value = 0.1778
## alternative hypothesis: stationary
### ARIMA
### plotting log-transformation and differences
plot(df$Date,df$Disney_Adj_Close, type="l",col="blue", lwd=2, xlab ="Date",ylab ="Stock Price", main = "DISNEY Stock Price")

plot(df$Date,log(df$Disney_Adj_Close), type="l",col="blue", lwd=2, xlab ="Date",ylab ="log(Stock Price)", main = "DISNEY Stock Price")

plot(diff(log(df$Disney_Adj_Close)),type="l",ylab="first order difference",main = "Diff - DISNEY Stock Price")

adf.test(log(df$Disney_Adj_Close))
##
## Augmented Dickey-Fuller Test
##
## data: log(df$Disney_Adj_Close)
## Dickey-Fuller = -1.48, Lag order = 5, p-value = 0.7937
## alternative hypothesis: stationary
adf.test(diff(log(df$Disney_Adj_Close)))
## Warning in adf.test(diff(log(df$Disney_Adj_Close))): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(log(df$Disney_Adj_Close))
## Dickey-Fuller = -5.6716, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
# forecast (ARIMA)
DS_ARIMA <- Arima(df$Disney_Adj_Close,order=c(1,1,1))
print(DS_ARIMA)
## Series: df$Disney_Adj_Close
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## -0.2016 0.1505
## s.e. 0.6994 0.7028
##
## sigma^2 = 52.72: log likelihood = -648.66
## AIC=1303.33 AICc=1303.46 BIC=1313.09
plot(DS_ARIMA$residuals,main="ARIMA(1,1,1) - DISNEY Stock Price")

acf(DS_ARIMA$residuals,main="ACF - ARIMA (1,1,1)") # ACF plot displays weak or no autocorrelation.

Box.test(DS_ARIMA$residuals,lag=1,type="Ljung-Box") # P-value is > 0.05 indicating that ARMA model does not show residual serial autocorrelation.
##
## Box-Ljung test
##
## data: DS_ARIMA$residuals
## X-squared = 6.2494e-05, df = 1, p-value = 0.9937
adf.test(DS_ARIMA$residuals) # ADF test suggest that ARMA residuals are stationary since p-value is < 0.05.
## Warning in adf.test(DS_ARIMA$residuals): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: DS_ARIMA$residuals
## Dickey-Fuller = -4.8989, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
### EVALUATION
#Based on diagnostic tests, compare the 2 estimated time series regression models, and select the results that you consider might generate the best forecast.
#evaluating both models selected (ARMA and ARIMA) and evaluating all the factors to choose the best model first of all I check the AIC which for the best model it needs to be lowest AIC of all the models, in the ARMA the AIC was -452.16 and in the ARIMA was 1303.33 , so in the factor of AIC the best model is ARMA, then following this the next factor is about the residuals in the models, for both models the residuals are small so is a great sign because it is better to select a model with small amount of residuals, for the ARMA in the Augmented Dickey-Fuller Test had a p value of 0.1778 wich suggests non stationary, in the ARIMA for the same model the p value is 0.01 so there is some stationary, the in the ARMA model checking the Box-Ljung test the p value gave 0.5433 indicating no serial correlation and in ARIMA the p value was 0.9937 suggesting also no correlation
#checking the 3 factors in order of importance: stationary, serial correlation and at last AIC this were the results:
#ARMA:
# stationary: non stationary
# serial correlation: no serial correlation
#AIC: 1303.33
#ARIMA:
# stationary: stationary
# serial correlation: no serial correlation
#AIC: -452.16
#the model I select is ARMA because the non stationary has more importance in the model than the AIC, so for future forecasts this would be the best model
### FORECAST - ARMA
#By using the selected model, make a forecast for the next 5 periods. In doing so, include a time series plot showing your forecast.
# forecast (ARMA)
DS_ARMA_forecast<-forecast(DS_estimated_stock_price,h=5)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
DS_ARMA_forecast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 193 97.93749 88.20034 107.6746 83.04581 112.8292
## 194 97.93749 84.45913 111.4158 77.32413 118.5508
## 195 97.93749 81.53639 114.3386 72.85418 123.0208
## 196 97.93749 79.04822 116.8268 69.04884 126.8261
## 197 97.93749 76.84023 119.0347 65.67202 130.2030
plot(DS_ARMA_forecast)

autoplot(DS_ARMA_forecast)
