# Loading
library(xts)
library(dplyr)
library(zoo)
library(tseries)
library(stats)
library(forecast)
library(astsa)
library(corrplot)
library(AER)
library(vars)
library(dynlm)
library(vars)
library(TSstudio)
library(tidyverse)
library(sarima)
library(dygraphs)
# Import dataset

stock_db<-read.csv( "/Users/genarorodriguezalcantara/Desktop/Tec/Introduction to econometrics/EXAM/entretainment_stocks.csv")

head(stock_db)
##       Date Disney_Adj_Close Netflix_Adj_Close Nintendo_Adj_Close WBD_Adj_Close
## 1 1/1/2007            29.12              3.26              37.10          8.47
## 2 2/1/2007            28.36              3.22              33.10          8.21
## 3 3/1/2007            28.51              3.31              36.30          9.78
## 4 4/1/2007            28.96              3.17              40.05         11.11
## 5 5/1/2007            29.34              3.13              43.65         11.95
## 6 6/1/2007            28.65              2.77              45.85         11.75
##   EA_Adj_Close Paramount_Adj_Close
## 1        49.48               22.05
## 2        49.90               21.48
## 3        49.84               21.64
## 4        49.89               22.64
## 5        48.36               23.70
## 6        46.83               23.90
colnames(stock_db)
## [1] "Date"                "Disney_Adj_Close"    "Netflix_Adj_Close"  
## [4] "Nintendo_Adj_Close"  "WBD_Adj_Close"       "EA_Adj_Close"       
## [7] "Paramount_Adj_Close"
# Setting time series format
# Here we are checking the date data in our database
stock_db$Date
##   [1] "1/1/2007"  "2/1/2007"  "3/1/2007"  "4/1/2007"  "5/1/2007"  "6/1/2007" 
##   [7] "7/1/2007"  "8/1/2007"  "9/1/2007"  "10/1/2007" "11/1/2007" "12/1/2007"
##  [13] "1/1/2008"  "2/1/2008"  "3/1/2008"  "4/1/2008"  "5/1/2008"  "6/1/2008" 
##  [19] "7/1/2008"  "8/1/2008"  "9/1/2008"  "10/1/2008" "11/1/2008" "12/1/2008"
##  [25] "1/1/2009"  "2/1/2009"  "3/1/2009"  "4/1/2009"  "5/1/2009"  "6/1/2009" 
##  [31] "7/1/2009"  "8/1/2009"  "9/1/2009"  "10/1/2009" "11/1/2009" "12/1/2009"
##  [37] "1/1/2010"  "2/1/2010"  "3/1/2010"  "4/1/2010"  "5/1/2010"  "6/1/2010" 
##  [43] "7/1/2010"  "8/1/2010"  "9/1/2010"  "10/1/2010" "11/1/2010" "12/1/2010"
##  [49] "1/1/2011"  "2/1/2011"  "3/1/2011"  "4/1/2011"  "5/1/2011"  "6/1/2011" 
##  [55] "7/1/2011"  "8/1/2011"  "9/1/2011"  "10/1/2011" "11/1/2011" "12/1/2011"
##  [61] "1/1/2012"  "2/1/2012"  "3/1/2012"  "4/1/2012"  "5/1/2012"  "6/1/2012" 
##  [67] "7/1/2012"  "8/1/2012"  "9/1/2012"  "10/1/2012" "11/1/2012" "12/1/2012"
##  [73] "1/1/2013"  "2/1/2013"  "3/1/2013"  "4/1/2013"  "5/1/2013"  "6/1/2013" 
##  [79] "7/1/2013"  "8/1/2013"  "9/1/2013"  "10/1/2013" "11/1/2013" "12/1/2013"
##  [85] "1/1/2014"  "2/1/2014"  "3/1/2014"  "4/1/2014"  "5/1/2014"  "6/1/2014" 
##  [91] "7/1/2014"  "8/1/2014"  "9/1/2014"  "10/1/2014" "11/1/2014" "12/1/2014"
##  [97] "1/1/2015"  "2/1/2015"  "3/1/2015"  "4/1/2015"  "5/1/2015"  "6/1/2015" 
## [103] "7/1/2015"  "8/1/2015"  "9/1/2015"  "10/1/2015" "11/1/2015" "12/1/2015"
## [109] "1/1/2016"  "2/1/2016"  "3/1/2016"  "4/1/2016"  "5/1/2016"  "6/1/2016" 
## [115] "7/1/2016"  "8/1/2016"  "9/1/2016"  "10/1/2016" "11/1/2016" "12/1/2016"
## [121] "1/1/2017"  "2/1/2017"  "3/1/2017"  "4/1/2017"  "5/1/2017"  "6/1/2017" 
## [127] "7/1/2017"  "8/1/2017"  "9/1/2017"  "10/1/2017" "11/1/2017" "12/1/2017"
## [133] "1/1/2018"  "2/1/2018"  "3/1/2018"  "4/1/2018"  "5/1/2018"  "6/1/2018" 
## [139] "7/1/2018"  "8/1/2018"  "9/1/2018"  "10/1/2018" "11/1/2018" "12/1/2018"
## [145] "1/1/2019"  "2/1/2019"  "3/1/2019"  "4/1/2019"  "5/1/2019"  "6/1/2019" 
## [151] "7/1/2019"  "8/1/2019"  "9/1/2019"  "10/1/2019" "11/1/2019" "12/1/2019"
## [157] "1/1/2020"  "2/1/2020"  "3/1/2020"  "4/1/2020"  "5/1/2020"  "6/1/2020" 
## [163] "7/1/2020"  "8/1/2020"  "9/1/2020"  "10/1/2020" "11/1/2020" "12/1/2020"
## [169] "1/1/2021"  "2/1/2021"  "3/1/2021"  "4/1/2021"  "5/1/2021"  "6/1/2021" 
## [175] "7/1/2021"  "8/1/2021"  "9/1/2021"  "10/1/2021" "11/1/2021" "12/1/2021"
## [181] "1/1/2022"  "2/1/2022"  "3/1/2022"  "4/1/2022"  "5/1/2022"  "6/1/2022" 
## [187] "7/1/2022"  "8/1/2022"  "9/1/2022"  "10/1/2022" "11/1/2022" "12/1/2022"
# Suponiendo que las fechas están en formato "dd/mm/yyyy"
# Convert the date column to the desired format
stock_db$Date <- as.Date(stock_db$Date, format = "%m/%d/%Y")

# Plot the stock price using the updated date format
plot(stock_db$Date, stock_db$Disney_Adj_Close, type = "l", col = "blue", lwd = 1, 
     xlab = "Date", ylab = "Adjusted Close Price", main = "Disney Stock Price")

 # para que tome en cuenta el mes, dia y  año - permite visualizar la linea de tiempo
summary(stock_db$Disney_Adj_Close) # checar el precio mínimo, máximo y mediana
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.44   31.55   85.92   78.71  108.26  189.04

Visualization

• Plot the stock price / variable using a time series format.

class(stock_db$Date)
## [1] "Date"
# time series plot 1
#plot(stock_db$Date,stock_db$Disney_Adj_Close,type="l",col="blue", lwd=1, xlab ="Date",ylab ="Adjusted Close Price", main = "Disney Stock Price")
# time series plot 1
plot(stock_db$Date, stock_db$Disney_Adj_Close, type = "l", col = "blue", lwd = 1, 
     xlab = "Date", ylab = "Adjusted Close Price", main = "Disney Stock Price")

#en esta serie de tiempo primero se observa una tendencia decreciente ( en este caso )
  #hay que hacer la serie estaciona
# Time series plot 2

stock_dbxts<-xts(stock_db$Disney_Adj_Close,order.by=stock_db$Date)
plot(stock_dbxts)

# ti series plot 3 ## mejor plot
dygraph(stock_dbxts, main = "Disney Stock") %>% 
  dyOptions(colors = RColorBrewer::brewer.pal(4, "Dark2")) %>%
  dyShading(from = "2018-07-02",
            to = "2022-07-04", 
            color = "#FFE6E6")

• Decompose the time series data in observed, trend, seasonality, and random.

stock_dbts<-ts(stock_db$Disney_Adj_Close,frequency=52,start=c(2015,1))
stock_db_ts_decompose<-decompose(stock_dbts)
plot(stock_db_ts_decompose)

# ver el comportamiendo de las variables randome (cambio abruptos, cariables desconocidas, si es constante y no hay mucho cambio significa que
# la variaciĂłn de las acciones depende solamente de la informaciĂłn conocida)

• Briefly comment on the following components:

  1. Do the time series data show a trend? Yes, the stock price does show a tend. The trend component obtained from the decomposition of the time series data indicates a consistent up and downward trend trend over the analyzed period. This suggests that the stock price has been slowly increasing over time with some retractions.

  2. Do the time series data show seasonality? How is the change of the seasonal component over time? Regarding the seasonality, the graph does shows a season trend, pretty consistent except on 2018 to 2022, where the trend changes for unexpected conditions of the market. #### Estimation

• Detect if the time series data is stationary.

# Stationary Test
adf.test(stock_db$Disney_Adj_Close)   ### H0: Non-stationary and HA: Stationary. p-values < 0.05 reject the H0. 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  stock_db$Disney_Adj_Close
## Dickey-Fuller = -2.363, Lag order = 5, p-value = 0.4243
## alternative hypothesis: stationary
                         ### P-Value > 0.05. Fails to Reject the H0. Time series data is non-stationary.

• Detect if the time series data shows serial autocorrelation.

acf(stock_db$Disney_Adj_Close,main="Significant Autocorrelations") 

• Estimate 3 different time series regression models. You might want to consider ARMA (p,q) and / or ARIMA (p,d,q).

# esta en primer orden + t-1
summary(stock_db_ARMA<-arma(log(stock_db$Disney_Adj_Close),order=c(1,1))) #transforma en estacionaria
## 
## Call:
## arma(x = log(stock_db$Disney_Adj_Close), order = c(1, 1))
## 
## Model:
## ARMA(1,1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2224570 -0.0384418  0.0002564  0.0444826  0.2104628 
## 
## Coefficient(s):
##            Estimate  Std. Error  t value Pr(>|t|)    
## ar1         0.99033     0.00831  119.176   <2e-16 ***
## ma1         0.05651     0.07998    0.706     0.48    
## intercept   0.04591     0.03502    1.311     0.19    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Fit:
## sigma^2 estimated as 0.005385,  Conditional Sum-of-Squares = 1.02,  AIC = -452.16
plot(stock_db_ARMA)

# Omit NA
stock_db_ARMA$residuals
##   [1]            NA -3.975615e-02 -6.044651e-03  2.486883e-03 -4.682321e-04
##   [6] -3.700972e-02 -4.509770e-02  6.643849e-03  9.138083e-03 -6.663564e-03
##  [11] -5.690224e-02 -3.712740e-02 -7.948148e-02  7.241990e-02 -5.047956e-02
##  [16]  2.156059e-02  2.062639e-02 -8.892377e-02 -3.678583e-02  5.151027e-02
##  [21] -6.952477e-02 -1.799639e-01 -1.457826e-01 -1.846651e-03 -9.495974e-02
##  [26] -2.224570e-01  7.230671e-02  1.638043e-01  7.402301e-02 -5.833052e-02
##  [31]  6.025503e-02  1.628143e-02  3.665065e-02 -2.078199e-02  8.470976e-02
##  [36]  4.590505e-02 -9.227371e-02  4.624878e-02  9.460692e-02  3.553551e-02
##  [41] -1.118422e-01 -6.607403e-02  5.713915e-02 -5.148164e-02  6.478177e-03
##  [46]  7.384472e-02 -6.301334e-03  1.488772e-02  3.360568e-02  1.044289e-01
##  [51] -3.167807e-02 -8.698597e-03 -4.484135e-02 -7.276143e-02 -1.841382e-02
##  [56] -1.365597e-01 -1.267670e-01  1.381448e-01  7.016250e-03  3.198019e-02
##  [61]  3.946329e-02  6.271580e-02  2.728959e-02 -2.740278e-02  4.957406e-02
##  [66]  4.623694e-02  1.209182e-03 -2.818300e-03  4.624146e-02 -7.369546e-02
##  [71]  5.695589e-03 -6.845109e-03  8.532090e-02  7.976967e-05  3.154162e-02
##  [76]  9.147944e-02 -8.119839e-03 -5.252415e-03  1.696302e-02 -6.833754e-02
##  [81]  5.522627e-02  5.201340e-02  1.910206e-02  7.308297e-02 -4.778269e-02
##  [86]  1.044712e-01 -1.933280e-02 -1.237401e-02  5.349745e-02  1.346458e-02
##  [91] -2.785664e-03  4.200977e-02 -1.506293e-02  2.356390e-02  7.903912e-03
##  [96]  1.465884e-02 -2.586885e-02  1.331360e-01 -1.393636e-03  3.434622e-02
## [101]  1.189001e-02  3.177280e-02  4.747387e-02 -1.608194e-01  1.039884e-02
## [106]  1.045221e-01 -9.014406e-03 -7.703948e-02 -8.308617e-02 -7.636156e-04
## [111]  3.650607e-02  3.496646e-02 -4.350341e-02 -1.376218e-02 -2.074365e-02
## [116] -9.491410e-03 -1.896487e-02 -3.445737e-03  6.465541e-02  4.459701e-02
## [121]  6.354404e-02 -9.520665e-03  2.929485e-02  1.700555e-02 -6.986750e-02
## [126] -1.290793e-02  3.352632e-02 -7.812199e-02 -2.345382e-02 -8.352717e-03
## [131]  6.772082e-02  2.028754e-02  1.654701e-02 -5.383188e-02 -2.497595e-02
## [136] -1.322910e-03 -1.008671e-02  5.112833e-02  7.610134e-02 -1.035024e-02
## [141]  4.306430e-02 -2.071430e-02  6.692561e-03 -5.249303e-02  2.691099e-02
## [146]  9.855998e-03 -1.707354e-02  2.104628e-01 -4.700060e-02  5.974712e-02
## [151]  2.220681e-02 -3.415246e-02 -4.834130e-02  8.386999e-04  1.552296e-01
## [156] -5.313111e-02 -3.358486e-02 -1.581299e-01 -1.880086e-01  1.218493e-01
## [161]  7.370739e-02 -5.462299e-02  5.030629e-02  1.174457e-01 -6.623194e-02
## [166] -1.862356e-02  2.009604e-01  1.932666e-01 -8.107006e-02  1.252103e-01
## [171] -2.649887e-02  1.413687e-02 -3.660300e-02 -9.953395e-03  6.057120e-03
## [176]  3.330035e-02 -6.675778e-02  6.884287e-03 -1.509604e-01  7.740749e-02
## [181] -8.160393e-02  4.436810e-02 -7.923403e-02 -1.998060e-01  2.564112e-04
## [186] -1.573660e-01  1.237955e-01  4.702761e-02 -1.753487e-01  1.296848e-01
## [191] -9.297644e-02 -1.154471e-01
stock_db_ARMA$residuals <- na.omit(stock_db_ARMA$residuals)
stock_db_estimated_stock_price<-exp(stock_db_ARMA$fitted.values) # se vuelven a transformar las variables a los originales(si aplica log se tiene que aplicar el exponencial(es lo contrario))
plot(stock_db_estimated_stock_price)

stock_db_estimated_stock_price<-exp(stock_db_ARMA$fitted.values)

Model Evaluation:

# Ho: There is no serial autocorrelation 
# H1: There is serial autocorrelation
stock_db_ARMA_residuals<-stock_db_ARMA$residuals
Box.test(stock_db_ARMA_residuals,lag=5,type="Ljung-Box") # Reject the Ho. P-value is < 0.05 indicating that ARMA Model does show residual serial autocorrelation. 
## 
##  Box-Ljung test
## 
## data:  stock_db_ARMA_residuals
## X-squared = 4.0425, df = 5, p-value = 0.5433
adf.test(stock_db_ARMA$residuals)
## Warning in adf.test(stock_db_ARMA$residuals): p-value smaller than printed
## p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  stock_db_ARMA$residuals
## Dickey-Fuller = -5.6068, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
# Forecast (ARMA)
stock_db_ARMA_forecast<-forecast(stock_db_estimated_stock_price,h=5)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
stock_db_ARMA_forecast
##     Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 193       97.93749 88.20034 107.6746 83.04581 112.8292
## 194       97.93749 84.45913 111.4158 77.32413 118.5508
## 195       97.93749 81.53639 114.3386 72.85418 123.0208
## 196       97.93749 79.04822 116.8268 69.04884 126.8261
## 197       97.93749 76.84023 119.0347 65.67202 130.2030
plot(stock_db_ARMA_forecast)

autoplot(stock_db_ARMA_forecast)

# Forecast (ARIMA)

stock_db_ARIMA <- Arima(stock_db$Disney_Adj_Close,order=c(1,1,1))
print(stock_db_ARIMA)
## Series: stock_db$Disney_Adj_Close 
## ARIMA(1,1,1) 
## 
## Coefficients:
##           ar1     ma1
##       -0.2016  0.1505
## s.e.   0.6994  0.7028
## 
## sigma^2 = 52.72:  log likelihood = -648.66
## AIC=1303.33   AICc=1303.46   BIC=1313.09
plot(stock_db_ARIMA$residuals,main="ARIMA(1,1,1) - stock_db Stock Price")

acf(stock_db_ARIMA$residuals,main="ACF - ARIMA (1,1,1)")                # ACF plot displays weak or no autocorrelation. 

Box.test(stock_db_ARIMA$residuals,lag=1,type="Ljung-Box")               # P-value is > 0.05 indicating that ARMA model does not show residual serial autocorrelation. 
## 
##  Box-Ljung test
## 
## data:  stock_db_ARIMA$residuals
## X-squared = 6.2494e-05, df = 1, p-value = 0.9937
adf.test(stock_db_ARIMA$residuals)                                      # ADF test suggest that ARMA residuals are stationary since p-value is < 0.05.
## Warning in adf.test(stock_db_ARIMA$residuals): p-value smaller than printed
## p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  stock_db_ARIMA$residuals
## Dickey-Fuller = -4.8989, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
# Plotting log-transformation and differences
plot(stock_db$Date,stock_db$Disney_Adj_Close, type="l",col="blue", lwd=2, xlab ="Date",ylab ="Stock Price", main = "stock_db Stock Price") # serie original

plot(stock_db$Date,log(stock_db$Disney_Adj_Close), type="l",col="blue", lwd=2, xlab ="Date",ylab ="log(Stock Price)", main = "stock_db Stock Price") # variable con log, mismo comportamiento

plot(diff(log(stock_db$Disney_Adj_Close)),type="l",ylab="first order difference",main = "Diff - stock_db Stock Price") # cambia el comportamiento, ahora es estacionaria!! y se puede aplicar modelo ARIMA para pronĂłstico (no es el precio real de la acciĂłn)

adf.test(log(stock_db$Disney_Adj_Close))
## 
##  Augmented Dickey-Fuller Test
## 
## data:  log(stock_db$Disney_Adj_Close)
## Dickey-Fuller = -1.48, Lag order = 5, p-value = 0.7937
## alternative hypothesis: stationary
adf.test(diff(log(stock_db$Disney_Adj_Close)))
## Warning in adf.test(diff(log(stock_db$Disney_Adj_Close))): p-value smaller than
## printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(log(stock_db$Disney_Adj_Close))
## Dickey-Fuller = -5.6716, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
# Forecast using ARIMA(1,1,1)
stock_db_ARIMA_forecast <- forecast(stock_db_ARIMA, h = 5)
stock_db_ARIMA_forecast
##     Point Forecast    Lo 80     Hi 80    Lo 95    Hi 95
## 193       87.35756 78.05268  96.66244 73.12698 101.5881
## 194       87.26126 74.43399 100.08853 67.64365 106.8789
## 195       87.28068 71.65380 102.90756 63.38143 111.1799
## 196       87.27677 69.29024 105.26329 59.76875 114.7848
## 197       87.27756 67.20518 107.34993 56.57951 117.9756
# Plotting the forecast
plot(stock_db_ARIMA_forecast, main = "Forecast - ARIMA(1,1,1)")

# Print the forecast results for each model
print(stock_db_ARMA_forecast)
##     Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 193       97.93749 88.20034 107.6746 83.04581 112.8292
## 194       97.93749 84.45913 111.4158 77.32413 118.5508
## 195       97.93749 81.53639 114.3386 72.85418 123.0208
## 196       97.93749 79.04822 116.8268 69.04884 126.8261
## 197       97.93749 76.84023 119.0347 65.67202 130.2030
print(stock_db_ARIMA_forecast)
##     Point Forecast    Lo 80     Hi 80    Lo 95    Hi 95
## 193       87.35756 78.05268  96.66244 73.12698 101.5881
## 194       87.26126 74.43399 100.08853 67.64365 106.8789
## 195       87.28068 71.65380 102.90756 63.38143 111.1799
## 196       87.27677 69.29024 105.26329 59.76875 114.7848
## 197       87.27756 67.20518 107.34993 56.57951 117.9756
# Plotting the forecast
plot(stock_db_ARIMA_forecast, main = "Forecast - ARIMA(1,1,1)")

# Plotting the forecast
plot(stock_db_ARIMA_forecast, main = "Forecast - ARIMA(1,1,1)")
Describe meaningful information from data analytics
  1. Stationarity: Based on the Augmented Dickey-Fuller test results, the p-value is 0.4243. Since the p-value is greater than the significance level of 0.05, we fail to reject the null hypothesis. This suggests that the time series data for the Disney stock price is non-stationary. Non-stationary data typically exhibits trends, seasonality, or other patterns that change over time. To make the time series data stationary, you can apply transformations such as differencing or logarithmic transformations to remove trends and seasonality.

  2. Forecast Based on the forecasted values, it appears that the ARIMA model (ARIMA(1,1,1)) might generate the best forecast. The forecasted values for the next 5 periods are as follows: Period 193: Point Forecast = 87.35756, 80% Lower Bound = 78.05268, 80% Upper Bound = 96.66244, 95% Lower Bound = 73.12698, 95% Upper Bound = 101.5881 Period 194: Point Forecast = 87.26126, 80% Lower Bound = 74.43399, 80% Upper Bound = 100.08853, 95% Lower Bound = 67.64365, 95% Upper Bound = 106.8789 Period 195: Point Forecast = 87.28068, 80% Lower Bound = 71.65380, 80% Upper Bound = 102.90756, 95% Lower Bound = 63.38143, 95% Upper Bound = 111.1799 Period 196: Point Forecast = 87.27677, 80% Lower Bound = 69.29024, 80% Upper Bound = 105.26329, 95% Lower Bound = 59.76875, 95% Upper Bound = 114.7848 Period 197: Point Forecast = 87.27756, 80% Lower Bound = 67.20518, 80% Upper Bound = 107.34993, 95% Lower Bound = 56.57951, 95% Upper Bound = 117.9756