#required libraries 
library(xts)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(dplyr)

## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:xts':
## 
##     first, last

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(zoo)
library(tseries)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(stats)
library(forecast)
library(astsa)

## 
## Attaching package: 'astsa'

## The following object is masked from 'package:forecast':
## 
##     gas

library(corrplot)

## corrplot 0.92 loaded

#library(AER)
library(vars)

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## Loading required package: strucchange

## Loading required package: sandwich

## Loading required package: urca

## Loading required package: lmtest

library(dynlm)
library(vars)
library(TSstudio)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.3     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ stringr::boundary() masks strucchange::boundary()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::first()      masks xts::first()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ dplyr::last()       masks xts::last()
## ✖ MASS::select()      masks dplyr::select()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(sarima)

## Loading required package: stats4
## 
## Attaching package: 'sarima'
## 
## The following object is masked from 'package:astsa':
## 
##     sarima
## 
## The following object is masked from 'package:stats':
## 
##     spectrum

library(dygraphs)
library(tseries)

# import dataset
DQ<-read.csv("ts_stock_prices/DQ.csv")
head(DQ)

##         Date  Open  High   Low Close Adj.Close  Volume
## 1 2015-01-05 5.036 5.080 4.164 4.796     4.796 2532500
## 2 2015-01-12 4.760 4.760 4.002 4.098     4.098 2404500
## 3 2015-01-19 4.086 4.100 3.540 3.866     3.866 1776000
## 4 2015-01-26 3.900 4.020 3.784 3.936     3.936 1177000
## 5 2015-02-02 3.980 4.900 3.838 3.984     3.984 4395000
## 6 2015-02-09 3.912 4.458 3.774 4.208     4.208 2046500

# setting time series format 
DQ$Date<-as.Date(DQ$Date,"%Y-%m-%d") 
summary(DQ$Adj.Close)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.442   4.714   8.430  21.631  40.320 123.830

# visualizing time series data 

# time series plot 1
plot(DQ$Date,DQ$Adj.Close,type="l",col="blue", lwd=2, xlab ="Date",ylab ="Adjusted Close Price", main = "Dago New Energy Stock Price")

# time series plot 2
DQxts<-xts(DQ$Adj.Close,order.by=DQ$Date)
plot(DQxts)

What did happen to DQ Stock Price?

i) began to have a downward trend in March 2023

ii) Daqo New Energy’s performance in the second quarter of 2023 fell short of expectations.

Source: https://finance.yahoo.com/news/daqo-energy-dq-unit-approves-135400489.html

# time series plot 3
dygraph(DQxts, main = "Dago New Energy Stock Price") %>% 
  dyOptions(colors = RColorBrewer::brewer.pal(4, "Dark2")) %>%
  dyShading(from = "2018-12-3",
            to = "2022-12-26", 
            color = "#FFE6E6")

model estimation

# 1) Stationary Test 
adf.test(DQ$Adj.Close)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  DQ$Adj.Close
## Dickey-Fuller = -2.3643, Lag order = 7, p-value = 0.4235
## alternative hypothesis: stationary

2) Decompose a time series

Decomposition: observed + trend + seasonal + random

observed: data observations

trend: increasing / decreasing value of data observations

seasonality: repeating short-term cycle in time series

noise: random variation in time series

DQts<-ts(DQ$Adj.Close,frequency=52,start=c(2015,1))
DQ_ts_decompose<-decompose(DQts)
plot(DQ_ts_decompose)

3) Serial Autocorrelation

ACF plots: correlation between two periods in a time series is referred as autocorrelation function (acf)

acf(DQ$Adj.Close,main="Significant Autocorrelations")  # Generally, non-stationary time series data show the presence of serial autocorrelation.

# time series modeling 
summary(DQ_ARMA<-arma(log(DQ$Adj.Close),order=c(1,1)))

## 
## Call:
## arma(x = log(DQ$Adj.Close), order = c(1, 1))
## 
## Model:
## ARMA(1,1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.302793 -0.056798 -0.003254  0.056210  0.300941 
## 
## Coefficient(s):
##            Estimate  Std. Error  t value Pr(>|t|)    
## ar1        0.996800    0.004339  229.720   <2e-16 ***
## ma1       -0.016989    0.047336   -0.359    0.720    
## intercept  0.012918    0.011622    1.111    0.266    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Fit:
## sigma^2 estimated as 0.009422,  Conditional Sum-of-Squares = 3.91,  AIC = -755.78

plot(DQ_ARMA)

## c. Estimation ####- Detect if the time series data is stationary.

####The original data has a p-value of 0.4235, so the original data is non-stationary.

####- Detect if the time series data shows serial autocorrelation.

We used the autocorrelation function plots to check for serial autocorrelation in the data and it shows serial autocorrelation.

- Estimate 3 different time series regression models. You might want to consider ARMA (p,q) and / or ARIMA (p,d,q).

We estimated both ARMA and ARIMA models, but the ARMA model has residual autocorrelation.

d. model evaluation

Testing serial autocorrelation in regression residuals

Ho: There is no serial autocorrelation

Ha: There is serial autocorrelation

DQ_ARMA_residuals<-DQ_ARMA$residuals
Box.test(DQ_ARMA_residuals,lag=5,type="Ljung-Box") # Accept the H0. P-value is > 0.05 indicating that ARMA Model does not show residual serial autocorrelation.

## 
##  Box-Ljung test
## 
## data:  DQ_ARMA_residuals
## X-squared = 1.1324, df = 5, p-value = 0.9512

DQ_estimated_stock_price <- exp(DQ_ARMA$fitted.values)
plot(DQ_estimated_stock_price)

missing_values <- is.na(DQ_estimated_stock_price)
DQ_estimated_stock_price <- DQ_estimated_stock_price[!missing_values]
adf.test(DQ_estimated_stock_price)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  DQ_estimated_stock_price
## Dickey-Fuller = -2.3565, Lag order = 7, p-value = 0.4268
## alternative hypothesis: stationary

- Based on diagnostic tests, compare the 3 estimated time series regression models, and select the results that you consider might generate the best forecast.

ARIMA(1,1,1) appears to be the best model because of several reasons:

It achieved stationarity through differencing, it did not show significant residual autocorrelation and it provides a good framework for modeling and forecasting time series. It also shows a p-value of 0.685 compared to ARMA`s p-value of 0.9512.

# forecast (ARMA)
DQ_ARMA_forecast<-forecast(DQ_estimated_stock_price,h=5)
DQ_ARMA_forecast

##     Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 417       40.63238 35.56019 45.70457 32.87514 48.38962
## 418       40.63238 33.45222 47.81254 29.65127 51.61349
## 419       40.63238 31.82182 49.44294 27.15779 54.10697
## 420       40.63238 30.43712 50.82764 25.04008 56.22468
## 421       40.63238 29.20833 52.05643 23.16080 58.10396

plot(DQ_ARMA_forecast)

autoplot(DQ_ARMA_forecast)

### plotting log-transformation and differences
plot(DQ$Date,DQ$Adj.Close, type="l",col="blue", lwd=2, xlab ="Date",ylab ="Stock Price", main = "GE Stock Price")

plot(DQ$Date,log(DQ$Adj.Close), type="l",col="blue", lwd=2, xlab ="Date",ylab ="log(Stock Price)", main = "GE Stock Price")

plot(diff(log(DQ$Adj.Close)),type="l",ylab="first order difference",main = "Diff - DQ Stock Price")

adf.test(log(DQ$Adj.Close))

## 
##  Augmented Dickey-Fuller Test
## 
## data:  log(DQ$Adj.Close)
## Dickey-Fuller = -2.1766, Lag order = 7, p-value = 0.5028
## alternative hypothesis: stationary

adf.test(diff(log(DQ$Adj.Close)))

## Warning in adf.test(diff(log(DQ$Adj.Close))): p-value smaller than printed
## p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(log(DQ$Adj.Close))
## Dickey-Fuller = -6.9675, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary

# forecast (ARIMA)
DQ_ARIMA <- Arima(DQ$Adj.Close,order=c(1,1,1))
print(DQ_ARIMA)

## Series: DQ$Adj.Close 
## ARIMA(1,1,1) 
## 
## Coefficients:
##           ar1     ma1
##       -0.4593  0.2823
## s.e.   0.1328  0.1384
## 
## sigma^2 = 15.89:  log likelihood = -1164.62
## AIC=2335.24   AICc=2335.3   BIC=2347.34

plot(DQ_ARIMA$residuals,main="ARIMA(1,1,1) - DQ Stock Price")

acf(DQ_ARIMA$residuals,main="ACF - ARIMA (1,1,1)")                # ACF plot displays weak or no autocorrelation.

Box.test(DQ_ARIMA$residuals,lag=1,type="Ljung-Box")               # P-value is > 0.05 indicating that ARMA model does not show residual serial autocorrelation.

## 
##  Box-Ljung test
## 
## data:  DQ_ARIMA$residuals
## X-squared = 0.16455, df = 1, p-value = 0.685

adf.test(DQ_ARIMA$residuals)                                      # ADF test suggest that ARMA residuals are stationary since p-value is < 0.05.

## Warning in adf.test(DQ_ARIMA$residuals): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  DQ_ARIMA$residuals
## Dickey-Fuller = -7.7777, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary

First of all in the box test, we can see that the p-value from the residuals ARIMA Model is 0.686 >0.05, which means that is not significant and has not residual serial autocorrelation. The ADF test shows that those residuals are stationary because of its p-value of 0.01< 0.05.

Assignment3

Cecilia Rivas

2023-09-03