### uploading libraries
library(xts)
library(zoo)
library(tseries)
library(stats)
library(forecast)
library(astsa)
library(corrplot)
library(AER)
library(dynlm)
library(vars)
# library(mFilter)
library(TSstudio)
library(tidyverse)
library(sarima)
library(stargazer)
library(dygraphs)
### importing dataset
VARts<-read.csv("/Users/yessicaacosta/Downloads/2015_energy_stock_prices.csv")
# Time series format
VARts$Date <- as.Date(VARts$Date,"%m/%d/%Y")
str(VARts)
## 'data.frame': 96 obs. of 13 variables:
## $ Date : Date, format: "2015-01-01" "2015-02-01" ...
## $ GE_Adj_Close : num 135 130 142 143 139 ...
## $ IBDRY_Adj_Close : num 17.9 18.7 17.7 18.4 19 ...
## $ NEE_Adj_Close : num 21.4 20.8 21.1 20.4 20.7 ...
## $ VWDRY_Adj_Close : num 2.35 2.53 2.48 2.74 3.11 3.06 3.33 3.28 3.16 3.56 ...
## $ JKS_Adj_Close : num 56.9 21.9 25.6 28 28.8 ...
## $ CSIQ_Adj_Close : num 22.9 29.6 33.4 35.4 32.7 ...
## $ DQ_Adj_Close : num 4.8 4.47 5.3 5.49 4.41 4.71 3.51 2.81 3.2 3.34 ...
## $ AQN_Adj_Close : num 5.44 5.4 4.82 5.39 5.21 4.98 4.89 5.01 4.62 5.3 ...
## $ NonStore_Retailing : int 39670 40668 40522 40792 40618 40925 41494 41403 41886 42104 ...
## $ US_Unemployment : num 5.7 5.5 5.4 5.4 5.6 5.3 5.2 5.1 5 5 ...
## $ US_Consumer_Confidence: num 98.1 95.4 93 95.9 90.7 96.1 93.1 91.9 87.2 90 ...
## $ US_Min_Hour_Wage : num 7.25 7.25 7.25 7.25 7.25 7.25 7.25 7.25 7.25 7.25 ...
# ts = Formatting, when it starts, when it begins, and the frequency of the value)
CSIQ_Adj_Close<-ts(VARts$CSIQ_Adj_Close,start=c(2015,1),end=c(2022,12),frequency=12)
NonStore_Retailing<-ts(VARts$NonStore_Retailing,start=c(2015,1),end=c(2022,12),frequency=12)
US_Unemployment<-ts(VARts$US_Unemployment,start=c(2015,1),end=c(2022,12),frequency=12)
US_Consumer_Confidence<-ts(VARts$US_Consumer_Confidence,start=c(2015,12),end=c(2022,12),frequency=12)
US_Min_Hour_Wage<-ts(VARts$US_Min_Hour_Wage,start=c(2015,1),end=c(2022,12),frequency=12)
VARxts<-xts(VARts$CSIQ_Adj_Close,order.by=VARts$Date)
plot(VARxts,main = "Canadian Solar Inc stock price", xlab ="Date",ylab ="Adjusted Close Price (USD)")
# Alternative plotting time series data
ts_plot(CSIQ_Adj_Close)
ts_plot(NonStore_Retailing)
ts_plot(US_Unemployment)
ts_plot(US_Consumer_Confidence)
ts_plot(US_Min_Hour_Wage)
# Decomposing Canadian Solar Inc stock price (time series data)
CSIQts<-ts(VARts$CSIQ_Adj_Close,frequency=12,start=c(2015,1))
CSIQdec<-decompose(CSIQts)
plot(CSIQdec)
By decomposing the time series data, we can se an increasing trend
starting in 2020. Whe can also see the year has seasonality by analyzing
the increasing trend in one of the periods every year.
# it is important to assess whether the variables under study are stationary or not
adf.test(VARts$CSIQ_Adj_Close) # non-stationary
##
## Augmented Dickey-Fuller Test
##
## data: VARts$CSIQ_Adj_Close
## Dickey-Fuller = -2.9133, Lag order = 4, p-value = 0.1991
## alternative hypothesis: stationary
adf.test(VARts$NonStore_Retailing) # non-stationary
##
## Augmented Dickey-Fuller Test
##
## data: VARts$NonStore_Retailing
## Dickey-Fuller = -1.8269, Lag order = 4, p-value = 0.648
## alternative hypothesis: stationary
adf.test(VARts$US_Unemployment) # non-stationary
##
## Augmented Dickey-Fuller Test
##
## data: VARts$US_Unemployment
## Dickey-Fuller = -2.5755, Lag order = 4, p-value = 0.3386
## alternative hypothesis: stationary
adf.test(VARts$US_Consumer_Confidence) # non-stationary
##
## Augmented Dickey-Fuller Test
##
## data: VARts$US_Consumer_Confidence
## Dickey-Fuller = -1.7896, Lag order = 4, p-value = 0.6634
## alternative hypothesis: stationary
The selected stock price displays non-stationary series, after aplying the adf test we fail to reject de null Hypothesis of “Time series are non-stationary”.
# plotting time series data
par(mfrow=c(2,3))
plot(VARts$Date,VARts$CSIQ_Adj_Close,type="l",col="blue",lwd=2,xlab="Date",ylab="CSIQ",main="CSIQ")
plot(VARts$Date,VARts$NonStore_Retailing,type="l",col="blue",lwd=2,xlab="Date",ylab="NonStore Retailing",main="NonStore Retailing")
plot(VARts$Date,VARts$US_Unemployment,type="l",col="blue",lwd=2,xlab="Date",ylab="Unemployment",main="Unemployment")
plot(VARts$Date,VARts$US_Consumer_Confidence,type="l",col="blue",lwd=2,xlab="Date",ylab="Consumer Confidence",main="Consumer Confidence")
plot(VARts$Date,VARts$US_Min_Hour_Wage,type="l",col="blue",lwd=2,xlab="Date",ylab="Wage",main="Min Hour Wage")
By only seeing this graphs we can see the changing trend in 2020, we can
assumed it was caused by the pandemic.
# Time series plotof dependent variable
VARxts<-xts(VARts$CSIQ_Adj_Close,order.by=VARts$Date)
dygraph(VARxts, main = "Canadian Solar Inc Stock Price",xlab="Date",ylab="Stock Price (USD)") %>%
dyOptions(colors = RColorBrewer::brewer.pal(4, "Dark2")) %>%
dyShading(from = "2020-3-1",
to = "2022-12-1",
color = "#FFE6E6")
# Create a data frame with the variables
data <- data.frame(CSIQ_Adj_Close = VARts$CSIQ_Adj_Close,
US_Consumer_Confidence = VARts$US_Consumer_Confidence,
US_Unemployment = VARts$US_Unemployment,
NonStore_Retailing = VARts$NonStore_Retailing)
# Create a data frame with the variables of interest
data <- data.frame(US_Consumer_Confidence = VARts$NonStore_Retailing, CSIQ_Adj_Close = VARts$CSIQ_Adj_Close)
# Calculate correlation between NonStore Retailing and stock price
correlation <- cor(data$US_Consumer_Confidence, data$CSIQ_Adj_Close)
# Print the correlation coefficient
correlation
## [1] 0.6917818
# Create a scatter plot
plot(VARts$US_Consumer_Confidence, VARts$CSIQ_Adj_Close,
xlab = "Consumer Confidence Index", ylab = "Selected Stock Price",
main = "Relationship between Consumer Confidence Index and Stock Price")
# Create line chart for US Consumer Confidence
plot(VARts$US_Consumer_Confidence, type = "l", xlab = "Time", ylab = "US Consumer Confidence", main = "Tendency of US Consumer Confidence")
# Create a scatter plot with two lines of tendency for US Consumer Confidence
ggplot(data, aes(x = US_Consumer_Confidence, y = CSIQ_Adj_Close)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "blue") +
labs(x = "US Consumer Confidence", y = "CSIQ_Adj_Close", title = "Relationship between US Consumer Confidence and CSIQ_Adj_Close")
## `geom_smooth()` using formula = 'y ~ x'
By analyzing this graphs we can state the relationship between the US
Consumer Confidence and the stock price is negative, because the values
are higher when the stock price is lower.
faltante
# Calculate correlation between unemployment and stock price
correlation <- cor(VARts$US_Unemployment, VARts$CSIQ_Adj_Close)
# Print the correlation coefficient
correlation
## [1] 0.1480425
# Create a scatter plot
plot(VARts$US_Unemployment, VARts$CSIQ_Adj_Close,
xlab = "Unemployment", ylab = "Selected Stock Price",
main = "Relationship between Unemployment and Stock Price")
# Create line chart for US Unemployment
plot(VARts$US_Unemployment, type = "l", xlab = "Time", ylab = "US Unemployment", main = "Tendency of US Unemployment")
# Create a scatter plot with two lines of tendency for US Unemployment
ggplot(data, aes(x = US_Unemployment, y = CSIQ_Adj_Close)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "red") +
labs(x = "US Unemployment", y = "CSIQ_Adj_Close", title = "Relationship between US Unemployment and CSIQ_Adj_Close")
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
By analyzing this graphs we can state the relationship between the US
Unemployment and the Canadian Solar stock price aren’t very related,
they don’t have a strong relationship.
# Calculate correlation between NonStore Retailing and stock price
correlation <- cor(VARts$NonStore_Retailing, VARts$CSIQ_Adj_Close, use = "complete.obs")
# Print the correlation coefficient
correlation
## [1] 0.6917818
# Create a scatter ... plot
plot(VARts$NonStore_Retailing, VARts$CSIQ_Adj_Close,
xlab = "NonStore Retailing", ylab = "Selected Stock Price",
main = "Relationship between NonStore Retailing and Stock Price")
# Create line chart for NonStore Retailing
plot(VARts$NonStore_Retailing, type = "l", xlab = "Time", ylab = "NonStore Retailing", main = "Tendency of NonStore Retailing")
# Create a scatter plot with two lines of tendency for NonStore Retailing
ggplot(data, aes(x = NonStore_Retailing, y = CSIQ_Adj_Close)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "green") +
labs(x = "NonStore Retailing", y = "CSIQ_Adj_Close", title = "Relationship between NonStore Retailing and CSIQ_Adj_Close")
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
By analyzing this graphs we can state the relationship between the
NonStore Retailing and the stock price is positive, because when the
NonStore retailing values and the stock price have a similar trend, as t
growshe independent variable, the stock price does too.
####i. Estimation #### Estimate a VAR_Model that includes at least 1 explanatory factor that might affect each of the selected stocks prices.
### Converting the variables to Time Series Format
NonStore_ts<-ts(VARts$NonStore_Retailing,start=c(2007,1),end=c(2022,12),frequency=12)
Unemployment_ts<-ts(VARts$US_Unemployment,start=c(2007,1),end=c(2022,12),frequency=12)
Consumer_Confidence_ts<-ts(VARts$US_Consumer_Confidence,start=c(2007,1),end=c(2022,12),frequency=12)
Min_Hour_Wage_ts<-ts(VARts$US_Min_Hour_Wage,start=c(2007,1),end=c(2022,12),frequency=12)
CSIQ_ts<-ts(VARts$CSIQ_Adj_Close,start=c(2007,1),end=c(2022,12),frequency=12)
# Lets create a time series dataset
# Canadian Solar Stock price is the dependent variable
VAR_ts<-cbind(CSIQ_ts,Unemployment_ts,Consumer_Confidence_ts, NonStore_ts)
colnames(VAR_ts)<-cbind("CSIQ","Uneployment","Consumer","NonStore") # se le da nombre a las columnas
head(VAR_ts)
## CSIQ Uneployment Consumer NonStore
## Jan 2007 22.90 5.7 98.1 39670
## Feb 2007 29.63 5.5 95.4 40668
## Mar 2007 33.39 5.4 93.0 40522
## Apr 2007 35.40 5.4 95.9 40792
## May 2007 32.66 5.6 90.7 40618
## Jun 2007 28.60 5.3 96.1 40925
# This line will automatically generate the preferred lag order based on multiple iterations of the AIC.
lag_selection<-VARselect(VAR_ts,lag.max=5,type="const", season=12)
lag_selection$selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 3 1 1 3
lag_selection$criteria
## 1 2 3 4 5
## AIC(n) 2.292021e+01 2.284980e+01 2.279336e+01 2.279546e+01 2.283009e+01
## HQ(n) 2.336829e+01 2.340991e+01 2.346549e+01 2.357961e+01 2.372626e+01
## SC(n) 2.402604e+01 2.423209e+01 2.445211e+01 2.473067e+01 2.504176e+01
## FPE(n) 9.012545e+09 8.413353e+09 7.970815e+09 8.014608e+09 8.334693e+09
By comparing the AIC values of the 5 different estimations, we can tell that when the time series is in third order base we get a better estimation.
#VAR Model 1
VAR_model1<-VAR(VAR_ts,p=3,type="const",season=12)
summary(VAR_model1)
##
## VAR Estimation Results:
## =========================
## Endogenous variables: CSIQ, Uneployment, Consumer, NonStore
## Deterministic variables: const
## Sample size: 189
## Log Likelihood: -3130.519
## Roots of the characteristic polynomial:
## 0.9666 0.9666 0.84 0.6032 0.6032 0.5676 0.5676 0.4105 0.3803 0.3803 0.206 0.02168
## Call:
## VAR(y = VAR_ts, p = 3, type = "const", season = 12L)
##
##
## Estimation results for equation CSIQ:
## =====================================
## CSIQ = CSIQ.l1 + Uneployment.l1 + Consumer.l1 + NonStore.l1 + CSIQ.l2 + Uneployment.l2 + Consumer.l2 + NonStore.l2 + CSIQ.l3 + Uneployment.l3 + Consumer.l3 + NonStore.l3 + const + sd1 + sd2 + sd3 + sd4 + sd5 + sd6 + sd7 + sd8 + sd9 + sd10 + sd11
##
## Estimate Std. Error t value Pr(>|t|)
## CSIQ.l1 7.752e-01 7.564e-02 10.248 < 2e-16 ***
## Uneployment.l1 -1.423e-01 2.854e-01 -0.499 0.618573
## Consumer.l1 -1.513e-01 7.416e-02 -2.040 0.042930 *
## NonStore.l1 -1.826e-04 6.558e-05 -2.784 0.005996 **
## CSIQ.l2 3.238e-02 9.851e-02 0.329 0.742771
## Uneployment.l2 -7.319e-02 3.684e-01 -0.199 0.842757
## Consumer.l2 -2.697e-02 9.436e-02 -0.286 0.775425
## NonStore.l2 2.802e-05 8.581e-05 0.327 0.744425
## CSIQ.l3 -1.926e-02 7.187e-02 -0.268 0.789057
## Uneployment.l3 1.008e+00 2.820e-01 3.576 0.000459 ***
## Consumer.l3 2.023e-01 7.799e-02 2.594 0.010341 *
## NonStore.l3 2.329e-04 6.800e-05 3.425 0.000776 ***
## const -5.990e+00 5.143e+00 -1.165 0.245798
## sd1 -1.680e+00 1.290e+00 -1.303 0.194531
## sd2 1.181e-01 1.294e+00 0.091 0.927416
## sd3 -1.976e+00 1.306e+00 -1.513 0.132214
## sd4 -1.939e+00 1.274e+00 -1.522 0.129862
## sd5 -1.164e-01 1.328e+00 -0.088 0.930282
## sd6 -7.297e-01 1.326e+00 -0.550 0.582755
## sd7 -1.566e+00 1.284e+00 -1.220 0.224248
## sd8 -1.052e+00 1.276e+00 -0.825 0.410772
## sd9 -3.318e+00 1.286e+00 -2.581 0.010714 *
## sd10 -1.281e-01 1.282e+00 -0.100 0.920528
## sd11 1.204e-01 1.274e+00 0.094 0.924875
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 3.539 on 165 degrees of freedom
## Multiple R-Squared: 0.9036, Adjusted R-squared: 0.8901
## F-statistic: 67.23 on 23 and 165 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation Uneployment:
## ============================================
## Uneployment = CSIQ.l1 + Uneployment.l1 + Consumer.l1 + NonStore.l1 + CSIQ.l2 + Uneployment.l2 + Consumer.l2 + NonStore.l2 + CSIQ.l3 + Uneployment.l3 + Consumer.l3 + NonStore.l3 + const + sd1 + sd2 + sd3 + sd4 + sd5 + sd6 + sd7 + sd8 + sd9 + sd10 + sd11
##
## Estimate Std. Error t value Pr(>|t|)
## CSIQ.l1 -3.998e-02 2.224e-02 -1.798 0.07402 .
## Uneployment.l1 8.503e-01 8.390e-02 10.134 < 2e-16 ***
## Consumer.l1 -5.346e-02 2.180e-02 -2.452 0.01525 *
## NonStore.l1 -1.606e-05 1.928e-05 -0.833 0.40604
## CSIQ.l2 3.123e-02 2.896e-02 1.078 0.28242
## Uneployment.l2 -4.519e-02 1.083e-01 -0.417 0.67705
## Consumer.l2 6.673e-02 2.774e-02 2.405 0.01726 *
## NonStore.l2 2.317e-05 2.523e-05 0.919 0.35969
## CSIQ.l3 1.141e-03 2.113e-02 0.054 0.95701
## Uneployment.l3 4.096e-02 8.291e-02 0.494 0.62195
## Consumer.l3 2.136e-03 2.293e-02 0.093 0.92590
## NonStore.l3 2.562e-06 1.999e-05 0.128 0.89818
## const -1.062e+00 1.512e+00 -0.702 0.48349
## sd1 2.768e-01 3.792e-01 0.730 0.46643
## sd2 -4.895e-02 3.804e-01 -0.129 0.89777
## sd3 1.204e-01 3.839e-01 0.314 0.75416
## sd4 1.346e+00 3.745e-01 3.594 0.00043 ***
## sd5 -7.818e-02 3.905e-01 -0.200 0.84158
## sd6 -3.414e-02 3.897e-01 -0.088 0.93031
## sd7 -8.009e-03 3.774e-01 -0.021 0.98309
## sd8 -1.566e-01 3.752e-01 -0.417 0.67687
## sd9 -5.025e-02 3.780e-01 -0.133 0.89440
## sd10 3.146e-02 3.770e-01 0.083 0.93360
## sd11 1.040e-01 3.747e-01 0.278 0.78161
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 1.04 on 165 degrees of freedom
## Multiple R-Squared: 0.7308, Adjusted R-squared: 0.6933
## F-statistic: 19.48 on 23 and 165 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation Consumer:
## =========================================
## Consumer = CSIQ.l1 + Uneployment.l1 + Consumer.l1 + NonStore.l1 + CSIQ.l2 + Uneployment.l2 + Consumer.l2 + NonStore.l2 + CSIQ.l3 + Uneployment.l3 + Consumer.l3 + NonStore.l3 + const + sd1 + sd2 + sd3 + sd4 + sd5 + sd6 + sd7 + sd8 + sd9 + sd10 + sd11
##
## Estimate Std. Error t value Pr(>|t|)
## CSIQ.l1 7.143e-02 1.044e-01 0.684 0.49463
## Uneployment.l1 -3.867e-01 3.937e-01 -0.982 0.32742
## Consumer.l1 8.699e-01 1.023e-01 8.502 1.07e-14 ***
## NonStore.l1 -3.916e-05 9.048e-05 -0.433 0.66575
## CSIQ.l2 -1.026e-02 1.359e-01 -0.076 0.93991
## Uneployment.l2 3.269e-01 5.083e-01 0.643 0.52104
## Consumer.l2 -3.326e-01 1.302e-01 -2.554 0.01154 *
## NonStore.l2 -2.285e-04 1.184e-04 -1.930 0.05530 .
## CSIQ.l3 -7.597e-02 9.917e-02 -0.766 0.44469
## Uneployment.l3 1.234e-01 3.891e-01 0.317 0.75145
## Consumer.l3 3.002e-01 1.076e-01 2.790 0.00590 **
## NonStore.l3 1.880e-04 9.382e-05 2.004 0.04669 *
## const 1.934e+01 7.096e+00 2.725 0.00712 **
## sd1 -1.142e+00 1.779e+00 -0.642 0.52202
## sd2 -1.614e+00 1.785e+00 -0.904 0.36728
## sd3 -2.534e+00 1.802e+00 -1.406 0.16147
## sd4 -3.643e+00 1.758e+00 -2.073 0.03974 *
## sd5 -2.411e+00 1.833e+00 -1.315 0.19019
## sd6 -2.209e+00 1.829e+00 -1.208 0.22891
## sd7 -4.620e+00 1.771e+00 -2.608 0.00993 **
## sd8 -3.796e+00 1.761e+00 -2.156 0.03254 *
## sd9 -2.056e+00 1.774e+00 -1.159 0.24817
## sd10 -1.805e+00 1.769e+00 -1.020 0.30918
## sd11 -2.550e+00 1.758e+00 -1.450 0.14894
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 4.882 on 165 degrees of freedom
## Multiple R-Squared: 0.8869, Adjusted R-squared: 0.8711
## F-statistic: 56.24 on 23 and 165 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation NonStore:
## =========================================
## NonStore = CSIQ.l1 + Uneployment.l1 + Consumer.l1 + NonStore.l1 + CSIQ.l2 + Uneployment.l2 + Consumer.l2 + NonStore.l2 + CSIQ.l3 + Uneployment.l3 + Consumer.l3 + NonStore.l3 + const + sd1 + sd2 + sd3 + sd4 + sd5 + sd6 + sd7 + sd8 + sd9 + sd10 + sd11
##
## Estimate Std. Error t value Pr(>|t|)
## CSIQ.l1 9.795e+01 1.184e+02 0.827 0.4093
## Uneployment.l1 3.956e+02 4.467e+02 0.886 0.3771
## Consumer.l1 -1.607e+02 1.161e+02 -1.384 0.1681
## NonStore.l1 8.092e-01 1.026e-01 7.883 4.11e-13 ***
## CSIQ.l2 -1.182e+02 1.542e+02 -0.767 0.4443
## Uneployment.l2 -1.825e+02 5.766e+02 -0.316 0.7521
## Consumer.l2 2.894e+02 1.477e+02 1.959 0.0518 .
## NonStore.l2 3.123e-01 1.343e-01 2.325 0.0213 *
## CSIQ.l3 4.561e+01 1.125e+02 0.405 0.6857
## Uneployment.l3 8.596e+01 4.414e+02 0.195 0.8458
## Consumer.l3 -2.861e+01 1.221e+02 -0.234 0.8150
## NonStore.l3 -1.087e-01 1.064e-01 -1.021 0.3088
## const -1.130e+04 8.050e+03 -1.404 0.1623
## sd1 -2.525e+03 2.019e+03 -1.251 0.2127
## sd2 -5.574e+02 2.026e+03 -0.275 0.7835
## sd3 1.769e+03 2.044e+03 0.865 0.3882
## sd4 1.893e+03 1.994e+03 0.949 0.3439
## sd5 5.284e+02 2.079e+03 0.254 0.7997
## sd6 5.989e+02 2.075e+03 0.289 0.7732
## sd7 1.019e+02 2.009e+03 0.051 0.9596
## sd8 8.452e+02 1.998e+03 0.423 0.6728
## sd9 8.485e+02 2.012e+03 0.422 0.6738
## sd10 1.583e+03 2.007e+03 0.789 0.4314
## sd11 1.062e+03 1.995e+03 0.532 0.5953
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 5539 on 165 degrees of freedom
## Multiple R-Squared: 0.9483, Adjusted R-squared: 0.9411
## F-statistic: 131.6 on 23 and 165 DF, p-value: < 2.2e-16
##
##
##
## Covariance matrix of residuals:
## CSIQ Uneployment Consumer NonStore
## CSIQ 12.52268 0.06989 -2.533 4558.3
## Uneployment 0.06989 1.08245 -1.404 -237.6
## Consumer -2.53349 -1.40449 23.838 -16703.0
## NonStore 4558.32964 -237.59189 -16702.979 30681823.1
##
## Correlation matrix of residuals:
## CSIQ Uneployment Consumer NonStore
## CSIQ 1.00000 0.01898 -0.1466 0.23255
## Uneployment 0.01898 1.00000 -0.2765 -0.04123
## Consumer -0.14663 -0.27649 1.0000 -0.61761
## NonStore 0.23255 -0.04123 -0.6176 1.00000
VAR_model1_residuals<-data.frame(residuals(VAR_model1))
adf.test(VAR_model1_residuals$CSIQ)
## Warning in adf.test(VAR_model1_residuals$CSIQ): p-value smaller than printed
## p-value
##
## Augmented Dickey-Fuller Test
##
## data: VAR_model1_residuals$CSIQ
## Dickey-Fuller = -6.3646, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
Box.test(VAR_model1_residuals$CSIQ,lag=3,type="Ljung-Box")
##
## Box-Ljung test
##
## data: VAR_model1_residuals$CSIQ
## X-squared = 4.6545, df = 3, p-value = 0.1989
adf test: p-value<0.05 the residuals of the series are stationary Box-Ljung test: p-value>0.05 there is no serial correlation between residuals
It appears that a lag order of 3 has the lowest AIC, HQ, SC, and FPE values. This suggests that a VAR model with a lag order of 3 might provide the best fit for the data. Now, let’s move on to interpreting the tendencies of each variable.
CSIQ (Canadian Solar Stock Price): The CSIQ stock price shows a fluctuating pattern over the observed period from January 2007 to December 2022. The values range from 22.90 to higher values over time. Further analysis, such as forecasting, can provide more insights into the future tendencies of the CSIQ stock price.
Unemployment: The unemployment rate exhibits variations over time. It is important to analyze the specific values and trends to understand the overall tendency. However, without the specific data points, it is difficult to provide a detailed interpretation.
Consumer Confidence: The consumer confidence index shows fluctuations over the observed period. It is important to analyze the specific values and trends to understand the overall tendency. However, without the specific data points, it is difficult to provide a detailed interpretation.
NonStore Retailing: The non-store retail sales exhibit variations over time. It is important to analyze the specific values and trends to understand the overall tendency. However, without the specific data points, it is difficult to provide a detailed interpretation.
Augmented Dickey-Fuller (ADF) Test: The ADF test is used to determine if a time series is stationary. In this case, you have applied the ADF test to the residuals of the CSIQ variable from VAR_model1. The test result indicates a p-value of 0.01, which is smaller than the significance level of 0.05. Therefore, you can reject the null hypothesis of non-stationarity and conclude that the residuals of CSIQ are stationary.
Ljung-Box Test: The Ljung-Box test is used to detect the presence of serial autocorrelation in the residuals. In this case, you have applied the Ljung-Box test to the residuals of the CSIQ variable from VAR_model1, considering a lag of 3. The test result shows an X-squared value of 4.6545 and a p-value of 0.1989. Since the p-value is greater than the significance level of 0.05, we fail to reject the null hypothesis of no serial autocorrelation. This suggests that there is no significant evidence of serial autocorrelation in the residuals of CSIQ.
Overall, based on the diagnostic tests conducted on the residuals of VAR_model1, the findings indicate that the residuals of the CSIQ variable are stationary and do not exhibit significant serial autocorrelation.
# forecasting
forecast_1<-predict(VAR_model1,n.ahead=12,ci=0.95) ### forecast for the next 12 months
fanchart(forecast_1,names="CSIQ",main="Canadian Solar Inc Stock Price",xlab="Time Period",ylab="Stock Price")
forecast_1
## $CSIQ
## fcst lower upper CI
## [1,] 30.76823 23.83243 37.70403 6.935803
## [2,] 31.89322 23.14311 40.64334 8.750116
## [3,] 31.89766 22.08557 41.70975 9.812087
## [4,] 31.09085 20.66019 41.52151 10.430660
## [5,] 31.56141 20.51248 42.61034 11.048927
## [6,] 31.61328 19.86576 43.36081 11.747524
## [7,] 32.01645 19.56373 44.46917 12.452721
## [8,] 32.77248 19.63433 45.91064 13.138151
## [9,] 30.94743 17.15532 44.73953 13.792106
## [10,] 31.76319 17.36680 46.15958 14.396389
## [11,] 31.98851 17.04217 46.93485 14.946340
## [12,] 32.20925 16.76126 47.65723 15.447986
##
## $Uneployment
## fcst lower upper CI
## [1,] 3.711934 1.67277368 5.751095 2.039161
## [2,] 3.604494 0.79356604 6.415423 2.810928
## [3,] 3.723435 0.54166120 6.905209 3.181774
## [4,] 4.965030 1.57445303 8.355606 3.390577
## [5,] 4.685229 1.17663929 8.193820 3.508590
## [6,] 4.400633 0.82335042 7.977915 3.577282
## [7,] 4.187237 0.56716801 7.807306 3.620069
## [8,] 3.974946 0.32515690 7.624735 3.649789
## [9,] 3.796865 0.12506147 7.468669 3.671804
## [10,] 3.673315 -0.01601446 7.362645 3.689330
## [11,] 3.600599 -0.10366092 7.304859 3.704260
## [12,] 3.553632 -0.16366224 7.270925 3.717294
##
## $Consumer
## fcst lower upper CI
## [1,] 62.81400 53.24459 72.38341 9.569411
## [2,] 62.30405 49.32092 75.28719 12.983133
## [3,] 63.46586 48.82106 78.11065 14.644794
## [4,] 63.48754 47.45614 79.51893 16.031392
## [5,] 63.49662 46.21195 80.78128 17.284668
## [6,] 64.68612 46.43123 82.94101 18.254889
## [7,] 63.74095 44.69651 82.78539 19.044440
## [8,] 63.43953 43.70125 83.17781 19.738279
## [9,] 65.87223 45.51355 86.23091 20.358683
## [10,] 67.68555 46.77135 88.59975 20.914200
## [11,] 67.82679 46.40604 89.24754 21.420750
## [12,] 70.77883 48.88915 92.66851 21.889678
##
## $NonStore
## fcst lower upper CI
## [1,] 102869.70 92013.23 113726.2 10856.47
## [2,] 101222.45 86443.78 116001.1 14778.67
## [3,] 100450.02 82359.05 118541.0 18090.96
## [4,] 99728.75 79094.99 120362.5 20633.76
## [5,] 98497.82 75647.62 121348.0 22850.21
## [6,] 97217.09 72491.48 121942.7 24725.61
## [7,] 95146.34 68772.13 121520.6 26374.21
## [8,] 94441.77 66596.71 122286.8 27845.06
## [9,] 93067.43 63888.85 122246.0 29178.58
## [10,] 91944.09 61546.96 122341.2 30397.13
## [11,] 90877.53 59356.54 122398.5 31520.99
## [12,] 89004.48 56440.25 121568.7 32564.23
# Granger causality testing each variable against all the others.
# There could be a unidirectional, bidirectional, or no causality relationships between variables.
granger_CSIQ<-causality(VAR_model1,cause="CSIQ")
granger_CSIQ
## $Granger
##
## Granger causality H0: CSIQ do not Granger-cause Uneployment Consumer
## NonStore
##
## data: VAR object VAR_model1
## F-Test = 0.62036, df1 = 9, df2 = 660, p-value = 0.7802
##
##
## $Instant
##
## H0: No instantaneous causality between: CSIQ and Uneployment Consumer
## NonStore
##
## data: VAR object VAR_model1
## Chi-squared = 9.8459, df = 3, p-value = 0.01992
The forecasted values for the next 12 months are provided for each variable in the VAR model. The forecasted values include the point estimate (fcst), lower and upper bounds (lower and upper), and the confidence interval (CI).
Interpretation of the Forecast:
CSIQ: The forecasted stock price for CSIQ shows an increasing trend over the next 12 months, with the estimated values ranging from 30.77 to 70.78. The confidence interval indicates the uncertainty associated with the forecasted values.
Unemployment: The forecasted unemployment rate shows a decreasing trend over the next 12 months, with the estimated values ranging from 3.55% to 7.81%. Consumer Confidence: The forecasted consumer confidence index shows a fluctuating pattern over the next 12 months, with the estimated values ranging from 62.30 to 70.78. The confidence interval indicates the uncertainty associated with the forecasted values.
NonStore: The forecasted non-store retail sales show an increasing trend over the next 12 months, with the estimated values ranging from 89,004.48 to 121,942.7. The confidence interval indicates the uncertainty associated with the forecasted values.
Granger Causality Test:
The Granger causality test is performed to determine if there is a causal relationship between the variables in the VAR model. In this case, the test is conducted between CSIQ and Unemployment, Consumer, and NonStore variables.
CSIQ and Unemployment: The Granger causality test results indicate that there is no significant evidence to suggest that CSIQ Granger-causes Unemployment, Consumer, or NonStore variables. The p-value of 0.7802 suggests that there is no statistically significant causal relationship between CSIQ and Unemployment,
Consumer, or NonStore. CSIQ and Consumer: The Granger causality test results indicate that there is a statistically significant causal relationship between CSIQ and Consumer variables. The chi-squared test statistic of 9.8459 with 3 degrees of freedom and a p-value of 0.01992 suggests that CSIQ Granger-causes Consumer.
# Transform non-stationary time series variables to stationary
diff_CSIQ<-diff(log(CSIQ_Adj_Close))
diff_consumer<-diff(log(US_Consumer_Confidence))
diff_NonStore<-diff(log(NonStore_Retailing))
diff_unemployment<-diff(log(US_Unemployment))
# plotting differenced time series variables
par(mfrow=c(2,2))
plot(diff_consumer)
plot(diff_NonStore)
plot(diff_unemployment)
plot(diff_CSIQ)
Now by observing the plotted variables after applying differences we
convert them to stationary. This we can see by their behavior over the
years.
# it is important to assess whether the variables under study are stationary or not
adf.test(diff_consumer) # non-stationary
## Warning in adf.test(diff_consumer): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff_consumer
## Dickey-Fuller = -5.8504, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
adf.test(diff_NonStore) # non-stationary
##
## Augmented Dickey-Fuller Test
##
## data: diff_NonStore
## Dickey-Fuller = -3.965, Lag order = 4, p-value = 0.0142
## alternative hypothesis: stationary
adf.test(diff_CSIQ) # non-stationary
##
## Augmented Dickey-Fuller Test
##
## data: diff_CSIQ
## Dickey-Fuller = -3.9686, Lag order = 4, p-value = 0.01403
## alternative hypothesis: stationary
adf.test(diff_unemployment) # non-stationary
## Warning in adf.test(diff_unemployment): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff_unemployment
## Dickey-Fuller = -4.6314, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
The p-value of all the variables now are <0.05, which means we reject the H0, the variables are stationary