Assignment 4. Vector Autoregressive Model

Equipo:

A00833617 | Yessica Acosta Blancheth

A00834241 | Regina Rodríguez Chávez

A01275763 | Eli Gabriel Hernández Medina

A00833172 | Genaro Rodríguez Alcántara

### uploading libraries 
library(xts)
library(zoo)
library(tseries)
library(stats)
library(forecast)
library(astsa)
library(corrplot)
library(AER)
library(dynlm)
library(vars)
# library(mFilter)
library(TSstudio)
library(tidyverse)
library(sarima)
library(stargazer)
library(dygraphs)

### importing dataset

VARts<-read.csv("/Users/yessicaacosta/Downloads/2015_energy_stock_prices.csv")

# Time series format
VARts$Date <- as.Date(VARts$Date,"%m/%d/%Y")
str(VARts)

## 'data.frame':    96 obs. of  13 variables:
##  $ Date                  : Date, format: "2015-01-01" "2015-02-01" ...
##  $ GE_Adj_Close          : num  135 130 142 143 139 ...
##  $ IBDRY_Adj_Close       : num  17.9 18.7 17.7 18.4 19 ...
##  $ NEE_Adj_Close         : num  21.4 20.8 21.1 20.4 20.7 ...
##  $ VWDRY_Adj_Close       : num  2.35 2.53 2.48 2.74 3.11 3.06 3.33 3.28 3.16 3.56 ...
##  $ JKS_Adj_Close         : num  56.9 21.9 25.6 28 28.8 ...
##  $ CSIQ_Adj_Close        : num  22.9 29.6 33.4 35.4 32.7 ...
##  $ DQ_Adj_Close          : num  4.8 4.47 5.3 5.49 4.41 4.71 3.51 2.81 3.2 3.34 ...
##  $ AQN_Adj_Close         : num  5.44 5.4 4.82 5.39 5.21 4.98 4.89 5.01 4.62 5.3 ...
##  $ NonStore_Retailing    : int  39670 40668 40522 40792 40618 40925 41494 41403 41886 42104 ...
##  $ US_Unemployment       : num  5.7 5.5 5.4 5.4 5.6 5.3 5.2 5.1 5 5 ...
##  $ US_Consumer_Confidence: num  98.1 95.4 93 95.9 90.7 96.1 93.1 91.9 87.2 90 ...
##  $ US_Min_Hour_Wage      : num  7.25 7.25 7.25 7.25 7.25 7.25 7.25 7.25 7.25 7.25 ...

# ts = Formatting, when it starts, when it begins, and the frequency of the value)
CSIQ_Adj_Close<-ts(VARts$CSIQ_Adj_Close,start=c(2015,1),end=c(2022,12),frequency=12)
NonStore_Retailing<-ts(VARts$NonStore_Retailing,start=c(2015,1),end=c(2022,12),frequency=12)
US_Unemployment<-ts(VARts$US_Unemployment,start=c(2015,1),end=c(2022,12),frequency=12)
US_Consumer_Confidence<-ts(VARts$US_Consumer_Confidence,start=c(2015,12),end=c(2022,12),frequency=12)
US_Min_Hour_Wage<-ts(VARts$US_Min_Hour_Wage,start=c(2015,1),end=c(2022,12),frequency=12)

a. Visualization

Plot the selected stocks price over the time period from Jan-2015 to Dec-2022.

VARxts<-xts(VARts$CSIQ_Adj_Close,order.by=VARts$Date)
plot(VARxts,main = "Canadian Solar Inc stock price", xlab ="Date",ylab ="Adjusted Close Price (USD)")

# Alternative plotting time series data
ts_plot(CSIQ_Adj_Close)

ts_plot(NonStore_Retailing)

ts_plot(US_Unemployment)

ts_plot(US_Consumer_Confidence)

ts_plot(US_Min_Hour_Wage)

Decompose the stock price and describe the trend and seasonal components of the time series data

# Decomposing Canadian Solar Inc stock price (time series data)
CSIQts<-ts(VARts$CSIQ_Adj_Close,frequency=12,start=c(2015,1))
CSIQdec<-decompose(CSIQts)
plot(CSIQdec)

By decomposing the time series data, we can se an increasing trend starting in 2020. Whe can also see the year has seasonality by analyzing the increasing trend in one of the periods every year.

Do the selected stock price display non-stationary series or stationary series?

# it is important to assess whether the variables under study are stationary or not
adf.test(VARts$CSIQ_Adj_Close) # non-stationary

## 
##  Augmented Dickey-Fuller Test
## 
## data:  VARts$CSIQ_Adj_Close
## Dickey-Fuller = -2.9133, Lag order = 4, p-value = 0.1991
## alternative hypothesis: stationary

adf.test(VARts$NonStore_Retailing) # non-stationary

## 
##  Augmented Dickey-Fuller Test
## 
## data:  VARts$NonStore_Retailing
## Dickey-Fuller = -1.8269, Lag order = 4, p-value = 0.648
## alternative hypothesis: stationary

adf.test(VARts$US_Unemployment) # non-stationary

## 
##  Augmented Dickey-Fuller Test
## 
## data:  VARts$US_Unemployment
## Dickey-Fuller = -2.5755, Lag order = 4, p-value = 0.3386
## alternative hypothesis: stationary

adf.test(VARts$US_Consumer_Confidence) # non-stationary

## 
##  Augmented Dickey-Fuller Test
## 
## data:  VARts$US_Consumer_Confidence
## Dickey-Fuller = -1.7896, Lag order = 4, p-value = 0.6634
## alternative hypothesis: stationary

The selected stock price displays non-stationary series, after aplying the adf test we fail to reject de null Hypothesis of “Time series are non-stationary”.

b. Describing Dynamic Interactions

# plotting time series data
par(mfrow=c(2,3))
plot(VARts$Date,VARts$CSIQ_Adj_Close,type="l",col="blue",lwd=2,xlab="Date",ylab="CSIQ",main="CSIQ")
plot(VARts$Date,VARts$NonStore_Retailing,type="l",col="blue",lwd=2,xlab="Date",ylab="NonStore Retailing",main="NonStore Retailing")
plot(VARts$Date,VARts$US_Unemployment,type="l",col="blue",lwd=2,xlab="Date",ylab="Unemployment",main="Unemployment")
plot(VARts$Date,VARts$US_Consumer_Confidence,type="l",col="blue",lwd=2,xlab="Date",ylab="Consumer Confidence",main="Consumer Confidence")
plot(VARts$Date,VARts$US_Min_Hour_Wage,type="l",col="blue",lwd=2,xlab="Date",ylab="Wage",main="Min Hour Wage")

By only seeing this graphs we can see the changing trend in 2020, we can assumed it was caused by the pandemic.

# Time series plotof dependent variable

VARxts<-xts(VARts$CSIQ_Adj_Close,order.by=VARts$Date)
dygraph(VARxts, main = "Canadian Solar Inc Stock Price",xlab="Date",ylab="Stock Price (USD)") %>%
  dyOptions(colors = RColorBrewer::brewer.pal(4, "Dark2")) %>%
  dyShading(from = "2020-3-1",
            to = "2022-12-1", 
            color = "#FFE6E6")

# Create a data frame with the variables
data <- data.frame(CSIQ_Adj_Close = VARts$CSIQ_Adj_Close,
                   US_Consumer_Confidence = VARts$US_Consumer_Confidence,
                   US_Unemployment = VARts$US_Unemployment,
                   NonStore_Retailing = VARts$NonStore_Retailing)

What is the relationship between Costumer Confidence Index and the selected stock’s price performance?

# Create a data frame with the variables of interest
data <- data.frame(US_Consumer_Confidence = VARts$NonStore_Retailing, CSIQ_Adj_Close = VARts$CSIQ_Adj_Close)

# Calculate correlation between NonStore Retailing and stock price
correlation <- cor(data$US_Consumer_Confidence, data$CSIQ_Adj_Close)

# Print the correlation coefficient
correlation

## [1] 0.6917818

# Create a scatter plot
plot(VARts$US_Consumer_Confidence, VARts$CSIQ_Adj_Close, 
     xlab = "Consumer Confidence Index", ylab = "Selected Stock Price",
     main = "Relationship between Consumer Confidence Index and Stock Price")

# Create line chart for US Consumer Confidence
plot(VARts$US_Consumer_Confidence, type = "l", xlab = "Time", ylab = "US Consumer Confidence", main = "Tendency of US Consumer Confidence")

# Create a scatter plot with two lines of tendency for US Consumer Confidence
ggplot(data, aes(x = US_Consumer_Confidence, y = CSIQ_Adj_Close)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(x = "US Consumer Confidence", y = "CSIQ_Adj_Close", title = "Relationship between US Consumer Confidence and CSIQ_Adj_Close")

## `geom_smooth()` using formula = 'y ~ x'

By analyzing this graphs we can state the relationship between the US Consumer Confidence and the stock price is negative, because the values are higher when the stock price is lower.

What is the relationship between Uneployment and the selected stock’s price performance?

faltante

# Calculate correlation between unemployment and stock price
correlation <- cor(VARts$US_Unemployment, VARts$CSIQ_Adj_Close)

# Print the correlation coefficient
correlation

## [1] 0.1480425

# Create a scatter plot
plot(VARts$US_Unemployment, VARts$CSIQ_Adj_Close, 
     xlab = "Unemployment", ylab = "Selected Stock Price",
     main = "Relationship between Unemployment and Stock Price")

# Create line chart for US Unemployment
plot(VARts$US_Unemployment, type = "l", xlab = "Time", ylab = "US Unemployment", main = "Tendency of US Unemployment")

# Create a scatter plot with two lines of tendency for US Unemployment
ggplot(data, aes(x = US_Unemployment, y = CSIQ_Adj_Close)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(x = "US Unemployment", y = "CSIQ_Adj_Close", title = "Relationship between US Unemployment and CSIQ_Adj_Close")

## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'

By analyzing this graphs we can state the relationship between the US Unemployment and the Canadian Solar stock price aren’t very related, they don’t have a strong relationship.

What is the relationship between NonStore Retailing and the selected stock’s price performance?

# Calculate correlation between NonStore Retailing and stock price
correlation <- cor(VARts$NonStore_Retailing, VARts$CSIQ_Adj_Close, use = "complete.obs")

# Print the correlation coefficient
correlation

## [1] 0.6917818

# Create a scatter ... plot
plot(VARts$NonStore_Retailing, VARts$CSIQ_Adj_Close, 
     xlab = "NonStore Retailing", ylab = "Selected Stock Price",
     main = "Relationship between NonStore Retailing and Stock Price")

# Create line chart for NonStore Retailing
plot(VARts$NonStore_Retailing, type = "l", xlab = "Time", ylab = "NonStore Retailing", main = "Tendency of NonStore Retailing")

# Create a scatter plot with two lines of tendency for NonStore Retailing
ggplot(data, aes(x = NonStore_Retailing, y = CSIQ_Adj_Close)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "green") +
  labs(x = "NonStore Retailing", y = "CSIQ_Adj_Close", title = "Relationship between NonStore Retailing and CSIQ_Adj_Close")

## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'

By analyzing this graphs we can state the relationship between the NonStore Retailing and the stock price is positive, because when the NonStore retailing values and the stock price have a similar trend, as t growshe independent variable, the stock price does too.

c. Estimation & Model Selection

####i. Estimation #### Estimate a VAR_Model that includes at least 1 explanatory factor that might affect each of the selected stocks prices.

### Converting the variables to Time Series Format
NonStore_ts<-ts(VARts$NonStore_Retailing,start=c(2007,1),end=c(2022,12),frequency=12)
Unemployment_ts<-ts(VARts$US_Unemployment,start=c(2007,1),end=c(2022,12),frequency=12)
Consumer_Confidence_ts<-ts(VARts$US_Consumer_Confidence,start=c(2007,1),end=c(2022,12),frequency=12)
Min_Hour_Wage_ts<-ts(VARts$US_Min_Hour_Wage,start=c(2007,1),end=c(2022,12),frequency=12)
CSIQ_ts<-ts(VARts$CSIQ_Adj_Close,start=c(2007,1),end=c(2022,12),frequency=12)

# Lets create a time series dataset
# Canadian Solar Stock price is the dependent variable 

VAR_ts<-cbind(CSIQ_ts,Unemployment_ts,Consumer_Confidence_ts, NonStore_ts)
colnames(VAR_ts)<-cbind("CSIQ","Uneployment","Consumer","NonStore") # se le da nombre a las columnas
head(VAR_ts)

##           CSIQ Uneployment Consumer NonStore
## Jan 2007 22.90         5.7     98.1    39670
## Feb 2007 29.63         5.5     95.4    40668
## Mar 2007 33.39         5.4     93.0    40522
## Apr 2007 35.40         5.4     95.9    40792
## May 2007 32.66         5.6     90.7    40618
## Jun 2007 28.60         5.3     96.1    40925

# This line will automatically generate the preferred lag order based on multiple iterations of the AIC. 

lag_selection<-VARselect(VAR_ts,lag.max=5,type="const", season=12) 
lag_selection$selection

## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      3      1      1      3

lag_selection$criteria

##                   1            2            3            4            5
## AIC(n) 2.292021e+01 2.284980e+01 2.279336e+01 2.279546e+01 2.283009e+01
## HQ(n)  2.336829e+01 2.340991e+01 2.346549e+01 2.357961e+01 2.372626e+01
## SC(n)  2.402604e+01 2.423209e+01 2.445211e+01 2.473067e+01 2.504176e+01
## FPE(n) 9.012545e+09 8.413353e+09 7.970815e+09 8.014608e+09 8.334693e+09

By comparing the AIC values of the 5 different estimations, we can tell that when the time series is in third order base we get a better estimation.

#VAR Model 1
VAR_model1<-VAR(VAR_ts,p=3,type="const",season=12) 
summary(VAR_model1)

## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: CSIQ, Uneployment, Consumer, NonStore 
## Deterministic variables: const 
## Sample size: 189 
## Log Likelihood: -3130.519 
## Roots of the characteristic polynomial:
## 0.9666 0.9666  0.84 0.6032 0.6032 0.5676 0.5676 0.4105 0.3803 0.3803 0.206 0.02168
## Call:
## VAR(y = VAR_ts, p = 3, type = "const", season = 12L)
## 
## 
## Estimation results for equation CSIQ: 
## ===================================== 
## CSIQ = CSIQ.l1 + Uneployment.l1 + Consumer.l1 + NonStore.l1 + CSIQ.l2 + Uneployment.l2 + Consumer.l2 + NonStore.l2 + CSIQ.l3 + Uneployment.l3 + Consumer.l3 + NonStore.l3 + const + sd1 + sd2 + sd3 + sd4 + sd5 + sd6 + sd7 + sd8 + sd9 + sd10 + sd11 
## 
##                  Estimate Std. Error t value Pr(>|t|)    
## CSIQ.l1         7.752e-01  7.564e-02  10.248  < 2e-16 ***
## Uneployment.l1 -1.423e-01  2.854e-01  -0.499 0.618573    
## Consumer.l1    -1.513e-01  7.416e-02  -2.040 0.042930 *  
## NonStore.l1    -1.826e-04  6.558e-05  -2.784 0.005996 ** 
## CSIQ.l2         3.238e-02  9.851e-02   0.329 0.742771    
## Uneployment.l2 -7.319e-02  3.684e-01  -0.199 0.842757    
## Consumer.l2    -2.697e-02  9.436e-02  -0.286 0.775425    
## NonStore.l2     2.802e-05  8.581e-05   0.327 0.744425    
## CSIQ.l3        -1.926e-02  7.187e-02  -0.268 0.789057    
## Uneployment.l3  1.008e+00  2.820e-01   3.576 0.000459 ***
## Consumer.l3     2.023e-01  7.799e-02   2.594 0.010341 *  
## NonStore.l3     2.329e-04  6.800e-05   3.425 0.000776 ***
## const          -5.990e+00  5.143e+00  -1.165 0.245798    
## sd1            -1.680e+00  1.290e+00  -1.303 0.194531    
## sd2             1.181e-01  1.294e+00   0.091 0.927416    
## sd3            -1.976e+00  1.306e+00  -1.513 0.132214    
## sd4            -1.939e+00  1.274e+00  -1.522 0.129862    
## sd5            -1.164e-01  1.328e+00  -0.088 0.930282    
## sd6            -7.297e-01  1.326e+00  -0.550 0.582755    
## sd7            -1.566e+00  1.284e+00  -1.220 0.224248    
## sd8            -1.052e+00  1.276e+00  -0.825 0.410772    
## sd9            -3.318e+00  1.286e+00  -2.581 0.010714 *  
## sd10           -1.281e-01  1.282e+00  -0.100 0.920528    
## sd11            1.204e-01  1.274e+00   0.094 0.924875    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 3.539 on 165 degrees of freedom
## Multiple R-Squared: 0.9036,  Adjusted R-squared: 0.8901 
## F-statistic: 67.23 on 23 and 165 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation Uneployment: 
## ============================================ 
## Uneployment = CSIQ.l1 + Uneployment.l1 + Consumer.l1 + NonStore.l1 + CSIQ.l2 + Uneployment.l2 + Consumer.l2 + NonStore.l2 + CSIQ.l3 + Uneployment.l3 + Consumer.l3 + NonStore.l3 + const + sd1 + sd2 + sd3 + sd4 + sd5 + sd6 + sd7 + sd8 + sd9 + sd10 + sd11 
## 
##                  Estimate Std. Error t value Pr(>|t|)    
## CSIQ.l1        -3.998e-02  2.224e-02  -1.798  0.07402 .  
## Uneployment.l1  8.503e-01  8.390e-02  10.134  < 2e-16 ***
## Consumer.l1    -5.346e-02  2.180e-02  -2.452  0.01525 *  
## NonStore.l1    -1.606e-05  1.928e-05  -0.833  0.40604    
## CSIQ.l2         3.123e-02  2.896e-02   1.078  0.28242    
## Uneployment.l2 -4.519e-02  1.083e-01  -0.417  0.67705    
## Consumer.l2     6.673e-02  2.774e-02   2.405  0.01726 *  
## NonStore.l2     2.317e-05  2.523e-05   0.919  0.35969    
## CSIQ.l3         1.141e-03  2.113e-02   0.054  0.95701    
## Uneployment.l3  4.096e-02  8.291e-02   0.494  0.62195    
## Consumer.l3     2.136e-03  2.293e-02   0.093  0.92590    
## NonStore.l3     2.562e-06  1.999e-05   0.128  0.89818    
## const          -1.062e+00  1.512e+00  -0.702  0.48349    
## sd1             2.768e-01  3.792e-01   0.730  0.46643    
## sd2            -4.895e-02  3.804e-01  -0.129  0.89777    
## sd3             1.204e-01  3.839e-01   0.314  0.75416    
## sd4             1.346e+00  3.745e-01   3.594  0.00043 ***
## sd5            -7.818e-02  3.905e-01  -0.200  0.84158    
## sd6            -3.414e-02  3.897e-01  -0.088  0.93031    
## sd7            -8.009e-03  3.774e-01  -0.021  0.98309    
## sd8            -1.566e-01  3.752e-01  -0.417  0.67687    
## sd9            -5.025e-02  3.780e-01  -0.133  0.89440    
## sd10            3.146e-02  3.770e-01   0.083  0.93360    
## sd11            1.040e-01  3.747e-01   0.278  0.78161    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 1.04 on 165 degrees of freedom
## Multiple R-Squared: 0.7308,  Adjusted R-squared: 0.6933 
## F-statistic: 19.48 on 23 and 165 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation Consumer: 
## ========================================= 
## Consumer = CSIQ.l1 + Uneployment.l1 + Consumer.l1 + NonStore.l1 + CSIQ.l2 + Uneployment.l2 + Consumer.l2 + NonStore.l2 + CSIQ.l3 + Uneployment.l3 + Consumer.l3 + NonStore.l3 + const + sd1 + sd2 + sd3 + sd4 + sd5 + sd6 + sd7 + sd8 + sd9 + sd10 + sd11 
## 
##                  Estimate Std. Error t value Pr(>|t|)    
## CSIQ.l1         7.143e-02  1.044e-01   0.684  0.49463    
## Uneployment.l1 -3.867e-01  3.937e-01  -0.982  0.32742    
## Consumer.l1     8.699e-01  1.023e-01   8.502 1.07e-14 ***
## NonStore.l1    -3.916e-05  9.048e-05  -0.433  0.66575    
## CSIQ.l2        -1.026e-02  1.359e-01  -0.076  0.93991    
## Uneployment.l2  3.269e-01  5.083e-01   0.643  0.52104    
## Consumer.l2    -3.326e-01  1.302e-01  -2.554  0.01154 *  
## NonStore.l2    -2.285e-04  1.184e-04  -1.930  0.05530 .  
## CSIQ.l3        -7.597e-02  9.917e-02  -0.766  0.44469    
## Uneployment.l3  1.234e-01  3.891e-01   0.317  0.75145    
## Consumer.l3     3.002e-01  1.076e-01   2.790  0.00590 ** 
## NonStore.l3     1.880e-04  9.382e-05   2.004  0.04669 *  
## const           1.934e+01  7.096e+00   2.725  0.00712 ** 
## sd1            -1.142e+00  1.779e+00  -0.642  0.52202    
## sd2            -1.614e+00  1.785e+00  -0.904  0.36728    
## sd3            -2.534e+00  1.802e+00  -1.406  0.16147    
## sd4            -3.643e+00  1.758e+00  -2.073  0.03974 *  
## sd5            -2.411e+00  1.833e+00  -1.315  0.19019    
## sd6            -2.209e+00  1.829e+00  -1.208  0.22891    
## sd7            -4.620e+00  1.771e+00  -2.608  0.00993 ** 
## sd8            -3.796e+00  1.761e+00  -2.156  0.03254 *  
## sd9            -2.056e+00  1.774e+00  -1.159  0.24817    
## sd10           -1.805e+00  1.769e+00  -1.020  0.30918    
## sd11           -2.550e+00  1.758e+00  -1.450  0.14894    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 4.882 on 165 degrees of freedom
## Multiple R-Squared: 0.8869,  Adjusted R-squared: 0.8711 
## F-statistic: 56.24 on 23 and 165 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation NonStore: 
## ========================================= 
## NonStore = CSIQ.l1 + Uneployment.l1 + Consumer.l1 + NonStore.l1 + CSIQ.l2 + Uneployment.l2 + Consumer.l2 + NonStore.l2 + CSIQ.l3 + Uneployment.l3 + Consumer.l3 + NonStore.l3 + const + sd1 + sd2 + sd3 + sd4 + sd5 + sd6 + sd7 + sd8 + sd9 + sd10 + sd11 
## 
##                  Estimate Std. Error t value Pr(>|t|)    
## CSIQ.l1         9.795e+01  1.184e+02   0.827   0.4093    
## Uneployment.l1  3.956e+02  4.467e+02   0.886   0.3771    
## Consumer.l1    -1.607e+02  1.161e+02  -1.384   0.1681    
## NonStore.l1     8.092e-01  1.026e-01   7.883 4.11e-13 ***
## CSIQ.l2        -1.182e+02  1.542e+02  -0.767   0.4443    
## Uneployment.l2 -1.825e+02  5.766e+02  -0.316   0.7521    
## Consumer.l2     2.894e+02  1.477e+02   1.959   0.0518 .  
## NonStore.l2     3.123e-01  1.343e-01   2.325   0.0213 *  
## CSIQ.l3         4.561e+01  1.125e+02   0.405   0.6857    
## Uneployment.l3  8.596e+01  4.414e+02   0.195   0.8458    
## Consumer.l3    -2.861e+01  1.221e+02  -0.234   0.8150    
## NonStore.l3    -1.087e-01  1.064e-01  -1.021   0.3088    
## const          -1.130e+04  8.050e+03  -1.404   0.1623    
## sd1            -2.525e+03  2.019e+03  -1.251   0.2127    
## sd2            -5.574e+02  2.026e+03  -0.275   0.7835    
## sd3             1.769e+03  2.044e+03   0.865   0.3882    
## sd4             1.893e+03  1.994e+03   0.949   0.3439    
## sd5             5.284e+02  2.079e+03   0.254   0.7997    
## sd6             5.989e+02  2.075e+03   0.289   0.7732    
## sd7             1.019e+02  2.009e+03   0.051   0.9596    
## sd8             8.452e+02  1.998e+03   0.423   0.6728    
## sd9             8.485e+02  2.012e+03   0.422   0.6738    
## sd10            1.583e+03  2.007e+03   0.789   0.4314    
## sd11            1.062e+03  1.995e+03   0.532   0.5953    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 5539 on 165 degrees of freedom
## Multiple R-Squared: 0.9483,  Adjusted R-squared: 0.9411 
## F-statistic: 131.6 on 23 and 165 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##                   CSIQ Uneployment   Consumer   NonStore
## CSIQ          12.52268     0.06989     -2.533     4558.3
## Uneployment    0.06989     1.08245     -1.404     -237.6
## Consumer      -2.53349    -1.40449     23.838   -16703.0
## NonStore    4558.32964  -237.59189 -16702.979 30681823.1
## 
## Correlation matrix of residuals:
##                 CSIQ Uneployment Consumer NonStore
## CSIQ         1.00000     0.01898  -0.1466  0.23255
## Uneployment  0.01898     1.00000  -0.2765 -0.04123
## Consumer    -0.14663    -0.27649   1.0000 -0.61761
## NonStore     0.23255    -0.04123  -0.6176  1.00000

VAR_model1_residuals<-data.frame(residuals(VAR_model1))
adf.test(VAR_model1_residuals$CSIQ)

## Warning in adf.test(VAR_model1_residuals$CSIQ): p-value smaller than printed
## p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  VAR_model1_residuals$CSIQ
## Dickey-Fuller = -6.3646, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary

Box.test(VAR_model1_residuals$CSIQ,lag=3,type="Ljung-Box")

## 
##  Box-Ljung test
## 
## data:  VAR_model1_residuals$CSIQ
## X-squared = 4.6545, df = 3, p-value = 0.1989

adf test: p-value<0.05 the residuals of the series are stationary Box-Ljung test: p-value>0.05 there is no serial correlation between residuals

Interpretation of the tendencies of each variable:

It appears that a lag order of 3 has the lowest AIC, HQ, SC, and FPE values. This suggests that a VAR model with a lag order of 3 might provide the best fit for the data. Now, let’s move on to interpreting the tendencies of each variable.

CSIQ (Canadian Solar Stock Price): The CSIQ stock price shows a fluctuating pattern over the observed period from January 2007 to December 2022. The values range from 22.90 to higher values over time. Further analysis, such as forecasting, can provide more insights into the future tendencies of the CSIQ stock price.
Unemployment: The unemployment rate exhibits variations over time. It is important to analyze the specific values and trends to understand the overall tendency. However, without the specific data points, it is difficult to provide a detailed interpretation.
Consumer Confidence: The consumer confidence index shows fluctuations over the observed period. It is important to analyze the specific values and trends to understand the overall tendency. However, without the specific data points, it is difficult to provide a detailed interpretation.
NonStore Retailing: The non-store retail sales exhibit variations over time. It is important to analyze the specific values and trends to understand the overall tendency. However, without the specific data points, it is difficult to provide a detailed interpretation.

Interpretation of model:

Augmented Dickey-Fuller (ADF) Test: The ADF test is used to determine if a time series is stationary. In this case, you have applied the ADF test to the residuals of the CSIQ variable from VAR_model1. The test result indicates a p-value of 0.01, which is smaller than the significance level of 0.05. Therefore, you can reject the null hypothesis of non-stationarity and conclude that the residuals of CSIQ are stationary.
Ljung-Box Test: The Ljung-Box test is used to detect the presence of serial autocorrelation in the residuals. In this case, you have applied the Ljung-Box test to the residuals of the CSIQ variable from VAR_model1, considering a lag of 3. The test result shows an X-squared value of 4.6545 and a p-value of 0.1989. Since the p-value is greater than the significance level of 0.05, we fail to reject the null hypothesis of no serial autocorrelation. This suggests that there is no significant evidence of serial autocorrelation in the residuals of CSIQ.

Overall, based on the diagnostic tests conducted on the residuals of VAR_model1, the findings indicate that the residuals of the CSIQ variable are stationary and do not exhibit significant serial autocorrelation.

d. Forecasting

- Based on the selected VAR_Model, forecast the stock price for the next 5 months.

- Display the forecast in a time series plot.

- Briefly comment on your estimated forecast and the FY 2022 stock price of the selected renewable energy company.

# forecasting
forecast_1<-predict(VAR_model1,n.ahead=12,ci=0.95) ### forecast for the next 12 months
fanchart(forecast_1,names="CSIQ",main="Canadian Solar Inc Stock Price",xlab="Time Period",ylab="Stock Price")

forecast_1

## $CSIQ
##           fcst    lower    upper        CI
##  [1,] 30.76823 23.83243 37.70403  6.935803
##  [2,] 31.89322 23.14311 40.64334  8.750116
##  [3,] 31.89766 22.08557 41.70975  9.812087
##  [4,] 31.09085 20.66019 41.52151 10.430660
##  [5,] 31.56141 20.51248 42.61034 11.048927
##  [6,] 31.61328 19.86576 43.36081 11.747524
##  [7,] 32.01645 19.56373 44.46917 12.452721
##  [8,] 32.77248 19.63433 45.91064 13.138151
##  [9,] 30.94743 17.15532 44.73953 13.792106
## [10,] 31.76319 17.36680 46.15958 14.396389
## [11,] 31.98851 17.04217 46.93485 14.946340
## [12,] 32.20925 16.76126 47.65723 15.447986
## 
## $Uneployment
##           fcst       lower    upper       CI
##  [1,] 3.711934  1.67277368 5.751095 2.039161
##  [2,] 3.604494  0.79356604 6.415423 2.810928
##  [3,] 3.723435  0.54166120 6.905209 3.181774
##  [4,] 4.965030  1.57445303 8.355606 3.390577
##  [5,] 4.685229  1.17663929 8.193820 3.508590
##  [6,] 4.400633  0.82335042 7.977915 3.577282
##  [7,] 4.187237  0.56716801 7.807306 3.620069
##  [8,] 3.974946  0.32515690 7.624735 3.649789
##  [9,] 3.796865  0.12506147 7.468669 3.671804
## [10,] 3.673315 -0.01601446 7.362645 3.689330
## [11,] 3.600599 -0.10366092 7.304859 3.704260
## [12,] 3.553632 -0.16366224 7.270925 3.717294
## 
## $Consumer
##           fcst    lower    upper        CI
##  [1,] 62.81400 53.24459 72.38341  9.569411
##  [2,] 62.30405 49.32092 75.28719 12.983133
##  [3,] 63.46586 48.82106 78.11065 14.644794
##  [4,] 63.48754 47.45614 79.51893 16.031392
##  [5,] 63.49662 46.21195 80.78128 17.284668
##  [6,] 64.68612 46.43123 82.94101 18.254889
##  [7,] 63.74095 44.69651 82.78539 19.044440
##  [8,] 63.43953 43.70125 83.17781 19.738279
##  [9,] 65.87223 45.51355 86.23091 20.358683
## [10,] 67.68555 46.77135 88.59975 20.914200
## [11,] 67.82679 46.40604 89.24754 21.420750
## [12,] 70.77883 48.88915 92.66851 21.889678
## 
## $NonStore
##            fcst    lower    upper       CI
##  [1,] 102869.70 92013.23 113726.2 10856.47
##  [2,] 101222.45 86443.78 116001.1 14778.67
##  [3,] 100450.02 82359.05 118541.0 18090.96
##  [4,]  99728.75 79094.99 120362.5 20633.76
##  [5,]  98497.82 75647.62 121348.0 22850.21
##  [6,]  97217.09 72491.48 121942.7 24725.61
##  [7,]  95146.34 68772.13 121520.6 26374.21
##  [8,]  94441.77 66596.71 122286.8 27845.06
##  [9,]  93067.43 63888.85 122246.0 29178.58
## [10,]  91944.09 61546.96 122341.2 30397.13
## [11,]  90877.53 59356.54 122398.5 31520.99
## [12,]  89004.48 56440.25 121568.7 32564.23

# Granger causality testing each variable against all the others.
# There could be a unidirectional, bidirectional, or no causality relationships between variables.
granger_CSIQ<-causality(VAR_model1,cause="CSIQ")
granger_CSIQ

## $Granger
## 
##  Granger causality H0: CSIQ do not Granger-cause Uneployment Consumer
##  NonStore
## 
## data:  VAR object VAR_model1
## F-Test = 0.62036, df1 = 9, df2 = 660, p-value = 0.7802
## 
## 
## $Instant
## 
##  H0: No instantaneous causality between: CSIQ and Uneployment Consumer
##  NonStore
## 
## data:  VAR object VAR_model1
## Chi-squared = 9.8459, df = 3, p-value = 0.01992

Interpretation of the forecast of CSIQ Stock Price:

The forecasted values for the next 12 months are provided for each variable in the VAR model. The forecasted values include the point estimate (fcst), lower and upper bounds (lower and upper), and the confidence interval (CI).

Interpretation of the Forecast:

CSIQ: The forecasted stock price for CSIQ shows an increasing trend over the next 12 months, with the estimated values ranging from 30.77 to 70.78. The confidence interval indicates the uncertainty associated with the forecasted values.
Unemployment: The forecasted unemployment rate shows a decreasing trend over the next 12 months, with the estimated values ranging from 3.55% to 7.81%. Consumer Confidence: The forecasted consumer confidence index shows a fluctuating pattern over the next 12 months, with the estimated values ranging from 62.30 to 70.78. The confidence interval indicates the uncertainty associated with the forecasted values.
NonStore: The forecasted non-store retail sales show an increasing trend over the next 12 months, with the estimated values ranging from 89,004.48 to 121,942.7. The confidence interval indicates the uncertainty associated with the forecasted values.

Granger Causality Test:

The Granger causality test is performed to determine if there is a causal relationship between the variables in the VAR model. In this case, the test is conducted between CSIQ and Unemployment, Consumer, and NonStore variables.

CSIQ and Unemployment: The Granger causality test results indicate that there is no significant evidence to suggest that CSIQ Granger-causes Unemployment, Consumer, or NonStore variables. The p-value of 0.7802 suggests that there is no statistically significant causal relationship between CSIQ and Unemployment,
Consumer, or NonStore. CSIQ and Consumer: The Granger causality test results indicate that there is a statistically significant causal relationship between CSIQ and Consumer variables. The chi-squared test statistic of 9.8459 with 3 degrees of freedom and a p-value of 0.01992 suggests that CSIQ Granger-causes Consumer.

# Transform non-stationary time series variables to stationary 
diff_CSIQ<-diff(log(CSIQ_Adj_Close))
diff_consumer<-diff(log(US_Consumer_Confidence))
diff_NonStore<-diff(log(NonStore_Retailing))
diff_unemployment<-diff(log(US_Unemployment))

# plotting differenced time series variables 
par(mfrow=c(2,2))
plot(diff_consumer)
plot(diff_NonStore)
plot(diff_unemployment)
plot(diff_CSIQ)

Now by observing the plotted variables after applying differences we convert them to stationary. This we can see by their behavior over the years.

# it is important to assess whether the variables under study are stationary or not
adf.test(diff_consumer) # non-stationary

## Warning in adf.test(diff_consumer): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_consumer
## Dickey-Fuller = -5.8504, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

adf.test(diff_NonStore) # non-stationary

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_NonStore
## Dickey-Fuller = -3.965, Lag order = 4, p-value = 0.0142
## alternative hypothesis: stationary

adf.test(diff_CSIQ) # non-stationary

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_CSIQ
## Dickey-Fuller = -3.9686, Lag order = 4, p-value = 0.01403
## alternative hypothesis: stationary

adf.test(diff_unemployment) # non-stationary

## Warning in adf.test(diff_unemployment): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_unemployment
## Dickey-Fuller = -4.6314, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

The p-value of all the variables now are <0.05, which means we reject the H0, the variables are stationary