# ECO 5428 Time Series Analysis - Problem Set 1
# Load the quantmod package
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# ============================================================================
# Retrieve Walmart (WMT) stock data from Yahoo Finance (2010-2019)
# ============================================================================
# Download daily WMT price history from Yahoo Finance. The call returns the
# symbol name (echoed below); the data itself is made available in the
# workspace as the object `WMT`, which the rest of the script uses.
# NOTE(review): the recorded run reports 2515 rows for this range; confirm
# whether the `to` date itself (2019-12-31) is included in what Yahoo returns.
getSymbols(
  Symbols = "WMT",
  src = "yahoo",
  from = "2010-01-01",
  to = "2019-12-31"
)
## [1] "WMT"
# ============================================================================
# Part (a): How many variables and observations are in the WMT data?
# ============================================================================
# dim() gives c(rows, columns); each count is printed on its own line.
dim(WMT)[2] # Number of variables (columns)
## [1] 6
dim(WMT)[1] # Number of observations (rows)
## [1] 2515
# ============================================================================
# Part (b): Daily close prices analysis
# ============================================================================
# Closing-price series: the WMT.Close column of the downloaded data
p <- WMT[, "WMT.Close"]
# Plot the price level over time (judged non-stationary in this analysis)
plot(
  p,
  col = "blue",
  xlab = "Date",
  ylab = "Price ($)",
  main = "Walmart (WMT) Daily Close Prices (2010-2019)"
)

# Correlogram of the price level up to lag 20
acf(as.numeric(p), lag.max = 20, main = "ACF of WMT Close Prices")

# Recompute the ACF without drawing so individual estimates can be read off.
# Position 1 of $acf is lag 0, so the lag-k estimate sits at position k + 1.
# The values close to 1 indicate very strong serial dependence.
acf_p <- acf(as.numeric(p), plot = FALSE)
acf_p$acf[2, 1, 1] # rho_1
## [1] 0.9970167
acf_p$acf[3, 1, 1] # rho_2
## [1] 0.9941237
acf_p$acf[4, 1, 1] # rho_3
## [1] 0.9912814
acf_p$acf[5, 1, 1] # rho_4
## [1] 0.9884424
# ============================================================================
# Part (c): Daily log returns analysis
# ============================================================================
# Daily log returns: first difference of the log close price. Differencing
# leaves a missing value at the start of the series, which na.omit() drops.
rd <- na.omit(diff(log(p)))
length(rd) # Count of daily return observations
## [1] 2514
# Plot the return series over time (judged stationary in this analysis)
plot(
  rd,
  col = "darkgreen",
  xlab = "Date",
  ylab = "Log Return",
  main = "Walmart (WMT) Daily Log Returns (2010-2019)"
)

# Correlogram of daily returns up to lag 20 (no visible serial dependence)
acf(as.numeric(rd), lag.max = 20, main = "ACF of WMT Daily Log Returns")

# Non-plotting ACF object for reading off the first four lags.
# Position 1 of $acf is lag 0, so the lag-k estimate sits at position k + 1.
acf_rd <- acf(as.numeric(rd), plot = FALSE)
acf_rd$acf[2, 1, 1] # rho_1
## [1] -0.0432239
acf_rd$acf[3, 1, 1] # rho_2
## [1] -0.01743386
acf_rd$acf[4, 1, 1] # rho_3
## [1] -0.003433017
acf_rd$acf[5, 1, 1] # rho_4
## [1] -0.02404845
# ============================================================================
# Part (d): Summary statistics of daily returns
# ============================================================================
# Peek at the first five daily log returns
head(rd, n = 5L)
## WMT.Close
## 2010-01-05 -0.010007513
## 2010-01-06 -0.002237606
## 2010-01-07 0.000559871
## 2010-01-08 -0.005050052
## 2010-01-11 0.016366353
# Location and dispersion of the daily log returns
mean(rd) # Sample mean
## [1] 0.0003139399
median(rd) # Sample median
## [1] 0.0005703705
sd(rd) # Sample standard deviation
## [1] 0.0108949
# ============================================================================
# Part (e): Weekly and Monthly Returns
# ============================================================================
# Aggregate daily log returns to weekly and monthly frequency. Log returns are
# additive over time, so summing within each period gives that period's return.
# Note: `rm` shares its name with base R's rm() function; calls such as rm(x)
# still resolve to the function, but keep the overlap in mind when reading.
rw <- apply.weekly(rd, sum)
rm <- apply.monthly(rd, sum)
# Part (e)(i): Number of observations
length(rw) # Count of weekly returns
## [1] 522
length(rm) # Count of monthly returns
## [1] 120
# Part (e)(ii): Time series plots in 3 rows x 1 column
# par() returns the previous values of the settings it changes. Saving them
# lets the layout be restored to whatever was active before this section,
# instead of forcing a hard-coded 1 x 1 layout afterwards.
old_par <- par(mfrow = c(3, 1))
plot(rd, main = "Daily Returns", ylab = "Log Return", xlab = "Date", col = "darkgreen")
plot(rw, main = "Weekly Returns", ylab = "Log Return", xlab = "Date", col = "blue")
plot(rm, main = "Monthly Returns", ylab = "Log Return", xlab = "Date", col = "red")

par(old_par) # Restore the plot layout that was in effect before this section
# Part (e)(iii): ACFs of weekly and monthly returns
# Correlograms up to lag 20 for each aggregated return series
acf(as.numeric(rw), lag.max = 20, main = "ACF of Weekly Log Returns")

acf(as.numeric(rm), lag.max = 20, main = "ACF of Monthly Log Returns")

# Non-plotting ACF objects so individual estimates can be reported.
# Position 1 of $acf is lag 0, so the lag-k estimate sits at position k + 1.
acf_rw <- acf(as.numeric(rw), plot = FALSE)
acf_rm <- acf(as.numeric(rm), plot = FALSE)
# First four weekly autocorrelations
acf_rw$acf[2, 1, 1] # rho_1
## [1] -0.09798199
acf_rw$acf[3, 1, 1] # rho_2
## [1] 0.06652177
acf_rw$acf[4, 1, 1] # rho_3
## [1] -0.04358709
acf_rw$acf[5, 1, 1] # rho_4
## [1] 0.03360296
# First four monthly autocorrelations
acf_rm$acf[2, 1, 1] # rho_1
## [1] -0.03016586
acf_rm$acf[3, 1, 1] # rho_2
## [1] 0.1036199
acf_rm$acf[4, 1, 1] # rho_3
## [1] -0.07480843
acf_rm$acf[5, 1, 1] # rho_4
## [1] -0.1150897
# Part (e)(iv): Ljung-Box tests (10 lags)
# Null hypothesis: autocorrelations at lags 1 through 10 are all zero
#   (no serial correlation).
# Alternative: at least one of those autocorrelations is non-zero
#   (serial correlation is present).
# Each result is stored and then printed; in the recorded runs below all three
# p-values exceed 0.05, so the null is not rejected at the 5% level.

# Daily returns
lb_daily <- Box.test(rd, type = "Ljung-Box", lag = 10)
lb_daily
##
## Box-Ljung test
##
## data: rd
## X-squared = 12.329, df = 10, p-value = 0.2636
# Weekly returns
lb_weekly <- Box.test(rw, type = "Ljung-Box", lag = 10)
lb_weekly
##
## Box-Ljung test
##
## data: rw
## X-squared = 15.358, df = 10, p-value = 0.1195
# Monthly returns
lb_monthly <- Box.test(rm, type = "Ljung-Box", lag = 10)
lb_monthly
##
## Box-Ljung test
##
## data: rm
## X-squared = 6.9229, df = 10, p-value = 0.7327