This report is a summary of a DataCamp lesson by David S. Matteson.
# Print the Nile dataset
print(Nile)
## Time Series:
## Start = 1871
## End = 1970
## Frequency = 1
## [1] 1120 1160 963 1210 1160 1160 813 1230 1370 1140 995 935 1110 994 1020
## [16] 960 1180 799 958 1140 1100 1210 1150 1250 1260 1220 1030 1100 774 840
## [31] 874 694 940 833 701 916 692 1020 1050 969 831 726 456 824 702
## [46] 1120 1100 832 764 821 768 845 864 862 698 845 744 796 1040 759
## [61] 781 865 845 944 984 897 822 1010 771 676 649 846 812 742 801
## [76] 1040 860 874 848 890 744 749 838 1050 918 986 797 923 975 815
## [91] 1020 906 901 1170 912 746 919 718 714 740
# List the number of observations in the Nile dataset
length(Nile)
## [1] 100
# Display the first 10 elements of the Nile dataset
head(Nile, 10)
## [1] 1120 1160 963 1210 1160 1160 813 1230 1370 1140
# Display the last 12 elements of the Nile dataset
tail(Nile, 12)
## [1] 975 815 1020 906 901 1170 912 746 919 718 714 740
# Plot the Nile data with xlab and ylab arguments
plot(Nile, xlab = "Year", ylab = "River Volume (1e9 m^{3})")
# Plot the Nile data with xlab, ylab, main, and type arguments
plot(Nile, xlab = "Year", ylab = "River Volume (1e9 m^{3})", main = "Annual River Nile Volume at Aswan, 1871-1970", type ="b")
ts(start = ..., frequency = ...): creates a time series object; is.ts(): checks whether an object is a time series.

# Create a data vector to convert into a time series
data_vector <- c(10, 6, 11, 8, 10, 3, 6, 9)
time_series <- ts(data_vector)
plot(time_series)
time_series <- ts(data_vector, start = 2001, frequency = 1)
plot(time_series)
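A quick check of is.ts() on the objects just created (a minimal sketch; these two calls are not part of the original lesson code):
# A plain numeric vector is not a ts object
is.ts(data_vector)
# An object created with ts() is
is.ts(time_series)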
head(EuStockMarkets)
## DAX SMI CAC FTSE
## [1,] 1628.75 1678.1 1772.8 2443.6
## [2,] 1613.63 1688.5 1750.5 2460.2
## [3,] 1606.51 1678.6 1718.0 2448.2
## [4,] 1621.04 1684.1 1708.1 2470.4
## [5,] 1618.16 1686.6 1723.1 2484.7
## [6,] 1610.61 1671.6 1714.3 2466.8
# Check whether EuStockMarkets is a ts object
is.ts(EuStockMarkets)
## [1] TRUE
# View the start, end, and frequency of EuStockMarkets
start(EuStockMarkets)
## [1] 1991 130
end(EuStockMarkets)
## [1] 1998 169
frequency(EuStockMarkets)
## [1] 260
# Generate a simple plot of EuStockMarkets
plot(EuStockMarkets)
# Use ts.plot with EuStockMarkets
ts.plot(EuStockMarkets, col = 1:4, xlab = "Year", ylab = "Index Value", main = "Major European Stock Indices, 1991-1998")
# Add a legend to your ts.plot
legend("topleft", colnames(EuStockMarkets), lty = 1, col = 1:4, bty = "n")
Useful transformations and ARIMA orders:

- log(): can linearize a rapid growth trend and stabilize variance
- diff(lag = ...): can remove periodic trends (called the seasonal difference transformation); applying diff() shortens the series by lag observations
- p: the autoregressive order
- d: the order of integration (or differencing)
- q: the moving average order
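A minimal sketch of these transformations on the built-in AirPassengers data (used here only for illustration; it is not part of the lesson code):
# log() linearizes the exponential growth trend
linear_growth <- log(AirPassengers)
# diff(lag = 12) removes the yearly (period-12) pattern: the seasonal difference
seasonal_diff <- diff(linear_growth, lag = 12)
# The differenced series is shorter by exactly lag = 12 observations
length(linear_growth) - length(seasonal_diff)
ts.plot(seasonal_diff)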
White Noise (WN) is the simplest example of a stationary process. A weak white noise process has a fixed, constant mean, a fixed, constant variance, and no correlation over time.
- arima.sim(model = list(order = c(0, 0, 0)), n = ..., mean = ..., sd = ...): simulates a WN process
- arima(x, order = c(0, 0, 0)): estimates a WN model and returns the estimated mean and sd

# Simulate a WN model with list(order = c(0, 0, 0))
white_noise <- arima.sim(model = list(order = c(0, 0, 0)), n = 100)
# Plot your white_noise data
ts.plot(white_noise)
# Simulate from the WN model with: mean = 100, sd = 10
white_noise_2 <- arima.sim(model = list(order = c(0, 0, 0)), n = 100, mean = 100, sd = 10)
# Plot your white_noise_2 data
ts.plot(white_noise_2)
# Fit the WN model to y using the arima command
arima(white_noise, order = c(0, 0, 0))
##
## Call:
## arima(x = white_noise, order = c(0, 0, 0))
##
## Coefficients:
## intercept
## -0.2306
## s.e. 0.1083
##
## sigma^2 estimated as 1.172: log likelihood = -149.85, aic = 303.7
# Calculate the sample mean and sample variance of y
mean(white_noise)
## [1] -0.2305964
var(white_noise)
## [1] 1.184274
Random Walk (RW) is a simple example of a non-stationary process.

A random walk has no specified mean or variance, strong dependence over time, and changes (increments) that are white noise (WN).
The random walk recursion:
\(Today = Yesterday + Noise\)
More formally:
\(Y_t = Y_{t-1} + \epsilon_t\)
where \(\epsilon_t\) is mean zero white noise (WN)
The first difference of a random walk, diff(Y), is WN; a quick check appears in the sketch below.
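A minimal sketch verifying this on a simulated plain random walk (this simulation is illustrative and not part of the lesson code):
# Simulate a plain random walk (no drift)
plain_rw <- arima.sim(model = list(order = c(0, 1, 0)), n = 100)
# Its first difference should behave like white noise: a flat ACF beyond lag 0
ts.plot(diff(plain_rw))
acf(diff(plain_rw))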
The random walk with a drift:
\(Today = Constant + Yesterday + Noise\)
More formally:
\(Y_t = c + Y_{t-1} + \epsilon_t\)
where \(\epsilon_t\) is mean zero white noise (WN)
# Generate a RW model with a drift using arima.sim
rw_drift <- arima.sim(model = list(order = c(0, 1, 0)), n = 100, mean = 1)
# Plot rw_drift
ts.plot(rw_drift)
# Calculate the first difference series
rw_drift_diff <- diff(rw_drift)
# Plot rw_drift_diff
ts.plot(rw_drift_diff)
# Now fit the WN model to the differenced data
model_wn <- arima(rw_drift_diff, order = c(0, 0, 0))
model_wn
##
## Call:
## arima(x = rw_drift_diff, order = c(0, 0, 0))
##
## Coefficients:
## intercept
## 0.9340
## s.e. 0.1115
##
## sigma^2 estimated as 1.243: log likelihood = -152.75, aic = 309.5
# Store the value of the estimated time trend (intercept)
int_wn <- model_wn$coef
# Plot the original random_walk data
ts.plot(rw_drift, main = "with Drift")
# Use abline(0, ...) to add time trend to the figure
abline(0, int_wn)
Observed time series fluctuate randomly, but typically behave similarly from one time period to the next.

Weak stationarity: the mean, variance, and covariance are constant over time.

\(Y_1, Y_2, ...\) is a weakly stationary process if the mean of \(Y_t\) is constant for all \(t\) and the covariance of \(Y_s\) and \(Y_t\) depends only on the lag \(|t - s|\); for example, \(Cov(Y_2,Y_5) = Cov(Y_7,Y_{10})\), since each pair is separated by three units of time.
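A rough numerical illustration of this property (an illustrative simulation, not part of the lesson; it assumes a stationary AR(1) process with slope 0.5):
set.seed(1)
# Simulate many short realizations of a stationary AR(1) process
sims <- replicate(5000, arima.sim(model = list(ar = 0.5), n = 12))
# Both pairs below are separated by three time units, so their sample covariances should be close
cov(sims[2, ], sims[5, ])
cov(sims[7, ], sims[10, ])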
Many financial time series do not exhibit stationarity: because most financial series have trends, they are non-stationary. However, differencing can often transform a non-stationary series into a stationary one.
The white noise (WN) and random walk (RW) models are very closely related. However, only the RW is always non-stationary, both with and without a drift term.
cumsum(): cumulative sum, which converts a WN series into a RW.

# Use arima.sim() to generate WN data
white_noise <- arima.sim(model = list(order = c(0, 0, 0)), n = 100)
# Use cumsum() to convert your WN data to RW
random_walk <- cumsum(white_noise)
# Use arima.sim() to generate WN drift data
wn_drift <- arima.sim(model = list(order = c(0, 0, 0)), n = 100, mean = 0.4)
# Use cumsum() to convert your WN drift data to RW
rw_drift <- cumsum(wn_drift)
# Plot all four data objects
plot.ts(cbind(white_noise, random_walk, wn_drift, rw_drift))
The change in appearance between daily prices and daily returns is typically substantial, while the difference between daily returns and log returns is usually small.
diff(log()): computes log returns.

plot(EuStockMarkets)
# Compute simple returns: P_t / P_{t-1} - 1 (EuStockMarkets has 1860 rows)
returns <- EuStockMarkets[-1, ] / EuStockMarkets[-1860, ] - 1
# Convert returns to ts
returns <- ts(returns, start = c(1991, 130), frequency = 260)
# Plot returns
plot(returns)
# Use this code to convert prices to log returns
logreturns <- diff(log(EuStockMarkets))
# Plot logreturns
plot(logreturns)
Because of these extreme returns, the distribution of daily asset returns is not normal, but heavy-tailed, and sometimes skewed.
hist(), qqnorm()

# Convert returns to percentage returns
returns_percent <- returns * 100
# Generate means from eu_percentreturns
colMeans(returns_percent)
## DAX SMI CAC FTSE
## 0.07052174 0.08609470 0.04979471 0.04637479
# Use apply to calculate sample variance from eu_percentreturns
apply(returns_percent, MARGIN = 2, FUN = var)
## DAX SMI CAC FTSE
## 1.0569648 0.8523711 1.2159091 0.6344767
# Use apply to calculate standard deviation from eu_percentreturns
apply(returns_percent, MARGIN = 2, FUN = sd)
## DAX SMI CAC FTSE
## 1.0280879 0.9232394 1.1026827 0.7965405
# # Display histogram and normal quantile plots
# par(mfrow = c(2,2))
# apply(returns_percent, MARGIN = 2, FUN = hist, main = "", xlab = "Percentage Return")
# par(mfrow = c(2,2))
# apply(returns_percent, MARGIN = 2, FUN = qqnorm, main = "")
# qqline(returns_percent)
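A runnable sketch for a single index, assuming the returns_percent matrix from above (plotting one column avoids the commented-out apply() loops):
# Histogram and normal quantile plot for the DAX percentage returns
dax_percent <- returns_percent[, "DAX"]
par(mfrow = c(1, 2))
hist(dax_percent, main = "DAX", xlab = "Percentage Return")
qqnorm(dax_percent, main = "DAX")
qqline(dax_percent)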
pairs(): makes a scatterplot matrix.

pairs(EuStockMarkets)
pairs(logreturns)
cov(), cor(), acf(x = ..., lag.max = ..., plot = TRUE/FALSE)

Autocorrelations or lagged correlations are used to assess whether a time series is dependent on its past. For a series x of length n and lag k, two overlapping vectors of length n - k can be formed: x[-(1:k)] and x[-((n - k + 1):n)].

x <- rw_drift
n <- 100
# Define x_t0 as x[-1]
x_t0 <- x[-1]
# Define x_t1 as x[-n]
x_t1 <- x[-n]
# Confirm that x_t0 and x_t1 are (x[t], x[t-1]) pairs
head(cbind(x_t0, x_t1))
## x_t0 x_t1
## [1,] 0.3254267 0.1124751
## [2,] 2.9014636 0.3254267
## [3,] 3.9389896 2.9014636
## [4,] 3.1284810 3.9389896
## [5,] 3.5280729 3.1284810
## [6,] 3.3127873 3.5280729
# Plot x_t0 and x_t1
plot(x_t0, x_t1)
# View the correlation between x_t0 and x_t1
cor(x_t0, x_t1)
## [1] 0.9981547
# Use acf with x
acf(x, lag.max = 1, plot = FALSE)
##
## Autocorrelations of series 'x', by lag
##
## 0 1
## 1.00 0.97
# Confirm that difference factor is (n-1)/n
cor(x_t1, x_t0) * (n-1)/n
## [1] 0.9881731
acf(x)
acf(white_noise)
The Autoregressive(AR) recursion:
\(Today = Constant + Slope * Yesterday + Noise\)
Mean centered version:
\((Today - Mean) = Slope * (Yesterday - Mean) + Noise\)
More formally:
\(Y_t - \mu = \phi(Y_{t-1}-\mu) + \epsilon_t\)
where \(\epsilon_t\) is mean zero white noise(WN)
arima.sim(model = list(ar = phi), n = ...)

# Simulate an AR model with 0.5 slope
x <- arima.sim(model = list(ar = 0.5), n = 100)
# Simulate an AR model with 0.9 slope
y <- arima.sim(model = list(ar = 0.9), n = 100)
# Simulate an AR model with -0.75 slope
z <- arima.sim(model = list(ar = -0.75), n = 100)
# Plot your simulated data
plot.ts(cbind(x, y, z))
# Calculate the ACF for x
acf(x)
# Calculate the ACF for y
acf(y)
# Calculate the ACF for z
acf(z)
Persistence is defined by a high correlation between an observation and its lag, while anti-persistence is defined by a large amount of variation between an observation and its lag.
The random walk (RW) model is a special case of the autoregressive (AR) model, in which the slope parameter is equal to 1.
# Simulate and plot AR model with slope 0.9
x <- arima.sim(model = list(ar = 0.9), n = 200)
ts.plot(x)
acf(x)
# Simulate and plot AR model with slope 0.98
y <- arima.sim(model = list(ar = 0.98), n = 200)
ts.plot(y)
acf(y)
# Simulate and plot RW model
z <- arima.sim(model = list(order = c(0,1,0)), n = 200)
ts.plot(z)
acf(z)
AR processes: inflation rate with Mishkin dataset
head(Mishkin)
## pai1 pai3 tb1 tb3 cpi
## [1,] -3.552289 1.129370 1.100854 1.129406 23.5
## [2,] 5.247540 4.001566 1.125513 1.137254 23.6
## [3,] 1.692860 4.492160 1.115715 1.142319 23.6
## [4,] 5.064298 7.817513 1.146380 1.177902 23.7
## [5,] 6.719322 9.433580 1.158520 1.167777 23.8
## [6,] 11.668920 9.976068 1.151104 1.167652 24.1
inflation <- as.ts(Mishkin[, 1])
ts.plot(inflation)
acf(inflation)
The ACF shows that the series is quite persistent, decaying gradually as the lag increases. An AR model is a good fit for data like this.
AR_inflation <- arima(inflation, order = c(1, 0, 0))
print(AR_inflation)
##
## Call:
## arima(x = inflation, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.5960 3.9745
## s.e. 0.0364 0.3471
##
## sigma^2 estimated as 9.713: log likelihood = -1255.05, aic = 2516.09
\(Y_t - \mu = \phi(Y_{t-1}-\mu) + \epsilon_t\)
\(\hat{Y_t} = \hat{\mu} + \hat{\phi}(Y_{t-1}-\hat{\mu})\)
\(\hat{\epsilon_t} = Y_t - \hat{Y_t}\)
residuals(), predict(..., n.ahead = ...)

ts.plot(inflation)
AR_inflation_fitted <- inflation - residuals(AR_inflation)
points(AR_inflation_fitted, type = "l", col = "red", lty = 2)
predict(AR_inflation)
## $pred
## Jan
## 1991 1.605797
##
## $se
## Jan
## 1991 3.116526
predict(AR_inflation, n.ahead = 6)
## $pred
## Jan Feb Mar Apr May Jun
## 1991 1.605797 2.562810 3.133165 3.473082 3.675664 3.796398
##
## $se
## Jan Feb Mar Apr May Jun
## 1991 3.116526 3.628023 3.793136 3.850077 3.870101 3.877188
Nile dataset

# Fit an AR model to Nile
AR_fit <- arima(Nile, order = c(1, 0, 0))
print(AR_fit)
##
## Call:
## arima(x = Nile, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.5063 919.5685
## s.e. 0.0867 29.1410
##
## sigma^2 estimated as 21125: log likelihood = -639.95, aic = 1285.9
# Use predict() to make a 1-step forecast
predict_AR <- predict(AR_fit)
# Obtain the 1-step forecast using $pred[1]
predict_AR$pred[1]
## [1] 828.6576
# Use predict to make 1-step through 10-step forecasts
predict(AR_fit, n.ahead = 10)
## $pred
## Time Series:
## Start = 1971
## End = 1980
## Frequency = 1
## [1] 828.6576 873.5426 896.2668 907.7715 913.5960 916.5448 918.0377 918.7935
## [9] 919.1762 919.3699
##
## $se
## Time Series:
## Start = 1971
## End = 1980
## Frequency = 1
## [1] 145.3439 162.9092 167.1145 168.1754 168.4463 168.5156 168.5334 168.5380
## [9] 168.5391 168.5394
# Run to plot the Nile series plus the forecast and 95% prediction intervals
ts.plot(Nile, xlim = c(1871, 1980))
AR_forecast <- predict(AR_fit, n.ahead = 10)$pred
AR_forecast_se <- predict(AR_fit, n.ahead = 10)$se
points(AR_forecast, type = "l", col = 2)
points(AR_forecast - 2*AR_forecast_se, type = "l", col = 2, lty = 2)
points(AR_forecast + 2*AR_forecast_se, type = "l", col = 2, lty = 2)
The simple moving average (MA) model:
\(Today = Mean + Noise + Slope * (Yesterday's Noise)\)
More formally:
\(Y_t = \mu + \epsilon_t + \theta \epsilon_{t-1}\)
where \(\epsilon_t\) is mean zero white noise (WN)
Three parameters: the mean \(\mu\), the slope \(\theta\), and the WN variance \(\sigma_\epsilon^2\).
# Generate MA model with slope 0.5
x <- arima.sim(model = list(ma = 0.5), n = 100)
# Generate MA model with slope 0.9
y <- arima.sim(model = list(ma = 0.9), n = 100)
# Generate MA model with slope -0.5
z <- arima.sim(model = list(ma = -0.5), n = 100)
# Plot all three models together
plot.ts(cbind(x, y, z))
# Calculate ACF for x
acf(x)
# Calculate ACF for y
acf(y)
# Calculate ACF for z
acf(z)
MA processes: inflation rate with Mishkin dataset
head(Mishkin)
## pai1 pai3 tb1 tb3 cpi
## [1,] -3.552289 1.129370 1.100854 1.129406 23.5
## [2,] 5.247540 4.001566 1.125513 1.137254 23.6
## [3,] 1.692860 4.492160 1.115715 1.142319 23.6
## [4,] 5.064298 7.817513 1.146380 1.177902 23.7
## [5,] 6.719322 9.433580 1.158520 1.167777 23.8
## [6,] 11.668920 9.976068 1.151104 1.167652 24.1
inflation <- as.ts(Mishkin[, 1])
inflation_changes <- diff(inflation)
ts.plot(inflation, main = "inflation")
ts.plot(inflation_changes, main = "inflation changes: more mean reverting")
Plot the series and its sample ACF:
acf(inflation_changes, lag.max = 24)
MA_inflation_changes <- arima(inflation_changes,
order = c(0, 0, 1))
print(MA_inflation_changes)
##
## Call:
## arima(x = inflation_changes, order = c(0, 0, 1))
##
## Coefficients:
## ma1 intercept
## -0.7932 0.0010
## s.e. 0.0355 0.0281
##
## sigma^2 estimated as 8.882: log likelihood = -1230.85, aic = 2467.7
The lag-1 autocorrelation is strongly negative while the remaining autocorrelations are all close to zero, so an MA model is a good fit.
\(Y_t = \mu + \epsilon_t + \theta \epsilon_{t-1}\)
\(\hat{Y_t} = \hat{\mu} + \hat{\theta}\hat{\epsilon}_{t-1}\)
\(\hat{\epsilon_t} = Y_t - \hat{Y_t}\)
ts.plot(inflation_changes)
MA_inflation_changes_fitted <-
inflation_changes - residuals(MA_inflation_changes)
points(MA_inflation_changes_fitted, type = "l", col = "red", lty = 2)
predict(MA_inflation_changes)
## $pred
## Jan
## 1991 4.831632
##
## $se
## Jan
## 1991 2.980203
predict(MA_inflation_changes, n.ahead = 6)
## $pred
## Jan Feb Mar Apr May Jun
## 1991 4.831631501 0.001049346 0.001049346 0.001049346 0.001049346 0.001049346
##
## $se
## Jan Feb Mar Apr May Jun
## 1991 2.980203 3.803826 3.803826 3.803826 3.803826 3.803826
AIC(), BIC(): information criteria for comparing fitted models; lower values indicate a better trade-off between goodness of fit and model complexity.
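A minimal sketch comparing candidate models for the Nile series with these criteria (it reuses AR_fit from above; fitting an MA(1) model here is an assumption for illustration, not part of the lesson output):
# Fit an MA model to Nile for comparison with the AR fit above
MA_fit <- arima(Nile, order = c(0, 0, 1))
# Lower AIC/BIC indicates the preferred model
AIC(AR_fit)
AIC(MA_fit)
BIC(AR_fit)
BIC(MA_fit)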