packages <- c("ggthemes", "knitr", "kableExtra", "scales", "ggforce", "ggsci",
              "broom", "lubridate", "fBasics", "tidyquant", "tidyverse", "sweep")
sapply(packages, require, character.only = TRUE, quietly = TRUE)
options(knitr.table.format = "html", knitr.kable.NA = "")
theme_set(theme_tufte(base_size = 14) +
                theme(panel.border = element_rect(fill = NA),
                      panel.grid.major = element_line(color = "gray78"),
                      legend.background = element_rect(),
                      legend.position = "top",
                      strip.background = element_rect(fill = "grey88")))
url <- "https://www.sedlabanki.is/xmltimeseries/Default.aspx?DagsFra=1999-01-05&DagsTil=2018-09-28&TimeSeriesID=4064&Type=csv"
data <- read_delim(url, delim = ";", col_names = as.character(seq(1:8))) %>%
      select(7:8) %>%
      set_names(c("Date", "Series")) %>%
      mutate(Date = mdy_hms(Date))
data %>%
      ggplot(aes(Date, Series)) +
      geom_line() +
      scale_y_log10(breaks = pretty_breaks(8))

1. hluti: Ávöxtun

# Hér kemur R-kóðablokk.
# Nauðsynlegt er að hafa mismunandi heiti á öllum blokkum í .Rmd skránni.
# T.d. heitir þessi blokk 'return'.
# basicStats(...)
data <- data %>%
      mutate(Log = c(NA, diff(log(Series))),
             Simple = exp(Log) - 1)
data %>%
      gather(Type, Return, Log, Simple) %>%
      ggplot(aes(Date, Return)) +
      geom_line() +
      facet_grid("Type") +
      scale_y_continuous(breaks = pretty_breaks(8), labels = percent) +
      coord_cartesian(ylim = c(-0.25, 0.25))

data %>%
      na.omit %>%
      summarize(N = n() - 1,
                Væntigildi = mean(Log),
                Dreifni = var(Log),
                Staðalfrávik = sqrt(Dreifni),
                Skeifni = sum((Log - Væntigildi)^3) / (N * Staðalfrávik^3),
                `Umram reisni` = sum((Log - Væntigildi)^4) / (N * Staðalfrávik^4) - 3) %>%
      gather(Stat, Value) %>%
      kable(format = "html") %>%
      kable_styling(bootstrap_options = c("striped", "hover"))
Stat Value
N 4919.0000000
Væntigildi 0.0000928
Dreifni 0.0000701
Staðalfrávik 0.0083728
Skeifni 1.5059743
Umram reisni 202.8778739

2. hluti: Dreifing

data %>%
      na.omit %>%
      summarize(N = n(),
                Væntigildi = mean(Log),
                Dreifni = var(Log),
                Staðalfrávik = sqrt(Dreifni),
                Skeifni = sum((Log - Væntigildi)^3) / ((N - 1) * Staðalfrávik^3),
                `Umram reisni` = sum((Log - Væntigildi)^4) / ((N - 1) * Staðalfrávik^4) - 3) %>%
      summarize(z_Væntigildi = Væntigildi / (Staðalfrávik / sqrt(N)),
                z_Samhverfa = Skeifni / sqrt(6 / N),
                z_Halar = (`Umram reisni` - 3) / sqrt(24 / N),
                p_Væntigildi = qt(z_Væntigildi, df = 4919),
                p_Samhvefa = 2 * (1  - pnorm(z_Samhverfa)),
                p_Halar = 2 * (1 - pnorm(z_Halar))) %>%
      (function(x) {
            Z <- t(x)[1:3, , drop = T]
            P <- t(x)[4:6, , drop = T]
            Breyta <- names(x)[1:3]
            cbind(Breyta, Z, P)
      }) %>%
      as_tibble %>%
      mutate(Breyta = str_replace(Breyta, "z_", "")) %>%
      mutate_at(c("Z", "P"), as.numeric) %>%
      kable(format = "html") %>%
      kable_styling(bootstrap_options = c("striped", "hover"))
Breyta Z P
Væntigildi 0.7776649 0.7643922
Samhverfa 43.1245399 0.0000000
Halar 2861.8156331 0.0000000
plot_dat <- summarize(data, mean = mean(Log, na.rm = T), sd = sd(Log, na.rm = T))
ggplot(data, aes(Log)) +
      geom_density(aes(col = "Raundreifing")) +
      stat_function(fun = dnorm, n = 1000, args = list(mean = plot_dat$mean,
                                                       sd = plot_dat$sd),
                    aes(col = "Vænt dreifing")) +
      scale_color_jama(guide = guide_legend(title = "")) +
      coord_cartesian(xlim = c(-0.025, 0.025))

3. hluti: Sjálffylgnifall

data %>%
      select(Date, Log) %>%
      tq_mutate(select = Log, mutate_fun = lag.xts, k = 1:2,
                col_rename = paste0("Lag_", 1:2)) %>%
      gather(Lag, value, contains("Lag")) %>%
      group_by(Lag) %>%
      summarize(AC_cor = cor(Log, value, use = "pairwise.complete.obs"),
                AC_cov = cov(Log, value, use = "pairwise.complete.obs") / 
                      (sqrt(var(Log, use = "pairwise.complete.obs") * 
                                  var(value, use = "pairwise.complete.obs"))),
                mean = mean(Log, na.rm = T),
                AC_fmla = sum((Log - mean) * (value - mean), na.rm = T) / 
                      sum((Log - mean)^2, na.rm = T)) %>%
      select(-mean) %>%
      kable(format = "html") %>%
      kable_styling(bootstrap_options = c("striped", "hover"))
Lag AC_cor AC_cov AC_fmla
Lag_1 -0.1560283 -0.1560421 -0.1560262
Lag_2 0.1139076 0.1139269 0.1138348
k <- 1:35
col_names <- paste0("lag_", k)
data %>%
      select(Date, Log) %>%
      tq_mutate(select = Log, mutate_fun = lag.xts, k = k,
                col_rename = col_names) %>%
      gather(Lag, value, -Date, -Log) %>%
      mutate(Lag = factor(Lag, levels = col_names, labels = k),
             Lag = as.character(Lag),
             Lag = as.numeric(Lag)) %>%
      group_by(Lag) %>%
      summarize(AC = cor(Log, value, use = "pairwise.complete.obs"),
                Type = ifelse(abs(AC) >= qnorm(0.975) / sqrt(n()),
                              "Já",
                              "Nei")) %>%
      mutate(Lag = str_extract(Lag, "[0-9]+") %>% as.numeric) %>%
      ggplot(aes(Lag, AC, col = Type)) +
      geom_hline(yintercept = qnorm(0.975) / sqrt(nrow(data)), lty = 2, col = "slateblue") +
      geom_hline(yintercept = qnorm(0.025) / sqrt(nrow(data)), lty = 2, col = "slateblue") +
      geom_segment(aes(xend = Lag, yend = 0), col = "gray50") +
      geom_point() +
      scale_x_continuous(breaks = c(1, seq(5, 35, 5))) +
      scale_y_continuous(breaks = pretty_breaks(8)) +
      coord_cartesian(ylim = c(-0.15, 0.15)) +
      scale_color_jama(guide = guide_legend(title = "Marktækt frábruðgið núll?",
                                            title.position = "top",
                                            title.hjust = 0))

Box.test(data$Log, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  data$Log
## X-squared = 119.85, df = 1, p-value < 2.2e-16

4. hluti: \(AR(2)\) líkan

mod <- arima(data$Log[-1], order = c(2, 0, 0))
mod %>%
      tidy %>%
      select(term, estimate) %>%
      spread(term, estimate) %>%
      mutate(ar0 = intercept * (1 - ar1 - ar2)) %>%
      select(ar0, ar1, ar2) %>%
      mutate_at(vars(contains("ar")), function(x) round(x, digits = 5)) %>%
      mutate(fmla = paste(ar0, " - ", 
                          abs(ar1), " $\\cdot$ $r_{t - 1}$ + ", 
                          ar2, " $\\cdot$ $r_{t - 2}$")) %>%
      gather(term, estimate, contains("ar")) %>%
      select(term, estimate, fmla) %>%
      set_names(c("Stiki", "Gildi", "Formúla")) %>%
      kable(format = "html", escape = FALSE) %>%
      kable_styling %>%
      collapse_rows(3)
Stiki Gildi Formúla
ar0 0.00010 1e-04 - 0.14168 \(\cdot\) \(r_{t - 1}\) + 0.0918 \(\cdot\) \(r_{t - 2}\)
ar1 -0.14168
ar2 0.09180
mod %>%
      tidy %>%
      select(term, estimate) %>%
      spread(term, estimate) %>%
      summarize(x1 = (ar1 + sqrt(ar1^2 + 4 * ar2)) / 2,
                x2 = (ar1 - sqrt(ar1^2 + 4 * ar2)) / 2) %>%
      kable %>%
      kable_styling()
x1 x2
0.2403175 -0.3820002
# head(logReturn) # Sýnum fyrstu 6 gildi raðarinnar.
# ar2likanid$residuals
data %>%
      select(Date, Log) %>%
      (function(x) {
            x$fitted <- forecast:::fitted.Arima(arima(x$Log, order = c(2, 0, 0)))
            x$resid <- x$Log - x$fitted
            x
      }) %>%
      slice(3:5) %>%
      kable %>%
      kable_styling
Date Log fitted resid
1999-01-07 10:53:00 -0.0036923 5.485125e-04 -0.004240824
1999-01-08 10:49:00 -0.0016042 3.502394e-04 -0.001954485
1999-01-11 10:53:00 -0.0047041 -1.425006e-05 -0.004689893
# acf(ar2likanid$residuals)
# ( Box.test(..., lag=..., type='Ljung' ) )
Box.test(fitted(arima(data$Log, order = c(2, 0, 0))), type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  fitted(arima(data$Log, order = c(2, 0, 0)))
## X-squared = 1661.7, df = 1, p-value < 2.2e-16
data %>%
      select(Date, Log) %>%
      (function(x) {
            x$fitted <- fitted(arima(x$Log, order = c(2, 0, 0)))
            x$resid <- x$Log - x$fitted
            x
      }) %>%
      select(Date, resid) %>%
      tq_mutate(select = resid, 
                mutate_fun = lag.xts, 
                k = 1:35, 
                col_rename = paste0("Lag_", 1:35)) %>%
      gather(Lag, value, contains("Lag")) %>%
      group_by(Lag) %>%
      summarize(AC = cor(resid, value, use = "pairwise.complete.obs"),
                Type = ifelse(abs(AC) >= qnorm(0.975) / sqrt(n()),
                              "Já",
                              "Nei")) %>%
      mutate(Lag = str_extract(Lag, "[0-9]+") %>% as.numeric) %>%
      ggplot(aes(Lag, AC, col = Type)) +
      geom_hline(yintercept = qnorm(0.975) / sqrt(nrow(data)), lty = 2, col = "slateblue") +
      geom_hline(yintercept = qnorm(0.025) / sqrt(nrow(data)), lty = 2, col = "slateblue") +
      geom_segment(aes(xend = Lag, yend = 0), col = "gray50") +
      geom_point() +
      scale_x_continuous(breaks = c(1, seq(5, 35, 5))) +
      scale_y_continuous(breaks = pretty_breaks(8)) +
      coord_cartesian(ylim = c(-0.15, 0.15)) +
      scale_color_jama(guide = guide_legend(title = "Marktækt frábruðgið núll?",
                                            title.position = "top",
                                            title.hjust = 0))

# tail(logReturn) # Sýnum síðustu 6 gildi.
# ( ... <- predict(ar2likanid, n.ahead=3, se.fit=TRUE) )
coefs <- mod %>%
      tidy %>%
      select(term, estimate) %>%
      spread(term, estimate) %>%
      mutate(ar0 = intercept * (1 - ar1 - ar2)) %>%
      select(ar2, ar1, ar0)

data %>%
      mutate(Date = as.Date(Date)) %>%
      select(Date, Log) %>%
      tail(2) %>%
      (function(x) {
            log <- x$Log
            date <- x$Date
            for (i in 1:3) {
                  n <- length(log)
                  new_log <- coefs$ar0 + coefs$ar1 * log[n] + coefs$ar2 * log[n - 1]
                  log <- c(log, new_log)
            }
            date <- c(date, date[2] + 1:3)
            data_frame(Date = date, my_Log = log)
      }) %>%
      mutate(predict_Log = c(my_Log[1:2], predict(mod, n.ahead = 3)$pred)) %>%
      kable %>%
      kable_styling
Date my_Log predict_Log
2018-09-26 -0.0203501 -0.0203501
2018-09-27 0.0000000 0.0000000
2018-09-28 -0.0017706 -0.0017706
2018-09-29 0.0003484 0.0003484
2018-09-30 -0.0001144 -0.0001144

5. hluti: \(AR(p)\) líkan

# ( ... <- ar(..., aic=TRUE) )
# ...$aic
ar_p <- ar(data$Log[-1], aic = TRUE)

AR(p) líkanið hefur order 35. Helsta annað gildi sem kemur til greina er \(p = 34\) þar sem AIC hækkar ekki um of við að sleppa lag nr 35.

data_frame(aic = ar_p$aic) %>%
      mutate(AR = seq_along(aic) - 1) %>%
      select(AR, aic) %>%
      top_n(8, wt = -aic) %>%
      arrange(aic) %>%
      kable %>%
      kable_styling
AR aic
35 0.000000
34 1.125256
36 1.999368
30 6.983614
31 8.862479
32 10.821246
33 11.426641
21 15.712463
# ( Box.test(..., lag=..., type='Ljung' ) )
data %>%
      na.omit %>%
      select(Date) %>%
      mutate(resid = ar_p$resid) %>%
      na.omit %>%
      tq_mutate(select = resid, mutate_fun = lag.xts, k = k,
                col_rename = col_names) %>%
      gather(Lag, value, -Date, -resid) %>%
      mutate(Lag = factor(Lag, levels = col_names, labels = k),
             Lag = as.character(Lag),
             Lag = as.numeric(Lag)) %>%
      group_by(Lag) %>%
      summarize(AC = cor(resid, value, use = "pairwise.complete.obs"),
                Type = ifelse(abs(AC) >= qnorm(0.975) / sqrt(n()),
                              "Já",
                              "Nei")) %>%
      mutate(Lag = str_extract(Lag, "[0-9]+") %>% as.numeric) %>%
      ggplot(aes(Lag, AC, col = Type)) +
      geom_hline(yintercept = qnorm(0.975) / sqrt(nrow(data)), lty = 2, col = "slateblue") +
      geom_hline(yintercept = qnorm(0.025) / sqrt(nrow(data)), lty = 2, col = "slateblue") +
      geom_segment(aes(xend = Lag, yend = 0), col = "gray50") +
      geom_point() +
      scale_x_continuous(breaks = c(1, seq(5, 35, 5))) +
      scale_y_continuous(breaks = pretty_breaks(8)) +
      coord_cartesian(ylim = c(-0.15, 0.15)) +
      scale_color_jama(guide = guide_legend(title = "Marktækt frábruðgið núll?",
                                            title.position = "top",
                                            title.hjust = 0))

# ( ... <- predict(..., n.ahead=20, se.fit=TRUE) )
data %>%
      mutate(Date = as.Date(Date)) %>%
      select(Date, Log) %>%
      tail(60) %>%
      mutate(se = 0) %>%
      (function(x) {
            date <- c(x$Date, max(x$Date) + 1:20)
            pred <- predict(ar_p, n.ahead = 20)
            log <- c(x$Log, pred$pred)
            se <- c(x$se, pred$se)
            data_frame(Date = date, Log = log, se = se)
      }) %>%
      ggplot(aes(Date, Log,
                 ymin = Log - se,
                 ymax = Log + se)) +
      geom_line() +
      geom_ribbon(alpha = 0.3) +
      labs(title = "Forspáð lograáxötun með eins staðalfráviks öryggismörkum",
           x = "Date", 
           y = "Lograávöxtun")