library(tseries)
library(tidyverse)
library(plotly)
library(timetk)
library(lubridate)
library(plotly)
library(pracma)

Series 1

Exercice 1.1 - backward differencing

set.seed(22)
# Apply backward differencing on a timeseries created by the following model:
# create timeseries object
t <- seq(1, 100, length = 100)
data <- 2 * t + 0.5 + runif(100, -5, 5)
ts <- ts(data)
# backward differencing
ts_diff <- diff(ts)
# compare lengths of arrays with
print("length difference:")
## [1] "length difference:"
(length_diff <- length(ts) - length(ts_diff))
## [1] 1
# adf.test(ts) # be careful!
plot(ts)

plot(ts_diff)

Exercice 1.2 - remove linear trend

# create reusable function
generate_and_plot_ts <- function(model) {
  set.seed(22)
  t <- seq(1, 100, length = 100)
  data <- model
  ts <- ts(data)
  
  # backward differencing
  ts_diff <- diff(ts)
  plot(ts)
  plot(ts_diff)
}
generate_and_plot_ts(0.5*t + 1 + runif(100, -1, 1))

generate_and_plot_ts(2*t^2 + 3*t + runif(100, -200, 200))

Exercice 1.3

d <- co2
glimpse(d)
##  Time-Series [1:468] from 1959 to 1998: 315 316 316 318 318 ...
head(d)
## [1] 315.42 316.31 316.50 317.56 318.13 318.00
tail(d)
## [1] 364.52 362.57 360.24 360.83 362.49 364.34
# already a time series object!
# ts <- ts(d, start = 1959, end = 1998, frequency = 12)
plot(co2, main = "co2 data")

# generate interactive plot
d_index <- tk_index(d)
(p <- plot_ly(x = ~d_index, y = ~d, mode = 'lines'))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
acf(d)

pacf(d)

# try and error
# d_diff <- (diff(d, lag = 1))
# d_diff2 <- (diff(d_diff, lag = 1))
# d_diff3 <- (diff(d_diff2, lag = 1))
# d_diff4 <- (diff(d_diff3, lag = 1))
# d_diff5 <- (diff(d_diff4, lag = 1))

# 5 differences is optimal
d_diff <- (diff(d, lag = 1, differences = 5))
acf(d_diff)

pacf(d_diff)

# check result with interactive plot
d_diff_index <- tk_index(d_diff)
(p2 <- plot_ly(x = ~d_diff_index, y = ~d_diff, mode = 'lines'))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
# success!

Exercice 1.4

# help trough https://anomaly.io/seasonal-trend-decomposition-in-r/
d <- co2
d_stl <- stl(d, s.window = "periodic")
plot(d_stl)

plot(d_stl, xlim=c(1960, 1961))

# seasonal part:
d_trend <- d_stl$time.series[, 2]
d_season <- d_stl$time.series[, 1]
d_random <- d_stl$time.series[, 3]
# check if it worked correctly
head(d-(d_trend+d_season+d_random))
## [1] 0 0 0 0 0 0
# success - worked!

trend_removed <- d - d_trend
month <- factor(rep(1:12, 39))
print("Season means by month")
## [1] "Season means by month"
(seasn_est <- tapply(trend_removed, month, mean, na.rm = TRUE))
##           1           2           3           4           5           6 
## -0.04487635  0.62146454  1.36652339  2.50163284  2.98469101  2.32710066 
##           7           8           9          10          11          12 
##  0.81156158 -1.25119685 -3.07062195 -3.25086740 -2.05880517 -0.93985088
# decompose gives same results as stl()
d_decomposed = decompose(d, "additive")
plot(as.ts(d_decomposed$seasonal))

plot(as.ts(d_decomposed$trend))

plot(as.ts(d_decomposed$random))

plot(d_decomposed)

Exercice 1.5

# a)
d <- read.table(file = "rainDay.txt")
d$DATE <- dmy(d$DATE)
glimpse(d)
## Observations: 2,922
## Variables: 2
## $ DATE <date> 2000-01-01, 2000-01-02, 2000-01-03, 2000-01-04, 2000-01-...
## $ rain <dbl> 0.00, 12.90, 0.00, 0.05, 3.55, 2.05, 3.50, 7.65, 1.10, 9....
# b)
d_ts <- ts(data = d$rain, start = c(2000, 1), end = c(2007, 12), frequency = 12)

# c)
d_extended <- d %>% 
  mutate(day_of_week = as.factor(weekdays(DATE)),
         month = as.factor(months(DATE)),
         quarter = as.factor(quarters(DATE))) 
glimpse(d_extended) # worked!
## Observations: 2,922
## Variables: 5
## $ DATE        <date> 2000-01-01, 2000-01-02, 2000-01-03, 2000-01-04, 2...
## $ rain        <dbl> 0.00, 12.90, 0.00, 0.05, 3.55, 2.05, 3.50, 7.65, 1...
## $ day_of_week <fct> Samstag, Sonntag, Montag, Dienstag, Mittwoch, Donn...
## $ month       <fct> Januar, Januar, Januar, Januar, Januar, Januar, Ja...
## $ quarter     <fct> Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1...
hist(d_ts)

# hint: the series is right skewed - one should log transform it but...

par(mfrow=c(2,1)) 
plot(d_ts)
plot(log(d_ts))

# ... one do not log transform in this case --> otherwise we lose observations!
# additionally: the time series is not multiplicative

# d)
# set logical order for weekday and month
levels(d_extended$day_of_week) = c("Montag", "Dienstag", "Mittwoch", "Donnerstag", "Freitag", "Samstag", "Sonntag")

levels(d_extended$month) = c("Januar", "Februar", "März", "April", "Mai", "Juni", "Juli", "August", "September", "Oktober", "November", "Dezember")

(weekday_boxplot <- d_extended %>%
  # group_by(day_of_week) %>% 
  # mutate(sum_rain_daily = sum(rain)) %>% 
  # distinct(day_of_week, .keep_all = T) %>% 
  # ungroup() %>% 
  # select(day_of_week, rain) %>% 
  plot_ly(y = ~rain, color = ~day_of_week, type = "box"))
(monthly_boxplot <- d_extended %>%
  plot_ly(y = ~rain, color = ~month, type = "box"))
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
(quarter_boxplot <- d_extended %>%
  plot_ly(y = ~rain, color = ~quarter, type = "box"))
# e) 
(rain_plot <- plot_ly(x = ~d$DATE, y = ~d$rain, mode = 'lines'))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter

Exercice 1.6

  1. period: year & time step: day
  2. period: year & time step: day
  3. period: weeks & time step: hours to day
  4. period: year & time step: minutes to day

Exercice 1.7

d <- read.table("hstart.dat.txt")
ts <- ts(d, start = c(1966, 1), end = c(1974, 12), frequency = 12)

# a) 
(hstart_plot <- plot_ly(x = ~time(ts) , y = ~ts, mode = 'lines'))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
# b)
# skipped loess method, decompose is easier to use
d_decomposed = decompose(ts, "additive")
plot(as.ts(d_decomposed$seasonal))

plot(as.ts(d_decomposed$trend))

plot(as.ts(d_decomposed$random))

plot(d_decomposed)

      1. time series is not stationary. Points (1), (2) and (3) mentioned below in Exercice 1.8 are not met. One can see a clear yearly trend. One can further recognize a trend with two peaks. Conclusion: Get seasonality and trend component out of the time series.
  1. do not know how to code the special filter
# d)
# remove trend
ts_no_seasonality <- ts - d_decomposed$seasonal
# ts_no_seasonality_and_no_trend <- ts_no_seasonality - d_decomposed$trend
# check results
# d_decomposed$random - ts_no_seasonality_and_no_trend

# apply linear trend elimination
ts_no_seasonality_no_linear_trend <- pracma::detrend(ts_no_seasonality, tt = "linear")

# plot data
(hstart_plot_2 <- plot_ly(x = ~time(ts) , y = ~ts_no_seasonality_no_linear_trend, mode = 'lines'))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter

Exercice 1.8

Stationarity if… - (1) all Xt are identically distributed - (2) regular mean - (3) finit and regular variance - (4) autocovariance dependes only on lag

# a)
Et <- rnorm(100, 0, 1)
Et_1 <- diff(Et)
model <- Et[2:100] - (0.5 * Et_1)
ts <- ts(model)
(Y1_plot <- plot_ly(x = ~c(1:99), y = ~model, mode = 'lines'))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
acf(ts)

pacf(ts)

  1. all criterias are allright - this time series is stationary.

b - d) do not understand questions. What is Yt compared with Et?

# b)
Et <- rnorm(100, 0, 1)
Et_1 <- diff(Et)
model <- Et[2:100] - (0.5 * Et_1)
ts <- ts(model)
(Y1_plot <- plot_ly(x = ~c(1:99), y = ~model, mode = 'lines'))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
acf(ts)

pacf(ts)