Analysis of Spatio-Temporal Data - Exercise 02

Christopher Stephan

1. Load the meteo dataset and fit a model for the seasonal component using LOESS.

# fitting a model for the seasonal component of humidity using loess

load("/home/christopher/Downloads/meteo.RData")
plot(meteo$hours, meteo$humidity, type = "l", main = "Humidity")
d = data.frame(hours = meteo$hours, humidity = meteo$humidity)

yloess1 = loess(meteo$humidity ~ meteo$hours, span = 0.05, d)
yloess2 = loess(meteo$humidity ~ meteo$hours, span = 0.01, d)
yloess3 = loess(meteo$humidity ~ meteo$hours, span = 0.5, d)
yloess4 = loess(meteo$humidity ~ meteo$hours, span = 0.1, d)
yloess5 = loess(meteo$humidity ~ meteo$hours, span = 1.5, d)

ypredict1 = predict(yloess1, data.frame(x = c(1:length(meteo$hours))))
ypredict2 = predict(yloess2, data.frame(x = c(1:length(meteo$hours))))
ypredict3 = predict(yloess3, data.frame(x = c(1:length(meteo$hours))))
ypredict4 = predict(yloess4, data.frame(x = c(1:length(meteo$hours))))
ypredict5 = predict(yloess5, data.frame(x = c(1:length(meteo$hours))))

lines(meteo$hours, ypredict1, col = "red")
lines(meteo$hours, ypredict2, col = "blue", )
lines(meteo$hours, ypredict3, col = "orange")
lines(meteo$hours, ypredict4, col = "green")
lines(meteo$hours, ypredict5, col = "pink")

plot of chunk unnamed-chunk-1

Plot ACF and PACF of the residuals. Try to fit a suitable AR(p) to the residuals using Box-Jenkins.

plot(yloess5$residuals, type = "l", main = "Residuals")

plot of chunk unnamed-chunk-2

acf(yloess5$residuals)

plot of chunk unnamed-chunk-2

pacf(yloess5$residuals)

plot of chunk unnamed-chunk-2

Try to fit a suitable AR(p) to the residuals using Box-Jenkins.

ar(x = yloess5$residuals)
## 
## Call:
## ar(x = yloess5$residuals)
## 
## Coefficients:
##      1       2       3       4       5       6       7       8       9  
##  0.868   0.066   0.052   0.021  -0.021   0.024   0.015  -0.045   0.013  
##     10      11      12      13      14      15      16      17      18  
##  0.021  -0.023   0.027  -0.026  -0.008   0.024   0.027  -0.019   0.013  
##     19      20  
## -0.011  -0.018  
## 
## Order selected 20  sigma^2 estimated as  0.762
# AR(20) model
arima(x = yloess5$residuals, order = c(20, 0, 0))
## 
## Call:
## arima(x = yloess5$residuals, order = c(20, 0, 0))
## 
## Coefficients:
##         ar1    ar2    ar3    ar4     ar5    ar6    ar7     ar8    ar9
##       0.861  0.070  0.055  0.024  -0.022  0.017  0.021  -0.045  0.012
## s.e.  0.010  0.013  0.013  0.013   0.013  0.013  0.013   0.013  0.013
##        ar10    ar11   ar12    ar13    ar14   ar15   ar16    ar17   ar18
##       0.023  -0.028  0.028  -0.024  -0.011  0.027  0.026  -0.015  0.009
## s.e.  0.013   0.013  0.013   0.013   0.013  0.013  0.013   0.013  0.013
##         ar19   ar20  intercept
##       -0.010  -0.02     -0.166
## s.e.   0.013   0.01      7.071
## 
## sigma^2 estimated as 0.726:  log likelihood = -12452,  aic = 24948
tsdiag(arima(yloess5$residuals, order = c(20, 0, 0)))

plot of chunk unnamed-chunk-3

Box.test(arima(yloess5$residuals, order = c(20, 0, 0))$residuals, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  arima(yloess5$residuals, order = c(20, 0, 0))$residuals
## X-squared = 0, df = 1, p-value = 0.9991

2. Compute the periodogram for the CO2 data without trend removal. What do you find?

N = length(co2)
I = abs(fft(co2)/sqrt(N))^2
## Scale periodogram
P = (4/N) * I  ## scaled periodogram
f = (0:floor(N/2))/N
plot(f, I[1:((N/2) + 1)], type = "o", xlab = "frequency", ylab = "", main = "Raw periodogram without trend removal")

plot of chunk unnamed-chunk-4


spec.pgram(co2, detrend = FALSE, col = "blue", main = "without (blue) and with (red) trend removal")
spec.pgram(co2, detrend = TRUE, col = "red", add = TRUE)

plot of chunk unnamed-chunk-4

The resulting periogram shows only one peak at f=0. It is typical when the data has not been detrended (trend not removed). Since there is a peak at 0 one cannot deduce any periodic characteristics for the time series

3. Use function stl() to remove the trend/seasonal component from the CO2 data. Recompute the periodogram and compare it to that in the previous exercise.

data(co2)
# use STL to decompose data in order to identify the trend
model = stl(co2, s.window = "periodic")
trend = model$time.series[, "trend"]
co2_detrended = co2 - trend
plot(co2_detrended)

plot of chunk unnamed-chunk-5

N = length(co2_detrended)
I = abs(fft(co2_detrended)/sqrt(N))^2
P = (4/N) * I
f = (0:floor(N/2))/N
plot(f, I[1:((N/2) + 1)], type = "o", xlab = "frequency", ylab = "", main = "Raw periodogram with trend removal")

plot of chunk unnamed-chunk-5

The dominant peak area occurs somewhere around a frequency of 0.08. This corresponds to a period of about 1/.08 = 12.5 time periods. That’s 12.5 years, since this is annual data. Thus there appears to be a dominant periodicity of about 12.5 years in co2.

4. Draw 500 samples from the standard normal distribution. Compute the peridogram of this white noise time series.

sample = rnorm(500)
N = 500
I = abs(fft(sample)/sqrt(N))^2
P = (4/N) * I
f = (0:floor(N/2))/N
plot(f, I[1:((N/2) + 1)], type = "o", xlab = "frequency", ylab = "", main = "Raw Periodogram")

plot of chunk unnamed-chunk-6

5. Simulate from the following cosine model: Yt = A ∗ cos 2πωt + φ + Wt with ω=1/50, A=2, φ=0.6π and Wt~N(0,σ2). Simulate once with σ2=0 and once with σ2=5.

sample2 = rnorm(500, 0, sqrt(5))
t = 1:500
model = 2 * cos(1/50 * 2 * pi * t + 0.6 * pi)
ysim = model + sample2
plot(ysim)
lines(model, type = "l", col = "blue")

plot of chunk unnamed-chunk-7

6. Simulate from an AR(1)-model using α=0.8 and α=-0.8. Try to explain the differences in the periodograms of these two series (R function arima.sim()). Try function spec.gram() to compute the periodogram.

sim1 = arima.sim(list(order = c(1, 0, 0), ar = 0.8), n = 200)
pacf(sim1)

plot of chunk unnamed-chunk-8

plot(sim1)

plot of chunk unnamed-chunk-8


sim2 = arima.sim(list(order = c(1, 0, 0), ar = -0.8), n = 200)
pacf(sim2)

plot of chunk unnamed-chunk-8

plot(sim2)

plot of chunk unnamed-chunk-8


spec.pgram(sim1, detrend = TRUE, col = "blue", main = "blue=sim1, red=sum2")
spec.pgram(sim2, detrend = TRUE, col = "red", add = TRUE, )

plot of chunk unnamed-chunk-8

7. Periodograms are difficult to interpret, and may be biased. Find out what is meant by „leakage“ and „antialising“. Write down a brief explanation of these phenomena.

Leakage: The term describes the fact that there are, due to a finite length observation period of a signal in the context of spectral analysis such as Fourier analysis in the calculated frequency spectrum and frequency components that do not occur with only a theoretical infinitely long observation period.

Antialising: A low-pass filtering is applied to reduce high frequency components of the input signal.