Seasonal Decomposition by the Fourier Coefficients

by Francisco Parra

5 August 2014

1. Time series

Time series:

IPI. Base 1005. Enero 2002-Septiembre 2013

Tasa interanual de crecimiento del PIB de cantabria en indices de volumen (pibiv). 1T 2001 - 4T 2013

Población (pob). EPA 1T 2005 - 1T 2014

Activos (acti). EPA 1T 2005 - 1T 2014

Ocupados (ocu). EPA 1T 2005 - 1T 2014

Parados (par). EPA 1T 2005 - 1T 2014

Parados primer empleo (par1). EPA 1T 2005 - 1T 2014

Inactivos (ina). EPA 1T 2005 - 1T 2014

Afiliados a todos los regímenes (trg). Enero 1999-Mayo 2014

Afiliados régimen general (tg). Enero 1999-Mayo 2014

Afiliados régimen autónomo (reta). Enero 1999-Mayo 2014

IBEX-35 Datos diarios.

ipi <- read.csv("ipi.csv", header = FALSE, 
    sep = ",", dec = ".")
ipi <- ipi$V1
pibiv <- read.csv("pibiv.csv", header = FALSE, 
    sep = " ", dec = ",")
pib <- pibiv$V1
epa <- read.csv("epa.csv", header = FALSE, 
    sep = ";", dec = ",")
pob <- epa$V1
act <- epa$V2
ocu <- epa$V3
par <- epa$V4
par1 <- epa$V5
ina <- epa$V6
afi <- read.csv("afi.csv", header = FALSE, 
    sep = ";", dec = ".")
trg<- afi$V1
rg <- afi$V2
reta <- afi$V3
a <- rnorm(2000,0,2)
b <- seq(1:2000)
c <- a+ 0.1*b-0.00002*b^2
ibex <- read.csv("ibex.csv", header=FALSE, sep = ",", dec = ".")
ibex <- ibex$V1[1:500]

2. Graphic

## plot time series
plot(ts(ipi,frequency = 12), type="l")

plot of chunk unnamed-chunk-2

plot(ts(pib,frequency = 4), type="l")

plot of chunk unnamed-chunk-2

plot(ts(pob,frequency = 4), type="l")

plot of chunk unnamed-chunk-2

plot(ts(act,frequency = 4), type="l")

plot of chunk unnamed-chunk-2

plot(ts(ocu,frequency = 4), type="l")

plot of chunk unnamed-chunk-2

plot(ts(par,frequency = 4), type="l")

plot of chunk unnamed-chunk-2

plot(ts(par1,frequency = 4), type="l")

plot of chunk unnamed-chunk-2

plot(ts(ina,frequency = 4), type="l")

plot of chunk unnamed-chunk-2

plot(ts(trg,frequency = 12), type="l")

plot of chunk unnamed-chunk-2

plot(ts(rg,frequency = 12), type="l")

plot of chunk unnamed-chunk-2

plot(ts(reta,frequency = 12), type="l")

plot of chunk unnamed-chunk-2

plot(ts(ibex,frequency=7),  type="l")

plot of chunk unnamed-chunk-2

2. Function R

Function gdf(a) and Function gdt (a)

The gdf(a) function transforms the data from the amplitude-time domain the amplitude-frequency domain pre-multiplied by the orthogonal matrix ,$W$, whose elements are defined as follows (Harvey,1978): \[\begin{equation} w_{jt} = \left\lbrace \begin{array}{ll} \left(\frac{1}T\right) ^\frac{1}2 & \forall j=1\\ \left(\frac{2}T\right) ^\frac{1}2 \cos\left[\frac{\pi j(t-1)}T\right] & \forall j=2,4,6,..\frac{(T-2)}{(T-1)}\\ \left(\frac{2}T\right) ^\frac{1}2 \sin\left[\frac{\pi (j-1)(t-1)}T\right] & \forall j=3,5,7,..\frac{(T-2)}T\\ \left(\frac{1}T\right) ^\frac{1}2 (-1)^{t+1} & \forall j=T \end{array} \right. \end{equation}\] The gdt (a) function transforms the data from the amplitude-frequency domain to the amplitude-time domain pre-multiplied by the transpose of $W$ :

gdf <- function(a) {
a <- matrix(a, nrow=1)
n <- length(a)
uno <- as.numeric (1:n)
A <- matrix(rep(sqrt(1/n),n), nrow=1)
if(n%%2==0){
for(i in 3:n-1){ 
if(i%%2==0) {
A1 <- matrix(sqrt(2/n)*cos(pi*(i)*(uno-1)/n), nrow=1)
A <- rbind(A,A1)}
 else {
A2 <- matrix(sqrt(2/n)*sin(pi*(i-1)*(uno-1)/n), nrow=1)
A <- rbind(A,A2)
}} 
AN <- matrix(sqrt(1/n)*(-1)^(uno+1), nrow=1)
A <- rbind(A,AN)
A%*%t(a)
} else {
for(i in 3:n-1){ 
if(i%%2==0) {
A1 <- matrix(
sqrt(2/n)*cos(pi*(i)*(uno-1)/n), nrow=1)
A <- rbind(A,A1)}
 else {
A2 <- matrix(sqrt(2/n)*sin(pi*(i-1)*(uno-1)/n), nrow=1)
A <- rbind(A,A2)
}} 
AN <- matrix(
sqrt(2/n)*sin(pi*(n-1)*(uno-1)/n), nrow=1)
A <- rbind(A,AN)
A%*%t(a)
}
}

gdt <- function(a) {
a <- matrix(a, nrow=1)
n <- length(a)
uno <- as.numeric (1:n)
A <- matrix(rep(sqrt(1/n),n), nrow=1)
if(n%%2==0){
for(i in 3:n-1){ 
if(i%%2==0) {
A1 <- matrix(sqrt(2/n)*cos(pi*(i)*(uno-1)/n), nrow=1)
A <- rbind(A,A1)}
 else {
A2 <- matrix(sqrt(2/n)*sin(pi*(i-1)*(uno-1)/n), nrow=1)
A <- rbind(A,A2)}} 
AN <- matrix(sqrt(1/n)*(-1)^(uno+1), nrow=1)
A <- rbind(A,AN)
t(A)%*%t(a)
} else {
for(i in 3:n-1){ 
if(i%%2==0) {
A1 <- matrix(
sqrt(2/n)*cos(pi*(i)*(uno-1)/n), nrow=1)
A <- rbind(A,A1)}
 else {
A2 <- matrix(sqrt(2/n)*sin(pi*(i-1)*(uno-1)/n), nrow=1)
A <- rbind(A,A2)}} 
AN <- matrix(
sqrt(2/n)*sin(pi*(n-1)*(uno-1)/n), nrow=1)
A <- rbind(A,AN)
t(A)%*%t(a)
}
}

Function cdf(a)

Gets the auxiliary matrix vector operations in time domain and frequency domain, pre-multiplies a vector by the orthogonal matrix $W$ and its transpose, Parra F. (2013)

cdf <- function(a) {
a <- matrix(a, nrow=1)
n <- length(a)
uno <- as.numeric (1:n)
A <- matrix(rep(sqrt(1/n),n), nrow=1)
if(n%%2==0){
for(i in 3:n-1){ 
if(i%%2==0) {
A1 <- matrix(sqrt(2/n)*cos(pi*(i)*(uno-1)/n), nrow=1)
A <- rbind(A,A1)}
 else {
A2 <- matrix(sqrt(2/n)*sin(pi*(i-1)*(uno-1)/n), nrow=1)
A <- rbind(A,A2)}} 
AN <- matrix(sqrt(1/n)*(-1)^(uno+1), nrow=1)
A <- rbind(A,AN)
I<- diag(c(a))
B <- A%*%I
B%*%t(A)
} else {
for(i in 3:n-1){ 
if(i%%2==0) {
A1 <- matrix(
sqrt(2/n)*cos(pi*(i)*(uno-1)/n), nrow=1)
A <- rbind(A,A1)}
 else {
A2 <- matrix(sqrt(2/n)*sin(pi*(i-1)*(uno-1)/n), nrow=1)
A <- rbind(A,A2)}} 
AN <- matrix(
sqrt(2/n)*sin(pi*(n-1)*(uno-1)/n), nrow=1)
A <- rbind(A,AN)
I<- diag(c(a))
B <- A%*%I
B%*%t(A)
}
}

Function periodograma (a)

Calculates and displays the spectrum of the variable “a”

periodograma <- function(a) {
cf <- gdf(a)
n <- length(a)
if (n%%2==0) {
m1 <- c(0)
m2 <- c()
for(i in 1:n){
if(i%%2==0) m1 <-c(m1,cf[i]) else m2 <-c(m2,cf[i])}
m2 <-c(m2,0) 
frecuencia <- seq(0:(n/2)) 
frecuencia <- frecuencia-1
omega <- pi*frecuencia/(n/2)
periodos <- n/frecuencia
densidad <- (m1^2+m2^2)/(4*pi)
tabla <- data.frame(omega,frecuencia, periodos,densidad)
tabla$densidad[(n/2+1)] <- 4*tabla$densidad[(n/2+1)]
data.frame(tabla[2:(n/2+1),])}
else {m1 <- c(0)
m2 <- c()
for(i in 1:(n-1)){
if(i%%2==0) m1 <-c(m1,cf[i]) else m2 <-c(m2,cf[i])}
m2 <-c(m2,cf[n]) 
frecuencia <- seq(0:((n-1)/2)) 
frecuencia <- frecuencia-1
omega <- pi*frecuencia/(n/2)
periodos <- n/frecuencia
densidad <- (m1^2+m2^2)/(4*pi)
tabla <- data.frame(omega,frecuencia, periodos,densidad)
data.frame(tabla[2:((n+1)/2),])}
}

Function gperiodogrma (a)

Plot to the spectrum of a variable “a”

gperiodograma <- function(a) {
tabla <- periodograma(a)
plot(tabla$frecuencia,tabla$densidad,
main = "Espectro", 
ylab = "densidad",
xlab="frecuencia",type = "l",
col="#ff0000")}

Function sort.data.frames (a,~b)

Sort the rows of a data frame “a” by column “b” and a one-sided formula with +/- indicating ascending/descending.

sort.data.frame <- function(form,dat){ 
# Author: Kevin Wright 
# Some ideas from Andy Liaw 
# http://tolstoy.newcastle.edu.au/R/help/04/07/1076.html 
# Use + for ascending, - for decending. 
# Sorting is left to right in the formula 
  

# Useage is either of the following: 
# sort.data.frame(~Block-Variety,Oats) 
# sort.data.frame(Oats,~-Variety+Block) 
  

# If dat is the formula, then switch form and dat 
  if(inherits(dat,"formula")){ 
    f=dat 
    dat=form 
    form=f 
  } 
  if(form[[1]] != "~") 
    stop("Formula must be one-sided.")

# Make the formula into character and remove spaces 
  formc <- as.character(form[2]) 
  formc <- gsub(" ","",formc) 
# If the first character is not + or -, add + 
  if(!is.element(substring(formc,1,1),c("+","-")))     formc <- paste("+",formc,sep="") 
# Extract the variables from the formula 
vars <- unlist(strsplit(formc, "[\\+\\-]"))   
  vars <- vars[vars!=""] # Remove spurious "" terms

# Build a list of arguments to pass to "order" function 
  calllist <- list() 
  pos=1 # Position of + or - 
  for(i in 1:length(vars)){ 
    varsign <- substring(formc,pos,pos) 
    pos <- pos+1+nchar(vars[i]) 
    if(is.factor(dat[,vars[i]])){

      if(varsign=="-")
        calllist[[i]] <- -rank(dat[,vars[i]])
      else
        calllist[[i]] <- rank(dat[,vars[i]])
    } 
    else {
      if(varsign=="-")
        calllist[[i]] <- -dat[,vars[i]]
      else
        calllist[[i]] <- dat[,vars[i]]

    } 
  } 
  dat[do.call("order",calllist),]
}

Function descomponer(y,b)

Decompose a time series into seasonal, trend and irregular components using the transform amplitude-frequency domain to time series. Decompose $y$ of frequency $b$ or number of times in each unit time interval. For example, one could use a value of 7 for frequency when the data are sampled daily, and the natural time period is a week, or 4 and 12 when the data are sampled quarterly and monthly and the natural time period is a year. Transforms the time series in amplitude-frequency domain, by a band spectrum regresión (Parra, F. ,2013) of the serie $y_t$ and a lineal trend distance into $y_1$ and $y_n$, in which regression is carried out in the low and the sesaonal amplitude-frequency .The low frequency are the periodicity a $ $ or $\frac{n-1}{2b}$ , if $n$ is odd. The seasonal frequency are the periodicity: $2 \frac{n}{2b}$,$3n 2 \frac{n}{2b}$,$4 \frac{n}{2b}$,.. .The time serie by trend an seasonal is named $TDST$.

$TD$ is calculate by band spectrum regresión of the serie $y_ t$ and a lineal trend distance into $y_1$ and $y_n$, in which regression is carried out in low amplitude- frequency.

The seasonal serie $ST$ result to take away $TD$ to $TDST$ .

The irregular serie $IR$ result to take away $TDST$ to $y_t$.

descomponer <- function (y,b) {
  n <- length(y)
if(n%%2==0) {f2 <- n/(2*b)} else {
 f2 <- (n-1)/(2*b)}
per <- periodograma(y)
# modelo para obtener serie con tendencia y estacionalidad
S1 <- data.frame(f=per$frecuencia,p=per$densidad)
S <- sort.data.frame (S1,~-p)
p <- as.numeric(S$p)
n2 <- length(p)
s <- p[1]
for (i in 2:n2) {
        s1 <- p[i] + s[(i - 1)]
        s <- c(s, s1)
        s2 <- s/s[n2]
            }
S <- data.frame (S,s2)
S <- sort.data.frame (S,~-f)
sel <- subset(S,f<f2)
for (i in 2:(n/f2)) {
sel1 <- subset(S,f==round(i*f2))
sel <- rbind(sel,sel1)}
m1 <- c(sel$f * 2)
m2 <- c(m1+1)
c <- c(m1,m2)
n3 <- length(c)
d <- rep(1,n3)
s <- data.frame(c,d)
S <- sort.data.frame (s,~c)
i <- seq(1:n)
z <- y[1]+(y[n]-y[1])*i/n
cx <- cdf(z)
id <- seq(1,n)
S1 <- data.frame(cx,c=id)
S2 <- merge(S,S1,by.x="c",by.y="c")
S3 <- rbind(c(1,1,cx[1,]),S2) 
m <- n+2
X1 <- S3[,3:m]
X <- as.matrix(X1)
cy <- gdf(y)
B <- solve(X%*%t(X))%*%(X%*%cy)
Y <- t(X)%*%B  
TDST <- gdt(Y)
#Modelo para obtener serie con tendencia
S1 <- data.frame(f=per$frecuencia,p=per$densidad)
S <- sort.data.frame (S1,~-p)
p <- as.numeric(S$p)
n2 <- length(p)
s <- p[1]
for (i in 2:n2) {
        s1 <- p[i] + s[(i - 1)]
        s <- c(s, s1)
        s2 <- s/s[n2]
            }
S <- data.frame (S,s2)
S <- sort.data.frame (S,~-f)
sel <- subset(S,f<f2)
m1 <- c(sel$f * 2)
m2 <- c(m1+1)
c <- c(m1,m2)
n3 <- length(c)
d <- rep(1,n3)
s <- data.frame(c,d)
S <- sort.data.frame (s,~c)
i <- seq(1:n)
i <- seq(1:n)
z <- y[1]+(y[n]-y[1])*i/n
cx <- cdf(z)
id <- seq(1,n)
S1 <- data.frame(cx,c=id)
S2 <- merge(S,S1,by.x="c",by.y="c")
S3 <- rbind(c(1,1,cx[1,]),S2) 
m <- n+2
X1 <- S3[,3:m]
X <- as.matrix(X1)
cy <- gdf(y)
B <- solve(X%*%t(X))%*%(X%*%cy)
Y <- t(X)%*%B  
TD <- gdt(Y)
# Series de factores estacionales y irregulares
ST <- TDST-TD
IR <- y-TDST
data.frame(y,TDST,TD,ST,IR)}