#==========================================================================
# Introduccion a series de tiempo
# Profesor: Ing. Carlos A. Fernández Palomino
# Email:cafpxl@gmail.com
#==========================================================================
# ------------------------------------------------------------------------
# Cargar paquetes ----------------------------------------
# ------------------------------------------------------------------------
#install.packages("ggplot2")
library(easypackages)
libraries("xts", "cutoffR", "hydroTSM","lattice","ggplot2","corrplot")
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: cutoffR
## Loading required package: hydroTSM
## Loading required package: lattice
## Loading required package: ggplot2
## Loading required package: corrplot
## All packages loaded successfully
# configurar directorio de trabajo
setwd("E:/SENAMHI_2017/Curso_R_SENAMHI/2_Series_de_tiempo")
# ------------------------------------------------------------------------
# Leer archivo csv ----------------------------------------
# ------------------------------------------------------------------------
data <- read.csv("prec.csv",head=TRUE,check.names = F, stringsAsFactors = F)
str(data)
## 'data.frame': 5844 obs. of 9 variables:
## $ fecha: chr "2000-01-01" "2000-01-02" "2000-01-03" "2000-01-04" ...
## $ 679 : num 3.1 7.1 3 31.4 11.8 1.8 3.1 32.2 48 24.1 ...
## $ 683 : num 3.8 NA 0 15.2 14.4 1.8 16.2 12.5 1.2 11.3 ...
## $ 844 : num 4.1 0 0 0 13.1 1 11.1 13 0 5.1 ...
## $ 809 : num 2.6 0 0 0 5.4 2 24.8 3.2 4.5 12.2 ...
## $ 690 : num 0.8 0 0 0 12.5 1.3 8.8 0 5 9.3 ...
## $ 687 : num 0 12.9 0 NA 3.5 2 3.5 7.6 1.1 9.8 ...
## $ 812 : num 1.7 NA 0 0 5.6 3.9 1.3 5 2.4 6.8 ...
## $ 759 : num 8 0.6 1 3.1 2.8 0 2.3 8 2.9 6.9 ...
#data <- read.csv("tmax.csv",head=TRUE,check.names = F)
# Crear objeto xts
idx <- as.Date(data[,1])
data.matrix <- data[,-1]
data.xts <- xts(data.matrix, order.by = idx )
str(data.xts)
## An 'xts' object on 2000-01-01/2015-12-31 containing:
## Data: num [1:5844, 1:8] 3.1 7.1 3 31.4 11.8 1.8 3.1 32.2 48 24.1 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:8] "679" "683" "844" "809" ...
## Indexed by objects of class: [Date] TZ: UTC
## xts Attributes:
## NULL
# plot xts (no se ve todas las estaciones)
plot(data.xts)
## Warning in plot.xts(data.xts): only the univariate series will be plotted

plot(data.matrix[,1], type="l")

plot(data.xts[,2])

# convertir a un objeto zoo
data.zoo <- as.zoo(data.xts)
str(data.zoo)
## 'zoo' series from 2000-01-01 to 2015-12-31
## Data: num [1:5844, 1:8] 3.1 7.1 3 31.4 11.8 1.8 3.1 32.2 48 24.1 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:8] "679" "683" "844" "809" ...
## Index: Date[1:5844], format: "2000-01-01" "2000-01-02" "2000-01-03" "2000-01-04" "2000-01-05" ...
# plot zoo
plot(data.zoo, main = "Series de tiempo de precipitación")

#==== intente configurar una sola escala del eje "y" para todo st====
summary(data.zoo)
## Index 679 683 844
## Min. :2000-01-01 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.:2003-12-31 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median :2007-12-31 Median : 1.800 Median : 0.000 Median : 0.000
## Mean :2007-12-31 Mean : 5.834 Mean : 1.599 Mean : 1.674
## 3rd Qu.:2011-12-31 3rd Qu.: 8.000 3rd Qu.: 1.200 3rd Qu.: 1.200
## Max. :2015-12-31 Max. :72.900 Max. :35.500 Max. :39.200
## NA's :363 NA's :249
## 809 690 687 812
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000
## Mean : 1.768 Mean : 1.968 Mean : 2.448 Mean : 2.444
## 3rd Qu.: 1.200 3rd Qu.: 2.000 3rd Qu.: 2.900 3rd Qu.: 2.600
## Max. :50.100 Max. :40.200 Max. :52.200 Max. :45.100
## NA's :226 NA's :90 NA's :442 NA's :235
## 759
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 0.000
## Mean : 2.036
## 3rd Qu.: 2.100
## Max. :44.400
## NA's :16
max(data.zoo, na.rm = T)
## [1] 72.9
plot(data.zoo, main = "Series de tiempo de precipitación",
ylim = c(0,80))

#plot xts con lattice
xyplot(data.xts,xlab = "Fecha",ylab = "Precipitación [mm/dia]",ylim=c(0,100))

#plot con ggplot
autoplot(data.xts[,1:4]) +theme_bw()+xlab('Fecha')+ylab('Precipitación [mm/dia]')
## Warning: Removed 184 rows containing missing values (geom_path).

# ------------------------------------------------------------------------
# Agregacion de series de tiempo con apply.monthly() y apply.yearly()------
# ------------------------------------------------------------------------
# De diario a mensual
data.monthly <- apply.monthly(data.xts, FUN = sum)# Fue correcta la acumulacion?
plot(data.monthly)

data.monthly <- apply.monthly(data.xts[,1], FUN = sum)# agregar para una estacion
plot(data.monthly)

data.monthly <- apply.monthly(data.xts, FUN=apply, MARGIN = 2, sum)# agregar para todas las estaciones
xyplot(data.monthly, ylim = c(0,600))

# hacer varios graficos de data.monthly con plot()
# Crear una funcion para considerar en la agregacion los meses con pocos datos diarios perdidos
SUM <- function(a,n){#a es vector, n es el numero maximo de datos perdidos a tener en cuenta
count <- sum(is.na(a))# cantidad de datos perdidos
if (count <= n) {
tot <- sum(a, na.rm=T)
} else tot <- NA
return (tot)
}
SUM(a=c(1:28,NA,NA),n=2)
## [1] 406
data.monthly <- apply.monthly(data.xts,FUN=apply,MARGIN=2, SUM, n=3)# agregar para todas las estaciones
xyplot(data.monthly,ylim=c(0,600))

# De mensual a anual
data.anual <- apply.yearly(data.monthly, FUN=apply, 2, sum)
xyplot(data.anual)

# Determinar los maximos mensuales y anuales
data.max.mensual <- apply.monthly(data.xts, FUN=apply, 2, max)
xyplot(data.max.mensual)

data.max.anual <- apply.yearly(data.xts, FUN=apply, 2, max)
xyplot(data.max.anual)

# ------------------------------------------------------------------------
# Analisis exploratorio de datos--------------------------------------------
# ------------------------------------------------------------------------
# Graficos de series de tiempo
xyplot(data.xts,xlab = "Fecha",ylab = "Precipitación [mm/dia]",ylim=c(0,100))# serie diaria

xyplot(data.monthly,ylim=c(0,600))# mensual

xyplot(data.anual)# anual

# Diagrama de boxplot
boxplot(coredata(data.xts))# datos diarios por estacion

boxplot(coredata(data.monthly))# datos mensuales por estacion

# boxplot para cada mes y por estacion (Estacionalidad de lluvia)
boxplot(matrix(coredata(data.monthly[,1]), nrow=nrow(data.monthly)/12, ncol=12, byrow=T),
col="gray", main=c(paste(names(data.monthly[,1])), "Prec mensual [mm]"))

# Grafico de histogramas
#par(mfrow = c(1, 2), pty = "s")
hist(coredata(data.xts[,1]), freq = T)# cantidad de datos por clase

histogram(coredata(data.xts[,1]))# porcentaje de datos por clase

#Grafico de datos disponibles por estacion y anio
y.count<-function(x){
count.na <- sum(is.na(x))
value <- 1
if (count.na >= 1){
value <- NA
}
return(value)
}
y.count(c(1,NA,3,4,5))
## [1] NA
y.count(c(1,NA,NA,4,5))
## [1] NA
y.count(c(1,2,3,4,5))
## [1] 1
anios.data <- apply.yearly(data.monthly, FUN = apply, MARGIN = 2, y.count)
anios.data
## 679 683 844 809 690 687 812 759
## 2000-12-31 1 NA 1 1 1 NA 1 1
## 2001-12-31 1 NA 1 1 1 NA 1 1
## 2002-12-31 1 NA 1 1 1 NA NA 1
## 2003-12-31 1 NA 1 1 1 NA 1 1
## 2004-12-31 1 NA 1 NA 1 NA 1 1
## 2005-12-31 1 NA 1 NA NA NA 1 1
## 2006-12-31 1 1 1 1 1 NA NA 1
## 2007-12-31 1 NA 1 1 1 1 1 1
## 2008-12-31 1 NA 1 1 1 1 1 1
## 2009-12-31 1 1 1 1 1 NA 1 1
## 2010-12-31 1 NA NA 1 1 NA 1 1
## 2011-12-31 1 NA 1 1 1 NA 1 1
## 2012-12-31 1 NA 1 1 1 1 1 1
## 2013-12-31 1 NA 1 1 1 NA 1 1
## 2014-12-31 1 1 1 1 NA NA 1 1
## 2015-12-31 1 1 NA NA NA 1 NA 1
xyplot(anios.data, superpose = TRUE)

for (i in 1:ncol(anios.data)){
anios.data[,i][anios.data[,i]==1] <- i*2
}
anios.data
## 679 683 844 809 690 687 812 759
## 2000-12-31 2 NA 6 8 10 NA 14 16
## 2001-12-31 2 NA 6 8 10 NA 14 16
## 2002-12-31 2 NA 6 8 10 NA NA 16
## 2003-12-31 2 NA 6 8 10 NA 14 16
## 2004-12-31 2 NA 6 NA 10 NA 14 16
## 2005-12-31 2 NA 6 NA NA NA 14 16
## 2006-12-31 2 4 6 8 10 NA NA 16
## 2007-12-31 2 NA 6 8 10 12 14 16
## 2008-12-31 2 NA 6 8 10 12 14 16
## 2009-12-31 2 4 6 8 10 NA 14 16
## 2010-12-31 2 NA NA 8 10 NA 14 16
## 2011-12-31 2 NA 6 8 10 NA 14 16
## 2012-12-31 2 NA 6 8 10 12 14 16
## 2013-12-31 2 NA 6 8 10 NA 14 16
## 2014-12-31 2 4 6 8 NA NA 14 16
## 2015-12-31 2 4 NA NA NA 12 NA 16
xyplot(anios.data,
superpose = TRUE,
scales=list(y = list(at = seq(2,16,2), labels = names(anios.data)),
x = list(at = index(anios.data), labels = 2000:2015,rot = 90)),
col="black",
type="o",
pch=16,
cex=2,
auto.key = FALSE,
xlab="Año",
ylab="Estacion"
)

# Analisis exploratorio con hydroTSM
library(hydroTSM)
hydroplot(as.zoo(data.xts[,1]), var.type="Precipitation", pfreq = "dma", ylab = "Prec")

hydroplot(as.zoo(data.monthly[,1]), var.type="Precipitation", pfreq = "ma", ylab = "Prec")

# ------------------------------------------------------------------------
# Completacion de datos--------------------------------------------
# ------------------------------------------------------------------------
# Completando datos mediante cutoffR
# Revisar: Feng, L., Nowak, G., O'Neill, T.J., & Welsh, A.H. (2014). CUTOFF: A spatio-temporal imputation method. Journal of Hydrology, 519(Part D), 3591-3605.
# http://www.sthda.com/english/wiki/visualize-correlation-matrix-using-correlogram
library(corrplot)
library(cutoffR)
# Correlacion cruzada
cor.cruzada.mensual <- cor(as.matrix(data.monthly),use="complete")
cor.test(data.monthly[,1],data.monthly[,2])
## Warning in z + c(-1, 1) * sigma * qnorm((1 + conf.level)/2): Recycling array of length 1 in array-vector arithmetic is deprecated.
## Use c() or as.vector() instead.
##
## Pearson's product-moment correlation
##
## data: data.monthly[, 1] and data.monthly[, 2]
## t = 22.102, df = 167, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8190412 0.8972913
## sample estimates:
## cor
## 0.863265
summary(cor.cruzada.mensual)
## 679 683 844 809
## Min. :0.8212 Min. :0.8672 Min. :0.8280 Min. :0.8212
## 1st Qu.:0.8340 1st Qu.:0.8887 1st Qu.:0.8657 1st Qu.:0.8639
## Median :0.8577 Median :0.9060 Median :0.9005 Median :0.8934
## Mean :0.8683 Mean :0.9126 Mean :0.8999 Mean :0.8997
## 3rd Qu.:0.8711 3rd Qu.:0.9189 3rd Qu.:0.9219 3rd Qu.:0.9279
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## 690 687 812 759
## Min. :0.8694 Min. :0.8482 Min. :0.8360 Min. :0.8559
## 1st Qu.:0.9032 1st Qu.:0.8769 1st Qu.:0.8647 1st Qu.:0.8741
## Median :0.9172 Median :0.8967 Median :0.9026 Median :0.9011
## Mean :0.9249 Mean :0.9073 Mean :0.9049 Mean :0.9050
## 3rd Qu.:0.9423 3rd Qu.:0.9225 3rd Qu.:0.9318 3rd Qu.:0.9159
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
min(cor.cruzada.mensual)
## [1] 0.8212186
corrplot(cor.cruzada.mensual, method="circle",type = c("lower"),mar = c(4, 2, 3, 4))

corrplot(cor.cruzada.mensual, method="color",type = c("lower"),mar = c(4, 2, 3, 4))

corrplot(cor.cruzada.mensual, method="number",type = c("lower"),mar = c(4, 2, 3, 4))

data.cutoff <- data.frame(data.monthly,date=index(data.monthly),check.names = FALSE)
#data.cutoff.comp <- cutoff(data = data.cutoff)
data.cutoff.comp <- cutoff(data = data.cutoff,method = c("correlation"),corr = "spearman",cutoff = 0.7)# para tmin:cutoff = 0.8, tmax:cutoff = 0.7
data.mensual.comp <- as.xts(data.cutoff.comp,order.by = index(data.monthly))
xyplot(data.monthly,ylim=c(0,600))# mensual

xyplot(data.mensual.comp,ylim=c(0,600))# mensual

library(latticeExtra)
## Loading required package: RColorBrewer
##
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
##
## layer
st.sin <- xyplot(data.monthly,ylim=c(0,600),col="red", lwd=2)# mensual
st.com <- xyplot(data.mensual.comp,ylim=c(0,600),col="blue", lwd=2)# mensual
st.com + st.sin

#Analisis de autocorrelacion
acf(data.mensual.comp[,1])

pacf(data.mensual.comp[,1])

#diagnostico
#estimacion de parametros
fit<-arima(as.ts(data.mensual.comp[,1]),order=c(1,0,1))
tsdiag(fit)

Box.test(fit$residuals,lag=1)
##
## Box-Pierce test
##
## data: fit$residuals
## X-squared = 0.00015401, df = 1, p-value = 0.9901
# prediccion
LH.pred<-predict(fit,n.ahead=10)
plot(as.ts(data.mensual.comp[,1]),xlim=c(1,220),ylim=c(0,500))
lines(LH.pred$pred,col="red")
lines(LH.pred$pred+2*LH.pred$se,col="red",lty=3)
lines(LH.pred$pred-2*LH.pred$se,col="red",lty=3)

#http://www.statoek.wiso.uni-goettingen.de/veranstaltungen/zeitreihen/sommer03/ts_r_intro.pdf
# Analisis de tendencia
# install.packages("trend")