TS_2.R

#==========================================================================
# Introduccion a series de tiempo
# Profesor: Ing. Carlos A. Fernández Palomino
# Email:cafpxl@gmail.com
#==========================================================================

#  ------------------------------------------------------------------------
# Cargar paquetes ----------------------------------------
#  ------------------------------------------------------------------------
#install.packages("ggplot2")

library(easypackages)
libraries("xts", "cutoffR", "hydroTSM","lattice","ggplot2","corrplot")

## Loading required package: xts

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: cutoffR

## Loading required package: hydroTSM

## Loading required package: lattice

## Loading required package: ggplot2

## Loading required package: corrplot

## All packages loaded successfully

# configurar directorio de trabajo
setwd("E:/SENAMHI_2017/Curso_R_SENAMHI/2_Series_de_tiempo")

#  ------------------------------------------------------------------------
# Leer archivo csv ----------------------------------------
#  ------------------------------------------------------------------------
data <- read.csv("prec.csv",head=TRUE,check.names = F, stringsAsFactors = F)
str(data)

## 'data.frame':    5844 obs. of  9 variables:
##  $ fecha: chr  "2000-01-01" "2000-01-02" "2000-01-03" "2000-01-04" ...
##  $ 679  : num  3.1 7.1 3 31.4 11.8 1.8 3.1 32.2 48 24.1 ...
##  $ 683  : num  3.8 NA 0 15.2 14.4 1.8 16.2 12.5 1.2 11.3 ...
##  $ 844  : num  4.1 0 0 0 13.1 1 11.1 13 0 5.1 ...
##  $ 809  : num  2.6 0 0 0 5.4 2 24.8 3.2 4.5 12.2 ...
##  $ 690  : num  0.8 0 0 0 12.5 1.3 8.8 0 5 9.3 ...
##  $ 687  : num  0 12.9 0 NA 3.5 2 3.5 7.6 1.1 9.8 ...
##  $ 812  : num  1.7 NA 0 0 5.6 3.9 1.3 5 2.4 6.8 ...
##  $ 759  : num  8 0.6 1 3.1 2.8 0 2.3 8 2.9 6.9 ...

#data <- read.csv("tmax.csv",head=TRUE,check.names = F)

# Crear objeto xts
idx <- as.Date(data[,1])
data.matrix <- data[,-1]
data.xts <- xts(data.matrix, order.by = idx )
str(data.xts)

## An 'xts' object on 2000-01-01/2015-12-31 containing:
##   Data: num [1:5844, 1:8] 3.1 7.1 3 31.4 11.8 1.8 3.1 32.2 48 24.1 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:8] "679" "683" "844" "809" ...
##   Indexed by objects of class: [Date] TZ: UTC
##   xts Attributes:  
##  NULL

# plot xts (no se ve todas las estaciones)
plot(data.xts)

## Warning in plot.xts(data.xts): only the univariate series will be plotted

plot(data.matrix[,1], type="l")

plot(data.xts[,2])

# convertir a un objeto zoo
data.zoo <- as.zoo(data.xts)
str(data.zoo)

## 'zoo' series from 2000-01-01 to 2015-12-31
##   Data: num [1:5844, 1:8] 3.1 7.1 3 31.4 11.8 1.8 3.1 32.2 48 24.1 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:8] "679" "683" "844" "809" ...
##   Index:  Date[1:5844], format: "2000-01-01" "2000-01-02" "2000-01-03" "2000-01-04" "2000-01-05" ...

# plot zoo
plot(data.zoo, main = "Series de tiempo de precipitación")

#==== intente configurar una sola escala del eje "y" para todo st====
summary(data.zoo)

##      Index                 679              683              844        
##  Min.   :2000-01-01   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:2003-12-31   1st Qu.: 0.000   1st Qu.: 0.000   1st Qu.: 0.000  
##  Median :2007-12-31   Median : 1.800   Median : 0.000   Median : 0.000  
##  Mean   :2007-12-31   Mean   : 5.834   Mean   : 1.599   Mean   : 1.674  
##  3rd Qu.:2011-12-31   3rd Qu.: 8.000   3rd Qu.: 1.200   3rd Qu.: 1.200  
##  Max.   :2015-12-31   Max.   :72.900   Max.   :35.500   Max.   :39.200  
##                                        NA's   :363      NA's   :249     
##       809              690              687              812        
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 0.000   1st Qu.: 0.000   1st Qu.: 0.000   1st Qu.: 0.000  
##  Median : 0.000   Median : 0.000   Median : 0.000   Median : 0.000  
##  Mean   : 1.768   Mean   : 1.968   Mean   : 2.448   Mean   : 2.444  
##  3rd Qu.: 1.200   3rd Qu.: 2.000   3rd Qu.: 2.900   3rd Qu.: 2.600  
##  Max.   :50.100   Max.   :40.200   Max.   :52.200   Max.   :45.100  
##  NA's   :226      NA's   :90       NA's   :442      NA's   :235     
##       759        
##  Min.   : 0.000  
##  1st Qu.: 0.000  
##  Median : 0.000  
##  Mean   : 2.036  
##  3rd Qu.: 2.100  
##  Max.   :44.400  
##  NA's   :16

max(data.zoo, na.rm = T)

## [1] 72.9

plot(data.zoo, main = "Series de tiempo de precipitación",
     ylim = c(0,80))

#plot xts con lattice
xyplot(data.xts,xlab = "Fecha",ylab = "Precipitación [mm/dia]",ylim=c(0,100))

#plot con ggplot
autoplot(data.xts[,1:4]) +theme_bw()+xlab('Fecha')+ylab('Precipitación [mm/dia]')

## Warning: Removed 184 rows containing missing values (geom_path).

#  ------------------------------------------------------------------------
# Agregacion de series de tiempo con apply.monthly() y apply.yearly()------
#  ------------------------------------------------------------------------
# De diario a mensual
data.monthly <- apply.monthly(data.xts, FUN = sum)# Fue correcta la acumulacion?
plot(data.monthly)

data.monthly <- apply.monthly(data.xts[,1], FUN = sum)# agregar para una estacion
plot(data.monthly)

data.monthly <- apply.monthly(data.xts, FUN=apply, MARGIN = 2, sum)# agregar para todas las estaciones
xyplot(data.monthly, ylim = c(0,600))

# hacer varios graficos de data.monthly con plot()


# Crear una funcion para considerar en la agregacion los meses con pocos datos diarios perdidos
SUM <- function(a,n){#a es vector, n es el numero maximo de datos perdidos a tener en cuenta
  count <- sum(is.na(a))# cantidad de datos perdidos
  if (count <= n) {
    tot <- sum(a, na.rm=T)
    } else tot <- NA
  return (tot)
}


SUM(a=c(1:28,NA,NA),n=2)

## [1] 406

data.monthly <- apply.monthly(data.xts,FUN=apply,MARGIN=2, SUM, n=3)# agregar para todas las estaciones
xyplot(data.monthly,ylim=c(0,600))

# De mensual a anual
data.anual <- apply.yearly(data.monthly,  FUN=apply, 2, sum)
xyplot(data.anual)

# Determinar los maximos mensuales y anuales
data.max.mensual <- apply.monthly(data.xts,  FUN=apply, 2, max)
xyplot(data.max.mensual)

data.max.anual <- apply.yearly(data.xts,  FUN=apply, 2, max)
xyplot(data.max.anual)

#  ------------------------------------------------------------------------
# Analisis exploratorio de datos--------------------------------------------
#  ------------------------------------------------------------------------
# Graficos de series de tiempo
xyplot(data.xts,xlab = "Fecha",ylab = "Precipitación [mm/dia]",ylim=c(0,100))# serie diaria

xyplot(data.monthly,ylim=c(0,600))# mensual

xyplot(data.anual)# anual

# Diagrama de boxplot
boxplot(coredata(data.xts))# datos diarios por estacion

boxplot(coredata(data.monthly))# datos mensuales por estacion

# boxplot para cada mes y por estacion (Estacionalidad de lluvia)
boxplot(matrix(coredata(data.monthly[,1]), nrow=nrow(data.monthly)/12, ncol=12, byrow=T),
        col="gray", main=c(paste(names(data.monthly[,1])), "Prec mensual [mm]"))

# Grafico de histogramas
#par(mfrow = c(1, 2), pty = "s")
hist(coredata(data.xts[,1]), freq = T)# cantidad de datos por clase

histogram(coredata(data.xts[,1]))# porcentaje de datos por clase

#Grafico de datos disponibles por estacion y anio
y.count<-function(x){
  count.na <- sum(is.na(x))
  value <- 1
  if (count.na >= 1){
    value <- NA
    }
  return(value)
}

y.count(c(1,NA,3,4,5))

## [1] NA

y.count(c(1,NA,NA,4,5))

## [1] NA

y.count(c(1,2,3,4,5))

## [1] 1

anios.data <- apply.yearly(data.monthly, FUN = apply, MARGIN = 2, y.count)
anios.data

##            679 683 844 809 690 687 812 759
## 2000-12-31   1  NA   1   1   1  NA   1   1
## 2001-12-31   1  NA   1   1   1  NA   1   1
## 2002-12-31   1  NA   1   1   1  NA  NA   1
## 2003-12-31   1  NA   1   1   1  NA   1   1
## 2004-12-31   1  NA   1  NA   1  NA   1   1
## 2005-12-31   1  NA   1  NA  NA  NA   1   1
## 2006-12-31   1   1   1   1   1  NA  NA   1
## 2007-12-31   1  NA   1   1   1   1   1   1
## 2008-12-31   1  NA   1   1   1   1   1   1
## 2009-12-31   1   1   1   1   1  NA   1   1
## 2010-12-31   1  NA  NA   1   1  NA   1   1
## 2011-12-31   1  NA   1   1   1  NA   1   1
## 2012-12-31   1  NA   1   1   1   1   1   1
## 2013-12-31   1  NA   1   1   1  NA   1   1
## 2014-12-31   1   1   1   1  NA  NA   1   1
## 2015-12-31   1   1  NA  NA  NA   1  NA   1

xyplot(anios.data, superpose = TRUE)

for (i in 1:ncol(anios.data)){
  anios.data[,i][anios.data[,i]==1] <- i*2
  }
anios.data

##            679 683 844 809 690 687 812 759
## 2000-12-31   2  NA   6   8  10  NA  14  16
## 2001-12-31   2  NA   6   8  10  NA  14  16
## 2002-12-31   2  NA   6   8  10  NA  NA  16
## 2003-12-31   2  NA   6   8  10  NA  14  16
## 2004-12-31   2  NA   6  NA  10  NA  14  16
## 2005-12-31   2  NA   6  NA  NA  NA  14  16
## 2006-12-31   2   4   6   8  10  NA  NA  16
## 2007-12-31   2  NA   6   8  10  12  14  16
## 2008-12-31   2  NA   6   8  10  12  14  16
## 2009-12-31   2   4   6   8  10  NA  14  16
## 2010-12-31   2  NA  NA   8  10  NA  14  16
## 2011-12-31   2  NA   6   8  10  NA  14  16
## 2012-12-31   2  NA   6   8  10  12  14  16
## 2013-12-31   2  NA   6   8  10  NA  14  16
## 2014-12-31   2   4   6   8  NA  NA  14  16
## 2015-12-31   2   4  NA  NA  NA  12  NA  16

xyplot(anios.data, 
       superpose = TRUE,
       scales=list(y = list(at = seq(2,16,2), labels = names(anios.data)),
                   x = list(at = index(anios.data), labels = 2000:2015,rot = 90)),
       col="black",
       type="o",
       pch=16,
       cex=2,
       auto.key = FALSE,
       xlab="Año",
       ylab="Estacion"
       )

# Analisis exploratorio con hydroTSM
library(hydroTSM)
hydroplot(as.zoo(data.xts[,1]), var.type="Precipitation", pfreq = "dma", ylab = "Prec")

hydroplot(as.zoo(data.monthly[,1]), var.type="Precipitation", pfreq = "ma", ylab = "Prec")

#  ------------------------------------------------------------------------
# Completacion de datos--------------------------------------------
#  ------------------------------------------------------------------------
# Completando datos mediante cutoffR
# Revisar: Feng, L., Nowak, G., O'Neill, T.J., & Welsh, A.H. (2014). CUTOFF: A spatio-temporal imputation method. Journal of Hydrology, 519(Part D), 3591-3605.
# http://www.sthda.com/english/wiki/visualize-correlation-matrix-using-correlogram
library(corrplot)
library(cutoffR)

# Correlacion cruzada
cor.cruzada.mensual <- cor(as.matrix(data.monthly),use="complete")

cor.test(data.monthly[,1],data.monthly[,2])

## Warning in z + c(-1, 1) * sigma * qnorm((1 + conf.level)/2): Recycling array of length 1 in array-vector arithmetic is deprecated.
##   Use c() or as.vector() instead.

## 
##  Pearson's product-moment correlation
## 
## data:  data.monthly[, 1] and data.monthly[, 2]
## t = 22.102, df = 167, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8190412 0.8972913
## sample estimates:
##      cor 
## 0.863265

summary(cor.cruzada.mensual)

##       679              683              844              809        
##  Min.   :0.8212   Min.   :0.8672   Min.   :0.8280   Min.   :0.8212  
##  1st Qu.:0.8340   1st Qu.:0.8887   1st Qu.:0.8657   1st Qu.:0.8639  
##  Median :0.8577   Median :0.9060   Median :0.9005   Median :0.8934  
##  Mean   :0.8683   Mean   :0.9126   Mean   :0.8999   Mean   :0.8997  
##  3rd Qu.:0.8711   3rd Qu.:0.9189   3rd Qu.:0.9219   3rd Qu.:0.9279  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##       690              687              812              759        
##  Min.   :0.8694   Min.   :0.8482   Min.   :0.8360   Min.   :0.8559  
##  1st Qu.:0.9032   1st Qu.:0.8769   1st Qu.:0.8647   1st Qu.:0.8741  
##  Median :0.9172   Median :0.8967   Median :0.9026   Median :0.9011  
##  Mean   :0.9249   Mean   :0.9073   Mean   :0.9049   Mean   :0.9050  
##  3rd Qu.:0.9423   3rd Qu.:0.9225   3rd Qu.:0.9318   3rd Qu.:0.9159  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000

min(cor.cruzada.mensual)

## [1] 0.8212186

corrplot(cor.cruzada.mensual, method="circle",type = c("lower"),mar = c(4, 2, 3, 4))

corrplot(cor.cruzada.mensual, method="color",type = c("lower"),mar = c(4, 2, 3, 4))

corrplot(cor.cruzada.mensual, method="number",type = c("lower"),mar = c(4, 2, 3, 4))

data.cutoff <- data.frame(data.monthly,date=index(data.monthly),check.names = FALSE)
#data.cutoff.comp <- cutoff(data = data.cutoff)
data.cutoff.comp <- cutoff(data = data.cutoff,method = c("correlation"),corr = "spearman",cutoff = 0.7)# para tmin:cutoff = 0.8, tmax:cutoff = 0.7
data.mensual.comp <- as.xts(data.cutoff.comp,order.by = index(data.monthly))

xyplot(data.monthly,ylim=c(0,600))# mensual

xyplot(data.mensual.comp,ylim=c(0,600))# mensual

library(latticeExtra)

## Loading required package: RColorBrewer

## 
## Attaching package: 'latticeExtra'

## The following object is masked from 'package:ggplot2':
## 
##     layer

st.sin <- xyplot(data.monthly,ylim=c(0,600),col="red", lwd=2)# mensual
st.com <- xyplot(data.mensual.comp,ylim=c(0,600),col="blue", lwd=2)# mensual

st.com + st.sin

#Analisis de autocorrelacion
acf(data.mensual.comp[,1])

pacf(data.mensual.comp[,1])

#diagnostico
#estimacion de parametros
fit<-arima(as.ts(data.mensual.comp[,1]),order=c(1,0,1))
tsdiag(fit)

Box.test(fit$residuals,lag=1)

## 
##  Box-Pierce test
## 
## data:  fit$residuals
## X-squared = 0.00015401, df = 1, p-value = 0.9901

# prediccion
LH.pred<-predict(fit,n.ahead=10)

plot(as.ts(data.mensual.comp[,1]),xlim=c(1,220),ylim=c(0,500))
lines(LH.pred$pred,col="red")
lines(LH.pred$pred+2*LH.pred$se,col="red",lty=3)
lines(LH.pred$pred-2*LH.pred$se,col="red",lty=3)

#http://www.statoek.wiso.uni-goettingen.de/veranstaltungen/zeitreihen/sommer03/ts_r_intro.pdf
# Analisis de tendencia
# install.packages("trend")

TS_2.R

usuario

Wed May 24 02:57:13 2017