Carga de librerías para procesado de datos:

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(stringr)
library(purrr)
library(openxlsx)

Carga de la base de datos

# File names of every Excel workbook in the working directory; each one is
# read later as one station.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored at the end of the name. The previous ".xlsx" matched any name that
# contained "xlsx" preceded by an arbitrary character.
nombres <- list.files(
  full.names = FALSE,
  pattern = "\\.xlsx$"
)

# Station names: the file names without the extension, in upper case.
# Derived from `nombres` (instead of listing the directory a second time) so
# that nombres[i] and nombres2[i] always refer to the same file.
nombres2 <- nombres %>%
  str_replace("\\.xlsx$", "") %>%
  toupper()

# Build one list entry per station:
#   data[[i]]   - the workbook as read, with columns renamed to
#                 "y" (year), "d" (day) and "01".."12" (one column per month)
#   diaria[[i]] - the same data in long form: a date column `x` and one value
#                 column named after the station (nombres2[i])
# The lists are preallocated and the loop uses seq_along(), so an empty
# `nombres` no longer triggers an attempt to read a non-existent file.
data <- vector("list", length(nombres))
diaria <- vector("list", length(nombres))
mensual <- lst()
columnas <- c("y", "d", sprintf("%02d", 1:12))
for (i in seq_along(nombres)) {
  data[[i]] <- read.xlsx(nombres[i])
  names(data[[i]]) <- columnas
  diaria[[i]] <- data[[i]] %>%
    # Month columns (positions 3 to 14) become rows; the month label goes to `m`
    pivot_longer(
      cols = 3:14,
      names_to = "m",
      values_to = nombres2[i]
    ) %>%
    # Assemble the date from year, month and day
    mutate(x = as.Date(paste(y, m, d, sep = "-"), format = "%Y-%m-%d")) %>%
    # all_of() makes explicit that the station name is an external string
    select(x, all_of(nombres2[i])) %>%
    arrange(x) %>%
    # Drop rows whose date could not be parsed or whose value is missing
    na.omit()
}

# Algún día puede servir este código xd
# %>% rename_all(~c("FECHA",nombres))

# Combine every station table into a single one, keyed by the date column `x`.
# full_join keeps the dates of ALL stations; left_join kept only the dates
# present in the first station and silently discarded observations of the
# other stations on any other date.
tabla <- diaria %>% reduce(full_join, by = "x")
### Filling the gaps in the calendar:
# Complete daily sequence between the first and last observed date
completa <- data.frame(x = seq(min(tabla$x), max(tabla$x), "1 day"))
# Add the missing dates to the main table (their station values stay NA)
tabla <- merge(completa, tabla, by = "x", all.x = TRUE)
# Save the table; NA values are written as empty cells
write.csv(tabla, "estaciones.csv", row.names = FALSE, na = "")

# Convert the table to an xts time series (all columns except the date,
# ordered by the date column) so it can be aggregated to monthly and annual
# steps.
# drop = FALSE keeps a one-station table as a named column instead of
# collapsing it into an unnamed vector.
formateada <- xts::xts(tabla[, -1, drop = FALSE], order.by = tabla$x)
# Monthly totals; na.rm = FALSE so a month with any missing day yields NA
monthly <- hydroTSM::daily2monthly(formateada, FUN = sum, na.rm = FALSE)

## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
##      (status 2 uses the sf package in place of rgdal)

## Please note that 'maptools' will be retired during October 2023,
## plan transition at your earliest convenience (see
## https://r-spatial.org/r/2023/05/15/evolution4.html and earlier blogs
## for guidance);some functionality will be moved to 'sp'.
##  Checking rgeos availability: FALSE

# Annual totals from the monthly series; missing values are not removed
yearly <- hydroTSM::monthly2annual(
  monthly,
  FUN = sum,
  na.rm = FALSE
)

Graficando

# Raw daily series, one panel per station
lattice::xyplot(
  formateada,
  main = "Data cruda (diaria)",
  col = "deepskyblue"
)

# Raw monthly totals
lattice::xyplot(
  monthly,
  main = "Data cruda (mensual)",
  col = "hotpink"
)

# Raw annual totals
lattice::xyplot(
  yearly,
  main = "Data cruda (anual)",
  col = "blue"
)

Completando datos

import pandas as pd
from sklearn.impute import KNNImputer


# Load the station table; column 'x' is parsed as dates and used as the index
df = pd.read_csv("estaciones.csv",
                 index_col=['x'],
                 parse_dates=True)

# Select the study period. ISO (YYYY-MM-DD) strings are used so the slice
# bounds cannot be misread as month-first dates, and .loc makes the
# label-based row slice explicit.
estaciones = df.loc['1970-01-01':'2000-12-31']

# Daily date range covering the same study period
# NOTE(review): `fechas` is not used below — confirm whether it is still needed.
fechas = pd.date_range('1970-01-01', '2000-12-31')

# K-Nearest Neighbors imputation using 6 neighbours.
# Column labels are taken from the frame that is actually imputed.
imputer = KNNImputer(n_neighbors=6)
df_knn = pd.DataFrame(imputer.fit_transform(estaciones),
                      columns=estaciones.columns,
                      index=estaciones.index)

# Write the completed table, keeping the date index as the first column
estacionesknn = "estaciones_corregido_knn.csv"
df_knn.to_csv(estacionesknn, index=True)

Carga de data completada con KNN

library(hydroTSM)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: xts

## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:dplyr':
## 
##     first, last

## 
## Attaching package: 'hydroTSM'

## The following object is masked from 'package:tidyr':
## 
##     extract

library(xts)
library(zoo)
# Read the KNN-completed table as a zoo series and convert it to xts.
# NOTE(review): relies on read.csv.zoo taking the first CSV column as the
# time index — confirm against the written file.
dfm <- as.xts(
  x = read.csv.zoo(
    "estaciones_corregido_knn.csv"
  )
)

# Monthly totals of the completed daily series
monthly2 <- hydroTSM::daily2monthly(dfm, FUN = sum, na.rm = TRUE)
# Annual totals built from the monthly series, as in the raw-data pipeline
# (the daily series `dfm` was previously passed here by mistake)
yearly2 <- hydroTSM::monthly2annual(monthly2, FUN = sum, na.rm = FALSE)

Ploteo de las series completadas

# Completed daily series, one panel per station
lattice::xyplot(
  dfm,
  main = "Data completada (diaria)",
  col = "deepskyblue"
)

# Completed monthly totals
lattice::xyplot(
  monthly2,
  main = "Data completada (mensual)",
  col = "hotpink"
)

# Completed annual totals
lattice::xyplot(
  yearly2,
  main = "Data completada (anual)",
  col = "blue"
)

# Realizando los test de tendencias

library(Kendall)
library(trend)
# Mann-Kendall trend test for every station at daily, monthly and annual steps.
# Each column is converted to a plain numeric vector before calling mk.test.
# Arithmetic between xts/zoo objects is aligned on the time index, so the
# pairwise differences the test is built on are all zero when a time-series
# object is passed, giving S = 0, z = 0 and p-value = 1 for every station.
mk_por_estacion <- function(serie) {
  lapply(serie, function(columna) mk.test(as.numeric(zoo::coredata(columna))))
}
prueba_diaria <- mk_por_estacion(dfm)
prueba_mensual <- mk_por_estacion(monthly2)
prueba_anual <- mk_por_estacion(yearly2)

prueba_diaria

## $AYABACA1
## 
##  Mann-Kendall trend test
## 
## data:  X[[i]]
## z = 0, n = 11323, p-value = 1
## alternative hypothesis: true S is not equal to 0
## sample estimates:
##            S         varS          tau 
##            0 134450252636            0 
## 
## 
## $LANCONES1
## 
##  Mann-Kendall trend test
## 
## data:  X[[i]]
## z = 0, n = 11323, p-value = 1
## alternative hypothesis: true S is not equal to 0
## sample estimates:
##           S        varS         tau 
##           0 40909137895           0 
## 
## 
## $MALLARES1
## 
##  Mann-Kendall trend test
## 
## data:  X[[i]]
## z = 0, n = 11323, p-value = 1
## alternative hypothesis: true S is not equal to 0
## sample estimates:
##           S        varS         tau 
##           0 53130909882           0 
## 
## 
## $PANANGA1
## 
##  Mann-Kendall trend test
## 
## data:  X[[i]]
## z = 0, n = 11323, p-value = 1
## alternative hypothesis: true S is not equal to 0
## sample estimates:
##           S        varS         tau 
##           0 36243136300           0 
## 
## 
## $SAPILLICA1
## 
##  Mann-Kendall trend test
## 
## data:  X[[i]]
## z = 0, n = 11323, p-value = 1
## alternative hypothesis: true S is not equal to 0
## sample estimates:
##            S         varS          tau 
##            0 105436123467            0 
## 
## 
## $SAUSALCULUCAN1
## 
##  Mann-Kendall trend test
## 
## data:  X[[i]]
## z = 0, n = 11323, p-value = 1
## alternative hypothesis: true S is not equal to 0
## sample estimates:
##           S        varS         tau 
##           0 93932594298           0

index

Lizeth C.D.

2023-07-22

Carga de librerías para procesado de datos:

Carga de la base de datos

Graficando

Completando datos

Carga de data completada con KNN

Ploteo de las series completadas