#    Geoestadistica
#    Ejemplo: precipitación en tres departamentos de Guatemala,
#    Datos ontenidos de Chirps, https://data.chc.ucsb.edu/products/CHIRPS-2.0/
#    Profesor: Juan Josue Santos
#    juanjosue.sp@gmail.com

#install.packages("gstat",dependencies = TRUE)
library(gstat)  # geoestadistica
## Warning: package 'gstat' was built under R version 4.4.3
library(mapview)  # visualización
library(sf)       # manejo de datos espaciales
## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
library(stars)    # manejo de datos espacial y temporal
## Warning: package 'stars' was built under R version 4.4.3
## Cargando paquete requerido: abind
library(terra)    # manejo de datos raster
## Warning: package 'terra' was built under R version 4.4.3
## terra 1.8.29
library(ggplot2)  # visualización de graicos
## Warning: package 'ggplot2' was built under R version 4.4.3
library(raster)   # manejo de datos raster
## Warning: package 'raster' was built under R version 4.4.3
## Cargando paquete requerido: sp
## Warning: package 'sp' was built under R version 4.4.3
# 1. Cargar los datos
deptos <- shapefile("C:/Users/JuanJosueSantosP/OneDrive/Curso Mariano Galvez/Geoestadísttica/Shape files/Departamentos_WGS84.shp")
## Warning: PROJ: proj_identify: C:\Program
## Files\PostgreSQL\17\share\contrib\postgis-3.5\proj\proj.db contains
## DATABASE.LAYOUT.VERSION.MINOR = 2 whereas a number >= 4 is expected. It comes
## from another PROJ installation. (GDAL error 1)
deptos <- st_as_sf(deptos)  # Convertir a sf para compatibilidad con st_within
str(deptos)
## Classes 'sf' and 'data.frame':   3 obs. of  11 variables:
##  $ DEPARTAMEN: chr  "CHIMALTENANGO" "GUATEMALA" "SACATEPEQUEZ"
##  $ AREA_KM2  : num  1865 2193 537
##  $ COD_DEP   : int  4 1 3
##  $ REGION    : chr  "Central" "Metropolitana" "Central"
##  $ REG_NUM   : int  5 1 5
##  $ POBLACION : num  446133 2541581 248019
##  $ VIVIENDAS : num  93655 619636 54414
##  $ País      : chr  "Guatemala" "Guatemala" "Guatemala"
##  $ Hectares  : num  1863 2189 536
##  $ ICTA      : chr  "Altiplano Central" "Altiplano Central" "Altiplano Central"
##  $ geometry  :sfc_POLYGON of length 3; first list element: List of 1
##   ..$ : num [1:1407, 1:2] -91 -91 -91 -91 -91 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:1407] "1" "2" "3" "4" ...
##   .. .. ..$ : chr [1:2] "x" "y"
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA
##   ..- attr(*, "names")= chr [1:10] "DEPARTAMEN" "AREA_KM2" "COD_DEP" "REGION" ...
depar_col <- c("skyblue", "lightblue", "lightgreen")  # rampa de color para los tres departamentos
plot(deptos, col = depar_col, border = "black", lwd = 0.5)
## Warning: plotting the first 9 out of 10 attributes; use max.plot = 10 to plot
## all

# colocar etiquetas a los departamentos
#text(deptos$geometry, labels = deptos$DEPARTAMEN, cex = 0.5, col = "black", font = 3)


# 2. Cargar los datos de precipitación
pp <- read.csv("C:/Users/JuanJosueSantosP/OneDrive/Curso Mariano Galvez/Geoestadísttica/Datos Clima/pp2021-2024.csv")
head(pp)
##   X id    lon    lat       date   chirps
## 1 1  1 -91.13 14.938 2020-01-01 0.000000
## 2 2  1 -91.13 14.938 2020-01-02 0.000000
## 3 3  1 -91.13 14.938 2020-01-03 0.000000
## 4 4  1 -91.13 14.938 2020-01-04 3.502182
## 5 5  1 -91.13 14.938 2020-01-05 0.000000
## 6 6  1 -91.13 14.938 2020-01-06 0.000000
# generar columna año, mes y dia para el objeto pp
pp$date <- as.Date(pp$date, format = "%Y-%m-%d")

# extraer mes de la columna date
pp$month <- format(pp$date, "%m")

# extraer año de la columna date
pp$year <- format(pp$date, "%Y")

head(pp)
##   X id    lon    lat       date   chirps month year
## 1 1  1 -91.13 14.938 2020-01-01 0.000000    01 2020
## 2 2  1 -91.13 14.938 2020-01-02 0.000000    01 2020
## 3 3  1 -91.13 14.938 2020-01-03 0.000000    01 2020
## 4 4  1 -91.13 14.938 2020-01-04 3.502182    01 2020
## 5 5  1 -91.13 14.938 2020-01-05 0.000000    01 2020
## 6 6  1 -91.13 14.938 2020-01-06 0.000000    01 2020
# 3. exploracion de data set
summary(pp)
##        X                id              lon              lat       
##  Min.   :     1   Min.   :  1.00   Min.   :-91.13   Min.   :14.26  
##  1st Qu.:146161   1st Qu.: 80.75   1st Qu.:-90.91   1st Qu.:14.43  
##  Median :292321   Median :160.50   Median :-90.69   Median :14.60  
##  Mean   :292321   Mean   :160.50   Mean   :-90.69   Mean   :14.60  
##  3rd Qu.:438480   3rd Qu.:240.25   3rd Qu.:-90.47   3rd Qu.:14.77  
##  Max.   :584640   Max.   :320.00   Max.   :-90.25   Max.   :14.94  
##       date                chirps           month               year          
##  Min.   :2020-01-01   Min.   :  0.000   Length:584640      Length:584640     
##  1st Qu.:2021-04-01   1st Qu.:  0.000   Class :character   Class :character  
##  Median :2022-07-02   Median :  0.000   Mode  :character   Mode  :character  
##  Mean   :2022-07-02   Mean   :  4.666                                        
##  3rd Qu.:2023-10-02   3rd Qu.:  6.362                                        
##  Max.   :2024-12-31   Max.   :116.605
# convertir en factor year y month
pp$year <- as.factor(pp$year)
pp$month <- as.numeric(pp$month)
pp$month <- as.factor(pp$month)

summary(pp)
##        X                id              lon              lat       
##  Min.   :     1   Min.   :  1.00   Min.   :-91.13   Min.   :14.26  
##  1st Qu.:146161   1st Qu.: 80.75   1st Qu.:-90.91   1st Qu.:14.43  
##  Median :292321   Median :160.50   Median :-90.69   Median :14.60  
##  Mean   :292321   Mean   :160.50   Mean   :-90.69   Mean   :14.60  
##  3rd Qu.:438480   3rd Qu.:240.25   3rd Qu.:-90.47   3rd Qu.:14.77  
##  Max.   :584640   Max.   :320.00   Max.   :-90.25   Max.   :14.94  
##                                                                    
##       date                chirps            month          year       
##  Min.   :2020-01-01   Min.   :  0.000   1      : 49600   2020:117120  
##  1st Qu.:2021-04-01   1st Qu.:  0.000   3      : 49600   2021:116800  
##  Median :2022-07-02   Median :  0.000   5      : 49600   2022:116800  
##  Mean   :2022-07-02   Mean   :  4.666   7      : 49600   2023:116800  
##  3rd Qu.:2023-10-02   3rd Qu.:  6.362   8      : 49600   2024:117120  
##  Max.   :2024-12-31   Max.   :116.605   10     : 49600                
##                                         (Other):287040
head(pp,10)
##     X id    lon    lat       date   chirps month year
## 1   1  1 -91.13 14.938 2020-01-01 0.000000     1 2020
## 2   2  1 -91.13 14.938 2020-01-02 0.000000     1 2020
## 3   3  1 -91.13 14.938 2020-01-03 0.000000     1 2020
## 4   4  1 -91.13 14.938 2020-01-04 3.502182     1 2020
## 5   5  1 -91.13 14.938 2020-01-05 0.000000     1 2020
## 6   6  1 -91.13 14.938 2020-01-06 0.000000     1 2020
## 7   7  1 -91.13 14.938 2020-01-07 0.000000     1 2020
## 8   8  1 -91.13 14.938 2020-01-08 3.016321     1 2020
## 9   9  1 -91.13 14.938 2020-01-09 0.000000     1 2020
## 10 10  1 -91.13 14.938 2020-01-10 0.000000     1 2020
# fitrar por año
pp2024 <- pp[pp$year == "2024", ]

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:raster':
## 
##     intersect, select, union
## The following objects are masked from 'package:terra':
## 
##     intersect, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# mutate el objeto pp2024, columna month
pp2024 <- pp2024 %>%
  mutate(month = case_when(
    month == "1" ~ "Enero",
    month == "2" ~ "Febrero",
    month == "3" ~ "Marzo",
    month == "4" ~ "Abril",
    month == "5" ~ "Mayo",
    month == "6" ~ "Junio",
    month == "7" ~ "Julio",
    month == "8" ~ "Agosto",
    month == "9" ~ "Septiembre",
    month == "10" ~ "Octubre",
    month == "11" ~ "Noviembre",
    month == "12" ~ "Diciembre"
  ))

# sumatoria de precipítacion por mes
pp2024.mes <- pp2024 %>%
  group_by(month) %>%
  summarise(precip = sum(chirps   , na.rm = TRUE))

# Tabla con coordenadas, promedio precipitación por coordenada 
# 4. Generar tabla de coordenadas
pp2024.coord <- pp2024 %>%
  group_by(lon, lat) %>%
  summarise(precip_sum_anual = sum(chirps, na.rm = TRUE))
## `summarise()` has grouped output by 'lon'. You can override using the `.groups`
## argument.
head(pp2024.coord,10)
## # A tibble: 10 × 3
## # Groups:   lon [2]
##      lon   lat precip_sum_anual
##    <dbl> <dbl>            <dbl>
##  1 -91.1  14.6            1564.
##  2 -91.1  14.7            1121.
##  3 -91.1  14.7            1119.
##  4 -91.1  14.8            1067.
##  5 -91.1  14.8            1049.
##  6 -91.1  14.8            1049.
##  7 -91.1  14.9            1042.
##  8 -91.1  14.9            1151.
##  9 -91.1  14.3            2901.
## 10 -91.1  14.4            3003.
# 5. Convertir a objeto sf
pp2024.coord.sf <- st_as_sf(pp2024.coord, coords = c("lon", "lat"), crs = 4326)
plot(pp2024.coord.sf, col = "red", pch = 20, cex = 0.5)

plot(deptos, col = depar_col, border = "black", lwd = 0.5)
## Warning: plotting the first 9 out of 10 attributes; use max.plot = 10 to plot
## all
plot(pp2024.coord.sf$geometry, col = "red", pch = 20, cex = 2,add=TRUE, size = 10)

# Ejemplificar el krigin ordinario
# 6. Kriging ordinario
#                                     Evaluación estadistica descriptiva

summary(pp2024.coord)  # estadistica descriptiva
##       lon              lat        precip_sum_anual
##  Min.   :-91.13   Min.   :14.26   Min.   : 900.5  
##  1st Qu.:-90.91   1st Qu.:14.43   1st Qu.:1077.4  
##  Median :-90.69   Median :14.60   Median :1424.4  
##  Mean   :-90.69   Mean   :14.60   Mean   :1708.6  
##  3rd Qu.:-90.47   3rd Qu.:14.77   3rd Qu.:2107.4  
##  Max.   :-90.25   Max.   :14.94   Max.   :3398.6
hist(pp2024.coord$precip_sum_anual)  # histograma

#                                     Datos Faltante y Atipicos
# 1. Evaluación de datos faltantes
#install.packages("VIM")
library(VIM)
## Warning: package 'VIM' was built under R version 4.4.3
## Cargando paquete requerido: colorspace
## 
## Adjuntando el paquete: 'colorspace'
## The following object is masked from 'package:raster':
## 
##     RGB
## The following object is masked from 'package:terra':
## 
##     RGB
## Cargando paquete requerido: grid
## 
## Adjuntando el paquete: 'grid'
## The following object is masked from 'package:terra':
## 
##     depth
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## Adjuntando el paquete: 'VIM'
## The following object is masked from 'package:terra':
## 
##     countNA
## The following object is masked from 'package:datasets':
## 
##     sleep
aggr(pp2024.coord, col = c("navyblue", "yellow"),
     numbers = TRUE, sortVars = TRUE, labels = names(pp2024.coord), 
     cex.axis = 0.7, cex.numbers = 0.7, main = "Datos faltantes")   # evaluacion del % datos faltantes

## 
##  Variables sorted by number of missings: 
##          Variable Count
##               lon     0
##               lat     0
##  precip_sum_anual     0
# 2. Evaluación de datos atipicos
#install.packages("outliers")
library(outliers)
boxplot(pp2024.coord$precip_sum_anual, col = "lightblue", main = "Boxplot de precipitación anual")  # boxplot

# test de Grubbs para detectar outliers
outlier(pp2024.coord$precip_sum_anual)  # test de Grubbs para detectar outliers
## [1] 3398.61
#              Eliminar dato atipico
pp2024.coord <- pp2024.coord[pp2024.coord$precip_sum_anual <= 2100, ]  # eliminar outlier


#                                      Evaluación de la normalidad
#                    Ho: "La variable precip_sum_anual es normal"    ####################
qqnorm(pp2024.coord$precip_sum_anual)
qqline(pp2024.coord$precip_sum_anual, col="red")

#install.packages("nortest")
library(nortest)
shapiro.test(pp2024.coord$precip_sum_anual)  # test de normalidad  est de Shapiro-Wilk
## 
##  Shapiro-Wilk normality test
## 
## data:  pp2024.coord$precip_sum_anual
## W = 0.8604, p-value = 7.35e-14
lillie.test(pp2024.coord$precip_sum_anual)  # test de normalidad est de Lilliefors
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  pp2024.coord$precip_sum_anual
## D = 0.16387, p-value < 2.2e-16
# 2. Evaluación de la estacionariedad
#Una tendencia global o regional viola este supuesto de "estacionariedad de la media"
#install.packages("scatterplot3d")
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Cargando paquete requerido: carData
## 
## Adjuntando el paquete: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
scatterplot(precip_sum_anual~lon,pp2024.coord, col="black", smooth=T, boxplots=F, cex=1, ellipse=F) # variacion lineal en el eje X

scatterplot(precip_sum_anual~lat,pp2024.coord, col="black", smooth=T, boxplots=F, cex=1, ellipse=F) # variacion de segundo grado en el eje Y

# eliminacion tendencia espacial
model.MLR<-lm(precip_sum_anual~ lon + lat+ I(lon^2) + I(lat^2) + I(lon*lat),data = pp2024.coord)  #+ I(X^2) + I(Y^2) + I(X*Y)

summary(model.MLR)
## 
## Call:
## lm(formula = precip_sum_anual ~ lon + lat + I(lon^2) + I(lat^2) + 
##     I(lon * lat), data = pp2024.coord)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -381.92  -87.37   -0.39   70.03  434.23 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2890971.0  1190811.0   2.428 0.015959 *  
## lon            72945.0    26569.9   2.745 0.006519 ** 
## lat            58278.9    21202.2   2.749 0.006456 ** 
## I(lon^2)         532.3      150.9   3.528 0.000505 ***
## I(lat^2)        2898.5      328.3   8.829 2.68e-16 ***
## I(lon * lat)    1601.5      273.4   5.859 1.59e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 138.5 on 231 degrees of freedom
## Multiple R-squared:  0.8525, Adjusted R-squared:  0.8493 
## F-statistic:   267 on 5 and 231 DF,  p-value: < 2.2e-16
model.MLR_step<-step(model.MLR,direction = "both")
## Start:  AIC=2343.29
## precip_sum_anual ~ lon + lat + I(lon^2) + I(lat^2) + I(lon * 
##     lat)
## 
##                Df Sum of Sq     RSS    AIC
## <none>                      4433643 2343.3
## - lon           1    144664 4578307 2348.9
## - lat           1    145014 4578657 2348.9
## - I(lon^2)      1    238923 4672566 2353.7
## - I(lon * lat)  1    658824 5092467 2374.1
## - I(lat^2)      1   1496179 5929822 2410.2
summary(model.MLR_step)
## 
## Call:
## lm(formula = precip_sum_anual ~ lon + lat + I(lon^2) + I(lat^2) + 
##     I(lon * lat), data = pp2024.coord)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -381.92  -87.37   -0.39   70.03  434.23 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2890971.0  1190811.0   2.428 0.015959 *  
## lon            72945.0    26569.9   2.745 0.006519 ** 
## lat            58278.9    21202.2   2.749 0.006456 ** 
## I(lon^2)         532.3      150.9   3.528 0.000505 ***
## I(lat^2)        2898.5      328.3   8.829 2.68e-16 ***
## I(lon * lat)    1601.5      273.4   5.859 1.59e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 138.5 on 231 degrees of freedom
## Multiple R-squared:  0.8525, Adjusted R-squared:  0.8493 
## F-statistic:   267 on 5 and 231 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(model.MLR_step)

par(mfrow=c(1,1))

# Residuales
model <- update(model.MLR_step)
model$call
## lm(formula = precip_sum_anual ~ lon + lat + I(lon^2) + I(lat^2) + 
##     I(lon * lat), data = pp2024.coord)
length(model$residuals)
## [1] 237
pp2024.coord$residuales <- model$residuals # asignar residuales a la base de datos

# calcular pp en funcion del modelo 
pp2024.coord$prediccion <- predict(model.MLR_step, pp2024.coord) # predecir valores

hist(pp2024.coord$residuales) # histograma de residuos

## Partición de las muestras para modelación, 80% de datos para entrenamiento y 20% de datos para validacion

set.seed(20210712) ## Para hacer reproducibles los subset

data75 <- length(pp2024.coord$residuales) * 0.75 # 75% de los datos

inTrain <- sample(x = nrow(pp2024.coord), size = data75, replace = FALSE, prob = NULL) ### Size, depende el porcentaje que queramos separar usualmente 75,25
training <- pp2024.coord[inTrain, ]
testing <- pp2024.coord[-inTrain, ]  # validar, no participa en el entrenamiento del modelo
names(testing)
## [1] "lon"              "lat"              "precip_sum_anual" "residuales"      
## [5] "prediccion"
names(training)
## [1] "lon"              "lat"              "precip_sum_anual" "residuales"      
## [5] "prediccion"
# Evaluación estructural
# Generación de variograma experimental y teórico

library(gstat)

# Crear objeto espacial para gstat
training_sf <- st_as_sf(training, coords = c("lon", "lat"), crs = 4326)
training_sp <- as(training_sf, "Spatial")

# Variograma experimental de los residuales
vg_exp <- variogram(residuales ~ 1, data = training_sp)
plot(vg_exp, main = "Variograma Experimental de los Residuales")

# Ajuste automático del modelo teórico de variograma
vg_mod_auto <- fit.variogram(vg_exp, vgm(psill = NA, model = c("Sph", "Exp", "Gau", "Mat"), nugget = NA, range = NA))
plot(vg_exp, vg_mod_auto, main = "Variograma Experimental y Teórico (Ajuste Automático)")

vg_mod_auto
##   model    psill    range
## 1   Gau 21441.36 9.706901
# Crear una cuadrícula regular de puntos dentro de los departamentos
# Usamos el paquete sf y terra para mantener la consistencia con el resto del script

# Crear una cuadrícula regular de 100 x 100 puntos (ajustar n si se requiere mayor resolución)
# disolver por pasis el objeto deptos
deptos <- st_union(deptos)  # Unir los departamentos en un solo objeto
plot(deptos)

n_points <- 700
bbox <- st_bbox(deptos)
x_seq <- seq(bbox["xmin"], bbox["xmax"], length.out = n_points)
y_seq <- seq(bbox["ymin"], bbox["ymax"], length.out = n_points)
grid <- expand.grid(lon = x_seq, lat = y_seq)
grid_sf <- st_as_sf(grid, coords = c("lon", "lat"), crs = 4326)

# Mantener solo los puntos dentro de los departamentos
grid_sf <- grid_sf[st_within(grid_sf, deptos, sparse = FALSE)[,1], ]

# Convertir a Spatial para compatibilidad con gstat::krige
grid_sp <- as(grid_sf, "Spatial")

#plot(grid_sp)
# Modelado mediante kriging ordinario de los residuales
# Usamos el modelo de variograma ajustado previamente (vg_mod_auto)
kriging_result <- krige(residuales ~ 1, training_sp, grid_sp, model = vg_mod_auto)
## [using ordinary kriging]
# Visualización de la predicción de kriging
library(raster)
kriging_df <- as.data.frame(kriging_result)
kriging_df$lon <- coordinates(kriging_result)[,1]
kriging_df$lat <- coordinates(kriging_result)[,2]
kriging_raster <- rasterFromXYZ(kriging_df[, c("lon", "lat", "var1.pred")])
plot(kriging_raster, main = "Predicción de Kriging de los Residuales", col = terrain.colors(100))

# el krigin residual se puede sumar a la predicción del modelo de regresión
# para obtener la predicción final
# Convertir la cuadrícula de predicción a un objeto sf

kriging_sf <- st_as_sf(kriging_df, coords = c("lon", "lat"), crs = 4326)  

# Unir la predicción de kriging con la cuadrícula original
kriging_sf <- st_join(grid_sf, kriging_sf, join = st_intersects)

# Predecir la regresión en la cuadrícula
# Primero, extraer las coordenadas de la cuadrícula
coords_grid <- st_coordinates(kriging_sf)

# Crear un data.frame con las variables necesarias para el modelo
grid_pred_df <- as.data.frame(coords_grid)
colnames(grid_pred_df) <- c("lon", "lat")

# Predecir usando el modelo de regresión ajustado
kriging_sf$prediccion <- predict(model.MLR_step, newdata = grid_pred_df)

# Sumar la predicción de kriging a la predicción del modelo de regresión
kriging_sf$prediccion_final <- kriging_sf$var1.pred + kriging_sf$prediccion

# Visualizar la predicción final
plot(kriging_sf$prediccion_final, main = "Predicción Final de Precipitación", col = terrain.colors(100))

# Transformar a un objeto raster para una mejor visualización
kriging_raster_final <- rasterFromXYZ(
  data.frame(
    lon = st_coordinates(kriging_sf)[,1],
    lat = st_coordinates(kriging_sf)[,2],
    prediccion_final = kriging_sf$prediccion_final
  )
)

plot(kriging_raster_final, main = "Predicción Final de Precipitación", col = terrain.colors(100))

# 7. Evaluación de la predicción
# Adaptación para evaluación de la predicción usando el raster generado por kriging

# Guardar el raster de predicción final
writeRaster(kriging_raster_final, "C:/Users/JuanJosueSantosP/OneDrive/Curso Mariano Galvez/Geoestadísttica/Kriging/geoK_O.tif", overwrite=TRUE)

# Leer el raster guardado
var_UK <- raster("C:/Users/JuanJosueSantosP/OneDrive/Curso Mariano Galvez/Geoestadísttica/Kriging/geoK_O.tif")
plot(var_UK)

#crs(var_UK) <- CRS("+init=epsg:32615")  # Asignar proyección UTM zona 15N (ajustar si es necesario)

# Extraer valores predichos en los puntos de testing
testing_sp <- as(st_as_sf(testing, coords = c("lon", "lat"), crs = 4326), "Spatial")
#testing_sp <- spTransform(testing_sp, CRS("+init=epsg:32615"))  # Transformar a la proyección del raster

datos <- extract(x = var_UK, y = testing_sp, sp = TRUE)
testing$var_UK <- datos@data[, 4]  # Asignar valores extraídos

testing=na.omit(testing)  # Eliminar filas con NA

# Crear data.frame para validación
mod_var_UK <- data.frame(obs = testing$precip_sum_anual, mod = testing$var_UK, model = "UK")
modData <- mod_var_UK

Validation <- modData
Validation$K_E <- Validation$mod - Validation$obs

#write.csv(Validation, "E:/TESIS/Tesis Vaner/Tesis Vander documentos full/Estadisticos/valK.csv", row.names = TRUE)

# Estadísticos de validación
ME <- mean(Validation$K_E, na.rm = TRUE)
MAE <- mean(abs(Validation$K_E), na.rm = TRUE)
MSE <- mean(Validation$K_E^2, na.rm = TRUE)
RMSE <- sqrt(mean(Validation$K_E^2, na.rm = TRUE))
AVE <- 1 - (sum(Validation$K_E^2, na.rm = TRUE) / sum((Validation$obs - mean(Validation$obs, na.rm = TRUE))^2, na.rm = TRUE))
par <- cbind(ME, MAE, MSE, RMSE, AVE)
par
##             ME      MAE      MSE   RMSE       AVE
## [1,] -4.550245 29.22107 1335.829 36.549 0.9836632
library(hydroGOF)
## Cargando paquete requerido: zoo
## 
## Adjuntando el paquete: 'zoo'
## The following object is masked from 'package:terra':
## 
##     time<-
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Adjuntando el paquete: 'hydroGOF'
## The following object is masked from 'package:VIM':
## 
##     nrmse
#write.csv(data.frame(value = hydroGOF::gof(Validation$mod, Validation$obs, na.rm = TRUE)),
#         file = "E:/TESIS/Tesis Vaner/Tesis Vander documentos full/Estadisticos/hydroVal_K.csv")

mod <- data.frame(obs = Validation$obs, mod = Validation$mod, model = "UK")
library(openair)
modsts <- modStats(mod, obs = "obs", mod = "mod", type = "model")
modsts
## # A tibble: 1 × 12
##   model     n  FAC2    MB   MGE      NMB   NMGE  RMSE     r        P   COE   IOA
##   <fct> <int> <dbl> <dbl> <dbl>    <dbl>  <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl>
## 1 UK       41     1 -4.55  29.2 -0.00358 0.0230  36.5 0.992 1.33e-36 0.872 0.936
parameters <- as.data.frame(cbind(par, modsts))
parameters <- parameters[, c(1, 2, 3, 4, 5, 14, 15, 16)]
parameters
##          ME      MAE      MSE   RMSE       AVE         r            P       COE
## 1 -4.550245 29.22107 1335.829 36.549 0.9836632 0.9919316 1.334178e-36 0.8721138
#write.csv(parameters, file = "E:/TESIS/Tesis Vaner/Tesis Vander documentos full/Estadisticos/Estadigrafos_K.csv")

# Gráfico observados vs. predichos
library(ggplot2)
ggplot(data.frame("obs" = Validation$obs, "mod" = Validation$mod), aes(obs, mod)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(labels = scales::comma) +
  ggtitle("Observed vs Predicted")