Ejemplo gstats

Datos del aire de méxico del O3 del día 2020-02-10 a las 18 horas.

library(gstat)
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(sp)
library(rgdal)
## rgdal: version: 1.5-16, (SVN revision 1050)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
## Path to GDAL shared files: C:/Users/JUAN FRANCO/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: C:/Users/JUAN FRANCO/Documents/R/win-library/4.0/rgdal/proj
## Linking to sp version:1.4-2
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
library(ggplot2)  # Gráficas
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
geo_data <-  read.table("Geo_data.txt",
                        head=T,
                        dec=",")
shp_map <- read_sf("mpiosutm.shp")

pander::pander(head(geo_data))
X Y O3
473346 2164689 82
482180 2152665 78
481502 2136931 44
469366 2141275 41
479189 2180751 103
474444 2154232 88
pander::pander(summary(geo_data))
X Y O3
Min. :469366 Min. :2123036 Min. : 41.00
1st Qu.:479031 1st Qu.:2140661 1st Qu.: 51.50
Median :487546 Median :2149795 Median : 57.00
Mean :486578 Mean :2150506 Mean : 66.12
3rd Qu.:493141 3rd Qu.:2159851 3rd Qu.: 82.25
Max. :510196 Max. :2180751 Max. :112.00

Gráficas descriptivas

map_variable_mexico <- function(datos, variable, map) {

   plot1 <- ggplot() +
            geom_sf(data = map, size = 0.3) +
            geom_point(data = datos, aes_string(x = "X", y = "Y",
                                                color = variable)) +
            scale_color_viridis_c(direction = -1) +
            labs(
                x = "",
                y = "",
                title = paste(variable, "map CDMX")
            ) +
            theme_void() +
            theme(
                plot.title = element_text(size = 14,
                                         face = "bold",
                                         colour = "black"),
                plot.margin = unit(c(1, 1, 1.5, 1.2), "cm"))

    return(plot1)

}

pl1 <- map_variable_mexico(geo_data,
                           "O3",
                           shp_map)

ggplotly(pl1)
simple_scatter_plot <- function(datos, variable1, variable2) {

    plot1 <- ggplot(as.data.frame(datos),
                    aes_string(x = variable1, y = variable2,
                               color = as.factor(1))) +
                    geom_point() +
                    scale_colour_viridis_d() +
                    labs(
                        x = variable1,
                        y = variable2
                        ) +
                    theme_light() +
                    theme(legend.position = "none")

    return(plot1)

}

pl1 <- simple_scatter_plot(geo_data,
                           "X",
                           "O3")

pl2 <- simple_scatter_plot(geo_data,
                           "Y",
                           "O3")

cowplot::plot_grid(pl1, pl2)

Modelar la media

Modelo 1

model1 <- lm(O3 ~ X + Y, data = geo_data)
pander::pander(summary(model1))
  Estimate Std. Error t value Pr(>|t|)
(Intercept) -1400 679.2 -2.062 0.05185
X -0.0002263 0.0004124 -0.5488 0.5889
Y 0.000733 0.0002872 2.552 0.01856
Fitting linear model: O3 ~ X + Y
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
24 19.59 0.2618 0.1915
pander::pander(anova(model1))
Analysis of Variance Table
  Df Sum Sq Mean Sq F value Pr(>F)
X 1 359.3 359.3 0.9358 0.3444
Y 1 2501 2501 6.513 0.01856
Residuals 21 8063 383.9 NA NA

Modelo 2

model2 <- lm(O3 ~ Y, data = geo_data)
pander::pander(summary(model2))
  Estimate Std. Error t value Pr(>|t|)
(Intercept) -1564 600 -2.607 0.0161
Y 0.0007581 0.000279 2.717 0.01259
Fitting linear model: O3 ~ Y
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
24 19.28 0.2512 0.2172
pander::pander(anova(model2))
Analysis of Variance Table
  Df Sum Sq Mean Sq F value Pr(>F)
Y 1 2744 2744 7.382 0.01259
Residuals 22 8178 371.7 NA NA

Modelo 3

model3 <- lm(O3 ~ Y + I(Y^2), data = geo_data)
pander::pander(summary(model3))
  Estimate Std. Error t value Pr(>|t|)
(Intercept) 211202 63907 3.305 0.003371
Y -0.197 0.05941 -3.317 0.00328
I(Y^2) 4.597e-08 1.381e-08 3.329 0.003183
Fitting linear model: O3 ~ Y + I(Y^2)
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
24 15.97 0.5099 0.4633
pander::pander(anova(model3))
Analysis of Variance Table ## Gráfica de los residuales del modelo 3
  Df Sum Sq Mean Sq F value Pr(>F)
Y 1 2744 2744 10.77 0.003563
I(Y^2) 1 2826 2826 11.09 0.003183
Residuals 21 5353 254.9 NA NA
geo_data <- data.frame(geo_data, residuales_O3 = residuals(model3))

pl1 <- simple_scatter_plot(geo_data,
                           "X",
                           "residuales_O3")

pl2 <- simple_scatter_plot(geo_data,
                           "Y",
                           "residuales_O3")

cowplot::plot_grid(pl1, pl2)

Modelar variograma

Definición objeto de gstats

coordinates(geo_data) <-  ~ X + Y

g_obj <- gstat(id = "O3",
               formula = O3 ~ Y + I(Y^2),
               data = geo_data)

Construción del variograma muestral

var_s <- variogram(g_obj, cutoff = 18000)

pander::pander(var_s)
np dist gamma dir.hor dir.ver id
2 3051 138.7 0 0 O3
4 4178 38.22 0 0 O3
4 5258 69.1 0 0 O3
6 6735 134.4 0 0 O3
5 7661 51.06 0 0 O3
10 9002 189.4 0 0 O3
10 10203 299.1 0 0 O3
7 11309 162.1 0 0 O3
11 12763 249.7 0 0 O3
10 13946 392 0 0 O3
18 15013 304.7 0 0 O3
10 16121 199.4 0 0 O3
9 17332 475.5 0 0 O3

Gráfico del variograma muestral

var_s <- var_s[-1, ] # Quitando la priemra observación
plot(var_s, pl = T)

Gráfico del variograma muestral con valores iniciales Modelo1

vgm_model1 <- vgm("Gau",
                 psill = 300,
                 range = 10000)
plot(var_s, vgm_model1, pl = T)

Gráfico del variograma ajustado Modelo1

fitted_vgm1 <- fit.variogram(object = var_s,
                            model = vgm_model1)
plot(var_s, fitted_vgm1, pl = T)

Gráfico del variograma muestral con valores iniciales Modelo2

vgm_model2 <- vgm("Wav", psill = 300, range = 14000)
plot(var_s, vgm_model2, pl = T)

Gráfico del variograma ajustado Modelo2

fitted_vgm2 <- fit.variogram(object = var_s,
                            model = vgm_model2)
plot(var_s, fitted_vgm2, pl = T)

Gráfico del variograma muestral con valores iniciales Modelo3

vgm_model3 <- vgm("Exp", psill = 400, range = 12500)
plot(var_s, vgm_model3, pl = T)

Gráfico del variograma ajustado Modelo3

fitted_vgm3 <- fit.variogram(object = var_s,
                             model = vgm_model3,
                             fit.method = 2)
## Warning in fit.variogram(object = var_s, model = vgm_model3, fit.method = 2): No
## convergence after 200 iterations: try different initial values?
plot(var_s, fitted_vgm3, pl = T)

Comparación de los modelos.

var_s$fitted1 <- variogramLine(fitted_vgm1, dist_vector = var_s$dist)$gamma
var_s$fitted2 <- variogramLine(fitted_vgm2, dist_vector = var_s$dist)$gamma
var_s$fitted3 <- variogramLine(fitted_vgm3, dist_vector = var_s$dist)$gamma

cmr <- function(obs, fit) {
  return(mean((obs - fit)**2))
}

tabla <- data.frame(rbind(fitted_vgm1,
                          fitted_vgm2,
                          fitted_vgm3),
                    CMR =
                      c(cmr(var_s$gamma, var_s$fitted1),
                      cmr(var_s$gamma, var_s$fitted2),
                      cmr(var_s$gamma, var_s$fitted3)))


pander::pander(tabla[c(1:3,10)])
model psill range CMR
Gau 418.4 12239 5420
Wav 306.4 14000 5453
Exp 15217 742875 5953

Predicciones con el modelo 1

prediction_plot <- function(g_object, variable, map_path) {

    map <- readOGR(map_path)
    new <- sp::spsample(map, n = 10000, type = "regular")
    coordinates(new) ~ x1 + x2
    colnames(new@coords) <- c("X", "Y")

    predic <- predict(g_object, newdata = new)

    prediction <- data.frame(predic)

    pred <- paste(variable, ".pred", sep = "")

    plot1 <- plot_ly(
                 x = prediction[["X"]],
                 y = prediction[["Y"]],
                 z = prediction[[pred]],
                 type = "heatmap",
                 colorbar = list(title = "Prediction"),
                 reversescale = T

            ) %>%
            layout(
                   title = paste(variable, "prediction"),
                   xaxis = list(showticklabels = FALSE),
                   yaxis = list(showticklabels = FALSE),
                   scene = list(aspectration = list(x = 1, y = 1))
                   )
    return(plot1)

}


variance_plot <- function(g_object, variable, map_path) {

    map <- readOGR(map_path)
    new <- sp::spsample(map, n = 10000, type = "regular")
    coordinates(new) ~ x1 + x2
    colnames(new@coords) <- c("X", "Y")

    predic <- predict(g_object, newdata = new)

    prediction <- data.frame(predic)

    var <- paste(variable, ".var", sep = "")

    plot2 <- plot_ly(
                 x = prediction[["X"]],
                 y = prediction[["Y"]],
                 z = prediction[[var]],
                 type = "heatmap",
                 colorbar = list(title = "variance"),
                 colorscale = "Hot",
                 reversescale = T
            )  %>%
            layout(
                   title = paste(variable, "Variance"),
                   xaxis = list(showticklabels = FALSE),
                   yaxis = list(showticklabels = FALSE),
                   scene = list(aspectration = list(x = 1, y = 1))
                   )
    return(plot2)

}

cv_plot <- function(g_object, variable, map_path) {

    map <- readOGR(map_path)
    new <- sp::spsample(map, n = 10000, type = "regular")
    coordinates(new) ~ x1 + x2
    colnames(new@coords) <- c("X", "Y")

    predic <- predict(g_object, newdata = new)

    prediction <- data.frame(predic)
    pred <- paste(variable, ".pred", sep = "")
    var <- paste(variable, ".var", sep = "")
    aux <- abs(sqrt(prediction[var]) / abs(prediction[pred]))
    aux[aux > 1] <- 1
    prediction["cv"] <- aux

    plot3 <- plot_ly(
                 x = prediction[["X"]],
                 y = prediction[["Y"]],
                 z = prediction[["cv"]],
                 type = "heatmap",
                 colorbar = list(title = "cv"),
                 colorscale = "Electric",
                 reversescale = T,
                 zmid = 0.5
            )  %>%
            layout(
                   title = paste(variable, "CV"),
                   xaxis = list(showticklabels = FALSE),
                   yaxis = list(showticklabels = FALSE),
                   scene = list(aspectration = list(x = 1, y = 1)))


   return(plot3)

}

g_obj <- gstat(id = "O3",
               formula = O3 ~ Y + I(Y^2),
               model = fitted_vgm1, # Modelo ajustado 2
               data = geo_data)
prediction_plot(g_obj, "O3", "mpiosutm.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\JUAN FRANCO\Downloads\Ejemplo_gstat\mpiosutm.shp", layer: "mpiosutm"
## with 54 features
## It has 7 fields
## [using universal kriging]
variance_plot(g_obj, "O3", "mpiosutm.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\JUAN FRANCO\Downloads\Ejemplo_gstat\mpiosutm.shp", layer: "mpiosutm"
## with 54 features
## It has 7 fields
## [using universal kriging]
cv_plot(g_obj, "O3", "mpiosutm.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\JUAN FRANCO\Downloads\Ejemplo_gstat\mpiosutm.shp", layer: "mpiosutm"
## with 54 features
## It has 7 fields
## [using universal kriging]