Trabajo_final.utf8

Trabajo final de muestreo - Computación estadística
Jorge Iván Rivera Bermúdez, 2020-ll

library(spdep)
library(ape)
library(sp)
library(MVA)
library(Hmisc)
library(normtest)
library(nortest)
library(spatialreg)
library(corrplot)
library(ggcorrplot)
library(psych)
library(readxl)
library(plotly)
library(ggplot2)
library(akima)
library(DT)
library(cowplot)

# Carga de la data
df <- read_excel("D:/R/Rprojects/UN/Computacion_e/BD_MODELADO.xlsx")
# View(df)
datatable(df, filter = "top", options = list(pageLengthe = 6))

df_m <- as.matrix(df[ , c(3:8)])

ce075 <- df_m[ , 1]  # Variable respuesta 75 cm
ce150 <- df_m[ , 2] # Variable respuesta 150 

exp075 <- df_m[ , 2:6] # Variable explicativa 75 cm
exp150 <- df_m[ , c(1,3:6)] # Variable explicativa 150 cm
coord <- as.matrix(df_m[ , 1:2]) # Coordenadas para elaborar la matriz de pesos

me075 <- as.matrix(exp075) 
me150 <- as.matrix(exp150)

X<- df$Avg_X_MCB
Y<- df$Avg_Y_MCE
CE75<-df$Avg_CEa_07
CE150<- df$Avg_CEa_15
NDVI<- df$NDVI
DEM<- df$DEM
SLOPE<- df$SLOPE
Z<- df$Avg_z

m_p <- as.matrix(dist(coord, diag = T, upper = T)) # Matriz de pesos "W"
invm_p <- as.matrix(1/m_p)
diag(invm_p) <- 0
W <- as.matrix(invm_p)
suma <- apply(W, 1, sum)
We <- W/suma # Matriz de pesos estandarizada
# apply(We, 1, sum)

Índices de Moran

(moran_075 <- Moran.I(ce075, We)$p.value)

## [1] 0

(moran_150 <- Moran.I(ce150, We)$p.value)

## [1] 0

(moran_NDVI <- Moran.I(df_m[,3], We)$p.value)

## [1] 0.1057779

(moran_DEM <- Moran.I(df_m[,4], We)$p.value)

## [1] 0

(moran_SLOPE <- Moran.I(df_m[,5], We)$p.value)

## [1] 2.221843e-10

(moran_z <- Moran.I(df_m[,6], We)$p.value)

## [1] 0

Se observa una dependencia espacial en los datos.

Matrices de correlación

mc_p <- rcorr(as.matrix(df_m[,1:6]), type = 'p') 
mc_s <- rcorr(as.matrix(df_m[,1:6]), type = 's')
mc_p$r

##             Avg_CEa_07  Avg_CEa_15        NDVI        DEM       SLOPE
## Avg_CEa_07  1.00000000  0.01089764  0.03552583  0.5322948 -0.13642334
## Avg_CEa_15  0.01089764  1.00000000 -0.17636216 -0.3841976  0.18719946
## NDVI        0.03552583 -0.17636216  1.00000000  0.1015882  0.09610537
## DEM         0.53229480 -0.38419758  0.10158817  1.0000000 -0.11958979
## SLOPE      -0.13642334  0.18719946  0.09610537 -0.1195898  1.00000000
## Avg_z       0.62185866 -0.46431546  0.21352028  0.7901060 -0.04623111
##                  Avg_z
## Avg_CEa_07  0.62185866
## Avg_CEa_15 -0.46431546
## NDVI        0.21352028
## DEM         0.79010605
## SLOPE      -0.04623111
## Avg_z       1.00000000

mc_s$r

##             Avg_CEa_07  Avg_CEa_15         NDVI          DEM      SLOPE
## Avg_CEa_07  1.00000000 -0.07010792 -0.087758460  0.554650652 -0.1373202
## Avg_CEa_15 -0.07010792  1.00000000 -0.090734198 -0.390986746  0.2188554
## NDVI       -0.08775846 -0.09073420  1.000000000 -0.001001312  0.1079099
## DEM         0.55465065 -0.39098675 -0.001001312  1.000000000 -0.1353297
## SLOPE      -0.13732021  0.21885538  0.107909879 -0.135329719  1.0000000
## Avg_z       0.64975197 -0.50662301  0.087797985  0.816109089 -0.1057241
##                  Avg_z
## Avg_CEa_07  0.64975197
## Avg_CEa_15 -0.50662301
## NDVI        0.08779798
## DEM         0.81610909
## SLOPE      -0.10572405
## Avg_z       1.00000000

cp <- ggcorrplot(mc_p$r, hc.order = TRUE, type = "upper", outline.color = "white",
                 lab = TRUE, lab_size = 2, tl.cex = 10, method = "circle") + labs(title = "Pearson")

cs <- ggcorrplot(mc_s$r, hc.order = TRUE, type = "upper", outline.color = "white", 
                 lab = TRUE,lab_size = 2, tl.cex = 10, method = "circle") + labs(title = "Spearman")

plot_grid(cp, cs)

Se observa la correlación más alta entre el DEM con CE075, y Z con CE075 para ambos casos (Pearson y Spearman). Para CE150 la correlación más alta se encuentra con SLOPE.

Algo de estadística descriptiva:

(descriptiva <- describe(df_m[ , 1:6]))

##            vars   n   mean   sd median trimmed  mad    min    max range  skew
## Avg_CEa_07    1 313   9.77 1.53   9.65    9.76 1.40   6.17  13.60  7.44  0.10
## Avg_CEa_15    2 313  18.50 0.74  18.44   18.47 0.64  16.80  21.51  4.71  0.57
## NDVI          3 313   0.83 0.03   0.84    0.84 0.02   0.70   0.88  0.17 -1.39
## DEM           4 313 205.11 4.55 204.83  205.13 5.68 196.17 213.33 17.17  0.06
## SLOPE         5 313   4.13 2.16   3.78    3.94 2.10   0.21  12.72 12.51  0.95
## Avg_z         6 313 202.47 3.66 202.49  202.57 4.34 193.05 212.35 19.30 -0.14
##            kurtosis   se
## Avg_CEa_07    -0.28 0.09
## Avg_CEa_15     1.22 0.04
## NDVI           2.48 0.00
## DEM           -1.13 0.26
## SLOPE          1.25 0.12
## Avg_z         -0.54 0.21

par(mfrow = c(2, 2))
boxplot(df_m[ , 1], main = 'Boxplot CE 075 cm', ylab = "CE 075 cm", col = "lightblue")
hist(df_m[ , 1], main = 'Histograma CE 075 cm', xlab = "CE 075 cm", col = "steelblue")
boxplot(df_m[ , 2], main = 'Boxplot CE 150 cm', ylab = "CE 150 cm", col = "lightblue")
hist(df_m[ , 2], main = 'Histograma CE 150 cm', xlab = "CE 150 cm", col = "steelblue")

Pruebas de normalidad.

(cvm075 <- cvm.test(df$Avg_CEa_07))

## 
##  Cramer-von Mises normality test
## 
## data:  df$Avg_CEa_07
## W = 0.13062, p-value = 0.04289

(cvm150 <- cvm.test(df$Avg_CEa_15))

## 
##  Cramer-von Mises normality test
## 
## data:  df$Avg_CEa_15
## W = 0.19809, p-value = 0.005641

(sh075 <- sf.test(df$Avg_CEa_07))

## 
##  Shapiro-Francia normality test
## 
## data:  df$Avg_CEa_07
## W = 0.99381, p-value = 0.2013

(sh150 <- sf.test(df$Avg_CEa_15))

## 
##  Shapiro-Francia normality test
## 
## data:  df$Avg_CEa_15
## W = 0.97701, p-value = 0.0001414

Digresión: skewness para diferentes distribuciones de probabilidad.

normal <- rnorm(100, 5, 5)
uniforme <- runif(100, 0,10)
tstudent <- pt(2.2010, 11, lower.tail = F)

(sk_normal <- skewness.norm.test(normal, nrepl = 1000))

## 
##  Skewness test for normality
## 
## data:  normal
## T = 0.11072, p-value = 0.622

(sk_uniforme <- skewness.norm.test(normal, nrepl = 1000))

## 
##  Skewness test for normality
## 
## data:  normal
## T = 0.11072, p-value = 0.621

(sk_tstudent <- skewness.norm.test(normal, nrepl = 1000))

## 
##  Skewness test for normality
## 
## data:  normal
## T = 0.11072, p-value = 0.657

Fin de la digresión.

##Modelado.

##Modelado clásico no espacial.

CEa 075 cm.

mod_c075 <- lm(df$Avg_CEa_07 ~ df$SLOPE + df$Avg_z + df$Avg_CEa_15) 
summary(mod_c075)

## 
## Call:
## lm(formula = df$Avg_CEa_07 ~ df$SLOPE + df$Avg_z + df$Avg_CEa_15)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.41298 -0.71115 -0.05541  0.64834  3.01718 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -73.98404    4.74765 -15.583  < 2e-16 ***
## df$SLOPE       -0.12525    0.02799  -4.475 1.07e-05 ***
## df$Avg_z        0.33680    0.01834  18.369  < 2e-16 ***
## df$Avg_CEa_15   0.86887    0.09270   9.373  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.05 on 309 degrees of freedom
## Multiple R-squared:  0.5315, Adjusted R-squared:  0.527 
## F-statistic: 116.9 on 3 and 309 DF,  p-value: < 2.2e-16

CEa 150cm.

mod_c150 <- lm(df$Avg_CEa_15 ~ df$SLOPE + df$Avg_z + df$Avg_CEa_07) 
summary(mod_c150)

## 
## Call:
## lm(formula = df$Avg_CEa_15 ~ df$SLOPE + df$Avg_z + df$Avg_CEa_07)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.50608 -0.39071  0.01281  0.36509  2.04703 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   47.55651    2.11792  22.454  < 2e-16 ***
## df$SLOPE       0.07595    0.01503   5.053 7.45e-07 ***
## df$Avg_z      -0.15733    0.01123 -14.009  < 2e-16 ***
## df$Avg_CEa_07  0.25480    0.02718   9.373  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5686 on 309 degrees of freedom
## Multiple R-squared:  0.4107, Adjusted R-squared:  0.405 
## F-statistic: 71.78 on 3 and 309 DF,  p-value: < 2.2e-16

Pruebas de normalidad para los residuales de ambos modelos.

res75mc <- mod_c075$residuals
shapiro.test(res75mc)

## 
##  Shapiro-Wilk normality test
## 
## data:  res75mc
## W = 0.99092, p-value = 0.05044

res150mc <- mod_c150$residuals
shapiro.test(res150mc)

## 
##  Shapiro-Wilk normality test
## 
## data:  res150mc
## W = 0.99144, p-value = 0.06673

cvm.test(res75mc)

## 
##  Cramer-von Mises normality test
## 
## data:  res75mc
## W = 0.1071, p-value = 0.08941

cvm.test(res150mc)

## 
##  Cramer-von Mises normality test
## 
## data:  res150mc
## W = 0.035689, p-value = 0.7586

Índices de Moran para ambos modelos.

(moranres75mc <- (Moran.I(res75mc, We)))

## $observed
## [1] 0.1826832
## 
## $expected
## [1] -0.003205128
## 
## $sd
## [1] 0.006374507
## 
## $p.value
## [1] 0

(moranres150mc <- (Moran.I(res150mc, We)))

## $observed
## [1] 0.1232532
## 
## $expected
## [1] -0.003205128
## 
## $sd
## [1] 0.00636692
## 
## $p.value
## [1] 0

Visualización de la CE a 075 cm y 150 cm observada vs. estimada.

estimados75 <- mod_c075$fitted.values
estimados150 <- mod_c150$fitted.values

par(mfrow = c(1, 2))

plot(df$Avg_CEa_07, estimados75, xlab = "CE 075 cm observada",
     ylab = "CE 075 cm estimada", pch = 16, col = "steelblue")

plot(df$Avg_CEa_15, estimados150, xlab = "CE 150 cm observada",
     ylab = "CE 150 cm estimada", pch = 16, col = "steelblue")

Como se observa una dependencia espacial, no es válido aplicar un modelo clásico para este caso.

Modelo autorregresivo puro

(contnb <- dnearneigh(coordinates(coord),0,854, longlat = F))

## Neighbour list object:
## Number of regions: 313 
## Number of nonzero links: 97656 
## Percentage nonzero weights: 99.68051 
## Average number of links: 312

dlist <- nbdists(contnb, coord)
dlist <- lapply(dlist, function(x) 1/x)
wve <- nb2listw(contnb, glist = dlist, style = 'W')

CE 075 cm

map75 <- spautolm(df$Avg_CEa_07 ~ 1, data = df, listw = wve)
summary(map75)

## 
## Call: spautolm(formula = df$Avg_CEa_07 ~ 1, data = df, listw = wve)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -2.749761 -0.448459 -0.068866  0.523352  3.022953 
## 
## Coefficients: 
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   21.367     10.109  2.1136  0.03455
## 
## Lambda: 0.99481 LR test value: 299.95 p-value: < 2.22e-16 
## Numerical Hessian standard error of lambda: 0.0051882 
## 
## Log likelihood: -426.0985 
## ML residual variance (sigma squared): 0.86107, (sigma: 0.92794)
## Number of observations: 313 
## Number of parameters estimated: 3 
## AIC: 858.2

resmap75 <- map75$fit$residuals
(moran_mc_map75 <- moran.mc(resmap75,wve,nsim = 2000))

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  resmap75 
## weights: wve  
## number of simulations + 1: 2001 
## 
## statistic = 0.34956, observed rank = 2001, p-value = 0.0004998
## alternative hypothesis: greater

shapiro.test(resmap75)

## 
##  Shapiro-Wilk normality test
## 
## data:  resmap75
## W = 0.97687, p-value = 6.158e-05

ma75e <- as.data.frame(map75$fit['fitted.values'])
ma75e1 <- map75$fit$fitted.values
dfma75 <- data.frame(ma75e1, ce075)
colnames(dfma75) <-  c('CE75_obs','CE75_est')
resmap75 <- map75$fit$residuals

(cor(dfma75$CE75_obs, dfma75$CE75_est))

## [1] 0.9661478

plot(dfma75$CE75_obs, dfma75$Cea_75est, pch = 16, ylab = "CE 075 cm estimado", 
     xlab = "CE 075 cm observado", main = "Modelo autorregresivo puro")

datamap <- data.frame(x = df$Avg_X_MCB , y = df$Avg_Y_MCE, dfma75$CE75_obs, dfma75$CE75_est)

colnames(datamap) <- c('x', 'y', 'CE75_obs', 'CE75_est')

MAP <- ggplot(data = datamap, aes(x = x, y = y)) +
       geom_point(cex = datamap$CE75_obs * 0.2) +
       geom_point(color = datamap$CE75_est)
MAP

Modelo CE 150 cm

map150 <- spautolm(df$Avg_CEa_15 ~ 1, data = df, listw = wve) 
summary(map150)

## 
## Call: spautolm(formula = df$Avg_CEa_15 ~ 1, data = df, listw = wve)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.41854 -0.35020 -0.04682  0.27014  2.88030 
## 
## Coefficients: 
##             Estimate Std. Error z value  Pr(>|z|)
## (Intercept)  19.4703     2.6199  7.4317 1.072e-13
## 
## Lambda: 0.9875 LR test value: 140.6 p-value: < 2.22e-16 
## Numerical Hessian standard error of lambda: 0.01251 
## 
## Log likelihood: -277.8799 
## ML residual variance (sigma squared): 0.33589, (sigma: 0.57956)
## Number of observations: 313 
## Number of parameters estimated: 3 
## AIC: 561.76

resmap150 <- map150$fit$residuals
(moranmcmap150 <- moran.mc(resmap150, wve, nsim = 2000))

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  resmap150 
## weights: wve  
## number of simulations + 1: 2001 
## 
## statistic = 0.18767, observed rank = 2001, p-value = 0.0004998
## alternative hypothesis: greater

shapiro.test(resmap150)

## 
##  Shapiro-Wilk normality test
## 
## data:  resmap150
## W = 0.94112, p-value = 7.825e-10

ma150e <- as.data.frame(map150$fit['fitted.values'])
ma150e1 <- map150$fit$fitted.values
dfma150 <- data.frame(ma150e1, ce150)
colnames(dfma150) <-  c('CE150_obs','CE150_est')
resmap150 <- map150$fit$residuals
cor(dfma150$CE150_obs, dfma150$CE150_est)

## [1] 0.8901442

plot(dfma150$CE150_obs, dfma150$CE150_est, pch = 16, col = 'steelblue', 
     xlab = "CE 150 observada", ylab = "CE 150 estimada")

datamap150 <- data.frame(x = df$Avg_X_MCB , y = df$Avg_Y_MCE, dfma150$CE150_obs, dfma150$CE150_est)

colnames(datamap150) <- c('x', 'y', 'CE150_obs', 'CE150_est')

MAP150 <- ggplot(data = datamap150, aes(x = x, y = y)) +
       geom_point(cex = datamap150$CE150_obs*0.2) +
       geom_point(color = datamap150$CE150_est)
MAP150

Modelo de autocorrelación espacial

CE 075 cm

sac <- sacsarlm(formula = df$Avg_CEa_07~ df$DEM + df$Avg_CEa_15, data = df,listw = wve)
summary(sac)

## 
## Call:sacsarlm(formula = df$Avg_CEa_07 ~ df$DEM + df$Avg_CEa_15, data = df, 
##     listw = wve)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -1.801253 -0.327440  0.013079  0.218124  2.506698 
## 
## Type: sac 
## Coefficients: (asymptotic standard errors) 
##                  Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)   -14.6134462 307.1863890 -0.0476    0.9621
## df$DEM          0.0605151   0.0097644  6.1975 5.736e-10
## df$Avg_CEa_15   0.3406668   0.0663859  5.1316 2.873e-07
## 
## Rho: 0.99315
## Asymptotic standard error: 0.51522
##     z-value: 1.9276, p-value: 0.0539
## Lambda: 0.99197
## Asymptotic standard error: 0.60394
##     z-value: 1.6425, p-value: 0.10048
## 
## LR test value: 469.9, p-value: < 2.22e-16
## 
## Log likelihood: -276.625 for sac model
## ML residual variance (sigma squared): 0.3216, (sigma: 0.56709)
## Number of observations: 313 
## Number of parameters estimated: 6 
## AIC: 565.25, (AIC for lm: 1031.1)

sac75e <- sac$fitted.values
sac75re <- sac$residuals
(moran75sac <- moran.mc(sac75re, wve, nsim = 2000))

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  sac75re 
## weights: wve  
## number of simulations + 1: 2001 
## 
## statistic = 0.23736, observed rank = 2001, p-value = 0.0004998
## alternative hypothesis: greater

shapiro.test(sac75re)

## 
##  Shapiro-Wilk normality test
## 
## data:  sac75re
## W = 0.94434, p-value = 1.759e-09

dfsac75<-data.frame(df$Avg_CEa_07,sac75e)
colnames(dfsac75) <- c("CE75_Obsac","CE75_Estsac")
cor(dfsac75$CE75_Obsac,dfsac75$CE75_Estsac)

## [1] 0.9613934

plot(dfsac75$CE75_Obsac, dfsac75$CE75_Estsac, pch = 16, col = 'steelblue', xlab= "CE 75 observada", ylab="CE 75 estimada")

datasac75 <- data.frame(x = df$Avg_X_MCB , y = df$Avg_Y_MCE, dfsac75$CE75_Obsac, dfsac75$CE75_Estsac)

colnames(datasac75) <- c('x', 'y', 'CE75_Obsac', 'CE75_Estsac')

SAC75 <- ggplot(data = datasac75, aes(x = x, y = y)) +
       geom_point(cex = datasac75$CE75_Obsac*0.2) +
       geom_point(color = datasac75$CE75_Estsac)
SAC75

CE 150 cm

sac150<-sacsarlm(formula = df$Avg_CEa_15 ~ df$DEM + df$Avg_CEa_07,data = df,listw = wve)
summary(sac150)

## 
## Call:sacsarlm(formula = df$Avg_CEa_15 ~ df$DEM + df$Avg_CEa_07, data = df, 
##     listw = wve)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -1.432658 -0.225841 -0.044885  0.210986  2.165549 
## 
## Type: sac 
## Coefficients: (asymptotic standard errors) 
##                 Estimate Std. Error z value  Pr(>|z|)
## (Intercept)    9.2813281 33.5176473  0.2769 0.7818502
## df$DEM        -0.0493051  0.0071328 -6.9124 4.765e-12
## df$Avg_CEa_07  0.1510731  0.0442802  3.4118 0.0006455
## 
## Rho: 0.98443
## Asymptotic standard error: 0.82584
##     z-value: 1.192, p-value: 0.23325
## Lambda: 0.98271
## Asymptotic standard error: 0.91709
##     z-value: 1.0716, p-value: 0.28392
## 
## LR test value: 235.89, p-value: < 2.22e-16
## 
## Log likelihood: -192.8762 for sac model
## ML residual variance (sigma squared): 0.19028, (sigma: 0.43621)
## Number of observations: 313 
## Number of parameters estimated: 6 
## AIC: 397.75, (AIC for lm: 629.64)

sac150e <- sac$fitted.values
sac150re <- sac$residuals
(moran150sac <- moran.mc(sac150re, wve, nsim = 2000))

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  sac150re 
## weights: wve  
## number of simulations + 1: 2001 
## 
## statistic = 0.23736, observed rank = 2001, p-value = 0.0004998
## alternative hypothesis: greater

shapiro.test(sac150re)

## 
##  Shapiro-Wilk normality test
## 
## data:  sac150re
## W = 0.94434, p-value = 1.759e-09

dfsac150 <- data.frame(df$Avg_CEa_15 ,sac150e)
colnames(dfsac150)<- c("CE150_Obsac","CE150_Estsac")
cor(dfsac150$CE150_Obsac,dfsac150$CE150_Estsac)

## [1] 0.04142672

plot(dfsac150$CE150_Obsac, dfsac150$CE150_Estsac, cex=0.5, pch =18, col = 'steelblue', xlab= "CE 150 observada", ylab="CE 150 estimada")

datasac150 <- data.frame(x = df$Avg_X_MCB , y = df$Avg_Y_MCE, dfsac150$CE150_Obsac, dfsac150$CE150_Estsac)

colnames(datasac150) <- c('x', 'y', 'CE150_Obsac', 'CE150_Estsac')

SAC150 <- ggplot(data = datasac150, aes(x = x, y = y)) +
       geom_point(cex = datasac150$CE150_Obsac*0.2) +
       geom_point(color = datasac150$CE150_Estsac)
SAC150

Modelo espacial del error

ese75 <- errorsarlm(df$Avg_CEa_07 ~ df$SLOPE + df$Avg_z + df$Avg_CEa_15 + df$DEM,listw=wve)
summary(ese75)

## 
## Call:
## errorsarlm(formula = df$Avg_CEa_07 ~ df$SLOPE + df$Avg_z + df$Avg_CEa_15 + 
##     df$DEM, listw = wve)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -2.146272 -0.505579 -0.028777  0.423823  2.532202 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                 Estimate Std. Error z value  Pr(>|z|)
## (Intercept)   -32.137556   7.444174 -4.3171 1.581e-05
## df$SLOPE       -0.063454   0.021477 -2.9546  0.003131
## df$Avg_z        0.166409   0.023001  7.2350 4.656e-13
## df$Avg_CEa_15   0.605649   0.084502  7.1673 7.647e-13
## df$DEM          0.020297   0.016479  1.2317  0.218054
## 
## Lambda: 0.99255, LR test value: 166.87, p-value: < 2.22e-16
## Asymptotic standard error: 0.0051624
##     z-value: 192.27, p-value: < 2.22e-16
## Wald statistic: 36966, p-value: < 2.22e-16
## 
## Log likelihood: -372.9496 for error model
## ML residual variance (sigma squared): 0.61456, (sigma: 0.78394)
## Number of observations: 313 
## Number of parameters estimated: 7 
## AIC: 759.9, (AIC for lm: 924.77)

ese75e <- ese75$fitted.values
ese75res <- ese75$residuals
(moran75ese <- moran.mc(ese75res, wve, nsim = 2000))

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  ese75res 
## weights: wve  
## number of simulations + 1: 2001 
## 
## statistic = 0.24511, observed rank = 2001, p-value = 0.0004998
## alternative hypothesis: greater

shapiro.test(ese75res)

## 
##  Shapiro-Wilk normality test
## 
## data:  ese75res
## W = 0.98569, p-value = 0.003354

dfese75 <- data.frame(df$Avg_CEa_07 ,ese75e)
colnames(dfese75) <- c("CE75_Obsese","CE75_Estese")
cor(dfese75$CE75_Obsese,dfese75$CE75_Estese)

## [1] 0.8937144

plot(dfese75$CE75_Obsese, dfese75$CE75_Estese, pch = 16, col = 'steelblue', xlab= "CE 75 observada", ylab="CE 75 estimada")

dataese75 <- data.frame(x = df$Avg_X_MCB , y = df$Avg_Y_MCE, dfese75$CE75_Obsese, dfese75$CE75_Estese)


colnames(dataese75) <- c('x', 'y', 'CE75_Obsese', 'CE75_Estese')
ESEM75 <- ggplot(data = dataese75, aes(x = x, y = y)) +
       geom_point(cex = dataese75$CE75_Obsese*0.2) +
       geom_point(color = dataese75$CE75_Estese)
ESEM75

E 150 cm

ese150 <- errorsarlm(df$Avg_CEa_15 ~ df$SLOPE + df$Avg_z + df$Avg_CEa_07 + df$DEM,listw=wve)
summary(ese150)

## 
## Call:
## errorsarlm(formula = df$Avg_CEa_15 ~ df$SLOPE + df$Avg_z + df$Avg_CEa_07 + 
##     df$DEM, listw = wve)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -1.464998 -0.296637 -0.024094  0.311716  1.928824 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                Estimate Std. Error z value  Pr(>|z|)
## (Intercept)   39.621003   2.551377 15.5293 < 2.2e-16
## df$SLOPE       0.047515   0.013256  3.5845 0.0003377
## df$Avg_z      -0.104538   0.014258 -7.3317 2.274e-13
## df$Avg_CEa_07  0.232617   0.032384  7.1830 6.819e-13
## df$DEM        -0.011889   0.010237 -1.1614 0.2454641
## 
## Lambda: 0.9821, LR test value: 84.217, p-value: < 2.22e-16
## Asymptotic standard error: 0.0124
##     z-value: 79.204, p-value: < 2.22e-16
## Wald statistic: 6273.3, p-value: < 2.22e-16
## 
## Log likelihood: -223.0129 for error model
## ML residual variance (sigma squared): 0.23711, (sigma: 0.48694)
## Number of observations: 313 
## Number of parameters estimated: 7 
## AIC: 460.03, (AIC for lm: 542.24)

ese150e <- ese150$fitted.values
ese150res <- ese150$residuals
(moran150ese <- moran.mc(ese150res, wve, nsim = 2000))

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  ese150res 
## weights: wve  
## number of simulations + 1: 2001 
## 
## statistic = 0.12923, observed rank = 2001, p-value = 0.0004998
## alternative hypothesis: greater

shapiro.test(ese150res)

## 
##  Shapiro-Wilk normality test
## 
## data:  ese150res
## W = 0.97907, p-value = 0.0001563

dfese150<-data.frame(df$Avg_CEa_15, ese150e)
colnames(dfese150) <- c("CE150_Obsese","CE150_Estese")
cor(dfese150$CE150_Obsese,dfese150$CE150_Estese)

## [1] 0.7734705

plot(dfese150$CE150_Obsese, dfese150$CE150_Estese, pch = 16, col = 'steelblue', xlab = "CE 150 observada", ylab = "CE 150 estimada")

dataese150 <- data.frame(x = df$Avg_X_MCB , y = df$Avg_Y_MCE, dfese150$CE150_Obsese, dfese150$CE150_Estese)


colnames(dataese150) <- c('x', 'y', 'CE150_Obsese', 'CE150_Estese')
ESEM150 <- ggplot(data = dataese150, aes(x = x, y = y)) +
       geom_point(cex = dataese150$CE150_Obsese*0.2) +
       geom_point(color = dataese150$CE150_Estese)
ESEM150

# Variables relacionadas

ce75vsaltitud <- plot_ly(x = df$Avg_X_MCB, y = df$Avg_Y_MCE, z = df$Avg_z, size = I(90))%>%
            layout(title = 'CE 75 vs Altitud (z)',
                  scene = list(
                              xaxis = list(title = "Longitud"),
                              yaxis = list(title = "Latitud"),
                              zaxis = list(title = "Altitud")
    )
  )%>%
add_markers(color = df$Avg_CEa_07)
ce75vsaltitud

## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

ce150vsaltitud <- plot_ly(x = df$Avg_X_MCB, y = df$Avg_Y_MCE, z = df$Avg_z, size = I(90))%>%
            layout(title = 'CE 150 vs Altitud (z)',
                  scene = list(
                              xaxis = list(title = "Longitud"),
                              yaxis = list(title = "Latitud"),
                              zaxis = list(title = "Altitud")
    )
  )%>%
add_markers(color = df$Avg_CEa_15)
ce150vsaltitud

ce75vsslope <- plot_ly(x = df$Avg_X_MCB, y = df$Avg_Y_MCE,  z = df$SLOPE, size = I(90))%>%
            layout(title = 'CE 75 vs Slope',
                  scene = list(
                              xaxis = list(title = "Longitud"),
                              yaxis = list(title = "Latitud"),
                              zaxis = list(title = "Slope")
    )
  )%>%
add_markers(color = df$Avg_CEa_07)
ce75vsslope

ce150vsslope <- plot_ly(x = df$Avg_X_MCB, y = df$Avg_Y_MCE,  z = df$SLOPE, size = I(90))%>%
            layout(title = 'CE 150 vs Slope',
                  scene = list(
                              xaxis = list(title = "Longitud"),
                              yaxis = list(title = "Latitud"),
                              zaxis = list(title = "Slope")
    )
  )%>%
add_markers(color = df$Avg_CEa_15)
ce150vsslope

# Coordenada de interés

plot(x = df$Avg_X_MCB, y = df$Avg_Y_MCE,pch=16, xlab = 'Longitud',ylab = 'Latitud')
points(x=843473,y=956093,col="red",pch=18)

Trabajo final de muestreo - Computación estadística Jorge Iván Rivera Bermúdez, 2020-ll

Jorge Iván Rivera Bermúdez, 2020-ll

Trabajo final de muestreo - Computación estadística
Jorge Iván Rivera Bermúdez, 2020-ll