library(rworldmap)
## Loading required package: sp
## ### Welcome to rworldmap ###
## For a short introduction type : vignette('rworldmap')
library(rworldxtra)
library(ggmap)
## Loading required package: ggplot2
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(sf)
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(ape)
## Registered S3 method overwritten by 'ape':
## method from
## plot.mst spdep
library(sp)
library(MVA)
## Loading required package: HSAUR2
## Loading required package: tools
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:ape':
##
## zoom
## The following objects are masked from 'package:base':
##
## format.pval, units
library(normtest)
library(nortest)
library(corrplot)
## corrplot 0.90 loaded
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
##
## describe
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(crayon)
##
## Attaching package: 'crayon'
## The following object is masked from 'package:psych':
##
## %+%
## The following object is masked from 'package:ggplot2':
##
## %+%
library(pastecs)
library(readxl)
Entrega de arroz inundado
Arroz_in <- read_excel("Arroz_in.xlsx")
View(Arroz_in)
## Data frame con Carbono organico
dfCOS=data.frame(Arroz_in)
X=as.matrix(data.frame(dfCOS[,4:15]))
## Para mirar el rango de las coordenadas
range(dfCOS$Long);range(dfCOS$Lat)
## [1] -72.58261 -72.37495
## [1] 8.078605 8.380669
##Primer mapa de los puntos
mapa.puntos <- getMap(resolution = "high")
plot(mapa.puntos,xlim = c(-72.6, -72.3), ylim = c(8.0, 8.4), asp = 1)
points(dfCOS$Long, dfCOS$Lat, col = "darkred", cex = .6,pch=19)
##Segundo mapa de puntos
ggplot(dfCOS, aes(dfCOS$Long, dfCOS$Lat)) + geom_point()
## Warning: Use of `dfCOS$Long` is discouraged. Use `Long` instead.
## Warning: Use of `dfCOS$Lat` is discouraged. Use `Lat` instead.
###______matriz de predictores y respuesta
X=as.matrix(data.frame(dfCOS[,4:15]))
###___matriz de coordenadas
XY=as.matrix(dfCOS[,2:3])
##__matriz de distancias
COS.d=as.matrix(dist(XY, diag=T, upper=T))
##matriz inversa de distancias
COS.d.inv <-as.matrix(1/COS.d)
##asignando cero a la diag
diag(COS.d.inv) <- 0
##__la matriz de pesos basado en distancias
W=as.matrix(COS.d.inv)
##__verificando sumas por filas
SUMAS=apply(W,1,sum)
##__estandarizando la metriz
We=W/SUMAS
##verificando estandarización
apply(We,1,sum)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 401 402 403
## 1 1 1
##Calculando indice de morant para ver si los datos tienen dependencia espacial.
Moran=list()
for(j in 1:12){
Moran[j]=Moran.I(dfCOS[,j+3], COS.d.inv)$p.value
}
Moran
## [[1]]
## [1] 0
##
## [[2]]
## [1] 0
##
## [[3]]
## [1] 0
##
## [[4]]
## [1] 0
##
## [[5]]
## [1] 0
##
## [[6]]
## [1] 0
##
## [[7]]
## [1] 0
##
## [[8]]
## [1] 4.081979e-11
##
## [[9]]
## [1] 1.380351e-09
##
## [[10]]
## [1] 0
##
## [[11]]
## [1] 6.661338e-16
##
## [[12]]
## [1] 0
#Realizando el indice de morant a los P valores, donde las variables con 0 muestran dependencia espacial. Por esto es necesario el uso de modelos que me solucionen dicha dependencia.
#Sin embargo, el indice de morant intereza evaluarlo a los residuales de los modelos para determinar si los modelos solucionan la dependencia espacial que se presenta en los valores de campo.
# Muestreo espacial
library(clhs)
n = 0.80 *403
n = round(n,0)
df_nair = data.frame(x=dfCOS$Long,
y=dfCOS$Lat,
COS=dfCOS$cos)
res <- clhs(df_nair, size = n,
iter = 100, progress = FALSE,
simple = TRUE)
df_nair = dfCOS[res,]
dfCOS = df_nair
##1. Modelo autorregresivo puro \[Y = \lambda W Y + \epsilon\]
XY=as.matrix(dfCOS[,2:3])
COS.d=as.matrix(dist(XY, diag=T, upper=T))
##matriz inversa de distancias
COS.d.inv <-as.matrix(1/COS.d)
##asignando cero a la diag
diag(COS.d.inv) <- 0
##__la matriz de pesos basado en distancias
W=as.matrix(COS.d.inv)
##__verificando sumas por filas
SUMAS=apply(W,1,sum)
##__estandarizando la metriz
We=W/SUMAS
# Nueva construccion de matriz de pesos estandarizada
range(dfCOS$Long);range(dfCOS$Lat)
## [1] -72.58261 -72.37495
## [1] 8.078605 8.380669
contnb=dnearneigh(coordinates(XY),0,380000,longlat = F)
dlist <- nbdists(contnb, XY)
dlist <- lapply(dlist, function(x) 1/x)
Wve=nb2listw(contnb,glist=dlist,style = "W")
#modelo autoregresivo puro
modelo.arp=spautolm(Ca~1,data=dfCOS,listw=Wve)
summary(modelo.arp)
##
## Call: spautolm(formula = Ca ~ 1, data = dfCOS, listw = Wve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.35509 -2.41944 -0.14151 1.94733 10.93396
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.9003 3.9243 2.0132 0.0441
##
## Lambda: 0.95063 LR test value: 38.932 p-value: 4.3892e-10
## Numerical Hessian standard error of lambda: 0.048843
##
## Log likelihood: -861.0571
## ML residual variance (sigma squared): 12.088, (sigma: 3.4768)
## Number of observations: 322
## Number of parameters estimated: 3
## AIC: 1728.1
library(spatialreg)
# Este modelo asume independencia en los residuales por lo que no seria el indicado, entonces extraemos residuales y hacemos test de moran
res1 = modelo.arp$fit$residuals
Moran.I(res1, COS.d.inv)
## $observed
## [1] 0.04976567
##
## $expected
## [1] -0.003115265
##
## $sd
## [1] 0.005341869
##
## $p.value
## [1] 0
# el test muestra que se rechaza la hipotesis de independencia.
##2. Modelo autorregresivo puro (dos veces con la misma variable) \[Y = \lambda W Y + u \\ u= \rho Wu + \epsilon\]
mod2 = sacsarlm(Ca~1,data=dfCOS,listw=Wve)
summary(mod2)
##
## Call:sacsarlm(formula = Ca ~ 1, data = dfCOS, listw = Wve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.72744 -2.37964 -0.05684 1.86195 10.56775
##
## Type: sac
## Coefficients: (numerical Hessian approximate standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.41586 2.04910 0.2029 0.8392
##
## Rho: 0.92087
## Approximate (numerical Hessian) standard error: 0.076777
## z-value: 11.994, p-value: < 2.22e-16
## Lambda: 0.92087
## Approximate (numerical Hessian) standard error: 0.077519
## z-value: 11.879, p-value: < 2.22e-16
##
## LR test value: 61.526, p-value: 4.3632e-14
##
## Log likelihood: -849.7601 for sac model
## ML residual variance (sigma squared): 11.137, (sigma: 3.3372)
## Number of observations: 322
## Number of parameters estimated: 4
## AIC: 1707.5, (AIC for lm: 1765)
# Este modelo si evaluala dependencia espacial de los datos, entonces extraemos residuales y hacemos test de moran.
res2 =mod2$residuals
Moran.I(res2, COS.d.inv)
## $observed
## [1] 0.03254388
##
## $expected
## [1] -0.003115265
##
## $sd
## [1] 0.005341968
##
## $p.value
## [1] 2.467559e-11
# Mejora mucho pero sigue habiendo dependencia espacial en los residuales.
##3. Modelo en resago espacial (Spatial lag model) \[Y = \lambda W Y +X \beta + \epsilon \]
mser1=errorsarlm(formula=Ca~Mg+K+Cu+Zn,data=dfCOS,listw=Wve)
summary(mser1)
##
## Call:errorsarlm(formula = Ca ~ Mg + K + Cu + Zn, data = dfCOS, listw = Wve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.64543 -1.84944 -0.27934 1.18091 10.92161
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.720815 4.639933 1.2330 0.2175937
## Mg 1.162087 0.212464 5.4696 4.512e-08
## K 5.682582 1.498637 3.7918 0.0001495
## Cu 0.302784 0.123928 2.4432 0.0145562
## Zn -0.362425 0.097595 -3.7135 0.0002044
##
## Lambda: 0.96501, LR test value: 54.254, p-value: 1.7619e-13
## Asymptotic standard error: 0.024591
## z-value: 39.243, p-value: < 2.22e-16
## Wald statistic: 1540, p-value: < 2.22e-16
##
## Log likelihood: -803.7155 for error model
## ML residual variance (sigma squared): 8.4473, (sigma: 2.9064)
## Number of observations: 322
## Number of parameters estimated: 7
## AIC: 1621.4, (AIC for lm: 1673.7)
#Analizando hay variables como CICE , Na, z y cos, por el p. value alto muestran no estar relacionadas al la variable Ca. sin embargo primeo decidimos correr el test de moran y luego retirarlos.
# Evaluando la dependencia espacial de los residuales
res3 =mser1$residuals
Moran.I(res3, COS.d.inv)
## $observed
## [1] 0.06616071
##
## $expected
## [1] -0.003115265
##
## $sd
## [1] 0.005330153
##
## $p.value
## [1] 0
# La parte autorregresiva puede ser buena, sin embargo la dependencia espacial no se corrige aun y siguen teniendo problemas algunas variables.
##4. Modelo espacial del error (Spatial error model) \[Y = X \beta + u\\u = \lambda W u + \epsilon\]
#Analizando hay variables como CICE , cos y z, por el p. value alto muestran no estar relacionadas al la variable Ca. sin embargo, primeo decidimos correr el test de moran y luego retirarlos.
mod4=lagsarlm(formula=Ca~Mg+K+Na+Cu+Zn,data=dfCOS,listw=Wve)
summary(mod4)
##
## Call:lagsarlm(formula = Ca ~ Mg + K + Na + Cu + Zn, data = dfCOS,
## listw = Wve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.94716 -1.86713 -0.36893 1.35040 11.23687
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.914038 0.577743 -5.0438 4.563e-07
## Mg 0.889207 0.216743 4.1026 4.086e-05
## K 5.861756 1.501030 3.9052 9.416e-05
## Na 1.255855 1.397166 0.8989 0.3687278
## Cu 0.227483 0.123991 1.8347 0.0665551
## Zn -0.356818 0.098325 -3.6290 0.0002845
##
## Rho: 0.95245, LR test value: 42.235, p-value: 8.0921e-11
## Asymptotic standard error: 0.033224
## z-value: 28.668, p-value: < 2.22e-16
## Wald statistic: 821.83, p-value: < 2.22e-16
##
## Log likelihood: -809.226 for lag model
## ML residual variance (sigma squared): 8.7589, (sigma: 2.9595)
## Number of observations: 322
## Number of parameters estimated: 8
## AIC: 1634.5, (AIC for lm: 1674.7)
## LM test for residual autocorrelation
## test value: 209.36, p-value: < 2.22e-16
# Evaluando la dependencia espacial de los residuales
res4 = mod4$residuals
Moran.I(res4, COS.d.inv)
## $observed
## [1] 0.07193423
##
## $expected
## [1] -0.003115265
##
## $sd
## [1] 0.005332103
##
## $p.value
## [1] 0
# La parte autorregresiva puede ser buena, sin embargo la dependencia espacial no se corrige aun y siguen teniendo problemas algunas variables.
###5. Modelo de autocorrelacion espacial (Spatial Autocorrelation Model)
\[Y = \lambda W Y +X \beta + u\\u =\rho W u +\epsilon \]
#Analizando si se eliminan variables como Na y Cu, ya que por el p. value alto muestran no estar relacionadas a la variable Ca. El modelo se comporta mejor.
mod5=sacsarlm(formula=Ca~cos+Mg+K+CICE+Zn,data=dfCOS,listw=Wve)
summary(mod5)
##
## Call:sacsarlm(formula = Ca ~ cos + Mg + K + CICE + Zn, data = dfCOS,
## listw = Wve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.32979 -0.21369 0.18160 0.52025 2.49741
##
## Type: sac
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.530778 1.333094 -4.1488 3.342e-05
## cos 0.728221 0.200396 3.6339 0.0002792
## Mg -0.424131 0.096136 -4.4118 1.025e-05
## K -1.628296 0.644844 -2.5251 0.0115664
## CICE 0.802391 0.024816 32.3333 < 2.2e-16
## Zn -0.089060 0.033133 -2.6879 0.0071894
##
## Rho: 0.66911
## Asymptotic standard error: 0.14841
## z-value: 4.5084, p-value: 6.5312e-06
## Lambda: 0.90842
## Asymptotic standard error: 0.07702
## z-value: 11.795, p-value: < 2.22e-16
##
## LR test value: 35.66, p-value: 1.805e-08
##
## Log likelihood: -512.7704 for sac model
## ML residual variance (sigma squared): 1.3881, (sigma: 1.1782)
## Number of observations: 322
## Number of parameters estimated: 9
## AIC: 1043.5, (AIC for lm: 1075.2)
#Los P valores de Rho y lambda sin buenospor lo que se realiza test de moran para evaluar independencia espacial.
res5 = mod5$residuals
Moran.I(res5, COS.d.inv)
## $observed
## [1] 0.01701072
##
## $expected
## [1] -0.003115265
##
## $sd
## [1] 0.005191208
##
## $p.value
## [1] 0.0001057799
#Con este modelo logramos un p valor mayor al 5% por lo que desaparecemos la dependencia espacial y seria correcto avanzar.
##6.Modelo espacial anidado (General Nesting Spatial Model)
\[Y = \lambda W Y +X \beta_1 +W X \beta_2+ u\\u =\rho W u +\epsilon\]
#Analizando si se eliminan variables como z, K y Mg, ya que por el p. value alto muestran no estar relacionadas a la variable Ca. El modelo se comporta mejor.
#Este modelo es el mismo que el anterior pero con resago
mod6=sacsarlm(formula=Ca~cos+K+Na+CICE+Cu+Zn,data=dfCOS,listw=Wve,type="mixed")
summary(mod6)
##
## Call:sacsarlm(formula = Ca ~ cos + K + Na + CICE + Cu + Zn, data = dfCOS,
## listw = Wve, type = "mixed")
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.35237 -0.35226 0.13034 0.51605 2.95287
##
## Type: sacmixed
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.781439 3.612548 -1.0468 0.2952144
## cos 0.841796 0.205314 4.1000 4.131e-05
## K -2.112033 0.622933 -3.3905 0.0006977
## Na -1.028719 0.557168 -1.8463 0.0648433
## CICE 0.752630 0.021688 34.7023 < 2.2e-16
## Cu -0.028558 0.052557 -0.5434 0.5868810
## Zn -0.075655 0.041131 -1.8394 0.0658609
## lag.cos 2.577857 2.862935 0.9004 0.3678944
## lag.K -13.033910 9.642569 -1.3517 0.1764697
## lag.Na 1.672902 8.462292 0.1977 0.8432884
## lag.CICE 0.125832 0.973055 0.1293 0.8971073
## lag.Cu -0.215618 0.799690 -0.2696 0.7874471
## lag.Zn -0.605516 0.700011 -0.8650 0.3870334
##
## Rho: 0.68845
## Asymptotic standard error: 0.94263
## z-value: 0.73035, p-value: 0.46518
## Lambda: 0.80775
## Asymptotic standard error: 0.62356
## z-value: 1.2954, p-value: 0.19519
##
## LR test value: 55.678, p-value: 3.2592e-09
##
## Log likelihood: -511.8763 for sacmixed model
## ML residual variance (sigma squared): 1.3869, (sigma: 1.1777)
## Number of observations: 322
## Number of parameters estimated: 16
## AIC: 1055.8, (AIC for lm: 1095.4)
# En este caso el Rho y lambda no sirven por lo que no es una opcion.
##7. Modelo espacial del error de durbin (Spatial Durbin Error Model)
# z, Mg y Na
mod7=lagsarlm(formula=Ca~cos+K+CICE+Cu+Zn,data=dfCOS,listw=Wve,type="mixed")
summary(mod7)
##
## Call:lagsarlm(formula = Ca ~ cos + K + CICE + Cu + Zn, data = dfCOS,
## listw = Wve, type = "mixed")
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.27002 -0.33237 0.14690 0.58092 3.25963
##
## Type: mixed
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.861675 2.903129 -1.3302 0.1834600
## cos 0.855133 0.209020 4.0912 4.292e-05
## K -2.265104 0.634188 -3.5717 0.0003547
## CICE 0.750760 0.021733 34.5447 < 2.2e-16
## Cu -0.042007 0.052904 -0.7940 0.4271883
## Zn -0.074455 0.042020 -1.7719 0.0764142
## lag.cos 3.290864 2.299242 1.4313 0.1523493
## lag.K -11.148810 7.415647 -1.5034 0.1327316
## lag.CICE -0.182476 0.175791 -1.0380 0.2992589
## lag.Cu -0.100283 0.565371 -0.1774 0.8592136
## lag.Zn -0.479842 0.620464 -0.7734 0.4393099
##
## Rho: 0.85923, LR test value: 12.788, p-value: 0.00034884
## Asymptotic standard error: 0.096033
## z-value: 8.9472, p-value: < 2.22e-16
## Wald statistic: 80.053, p-value: < 2.22e-16
##
## Log likelihood: -517.0148 for mixed model
## ML residual variance (sigma squared): 1.4367, (sigma: 1.1986)
## Number of observations: 322
## Number of parameters estimated: 13
## AIC: 1060, (AIC for lm: 1070.8)
## LM test for residual autocorrelation
## test value: 21.679, p-value: 3.2235e-06
res7 = mod7$residuals
Moran.I(res7, COS.d.inv) # No hay depedencia espacial
## $observed
## [1] 0.01956033
##
## $expected
## [1] -0.003115265
##
## $sd
## [1] 0.00525682
##
## $p.value
## [1] 1.606486e-05
#prueba de normalidad shapiro test para los residuakes dek modelo 5 y 7
shapiro.test(res5)
##
## Shapiro-Wilk normality test
##
## data: res5
## W = 0.71508, p-value < 2.2e-16
shapiro.test(res7)
##
## Shapiro-Wilk normality test
##
## data: res7
## W = 0.82737, p-value < 2.2e-16
#prueba ad.test
ad.test(res5)
##
## Anderson-Darling normality test
##
## data: res5
## A = 21.292, p-value < 2.2e-16
ad.test(res7)
##
## Anderson-Darling normality test
##
## data: res7
## A = 12.28, p-value < 2.2e-16
# prueba sf.test
sf.test(res5)
##
## Shapiro-Francia normality test
##
## data: res5
## W = 0.70955, p-value < 2.2e-16
sf.test(res7)
##
## Shapiro-Francia normality test
##
## data: res7
## W = 0.82252, p-value = 4.192e-16
#prueba cvm.test
cvm.test(res5)
## Warning in cvm.test(res5): p-value is smaller than 7.37e-10, cannot be computed
## more accurately
##
## Cramer-von Mises normality test
##
## data: res5
## W = 3.9442, p-value = 7.37e-10
cvm.test(res7)
## Warning in cvm.test(res7): p-value is smaller than 7.37e-10, cannot be computed
## more accurately
##
## Cramer-von Mises normality test
##
## data: res7
## W = 2.2239, p-value = 7.37e-10
#ninguna prueba nos corrobora la normalidad de los residuales.
hist(res5)
hist(res7)
# prueba de grubbs para detectar datos atipicos
library(outliers)
##
## Attaching package: 'outliers'
## The following object is masked from 'package:psych':
##
## outlier
outliers::grubbs.test(res5)
##
## Grubbs test for one outlier
##
## data: res5
## G.373 = 7.05897, U = 0.84429, p-value = 3.096e-11
## alternative hypothesis: lowest value -8.32978854775542 is an outlier
outliers::grubbs.test(res7) # El valor mas alto es un outlier
##
## Grubbs test for one outlier
##
## data: res7
## G.373 = 6.0558, U = 0.8854, p-value = 7.221e-08
## alternative hypothesis: lowest value -7.2700218063831 is an outlier
which.max(res5)
## 140
## 10
which.max(res7) # Maximo
## 140
## 10
#se ven algunos datos atipicos
plot(dfCOS$Ca,mod5$fitted.values)
plot(dfCOS$Ca,mod7$fitted.values)
dfCOS2 = dfCOS
atipicos =which(dfCOS$Ca>15)
dfCOS2 =dfCOS2[-atipicos,]
XY=as.matrix(dfCOS2[,2:3])
COS.d=as.matrix(dist(XY, diag=T, upper=T))
COS.d.inv <-as.matrix(1/COS.d)
diag(COS.d.inv) <- 0
W=as.matrix(COS.d.inv)
SUMAS=apply(W,1,sum)
We=W/SUMAS
# Nueva construccion de matriz de pesos estandarizada
contnb=dnearneigh(coordinates(XY),0,380000,longlat = F)
dlist <- nbdists(contnb, XY)
dlist <- lapply(dlist, function(x) 1/x)
Wve=nb2listw(contnb,glist=dlist,style = "W")
mod5b=sacsarlm(formula=Ca~cos+Mg+K+CICE+Zn,data=dfCOS2,listw=Wve)
summary(mod5)
##
## Call:sacsarlm(formula = Ca ~ cos + Mg + K + CICE + Zn, data = dfCOS,
## listw = Wve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.32979 -0.21369 0.18160 0.52025 2.49741
##
## Type: sac
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.530778 1.333094 -4.1488 3.342e-05
## cos 0.728221 0.200396 3.6339 0.0002792
## Mg -0.424131 0.096136 -4.4118 1.025e-05
## K -1.628296 0.644844 -2.5251 0.0115664
## CICE 0.802391 0.024816 32.3333 < 2.2e-16
## Zn -0.089060 0.033133 -2.6879 0.0071894
##
## Rho: 0.66911
## Asymptotic standard error: 0.14841
## z-value: 4.5084, p-value: 6.5312e-06
## Lambda: 0.90842
## Asymptotic standard error: 0.07702
## z-value: 11.795, p-value: < 2.22e-16
##
## LR test value: 35.66, p-value: 1.805e-08
##
## Log likelihood: -512.7704 for sac model
## ML residual variance (sigma squared): 1.3881, (sigma: 1.1782)
## Number of observations: 322
## Number of parameters estimated: 9
## AIC: 1043.5, (AIC for lm: 1075.2)
res5b = mod5b$residuals
Moran.I(res5b, COS.d.inv) # Vuelve la depedencia espacial
## $observed
## [1] 0.01748661
##
## $expected
## [1] -0.003311258
##
## $sd
## [1] 0.005525781
##
## $p.value
## [1] 0.0001673591
shapiro.test(res5b) # No son normales
##
## Shapiro-Wilk normality test
##
## data: res5b
## W = 0.82335, p-value < 2.2e-16
ad.test(res5b)
##
## Anderson-Darling normality test
##
## data: res5b
## A = 10.773, p-value < 2.2e-16
sf.test(res5b)
##
## Shapiro-Francia normality test
##
## data: res5b
## W = 0.81802, p-value = 8.758e-16
cvm.test(res5b)
## Warning in cvm.test(res5b): p-value is smaller than 7.37e-10, cannot be computed
## more accurately
##
## Cramer-von Mises normality test
##
## data: res5b
## W = 1.9052, p-value = 7.37e-10
#eliminando datos atipicos mod7
mod7b=lagsarlm(formula=Ca~cos+K+CICE+Cu+Zn,data=dfCOS2,listw=Wve,type="mixed")
summary(mod7b)
##
## Call:lagsarlm(formula = Ca ~ cos + K + CICE + Cu + Zn, data = dfCOS2,
## listw = Wve, type = "mixed")
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.43277 -0.37985 0.12452 0.59315 2.83522
##
## Type: mixed
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.7377794 2.7912753 -2.4139 0.015784
## cos 0.9019458 0.2028234 4.4470 8.71e-06
## K -1.7687792 0.5798061 -3.0506 0.002284
## CICE 0.6785425 0.0226030 30.0201 < 2.2e-16
## Cu -0.0080048 0.0486440 -0.1646 0.869292
## Zn -0.0734409 0.0381739 -1.9239 0.054373
## lag.cos 3.2675566 2.1326601 1.5322 0.125485
## lag.K -13.3458786 6.8330813 -1.9531 0.050804
## lag.CICE 0.3000402 0.2563756 1.1703 0.241874
## lag.Cu -0.6590432 0.5425264 -1.2148 0.224455
## lag.Zn -0.0265750 0.5413210 -0.0491 0.960845
##
## Rho: 0.86242, LR test value: 14.257, p-value: 0.00015949
## Asymptotic standard error: 0.093799
## z-value: 9.1944, p-value: < 2.22e-16
## Wald statistic: 84.536, p-value: < 2.22e-16
##
## Log likelihood: -453.3873 for mixed model
## ML residual variance (sigma squared): 1.1537, (sigma: 1.0741)
## Number of observations: 303
## Number of parameters estimated: 13
## AIC: 932.77, (AIC for lm: 945.03)
## LM test for residual autocorrelation
## test value: 10.232, p-value: 0.0013804
res7b = mod7b$residuals
Moran.I(res7b, COS.d.inv) # Vuelve la depedencia espacial
## $observed
## [1] 0.01429806
##
## $expected
## [1] -0.003311258
##
## $sd
## [1] 0.005550195
##
## $p.value
## [1] 0.001510081
shapiro.test(res7b) # No son normales
##
## Shapiro-Wilk normality test
##
## data: res7b
## W = 0.86523, p-value = 1.336e-15
ad.test(res7b)
##
## Anderson-Darling normality test
##
## data: res7b
## A = 7.2404, p-value < 2.2e-16
sf.test(res7b)
##
## Shapiro-Francia normality test
##
## data: res7b
## W = 0.86041, p-value = 7.096e-14
cvm.test(res7b)
## Warning in cvm.test(res7b): p-value is smaller than 7.37e-10, cannot be computed
## more accurately
##
## Cramer-von Mises normality test
##
## data: res7b
## W = 1.2007, p-value = 7.37e-10
#No logramos dar normalidad a ninguno de los dos modelos.sin embargo para hacer una prediccion tomaremos el modelo 5
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:crayon':
##
## style
## The following object is masked from 'package:Hmisc':
##
## subplot
## The following object is masked from 'package:ggmap':
##
## wind
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(viridis)
## Loading required package: viridisLite
dataq=dfCOS
dataq$ajustados =mod5$fitted.values
q<-ggplot(dataq,aes(x=Long,y=Lat,color=ajustados))+
geom_point(size = 2) +
ggtitle("Predicciones del modelo 5")+
labs(x="x",y="y")+
scale_colour_gradient(low = "yellow", high = "red", na.value = NA)
ggplotly(q)