Dados de Área (continuação)

1 Dado independente espacialmente

Faça o seguinte exercício:

Obtenha o shapefile dos municípios de Minas Gerais usando o pacote geobr
Gere um dado simulado para estes municípios considerando um modelo de regressão linear simples.
Suponha que a geração do dado seja desconhecida e que queiramos investigar se há dependência espacial. Analise os dados gerados, conforme visto nas aulas passadas.

#carregando os pacotes
library(tidyverse)
library(rgdal)
library(maptools)
library(sp)
library(spdep)
library(spatialreg)
library(readxl)
library(geobr)
library(RColorBrewer)
library(MASS)
library(gridExtra)
library(ggiraph)

#Lendo o shapefile de dados
cod_estado    = 31
localizacoes  = read_municipality(code_muni = cod_estado,year=2018,simplified = T, showProgress = F)

# Construindo a matriz de vizinhança 
nb  = poly2nb(localizacoes,queen=TRUE)
n   = nrow(localizacoes)
W   = matrix(0, n, n)
for(i in 1:n){  W[i, nb[[i]]] = 1 }

# Ponderando a matriz de vizinhança (linhas somando 1)
nbw   = nb2listw(nb,style="W")
Wp    = matrix(0, n, n)
Np    = NULL
for(i in 1:n){  
  Np[i]   = sum(W[i,]) #número de vizinhos da região i
  Wp[i,]  = W[i,]/Np[i] 
}
#outra forma de obter a matriz de vizinhança ponderada:
Wp2 = as(nbw, "CsparseMatrix")
summary(as.vector(Wp-Wp2))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0       0       0       0

#plot(Wp, Wp2)
#lines(Wp,Wp, lwd=2,col="red")

#Gerando um dado simulado de acordo com um MLS
x             = matrix(1,n,2)   #matriz com intercepto + covariavel
x[,2]         = rnorm(n)        #covariável
beta          = c(5,1)        #efeito do intercepto + covariavel
V             = 0.5               #variancia
e             = mvrnorm(1, rep(0,n), V*diag(n))
y_ML          = x%*% beta+e

par(mfrow=c(2,2))
hist(x%*% beta, freq=F, main="", ylab="densidade")
hist(e, freq=F, main="", ylab="densidade")
hist(y_ML, freq=F, main="", ylab="densidade")

#vetorizando o dado simulado
yvetor        = as.vector(y_ML)

#Juntando o dado simulado com as coordenadas
dados       = localizacoes
dados$data  = y_ML
dados$cov   = x[,2]

#plotando os dados

## Função utilizada para categorizar os pontos de corte
age.cat <- function(x, breaks,
                    sep = "-", above.char = "+",min,max) {
  labs <- c(paste(c(min,breaks)[-(length(breaks)+1)],
                  breaks-0.01,
                  sep = sep),
            paste(breaks[length(breaks)], max, sep = "-"))
  cut(x, breaks = c(min,breaks,Inf),
      right = FALSE, labels = labs)
}

# ## Escolha da paleta de cores para y
# aux       = dados$data
# breaks    = round(quantile(aux,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
# mycolors  = rev(brewer.pal(length(breaks)+1, "RdBu"))
# categoria = age.cat(round(aux,2),breaks,min=min(round(aux,2)),max=max(round(aux,2)))
# 
# ggplot() +  geom_sf(data=dados,aes(fill=categoria),colour="black") + 
#   theme_bw() +
#   scale_fill_manual(values = mycolors,name="Dados simulados") +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# 
# # construindo um mapa dinâmico para a variável de interesse
# 
# ## O que será mostrado ao encostar em cada município
# tooltip <- c(paste0(dados$name_muni, "\n  ", round(dados$data,2)))
# ## Construção do mapa
# gg <- ggplot(dados) +  geom_sf(aes(fill=categoria)) +
#   geom_sf_interactive(aes(fill=categoria, tooltip = tooltip, data_id = code_muni),color="black",size=0.1)+ theme_bw() + 
#   scale_fill_manual(values = mycolors,name="Dados simulados") +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# ##visualização do mapa
# girafe(ggobj = gg)


# Cálculo e teste do Indice Global de Moran
GlobalMoran <- moran.test(y_ML,nbw,alternative="two.sided")
GlobalMoran

## 
##  Moran I test under randomisation
## 
## data:  y_ML  
## weights: nbw    
## 
## Moran I statistic standard deviate = 1.0359, p-value = 0.3003
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.0208773075     -0.0011737089      0.0004531695

# Plot do diagrama de espalhamento de Moran
par(mfrow=c(1,1))

moran.plot(yvetor,nbw,label=dados$name_muni)

# #calculando o indice de moran local
# moran_local <- localmoran(yvetor, nbw)
# #plot(moran_local)
# ## Construção do mapa
# #dados$IML    <- moran_local[,1]
# dados$IML    <- moran_local[,5]
# breaks          <- round(quantile(dados$IML,probs = c(0.25,0.50,0.75)),2)
# dados$IMLC   <- age.cat(round(dados$IML,2),breaks,min=min(round(dados$IML,2)),max=max(round(dados$IML,2)))
# dados$tooltip2 <- c(paste0(dados$name_muni, "\n  ", round(dados$IML,2)))
# gg <- ggplot(dados) +  geom_sf(aes(fill=IMLC)) +
#   geom_sf_interactive(aes(fill=IMLC, tooltip = tooltip2, data_id = code_muni),color="black",size=0.1)+ theme_bw() + 
#   scale_fill_manual(values = mycolors,name="Dados simulados") +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# girafe(ggobj = gg)
#
# ## Contiguidade Queen
# library(rgeoda)
# queen_w <- queen_weights(dados)
# ## Construção e teste de hipóteses do LISA
# lisa <- rgeoda::local_moran(queen_w, as.data.frame(y_ML))
# 
# ## Colocar no shapefile a classificação dos clusters 0, 1, 2, 3 ou 4.
# dados$LISA<-lisa_clusters(lisa)
# ## Colocar no shapefile os p-valores
# dados$LISA_PVALUE<-lisa_pvalues(lisa)
# ## Substituindo os clusters numéricos por caracteres
# dados$LISA<-gsub("0","Not Significant", dados$LISA)
# dados$LISA<-gsub("1","High-High", dados$LISA)
# dados$LISA<-gsub("2","Low-Low", dados$LISA)
# dados$LISA<-gsub("3","Low-High", dados$LISA)
# dados$LISA<-gsub("4","High-Low", dados$LISA)
# 
# ## Cores
# mycolors <- c("red","pink","lightblue","blue","white")
# 
# ## Mapa
# ggplot() +  geom_sf(data=dados,aes(fill=LISA),color="black") + 
#   theme_bw() + 
#   scale_fill_manual(values = mycolors) +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# 
# #mapa dinâmico
# ## O que será mostrado ao encostar em cada município
# dados$tooltip <- c(paste0(dados$name_muni, "\n  ", dados$LISA_PVALUE))
# ## Construção do mapa
# gg <- ggplot(dados)  +
#   geom_sf_interactive(aes(fill=LISA, tooltip = tooltip, data_id = code_muni),color="black",size=0.1)+ theme_bw() + 
#   scale_fill_manual(values = mycolors,name="Dados simulados") +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
#girafe(ggobj = gg)

1.1 Ajustando modelos

Agora realize os seguintes ajustes:
- um modelo de regressão tradicional,
- um modelo SEM,
- um modelo SAR,
- um modelo CAR.
Compare os ajustes gerados.

#Ajustando modelos

ML  <- lm(y_ML~x-1)
SEM <- errorsarlm(y_ML~x-1, data = data.frame(cbind(x,y_ML)), nbw, method="eigen")
SAR <- lagsarlm(y_ML~x-1,   data = data.frame(cbind(x,y_ML)), nbw, method="eigen")
CAR <- spautolm(y_ML~x-1,   data = data.frame(cbind(x,y_ML)), listw = nbw, family="CAR", method="eigen")

# # Visualize o resumo do modelo
# summary(ML)
# summary(SEM)
# summary(SAR)
# summary(CAR)

#Estimativas dos betas
rbind(c("VV", NA, beta),
c("ML",NA, round(coef(ML),3)),
c("SEM", round(coef(SEM),3)),
c("SAR", round(coef(SAR),3)),
c("CAR",c(round(coef(CAR)[3],3), round(coef(CAR)[1:2],3))))

##                     x1      x2     
## [1,] "VV"  NA       "5"     "1"    
## [2,] "ML"  NA       "4.981" "0.959"
## [3,] "SEM" "-0.04"  "4.981" "0.96" 
## [4,] "SAR" "0.031"  "4.826" "0.958"
## [5,] "CAR" "-0.072" "4.981" "0.966"

#Estimativas da variância
rbind(c("VV","ML","SEM","SAR","CAR"),
  c(V,
  round(summary(ML)$sigma^2,3), 
  round(summary(SEM)$s2,3),
  round(summary(SAR)$s2,3),
  round(var(resid(CAR)),3)))

##      [,1]  [,2]    [,3]    [,4]    [,5]   
## [1,] "VV"  "ML"    "SEM"   "SAR"   "CAR"  
## [2,] "0.5" "0.513" "0.511" "0.511" "0.512"

# Análise dos resíduos
par(mfrow=c(2,2))
qqnorm(ML$residuals,main="Modelo linear")
qqline(ML$residuals,lwd=2,col="red")

qqnorm(SEM$residuals, main="Modelo SEM")
qqline(SEM$residuals,lwd=2,col="red")

qqnorm(SAR$residuals, main="Modelo SAR")
qqline(SAR$residuals,lwd=2,col="red")

qqnorm(resid(CAR), main="Modelo CAR")
qqline(resid(CAR),lwd=2,col="red")

par(mfrow=c(2,2))
hist(ML$residuals,main="Modelo linear", freq=F, ylab="densidade", xlab="resíduos")
curve(dnorm(x, 0, sqrt(summary(ML)$sigma^2)),add=T, col="red", lwd=2)

hist(SEM$residuals,main="Modelo SEM", freq=F, ylab="densidade", xlab="resíduos")
curve(dnorm(x, 0, sqrt(summary(SEM)$s2)),add=T, col="red", lwd=2)

hist(SAR$residuals,main="Modelo SAR", freq=F, ylab="densidade", xlab="resíduos")
curve(dnorm(x, 0, sqrt(summary(SAR)$s2)),add=T, col="red", lwd=2)

hist(resid(CAR),main="Modelo CAR", freq=F, ylab="densidade", xlab="resíduos")
curve(dnorm(x, 0, sqrt(var(resid(CAR)))),add=T, col="red", lwd=2)

dados$residuos_ML <- ML$residuals
dados$residuos_SEM <- SEM$residuals
dados$residuos_SAR <- SAR$residuals
dados$residuos_CAR <- resid(CAR)

## Escolha da paleta de cores 
u = dados$residuos_ML
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria1 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa1 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria1),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - ML") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
u = dados$residuos_SEM
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria2 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa2 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria2),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - SEM") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
u = dados$residuos_SAR
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria3 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa3 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria3),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - SAR") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
u = dados$residuos_CAR
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria4 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa4 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria4),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - CAR") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
grid.arrange(Mapa1, Mapa2, Mapa3, Mapa4, ncol = 2, nrow=2)

par(mfrow=c(2,2))
plot(SEM$residuals,SAR$residuals)
lines(SEM$residuals,SEM$residuals,col="red",lwd=2)
plot(SEM$residuals,ML$residuals)
lines(SEM$residuals,SEM$residuals,col="red",lwd=2)
plot(resid(CAR),SAR$residuals)
lines(SAR$residuals,SAR$residuals,col="red",lwd=2)

#EQM
mean((y_ML - ML$fitted.values)^2)

## [1] 0.5118713

mean((y_ML - SEM$fitted.values)^2)

## [1] 0.5114334

mean((y_ML - SAR$fitted.values)^2)

## [1] 0.511394

mean((y_ML - CAR$fit$fitted.values)^2)

## [1] 0.5113336

#AIC
AIC = NULL

log_likelihood <- sum(dnorm(ML$residuals, mean = 0, sd = sqrt(summary(ML)$sigma^2), log = TRUE))
num_params <- length(ML$coefficients)+1
AIC[1] <- -2 * log_likelihood + 2 * num_params

log_likelihood <- summary(SEM)$LL
num_params <- length(SEM$parameters)
AIC[2] <- -2 * log_likelihood + 2 * num_params

log_likelihood <- summary(SAR)$LL
num_params <- length(SAR$parameters)
AIC[3] <- -2 * log_likelihood + 2 * num_params

log_likelihood <- summary(CAR)$LL
num_params <- length(CAR$parameters)
AIC[4] <- -2 * log_likelihood + 2 * num_params

rbind(c("ML","SEM","SAR","CAR"), round(AIC,4))

##      [,1]        [,2]        [,3]        [,4]       
## [1,] "ML"        "SEM"       "SAR"       "CAR"      
## [2,] "1855.4727" "1850.9769" "1850.8235" "1851.0216"

2 Dado dependente (SEM)

Obter o shapefile dos municípios do dados usando o pacote geobr
Gerar um dado simulado para estes municípios considerando um modelo SEM.
Suponha que a geração do dado seja desconhecida e que queiramos investigar se há dependência espacial. Analisar os dados gerados, conforme visto na aula passada.

#analisando possiveis valores de rho
autovalores = eigen(Wp)
c(1/min(autovalores$values),1/max(autovalores$values))

## [1] -1.282833  1.000000

autovalores = eigen(W)
c(1/min(autovalores$values),1/max(autovalores$values))

## [1] -0.2654701  0.1503620

#Gerando um dado simulado sob o modelo SEM
rho           = 0.9             #parametro de correlacao
#e             = mvrnorm(1, rep(0,n), diag(V,n))
#e             = mvrnorm(1, rep(0,n), diag(V/Np))
y_SEM             = x%*% beta + solve(diag(n)-rho*Wp)%*% e

par(mfrow=c(2,2))
hist(x%*% beta, freq=F, main="", ylab="densidade")
hist(e, freq=F, main="", ylab="densidade")
hist(y_SEM, freq=F, main="", ylab="densidade")

#Juntando o dado simulado com as coordenadas
yvetor      = as.vector(y_SEM)
dados       = localizacoes
dados$data  = y_SEM
dados$cov   = x[,2]


## Cálculo e teste do Indice Global de Moran
GlobalMoran <- moran.test(y_SEM,nbw,alternative="two.sided")
GlobalMoran

## 
##  Moran I test under randomisation
## 
## data:  y_SEM  
## weights: nbw    
## 
## Moran I statistic standard deviate = 21.741, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.4616082171     -0.0011737089      0.0004531042

## Plot do diagrama de espalhamento de Moran
par(mfrow=c(1,1))

moran.plot(yvetor,nbw,label=dados$name_muni)

# #calculando o indice de moran local
# moran_local <- localmoran(yvetor, nbw)
# #plot(moran_local)
# ## Construção do mapa
# #dados$IML    <- moran_local[,1]
# dados$IML    <- moran_local[,5]
# breaks          <- round(quantile(dados$IML,probs = c(0.25,0.50,0.75)),2)
# dados$IMLC   <- age.cat(round(dados$IML,2),breaks,min=min(round(dados$IML,2)),max=max(round(dados$IML,2)))
# dados$tooltip2 <- c(paste0(dados$name_muni, "\n  ", round(dados$IML,2)))
# gg <- ggplot(dados) +  geom_sf(aes(fill=IMLC)) +
#   geom_sf_interactive(aes(fill=IMLC, tooltip = tooltip2, data_id = code_muni),color="black",size=0.1)+ theme_bw() + 
#   scale_fill_manual(values = mycolors,name="Dados simulados") +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# girafe(ggobj = gg)
#
# ## Contiguidade Queen
# library(rgeoda)
# queen_w <- queen_weights(dados)
# ## Construção e teste de hipóteses do LISA
# lisa <- rgeoda::local_moran(queen_w, as.data.frame(y_SEM))
# 
# ## Colocar no shapefile a classificação dos clusters 0, 1, 2, 3 ou 4.
# dados$LISA<-lisa_clusters(lisa)
# ## Colocar no shapefile os p-valores
# dados$LISA_PVALUE<-lisa_pvalues(lisa)
# ## Substituindo os clusters numéricos por caracteres
# dados$LISA<-gsub("0","Not Significant", dados$LISA)
# dados$LISA<-gsub("1","High-High", dados$LISA)
# dados$LISA<-gsub("2","Low-Low", dados$LISA)
# dados$LISA<-gsub("3","Low-High", dados$LISA)
# dados$LISA<-gsub("4","High-Low", dados$LISA)
# 
# ## Cores
# mycolors <- c("red","pink","lightblue","blue","white")
# 
# ## Mapa
# ggplot() +  geom_sf(data=dados,aes(fill=LISA),color="black") + 
#   theme_bw() + 
#   scale_fill_manual(values = mycolors) +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# 
# #mapa dinâmico
# ## O que será mostrado ao encostar em cada município
# dados$tooltip <- c(paste0(dados$name_muni, "\n  ", dados$LISA_PVALUE))
# ## Construção do mapa
# gg <- ggplot(dados)  +
#   geom_sf_interactive(aes(fill=LISA, tooltip = tooltip, data_id = code_muni),color="black",size=0.1)+ theme_bw() + 
#   scale_fill_manual(values = mycolors,name="Dados simulados") +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# girafe(ggobj = gg)

2.1 Ajustando modelos

Agora realize os seguintes ajustes:
- um modelo de regressão tradicional,
um modelo SEM,
um modelo SAR,
um modelo CAR.
Compare os ajustes gerados.

#Ajustando modelos

ML  <- lm(y_SEM~x-1)
SEM <- errorsarlm(y_SEM~x-1, data = data.frame(cbind(x,y_SEM)), nbw, method="eigen")
SAR <- lagsarlm(y_SEM~x-1,   data = data.frame(cbind(x,y_SEM)), nbw, method="eigen")
CAR <- spautolm(y_SEM~x-1,   data = data.frame(cbind(x,y_SEM)), listw = nbw, family="CAR", method="eigen")

# # Visualize o resumo do modelo
# summary(ML)
# summary(SEM)
# summary(SAR)
# summary(CAR)

#Estimativas dos betas
rbind(c("VV", rho, beta),
      c("ML",NA, round(coef(ML),3)),
      c("SEM", round(coef(SEM),3)),
      c("SAR", round(coef(SAR),3)),
      c("CAR",c(round(coef(CAR)[3],3), round(coef(CAR)[1:2],3))))

##                    x1      x2     
## [1,] "VV"  "0.9"   "5"     "1"    
## [2,] "ML"  NA      "4.9"   "0.961"
## [3,] "SEM" "0.886" "4.836" "0.948"
## [4,] "SAR" "0.783" "1.09"  "0.951"
## [5,] "CAR" "0.983" "5.439" "0.858"

#Estimativas da variância
rbind(c("VV","ML","SEM","SAR","CAR"),
  c(V,
  round(summary(ML)$sigma^2,3), 
  round(summary(SEM)$s2,3),
  round(summary(SAR)$s2,3),
  round(var(resid(CAR)),3)))

##      [,1]  [,2]    [,3]    [,4]    [,5]   
## [1,] "VV"  "ML"    "SEM"   "SAR"   "CAR"  
## [2,] "0.5" "1.781" "0.516" "0.669" "0.495"

#Entendendo quem sao os residuos de cada modelo ajustado

# --- modelo SEM ---
par(mfrow=c(2,2))
#residuos = (I_n - rho*W) %*% (y - x %*% beta)
plot( SEM$residuals, (diag(n)-SEM$lambda*Wp) %*% (y_SEM - x %*% SEM$coefficients))
lines(SEM$residuals,SEM$residuals, col="red", lwd=2)
#y = residuos + yajustados
plot(y_SEM, SEM$fitted.values+SEM$residuals)
lines(y_SEM, y_SEM, col="red", lwd=2)
#yajustados = y - residuos
plot( SEM$fitted.values, y_SEM - SEM$residuals)
lines(SEM$fitted.values, SEM$fitted.values, col="red", lwd=2)

# --- modelo SAR ---
par(mfrow=c(2,2))

#residuos = (I_n - rho*W) %*% y - x %*% beta
plot( SAR$residuals, (diag(n)-SAR$rho*Wp) %*% y_SEM - x %*% SAR$coefficients)
lines(SAR$residuals,SAR$residuals, col="red", lwd=2)
#y = residuos + yajustados 
plot(y_SEM, SAR$fitted.values+SAR$residuals)
lines(y_SEM, y_SEM, col="red", lwd=2)
#yajustados = y - residuos
plot( SAR$fitted.values, y_SEM - SAR$residuals)
lines(SAR$fitted.values, SAR$fitted.values, col="red", lwd=2)

# --- modelo CAR ---
par(mfrow=c(2,2))

#residuos = (diag(n)-rho*Wp) %*% (y - x %*% beta)
plot( resid(CAR), (diag(n)-coef(CAR)[3]*Wp) %*% (y_SEM - x %*% coef(CAR)[1:2]))
lines(resid(CAR),resid(CAR), col="red", lwd=2)
#y = residuos + yajustados 
plot(y_SEM, CAR$fit$fitted.values + resid(CAR))
lines(y_SEM, y_SEM, col="red", lwd=2)
#yajustados = y - residuos
plot( CAR$fit$fitted.values, y_SEM - resid(CAR))
lines(y_SEM - resid(CAR),y_SEM - resid(CAR), col="red", lwd=2)

# Análise dos resíduos
par(mfrow=c(2,2))
qqnorm(ML$residuals,main="Modelo linear")
qqline(ML$residuals,lwd=2,col="red")

qqnorm(SEM$residuals, main="Modelo SEM")
qqline(SEM$residuals,lwd=2,col="red")

qqnorm(SAR$residuals, main="Modelo SAR")
qqline(SAR$residuals,lwd=2,col="red")

qqnorm(resid(CAR), main="Modelo CAR")
qqline(resid(CAR),lwd=2,col="red")

par(mfrow=c(2,2))
hist(ML$residuals,main="Modelo linear", freq=F, ylab="densidade", xlab="resíduos")
curve(dnorm(x, 0, sqrt(summary(ML)$sigma^2)),add=T, col="red", lwd=2)

hist(SEM$residuals,main="Modelo SEM", freq=F, ylab="densidade", xlab="resíduos")
curve(dnorm(x, 0, sqrt(summary(SEM)$s2)),add=T, col="red", lwd=2)

hist(SAR$residuals,main="Modelo SAR", freq=F, ylab="densidade", xlab="resíduos")
curve(dnorm(x, 0, sqrt(summary(SAR)$s2)),add=T, col="red", lwd=2)

hist(resid(CAR),main="Modelo CAR", freq=F, ylab="densidade", xlab="resíduos")
curve(dnorm(x, 0, sqrt(var(resid(CAR)))),add=T, col="red", lwd=2)

dados$residuos_ML <- ML$residuals
dados$residuos_SEM <- SEM$residuals
dados$residuos_SAR <- SAR$residuals
dados$residuos_CAR <- resid(CAR)

## Escolha da paleta de cores 
u = dados$residuos_ML
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria1 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa1 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria1),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - ML") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
u = dados$residuos_SEM
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria2 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa2 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria2),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - SEM") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
u = dados$residuos_SAR
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria3 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa3 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria3),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - SAR") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
u = dados$residuos_CAR
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria4 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa4 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria4),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - CAR") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
grid.arrange(Mapa1, Mapa2, Mapa3, Mapa4, ncol = 2, nrow=2)

par(mfrow=c(2,2))
plot(SEM$residuals,SAR$residuals)
plot(SEM$residuals,ML$residuals)
plot(resid(CAR),SAR$residuals)
plot(resid(CAR),ML$residuals)

#EQM
mean((y_SEM - ML$fitted.values)^2)

## [1] 1.776563

mean((y_SEM - SEM$fitted.values)^2)

## [1] 0.5161137

mean((y_SEM - SAR$fitted.values)^2)

## [1] 0.668561

mean((y_SEM - CAR$fit$fitted.values)^2)

## [1] 0.4950747

#AIC
AIC = NULL

log_likelihood <- sum(dnorm(ML$residuals, mean = 0, sd = sqrt(summary(ML)$sigma^2), log = TRUE))
num_params <- length(ML$coefficients)+1
AIC[1] <- -2 * log_likelihood + 2 * num_params

log_likelihood <- summary(SEM)$LL
num_params <- length(SEM$parameters)
AIC[2] <- -2 * log_likelihood + 2 * num_params

log_likelihood <- summary(SAR)$LL
num_params <- length(SAR$parameters)
AIC[3] <- -2 * log_likelihood + 2 * num_params

log_likelihood <- summary(CAR)$LL
num_params <- length(CAR$parameters)
AIC[4] <- -2 * log_likelihood + 2 * num_params

rbind(c("ML","SEM","SAR","CAR"), round(AIC,4))

##      [,1]       [,2]        [,3]        [,4]      
## [1,] "ML"       "SEM"       "SAR"       "CAR"     
## [2,] "2916.914" "2050.1402" "2212.1731" "2138.201"

3 Dado dependente (SAR)

Obter o shapefile dos municípios do dados usando o pacote geobr
Gerar um dado simulado para estes municípios considerando um modelo SAR.
Suponha que a geração do dado seja desconhecida e que queiramos investigar se há dependência espacial. Analisar os dados gerados, conforme visto na aula passada.

#analisando os possiveis valores de rho
autovalores = eigen(Wp)
c(1/min(autovalores$values),1/max(autovalores$values))

## [1] -1.282833  1.000000

autovalores = eigen(W)
c(1/min(autovalores$values),1/max(autovalores$values))

## [1] -0.2654701  0.1503620

#Gerando um dado simulado sob o modelo SAR
rho           = 0.9             #parametro de correlacao
#e             = mvrnorm(1, rep(0,n), diag(V,n))
#e             = mvrnorm(1, rep(0,n), diag(V/Np))
y_SAR         = solve(diag(n)-rho*Wp)%*%(x%*% beta+e)

par(mfrow=c(2,2))
hist(x%*% beta, freq=F, main="", ylab="densidade")
hist(e, freq=F, main="", ylab="densidade")
hist(y_SAR, freq=F, main="", ylab="densidade")

#Juntando o dado simulado com as coordenadas
yvetor      = as.vector(y_SAR)
dados       = localizacoes
dados$data  = y_SAR
dados$cov   = x[,2]

# #plotando os dados
# 
# ## Pontos de corte
# breaks <- round(quantile(dados$data,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
# ## Escolha da paleta de cores 
# mycolors <- rev(brewer.pal(7, "RdBu"))
# ## Categorizando os pontos de corte
# dados$Categoria <- age.cat(round(y_SAR,2),breaks,min=min(round(y_SAR,2)),max=max(round(y_SAR,2)))
# 
# Mapa = ggplot() +  geom_sf(data=dados,aes(fill=Categoria),colour="black") + 
#   theme_bw() +
#   scale_fill_manual(values = mycolors,name="Dados simulados") +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# plot(Mapa)
# 
# #Os comandos a seguir permitem a construção do mapa dinâmico:
# 
# ## O que será mostrado ao encostar em cada município
# dados$tooltip <- c(paste0(dados$name_muni, "\n  ", round(dados$data,2)))
# ## Construção do mapa
# gg <- ggplot(dados) +  geom_sf(aes(fill=Categoria)) +
#   geom_sf_interactive(aes(fill=Categoria, tooltip = tooltip, data_id = code_muni),color="black",size=0.1)+ theme_bw() + 
#   scale_fill_manual(values = mycolors,name="Dados simulados") +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# girafe(ggobj = gg)
# 

## Cálculo e teste do Indice Global de Moran
GlobalMoran <- moran.test(y_SAR,nbw,alternative="two.sided")
GlobalMoran

## 
##  Moran I test under randomisation
## 
## data:  y_SAR  
## weights: nbw    
## 
## Moran I statistic standard deviate = 31.517, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.6697721600     -0.0011737089      0.0004531937

## Plot do diagrama de espalhamento de Moran
par(mfrow=c(1,1))

moran.plot(yvetor,nbw,label=dados$name_muni)

# #calculando o indice de moran local
# moran_local <- localmoran(yvetor, nbw)
# #plot(moran_local)
# ## Construção do mapa
# #dados$IML    <- moran_local[,1]
# dados$IML    <- moran_local[,5]
# breaks          <- round(quantile(dados$IML,probs = c(0.25,0.50,0.75)),2)
# dados$IMLC   <- age.cat(round(dados$IML,2),breaks,min=min(round(dados$IML,2)),max=max(round(dados$IML,2)))
# dados$tooltip2 <- c(paste0(dados$name_muni, "\n  ", round(dados$IML,2)))
# gg <- ggplot(dados) +  geom_sf(aes(fill=IMLC)) +
#   geom_sf_interactive(aes(fill=IMLC, tooltip = tooltip2, data_id = code_muni),color="black",size=0.1)+ theme_bw() + 
#   scale_fill_manual(values = mycolors,name="Dados simulados") +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# girafe(ggobj = gg)
# 
# ## Contiguidade Queen
# library(rgeoda)
# queen_w <- queen_weights(dados)
# ## Construção e teste de hipóteses do LISA
# lisa <- rgeoda::local_moran(queen_w, as.data.frame(y_SAR))
# 
# ## Colocar no shapefile a classificação dos clusters 0, 1, 2, 3 ou 4.
# dados$LISA<-lisa_clusters(lisa)
# ## Colocar no shapefile os p-valores
# dados$LISA_PVALUE<-lisa_pvalues(lisa)
# ## Substituindo os clusters numéricos por caracteres
# dados$LISA<-gsub("0","Not Significant", dados$LISA)
# dados$LISA<-gsub("1","High-High", dados$LISA)
# dados$LISA<-gsub("2","Low-Low", dados$LISA)
# dados$LISA<-gsub("3","Low-High", dados$LISA)
# dados$LISA<-gsub("4","High-Low", dados$LISA)
# 
# ## Cores
# mycolors <- c("red","pink","lightblue","blue","white")
# 
# ## Mapa
# ggplot() +  geom_sf(data=dados,aes(fill=LISA),color="black") + 
#   theme_bw() + 
#   scale_fill_manual(values = mycolors) +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# 
# #mapa dinâmico
# ## O que será mostrado ao encostar em cada município
# dados$tooltip <- c(paste0(dados$name_muni, "\n  ", dados$LISA_PVALUE))
# ## Construção do mapa
# gg <- ggplot(dados)  +
#   geom_sf_interactive(aes(fill=LISA, tooltip = tooltip, data_id = code_muni),color="black",size=0.1)+ theme_bw() + 
#   scale_fill_manual(values = mycolors,name="Dados simulados") +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
#girafe(ggobj = gg)

3.1 Ajustando modelos

Agora realize os seguintes ajustes:
- um modelo de regressão tradicional,
- um modelo SEM,
- um modelo SAR,
- um modelo CAR.
Compare os ajustes gerados.

#Ajustando modelos

ML  <- lm(y_SAR~x-1)
SEM <- errorsarlm(y_SAR~x-1, data = data.frame(cbind(x,y_SAR)), nbw, method="eigen")
SAR <- lagsarlm(y_SAR~x-1,   data = data.frame(cbind(x,y_SAR)), nbw, method="eigen")
CAR <- spautolm(y_SAR~x-1,   data = data.frame(cbind(x,y_SAR)), listw = nbw, family="CAR", method="eigen")

# # Visualize o resumo do modelo
# summary(ML)
# summary(SEM)
# summary(SAR)
# summary(CAR)

#Estimativas dos betas
rbind(c("VV", rho, beta),
c("ML",NA, round(coef(ML),3)),
c("SEM", round(coef(SEM),3)),
c("SAR", round(coef(SAR),3)),
c("CAR",c(round(coef(CAR)[3],3), round(coef(CAR)[1:2],3))))

##                    x1       x2     
## [1,] "VV"  "0.9"   "5"      "1"    
## [2,] "ML"  NA      "49.534" "1.338"
## [3,] "SEM" "0.958" "49.29"  "0.796"
## [4,] "SAR" "0.906" "4.675"  "0.956"
## [5,] "CAR" "0.946" "49.581" "0.07"

#Estimativas da variância
rbind(c("VV","ML","SEM","SAR","CAR"),
  c(V,
  round(summary(ML)$sigma^2,3), 
  round(summary(SEM)$s2,3),
  round(summary(SAR)$s2,3),
  round(var(resid(CAR)),3)))

##      [,1]  [,2]    [,3]    [,4]    [,5]  
## [1,] "VV"  "ML"    "SEM"   "SAR"   "CAR" 
## [2,] "0.5" "3.299" "0.621" "0.509" "1.25"

# Análise dos resíduos

par(mfrow=c(2,2))
qqnorm(ML$residuals,main="Modelo linear")
qqline(ML$residuals,lwd=2,col="red")

qqnorm(SEM$residuals, main="Modelo SEM")
qqline(SEM$residuals,lwd=2,col="red")

qqnorm(SAR$residuals, main="Modelo SAR")
qqline(SAR$residuals,lwd=2,col="red")

qqnorm(resid(CAR), main="Modelo CAR")
qqline(resid(CAR),lwd=2,col="red")

par(mfrow=c(2,2))
hist(ML$residuals,main="Modelo linear", freq=F, ylab="densidade", xlab="resíduos")
curve(dnorm(x, 0, sqrt(summary(ML)$sigma^2)),add=T, col="red", lwd=2)

hist(SEM$residuals,main="Modelo SEM", freq=F, ylab="densidade", xlab="resíduos")
curve(dnorm(x, 0, sqrt(summary(SEM)$s2)),add=T, col="red", lwd=2)

hist(SAR$residuals,main="Modelo SAR", freq=F, ylab="densidade", xlab="resíduos")
curve(dnorm(x, 0, sqrt(summary(SAR)$s2)),add=T, col="red", lwd=2)

hist(resid(CAR),main="Modelo CAR", freq=F, ylab="densidade", xlab="resíduos")
curve(dnorm(x, 0, sqrt(var(resid(CAR)))),add=T, col="red", lwd=2)

dados$residuos_ML <- ML$residuals
dados$residuos_SEM <- SEM$residuals
dados$residuos_SAR <- SAR$residuals
dados$residuos_CAR <- resid(CAR)

## Escolha da paleta de cores 
u = dados$residuos_ML
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria1 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa1 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria1),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - ML") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
u = dados$residuos_SEM
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria2 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa2 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria2),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - SEM") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
u = dados$residuos_SAR
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria3 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa3 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria3),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - SAR") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
u = dados$residuos_CAR
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria4 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa4 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria4),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - CAR") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
grid.arrange(Mapa1, Mapa2, Mapa3, Mapa4, ncol = 2, nrow=2)

par(mfrow=c(2,2))
plot(SEM$residuals,SAR$residuals)
plot(SEM$residuals,ML$residuals)
plot(resid(CAR),SAR$residuals)
plot(resid(CAR),ML$residuals)

#EQM
mean((y_SAR - ML$fitted.values)^2)

## [1] 3.291632

mean((y_SAR - SEM$fitted.values)^2)

## [1] 0.6213858

mean((y_SAR - SAR$fitted.values)^2)

## [1] 0.5088704

mean((y_SAR - CAR$fit$fitted.values)^2)

## [1] 1.248507

#AIC
AIC = NULL

log_likelihood <- sum(dnorm(ML$residuals, mean = 0, sd = sqrt(summary(ML)$sigma^2), log = TRUE))
num_params <- length(ML$coefficients)+1
AIC[1] <- -2 * log_likelihood + 2 * num_params

log_likelihood <- summary(SEM)$LL
num_params <- length(SEM$parameters)
AIC[2] <- -2 * log_likelihood + 2 * num_params

log_likelihood <- summary(SAR)$LL
num_params <- length(SAR$parameters)
AIC[3] <- -2 * log_likelihood + 2 * num_params

log_likelihood <- summary(CAR)$LL
num_params <- length(CAR$parameters)
AIC[4] <- -2 * log_likelihood + 2 * num_params

rbind(c("ML","SEM","SAR","CAR"), round(AIC,4))

##      [,1]        [,2]        [,3]        [,4]       
## [1,] "ML"        "SEM"       "SAR"       "CAR"      
## [2,] "3442.9617" "2271.9553" "2052.8259" "3015.1086"

4 Dado dependente (CAR)

Obter o shapefile dos municípios do dados usando o pacote geobr
Gerar um dado simulado para estes municípios considerando um modelo CAR.
Suponha que a geração do dado seja desconhecida e que queiramos investigar se há dependência espacial. Analisar os dados gerados, conforme visto na aula passada.

#analisando os possiveis valores de rho
autovalores = eigen(Wp)
c(1/min(autovalores$values),1/max(autovalores$values))

## [1] -1.282833  1.000000

autovalores = eigen(W)
c(1/min(autovalores$values),1/max(autovalores$values))

## [1] -0.2654701  0.1503620

#Gerando um dado simulado sob o modelo CAR
rho           = 0.9             #parametro de correlacao
e             = mvrnorm(1, rep(0,n), V * solve(diag(Np)-rho*W))
y_CAR         = x%*% beta + e

par(mfrow=c(2,2))
hist(x%*% beta, freq=F, main="", ylab="densidade")
hist(e, freq=F, main="", ylab="densidade")
hist(y_CAR, freq=F, main="", ylab="densidade")

#Juntando o dado simulado com as coordenadas
yvetor      = as.vector(y_CAR)
dados       = localizacoes
dados$data  = y_CAR
dados$cov   = x[,2]

# #plotando os dados
# ## Pontos de corte
# breaks <- round(quantile(dados$data,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
# ## Escolha da paleta de cores 
# mycolors <- rev(brewer.pal(7, "RdBu"))
# ## Categorizando os pontos de corte
# dados$Categoria <- age.cat(round(y_CAR,2),breaks,min=min(round(y_CAR,2)),max=max(round(y_CAR,2)))
# 
# Mapa = ggplot() +  geom_sf(data=dados,aes(fill=Categoria),colour="black") + 
#   theme_bw() +
#   scale_fill_manual(values = mycolors,name="Dados simulados") +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# plot(Mapa)
# 
# #Os comandos a seguir permitem a construção do mapa dinâmico:
# 
# ## O que será mostrado ao encostar em cada município
# dados$tooltip <- c(paste0(dados$name_muni, "\n  ", round(dados$data,2)))
# ## Construção do mapa
# gg <- ggplot(dados) +  geom_sf(aes(fill=Categoria)) +
#   geom_sf_interactive(aes(fill=Categoria, tooltip = tooltip, data_id = code_muni),color="black",size=0.1)+ theme_bw() + 
#   scale_fill_manual(values = mycolors,name="Dados simulados") +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# girafe(ggobj = gg)
# 

## Cálculo e teste do Indice Global de Moran
GlobalMoran <- moran.test(y_CAR,nbw,alternative="two.sided")
GlobalMoran

## 
##  Moran I test under randomisation
## 
## data:  y_CAR  
## weights: nbw    
## 
## Moran I statistic standard deviate = 2.926, p-value = 0.003433
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.0611121092     -0.0011737089      0.0004531245

## Plot do diagrama de espalhamento de Moran
par(mfrow=c(1,1))

moran.plot(yvetor,nbw,label=dados$name_muni)

# #calculando o indice de moran local
# moran_local <- localmoran(yvetor, nbw)
# #plot(moran_local)
# ## Construção do mapa
# #dados$IML    <- moran_local[,1]
# dados$IML    <- moran_local[,5]
# breaks          <- round(quantile(dados$IML,probs = c(0.25,0.50,0.75)),2)
# dados$IMLC   <- age.cat(round(dados$IML,2),breaks,min=min(round(dados$IML,2)),max=max(round(dados$IML,2)))
# dados$tooltip2 <- c(paste0(dados$name_muni, "\n  ", round(dados$IML,2)))
# gg <- ggplot(dados) +  geom_sf(aes(fill=IMLC)) +
#   geom_sf_interactive(aes(fill=IMLC, tooltip = tooltip2, data_id = code_muni),color="black",size=0.1)+ theme_bw() + 
#   scale_fill_manual(values = mycolors,name="Dados simulados") +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# girafe(ggobj = gg)
# 
# ## Contiguidade Queen
# library(rgeoda)
# queen_w <- queen_weights(dados)
# ## Construção e teste de hipóteses do LISA
# lisa <- rgeoda::local_moran(queen_w, as.data.frame(y_CAR))
# 
# ## Colocar no shapefile a classificação dos clusters 0, 1, 2, 3 ou 4.
# dados$LISA<-lisa_clusters(lisa)
# ## Colocar no shapefile os p-valores
# dados$LISA_PVALUE<-lisa_pvalues(lisa)
# ## Substituindo os clusters numéricos por caracteres
# dados$LISA<-gsub("0","Not Significant", dados$LISA)
# dados$LISA<-gsub("1","High-High", dados$LISA)
# dados$LISA<-gsub("2","Low-Low", dados$LISA)
# dados$LISA<-gsub("3","Low-High", dados$LISA)
# dados$LISA<-gsub("4","High-Low", dados$LISA)
# 
# ## Cores
# mycolors <- c("red","pink","lightblue","blue","white")
# 
# ## Mapa
# ggplot() +  geom_sf(data=dados,aes(fill=LISA),color="black") + 
#   theme_bw() + 
#   scale_fill_manual(values = mycolors) +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
# 
# #mapa dinâmico
# ## O que será mostrado ao encostar em cada município
# dados$tooltip <- c(paste0(dados$name_muni, "\n  ", dados$LISA_PVALUE))
# ## Construção do mapa
# gg <- ggplot(dados)  +
#   geom_sf_interactive(aes(fill=LISA, tooltip = tooltip, data_id = code_muni),color="black",size=0.1)+ theme_bw() + 
#   scale_fill_manual(values = mycolors,name="Dados simulados") +
#   theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank(),
#         legend.position = "bottom")
#girafe(ggobj = gg)

4.1 Ajustando modelos

Agora realize os seguintes ajustes:
- um modelo de regressão tradicional,
um modelo SEM,
um modelo SAR,
um modelo CAR.
Compare os ajustes gerados.

#Ajustando modelos

ML  <- lm(y_CAR~x-1)
SEM <- errorsarlm(y_CAR~x-1, data = data.frame(cbind(x,y_CAR)), nbw, method="eigen")
SAR <- lagsarlm(y_CAR~x-1,   data = data.frame(cbind(x,y_CAR)), nbw, method="eigen")
CAR <- spautolm(y_CAR~x-1,   data = data.frame(cbind(x,y_CAR)), listw = nbw, family="CAR", method="eigen")

# # Visualize o resumo do modelo
# summary(ML)
# summary(SEM)
# summary(SAR)
# summary(CAR)

#Estimativas dos betas
rbind(c("VV", rho, beta),
      c("ML",NA, round(coef(ML),3)),
      c("SEM", round(coef(SEM),3)),
      c("SAR", round(coef(SAR),3)),
      c("CAR",c(round(coef(CAR)[3],3), round(coef(CAR)[1:2],3))))

##                    x1      x2     
## [1,] "VV"  "0.9"   "5"     "1"    
## [2,] "ML"  NA      "4.985" "1.011"
## [3,] "SEM" "0.606" "4.986" "1.006"
## [4,] "SAR" "0.2"   "3.997" "1.009"
## [5,] "CAR" "0.776" "5.049" "0.937"

#Estimativas da variância
rbind(c("VV","ML","SEM","SAR","CAR"),
  c(V,
  round(summary(ML)$sigma^2,3), 
  round(summary(SEM)$s2,3),
  round(summary(SAR)$s2,3),
  round(var(resid(CAR)),3)))

##      [,1]  [,2]    [,3]   [,4]    [,5]   
## [1,] "VV"  "ML"    "SEM"  "SAR"   "CAR"  
## [2,] "0.5" "0.152" "0.11" "0.139" "0.111"

# Análise dos residuos

par(mfrow=c(2,2))
qqnorm(ML$residuals,main="Modelo linear")
qqline(ML$residuals,lwd=2,col="red")

qqnorm(SEM$residuals, main="Modelo SEM")
qqline(SEM$residuals,lwd=2,col="red")

qqnorm(SAR$residuals, main="Modelo SAR")
qqline(SAR$residuals,lwd=2,col="red")

qqnorm(resid(CAR), main="Modelo CAR")
qqline(resid(CAR),lwd=2,col="red")

par(mfrow=c(2,2))
hist(ML$residuals,main="Modelo linear", freq=F, ylab="densidade", xlab="residuos")
curve(dnorm(x, 0, sqrt(summary(ML)$sigma^2)),add=T, col="red", lwd=2)

hist(SEM$residuals,main="Modelo SEM", freq=F, ylab="densidade", xlab="residuos")
curve(dnorm(x, 0, sqrt(summary(SEM)$s2)),add=T, col="red", lwd=2)

hist(SAR$residuals,main="Modelo SAR", freq=F, ylab="densidade", xlab="residuos")
curve(dnorm(x, 0, sqrt(summary(SAR)$s2)),add=T, col="red", lwd=2)

hist(resid(CAR),main="Modelo CAR", freq=F, ylab="densidade", xlab="residuos")
curve(dnorm(x, 0, sqrt(var(resid(CAR)))),add=T, col="red", lwd=2)

dados$residuos_ML <- ML$residuals
dados$residuos_SEM <- SEM$residuals
dados$residuos_SAR <- SAR$residuals
dados$residuos_CAR <- resid(CAR)

## Escolha da paleta de cores 
u = dados$residuos_ML
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria1 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa1 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria1),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - ML") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
u = dados$residuos_SEM
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria2 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa2 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria2),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - SEM") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
u = dados$residuos_SAR
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria3 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa3 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria3),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - SAR") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
u = dados$residuos_CAR
breaks <- round(quantile(u,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors <- rev(brewer.pal(7, "RdBu"))
Categoria4 <- age.cat(round(u,2),breaks,min=min(round(u,2)),max=max(round(u,2)))
Mapa4 = ggplot() +  geom_sf(data=dados,aes(fill=Categoria4),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="Dados simulados - CAR") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")
grid.arrange(Mapa1, Mapa2, Mapa3, Mapa4, ncol = 2, nrow=2)

par(mfrow=c(2,2))
plot(SEM$residuals,SAR$residuals)
plot(SEM$residuals,ML$residuals)
plot(resid(CAR),SAR$residuals)
plot(resid(CAR),ML$residuals)

#EQM
mean((y_CAR - ML$fitted.values)^2)

## [1] 0.1518191

mean((y_CAR - SEM$fitted.values)^2)

## [1] 0.1103924

mean((y_CAR - SAR$fitted.values)^2)

## [1] 0.1388055

mean((y_CAR - CAR$fit$fitted.values)^2)

## [1] 0.1112053

#AIC
AIC = NULL

log_likelihood <- sum(dnorm(ML$residuals, mean = 0, sd = sqrt(summary(ML)$sigma^2), log = TRUE))
num_params <- length(ML$coefficients)+1
AIC[1] <- -2 * log_likelihood + 2 * num_params

log_likelihood <- summary(SEM)$LL
num_params <- length(SEM$parameters)
AIC[2] <- -2 * log_likelihood + 2 * num_params

log_likelihood <- summary(SAR)$LL
num_params <- length(SAR$parameters)
AIC[3] <- -2 * log_likelihood + 2 * num_params

log_likelihood <- summary(CAR)$LL
num_params <- length(CAR$parameters)
AIC[4] <- -2 * log_likelihood + 2 * num_params

rbind(c("ML","SEM","SAR","CAR"), round(AIC,4))

##      [,1]       [,2]       [,3]       [,4]      
## [1,] "ML"       "SEM"      "SAR"      "CAR"     
## [2,] "818.7503" "612.5069" "744.6973" "665.3133"

5 Comparando os dados

## Comparando y
aux       = y_ML
breaks    = round(quantile(aux,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors  = rev(brewer.pal(length(breaks)+1, "RdBu"))
categoria = age.cat(round(aux,2),breaks,min=min(round(aux,2)),max=max(round(aux,2)))
Mapa1 = ggplot() +  geom_sf(data=dados,aes(fill=categoria),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="ML") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")

aux       = y_SEM
breaks    = round(quantile(aux,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors  = rev(brewer.pal(length(breaks)+1, "RdBu"))
categoria = age.cat(round(aux,2),breaks,min=min(round(aux,2)),max=max(round(aux,2)))
Mapa2 = ggplot() +  geom_sf(data=dados,aes(fill=categoria),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="SEM") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")

aux       = y_SAR
breaks    = round(quantile(aux,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors  = rev(brewer.pal(length(breaks)+1, "RdBu"))
categoria = age.cat(round(aux,2),breaks,min=min(round(aux,2)),max=max(round(aux,2)))
Mapa3 = ggplot() +  geom_sf(data=dados,aes(fill=categoria),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="SAR") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")

aux       = y_CAR
breaks    = round(quantile(aux,probs = c(0.05,0.20,0.40,0.60,0.80,0.95)),2)
mycolors  = rev(brewer.pal(length(breaks)+1, "RdBu"))
categoria = age.cat(round(aux,2),breaks,min=min(round(aux,2)),max=max(round(aux,2)))
Mapa4 = ggplot() +  geom_sf(data=dados,aes(fill=categoria),colour="black") + 
  theme_bw() +
  scale_fill_manual(values = mycolors,name="CAR") +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "bottom")

grid.arrange(Mapa1, Mapa2, Mapa3, Mapa4, ncol = 2, nrow=2)

par(mfrow=c(2,2))
plot(y_ML,y_SEM)
plot(y_SAR,y_SEM)
plot(y_CAR,y_SEM)
plot(y_SAR,y_CAR)

Estatística espacial - GET/IME/UFF

Patrícia Lusié Velozo da Costa

2023-2

1 Dado independente espacialmente

1.1 Ajustando modelos

2 Dado dependente (SEM)

2.1 Ajustando modelos

3 Dado dependente (SAR)

3.1 Ajustando modelos

4 Dado dependente (CAR)

4.1 Ajustando modelos

5 Comparando os dados