Effects of Spatial Dependence

## Loading required package: spam

## Loading required package: dotCall64

## Loading required package: grid

## Spam version 2.6-0 (2020-12-14) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction 
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.

## 
## Attaching package: 'spam'

## The following objects are masked from 'package:base':
## 
##     backsolve, forwardsolve

## See https://github.com/NCAR/Fields for
##  an extensive vignette, other supplements and source code

## 
## Attaching package: 'rdist'

## The following object is masked from 'package:fields':
## 
##     rdist

Demonstration of the effects of spatial dependence in regression

# Simulation study to investigate the effect of not accounting for 
# residual spatial dependence in spatial regression 

n.obs<-100
n.datasets<-100
beta<-0.1


COVER<-rep(0,n.datasets)

for(rep in 1:n.datasets){

 #Generage data:
 x<-rnorm(n.obs, mean=0)
 y<-rnorm(n.obs, mean=x*beta)
 x<-as.vector(x)
 y<-as.vector(y)

 #Fit OLS regression
 fit<-lm(y~x)
 beta_hat<-summary(fit)$coef[2,1]
 beta_se<-summary(fit)$coef[2,2]

 #Determine if the true beta is contained in the 
 #95% interval
 COVER[rep]<- beta > beta_hat-1.96*beta_se &
              beta < beta_hat+1.96*beta_se
}

print("Empirical coverage of 95% intervals")
print(mean(COVER))

# Now impose spatial correlation on the observations


nugget<-0
partial_sill<-1
range<-0.5


COVER<-rep(0,n.datasets)

for(rep in 1:n.datasets){
 
 #Generage data:
 s<-cbind(runif(n.obs),runif(n.obs))
 d<-rdist(s)
 d<-as.matrix(d)
 COR<-exp(-d/range)
 C<-partial_sill*COR + nugget*diag(n.obs)
 x<-rmvnorm(1,rep(0,n.obs),C)
 y<-rmvnorm(1,x*beta,C)
 x<-as.vector(x)
 y<-as.vector(y)

 #Fit OLS regression
 fit<-lm(y~x)
 beta_hat<-summary(fit)$coef[2,1]
 beta_se<-summary(fit)$coef[2,2]

 #Determine if the true beta is contained in the 
 #95% interval
 COVER[rep]<- beta > beta_hat-1.96*beta_se &
              beta < beta_hat+1.96*beta_se
}
print("Empirical coverage of 95% intervals")
print(mean(COVER))



#Repeat the same experiment for several different spatial ranges

n.datasets<-1000
range<-seq(0.01,0.5,length=10)
COVER<-matrix(0,n.datasets,10)

for(j in 1:10){for(rep in 1:n.datasets){
 s<-cbind(runif(n.obs),runif(n.obs))
 d<-rdist(s)
 d<-as.matrix(d)
 COR<-exp(-d/range[j])
 C<-partial_sill*COR + nugget*diag(n.obs)
 x<-rmvnorm(1,rep(0,n.obs),C)
 y<-rmvnorm(1,x*beta,C)
 x<-as.vector(x)
 y<-as.vector(y)
 fit<-lm(y~x)

 beta_hat<-summary(fit)$coef[2,1]
 beta_se<-summary(fit)$coef[2,2]

 COVER[rep,j]<- beta > beta_hat-1.96*beta_se &
                beta < beta_hat+1.96*beta_se
}}

#Empirical coverage for each range:
CP<-colMeans(COVER)
#Monte Carlo estimate of the error in CP
SE<-sqrt(CP*(1-CP)/n.datasets)


#Plot estimated coverage with errors
plot(range,CP,xlab="Spatial range",ylab="Coverage probability")
lines(range,CP,lwd=2)

points(range,CP-2*SE)
lines(range,CP-2*SE,lty=2)
points(range,CP+2*SE)
lines(range,CP+2*SE,lty=2)