library(sp)
library(maptools)
## Checking rgeos availability: FALSE
## Note: when rgeos is not available, polygon geometry computations in maptools depend on gpclib,
## which has a restricted licence. It is disabled by default;
## to enable gpclib, type gpclibPermit()
library(gstat)
library(ggmap)
## Loading required package: ggplot2
library(ggplot2)
library(nlme)
library(ncf)
library(spdep)
## Loading required package: Matrix
library(rgdal)
## rgdal: version: 1.1-8, (SVN revision 616)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.0.1, released 2015/09/15
## Path to GDAL shared files: C:/Users/Sara/Documents/R/win-library/3.3/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.9.2, 08 September 2015, [PJ_VERSION: 492]
## Path to PROJ.4 shared files: C:/Users/Sara/Documents/R/win-library/3.3/rgdal/proj
## Linking to sp version: 1.2-3
library(raster)
##
## Attaching package: 'raster'
## The following object is masked from 'package:nlme':
##
## getData
library(lattice)
library(moments)
##
## Attaching package: 'moments'
## The following object is masked from 'package:spdep':
##
## geary
library(automap)
lwd <- read.csv("LWD_spatial.csv",header=TRUE)
head(lwd)
## Tsect_Meter Northing Easting log_scat Num_Scat LWD_length LWD_diam
## 1 0.0 454867 5316325 1.477121 30 6.64 0.85
## 2 12.5 454807 5316016 2.012837 103 12.52 0.34
## 3 110.0 454844 5315904 1.662758 46 19.50 0.71
## 4 128.0 454864 5315906 1.785330 61 3.75 0.44
## 5 10.5 454667 5315791 1.397940 25 9.38 0.37
## 6 11.0 454669 5315794 1.633468 43 8.48 0.48
## LWD_height Radius LWD_AREA
## 1 1.30 0.425 7.34
## 2 0.68 0.170 2.89
## 3 0.56 0.355 7.75
## 4 1.51 0.220 2.49
## 5 1.95 0.185 6.77
## 6 1.22 0.240 4.97
kt <-kurtosis(lwd$Num_Scat)
sk <- skewness(lwd$Num_Scat)
my.xlim <- range(lwd$Num_Scat)
h<-hist(lwd$Num_Scat, breaks=10,col="blue", xlab="Scat Abundances(scats/m2)",main="",xlim=my.xlim)
xfit<-seq(min(lwd),max(lwd$Num_Scat),length=100)
yfit<-dnorm(xfit,mean=mean(lwd$Num_Scat),sd=sd(lwd$Num_Scat))
yfit <- yfit*diff(h$mids[1:2])*length(lwd$Num_Scat)
lines(xfit, yfit, col="red", lwd=2)
boxplot(lwd$Num_Scat, horizontal=TRUE, outline=TRUE, axes=FALSE, ylim=my.xlim, col = "green", add = TRUE,boxwex=8)
text(x = 50, y=80, labels = paste("Kurtosis=",round(kt,2)),pos = 4)
text(x = 50, y=70, labels = paste("Skewness=",round(sk,2)),pos = 4)
This distribution is a bit skewed, I’m going to transform the scat data to better deal with this issue.
par(mfrow=c(1,3))
hist(abs(lwd$Num_Scat))
hist(sqrt(lwd$Num_Scat))
hist(log(lwd$Num_Scat))
kt2 <-kurtosis(lwd$log_scat)
sk2 <- skewness(lwd$log_scat)
my.xlim <- range(lwd$log_scat)
h<-hist(lwd$log_scat, breaks=12,col="blue", xlab="Log Scat Abundances(scats/m2)",main="",xlim=my.xlim)
xfit<-seq(min(lwd),max(lwd$log_scat),length=100)
yfit<-dnorm(xfit,mean=mean(lwd$log_scat),sd=sd(lwd$log_scat))
yfit <- yfit*diff(h$mids[1:2])*length(lwd$log_scat)
lines(xfit, yfit, col="red", lwd=2)
boxplot(lwd$log_scat, horizontal=TRUE, outline=TRUE, axes=FALSE, ylim=my.xlim, col = "green", add = TRUE,boxwex=2)
text(x = 0.25, y=28, labels = paste("Kurtosis=",round(kt2,2)),pos = 4)
text(x = 0.25, y=22, labels = paste("Skewness=",round(sk2,2)),pos = 4)
It looks like a log transform is going to work best for the scat data. Now the data has a more ‘normal’ distribution.
coordinates(lwd)<- c("Northing", "Easting")
proj4string(lwd) <- CRS("+init=epsg:32610")
lwd@proj4string
## CRS arguments:
## +init=epsg:32610 +proj=utm +zone=10 +datum=WGS84 +units=m +no_defs
## +ellps=WGS84 +towgs84=0,0,0
plot(lwd, axes=TRUE)
bubble(lwd, xcol="Easting",ycol="Northing",zcol="Num_Scat", maxsize = 2,col="darkgreen",main = "Scat abundances on LWD (scats m2)")
It appears that there is some clustering in the data, with larger abundances of scat located closer together and along the margins of the terraces in the northern and southern ends of the lakebed and smaller scat abundances located closer together near the center of the lakebed.
scatVar1 <- variogram(log_scat~1, lwd, cloud = FALSE, cutoff=300)
plot(scatVar1,pch=20,cex=1.5,col="black",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "bird scat-cutoff=300")
scatVar1 <- variogram(log_scat~1, lwd, cloud = FALSE, cutoff=100)
plot(scatVar1,pch=20,cex=1.5,col="black",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "bird scat-cutoff=100")
scatVar1 <- variogram(log_scat~1, lwd, cloud = FALSE, cutoff=200)
plot(scatVar1,pch=20,cex=1.5,col="black",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "bird scat-cutoff=200")
The variograms change considerably when I adjust the cutoff distance. The average width of the lakebed is ~300–the sill at this cutoff appears to be around 175m. When I change the cutoff distance to 100, the sill is around 80. At 200 m, again the sill is around 175. Taking all these options into account, I think there is spatial autocorrelcation in scat samples out to distances of around 175m. Because of the linear nature of the lakebed and the fact that my transects run in an east/west direction, I’m going to check for directional autocorrelation.
scatvardir <- variogram(log_scat~1, lwd, cloud = FALSE,alpha=c(90,45,0,135),cutoff=600)
plot(scatvardir,pch=20,cex=1.5,col="black",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Log (Number of bird Scat)")
These results are interesting, I was thinking that the 90 degree direction would show strong autocorrelation at distances of 175ish. It seems that autocorrelation may be low in that direction. The 0 degree directions shows strong autocorrelation however, which is interesting–out to distances of ~200 meters.
s <- 1/as.matrix(dist(coordinates(lwd)))
diag(s) <- 0
s[is.infinite(s)] <- 0
moran.test(sqrt(lwd$log_scat),mat2listw(s))
##
## Moran I test under randomisation
##
## data: sqrt(lwd$log_scat)
## weights: mat2listw(s)
##
## Moran I statistic standard deviate = 5.6709, p-value = 7.102e-09
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.337190936 -0.005747126 0.003656992
It looks like there’s weak spatial autocorrelation in the scat data (Moran’s I equals 0.337).
scatI <- spline.correlog(x=coordinates(lwd)[,1], y=coordinates(lwd)[,2],z=lwd$log_scat, resamp=100, quiet=TRUE)
plot(scatI)
The correlogram shows that there is spatial autocorrelation out to distances of 750 meters, after that scat data isn’t autocorrelated. The error at the end of the correlogram shows the amount of uncertainty at greater distances. The overall area of the lakebed is approximately 1 sq km, and the length of the lakebed from north to south is roughly 3.5 km.
lengthVar <- variogram(LWD_length~1, lwd, cloud = FALSE)
plot(lengthVar,pch=20,cex=1.5,col="black",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "LWD length")
l <- 1/as.matrix(dist(coordinates(lwd)))
diag(l) <- 0
l[is.infinite(l)] <- 0
moran.test(lwd$LWD_length, mat2listw(l))
##
## Moran I test under randomisation
##
## data: lwd$LWD_length
## weights: mat2listw(l)
##
## Moran I statistic standard deviate = 0.86473, p-value = 0.1936
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.046073864 -0.005747126 0.003591254
lengthI <- spline.correlog(x=coordinates(lwd)[,1], y=coordinates(lwd)[,2],z=lwd$LWD_length, resamp=100, quiet=TRUE)
plot(lengthI)
The variogram, Moran’s I, and correlogram show that there is no spatial autocorrelation in LWD length.
LWD_diamVar <- variogram(LWD_diam~1, lwd, cloud = FALSE, cutoff=300)
plot(LWD_diamVar,pch=20,cex=1.5,col="black",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "LWD_diameter")
d <- 1/as.matrix(dist(coordinates(lwd)))
diag(d) <- 0
d[is.infinite(d)] <- 0
moran.test(lwd$LWD_diam, mat2listw(d))
##
## Moran I test under randomisation
##
## data: lwd$LWD_diam
## weights: mat2listw(d)
##
## Moran I statistic standard deviate = 0.80317, p-value = 0.2109
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.038844614 -0.005747126 0.003082423
diamI <- spline.correlog(x=coordinates(lwd)[,1], y=coordinates(lwd)[,2],z=lwd$LWD_diam, resamp=100, quiet=TRUE)
plot(diamI)
LWD_heightVar <- variogram(LWD_height~1, lwd, cloud = FALSE,cutoff=300)
plot(LWD_heightVar,pch=20,cex=1.5,col="black",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "LWD_height")
h <- 1/as.matrix(dist(coordinates(lwd)))
diag(h) <- 0
h[is.infinite(h)] <- 0
moran.test(lwd$LWD_height, mat2listw(h))
##
## Moran I test under randomisation
##
## data: lwd$LWD_height
## weights: mat2listw(h)
##
## Moran I statistic standard deviate = 1.2427, p-value = 0.107
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.068207005 -0.005747126 0.003541625
heightI <- spline.correlog(x=coordinates(lwd)[,1], y=coordinates(lwd)[,2],z=lwd$LWD_height, resamp=100, quiet=TRUE)
plot(heightI)
The variogram, Moran’s I, and correlogram show very little spatial autocorrelation in LWD height (Moran’s I = 0.06).
areaVar <- variogram(LWD_AREA~1, lwd, cloud = FALSE,cutoff=300)
plot(areaVar,pch=20,cex=1.5,col="black",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "LWD_area")
a<- 1/as.matrix(dist(coordinates(lwd)))
diag(a) <- 0
a[is.infinite(a)] <- 0
moran.test(lwd$LWD_AREA, mat2listw(a))
##
## Moran I test under randomisation
##
## data: lwd$LWD_AREA
## weights: mat2listw(a)
##
## Moran I statistic standard deviate = -0.60495, p-value = 0.7274
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.040619217 -0.005747126 0.003322856
areaI <- spline.correlog(x=coordinates(lwd)[,1], y=coordinates(lwd)[,2],z=lwd$LWD_AREA, resamp=100, quiet=TRUE)
plot(areaI)
Variograms, Moran’s I, and correlogram show no spatial autocorrelation in LWD area.
In summation, there is litte to no spatial autocorrelation in the LWD variables!
lmlwd <- lm(Num_Scat ~ Northing + Easting + LWD_length + LWD_diam + LWD_height + LWD_AREA, data=lwd)
summary(lmlwd)
##
## Call:
## lm(formula = Num_Scat ~ Northing + Easting + LWD_length + LWD_diam +
## LWD_height + LWD_AREA, data = lwd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56.678 -19.081 -5.173 6.726 110.559
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.637e+04 3.246e+04 -0.504 0.61463
## Northing -2.216e-02 1.136e-02 -1.951 0.05267 .
## Easting 4.976e-03 5.340e-03 0.932 0.35273
## LWD_length 7.900e-01 5.680e-01 1.391 0.16611
## LWD_diam 2.705e+01 8.150e+00 3.319 0.00111 **
## LWD_height 1.645e+01 5.614e+00 2.931 0.00385 **
## LWD_AREA 6.107e-01 6.373e-01 0.958 0.33930
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.23 on 168 degrees of freedom
## Multiple R-squared: 0.3459, Adjusted R-squared: 0.3225
## F-statistic: 14.8 on 6 and 168 DF, p-value: 1.501e-13
It looks like the three variables with greatest significance (best predictors of scat abundance) are height, area, and UTM Northing.
lmlwd <- lm(Num_Scat ~ Northing + LWD_height + LWD_AREA, data=lwd)
summary(lmlwd)
##
## Call:
## lm(formula = Num_Scat ~ Northing + LWD_height + LWD_AREA, data = lwd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -65.254 -18.959 -6.642 5.163 117.373
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.452e+04 3.269e+03 4.440 1.61e-05 ***
## Northing -3.189e-02 7.187e-03 -4.438 1.62e-05 ***
## LWD_height 1.578e+01 5.500e+00 2.869 0.00463 **
## LWD_AREA 1.572e+00 3.904e-01 4.025 8.54e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.02 on 171 degrees of freedom
## Multiple R-squared: 0.3011, Adjusted R-squared: 0.2889
## F-statistic: 24.56 on 3 and 171 DF, p-value: 2.889e-13
When I take out the other variables, the remaining three become very significant. I’ll continue using these variables only and exlude the others. Of course, when it comes to actually running my analysis, I’ll be using AIC scores to back up this model, but for now…
lwd$lmResids<-residuals(lmlwd)
bubble(lwd,zcol='lmResids')
r<-knn2nb(knearneigh(lwd,k=8))
## Warning in knearneigh(lwd, k = 8): knearneigh: identical points found
moran.test(lwd$lmResids, nb2listw(r))
##
## Moran I test under randomisation
##
## data: lwd$lmResids
## weights: nb2listw(r)
##
## Moran I statistic standard deviate = 3.7155, p-value = 0.0001014
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.122063400 -0.005747126 0.001183325
It looks like there is a small amount of spatial autocorrelation in my residuals (0.122)
residsI <- spline.correlog(x=coordinates(lwd)[,1], y=coordinates(lwd)[,2],z=lwd$lmResids, resamp=100, quiet=TRUE)
plot(residsI)
This shows there is spatial autocorrelation in the residuals out to distances of ~200 meters, although it is pretty low. I don’t think I need to run a gls model, I’m going to stick with OLS and call it good.
lmResidsVar<-variogram(lmResids~1,data=lwd)
plot(lmResidsVar)
sph.model<-vgm(psill=1500, model="Sph",range=500, nugget=600)
sph.fit<-fit.variogram(object=lmResidsVar,model=sph.model)
plot(lmResidsVar,model=sph.fit)
It looks like my residuals are spatially autocorrelated out to distances of ~200 meters, although this Moran’s I value is fairly low (0.12).
e <- extent(lwd)
lwd.grid <- raster(e)
proj4string(lwd.grid) <- proj4string(lwd)
res(lwd.grid) <- c(20,20)
lwd.grid <- as(lwd.grid,"SpatialPointsDataFrame")
plot(lwd.grid,col="grey")
points(lwd,"Num_Scat",pch=20,col="red")
lwdIDW1 <- idw(formula=lwd$Num_Scat~1,locations=lwd,newdata=lwd.grid,idp=0.25)
## [inverse distance weighted interpolation]
lwdIDW2 <- idw(formula=lwd$Num_Scat~1,locations=lwd,newdata=lwd.grid,idp=0.5)
## [inverse distance weighted interpolation]
lwdIDW3 <- idw(formula=lwd$Num_Scat~1,locations=lwd,newdata=lwd.grid,idp=1.0)
## [inverse distance weighted interpolation]
par(mfrow=c(1,3))
spplot(lwdIDW1,"var1.pred", main="0.25")
spplot(lwdIDW2,"var1.pred", main="0.5")
spplot(lwdIDW3,"var1.pred", main="1.0")
I interpolated scat abundance using three different values of power (0.25, 0.5, and 1.0). The lower values indicate that more weight was given to distant points, which improves the amount of interpolation, though how much error is introduced is unknown. I will err on the conservative end and use a median value of 0.5. At that power, There is definitely a lack of information in the dark blue circle at the bottom right and lots of interpolation occurring at the upper left, with median values in purple.
scatvar <- variogram(Num_Scat~1, lwd)
plot(scatvar,pch=20,cex=1.5,col="black",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Scats per m2")
I’ll try it with an exponential model fit…
scat.var.cropped <- variogram(Num_Scat~1,lwd)
exp.model <- vgm(psill=2000, model="Exp", range=1000, nugget=900)
exp.fit <- fit.variogram(object=scat.var.cropped, model=exp.model)
plot(scat.var.cropped, pch=20, cex=1.5, col="black", model=exp.fit)
Now for the spherical model…
scat.var.cropped2 <- variogram(Num_Scat~1,lwd)
sph.model <- vgm(psill=2000, model="Sph", range=1000, nugget=900)
sph.fit <- fit.variogram(object=scat.var.cropped2, model=sph.model)
plot(scat.var.cropped2, pch=20, cex=1.5, col="black", model=sph.fit)
I think either model is appropriate, but because the spherical model does a better job of capturing the sill, I’ll go with that model. Let’s go ahead and krige the spherical model using automatic kriging.
scatKrig <- autoKrige(Num_Scat~1, lwd, lwd.grid)
## Warning in autoKrige(Num_Scat ~ 1, lwd, lwd.grid): Removed 10 duplicate
## observation(s) in input_data:
## coordinates Tsect_Meter log_scat Num_Scat LWD_length LWD_diam
## 9 (454776, 5315797) 136.0 1.2787536 19 16.00 0.54
## 16 (454693, 5315695) 14.3 1.5910646 39 9.60 0.47
## 36 (454776, 5315477) 107.0 0.0000000 1 5.43 0.64
## 53 (454678, 5315186) 26.5 1.5314789 34 16.50 0.25
## 80 (455089, 5314852) 359.0 0.0000000 1 2.43 0.50
## 78 (455090, 5314851) 357.3 0.3010300 2 1.94 0.38
## 78.1 (455090, 5314851) 357.3 0.3010300 2 1.94 0.38
## 82 (455090, 5314851) 360.0 1.0791812 12 8.77 0.29
## 133 (455555, 5314168) 97.0 1.0791812 12 4.35 0.21
## 167 (455416, 5314570) 88.0 0.0000000 1 11.03 0.15
## 10 (454776, 5315797) 136.5 2.2479733 177 21.00 0.35
## 17 (454693, 5315695) 14.5 2.0969100 125 8.95 0.22
## 37 (454776, 5315477) 107.2 0.0000000 0 5.78 0.36
## 56 (454678, 5315186) 30.0 0.6020600 4 7.92 0.36
## 81 (455089, 5314852) 359.1 0.0000000 1 3.50 0.13
## 82.1 (455090, 5314851) 360.0 1.0791812 12 8.77 0.29
## 83 (455090, 5314851) 360.1 0.9542425 9 10.53 0.34
## 83.1 (455090, 5314851) 360.1 0.9542425 9 10.53 0.34
## 134 (455555, 5314168) 97.5 1.4471580 28 14.50 0.43
## 168 (455416, 5314570) 88.5 1.3010300 20 25.50 0.49
## LWD_height Radius LWD_AREA lmResids
## 9 0.55 0.270 4.75 -9.961230
## 16 1.34 0.235 6.05 -7.118164
## 36 0.69 0.320 2.40 -26.477500
## 53 0.65 0.125 2.68 3.588287
## 80 0.86 0.250 1.04 -17.040772
## 78 1.29 0.190 0.95 -22.653237
## 78.1 1.29 0.190 0.95 -22.653237
## 82 0.89 0.145 2.26 -8.399554
## 133 0.61 0.105 0.56 13.520498
## 167 0.85 0.075 1.41 -7.035709
## 10 2.17 0.175 15.95 104.872733
## 17 1.89 0.110 3.72 73.863960
## 37 0.80 0.180 1.66 -28.050481
## 56 0.51 0.180 1.45 -22.269423
## 81 1.02 0.065 0.46 -18.654240
## 82.1 0.89 0.145 2.26 -8.399554
## 83 0.72 0.170 2.58 -9.219683
## 83.1 0.72 0.170 2.58 -9.219683
## 134 1.48 0.215 9.23 2.166077
## 168 2.29 0.245 28.61 -53.505403
## Warning in autofitVariogram(formula, data_variogram, model = model, kappa = kappa, : Some models where removed for being either NULL or having a negative sill/range/nugget,
## set verbose == TRUE for more information
## [using ordinary kriging]
plot(scatKrig)
Okay, there is obviously a LOT of error (all the dark purple areas) in the plot due to how much of the lakebed was un-sampled. The prediction values look so-so, there are a few small clusters in the north and south that have good prediciton values. This looks comparable to the IDW plot (power=1).
g<-gstat(NULL,"Num_Scat",Num_Scat~1,lwd)
g<-gstat(g,"LWD_length",LWD_length~1,lwd)
g<-gstat(g,"LWD_height",LWD_height~1,lwd)
g<-gstat(g,"LWD_diam",LWD_diam~1,lwd)
g<-gstat(g, "LWD_AREA",LWD_AREA~1,lwd)
g
## data:
## Num_Scat : formula = Num_Scat`~`1 ; data dim = 175 x 9
## LWD_length : formula = LWD_length`~`1 ; data dim = 175 x 9
## LWD_height : formula = LWD_height`~`1 ; data dim = 175 x 9
## LWD_diam : formula = LWD_diam`~`1 ; data dim = 175 x 9
## LWD_AREA : formula = LWD_AREA`~`1 ; data dim = 175 x 9
vm<-variogram(g)
vm.fit<-fit.lmc(vm,g,vgm(1,"Sph",800,1))
plot(vm,vm.fit)
These plots show low correlations between my variables. I want to back that up with some numbers…
cor(as.data.frame(lwd)[c("Num_Scat", "LWD_length","LWD_height","LWD_diam","LWD_AREA")])
## Num_Scat LWD_length LWD_height LWD_diam LWD_AREA
## Num_Scat 1.0000000 0.365522800 0.3687812 0.324734004 0.4536581
## LWD_length 0.3655228 1.000000000 0.2446943 0.004396164 0.7262889
## LWD_height 0.3687812 0.244694296 1.0000000 0.291773792 0.5974930
## LWD_diam 0.3247340 0.004396164 0.2917738 1.000000000 0.3403484
## LWD_AREA 0.4536581 0.726288873 0.5974930 0.340348378 1.0000000
This tells me that scat abundance is most correlated with LWD area (0.45), but this is not exactly a strong association. Because I don’t have strong correlations between my variables, I don’t have a reason to continue with cokriging.
My findings support my hypotheses 1)The OLS model selection showed that height was a good predictor of scat abundance–therefore,scats are most abundant on LWD with tall perches. 2)Scats are more abundant on LWD closer to the vegetated margins (along the edges) of the terraces. Mapping my data showed me this.