Here is some information regarding the {spmodel} package.
“spmodel is an R package used to fit, summarize, and predict for a variety spatial statistical models applied to point-referenced or areal (lattice) data. Parameters are estimated using various methods, including likelihood-based optimization and weighted least squares based on variograms. Additional modeling features include anisotropy, non-spatial random effects, partition factors, big data approaches, and more. Model-fit statistics are used to summarize, visualize, and compare models. Predictions at unobserved locations are readily obtainable.” https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0282524
Here is a quick tutorial/presentation on how to use the {spmodel} package in R. I hope you like it!
setwd("~/Documents/Spatial statistical modeling and prediction using the spmodel package in R")
# install.packages(c("tidyverse","spmodel","sf","mapview"))
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(spmodel)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(mapview)
# Import data
usa_pop_2020_od_deaths_2021<- read.csv("usa_state_pop_2020_od_deaths_2021.csv")
head(usa_pop_2020_od_deaths_2021)
## usa_state_longitude usa_state_latitude usa_state od_deaths_2021 pop_2020
## 1 -154.49306 63.58875 Alaska 260 733391
## 2 -86.90230 32.31823 Alabama 1408 5024279
## 3 -91.83183 35.20105 Arkansas 637 3011524
## 4 -111.09373 34.04893 Arizona 2730 7151502
## 5 -119.41793 36.77826 California 10901 39538223
## 6 -105.78207 39.55005 Colorado 1887 5773714
usa_pop20_oddeaths21_sf<- st_as_sf(usa_pop_2020_od_deaths_2021, coords = c("usa_state_longitude","usa_state_latitude"), crs = 4326)
st_crs(usa_pop20_oddeaths21_sf)
## Coordinate Reference System:
## User input: EPSG:4326
## wkt:
## GEOGCRS["WGS 84",
## ENSEMBLE["World Geodetic System 1984 ensemble",
## MEMBER["World Geodetic System 1984 (Transit)"],
## MEMBER["World Geodetic System 1984 (G730)"],
## MEMBER["World Geodetic System 1984 (G873)"],
## MEMBER["World Geodetic System 1984 (G1150)"],
## MEMBER["World Geodetic System 1984 (G1674)"],
## MEMBER["World Geodetic System 1984 (G1762)"],
## MEMBER["World Geodetic System 1984 (G2139)"],
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ENSEMBLEACCURACY[2.0]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## USAGE[
## SCOPE["Horizontal component of 3D system."],
## AREA["World."],
## BBOX[-90,-180,90,180]],
## ID["EPSG",4326]]
head(usa_pop20_oddeaths21_sf)
## Simple feature collection with 6 features and 3 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -154.4931 ymin: 32.31823 xmax: -86.9023 ymax: 63.58875
## Geodetic CRS: WGS 84
## usa_state od_deaths_2021 pop_2020 geometry
## 1 Alaska 260 733391 POINT (-154.4931 63.58875)
## 2 Alabama 1408 5024279 POINT (-86.9023 32.31823)
## 3 Arkansas 637 3011524 POINT (-91.83183 35.20105)
## 4 Arizona 2730 7151502 POINT (-111.0937 34.04893)
## 5 California 10901 39538223 POINT (-119.4179 36.77826)
## 6 Colorado 1887 5773714 POINT (-105.7821 39.55005)
library(mapview)
mapview(usa_pop20_oddeaths21_sf, zcol = "od_deaths_2021",
layer.name = "Number of Overdose Deaths 2021")
Note. For the spmod model, 88% of the variability in od_deaths_2021 is explained by pop_2020.
# Remove scientific notation
options(scipen=999)
spmod <- splm(od_deaths_2021 ~ pop_2020, usa_pop20_oddeaths21_sf, spcov_type = "exponential")
summary(spmod)
##
## Call:
## splm(formula = od_deaths_2021 ~ pop_2020, data = usa_pop20_oddeaths21_sf,
## spcov_type = "exponential")
##
## Residuals:
## Min 1Q Median 3Q Max
## -3001.202 -288.229 6.628 422.130 2047.811
##
## Coefficients (fixed):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 195.59581230 222.99595596 0.877 0.38
## pop_2020 0.00026727 0.00001393 19.192 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Pseudo R-squared: 0.8805
##
## Coefficients (exponential spatial covariance):
## de ie range
## 279957.258 346065.108 6.739
Suppose we want to quantify the difference in model quality between the spatial model and a non-spatial model using the AIC and AICc criteria.
Note. For the lmod model, roughly 87% of the variability in od_deaths_2021 is explained by pop_2020.
lmod <- splm(od_deaths_2021 ~ pop_2020, usa_pop20_oddeaths21_sf, spcov_type = "none")
summary(lmod)
##
## Call:
## splm(formula = od_deaths_2021 ~ pop_2020, data = usa_pop20_oddeaths21_sf,
## spcov_type = "none")
##
## Residuals:
## Min 1Q Median 3Q Max
## -3222.4 -416.9 -139.0 284.5 1885.3
##
## Coefficients (fixed):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 318.17122937 146.04631862 2.179 0.0294 *
## pop_2020 0.00027065 0.00001503 18.005 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Pseudo R-squared: 0.8664
##
## Coefficients (none spatial covariance):
## ie
## 622273
AIC(spmod, lmod)
## df AIC
## spmod 3 847.0740
## lmod 1 851.0316
AICc(spmod, lmod)
## df AICc
## spmod 3 847.5740
## lmod 1 851.1116
Note. The noticeably lower mean-squared-prediction error (MSPE) of the spatial model indicates that it is a better fit to the data than the non-spatial model.
loocv(spmod)
## # A tibble: 1 × 4
## bias MSPE RMSPE cor2
## <dbl> <dbl> <dbl> <dbl>
## 1 -14.4 651809. 807. 0.856
loocv(lmod)
## # A tibble: 1 × 4
## bias MSPE RMSPE cor2
## <dbl> <dbl> <dbl> <dbl>
## 1 -13.4 747788. 865. 0.835
In this section, we show how to use predict() to perform spatial prediction (also called Kriging) in spmodel.
usa_pop20_od_deaths21_points_preds<- read.csv("usa_state_pop_2020_od_deaths_2021.csv")
head(usa_pop20_od_deaths21_points_preds)
## usa_state_longitude usa_state_latitude usa_state od_deaths_2021 pop_2020
## 1 -154.49306 63.58875 Alaska 260 733391
## 2 -86.90230 32.31823 Alabama 1408 5024279
## 3 -91.83183 35.20105 Arkansas 637 3011524
## 4 -111.09373 34.04893 Arizona 2730 7151502
## 5 -119.41793 36.77826 California 10901 39538223
## 6 -105.78207 39.55005 Colorado 1887 5773714
usa_pop20_od_deaths21_points_preds_sf<- st_as_sf(usa_pop20_od_deaths21_points_preds, coords = c("usa_state_longitude","usa_state_latitude"), crs = 4326)
st_crs(usa_pop20_od_deaths21_points_preds_sf)
## Coordinate Reference System:
## User input: EPSG:4326
## wkt:
## GEOGCRS["WGS 84",
## ENSEMBLE["World Geodetic System 1984 ensemble",
## MEMBER["World Geodetic System 1984 (Transit)"],
## MEMBER["World Geodetic System 1984 (G730)"],
## MEMBER["World Geodetic System 1984 (G873)"],
## MEMBER["World Geodetic System 1984 (G1150)"],
## MEMBER["World Geodetic System 1984 (G1674)"],
## MEMBER["World Geodetic System 1984 (G1762)"],
## MEMBER["World Geodetic System 1984 (G2139)"],
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ENSEMBLEACCURACY[2.0]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## USAGE[
## SCOPE["Horizontal component of 3D system."],
## AREA["World."],
## BBOX[-90,-180,90,180]],
## ID["EPSG",4326]]
head(usa_pop20_od_deaths21_points_preds_sf)
## Simple feature collection with 6 features and 3 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -154.4931 ymin: 32.31823 xmax: -86.9023 ymax: 63.58875
## Geodetic CRS: WGS 84
## usa_state od_deaths_2021 pop_2020 geometry
## 1 Alaska 260 733391 POINT (-154.4931 63.58875)
## 2 Alabama 1408 5024279 POINT (-86.9023 32.31823)
## 3 Arkansas 637 3011524 POINT (-91.83183 35.20105)
## 4 Arizona 2730 7151502 POINT (-111.0937 34.04893)
## 5 California 10901 39538223 POINT (-119.4179 36.77826)
## 6 Colorado 1887 5773714 POINT (-105.7821 39.55005)
usa_pop20_od_deaths21_points_preds_sf$od_predictions_2022 <- predict(spmod, newdata = usa_pop20_od_deaths21_points_preds_sf)
head(usa_pop20_od_deaths21_points_preds_sf)
## Simple feature collection with 6 features and 4 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -154.4931 ymin: 32.31823 xmax: -86.9023 ymax: 63.58875
## Geodetic CRS: WGS 84
## usa_state od_deaths_2021 pop_2020 geometry
## 1 Alaska 260 733391 POINT (-154.4931 63.58875)
## 2 Alabama 1408 5024279 POINT (-86.9023 32.31823)
## 3 Arkansas 637 3011524 POINT (-91.83183 35.20105)
## 4 Arizona 2730 7151502 POINT (-111.0937 34.04893)
## 5 California 10901 39538223 POINT (-119.4179 36.77826)
## 6 Colorado 1887 5773714 POINT (-105.7821 39.55005)
## od_predictions_2022
## 1 332.5419
## 2 1782.3476
## 3 995.5905
## 4 2272.1104
## 5 10799.5643
## 6 1657.3861
library(mapview)
mapview(usa_pop20_od_deaths21_points_preds_sf, zcol = "od_predictions_2022",
layer.name = "Number of Predictived Overdose Deaths in 2022")
Special thanks to the authors/developers for this awesome R package: https://cran.r-project.org/web/packages/spmodel/index.html
Disclaimer. All presentation contents are the responsibility of the author and do not represent the official views of any organization.