Markdown Author: Jessie Bell
Download this Rmd: Top right corner → Code → Download Rmd
Let’s turn to the California precipitation data that Hijmans uses for the nearest neighbor interpolation but we will do something cooler. We will make a continuous 10x10km surface of precipitation from the 432 locations of long-term precipitation records.
#had to download the package and install as a local file:
#install.packages("rspat_1.0-1.tar.gz", repos = NULL, type="source")
# precip point data
prcpCA <- readRDS("prcpCA.rds")
# empty grid to interpolate into
gridCA <- readRDS("gridCA.rds")
# make as sf
prcpCA_sf <- prcpCA |> st_as_sf(coords = c("X", "Y")) |>
st_set_crs(value = 3310)
# simple map -- see postscript below for a fancy map
prcpCA_sf %>% ggplot() +
geom_sf(aes(fill=ANNUAL,size=ANNUAL),color="white",
shape=21,alpha=0.8) +
scale_fill_continuous(type = "viridis",name="mm") +
labs(title="Total Annual Precipitation") +
scale_size(guide="none")
idw_CA_model <- gstat(formula=ANNUAL~1,
locations=prcpCA_sf,
set=list(idp=2)) #using p of 2
idw_CA_model_1 <- gstat(formula=ANNUAL~1,
locations=prcpCA_sf,
set=list(idp=1)) #using p of 1
idw_CA_model_half <- gstat(formula=ANNUAL~1,
locations=prcpCA_sf,
set=list(idp=1)) #using p of 0.5
#turn grid into sf
gridCA_sf <- st_as_sf(gridCA,
coords = c("X","Y"),
crs = (value = 3310))
pred_IDW_CA_sf <- predict(idw_CA_model, gridCA_sf)
## [inverse distance weighted interpolation]
pred_IDW_CA_sf_1 <- predict(idw_CA_model_1, gridCA_sf)
## [inverse distance weighted interpolation]
pred_IDW_CA_sf_half <- predict(idw_CA_model_half, gridCA_sf)
## [inverse distance weighted interpolation]
SpatRastersf_CA_rast <-function(sfObject,variableIndex = 1){
# coerce sf to a data.frame
dfObject <- data.frame(st_coordinates(sfObject),
z=as.data.frame(sfObject)[,variableIndex])
# coerce data.frame to SpatRaster
rastObject <- rast(dfObject,crs=crs(sfObject))
names(rastObject) <- names(sfObject)[variableIndex]
return(rastObject)
}
IDW_CA_rast <- sf_CA_rast(pred_IDW_CA_sf)
IDW_CA_rast
## class : SpatRaster
## dimensions : 105, 91, 1 (nrow, ncol, nlyr)
## resolution : 10000, 10000 (x, y)
## extent : -371692.4, 538307.6, -599520.9, 450479.1 (xmin, xmax, ymin, ymax)
## coord. ref. : NAD83 / California Albers (EPSG:3310)
## source(s) : memory
## name : var1.pred
## min value : 62.70143
## max value : 2270.15526
IDW_CA_rast_1 <- sf_CA_rast(pred_IDW_CA_sf_1)
IDW_CA_rast_half <- sf_CA_rast(pred_IDW_CA_sf_half)
# and plot
ggplot() +
geom_spatraster(data=IDW_CA_rast, mapping = aes(fill=var1.pred),alpha=0.8) +
scale_fill_continuous(type = "viridis",name="Precipitation(mm)",na.value = "transparent") +
labs(title="ANNUAL PRECIPITATION", subtitle = "IDW with p=2") +
theme_minimal()
Make it better:
ggplot() +
geom_spatraster_contour_filled(data=IDW_CA_rast,
breaks = seq(from=0, to=2000,by=200),
alpha = 0.9) +
scale_fill_discrete(name="Precipitation (mm)",na.value = "transparent") +
labs(title="Annual Precipitation", subtitle = "IDW with p=2") +
theme_minimal()
ggplot() +
geom_spatraster_contour_filled(data=IDW_CA_rast_1,
breaks = seq(from=0, to=2000,by=200),
alpha = 0.9) +
scale_fill_discrete(name="Precipitation (mm)",na.value = "transparent") +
labs(title="Annual Precipitation", subtitle = "IDW with p=1") +
theme_minimal()
ggplot() +
geom_spatraster_contour_filled(data=IDW_CA_rast_half,
breaks = seq(from=0, to=2000,by=200),
alpha = 0.9) +
scale_fill_discrete(name="Precipitation (mm)",na.value = "transparent") +
labs(title="Annual Precipitation", subtitle = "IDW with p=0.5") +
theme_minimal()
obs <- prcpCA_sf$ANNUAL
preds <- terra::extract(IDW_CA_rast, prcpCA_sf, na.rm=T) %>% pull(var1.pred)
rsq <- cor(obs,preds,use="complete.obs")^2
rmse <- sqrt(mean((preds - obs)^2,na.rm=TRUE))
rsq
## [1] 0.9828998
rmse
## [1] 65.29779
#p=2 has highest R^2
ggplot() +
geom_abline(slope=1,intercept = 0) +
geom_point(aes(x=obs,y=preds), color="plum") +
coord_fixed(ratio=1, xlim = range(preds,obs),ylim = range(preds,obs)) +
labs(x="Observed Values",
y="Predicted Values",
title="Annual Precipitation (mm) CA")
Witholding 25% just like in the example:
n <- nrow(prcpCA_sf)
rows4test <- sample(x = 1:n,size = n*0.25)
CATest <- prcpCA_sf[rows4test,]
CATrain <- prcpCA_sf[-rows4test,]
# note that we build the model with CATrain
idw_p2_model <- gstat(formula=ANNUAL~1,
locations = CATrain,
set=list(idp = 2))
prcpIDW_p2_sf <- predict(idw_p2_model,gridCA_sf)
## [inverse distance weighted interpolation]
prcpIDW_p2_rast <- sf_CA_rast(prcpIDW_p2_sf)
# and plot
ggplot() +
geom_spatraster(data=prcpIDW_p2_rast, mapping = aes(fill=var1.pred),alpha=0.8) +
scale_fill_continuous(type = "viridis",name="Precipitation (mm)",na.value = "transparent") +
labs(title="Annual Precipitation", subtitle = "IDW with p=2") +
theme_minimal()
We built the model with the training data CATrain which
had 75% of the original data. We kept 25% of the data hidden from the
model in CATest. By comparing the predictions from the
training data to the withheld values from the testing data we can assess
the out-of-sample skill.
# note use of CATest here
obs <- CATest$ANNUAL
preds <- terra::extract(prcpIDW_p2_rast, CATest) %>% pull(var1.pred)
rsq <- cor(obs,preds,use="complete.obs")^2
rmse <- sqrt(mean((preds - obs)^2,na.rm=TRUE))
rsq
## [1] 0.7510978
rmse
## [1] 218.0212
ggplot() +
geom_abline(slope=1,intercept = 0) +
geom_point(aes(x=obs,y=preds), color="steelblue") +
coord_fixed(ratio=1, xlim = range(preds,obs),ylim = range(preds,obs)) +
labs(x="Observed Values",
y="Predicted Values",
title="Annual Precipitation (mm)")
Looking at the plot above you can see that the skill of the model still has not decreased by very much, with an \(R^2\) = 0.8198488, and RMSE = 232.5713. You can also see that both predicted and observed values fall equally above and below the line for these points.
Here is the RMSE of the NULL case where everything is predicted as the mean:
obs <- prcpCA_sf$ANNUAL
preds <- mean(prcpCA_sf$ANNUAL)
rmseNULL <- sqrt(mean((preds - obs)^2))
rmseNULL
## [1] 438.947
rmse
## [1] 218.0212
Here you can see that rmse is quite a bit lower that
rmseNULL. Now, let’s calculate the relative performance of
rmse:
1 - (rmse / rmseNULL)
## [1] 0.5033085
tmap_mode("view")
## tmap mode set to interactive viewing
#turn sf to rast
CAIDW_p2_rast <- sf_CA_rast(pred_IDW_CA_sf)
tm_shape(CAIDW_p2_rast) +
tm_raster(col="var1.pred", palette = "viridis")+
tm_shape(prcpCA_sf)+
tm_symbols(col="tomato",alpha=0.5,size = 0.5)