Packages Used
library(tidyr)
library(ggplot2)
library(mgcv)
library(raster)
library(dplyr)
library(sf)
library(tictoc)
Load in the Data
exp_data <- read.csv("D:\\Ukraine_GradProject\\Explosion_Model\\exp_all.csv")
Vegetation Change Model
tic() # Start Time
# The Explosion Model using the bam function from the mgcv package
# The spline is created from X&Y coordinates, thin-plated spline, and a certain number of knots ("k")
# A higher "k" value will have a longer computational time than a smaller "k" value will
# Could use the gam function from the mgcv package, gam is more robust, while bam is used for large amounts of data
bamexp_75 <- bam(Slope ~ s(POINT_X, POINT_Y, bs = 'tp', k = 75) + NBR + EXP_INT + TEMP + PRCP + factor(HSG2),
family = scat(), # Uses a T-distribution
data = exp_data)
toc() # End Time
## 8900.98 sec elapsed
Checking Model Results
summary(bamexp_75) # Summary function is used to find the Estimate Coefficients, Standard Errors, and Significance
##
## Family: Scaled t(3,572114851.771)
## Link function: identity
##
## Formula:
## Slope ~ s(POINT_X, POINT_Y, bs = "tp", k = 75) + NBR + EXP_INT +
## TEMP + PRCP + factor(HSG2)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.501e+11 2.087e+08 719.24 <2e-16 ***
## NBR -5.558e+08 7.974e+02 -696955.66 <2e-16 ***
## EXP_INT -3.119e+04 2.357e+03 -13.23 <2e-16 ***
## TEMP -1.348e+10 1.392e+05 -96880.57 <2e-16 ***
## PRCP -1.537e+06 1.177e+05 -13.06 <2e-16 ***
## factor(HSG2)2 3.148e+08 1.870e+06 168.32 <2e-16 ***
## factor(HSG2)4 3.281e+08 1.890e+06 173.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(POINT_X,POINT_Y) 73.99 74 6824201 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.00118 Deviance explained = 92.9%
## fREML = 4.344e+06 Scale est. = 1 n = 2901625
gam.check(bamexp_75) # Gam.check is used to check the model for linearity, co-variance, and normally distributed residuals

##
## Method: fREML Optimizer: perf newton
## full convergence after 12 iterations.
## Gradient range [0.0006547716,0.0006547716]
## (score 4344008 & scale 1).
## Hessian positive definite, eigenvalue range [35.97556,35.97556].
## Model rank = 81 / 81
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(POINT_X,POINT_Y) 74 74 0.92 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Checks the k-index value for the model, closer to 1 is considered acceptable.
How to Plot the Splines
Splines show areas of high and low levels of space throughout a
study area
data75 <- exp_data %>% drop_na() # Drops any NA's and creates a new dataframe without any NA's
# Pred_spline is Added onto the new dataframe as a new column
data75$pred_spline <- predict(bamexp_75, # The predict function is used to predict what the values of the spline will be based on the values in the Explosion Model
new.data = exp_data %>% drop_na())
data75$pred_plot <- scales::squish(data75$pred_spline,
range = c(quantile(data75$pred_spline, 0.01),
quantile(data75$pred_spline, 0.99)))
ggplot(data75, aes(POINT_X, POINT_Y, fill = pred_plot)) + #Plotting each point with its predict spline value
geom_tile() +
scale_fill_viridis_c() +
coord_sf(crs = 4326) +
theme_minimal()
