Install Packages

if (!require("rspat")) remotes::install_github("rspatial/rspat") ###Loading required package: rspat ###Loading required package: terra ###terra 1.7.62 
## Loading required package: rspat
## Loading required package: terra
## terra 1.7.83
if (!require("predicts")) install.packages("predicts") ###Loading required package: predicts 
## Loading required package: predicts
if (!require("geodata")) install.packages("geodata") ###Loading required package: geodata 
## Loading required package: geodata

call terra and rspat package

library(terra)     # Spatial data analysis
library(rspat)     # Spatial distribution modeling
bf <- spat_data("bigfoot")  # Load dataset for analysis

Observation

dim(bf)   # Get data dimensions
## [1] 3092    3
head(bf)  # Preview first few rows of data
##         lon      lat Class
## 1 -142.9000 61.50000     A
## 2 -132.7982 55.18720     A
## 3 -132.8202 55.20350     A
## 4 -141.5667 62.93750     A
## 5 -149.7853 61.05950     A
## 6 -141.3165 62.77335     A

Plot: Species Occurrence Points

plot(bf[,1:2], cex=0.5, col="red")  # Plot species occurrence points
library(geodata)                    # Load geodata
wrld <- geodata::world(path=".")    # Load world boundaries
bnds <- wrld[wrld$NAME_0 %in% 
               c("Canada", "Mexico", "United States"), ] 
lines(bnds)                         # Overlay boundaries

- Purpose: Visualize species occurrence in specific regions. - Interpretation: Map with red points showing occurrences and boundaries for selected countries. - Use: Explore spatial distribution and regional context of data.

Visualization: Climate Data

wc <- geodata::worldclim_global("bio", res=10, ".")   # Load climate data
plot(wc[[c(1, 12)]], nr=2)                           # Plot temperature and precipitation

- Purpose: Visualize global climate data. - Interpretation: Displays two climate variables (temperature and precipitation) globally. - Use: Helps understand environmental variables relevant to species distribution.

Climate Data

bfc <- extract(wc, bf[,1:2])  # Extract climate data for species occurrence points
head(bfc, 3)                  # Preview the first 3 rows
##   ID wc2.1_10m_bio_1 wc2.1_10m_bio_2 wc2.1_10m_bio_3 wc2.1_10m_bio_4
## 1  1       -1.832979       12.504708        28.95899       1152.4308
## 2  2        6.360650        5.865935        32.27475        462.5731
## 3  3        6.360650        5.865935        32.27475        462.5731
##   wc2.1_10m_bio_5 wc2.1_10m_bio_6 wc2.1_10m_bio_7 wc2.1_10m_bio_8
## 1        20.34075      -22.840000        43.18075        5.327750
## 2        16.65505       -1.519947        18.17500        3.964495
## 3        16.65505       -1.519947        18.17500        3.964495
##   wc2.1_10m_bio_9 wc2.1_10m_bio_10 wc2.1_10m_bio_11 wc2.1_10m_bio_12
## 1      -0.6887083         11.80792       -16.038542              991
## 2      10.4428196         12.28183         1.467686             3079
## 3      10.4428196         12.28183         1.467686             3079
##   wc2.1_10m_bio_13 wc2.1_10m_bio_14 wc2.1_10m_bio_15 wc2.1_10m_bio_16
## 1              120               42         31.32536              337
## 2              448              141         35.27518             1127
## 3              448              141         35.27518             1127
##   wc2.1_10m_bio_17 wc2.1_10m_bio_18 wc2.1_10m_bio_19
## 1              157              288              216
## 2              468              630              873
## 3              468              630              873
  • Extract climate data (e.g., temperature, precipitation) at the locations of species occurrences.

Remove the First Column with the ID

bfc <- bfc[,-1]  # Remove the ID column

Plot the Species’ Distribution

plot(bfc[ ,"wc2.1_10m_bio_1"], bfc[, "wc2.1_10m_bio_12"], col="red", xlab="Annual mean temperature (.C)", ylab="Annual precipitation (mm)") 

  • Purpose: Visualize the relationship between two key climate variables.
  • Interpretation: A scatter plot of annual mean temperature vs. precipitation for species occurrences.
  • Use: Explore the climate preferences or niches of the species.

Background data

ext_bf <- ext(vect(bf[, 1:2])) + 1  # Create an extended spatial extent around occurrence points
ext_bf                             # Display the extent 
## SpatExtent : -157.75, -63.4627, 24.141, 70.5 (xmin, xmax, ymin, ymax)

Take 5000 Random Samples

set.seed(0) 
window(wc) <- ext_bf 
bg <- spatSample(wc, 5000, "random", na.rm=TRUE, xy=TRUE) 
head(bg) 
##           x        y wc2.1_10m_bio_1 wc2.1_10m_bio_2 wc2.1_10m_bio_3
## 1  -99.2500 66.75000     -13.2934895        7.870646        14.96619
## 2 -106.0833 42.08333       5.6722708       14.530958        36.82943
## 3 -111.9167 46.58333       6.7605939       14.135854        35.23372
## 4 -106.9167 54.75000       0.4086979       11.528605        24.43290
## 5 -118.2500 67.08333      -9.1363859        8.185354        16.34505
## 6 -111.2500 38.91667       8.4194584       15.997125        38.84047
##   wc2.1_10m_bio_4 wc2.1_10m_bio_5 wc2.1_10m_bio_6 wc2.1_10m_bio_7
## 1       1638.6833        15.42850       -37.16100        52.58950
## 2        894.3715        27.86600       -11.58875        39.45475
## 3        927.7927        28.14375       -11.97650        40.12025
## 4       1290.1088        22.55225       -24.63250        47.18475
## 5       1567.0846        17.46575       -32.61275        50.07850
## 6        904.0610        30.49050       -10.69625        41.18675
##   wc2.1_10m_bio_8 wc2.1_10m_bio_9 wc2.1_10m_bio_10 wc2.1_10m_bio_11
## 1        6.484917      -31.617332         7.518209        -31.76942
## 2        9.226916       -4.839750        17.168291         -4.83975
## 3       15.638333       -4.921750        18.186209         -4.92175
## 4       15.417084      -13.864500        15.417084        -16.31392
## 5        8.609292      -21.353209        10.573625        -27.49783
## 6       19.076958        3.179209        19.812834         -2.50475
##   wc2.1_10m_bio_12 wc2.1_10m_bio_13 wc2.1_10m_bio_14 wc2.1_10m_bio_15
## 1              171               33                4         70.29919
## 2              288               42               13         38.78144
## 3              293               48                9         53.40759
## 4              471               86               16         58.32499
## 5              223               43                7         61.21693
## 6              228               28               11         32.40370
##   wc2.1_10m_bio_16 wc2.1_10m_bio_17 wc2.1_10m_bio_18 wc2.1_10m_bio_19
## 1               90               13               78               13
## 2              112               41               90               41
## 3              129               35              115               35
## 4              220               53              220               56
## 5              108               27               93               29
## 6               83               40               72               44
  • Purpose: Generate random background points from the climate data within the extent.
  • Use: Serves as control data for comparison with species occurrences in modeling.

Plot spatSample

plot(bg[, c("x", "y")]) 

- Purpose: Visualize the distribution of randomly sampled points. - Interpretation: A scatter plot showing the locations of sampled points. - Use: Confirms spatial coverage and distribution of background data.

Data

head(bg)
##           x        y wc2.1_10m_bio_1 wc2.1_10m_bio_2 wc2.1_10m_bio_3
## 1  -99.2500 66.75000     -13.2934895        7.870646        14.96619
## 2 -106.0833 42.08333       5.6722708       14.530958        36.82943
## 3 -111.9167 46.58333       6.7605939       14.135854        35.23372
## 4 -106.9167 54.75000       0.4086979       11.528605        24.43290
## 5 -118.2500 67.08333      -9.1363859        8.185354        16.34505
## 6 -111.2500 38.91667       8.4194584       15.997125        38.84047
##   wc2.1_10m_bio_4 wc2.1_10m_bio_5 wc2.1_10m_bio_6 wc2.1_10m_bio_7
## 1       1638.6833        15.42850       -37.16100        52.58950
## 2        894.3715        27.86600       -11.58875        39.45475
## 3        927.7927        28.14375       -11.97650        40.12025
## 4       1290.1088        22.55225       -24.63250        47.18475
## 5       1567.0846        17.46575       -32.61275        50.07850
## 6        904.0610        30.49050       -10.69625        41.18675
##   wc2.1_10m_bio_8 wc2.1_10m_bio_9 wc2.1_10m_bio_10 wc2.1_10m_bio_11
## 1        6.484917      -31.617332         7.518209        -31.76942
## 2        9.226916       -4.839750        17.168291         -4.83975
## 3       15.638333       -4.921750        18.186209         -4.92175
## 4       15.417084      -13.864500        15.417084        -16.31392
## 5        8.609292      -21.353209        10.573625        -27.49783
## 6       19.076958        3.179209        19.812834         -2.50475
##   wc2.1_10m_bio_12 wc2.1_10m_bio_13 wc2.1_10m_bio_14 wc2.1_10m_bio_15
## 1              171               33                4         70.29919
## 2              288               42               13         38.78144
## 3              293               48                9         53.40759
## 4              471               86               16         58.32499
## 5              223               43                7         61.21693
## 6              228               28               11         32.40370
##   wc2.1_10m_bio_16 wc2.1_10m_bio_17 wc2.1_10m_bio_18 wc2.1_10m_bio_19
## 1               90               13               78               13
## 2              112               41               90               41
## 3              129               35              115               35
## 4              220               53              220               56
## 5              108               27               93               29
## 6               83               40               72               44
bg <- bg[, -c(1:2)]  # Remove coordinate columns (x, y) from the background data
head(bg)
##   wc2.1_10m_bio_1 wc2.1_10m_bio_2 wc2.1_10m_bio_3 wc2.1_10m_bio_4
## 1     -13.2934895        7.870646        14.96619       1638.6833
## 2       5.6722708       14.530958        36.82943        894.3715
## 3       6.7605939       14.135854        35.23372        927.7927
## 4       0.4086979       11.528605        24.43290       1290.1088
## 5      -9.1363859        8.185354        16.34505       1567.0846
## 6       8.4194584       15.997125        38.84047        904.0610
##   wc2.1_10m_bio_5 wc2.1_10m_bio_6 wc2.1_10m_bio_7 wc2.1_10m_bio_8
## 1        15.42850       -37.16100        52.58950        6.484917
## 2        27.86600       -11.58875        39.45475        9.226916
## 3        28.14375       -11.97650        40.12025       15.638333
## 4        22.55225       -24.63250        47.18475       15.417084
## 5        17.46575       -32.61275        50.07850        8.609292
## 6        30.49050       -10.69625        41.18675       19.076958
##   wc2.1_10m_bio_9 wc2.1_10m_bio_10 wc2.1_10m_bio_11 wc2.1_10m_bio_12
## 1      -31.617332         7.518209        -31.76942              171
## 2       -4.839750        17.168291         -4.83975              288
## 3       -4.921750        18.186209         -4.92175              293
## 4      -13.864500        15.417084        -16.31392              471
## 5      -21.353209        10.573625        -27.49783              223
## 6        3.179209        19.812834         -2.50475              228
##   wc2.1_10m_bio_13 wc2.1_10m_bio_14 wc2.1_10m_bio_15 wc2.1_10m_bio_16
## 1               33                4         70.29919               90
## 2               42               13         38.78144              112
## 3               48                9         53.40759              129
## 4               86               16         58.32499              220
## 5               43                7         61.21693              108
## 6               28               11         32.40370               83
##   wc2.1_10m_bio_17 wc2.1_10m_bio_18 wc2.1_10m_bio_19
## 1               13               78               13
## 2               41               90               41
## 3               35              115               35
## 4               53              220               56
## 5               27               93               29
## 6               40               72               44

Plot Data

plot(bg[,1], bg[,12],              # Create a scatter plot 
     xlab="Annual mean temperature (°C)",  # Label x as "Annual mean temperature (°C)".
     ylab="Annual precipitation (mm)",    # Label y as "Annual precipitation (mm)".
     cex=.8)                              # Adjust the size of the points to be slightly smaller (scale = 0.8).

points(bfc[,1], bfc[,12],          # Overlay points from bfc dataset with x = bfc[,1] and y = bfc[,12].
       col="red",                  # Use red color for these points.
       cex=.6,                     # Make these points smaller (scale = 0.6).
       pch="+")                   # Use a "+" shape for these points.

legend("topleft",                  # Place the legend at the top-left corner of the plot.
       c("observed", "background"), # Add labels: "observed" and "background".
       col=c("red", "black"),      # Use red for "observed" and black for "background".
       pch=c("+", "o"),            # Match point shapes: "+" for "observed" and "o" for "background".
       pt.cex=c(.6, .8))           # Match point sizes: 0.6 for "observed" and 0.8 for "background".

  • Purpose: Compare observed species data with background data for key climate variables.
  • Interpretation: Black circles: Background points showing random climate data. Red plus signs: Observed species locations with climate values.
  • Use: Visually assess differences between observed and background climate conditions.

East vs West

#eastern points 
bfe <- bfc[bf[,1] > -102, ]  # Select eastern species points (longitude > -102)
#western points 
bfw <- bfc[bf[,1] <= -102, ] # Select western species points (longitude <= -102)
dw <- rbind(cbind(pa=1, bfw), cbind(pa=0, bg)) # Combine western points (presence) with background (absence)
de <- rbind(cbind(pa=1, bfe), cbind(pa=0, bg)) # Combine eastern points (presence) with background (absence)
dw <- data.frame(dw)            # Convert western data to a data frame
de <- data.frame(na.omit(de))   # Convert eastern data to a data frame and remove missing values
dim(dw)                         # Output dimensions of western dataset
## [1] 6224   20
dim(de)                         # Output dimensions of eastern dataset
## [1] 6866   20
  • Purpose: Divide the species occurrence data into eastern and western subsets for regional analysis.
  • Interpretation: dw: Western dataset with 6224 rows and 20 columns (presence + background). de: Eastern dataset with 6866 rows and 20 columns (presence + background).
  • Use: Enables separate species distribution modeling for the two regions.

Fit a Model (CART)

library(rpart)  # Load the rpart library for CART modeling.
cart <- rpart(pa ~ ., data = dw)  # Fit a classification and regression tree (CART) model to the western dataset (dw).

Complexity Parameter

printcp(cart)  # Print the complexity parameter table for the CART model.
## 
## Regression tree:
## rpart(formula = pa ~ ., data = dw)
## 
## Variables actually used in tree construction:
## [1] wc2.1_10m_bio_10 wc2.1_10m_bio_12 wc2.1_10m_bio_14 wc2.1_10m_bio_18
## [5] wc2.1_10m_bio_19 wc2.1_10m_bio_3  wc2.1_10m_bio_4 
## 
## Root node error: 983.29/6224 = 0.15798
## 
## n= 6224 
## 
##         CP nsplit rel error  xerror     xstd
## 1 0.322797      0   1.00000 1.00019 0.019351
## 2 0.080521      1   0.67720 0.68771 0.019687
## 3 0.073325      2   0.59668 0.59759 0.015480
## 4 0.068645      3   0.52336 0.52163 0.015231
## 5 0.027920      4   0.45471 0.47026 0.014758
## 6 0.014907      5   0.42679 0.44778 0.015136
## 7 0.010869      6   0.41188 0.44082 0.015406
## 8 0.010197      7   0.40102 0.43500 0.015330
## 9 0.010000      8   0.39082 0.43197 0.015319
  • Purpose: Assess the complexity parameter (cp) values for model pruning and performance.
  • Interpretation: Columns: CP: Complexity parameter controlling tree size. nsplit: Number of splits in the tree. xerror: Cross-validated error for each model. Smaller xerror indicates better model performance.
  • Use: Select an optimal cp value to avoid overfitting.

Plot Complexity Parameter

plotcp(cart)  # Plot cross-validation error against the complexity parameter (cp). 

  • Purpose: Visualize the relationship between model complexity and performance.
  • Interpretation: X-axis: Complexity parameter (cp). Y-axis: Cross-validated error (xerror). Optimal cp is the one with minimal error or within 1 SE of the minimum.
  • Use: Guides pruning to simplify the decision tree.

Fit the Pruned Model

cart <- rpart(pa ~ ., data = dw, cp = 0.02)  # Refit CART using the chosen complexity parameter (cp = 0.02).
  • Purpose: Refine the decision tree using the selected cp to balance accuracy and simplicity.
  • Interpretation: A pruned CART model with reduced splits and better generalization.
  • Use: Final model for prediction and interpretation.

Plot the Regression Tree

library(rpart.plot)  # Load library for visualizing decision trees.
rpart.plot(cart, uniform = TRUE, main = "Regression Tree")  # Plot the pruned regression tree.

  • Purpose: Visualize the decision tree structure.
  • Interpretation: -Nodes: Splits based on predictor thresholds. -Leaf nodes: Final predictions for presence/absence.
  • Uniform layout ensures consistent spacing for better readability.
  • Use: Understand model logic and key variables.

Use the Model to Predict Suitability

x <- predict(wc, cart)  # Predict species suitability using the CART model.
x <- mask(x, wc[[1]])   # Mask predictions to limit them to the input climate data region.
x <- round(x, 2)        # Round predictions to two decimal places for clarity.
plot(x, type = "class", plg = list(x = "bottomleft"))  # Visualize predicted suitability.

  • Purpose: Predict and map the habitat suitability for the species based on climate data.
  • Interpretation:
  • Map visualization: – Shows areas classified as suitable or unsuitable for the species. – Classification is based on climate thresholds identified by the CART model.
  • Use: Analyze spatial patterns of habitat suitability for conservation planning.

Random Forest

Test and Train Data

set.seed(123)  # Set random seed for reproducibility.
i <- sample(nrow(dw), 0.2 * nrow(dw))  # Randomly sample 20% of data for the test set.
test <- dw[i,]  # Assign 20% of data to the test set.
train <- dw[-i,]  # Use remaining 80% as the training set.
fpa <- as.factor(train[, 'pa'])  # Convert species presence/absence (pa) to a factor for classification.
  • Purpose: Split the dataset into training and testing sets, ensuring reproducibility by setting a random seed.
  • Interpretation: train contains 80% of the data for model fitting. test contains 20% of the data for model validation. fpa ensures that the response variable (pa) is treated as a categorical factor.
  • Use: Training set used to train the model, test set for evaluating model performance.

RandomForest model

library(randomForest)  # Load the randomForest package.
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
crf <- randomForest(train[, 2:ncol(train)], fpa)  # Fit a Random Forest model with the training data.
crf  # Display model summary.
## 
## Call:
##  randomForest(x = train[, 2:ncol(train)], y = fpa) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 4
## 
##         OOB estimate of  error rate: 7.19%
## Confusion matrix:
##      0   1 class.error
## 0 3832 165  0.04128096
## 1  193 790  0.19633774

- *Purpose:* Fit a Random Forest model using the training data (train) to predict species presence/absence (fpa). - *Interpretation:* -Model output (crf) includes: –OOB error rate: The error rate on the out-of-bag samples (used for model validation). –Variable importance: Ranking of features by their contribution to prediction accuracy. -Random Forest: Ensemble of decision trees, improving accuracy by averaging over multiple trees. - *Use:* Understand model performance, including accuracy and variable importance.

The output shows a Random Forest classification model with 500 trees and a selection of 4 variables per split. The out-of-bag (OOB) error rate is 7.19%, indicating the model misclassifies about 7.19% of unseen data. The confusion matrix shows the model’s performance with 3832 true negatives, 165 false positives, 193 false negatives, and 790 true positives. The class error for predicting 0 is 4.13%, and for predicting 1 is 19.63%. These values highlight the model’s accuracy and misclassification tendencies.

Variable Importance Plot

varImpPlot(crf) # Plot the importance of variables in predicting species presence.

  • Purpose: Visualize the relative importance of predictor variables in the model.
  • Interpretation:
  • X-axis: Variable importance score (higher means more influential).
  • Y-axis: Variables used in the model.
  • Use: Identify the most important predictors that influence species presence.

Use Regression (Tune a Parameter)

trf <- tuneRF(train[, 2:ncol(train)], train[, "pa"])  # Tune the Random Forest model to find optimal parameters.
## Warning in randomForest.default(x, y, mtry = mtryStart, ntree = ntreeTry, : The
## response has five or fewer unique values.  Are you sure you want to do
## regression?
## mtry = 6  OOB error = 0.05594974 
## Searching left ...
## Warning in randomForest.default(x, y, mtry = mtryCur, ntree = ntreeTry, : The
## response has five or fewer unique values.  Are you sure you want to do
## regression?
## mtry = 3     OOB error = 0.05612125 
## -0.003065481 0.05 
## Searching right ...
## Warning in randomForest.default(x, y, mtry = mtryCur, ntree = ntreeTry, : The
## response has five or fewer unique values.  Are you sure you want to do
## regression?
## mtry = 12    OOB error = 0.05485775 
## 0.01951734 0.05

trf  # Display tuning results.
##    mtry   OOBError
## 3     3 0.05612125
## 6     6 0.05594974
## 12   12 0.05485775
mt <- trf[which.min(trf[,2]), 1]  # Extract the optimal `mtry` (number of variables considered at each split).
mt  # Display the optimal `mtry` value.
## [1] 12

TuneRF and Values of mt Represent

rrf <- randomForest(train[, 2:ncol(train)], train[, "pa"], mtry=mt, ntree=250) 
## Warning in randomForest.default(train[, 2:ncol(train)], train[, "pa"], mtry =
## mt, : The response has five or fewer unique values.  Are you sure you want to
## do regression?
plot(rrf) 

  • Purpose: Fit a Random Forest model with the optimal mtry value determined from the tuning step (mt), using 250 trees.
  • Interpretation:
  • rrf model is fitted with the tuned parameter (mtry) and specified number of trees (ntree = 250).
  • Plot: Displays the OOB error rate as the number of trees increases, showing the model’s convergence.
  • Use: Assess the model’s performance and convergence over different tree numbers.