แบบฝึกปฏิบัตินี้เป็นการฝึกปฏิบัติการประยุกต์ใช้แบบจำลองนิเวศวิทยาถิ่นที่อยู่ (Ecological Nice Modeling: ENM) สำหรับงานระบาดวิทยา โดยข้อมูลและแนวการเขียนรหัสได้ประยุกต์มาจากเวปไซต์ Spatial Data Science with R (https://rspatial.org/raster/index.html#)

Set working directory เลือกไฟล์ที่จะเก็บข้อมูลและผลลัพธ์

setwd("/Volumes/KEERATI SSD/Keerati/SCPH/Spatial_Epidemiology")

Download package

library(maptools)
## Loading required package: sp
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
##      (status 2 uses the sf package in place of rgdal)
## Warning: multiple methods tables found for 'elide'
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired by the end of 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
## 
## Attaching package: 'maptools'
## The following object is masked from 'package:sp':
## 
##     elide
library(dismo)
## Loading required package: raster

#Occurrence Data

Import occurrence data (Bradypus Data)

bradypus<- read.csv("Data/data.csv")
head(bradypus) #ดูข้อมูล 6 แถวแรก
##   X      lon      lat
## 1 1 -65.4000 -10.3833
## 2 2 -65.3833 -10.3833
## 3 3 -65.1333 -16.8000
## 4 4 -63.6667 -17.4500
## 5 5 -63.8500 -17.4000
## 6 6 -64.4167 -16.0000

We do not need the first column

bradypus <- bradypus[,-1]
tail(bradypus) #ดูข้อมูล 6 แถวสุดท้ายถว
##          lon     lat
## 111 -65.9167  3.0833
## 112 -66.2167  5.3000
## 113 -67.6833 10.3667
## 114 -68.3833 10.5333
## 115 -66.8333 10.3667
## 116 -72.6000  9.1500

Download world map (wrld_simpl)

data(wrld_simpl)
plot(wrld_simpl, axes=TRUE, col="gray")

plot(wrld_simpl, xlim=c(-80,70), ylim=c(-60,10), axes=TRUE, col="light yellow")

# Environmental Data

Import environmental variables

path <- file.path(system.file(package="dismo"), 'ex')
files <- list.files(path, pattern='grd$', full.names=TRUE )
files
## [1] "/Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/library/dismo/ex/bio1.grd" 
## [2] "/Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/library/dismo/ex/bio12.grd"
## [3] "/Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/library/dismo/ex/bio16.grd"
## [4] "/Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/library/dismo/ex/bio17.grd"
## [5] "/Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/library/dismo/ex/bio5.grd" 
## [6] "/Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/library/dismo/ex/bio6.grd" 
## [7] "/Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/library/dismo/ex/bio7.grd" 
## [8] "/Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/library/dismo/ex/bio8.grd" 
## [9] "/Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/library/dismo/ex/biome.grd"

Creat set of env. variables

predictors <- stack(files[1:8])

Use the names() function to label each covariate layer

names(predictors) <- c("MeanTemp.","AnnualPrecip.","Precip.WettestQuarter","PrecipDriestQuarter","Max.Temp.","Min.Temp.","Temp.Range","MeanTemp.WettestQuarter")

Plot predictors

plot(predictors)

Overlay data

plot(predictors, 1) #เลือกปัจจัยที่ 1
# note the "add=TRUE" argument with plot
plot(wrld_simpl,add=TRUE)
points(bradypus, col='red', pch = 20)

#Background points

We use the first file to create a RasterLayer

mask <- raster(files[1])
plot(mask)

Create 500 random points

set.seed(1963) # set seed to assure that the examples will always have the same random sample.
bg <- randomPoints(mask, 500, p= bradypus) #use help(randomPoints) to explore function 

set up the plotting area for two maps

par(mfrow=c(1,2)) #สร้างแผนที่ 1 แถว 2 คอลัมม์
plot(mask)
plot(wrld_simpl,add=TRUE)
points(bg, cex=0.7, col='blue', pch = 20) #เพิ่ม bg ลงบนแผนที่
plot(mask)
plot(wrld_simpl,add=TRUE)
points(bradypus, cex=0.7, col='red', pch = 20) #เพิ่ม bradypus ลงบนแผนที่

Extracting values from rasters

presvals <- extract(predictors, bradypus) #presence values
head(presvals)
##      MeanTemp. AnnualPrecip. Precip.WettestQuarter PrecipDriestQuarter
## [1,]       263          1639                   724                  62
## [2,]       263          1639                   724                  62
## [3,]       253          3624                  1547                 373
## [4,]       243          1693                   775                 186
## [5,]       243          1693                   775                 186
## [6,]       252          2501                  1081                 280
##      Max.Temp. Min.Temp. Temp.Range MeanTemp.WettestQuarter
## [1,]       338       191        147                     261
## [2,]       338       191        147                     261
## [3,]       329       150        179                     271
## [4,]       318       150        168                     264
## [5,]       318       150        168                     264
## [6,]       326       154        172                     270
absvals<-extract(predictors, bg) #absence values
head(absvals)
##      MeanTemp. AnnualPrecip. Precip.WettestQuarter PrecipDriestQuarter
## [1,]       245          1177                   545                  77
## [2,]       258          2332                   830                 353
## [3,]       173          1402                   420                 243
## [4,]       238           606                   246                  37
## [5,]       254          2459                   957                 269
## [6,]        42           733                   373                  25
##      Max.Temp. Min.Temp. Temp.Range MeanTemp.WettestQuarter
## [1,]       329       142        186                     257
## [2,]       315       213        102                     256
## [3,]       333         5        328                     174
## [4,]       300       173        126                     240
## [5,]       319       208        111                     254
## [6,]       148      -109        257                      58

Create column for presence (1) and absence (2)

pb <- c(rep(1, nrow(presvals)), rep(0, nrow(absvals)))

Combind presvals and absvals

df<-rbind(presvals, absvals)

#combind df and pb

sdm <- data.frame(cbind(pb, df))
head(sdm); tail(sdm)
##   pb MeanTemp. AnnualPrecip. Precip.WettestQuarter PrecipDriestQuarter
## 1  1       263          1639                   724                  62
## 2  1       263          1639                   724                  62
## 3  1       253          3624                  1547                 373
## 4  1       243          1693                   775                 186
## 5  1       243          1693                   775                 186
## 6  1       252          2501                  1081                 280
##   Max.Temp. Min.Temp. Temp.Range MeanTemp.WettestQuarter
## 1       338       191        147                     261
## 2       338       191        147                     261
## 3       329       150        179                     271
## 4       318       150        168                     264
## 5       318       150        168                     264
## 6       326       154        172                     270
##     pb MeanTemp. AnnualPrecip. Precip.WettestQuarter PrecipDriestQuarter
## 611  0       167           145                    67                  10
## 612  0       135          1308                   353                 290
## 613  0       190          1633                   777                  71
## 614  0       185          1044                   323                 197
## 615  0       269          2324                   969                 237
## 616  0       260          2126                   997                 113
##     Max.Temp. Min.Temp. Temp.Range MeanTemp.WettestQuarter
## 611       369         1        368                      92
## 612       302       -34        337                     211
## 613       279       101        178                     196
## 614       352        13        340                     224
## 615       332       221        111                     265
## 616       320       205        115                     256

Distribution for “Presence” vs “Absence” in Environment

plot(presvals, col = "red", pch = 20, cex=1.0)
points(absvals, col = "blue", pch = 20, cex=1.0)

#Model fitting, prediction, and evaluation

Model 1: pb ~ MeanTemp. + Max.Temp. + AnnualPrecip.

m1 <- glm(pb ~ MeanTemp. + Max.Temp. + AnnualPrecip., data=sdm)
summary(m1)
## 
## Call:
## glm(formula = pb ~ MeanTemp. + Max.Temp. + AnnualPrecip., data = sdm)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.94719  -0.23004  -0.08335   0.08994   0.87275  
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.548e-01  1.010e-01   1.532    0.126    
## MeanTemp.      1.905e-03  4.017e-04   4.741 2.65e-06 ***
## Max.Temp.     -1.809e-03  4.521e-04  -4.002 7.04e-05 ***
## AnnualPrecip.  1.182e-04  1.645e-05   7.183 1.99e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1180437)
## 
##     Null deviance: 94.156  on 615  degrees of freedom
## Residual deviance: 72.243  on 612  degrees of freedom
## AIC: 437.91
## 
## Number of Fisher Scoring iterations: 2

Model 2: all Env. variables

m2 = glm(pb ~ ., data=sdm)
summary(m2)
## 
## Call:
## glm(formula = pb ~ ., data = sdm)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.97839  -0.22158  -0.08527   0.09110   0.88681  
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)   
## (Intercept)              2.015e-01  1.041e-01   1.935   0.0534 . 
## MeanTemp.               -2.867e-03  2.188e-03  -1.310   0.1907   
## AnnualPrecip.            2.583e-04  9.659e-05   2.674   0.0077 **
## Precip.WettestQuarter   -2.739e-04  2.015e-04  -1.359   0.1747   
## PrecipDriestQuarter     -3.631e-04  2.260e-04  -1.607   0.1086   
## Max.Temp.                3.889e-02  2.974e-02   1.308   0.1915   
## Min.Temp.               -3.655e-02  2.977e-02  -1.228   0.2200   
## Temp.Range              -3.877e-02  2.971e-02  -1.305   0.1924   
## MeanTemp.WettestQuarter  5.695e-04  5.206e-04   1.094   0.2745   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1171059)
## 
##     Null deviance: 94.156  on 615  degrees of freedom
## Residual deviance: 71.083  on 607  degrees of freedom
## AIC: 437.95
## 
## Number of Fisher Scoring iterations: 2

Model3: pb ~ MeanTemp. + AnnualPrecip

m3 <- glm(pb ~ MeanTemp. + AnnualPrecip., data=sdm)
summary(m3)
## 
## Call:
## glm(formula = pb ~ MeanTemp. + AnnualPrecip., data = sdm)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.03338  -0.20748  -0.09876   0.06028   0.94742  
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.1931723  0.0521317  -3.705  0.00023 ***
## MeanTemp.      0.0006926  0.0002672   2.592  0.00977 ** 
## AnnualPrecip.  0.0001467  0.0000150   9.779  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.120936)
## 
##     Null deviance: 94.156  on 615  degrees of freedom
## Residual deviance: 74.134  on 613  degrees of freedom
## AIC: 451.83
## 
## Number of Fisher Scoring iterations: 2

#Model prediction

Create new dataset for prediction

bio1 = c(40, 150, 200)
bio5 = c(60, 115, 290)
bio12 = c(600, 1600, 1700)
pd = data.frame(cbind(bio1, bio5, bio12))
names(pd)<- c("MeanTemp.", "Max.Temp.", "AnnualPrecip.")
pd
##   MeanTemp. Max.Temp. AnnualPrecip.
## 1        40        60           600
## 2       150       115          1600
## 3       200       290          1700

Function predict()

predict(m1, pd)
##         1         2         3 
## 0.1932906 0.4214249 0.2118041

The purpose of ENM is to create a map of suitability scores.

p <- predict(predictors, m1)
plot(p)

#Model evaluation

eval_glm<- evaluate(bradypus, bg, m1, predictors)
eval_glm
## class          : ModelEvaluation 
## n presences    : 116 
## n absences     : 500 
## AUC            : 0.8642586 
## cor            : 0.4824235 
## max TPR+TNR at : 0.2267917

#Plot ROC Note: An ROC curve (receiver operating characteristic curve) is a graph showing the performance of a classification model at all classification thresholds. This curve plots two parameters: True Positive Rate. False Positive Rate

plot(eval_glm, 'ROC')

#Generalized Linear Models

Sprit data to Training and Testing as 80:20

random selection of bradypus and bg

sam1<- sample(nrow(bradypus), round(0.80*nrow(bradypus)))
pre_train<-bradypus[sam1,]
pre_test<-bradypus[-sam1,]
head(pre_train)
##          lon      lat
## 79  -82.5167   9.4500
## 102 -74.2167  -4.8333
## 40  -76.6667   5.7000
## 5   -63.8500 -17.4000
## 82  -82.2500   9.1833
## 76  -84.4333  12.1500
sam2<-sample(nrow(bg), round(0.80*nrow(bg)))
bg_train<-bg[sam2,]
bg_test<-bg[-sam2,]

Get Env data

env_pretrain<-extract(predictors, pre_train)
env_bgtrain<-extract(predictors, bg_train)

Create presence/absence column

pb_glm <- c(rep(1, nrow(env_pretrain)), rep(0, nrow(env_bgtrain)))

Create training data by combing “env_pretrain” and “env_bgtrain”

env_train<-rbind(env_pretrain, env_bgtrain)
head(env_train)
##      MeanTemp. AnnualPrecip. Precip.WettestQuarter PrecipDriestQuarter
## [1,]       199          2740                   986                 290
## [2,]       268          2403                   723                 407
## [3,]       263          7091                  2065                1372
## [4,]       243          1693                   775                 186
## [5,]       255          3056                  1017                 488
## [6,]       253          3424                  1512                 221
##      Max.Temp. Min.Temp. Temp.Range MeanTemp.WettestQuarter
## [1,]       255       141        114                     198
## [2,]       327       207        120                     268
## [3,]       316       213        104                     262
## [4,]       318       150        168                     264
## [5,]       299       204         95                     257
## [6,]       310       198        112                     255

Create training data by combing “pb_glm” and “env_train”

dat_train<-as.data.frame(cbind(pb,env_train))
## Warning in cbind(pb, env_train): number of rows of result is not a multiple of
## vector length (arg 1)
head(dat_train)
##   pb MeanTemp. AnnualPrecip. Precip.WettestQuarter PrecipDriestQuarter
## 1  1       199          2740                   986                 290
## 2  1       268          2403                   723                 407
## 3  1       263          7091                  2065                1372
## 4  1       243          1693                   775                 186
## 5  1       255          3056                  1017                 488
## 6  1       253          3424                  1512                 221
##   Max.Temp. Min.Temp. Temp.Range MeanTemp.WettestQuarter
## 1       255       141        114                     198
## 2       327       207        120                     268
## 3       316       213        104                     262
## 4       318       150        168                     264
## 5       299       204         95                     257
## 6       310       198        112                     255

Fit glm model

glm1 <- glm(pb ~ MeanTemp. + Max.Temp. + AnnualPrecip., data=dat_train)
summary(m1)
## 
## Call:
## glm(formula = pb ~ MeanTemp. + Max.Temp. + AnnualPrecip., data = sdm)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.94719  -0.23004  -0.08335   0.08994   0.87275  
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.548e-01  1.010e-01   1.532    0.126    
## MeanTemp.      1.905e-03  4.017e-04   4.741 2.65e-06 ***
## Max.Temp.     -1.809e-03  4.521e-04  -4.002 7.04e-05 ***
## AnnualPrecip.  1.182e-04  1.645e-05   7.183 1.99e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1180437)
## 
##     Null deviance: 94.156  on 615  degrees of freedom
## Residual deviance: 72.243  on 612  degrees of freedom
## AIC: 437.91
## 
## Number of Fisher Scoring iterations: 2
glm2 = glm(pb ~ ., data=dat_train)
summary(glm2)
## 
## Call:
## glm(formula = pb ~ ., data = dat_train)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.97249  -0.25833  -0.12226   0.08683   1.04253  
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)   
## (Intercept)              0.3689882  0.1302915   2.832  0.00482 **
## MeanTemp.               -0.0016446  0.0028357  -0.580  0.56219   
## AnnualPrecip.            0.0002669  0.0001274   2.094  0.03674 * 
## Precip.WettestQuarter   -0.0003435  0.0002670  -1.287  0.19886   
## PrecipDriestQuarter     -0.0003552  0.0002981  -1.191  0.23411   
## Max.Temp.                0.0418108  0.0376944   1.109  0.26789   
## Min.Temp.               -0.0407075  0.0377325  -1.079  0.28119   
## Temp.Range              -0.0424376  0.0376554  -1.127  0.26030   
## MeanTemp.WettestQuarter  0.0002986  0.0006733   0.444  0.65757   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1502631)
## 
##     Null deviance: 88.706  on 492  degrees of freedom
## Residual deviance: 72.727  on 484  degrees of freedom
## AIC: 475.57
## 
## Number of Fisher Scoring iterations: 2
glm3 <- glm(pb ~ MeanTemp. + AnnualPrecip., data=dat_train)
summary(glm3)
## 
## Call:
## glm(formula = pb ~ MeanTemp. + AnnualPrecip., data = dat_train)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.04278  -0.24682  -0.13338   0.00489   1.05425  
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -7.490e-02  6.501e-02  -1.152    0.250    
## MeanTemp.      4.153e-04  3.329e-04   1.247    0.213    
## AnnualPrecip.  1.415e-04  1.902e-05   7.439 4.57e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1543255)
## 
##     Null deviance: 88.706  on 492  degrees of freedom
## Residual deviance: 75.620  on 490  degrees of freedom
## AIC: 482.8
## 
## Number of Fisher Scoring iterations: 2

Compare AIC from 3 models

a<-c(glm1$aic, glm2$aic, glm3$aic)
a
## [1] 470.7537 475.5740 482.7996

Plot a map of suitability scores

pre_glm1 <- predict(predictors, glm1)
plot(pre_glm1)

Model evaluation

eval_glm1<- evaluate(pre_test, bg_test, glm1, predictors)
eval_glm1
## class          : ModelEvaluation 
## n presences    : 23 
## n absences     : 100 
## AUC            : 0.8691304 
## cor            : 0.4948806 
## max TPR+TNR at : 0.323711

Get threshold for making binary map

tr<-threshold(eval_glm1)
tr
##                kappa spec_sens no_omission prevalence equal_sens_spec
## thresholds 0.4203321  0.323711    0.233366    0.19227       0.3461372
##            sensitivity
## thresholds   0.2788091
tr_kappa<-threshold(eval_glm1,'kappa')
tr_spec_sens<-threshold(eval_glm1, 'spec_sens')
tr_equal_sens_spec<-threshold(eval_glm1, 'equal_sens_spec')

Plot Suitability and Binary maps

par(mfrow=c(1,2))
plot(pre_glm1, main = "GLM")
plot(wrld_simpl, add=TRUE, border='dark grey')
plot(pre_glm1 > tr_kappa, main='Presence/Absence_Kappa')
plot(wrld_simpl, add=TRUE, border='dark grey')

Plot all maps

par(mfrow=c(2,2))
plot(pre_glm1, main = "GLM")
plot(wrld_simpl, add=TRUE, border='dark grey')
plot(pre_glm1 > tr_kappa, main='Presence/Absence_Kappa')
plot(wrld_simpl, add=TRUE, border='dark grey')
plot(pre_glm1 > tr_spec_sens, main='Presence/Absence_spec_sens')
plot(wrld_simpl, add=TRUE, border='dark grey')
plot(pre_glm1 > tr_equal_sens_spec, main='Presence/Absence_equal_sens_spec')
plot(wrld_simpl, add=TRUE, border='dark grey')

Contact info: Email: Line ID: kponpetch