# To clean environment 
rm(list = ls(all.names = TRUE)) 
gc()

##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 519723 27.8    1162822 62.2   644200 34.5
## Vcells 927978  7.1    8388608 64.0  1634532 12.5

# loads the dismo library
library(raster)

## Warning: package 'raster' was built under R version 4.2.2

## Loading required package: sp

library(sp)
library(rgdal)

## Please note that rgdal will be retired by the end of 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
## 
## rgdal: version: 1.5-32, (SVN revision 1176)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.4.3, released 2022/04/22
## Path to GDAL shared files: C:/Users/luisc/AppData/Local/R/win-library/4.2/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: C:/Users/luisc/AppData/Local/R/win-library/4.2/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.5-0
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.

library(rJava)
library(dismo)

library(rgbif)

Preparación de datos de Spodoptera frugiperda

El gusano cogollero del maíz (Spodoptera frugiperda) es una plaga de insectos que afecta a más de 80 especies de plantas y causa daños a cereales cultivados de importancia económica –como el maíz, el arroz y el sorgo– y también a los cultivos de hortalizas y al algodón.

library(dismo)
Spodoptera <- gbif("Spodoptera", "frugiperda*", geo=FALSE)

## 6025 records found

## 0-300-600-900-1200-1500-1800-2100-2400-2700-3000-3300-3600-3900-4200-4500-4800-5100-5400-5700-6000-6025 records downloaded

# load the saved S. acaule data
data(Spodoptera)

## Warning in data(Spodoptera): data set 'Spodoptera' not found

# how many rows and colums?
dim(Spodoptera)

## [1] 6025  163

## [1] 1366   25
#select the records that have longitude and latitude data
colnames(Spodoptera)

##   [1] "acceptedNameUsage"                  "acceptedScientificName"            
##   [3] "acceptedTaxonKey"                   "accessRights"                      
##   [5] "adm1"                               "adm2"                              
##   [7] "associatedReferences"               "associatedSequences"               
##   [9] "associatedTaxa"                     "basisOfRecord"                     
##  [11] "bibliographicCitation"              "catalogNumber"                     
##  [13] "class"                              "classKey"                          
##  [15] "cloc"                               "collectionCode"                    
##  [17] "collectionID"                       "collectionKey"                     
##  [19] "continent"                          "coordinatePrecision"               
##  [21] "coordinateUncertaintyInMeters"      "country"                           
##  [23] "crawlId"                            "datasetID"                         
##  [25] "datasetKey"                         "datasetName"                       
##  [27] "dateIdentified"                     "day"                               
##  [29] "depth"                              "depthAccuracy"                     
##  [31] "disposition"                        "dynamicProperties"                 
##  [33] "elevation"                          "elevationAccuracy"                 
##  [35] "endDayOfYear"                       "eventDate"                         
##  [37] "eventID"                            "eventRemarks"                      
##  [39] "eventTime"                          "family"                            
##  [41] "familyKey"                          "fieldNotes"                        
##  [43] "fieldNumber"                        "footprintSRS"                      
##  [45] "footprintWKT"                       "fullCountry"                       
##  [47] "gbifID"                             "genericName"                       
##  [49] "genus"                              "genusKey"                          
##  [51] "geodeticDatum"                      "georeferencedBy"                   
##  [53] "georeferencedDate"                  "georeferenceProtocol"              
##  [55] "georeferenceRemarks"                "georeferenceSources"               
##  [57] "georeferenceVerificationStatus"     "habitat"                           
##  [59] "higherClassification"               "higherGeography"                   
##  [61] "higherGeographyID"                  "hostingOrganizationKey"            
##  [63] "http://unknown.org/captive"         "http://unknown.org/language"       
##  [65] "http://unknown.org/nick"            "http://unknown.org/recordEnteredBy"
##  [67] "http://unknown.org/recordId"        "http://unknown.org/recordID"       
##  [69] "http://unknown.org/rights"          "http://unknown.org/rightsHolder"   
##  [71] "http://unknown.org/taxonRankID"     "identificationID"                  
##  [73] "identificationQualifier"            "identificationRemarks"             
##  [75] "identificationVerificationStatus"   "identifiedBy"                      
##  [77] "identifier"                         "individualCount"                   
##  [79] "informationWithheld"                "installationKey"                   
##  [81] "institutionCode"                    "institutionID"                     
##  [83] "institutionKey"                     "isInCluster"                       
##  [85] "island"                             "islandGroup"                       
##  [87] "ISO2"                               "iucnRedListCategory"               
##  [89] "key"                                "kingdom"                           
##  [91] "kingdomKey"                         "language"                          
##  [93] "lastCrawled"                        "lastInterpreted"                   
##  [95] "lastParsed"                         "lat"                               
##  [97] "license"                            "lifeStage"                         
##  [99] "locality"                           "locationAccordingTo"               
## [101] "locationID"                         "locationRemarks"                   
## [103] "lon"                                "materialSampleID"                  
## [105] "modified"                           "month"                             
## [107] "municipality"                       "nameAccordingTo"                   
## [109] "namePublishedIn"                    "namePublishedInYear"               
## [111] "nomenclaturalCode"                  "occurrenceID"                      
## [113] "occurrenceRemarks"                  "occurrenceStatus"                  
## [115] "order"                              "orderKey"                          
## [117] "organismID"                         "organismQuantity"                  
## [119] "organismQuantityType"               "organismRemarks"                   
## [121] "originalNameUsage"                  "otherCatalogNumbers"               
## [123] "ownerInstitutionCode"               "parentNameUsage"                   
## [125] "phylum"                             "phylumKey"                         
## [127] "preparations"                       "previousIdentifications"           
## [129] "programmeAcronym"                   "projectId"                         
## [131] "protocol"                           "publishingCountry"                 
## [133] "publishingOrgKey"                   "recordedBy"                        
## [135] "recordNumber"                       "references"                        
## [137] "rights"                             "rightsHolder"                      
## [139] "samplingProtocol"                   "scientificName"                    
## [141] "scientificNameID"                   "sex"                               
## [143] "species"                            "speciesKey"                        
## [145] "specificEpithet"                    "startDayOfYear"                    
## [147] "taxonConceptID"                     "taxonID"                           
## [149] "taxonKey"                           "taxonomicStatus"                   
## [151] "taxonRank"                          "taxonRemarks"                      
## [153] "type"                               "typeStatus"                        
## [155] "verbatimCoordinateSystem"           "verbatimElevation"                 
## [157] "verbatimEventDate"                  "verbatimIdentification"            
## [159] "verbatimLocality"                   "vernacularName"                    
## [161] "waterBody"                          "year"                              
## [163] "downloadDate"

##  [1] "species"               "continent"             "country"
##  [4] "adm1"                  "adm2"                  "locality"
##  [7] "lat"                   "lon"                   "coordUncertaintyM"
## [10] "alt"                   "institution"           "collection"
## [13] "catalogNumber"         "basisOfRecord"         "collector"
## [16] "earliestDateCollected" "latestDateCollected"   "gbifNotes"
## [19] "downloadDate"          "maxElevationM"         "minElevationM"
## [22] "maxDepthM"             "minDepthM"             "ISO2"
## [25] "cloc"
acgeo <- subset(Spodoptera, !is.na(lon) & !is.na(lat))
dim(acgeo)

## [1] 4446  163

## [1] 1082   25
# show some values
acgeo[1:4, c(1:5,7:10)]

##   acceptedNameUsage                acceptedScientificName acceptedTaxonKey
## 1              <NA> Spodoptera frugiperda J.E.Smith, 1797          5109855
## 2              <NA> Spodoptera frugiperda J.E.Smith, 1797          5109855
## 3              <NA> Spodoptera frugiperda J.E.Smith, 1797          5109855
## 4              <NA> Spodoptera frugiperda J.E.Smith, 1797          5109855
##   accessRights      adm1 associatedReferences associatedSequences
## 1         <NA> São Paulo                 <NA>                <NA>
## 2         <NA>     Texas                 <NA>                <NA>
## 3         <NA>     Texas                 <NA>                <NA>
## 4         <NA>     Texas                 <NA>                <NA>
##   associatedTaxa     basisOfRecord
## 1           <NA> HUMAN_OBSERVATION
## 2           <NA> HUMAN_OBSERVATION
## 3           <NA> HUMAN_OBSERVATION
## 4           <NA> HUMAN_OBSERVATION

##                    species     continent

Mapa de las localidades de ocrurrencia

library(maptools)

## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired by the end of 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.

data(wrld_simpl)
plot(wrld_simpl, xlim=c(-80,70), ylim=c(-60,10), axes=TRUE, col="light yellow")
# restore the box around the map
box()
# add the points
points(acgeo$lon, acgeo$lat, col='orange', pch=20, cex=0.75)
# plot points again to add a border, for better visibility
points(acgeo$lon, acgeo$lat, col='red', cex=0.75)

Limpieza de datos

Spodoptera[c(303,1200),1:50]

##      acceptedNameUsage                acceptedScientificName acceptedTaxonKey
## 303               <NA> Spodoptera frugiperda J.E.Smith, 1797          5109855
## 1200              <NA> Spodoptera frugiperda J.E.Smith, 1797          5109855
##      accessRights    adm1 adm2 associatedReferences associatedSequences
## 303          <NA>   Texas <NA>                 <NA>                <NA>
## 1200         <NA> Ontario <NA>                 <NA>                <NA>
##      associatedTaxa     basisOfRecord bibliographicCitation catalogNumber
## 303            <NA> HUMAN_OBSERVATION                  <NA>     140689655
## 1200           <NA> HUMAN_OBSERVATION                  <NA>      98396703
##        class classKey                 cloc collectionCode collectionID
## 303  Insecta      216 Texas, United States   Observations         <NA>
## 1200 Insecta      216      Ontario, Canada   Observations         <NA>
##      collectionKey continent coordinatePrecision coordinateUncertaintyInMeters
## 303           <NA>      <NA>                  NA                             2
## 1200          <NA>      <NA>                  NA                            NA
##            country crawlId datasetID                           datasetKey
## 303  United States     326      <NA> 50c9509d-22c7-4a22-a47d-8c48425ef4a7
## 1200        Canada     326      <NA> 50c9509d-22c7-4a22-a47d-8c48425ef4a7
##                                  datasetName      dateIdentified day depth
## 303  iNaturalist research-grade observations 2022-10-31T19:34:16  30    NA
## 1200 iNaturalist research-grade observations 2021-10-16T17:16:36  15    NA
##      depthAccuracy disposition dynamicProperties elevation elevationAccuracy
## 303             NA        <NA>              <NA>        NA                NA
## 1200            NA        <NA>              <NA>        NA                NA
##      endDayOfYear           eventDate eventID eventRemarks      eventTime
## 303          <NA> 2022-10-30T19:31:00    <NA>         <NA> 19:31:00-05:00
## 1200         <NA> 2021-10-15T10:43:00    <NA>         <NA> 10:43:00-05:00
##         family familyKey fieldNotes fieldNumber footprintSRS footprintWKT
## 303  Noctuidae      7015       <NA>        <NA>         <NA>         <NA>
## 1200 Noctuidae      7015       <NA>        <NA>         <NA>         <NA>
##                   fullCountry     gbifID genericName      genus genusKey
## 303  United States of America 3963352661  Spodoptera Spodoptera  1765207
## 1200                   Canada 3398680105  Spodoptera Spodoptera  1765207

Echemos un vistazo a estos registros:

lonzero = subset(acgeo, lon==0)
# show all records, only the first 13 columns
lonzero[, 1:13]

##  [1] acceptedNameUsage      acceptedScientificName acceptedTaxonKey      
##  [4] accessRights           adm1                   adm2                  
##  [7] associatedReferences   associatedSequences    associatedTaxa        
## [10] basisOfRecord          bibliographicCitation  catalogNumber         
## [13] class                 
## <0 rows> (or 0-length row.names)

Registros duplicados

# which records are duplicates (only for the first 10 columns)?
dups <- duplicated(lonzero[, 1:10])
# remove duplicates
lonzero  <-  lonzero[dups, ]
lonzero[,1:13]

##  [1] acceptedNameUsage      acceptedScientificName acceptedTaxonKey      
##  [4] accessRights           adm1                   adm2                  
##  [7] associatedReferences   associatedSequences    associatedTaxa        
## [10] basisOfRecord          bibliographicCitation  catalogNumber         
## [13] class                 
## <0 rows> (or 0-length row.names)

dups2 <- duplicated(acgeo[, c('lon', 'lat')])
# number of duplicates
sum(dups2)

## [1] 1709

## [1] 483
# keep the records that are _not_ duplicated
acg <- acgeo[!dups2, ]

i <- acg$lon > 0 & acg$lat > 0
acg$lon[i] <- -1 * acg$lon[i]
acg$lat[i] <- -1 * acg$lat[i]
acg <- acg[acg$lon < -50 & acg$lat > -50, ]

Comprobación cruzada

library(sp)
coordinates(acg) <- ~lon+lat
crs(acg) <- crs(wrld_simpl)
class(acg)

## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"

class(wrld_simpl)

## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"

## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
ovr <- over(acg, wrld_simpl)

head(ovr)

##    FIPS ISO2 ISO3  UN          NAME   AREA   POP2005 REGION SUBREGION      LON
## 2  <NA> <NA> <NA>  NA          <NA>     NA        NA     NA        NA       NA
## 3    US   US  USA 840 United States 915896 299846449     19        21  -98.606
## 4    MX   MX  MEX 484        Mexico 190869 104266392     19        13 -102.535
## 5  <NA> <NA> <NA>  NA          <NA>     NA        NA     NA        NA       NA
## 8    US   US  USA 840 United States 915896 299846449     19        21  -98.606
## 11 <NA> <NA> <NA>  NA          <NA>     NA        NA     NA        NA       NA
##       LAT
## 2      NA
## 3  39.622
## 4  23.951
## 5      NA
## 8  39.622
## 11     NA

cntr <- ovr$NAME

(1) ¿Qué puntos (identificados por sus números de registro) no coinciden con ningún país (es decir, están en un océano)? (No hay ninguno (porque ya eliminamos los puntos que mapearon en el océano)).

(2) Qué puntos tienen coordenadas que se encuentran en un país diferente al que figura en el campo ‘país’ del registro gbif

i <- which(is.na(cntr))
i

##   [1]    1    4    6    8   16   20   21   40   41   50   56   57   58   60   65
##  [16]   67   68   69   71   74   75   76   77   79   82   83   89   93  103  109
##  [31]  113  114  116  118  120  121  127  130  135  138  140  141  152  154  164
##  [46]  165  174  176  177  180  182  183  186  187  191  192  193  199  200  201
##  [61]  203  204  205  206  211  212  213  217  218  219  222  223  225  226  239
##  [76]  240  241  248  250  251  263  264  266  268  270  271  272  273  276  278
##  [91]  279  281  283  284  289  290  291  292  294  295  307  308  309  311  312
## [106]  314  315  316  324  327  328  329  331  335  340  353  357  359  360  367
## [121]  374  375  383  384  389  390  391  397  398  401  403  404  406  408  419
## [136]  425  445  451  466  480  489  500  512  513  515  516  521  523  526  527
## [151]  531  537  596  612  649  655  657  661  665  667  676  677  688  693  695
## [166]  710  717  722  723  727  736  738  743  745  750  754  852  859  863  864
## [181]  882  887  890  891  912  928  933  934  937  938  968  984  985  988  992
## [196]  993  994  997  999 1003 1004 1005 1006 1010 1014 1019 1020 1021 1024 1031
## [211] 1038 1039 1040 1041 1045 1049 1050 1056 1058 1059 1062 1071 1072 1075 1079
## [226] 1080 1081 1082 1084 1085 1092 1099 1100 1103 1105 1106 1107 1108 1110 1112
## [241] 1116 1117 1118 1119 1120 1122 1123 1124 1125 1126 1127 1128 1129 1131 1132
## [256] 1134 1136 1137 1138 1139 1140 1141 1142 1145 1147 1148 1150 1151 1152 1153
## [271] 1164 1165 1172 1173 1178 1181 1182 1183 1186 1187 1188 1189 1191 1193 1195
## [286] 1197 1198 1199 1200 1201 1202 1203 1206 1207 1208 1211 1222 1223 1240 1243
## [301] 1244 1249 1250 1254 1255 1256 1258 1259 1260 1267 1268 1271 1274 1275 1276
## [316] 1288 1290 1294 1296 1297 1307 1308 1311 1317 1318 1319 1322 1323 1324 1326
## [331] 1327 1328 1332 1334 1337 1342 1343 1349 1376 1378 1389 1391 1392 1393 1394
## [346] 1413 1426 1427 1428 1429 1430 1431 1433 1436 1438 1439 1440 1444 1446 1451
## [361] 1457 1458 1460 1496 1498 1499 1507 1509 1510 1511 1512 1514 1515 1518 1523
## [376] 1533 1534 1541 1542 1547 1548 1549 1563 1569 1570 1571 1572 1574 1581 1582
## [391] 1583 1585 1586 1587 1591 1592 1593 1604 1605 1606 1608 1616 1617 1618 1619
## [406] 1621 1622 1623 1624 1629 1630 1633 1635 1636 1637 1638 1639 1644 1645 1646
## [421] 1648 1650 1651 1660 1661 1665 1666 1667 1698 1699 1702 1703 1704 1720 1728
## [436] 1729 1744 1745 1783 1786 1787 1807 1825 1844 1845 1846 1847 1861 1863 1886
## [451] 1888 1890 1892 1914 1919 1944 1945 1946 1947 1948 1949 1950 1951 1952 1954
## [466] 1960 1963 1966 1967 1968 1969 1970 1971 1974 1975 1976 1978 1979 1980 1981
## [481] 1986 1987 1992 1996 1999 2000 2004 2005 2006 2054 2065 2093 2103 2223 2224
## [496] 2274 2293 2294 2308 2311 2312 2314 2315 2325 2351 2364 2365 2366 2384 2397
## [511] 2432 2468 2469 2474 2476 2478 2481 2482 2503 2507 2526 2534 2565 2576 2603
## [526] 2634 2641 2642 2646 2647 2649 2650 2656

## integer(0)
j <- which(cntr != acg$country)
# for the mismatches, bind the country names of the polygons and points
cbind(cntr, acg$country)[j,]

##       cntr                    
##  [1,] "143" "United States"   
##  [2,] "143" "United States"   
##  [3,] "143" "United States"   
##  [4,] "143" "United States"   
##  [5,] "143" "United States"   
##  [6,] "143" "United States"   
##  [7,] "143" "United States"   
##  [8,] "143" "United States"   
##  [9,] "143" "United States"   
## [10,] "143" "United States"   
## [11,] "143" "United States"   
## [12,] "143" "United States"   
## [13,] "143" "United States"   
## [14,] "143" "United States"   
## [15,] "143" "United States"   
## [16,] "143" "United States"   
## [17,] "143" "United States"   
## [18,] "143" "United States"   
## [19,] "143" "United States"   
## [20,] "143" "United States"   
## [21,] "143" "United States"   
## [22,] "143" "United States"   
## [23,] "143" "United States"   
## [24,] "91"  "Mexico"          
## [25,] "91"  "Mexico"          
## [26,] "91"  "Mexico"          
## [27,] "91"  "Mexico"          
## [28,] "143" "United States"   
## [29,] "143" "United States"   
## [30,] "143" "United States"   
## [31,] "143" "United States"   
## [32,] "172" "India"           
## [33,] "172" "India"           
## [34,] "172" "India"           
## [35,] "172" "India"           
## [36,] "172" "India"           
## [37,] "172" "India"           
## [38,] "143" "United States"   
## [39,] "143" "United States"   
## [40,] "172" "India"           
## [41,] "172" "India"           
## [42,] "172" "India"           
## [43,] "172" "India"           
## [44,] "172" "India"           
## [45,] "172" "India"           
## [46,] "143" "United States"   
## [47,] "143" "United States"   
## [48,] "172" "India"           
## [49,] "172" "India"           
## [50,] "172" "India"           
## [51,] "172" "India"           
## [52,] "172" "India"           
## [53,] "46"  "Pakistan"        
## [54,] "46"  "Pakistan"        
## [55,] "234" "Canada"          
## [56,] "172" "India"           
## [57,] "172" "India"           
## [58,] "172" "India"           
## [59,] "172" "India"           
## [60,] "143" "United States"   
## [61,] "143" "United States"   
## [62,] "143" "United States"   
## [63,] "172" "India"           
## [64,] "172" "India"           
## [65,] "143" "United States"   
## [66,] "143" "United States"   
## [67,] "209" "French Guiana"   
## [68,] "143" "United States"   
## [69,] "158" "Costa Rica"      
## [70,] "41"  "United States"   
## [71,] "172" "India"           
## [72,] "172" "India"           
## [73,] "172" "India"           
## [74,] "172" "India"           
## [75,] "46"  "Pakistan"        
## [76,] "89"  "Saint-Barthélemy"
## [77,] "172" "India"

plot(acg)
plot(wrld_simpl, add=T, border='blue', lwd=2)
points(acg[j, ], col='red', pch=20, cex=2)

georef <- subset(Spodoptera, (is.na(lon) | is.na(lat)) & ! is.na(locality) )
dim(georef)

## [1] 355 163

## [1] 131  25
georef[1:3,1:13]

##     acceptedNameUsage                acceptedScientificName acceptedTaxonKey
## 99               <NA> Spodoptera frugiperda J.E.Smith, 1797          5109855
## 101              <NA> Spodoptera frugiperda J.E.Smith, 1797          5109855
## 359              <NA> Spodoptera frugiperda J.E.Smith, 1797          5109855
##     accessRights adm1 adm2 associatedReferences
## 99          <NA> <NA> <NA>                 <NA>
## 101         <NA> <NA> <NA>                 <NA>
## 359         <NA> <NA> <NA>                 <NA>
##                                     associatedSequences associatedTaxa
## 99  https://www.ebi.ac.uk/ena/browser/api/embl/OP649581           <NA>
## 101 https://www.ebi.ac.uk/ena/browser/api/embl/OP649582           <NA>
## 359 https://www.ebi.ac.uk/ena/browser/api/embl/MZ429433          maize
##       basisOfRecord bibliographicCitation catalogNumber   class
## 99  MATERIAL_SAMPLE                  <NA>          <NA> Insecta
## 101 MATERIAL_SAMPLE                  <NA>          <NA> Insecta
## 359 MATERIAL_SAMPLE                  <NA>          <NA> Insecta

georef$cloc[4]

## [1] "Assam,Kokrajhar, India"

Sesgo de muestreo

r <- raster(acg)
# set the resolution of the cells to (for example) 1 degree
res(r) <- 1
# expand (extend) the extent of the RasterLayer a little
r <- extend(r, extent(r)+1)
# sample:
acsel <- gridSample(acg, r, n=1)
# to illustrate the method and show the result
p <- rasterToPolygons(r)
plot(p, border='gray')
points(acg)
# selected points in red
points(acsel, cex=1, col='red', pch='x')

Puntos de ausencia y de fondo

library(dismo)
# get the file names
files <- list.files(path=paste(system.file(package="dismo"), '/ex',
                       sep=''),  pattern='grd',  full.names=TRUE )
# we use the first file to create a RasterLayer
mask <- raster(files[1])
# select 500 random points
# set seed to assure that the examples will always
# have the same random sample.
set.seed(1963)
bg <- randomPoints(mask, 500 )

par(mfrow=c(1,2))
plot(!is.na(mask), legend=FALSE)
points(bg, cex=0.5)
# now we repeat the sampling, but limit
# the area of sampling using a spatial extent
e <- extent(-80, -53, -39, -22)
bg2 <- randomPoints(mask, 50, ext=e)
plot(!is.na(mask), legend=FALSE)
plot(e, add=TRUE, col='red')
points(bg2, cex=0.5)

Llamar datos de Bioclim para America

path <- file.path(system.file(package="dismo"), 'ex')

library(dismo)
files <- list.files(path, pattern='grd$', full.names=TRUE )
files

## [1] "C:/Users/luisc/AppData/Local/R/win-library/4.2/dismo/ex/bio1.grd" 
## [2] "C:/Users/luisc/AppData/Local/R/win-library/4.2/dismo/ex/bio12.grd"
## [3] "C:/Users/luisc/AppData/Local/R/win-library/4.2/dismo/ex/bio16.grd"
## [4] "C:/Users/luisc/AppData/Local/R/win-library/4.2/dismo/ex/bio17.grd"
## [5] "C:/Users/luisc/AppData/Local/R/win-library/4.2/dismo/ex/bio5.grd" 
## [6] "C:/Users/luisc/AppData/Local/R/win-library/4.2/dismo/ex/bio6.grd" 
## [7] "C:/Users/luisc/AppData/Local/R/win-library/4.2/dismo/ex/bio7.grd" 
## [8] "C:/Users/luisc/AppData/Local/R/win-library/4.2/dismo/ex/bio8.grd" 
## [9] "C:/Users/luisc/AppData/Local/R/win-library/4.2/dismo/ex/biome.grd"

predictors <- stack(files)
predictors

## class      : RasterStack 
## dimensions : 192, 186, 35712, 9  (nrow, ncol, ncell, nlayers)
## resolution : 0.5, 0.5  (x, y)
## extent     : -125, -32, -56, 40  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## names      : bio1, bio12, bio16, bio17, bio5, bio6, bio7, bio8, biome 
## min values :  -23,     0,     0,     0,   61, -212,   60,  -66,     1 
## max values :  289,  7682,  2458,  1496,  422,  242,  461,  323,    14

## class      : RasterStack
## dimensions : 192, 186, 35712, 9  (nrow, ncol, ncell, nlayers)
## resolution : 0.5, 0.5  (x, y)
## extent     : -125, -32, -56, 40  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs
## names      : bio1, bio12, bio16, bio17, bio5, bio6, bio7, bio8, biome
## min values :  -23,     0,     0,     0,   61, -212,   60,  -66,     1
## max values :  289,  7682,  2458,  1496,  422,  242,  461,  323,    14
names(predictors)

## [1] "bio1"  "bio12" "bio16" "bio17" "bio5"  "bio6"  "bio7"  "bio8"  "biome"

## [1] "bio1"  "bio12" "bio16" "bio17" "bio5"  "bio6"  "bio7"  "bio8"  "biome"
plot(predictors)

library(spThin)

## Warning: package 'spThin' was built under R version 4.2.2

## Loading required package: spam

## Warning: package 'spam' was built under R version 4.2.2

## Spam version 2.9-1 (2022-08-07) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction 
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.

## 
## Attaching package: 'spam'

## The following objects are masked from 'package:base':
## 
##     backsolve, forwardsolve

## Loading required package: grid

## Loading required package: fields

## Warning: package 'fields' was built under R version 4.2.2

## Loading required package: viridis

## Loading required package: viridisLite

## 
## Try help(fields) to get started.

## Loading required package: knitr

Llmara datos de Bioclim para todo el mundo

dir.create(path = "data")

## Warning in dir.create(path = "data"): 'data' already exists

dir.create(path = "output")

## Warning in dir.create(path = "output"): 'output' already exists

bioclim_data <- getData(name = "worldclim",
                        var = "bio",
                        res = 2.5,
                        path = "data/")

## Warning in getData(name = "worldclim", var = "bio", res = 2.5, path = "data/"): getData will be removed in a future version of raster
## . Please use the geodata package instead

predictors <- stack(bioclim_data)
predictors

## class      : RasterStack 
## dimensions : 3600, 8640, 31104000, 19  (nrow, ncol, ncell, nlayers)
## resolution : 0.04166667, 0.04166667  (x, y)
## extent     : -180, 180, -60, 90  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## names      :  bio1,  bio2,  bio3,  bio4,  bio5,  bio6,  bio7,  bio8,  bio9, bio10, bio11, bio12, bio13, bio14, bio15, ... 
## min values :  -278,     9,     8,    64,   -86,  -559,    53,  -278,  -501,  -127,  -506,     0,     0,     0,     0, ... 
## max values :   319,   213,    96, 22704,   489,   258,   725,   376,   365,   382,   289, 10577,  2437,   697,   265, ...

names(predictors)

##  [1] "bio1"  "bio2"  "bio3"  "bio4"  "bio5"  "bio6"  "bio7"  "bio8"  "bio9" 
## [10] "bio10" "bio11" "bio12" "bio13" "bio14" "bio15" "bio16" "bio17" "bio18"
## [19] "bio19"

plot(predictors)

library(progress)

library(reshape2)

library(vip)

## Warning: package 'vip' was built under R version 4.2.2

## 
## Attaching package: 'vip'

## The following object is masked from 'package:utils':
## 
##     vi

library(pdp)

## Warning: package 'pdp' was built under R version 4.2.2

library(fastshap)

## Warning: package 'fastshap' was built under R version 4.2.2

## 
## Attaching package: 'fastshap'

## The following object is masked from 'package:vip':
## 
##     gen_friedman

library(ResourceSelection)

## Warning: package 'ResourceSelection' was built under R version 4.2.2

## ResourceSelection 0.3-5   2019-07-22

library(CalibratR)

## Warning: package 'CalibratR' was built under R version 4.2.2

library(ape)

## Warning: package 'ape' was built under R version 4.2.2

## 
## Attaching package: 'ape'

## The following objects are masked from 'package:raster':
## 
##     rotate, zoom

library(ranger)

## Warning: package 'ranger' was built under R version 4.2.2

library(mgcv)

## Warning: package 'mgcv' was built under R version 4.2.2

## Loading required package: nlme

## 
## Attaching package: 'nlme'

## The following object is masked from 'package:raster':
## 
##     getData

## This is mgcv 1.8-41. For overview type 'help("mgcv-package")'.

library(ecospat)

## Warning: package 'ecospat' was built under R version 4.2.2

Mapa de una sola capa en un Rasterstack de Spodoptera fugiperda en el continente de america, con prevalencia en Estados Unidos

library(maptools)
data(wrld_simpl)
plot(predictors, 1)
plot(wrld_simpl, add=TRUE)
points(acg, col='blue')

library(dismo)
presvals <- extract(predictors, acg)
set.seed(0)
backgr <- randomPoints(acg, 500)

## Warning in randomPoints(acg, 500): changed n to ncell of the mask (extent)

absvals <- extract(predictors, backgr)
pb <- c(rep(1, nrow(presvals)), rep(0, nrow(absvals)))
sdmdata <- data.frame(cbind(pb, rbind(presvals, absvals)))

head(sdmdata)

##   pb bio1 bio2 bio3 bio4 bio5 bio6 bio7 bio8 bio9 bio10 bio11 bio12 bio13 bio14
## 1  1  223   90   36 5473  334   87  247  267  159   286   147   761   143    27
## 2  1  231  128   42 6124  374   73  301  274  162   302   145   539   112    16
## 3  1  233  121   44 5374  362   87  275  273  173   294   156   548   101    12
## 4  1   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA    NA    NA
## 5  1  199  123   38 6924  356   35  321  237  122   284   106   834   112    40
## 6  1   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA    NA    NA
##   bio15 bio16 bio17 bio18 bio19
## 1    48   316   119   194   128
## 2    58   229    65   162    72
## 3    54   209    71   157    87
## 4    NA    NA    NA    NA    NA
## 5    32   276   161   181   166
## 6    NA    NA    NA    NA    NA

tail(sdmdata)

##      pb bio1 bio2 bio3  bio4 bio5 bio6 bio7 bio8 bio9 bio10 bio11 bio12 bio13
## 2775  0   NA   NA   NA    NA   NA   NA   NA   NA   NA    NA    NA    NA    NA
## 2776  0   NA   NA   NA    NA   NA   NA   NA   NA   NA    NA    NA    NA    NA
## 2777  0  111  135   31 10280  324 -105  429  216  -27   240   -27   778   111
## 2778  0   NA   NA   NA    NA   NA   NA   NA   NA   NA    NA    NA    NA    NA
## 2779  0   NA   NA   NA    NA   NA   NA   NA   NA   NA    NA    NA    NA    NA
## 2780  0   33  123   24 12880  270 -224  494  188 -144   188  -144   487    82
##      bio14 bio15 bio16 bio17 bio18 bio19
## 2775    NA    NA    NA    NA    NA    NA
## 2776    NA    NA    NA    NA    NA    NA
## 2777    15    53   313    58   306    58
## 2778    NA    NA    NA    NA    NA    NA
## 2779    NA    NA    NA    NA    NA    NA
## 2780    13    58   219    45   219    45

summary(sdmdata)

##        pb              bio1            bio2            bio3      
##  Min.   :0.0000   Min.   :  5.0   Min.   : 52.0   Min.   :22.00  
##  1st Qu.:1.0000   1st Qu.:132.0   1st Qu.: 99.0   1st Qu.:34.00  
##  Median :1.0000   Median :189.0   Median :121.0   Median :37.00  
##  Mean   :0.9565   Mean   :171.8   Mean   :114.8   Mean   :38.42  
##  3rd Qu.:1.0000   3rd Qu.:219.0   3rd Qu.:126.0   3rd Qu.:39.50  
##  Max.   :1.0000   Max.   :283.0   Max.   :191.0   Max.   :89.00  
##                   NA's   :137     NA's   :137     NA's   :137    
##       bio4            bio5            bio6             bio7      
##  Min.   :  152   Min.   : 93.0   Min.   :-245.0   Min.   : 89.0  
##  1st Qu.: 5491   1st Qu.:306.0   1st Qu.: -42.0   1st Qu.:252.0  
##  Median : 6937   Median :332.0   Median :  24.0   Median :321.0  
##  Mean   : 6834   Mean   :321.6   Mean   :  12.5   Mean   :309.1  
##  3rd Qu.: 8106   3rd Qu.:349.0   3rd Qu.:  81.0   3rd Qu.:353.0  
##  Max.   :12880   Max.   :419.0   Max.   : 227.0   Max.   :495.0  
##  NA's   :137     NA's   :137     NA's   :137      NA's   :137    
##       bio8            bio9            bio10           bio11        
##  Min.   :-49.0   Min.   :-157.0   Min.   : 31.0   Min.   :-159.00  
##  1st Qu.:198.0   1st Qu.:  63.0   1st Qu.:233.5   1st Qu.:  23.00  
##  Median :232.0   Median : 124.0   Median :275.0   Median :  99.00  
##  Mean   :215.5   Mean   : 108.7   Mean   :255.7   Mean   :  79.94  
##  3rd Qu.:266.0   3rd Qu.: 160.0   3rd Qu.:284.0   3rd Qu.: 146.00  
##  Max.   :310.0   Max.   : 290.0   Max.   :321.0   Max.   : 270.00  
##  NA's   :137     NA's   :137      NA's   :137     NA's   :137      
##      bio12            bio13           bio14           bio15       
##  Min.   :   3.0   Min.   :  1.0   Min.   :  0.0   Min.   :  6.00  
##  1st Qu.: 769.0   1st Qu.:108.0   1st Qu.: 27.0   1st Qu.: 19.00  
##  Median : 870.0   Median :124.0   Median : 43.0   Median : 32.00  
##  Mean   : 987.6   Mean   :135.9   Mean   : 45.8   Mean   : 34.95  
##  3rd Qu.:1136.0   3rd Qu.:144.0   3rd Qu.: 63.0   3rd Qu.: 48.00  
##  Max.   :5022.0   Max.   :756.0   Max.   :209.0   Max.   :167.00  
##  NA's   :137      NA's   :137     NA's   :137     NA's   :137     
##      bio16          bio17           bio18            bio19       
##  Min.   :   2   Min.   :  0.0   Min.   :   2.0   Min.   :   0.0  
##  1st Qu.: 278   1st Qu.:119.0   1st Qu.: 190.0   1st Qu.: 128.0  
##  Median : 314   Median :159.0   Median : 223.0   Median : 164.0  
##  Mean   : 345   Mean   :163.4   Mean   : 260.3   Mean   : 196.5  
##  3rd Qu.: 340   3rd Qu.:213.0   3rd Qu.: 303.0   3rd Qu.: 243.0  
##  Max.   :1963   Max.   :704.0   Max.   :1304.0   Max.   :1683.0  
##  NA's   :137    NA's   :137     NA's   :137      NA's   :137

## guardo las dos fuentes de datos 

saveRDS(sdmdata, "sdm.Rds")
saveRDS(presvals, "pvals.Rds")

#Model fitting, prediction, and evaluation

library(dismo)
sdmdata <- readRDS("sdm.Rds")
presvals <- readRDS("pvals.Rds")

Eliminar Bio17 y Bio16 de acuerdo al grafico de correlacion de variables.

pred_nf <- dropLayer(predictors, 'bio17','bio16')

## grupo de train and test
set.seed(0)
group <- kfold(acg, 5)
pres_train <- acg[group != 1, ]
pres_test <- acg[group == 1, ]

ext <- extent(-80, -72, -7, 13)

set.seed(10)
backg <- randomPoints(pred_nf, n=1000, ext=ext, extf = 1.25)
colnames(backg) = c('lon', 'lat')
group <- kfold(backg, 5)
backg_train <- backg[group != 1, ]
backg_test <- backg[group == 1, ]

r <- raster(pred_nf, 1)
plot(!is.na(r), col=c('white', 'light grey'), legend=FALSE)
plot(ext, add=TRUE, col='red', lwd=2)
points(backg_train, pch='-', cex=0.5, col='yellow')
points(backg_test, pch='-',  cex=0.5, col='black')
points(pres_train, pch= '+', col='green')
points(pres_test, pch='+', col='blue')

Perfil de los metodos Bioclim

bc <- bioclim(pred_nf, pres_train)
plot(bc, a=1, b=2, p=0.95)

Evaluacion del modelo

e_1 <- evaluate(pres_test, backg_test, bc, pred_nf)
e_1

## class          : ModelEvaluation 
## n presences    : 516 
## n absences     : 200 
## AUC            : 0.9762258 
## cor            : 0.5228352 
## max TPR+TNR at : 0.01237601

tr <- threshold(e_1, 'spec_sens')
tr

## [1] 0.01237601

pb <- predict(pred_nf, bc, ext=ext, progress='')
pb

## class      : RasterLayer 
## dimensions : 480, 192, 92160  (nrow, ncol, ncell)
## resolution : 0.04166667, 0.04166667  (x, y)
## extent     : -80, -72, -7, 13  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : memory
## names      : layer 
## values     : 0, 0.02399232  (min, max)

Modelacion Bioclim de Spordoptera fugiperda para Colombia, Panama, Ecuador, Venezuela

par(mfrow=c(1,2))
plot(pb, main='Bioclim, raw values')
plot(wrld_simpl, add=TRUE, border='dark grey')
plot(pb > tr, main='presence/absence')
plot(wrld_simpl, add=TRUE, border='dark grey')
points(pres_train, pch='+')

plot(pb > tr, main='presence/absence')
plot(wrld_simpl, add=TRUE, border='dark grey')
points(pres_train, pch='+')

Modelacion Domain de Spordoptera fugiperda para Colombia, Panama, Ecuador y Venezuela.

dm <- domain(pred_nf, pres_train)
e_dm <- evaluate(pres_test, backg_test, dm, pred_nf)
e_dm

## class          : ModelEvaluation 
## n presences    : 516 
## n absences     : 200 
## AUC            : 0.98203 
## cor            : 0.8286281 
## max TPR+TNR at : 0.4631216

pd = predict(pred_nf, dm, ext=ext, progress='')
par(mfrow=c(1,2))
plot(pd, main='Domain, raw values')
plot(wrld_simpl, add=TRUE, border='dark grey')

points(pres_train, pch='+')

tr <- threshold(e_dm, 'spec_sens')
plot(pd > tr, main='presence/absence')
plot(wrld_simpl, add=TRUE, border='dark grey')
points(pres_train, pch='+')

según el modelo de maxent las variables predictoras de mayor contribución son la bio3 y la bio 11 las cuales tienen que ver con temperatura.

maxent()

## This is MaxEnt version 3.4.3

xm <- maxent(predictors, pres_train)

## Warning in .local(x, p, ...): 37 (3.14%) of the presence points have NA
## predictor values

plot(xm)

e_mx <- evaluate(pres_test, backg_test, xm, predictors)
e_mx

## class          : ModelEvaluation 
## n presences    : 516 
## n absences     : 200 
## AUC            : 0.9822965 
## cor            : 0.8502373 
## max TPR+TNR at : 0.2339765

px <- predict(predictors, xm, ext=ext, progress='')
par(mfrow=c(1,2))
plot(px, main='Maxent, raw values')
plot(wrld_simpl, add=TRUE, border='dark grey')
tr <- threshold(e_mx, 'spec_sens')
plot(px > tr, main='presence/absence')
plot(wrld_simpl, add=TRUE, border='dark grey')
points(pres_train, pch='+')

tr <- threshold(e_mx, 'spec_sens')
plot(px > tr, main='presence/absence')
plot(wrld_simpl, add=TRUE, border='dark grey')
points(pres_train, pch='+')

COMPARACION DE MODELOS

Modelo bioclim

plot(e_1, 'ROC')

Modelo domain

plot(e_dm, 'ROC')

Modelo Maxent

plot(e_mx, 'ROC')

Mejor modelo:

Para la lepidoptera plaga (Spodoptera frugiperda), el modelo domain y maxent fueron los que presentaron mejores resultados en relacion al AUC, con un AUC= 0.982 para los dos modelos, esto se debe a la gran cantidad de datos sesgados en la evaluacion.

MDS spodoptera

Luis Castillo

2022-12-03

Preparación de datos de Spodoptera frugiperda

Limpieza de datos

Mejor modelo: