Load the relevant libraries

If you haven’t installed any of these packages yet, you can install them with the install.packages() function in R.

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(sp) 
library(activity)
library(overlap)

## This is overlap devel version 0.3.4.
## For overview type ?overlap; for changes do news(p='overlap').

library(plyr)

## ------------------------------------------------------------------------------

## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)

## ------------------------------------------------------------------------------

## 
## Attaching package: 'plyr'

## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

## The following object is masked from 'package:purrr':
## 
##     compact

Load the relevant dataframes

The dataframes used here are as follows:

Independent captures of species
The forest integrity (FLII) covariate values
The guild information (diet and body mass) and IUCN conservation status of species
The common names of each species

#Read every input table from its CSV file (read.csv() already returns a data.frame)
caps <- read.csv("ECL captures summer spp projects_20210322.csv")
mammal_guild <- read.csv("Species_info-20220512.csv")
ebird_data <- read.csv("bird_guild_info_20221031_eBird.csv")
birdlife_data <- read.csv("bird_guild_info_20221031_BirdLife.csv")
birdtree_data <- read.csv("bird_guild_info_20221031_BirdTree.csv")
common_names <- read.csv("species_names_for_sam_20220527.csv")

Cleaning the captures dataframe

Here, we filter out unclear taxa, domesticated taxa and humans, and correct the species names of certain taxa.

#Check the names of species present within the captures dataframe
caps$Species %>% unique() %>% sort()

##   [1] "Accipitridae_spp"           "Acridotheres_javanicus"    
##   [3] "Alcedinidae_spp"            "Alophoixus_phaeocephalus"  
##   [5] "Amaurornis_phoenicurus"     "Anthreptes_malacensis"     
##   [7] "Arborophila_charltonii"     "Arborophila_sp."           
##   [9] "Arctictis_binturong"        "Arctogalidia_trivirgata"   
##  [11] "Arctonyx_collaris"          "Argus_sp."                 
##  [13] "Argusianus_argus"           "Atherurus_macrourus"       
##  [15] "bat"                        "bat_bird?"                 
##  [17] "bird"                       "Bird_Pitta"                
##  [19] "Bos_gaurus"                 "Bos_javanicus"             
##  [21] "Bos_taurus"                 "Bubalus_bubalis"           
##  [23] "Bucerotidae_spp"            "Callosciurus_finlaysonii"  
##  [25] "Callosciurus_notatus"       "Callosciurus_prevostii"    
##  [27] "Canis_aureus"               "Canis_lupus_familiaris"    
##  [29] "Capricornis_milneedwardsii" "Capricornis_sp."           
##  [31] "Capricornis_sumatraensis"   "Carpococcyx_radiceus"      
##  [33] "Catopuma_badia"             "Catopuma_temminckii"       
##  [35] "Centropus_sinensis"         "Cervidae_spp"              
##  [37] "Chalcophaps_indica"         "Copsychus stricklandii"    
##  [39] "Copsychus_malabaricus"      "Copsychus_pyrropygus"      
##  [41] "Copsychus_saularis"         "Crested_fireback_pheasant" 
##  [43] "Crested_fireback_pheasent"  "Cuon_alpinus"              
##  [45] "Cynogale_bennettii"         "Dendronanthus_indicus"     
##  [47] "Dinopium_benghalense"       "Ducula_badia"              
##  [49] "Echinosorex_gymnura"        "Elephas_maximus"           
##  [51] "Elephas_maximus_Domestic"   "Enicurus_leschenaulti"     
##  [53] "Error"                      "Erythropitta_ussheri"      
##  [55] "Eupetes_macrocerus"         "Ficedula_zanthopygia"      
##  [57] "Gallus_gallus"              "Gallus_gallus_domesticus"  
##  [59] "Gallus_sp."                 "Garrulax_canorus"          
##  [61] "Garrulax_palliatus"         "Geokichla_interpres"       
##  [63] "Ghost"                      "Gracula_religiosa"         
##  [65] "Helarctos_malayanus"        "Hemigalus_derbyanus"       
##  [67] "Herpestes_brachyurus"       "Herpestes_javanicus"       
##  [69] "Herpestes_urva"             "Herpestidae_spp"           
##  [71] "Homo_sapiens"               "Homo_sapiens_hiker"        
##  [73] "Homo_sapiens_Poacher"       "Homo_sapiens_ranger"       
##  [75] "Homo_sapiens_researcher"    "Homo_sapiens_tourist"      
##  [77] "Hydrornis_baudii"           "Hydrornis_irena"           
##  [79] "Hystricidae_spp"            "Hystrix_brachyura"         
##  [81] "Hystrix_crassispinis"       "Insect"                    
##  [83] "Kenopia_striata"            "Lariscus_hosei"            
##  [85] "Lariscus_insignis"          "Leopoldamys_sabanus"       
##  [87] "Lizard"                     "Lophura_bulweri"           
##  [89] "Lophura_ignita"             "Lophura_rufa"              
##  [91] "Lutrinae?"                  "Macaca_arctoides"          
##  [93] "Macaca_fascicularis"        "Macaca_nemestrina"         
##  [95] "Macaca_sp."                 "Malacocincla_malaccensis"  
##  [97] "Malacopteron_affine"        "Mammalia?"                 
##  [99] "Manidae_spp"                "Manis_javanica"            
## [101] "Martes_flavigula"           "Maxomys_sp."               
## [103] "Motacilla_sp."              "Muntiacus_montanus"        
## [105] "Muntiacus_muntjak"          "Muntiacus_sp."             
## [107] "Muridae_spp"                "Mustela_nudipes"           
## [109] "Mydaus_marchei"             "Neofelis_nebulosa"         
## [111] "Nisaetus_alboniger"         "Nisaetus_cirrhatus"        
## [113] "Orthotomus_atrogularis"     "Paguma_larvata"            
## [115] "Panthera_pardus"            "Panthera_tigris"           
## [117] "Paradoxurus_hermaphroditus" "Pardofelis_marmorata"      
## [119] "Pellorneidae_spp"           "Pellorneum_capistratum"    
## [121] "Pellorneum_malaccense"      "Pernis_apivorus"           
## [123] "Phasianidae_spp"            "Pitta_sordida"             
## [125] "Polyplectron_malacense"     "Pongo_sp."                 
## [127] "Presbytis_melalophos"       "Presbytis_rubicunda"       
## [129] "Presbytis_siamensis"        "Presbytis_sp."             
## [131] "Prionailurus_bengalensis"   "Prionailurus_planiceps"    
## [133] "Prionodon_linsang"          "Rallina_fasciata"          
## [135] "Rattus_sp."                 "Rattus_tiomanicus"         
## [137] "Ratufa_bicolor"             "remove"                    
## [139] "Rhinosciurus_laticaudatus"  "Rhipidura_javanica"        
## [141] "Rhizomys_sumatrensis"       "Rodent"                    
## [143] "Rollulus_rouloul"           "Rusa_unicolor"             
## [145] "Scandentia?"                "Scincidae_spp"             
## [147] "Sciuridae_spp"              "Scorpion"                  
## [149] "SetUp_Collect"              "Small_mammal?"             
## [151] "Soricidae_spp"              "Spilornis_cheela"          
## [153] "Strigiformes"               "Suncus_murinus"            
## [155] "Sundasciurus_hippurus"      "Sus_barbatus"              
## [157] "Sus_scrofa"                 "Sus_scrofa_Domestic"       
## [159] "Symphalangus_syndactylus"   "Synoicus_chinensis"        
## [161] "Tamiops_sp."                "Tapirus_indicus"           
## [163] "Tarsiidae_spp"              "Trachypithecus_obscurus"   
## [165] "Tragulus_sp."               "Trichys_fasciculata"       
## [167] "Tupaia_glis"                "Tupaia_sp."                
## [169] "unknown"                    "Unknown"                   
## [171] "Ursidae_spp"                "Ursus_thibetanus"          
## [173] "Varanus_nebulosus"          "Varanus_rudicollis"        
## [175] "Varanus_salvator"           "Varanus_sp."               
## [177] "Viverra_megaspila"          "Viverra_tangalunga"        
## [179] "Viverra_zibetha"            "Viverricula_indica"        
## [181] "Zoothera_citrina"

#Sumatran surveys (BBS, Kerinci and Leuser): relabel Hystricidae_spp as Hystrix_brachyura
caps$Species[caps$Species == "Hystricidae_spp" &
               caps$survey_id %in% c("BBS", "Kerinci", "Leuser")] <- "Hystrix_brachyura"

#Sumatran surveys (BBS, Kerinci and Leuser): relabel Arctonyx collaris as Arctonyx hoevenii
caps$Species[caps$Species == "Arctonyx_collaris" &
               caps$survey_id %in% c("BBS", "Kerinci", "Leuser")] <- "Arctonyx_hoevenii"

#Relabel Lophura rufa as Lophura ignita for now, so that Lophura ignita represents all crested
#fireback pheasants found in our sites. (*The body masses and guild info were checked beforehand
#and both species share the same body mass and diet, so this change is fine in this case)
caps$Species[caps$Species == "Lophura_rufa"] <- "Lophura_ignita"

#Drop the Danum_2020 survey (covariates were not calculated for this survey)
caps <- caps %>%
  filter(survey_id != "Danum_2020")

#Drop humans, domesticated taxa and unclear taxa
caps <- caps %>%
  filter(!(Species %in% c(
    'Accipitridae_spp', 'Alcedinidae_spp', 'Arborophila_sp.', 'Argus_sp.',
    'bat', 'bat_bird?', 'bird', 'Bird_Pitta',
    'Bucerotidae_spp', 'Capricornis_sp.', 'Cervidae_spp',
    'Crested_fireback_pheasant', 'Crested_fireback_pheasent',
    'Elephas_maximus_Domestic', 'Error', 'Gallus_gallus_domesticus',
    'Gallus_sp.', 'Ghost', 'Herpestidae_spp',
    'Homo_sapiens', 'Homo_sapiens_hiker', 'Homo_sapiens_Poacher',
    'Homo_sapiens_ranger', 'Homo_sapiens_researcher', 'Homo_sapiens_tourist',
    'Insect', 'Lizard', 'Lutrinae?', 'Macaca_sp.',
    'Mammalia?', 'Manidae_spp', 'Maxomys_sp.', 'Motacilla_sp.',
    'Muridae_spp', 'Pellorneidae_spp', 'Phasianidae_spp', 'Pongo_sp.',
    'Presbytis_sp.', 'Rattus_sp.', 'remove', 'Rodent',
    'Scandentia?', 'Scincidae_spp', 'Sciuridae_spp', 'Scorpion',
    'SetUp_Collect', 'Small_mammal?', 'Soricidae_spp', 'Strigiformes',
    'Sus_scrofa_Domestic', 'Tamiops_sp.', 'Tarsiidae_spp', 'Tupaia_sp.',
    'unknown', 'Unknown', 'Ursidae_spp', 'Varanus_sp.',
    'Muntiacus_sp.', 'Canis_lupus_familiaris', 'Bos_taurus'
  )))

Removing rodents, treeshrews and birds less than 1 kg

Here, we remove rodents, treeshrews and birds weighing less than 1 kg because of uncertainty in species identification at certain sites. We do this using the guild information mentioned above.

#Build a dataframe holding only the bird captures (let's filter birds first!)
bird_caps <- caps %>%
  filter(Species %in% c(
    'Acridotheres_javanicus', 'Alophoixus_phaeocephalus', 'Amaurornis_phoenicurus',
    'Anthreptes_malacensis', 'Arborophila_charltonii', 'Argusianus_argus',
    'Carpococcyx_radiceus', 'Centropus_sinensis', 'Chalcophaps_indica',
    'Copsychus_malabaricus', 'Copsychus_pyrropygus', 'Copsychus_saularis',
    'Dendronanthus_indicus', 'Dinopium_benghalense', 'Ducula_badia',
    'Enicurus_leschenaulti', 'Erythropitta_ussheri', 'Eupetes_macrocerus',
    'Ficedula_zanthopygia', 'Gallus_gallus', 'Garrulax_canorus',
    'Garrulax_palliatus', 'Geokichla_interpres', 'Gracula_religiosa',
    'Hydrornis_baudii', 'Hydrornis_irena', 'Kenopia_striata',
    'Lophura_bulweri', 'Lophura_ignita', 'Malacopteron_affine',
    'Nisaetus_alboniger', 'Nisaetus_cirrhatus', 'Orthotomus_atrogularis',
    'Pellorneum_capistratum', 'Pellorneum_malaccense', 'Pernis_apivorus',
    'Pitta_sordida', 'Polyplectron_malacense', 'Rallina_fasciata',
    'Rhipidura_javanica', 'Rollulus_rouloul', 'Spilornis_cheela',
    'Synoicus_chinensis', 'Zoothera_citrina'
  ))

Accounting for all birds and their guild information

#Swap every underscore in the bird species names for an actual space
bird_caps$Species <- gsub("_", " ", as.character(bird_caps$Species), fixed = TRUE)

#Check which birdlife metadata columns hold the body mass and trophic guild info to extract
colnames(birdlife_data)

##  [1] "Sequence"           "Species1"           "Family1"           
##  [4] "Order1"             "Avibase.ID1"        "Total.individuals" 
##  [7] "Female"             "Male"               "Unknown"           
## [10] "Complete.measures"  "Beak.Length_Culmen" "Beak.Length_Nares" 
## [13] "Beak.Width"         "Beak.Depth"         "Tarsus.Length"     
## [16] "Wing.Length"        "Kipps.Distance"     "Secondary1"        
## [19] "Hand.Wing.Index"    "Tail.Length"        "Mass"              
## [22] "Mass.Source"        "Mass.Refs.Other"    "Inference"         
## [25] "Traits.inferred"    "Reference.species"  "Habitat"           
## [28] "Habitat.Density"    "Migration"          "Trophic.Level"     
## [31] "Trophic.Niche"      "Primary.Lifestyle"  "Min.Latitude"      
## [34] "Max.Latitude"       "Centroid.Latitude"  "Centroid.Longitude"
## [37] "Range.Size"

#Reduce the birdlife metadata to three named columns: Species (from Species1),
#Guild (from Trophic.Level) and Body_mass (Mass divided by 1000, i.e. the mass in kg).
#transmute() names the output columns directly instead of renaming them by position.
birdlife_data <- birdlife_data %>%
  dplyr::transmute(
    Species = Species1,
    Guild = Trophic.Level,
    Body_mass = Mass / 1000
  )

#Check which birdtree metadata columns hold the body mass and trophic guild info to extract
colnames(birdtree_data)

##  [1] "Species3"           "Family3"            "Order3"            
##  [4] "Total.individuals"  "Female"             "Male"              
##  [7] "Unknown"            "Complete.measures"  "Beak.Length_Culmen"
## [10] "Beak.Length_Nares"  "Beak.Width"         "Beak.Depth"        
## [13] "Tarsus.Length"      "Wing.Length"        "Kipps.Distance"    
## [16] "Secondary1"         "Hand.Wing.Index"    "Tail.Length"       
## [19] "Mass"               "Mass.Source"        "Mass.Refs.Other"   
## [22] "Inference"          "Traits.inferred"    "Reference.species" 
## [25] "Habitat"            "Habitat.Density"    "Migration"         
## [28] "Trophic.Level"      "Trophic.Niche"      "Primary.Lifestyle" 
## [31] "Min.Latitude"       "Max.Latitude"       "Centroid.Latitude" 
## [34] "Centroid.Longitude" "Range.Size"         "Species.Status"

#Reduce the birdtree metadata to three named columns: Species (from Species3),
#Guild (from Trophic.Level) and Body_mass (Mass divided by 1000, i.e. the mass in kg).
#transmute() names the output columns directly instead of renaming them by position.
birdtree_data <- birdtree_data %>%
  dplyr::transmute(
    Species = Species3,
    Guild = Trophic.Level,
    Body_mass = Mass / 1000
  )

#Check which ebird metadata columns hold the body mass and trophic guild info to extract
colnames(ebird_data)

##  [1] "Species2"           "Family2"            "Order2"            
##  [4] "Avibase.ID2"        "Total.individuals"  "Female"            
##  [7] "Male"               "Unknown"            "Complete.measures" 
## [10] "Beak.Length_Culmen" "Beak.Length_Nares"  "Beak.Width"        
## [13] "Beak.Depth"         "Tarsus.Length"      "Wing.Length"       
## [16] "Kipps.Distance"     "Secondary1"         "Hand.Wing.Index"   
## [19] "Tail.Length"        "Mass"               "Mass.Source"       
## [22] "Mass.Refs.Other"    "Inference"          "Traits.inferred"   
## [25] "Reference.species"  "Habitat"            "Habitat.Density"   
## [28] "Migration"          "Trophic.Level"      "Trophic.Niche"     
## [31] "Primary.Lifestyle"

#Reduce the ebird metadata to three named columns: Species (from Species2),
#Guild (from Trophic.Level) and Body_mass (Mass divided by 1000, i.e. the mass in kg).
#transmute() names the output columns directly instead of renaming them by position.
ebird_data <- ebird_data %>%
  dplyr::transmute(
    Species = Species2,
    Guild = Trophic.Level,
    Body_mass = Mass / 1000
  )

Here, we will compare each of the bird guild dataframes with our bird captures dataframe to see whether there are any bird species missing from the guild dataframes.

#Captured bird species that have no row in the birdlife guild table
bird_caps$Species %>% setdiff(birdlife_data$Species)

## [1] "Copsychus malabaricus" "Pellorneum malaccense" "Copsychus pyrropygus" 
## [4] "Zoothera citrina"

#Captured bird species that have no row in the birdtree guild table
bird_caps$Species %>% setdiff(birdtree_data$Species)

## [1] "Erythropitta ussheri"   "Pellorneum malaccense"  "Geokichla interpres"   
## [4] "Hydrornis baudii"       "Hydrornis irena"        "Carpococcyx radiceus"  
## [7] "Synoicus chinensis"     "Copsychus pyrropygus"   "Acridotheres javanicus"

#Captured bird species that have no row in the ebird guild table
bird_caps$Species %>% setdiff(ebird_data$Species)

## [1] "Arborophila charltonii" "Zoothera citrina"

So, it seems that the ebird_data dataframe has the fewest differences. We can extract the guild information of the two missing bird species, namely Arborophila charltonii and Zoothera citrina, from the other two bird guild dataframes.

#Fetch the guild row of each species absent from ebird_data from a table that contains it
missing_bird <- filter(birdlife_data, Species == "Arborophila charltonii")
missing_bird2 <- filter(birdtree_data, Species == "Zoothera citrina")

#Append both rows to ebird_data
ebird_data <- rbind(ebird_data, missing_bird, missing_bird2)

#Nice, now everything is accounted for!
bird_caps$Species %>% setdiff(ebird_data$Species)

## character(0)

Since we don’t need these dataframes anymore, we can remove them and keep the environment clean.

#Drop the guild tables and helper rows that are no longer needed
rm(list = c("birdlife_data", "birdtree_data", "missing_bird", "missing_bird2"))

Now, we can remove bird species that are less than 1 kg

#Join the guild information onto the bird captures by Species,
#then keep only the rows whose Body_mass is above 1
bird_caps <- bird_caps %>%
  merge(ebird_data, by = "Species") %>%
  filter(Body_mass > 1)

#The guild table is no longer needed
rm(list = "ebird_data")

Accounting for all mammals and their guild information

#Build the mammal captures dataframe by dropping every species in the list below
mammal_caps <- caps %>%
  filter(!(Species %in% c(
    'Acridotheres_javanicus', 'Alophoixus_phaeocephalus', 'Amaurornis_phoenicurus',
    'Anthreptes_malacensis', 'Arborophila_charltonii', 'Argusianus_argus',
    'Carpococcyx_radiceus', 'Centropus_sinensis', 'Chalcophaps_indica',
    'Copsychus_malabaricus', 'Copsychus_pyrropygus', 'Copsychus_saularis',
    'Dendronanthus_indicus', 'Dinopium_benghalense', 'Ducula_badia',
    'Enicurus_leschenaulti', 'Erythropitta_ussheri', 'Eupetes_macrocerus',
    'Ficedula_zanthopygia', 'Gallus_gallus', 'Garrulax_canorus',
    'Garrulax_palliatus', 'Geokichla_interpres', 'Gracula_religiosa',
    'Hydrornis_baudii', 'Hydrornis_irena', 'Kenopia_striata',
    'Lophura_bulweri', 'Lophura_ignita', 'Malacocincla_malaccensis',
    'Malacopteron_affine', 'Nisaetus_alboniger', 'Nisaetus_cirrhatus',
    'Orthotomus_atrogularis', 'Pellorneum_capistratum', 'Pellorneum_malaccense',
    'Pernis_apivorus', 'Pitta_sordida', 'Polyplectron_malacense',
    'Rallina_fasciata', 'Rhipidura_javanica', 'Rollulus_rouloul',
    'Spilornis_cheela', 'Synoicus_chinensis', 'Zoothera_citrina',
    'Bubalus_bubalis'
  )))

Here, we will need to organise our mammal_guild dataframe so that we can merge with captures later. We also need to check whether all mammal guild information is accounted for.

#Reduce the mammal guild table to three named columns: Species (from genus_species),
#Guild (unchanged) and Body_mass (the original Body_mass divided by 1000).
#transmute() names the output columns directly instead of renaming them by position.
mammal_guild <- mammal_guild %>%
  dplyr::transmute(
    Species = genus_species,
    Guild,
    Body_mass = Body_mass / 1000
  )

#See whether all mammals are accounted for!
mammal_caps$Species %>% setdiff(mammal_guild$Species)

## character(0)

Now we can remove rodents and treeshrews that are less than 1 kg.

#Join the guild information onto the mammal captures by Species
mammal_caps <- mammal_caps %>%
  merge(mammal_guild, by = "Species")

#List the species that remain after the join
mammal_caps$Species %>% unique() %>% sort()

##  [1] "Arctictis_binturong"        "Arctogalidia_trivirgata"   
##  [3] "Arctonyx_collaris"          "Arctonyx_hoevenii"         
##  [5] "Atherurus_macrourus"        "Bos_gaurus"                
##  [7] "Callosciurus_finlaysonii"   "Callosciurus_notatus"      
##  [9] "Callosciurus_prevostii"     "Canis_aureus"              
## [11] "Capricornis_milneedwardsii" "Capricornis_sumatraensis"  
## [13] "Catopuma_temminckii"        "Cuon_alpinus"              
## [15] "Echinosorex_gymnura"        "Elephas_maximus"           
## [17] "Helarctos_malayanus"        "Hemigalus_derbyanus"       
## [19] "Herpestes_brachyurus"       "Herpestes_javanicus"       
## [21] "Herpestes_urva"             "Hystrix_brachyura"         
## [23] "Hystrix_crassispinis"       "Lariscus_hosei"            
## [25] "Lariscus_insignis"          "Leopoldamys_sabanus"       
## [27] "Macaca_arctoides"           "Macaca_fascicularis"       
## [29] "Macaca_nemestrina"          "Manis_javanica"            
## [31] "Martes_flavigula"           "Muntiacus_montanus"        
## [33] "Muntiacus_muntjak"          "Mustela_nudipes"           
## [35] "Mydaus_marchei"             "Neofelis_nebulosa"         
## [37] "Paguma_larvata"             "Panthera_pardus"           
## [39] "Panthera_tigris"            "Paradoxurus_hermaphroditus"
## [41] "Pardofelis_marmorata"       "Presbytis_melalophos"      
## [43] "Presbytis_rubicunda"        "Presbytis_siamensis"       
## [45] "Prionailurus_bengalensis"   "Prionailurus_planiceps"    
## [47] "Prionodon_linsang"          "Rattus_tiomanicus"         
## [49] "Ratufa_bicolor"             "Rhinosciurus_laticaudatus" 
## [51] "Rhizomys_sumatrensis"       "Rusa_unicolor"             
## [53] "Suncus_murinus"             "Sundasciurus_hippurus"     
## [55] "Sus_barbatus"               "Sus_scrofa"                
## [57] "Symphalangus_syndactylus"   "Tapirus_indicus"           
## [59] "Trachypithecus_obscurus"    "Tragulus_sp."              
## [61] "Trichys_fasciculata"        "Tupaia_glis"               
## [63] "Ursus_thibetanus"           "Varanus_nebulosus"         
## [65] "Varanus_salvator"           "Viverra_megaspila"         
## [67] "Viverra_tangalunga"         "Viverra_zibetha"           
## [69] "Viverricula_indica"

#Drop the listed small rodent and treeshrew species from the mammal captures
mammal_caps <- mammal_caps %>%
  filter(!(Species %in% c(
    'Callosciurus_finlaysonii', 'Callosciurus_notatus', 'Callosciurus_prevostii',
    'Lariscus_hosei', 'Lariscus_insignis', 'Leopoldamys_sabanus',
    'Rattus_tiomanicus', 'Rhinosciurus_laticaudatus', 'Sundasciurus_hippurus',
    'Tupaia_glis'
  )))

Now, we can combine the bird and mammal captures datasets.

#Stack the mammal captures on top of the bird captures
caps <- mammal_caps %>% rbind(bird_caps)

#Keep environment clean!
rm(list = c("mammal_caps", "bird_caps", "mammal_guild"))

Accounting for all reptiles and their guild information

There are only two reptiles that are identifiable and are frequently captured by our cameras, namely the Asian water monitor (Varanus salvator) and the Clouded monitor (Varanus nebulosus). Their captures were already included within the mammal captures dataframe before combining with bird captures. Their guild information is derived from previously published journal articles rather than a database. The respective links are as follows:

#Overwrite the guild of both monitor lizard species with 'carnivore'
caps$Guild <- replace(
  caps$Guild,
  caps$Species %in% c('Varanus_salvator', 'Varanus_nebulosus'),
  'carnivore'
)

#Overwrite the body mass of each monitor lizard species with its own value
caps$Body_mass <- replace(caps$Body_mass, caps$Species == 'Varanus_salvator', 19.5)
caps$Body_mass <- replace(caps$Body_mass, caps$Species == 'Varanus_nebulosus', 4.3)

Accounting for daylength variation

Here, we will use the camera coordinates and the sunTime() function to account for daylength variation. Since Southeast Asia (SEA) is near the equator, daylength does not vary much, but it is good practice to do this! The radian time produced is expressed relative to the local sunrise and sunset times. For instance, if a certain camera location has a sunrise time of 0630 hr, the sunTime() function will treat that time as 1.57 rad (π/2), and likewise the local sunset time as 4.71 rad (3π/2).

Organizing camera coordinates for each camera trap survey

Since we are dealing with multiple surveys, there will be multiple time zones and therefore we need to split the dataset based on each survey’s respective time zones.

#Put back "_" for bird species to make it consistent throughout
caps$Species <- gsub(" ", "_", as.character(caps$Species), fixed = TRUE)

#Load in the metadata which contains the camera coordinates
#(read.csv() already returns a data.frame)
ECL_metadata <- read.csv("ECL and Collaborator Camera Trap Metadata_20220802.csv")

#Check what columns are present
ECL_metadata %>% names() %>% sort()

##  [1] "baited"                 "Budd_pop"               "Budd_pop_10km"         
##  [4] "Budd_pop_1km"           "Budd_pop_3km"           "Budd_pop_5km"          
##  [7] "camera_end.date"        "camera_id"              "camera_start.date"     
## [10] "camera_type"            "Canopy_closure"         "Canopy_Height"         
## [13] "Christ_pop"             "Christ_pop_10km"        "Christ_pop_1km"        
## [16] "Christ_pop_3km"         "Christ_pop_5km"         "degraded_percent_1000" 
## [19] "degraded_percent_10000" "degraded_percent_3000"  "degraded_percent_5000" 
## [22] "effort"                 "elevation"              "forest_integrity"      
## [25] "forest_percent_1000"    "forest_percent_10000"   "forest_percent_3000"   
## [28] "forest_percent_5000"    "Forest_type"            "habitat"               
## [31] "human_foot"             "Islam_pop"              "Islam_pop_10km"        
## [34] "Islam_pop_1km"          "Islam_pop_3km"          "Islam_pop_5km"         
## [37] "Landscape"              "Latitude"               "LDF_percent_1000"      
## [40] "LDF_percent_10000"      "LDF_percent_3000"       "LDF_percent_5000"      
## [43] "LEF_percent_1000"       "LEF_percent_10000"      "LEF_percent_3000"      
## [46] "LEF_percent_5000"       "LM_percent_1000"        "LM_percent_10000"      
## [49] "LM_percent_3000"        "LM_percent_5000"        "LMDF_percent_1000"     
## [52] "LMDF_percent_10000"     "LMDF_percent_3000"      "LMDF_percent_5000"     
## [55] "LMEF_percent_1000"      "LMEF_percent_10000"     "LMEF_percent_3000"     
## [58] "LMEF_percent_5000"      "LO_percent_1000"        "LO_percent_10000"      
## [61] "LO_percent_3000"        "LO_percent_5000"        "Longitude"             
## [64] "notes"                  "OP_percent_1000"        "OP_percent_10000"      
## [67] "OP_percent_3000"        "OP_percent_5000"        "RP_percent_1000"       
## [70] "RP_percent_10000"       "RP_percent_3000"        "RP_percent_5000"       
## [73] "source"                 "survey_id"              "Total_pop"             
## [76] "Total_pop_10km"         "Total_pop_1km"          "Total_pop_3km"         
## [79] "Total_pop_5km"          "trail_status"           "UB_percent_1000"       
## [82] "UB_percent_10000"       "UB_percent_3000"        "UB_percent_5000"       
## [85] "UMEF_percent_1000"      "UMEF_percent_10000"     "UMEF_percent_3000"     
## [88] "UMEF_percent_5000"      "UTM_zone"               "X"                     
## [91] "Y"                      "year"

#List the distinct values of the Landscape column in alphabetical order
ECL_metadata$Landscape %>% unique() %>% sort()

##  [1] "Bukit_Barisan_Selatan_National_Park" "Bulong_Reserve_Bulangshan_section"  
##  [3] "Bulong_Reserve_Mengsong_section"     "Danum_Valley_Conservation_Area"     
##  [5] "Dong_Yai_Wildlife_Sanctuary"         "Gunung_Leuser_National_Park"        
##  [7] "Halmahera_Weda"                      "Htaung_Pru_Reserved_Forest"         
##  [9] "Huai_Kha_Khaeng_Wildlife_Sanctuary"  "Kerinci_Seblat_National_Park"       
## [11] "Khao_Banthat_Wildlife_Sanctuary"     "Khao_Yai_National_Park"             
## [13] "Khlong_Saeng_Wildlife_Sanctuary"     "Kon_Plong_Vietnam"                  
## [15] "Lambir_Hills_National_Park"          "Mengao_Subreserve"                  
## [17] "Mengla_Subreserve"                   "Menglun_Subreserve"                 
## [19] "Myinmoletkat_Taung_Foothills"        "Nabanhe_National_Reserve"           
## [21] "Nakai_Nam_Theun_Protected_Area"      "Nam_Kading_National_Protected_Area" 
## [23] "New_Guinea_Arfak_Mts"                "New_Guinea_Nimbokrang"              
## [25] "Pang_Sida_National_Park"             "Pasoh_Forest_Reserve"               
## [27] "Sabah_Maliau"                        "Sabah_Ulu_Padas"                    
## [29] "Sarawak_Hose_mtns"                   "Sarawak_Mulu"                       
## [31] "Sarawak_Pulong_Tau"                  "Sarawak_Ulu_Baram"                  
## [33] "Sarawak_Ulu_Trusan"                  "Singapore"                          
## [35] "Sulawesi_Buton_north"                "Sulawesi_Buton_south"               
## [37] "Sulawesi_Tangkoko"                   "Ta_Phraya_National_Park"            
## [39] "Thap_Lan_National_Park"              "Ulu_Muda_Forest_Reserve"

#Keep only the landscapes we need, then only the columns we need
ECL_metadata <- ECL_metadata %>%
  filter(Landscape %in% c(
    'Bukit_Barisan_Selatan_National_Park', 'Danum_Valley_Conservation_Area',
    'Gunung_Leuser_National_Park', 'Khao_Yai_National_Park',
    'Lambir_Hills_National_Park', 'Singapore',
    'Ulu_Muda_Forest_Reserve', 'Pasoh_Forest_Reserve',
    'Kerinci_Seblat_National_Park'
  )) %>%
  select(camera_id, survey_id, Landscape, Longitude, Latitude)

After sorting through the metadata, we are still missing camera coordinates from the ECL survey done in Khao Chong in 2018. Here, we will incorporate these missing coordinates from another metadata file.

#Load in the second metadata file (read.csv() already returns a data.frame)
ECL_metadata_2 <- read.csv("Full_Metadata_by_Camera.csv")

#Check what columns are present
colnames(ECL_metadata_2)

##  [1] "X"                    "survey_id"            "region"              
##  [4] "country"              "site"                 "effort"              
##  [7] "size_km2"             "Protected_area"       "Y_lat"               
## [10] "X_long"               "logging"              "logging_obs"         
## [13] "edge_1km"             "year_start"           "year_end"            
## [16] "monthstart"           "monthfinish"          "n_points"            
## [19] "study_id"             "location_id"          "n_cameras"           
## [22] "cam_spacing"          "area_cover_km2"       "indent_cap_mins"     
## [25] "AltitudeMin"          "AltitudeMax"          "forest_type"         
## [28] "study_notes"          "veg_notes"            "study_author"        
## [31] "fragment_id"          "camera_id"            "Source"              
## [34] "debug"                "Elevation"            "Slope"               
## [37] "AnnualPrecipitation"  "ForestCover250m"      "ForestCover500m"     
## [40] "ForestCover1K"        "ForestCover2K"        "ForestCover3K"       
## [43] "ForestCover5K"        "ForestCover10K"       "forest_integrity250m"
## [46] "forest_integrity500m" "forest_integrity1K"   "forest_integrity2K"  
## [49] "forest_integrity3K"   "forest_integrity5K"   "forest_integrity10K" 
## [52] "Intactness_index250m" "Intactness_index500m" "Intactness_index1K"  
## [55] "Intactness_index2K"   "Intactness_index3K"   "Intactness_index5K"  
## [58] "Intactness_index10K"  "HumanPop1K"           "HumanPop5K"          
## [61] "HumanPop10K"          "NightLights250m"      "NightLights500m"     
## [64] "NightLights1K"        "NightLights2K"        "NightLights3K"       
## [67] "NightLights5K"        "NightLights10K"       "MinNightLights250m"  
## [70] "MinNightLights500m"   "MinNightLights1K"     "MinNightLights2K"    
## [73] "MinNightLights3K"     "MinNightLights5K"     "MinNightLights10K"   
## [76] "MaxNightLights250m"   "MaxNightLights500m"   "MaxNightLights1K"    
## [79] "MaxNightLights2K"     "MaxNightLights3K"     "MaxNightLights5K"    
## [82] "MaxNightLights10K"    "HumanFootprint1K"     "HumanFootprint2K"    
## [85] "HumanFootprint3K"     "HumanFootprint5K"     "HumanFootprint10K"   
## [88] "Budd_Pop_1km"         "Christ_Pop_1km"       "Islam_Pop_1km"       
## [91] "OilPalm1K"            "OilPalm2K"            "OilPalm3K"           
## [94] "OilPalm5K"            "OilPalm10K"

#Select columns that we need and only obtain the data for KhaoChong2018 survey
ECL_metadata_2 <- ECL_metadata_2 %>%
  select(camera_id, survey_id, Y_lat, X_long) %>%
  filter(survey_id == 'KhaoChong2018.ECL')

#Rename the survey_id and camera_id to match those found in the ECL_metadata dataframe.
#The patterns are wrapped in fixed() so that stringr matches them as literal text;
#as plain regular expressions every "." would match ANY character, so e.g. ".ECL"
#would also strip "xECL" from an identifier.
ECL_metadata_2$survey_id <- str_remove_all(ECL_metadata_2$survey_id, fixed(".ECL"))
ECL_metadata_2$camera_id <- str_remove_all(ECL_metadata_2$camera_id, fixed("KhaoChong2018.ECL."))

#Drop the Landscape column from ECL_metadata so both metadata tables have the same columns
ECL_metadata <- select(ECL_metadata, -Landscape)

#Give ECL_metadata_2 the column names used by ECL_metadata
names(ECL_metadata_2) <- c("camera_id", "survey_id", "Latitude", "Longitude")

#Stack both metadata tables; rbind() on data frames matches columns by name,
#so the differing Latitude/Longitude order of the two tables does not matter
ECL_metadata <- ECL_metadata %>% rbind(ECL_metadata_2)

#ECL_metadata_2 is no longer needed
rm(list = "ECL_metadata_2")

Before proceeding to next part, let us compare both the captures dataframe and the ECL_metadata dataframe to check whether all cameras are accounted for.

#List the camera_ids that occur in the captures but have no row in ECL_metadata
setdiff(caps$camera_id, ECL_metadata$camera_id) #Check whether all cameras are accounted for

## [1] "SR5_sorted"  "LM198_CAM35" "LM201-CAM68" "C13_sorted"  "SR9_sorted"

Unfortunately, the coordinates of five cameras are still missing, namely “SR5_sorted”, “LM198_CAM35”, “LM201-CAM68”, “C13_sorted” and “SR9_sorted”. We therefore asked our collaborator, Mr. Jonathan Moore, who assisted in setting up those cameras, to provide the coordinates.

#Read the remaining coordinates given by Jon and append them to the ECL_metadata dataframe
ECL_metadata_3 <- read.csv("ECL_missing_coordinates_20221103.csv")
ECL_metadata <- rbind(ECL_metadata, ECL_metadata_3)

#ECL_metadata_3 has served its purpose, so remove it to keep environment clean
rm(ECL_metadata_3)

Now, let us do a final check to see whether all cameras have been accounted for.

#Repeat the check: an empty result means every camera in the captures now has a metadata row
setdiff(caps$camera_id, ECL_metadata$camera_id) #All good!

## character(0)

Now, we will merge the ECL_metadata dataframe with the captures dataframe.

#The survey_id names in ECL_metadata do not match those in the captures dataframe,
#so drop the column and let camera_id be the only shared key
ECL_metadata <- select(ECL_metadata, -survey_id)

#Attach the camera coordinates to the captures (rows are matched on camera_id)
caps <- merge(x = caps, y = ECL_metadata, by = 'camera_id')

Using the sunTime() function

Thailand surveys

#Select Thailand surveys
thai <- filter(caps, survey_id %in% c('KhaoYai2019', 'KhaoChong2018'))

#Clock time of each photo in radians, which will be used in circular analyses later on
clock_rad <- gettime(thai$Photo.Time, format = "%H:%M:%S", scale = c("radian"))

#Photo dates parsed as day/month/year in the Bangkok time zone
photo_date <- as.POSIXct(thai$Photo.Date, tz = "Asia/Bangkok", format = '%d/%m/%Y')

#Create a SpatialPoints object with the camera locations (longitude first, then latitude).
#The CRS is plain WGS84 long/lat. The earlier string also carried "+epsg=4087", which is not
#a PROJ parameter and names a projected (metre-based) system that contradicts +proj=longlat.
cam_xy <- data.frame(thai$Longitude, thai$Latitude)
cam_points <- sp::SpatialPoints(cam_xy, proj4string = sp::CRS("+proj=longlat +datum=WGS84"))

#Correct for sunrise and sunset time based on lat and long, and store the result
#in the main thailand dataset
thai$time.rad <- sunTime(clock_rad, photo_date, cam_points)

#Keep environment clean!
rm(cam_xy, cam_points, photo_date, clock_rad)

Now, we do the same for all the other surveys (i.e., Malaysia, Singapore and Sumatra).

Malaysian and Singapore surveys

#Select Malaysian and Singapore surveys
my_sg <- filter(caps, survey_id %in% c('Danum_Valley_2019a', 'Danum2018', 'Lambir2017', 'Pasoh_TEAM_2013', 'Pasoh_TEAM_2014', 'Pasoh_TEAM_2015', 'Pasoh_TEAM_2017', 'Singapore', 'Ulu_Muda_2015a', 'Ulu_Muda_2015b',
'Ulu_Muda_2015c', 'Ulu_Muda_2015d', 'Ulu_Muda_2016a', 'Ulu_Muda_2016b', 'Ulu_Muda_2016c'))

#Clock time of each photo in radians, which will be used in circular analyses later on
clock_rad <- gettime(my_sg$Photo.Time, format = "%H:%M:%S", scale = c("radian"))

#Photo dates parsed as day/month/year in the Singapore time zone
photo_date <- as.POSIXct(my_sg$Photo.Date, tz = "Asia/Singapore", format = '%d/%m/%Y')

#Create a SpatialPoints object with the camera locations (longitude first, then latitude).
#The CRS is plain WGS84 long/lat. The earlier string also carried "+epsg=4087", which is not
#a PROJ parameter and names a projected (metre-based) system that contradicts +proj=longlat.
cam_xy <- data.frame(my_sg$Longitude, my_sg$Latitude)
cam_points <- sp::SpatialPoints(cam_xy, proj4string = sp::CRS("+proj=longlat +datum=WGS84"))

#Correct for sunrise and sunset time based on lat and long, and store the result
#in the main Malaysia and Singapore dataset
my_sg$time.rad <- sunTime(clock_rad, photo_date, cam_points)

#Keep environment clean!
rm(cam_xy, cam_points, photo_date, clock_rad)

Sumatran surveys

#Select sumatran surveys
#(the object is called `sum` because later steps refer to it by that name; calls such as
#sum(x) still reach the base function, since R looks for a function when a name is called)
sum <- filter(caps, survey_id %in% c('BBS', 'Kerinci', 'Leuser'))

#Clock time of each photo in radians, which will be used in circular analyses later on
clock_rad <- gettime(sum$Photo.Time, format = "%H:%M:%S", scale = c("radian"))

#Photo dates parsed as day/month/year in the Jakarta time zone
photo_date <- as.POSIXct(sum$Photo.Date, tz = "Asia/Jakarta", format = '%d/%m/%Y')

#Create a SpatialPoints object with the camera locations (longitude first, then latitude).
#The CRS is plain WGS84 long/lat. The earlier string also carried "+epsg=4087", which is not
#a PROJ parameter and names a projected (metre-based) system that contradicts +proj=longlat.
cam_xy <- data.frame(sum$Longitude, sum$Latitude)
cam_points <- sp::SpatialPoints(cam_xy, proj4string = sp::CRS("+proj=longlat +datum=WGS84"))

#Correct for sunrise and sunset time based on lat and long, and store the result
#in the main sumatra dataset
sum$time.rad <- sunTime(clock_rad, photo_date, cam_points)

#Keep environment clean!
rm(cam_xy, cam_points, photo_date, clock_rad)

Now, let us merge all the respective time zones together!

#Stack the three regional datasets back into a single captures dataframe
#(Malaysia/Singapore first, then Thailand, then Sumatra)
caps <- rbind(my_sg, thai, sum)

#Keep environment clean!
rm(my_sg, sum, thai, ECL_metadata)

Including the disturbance proxy: Forest Landscape Integrity Index (FLII)

Here, we will include the forest landscape integrity index (FLII) at each of our camera locations. The FLII values can be extracted from the FLII database.

#Read the camera-level covariate dataset and list its columns
covs <- read.csv("ECL_metadata_cam_level_summer_spp_20210322.csv")
names(covs)

##  [1] "camera_id"             "elevation"             "dist_to_edge"         
##  [4] "human_pop_density_1km" "dist_to_river"         "forest_integrity"     
##  [7] "human_footprint"       "forest_cover_1km"      "forest_cover_2km"     
## [10] "degraded_forest_1km"   "degraded_forest_2km"   "oil_palm_1km"         
## [13] "oil_palm_2km"          "forest_loss_1km"       "forest_loss_2km"      
## [16] "survey_id"

#Preview the first rows of the covariate dataset
head(covs)

##   camera_id elevation dist_to_edge human_pop_density_1km dist_to_river
## 1 01_sorted       136    1026.1716                 0.000      2.586365
## 2 02_sorted       267     513.7002               337.502    560.858992
## 3 03_sorted       570    2708.0205                 0.000     89.424243
## 4 04_sorted       428    1070.5300                 0.000    187.522370
## 5 05_sorted       543    1605.1846                 0.000    420.510052
## 6 06_sorted       282     263.1455                 0.000    426.342843
##   forest_integrity human_footprint forest_cover_1km forest_cover_2km
## 1          8.91407              10         98.00000         93.53234
## 2          7.37650              14         74.00000         63.00000
## 3          9.23420               7        100.00000        100.00000
## 4          8.33995              12        100.00000         88.46154
## 5          8.69985               7        100.00000         94.11765
## 6          7.42806              16         67.30769         66.34615
##   degraded_forest_1km degraded_forest_2km oil_palm_1km oil_palm_2km
## 1             2.00000            6.467662            0            0
## 2            26.00000           37.000000            0            0
## 3             0.00000            0.000000            0            0
## 4             0.00000           11.538462            0            0
## 5             0.00000            5.882353            0            0
## 6            32.69231           33.653846            0            0
##   forest_loss_1km forest_loss_2km     survey_id
## 1      0.09876543      0.04938576 KhaoChong2018
## 2      1.48735746      3.95990595 KhaoChong2018
## 3      0.07398274      0.06175889 KhaoChong2018
## 4      0.17335314      0.87871287 KhaoChong2018
## 5      0.29607698      1.22831924 KhaoChong2018
## 6      2.37859267      2.77193417 KhaoChong2018

#Keep only the camera and survey identifiers together with the FLII score
covs <- select(covs, camera_id, survey_id, forest_integrity)

#Include a FLII_status column: cameras strictly above the median FLII of all cameras in
#covs are labelled "Intact", every other camera stays "Degraded"
flii_median <- median(covs$forest_integrity)
covs$FLII_status <- "Degraded"
covs$FLII_status[covs$forest_integrity > flii_median] <- "Intact"
rm(flii_median)

#Check if all cameras are accounted for
#(camera_ids that occur in the captures but have no covariate row)
setdiff(caps$camera_id, covs$camera_id)

## character(0)

#Reverse check: camera_ids that have a covariate row but never occur in the captures.
#These do not matter for the merge below, which only keeps rows matched in both dataframes.
setdiff(covs$camera_id, caps$camera_id) #All the cameras in captures are accounted for so all good!

##  [1] "01_sorted"           "111_sorted"          "113_2017"           
##  [4] "322_2013"            "BBS_02A_SE_CAM_47"   "DVCA-19"            
##  [7] "KYCAM235_sorted"     "Leuser_16B_NW_SD_33" "LM28-CAM86"         
## [10] "LM29-CAM82"          "S_SIL10"             "S_SIL3"             
## [13] "S_SIL6"              "SR4_sorted"          "TB1"                
## [16] "TB4"                 "TB8"                 "F78_2016b"

#Attach the FLII columns to the captures, matching on both camera_id and survey_id
caps <- merge(x = caps, y = covs, by = c('camera_id', 'survey_id'))

#covs is no longer needed, so remove it to keep environment clean
rm(covs)

#Create a dataset for community and guild-level analyses

Here, we will create the dataset for the community- and guild-level analyses. We will also correct the feeding guild of certain species, as more recent research has provided further insight into their diets. Finally, we will determine the trophic guild of each species based on diet (i.e., carnivore, herbivore and omnivore) and body mass (i.e., small (<4kg), medium (4-20kg), large (>20kg)).

#Standardise feeding guild column
#(start by listing every distinct label currently present in caps$Guild)
sort(unique(caps$Guild))

## [1] "carnivore"          "Carnivore"          "frugivore"         
## [4] "herbivore"          "Herbivore"          "herbivore?"        
## [7] "insectivore"        "omnivore"           "picivore_carnivore"

#Collapse the spelling variants and the finer diet classes into three feeding guilds
caps$Guild[caps$Guild %in% c('frugivore', 'Herbivore', 'herbivore?')] <- 'herbivore'
caps$Guild[caps$Guild %in% c('picivore_carnivore', 'Carnivore')] <- 'carnivore'
caps$Guild[caps$Guild %in% 'insectivore'] <- 'omnivore'

#Change the feeding guild of macaques and of Bulwer's pheasant to omnivore
caps$Guild[caps$Species %in% c('Macaca_nemestrina', 'Macaca_arctoides', 'Lophura_bulweri')] <- 'omnivore'

Now, we will begin to determine the trophic guild of each species.

#Size class from body mass: small (< 4), medium (4 to 20 inclusive), large (> 20).
#Records with a missing body mass get no size class.
size_class <- ifelse(caps$Body_mass < 4, 'small',
                     ifelse(caps$Body_mass <= 20, 'medium', 'large'))

#A trophic guild is only assigned when the feeding guild is one of the three
#standardised guilds and the size class is known
has_guild <- caps$Guild %in% c('carnivore', 'herbivore', 'omnivore') & !is.na(size_class)

#Create a column called 'trophic_guild'; records without a guild keep the text 'NA'
caps$trophic_guild <- 'NA'
caps$trophic_guild[has_guild] <- paste(size_class[has_guild], caps$Guild[has_guild], sep = '_')

#Remove the helper vectors again
rm(size_class, has_guild)

Finally, let’s save this dataset for our analyses later.

#Select only the columns that we need
#(first list the columns currently available in caps)
names(caps)

##  [1] "camera_id"         "survey_id"         "Species"          
##  [4] "camera_start.date" "camera_end.date"   "Photo.Date"       
##  [7] "Photo.Time"        "Individuals"       "Guild"            
## [10] "Body_mass"         "Longitude"         "Latitude"         
## [13] "time.rad"          "forest_integrity"  "FLII_status"      
## [16] "trophic_guild"

#Keep the identifier, sun time, FLII class and trophic guild columns for the
#community- and guild-level dataset
comm_guild <- select(caps, c('camera_id', 'survey_id', 'Species', 'time.rad', 'FLII_status', 'trophic_guild'))

#Save it! (FALSE is spelled out because F is an ordinary variable that can be reassigned)
write.csv(comm_guild, 'SEA_Activity_dataset_for_community_guild-level_analyses_20230306.csv', row.names = FALSE)

Create a supplementary table for our community and guild analyses

Here, we will create a supplementary table consisting of sample size and guild information of all species involved in our community- and guild-level analyses. But first, let’s include the total sample size of each species and the sample size of each species in both intact and degraded forests.

#Count the detections of each species within each forest type (one row per type/species pair)
detections_long <- caps %>%
  select(Species, FLII_status) %>%
  group_by(FLII_status) %>%
  dplyr::count(Species, name = 'total_detections')

#Spread the counts so that each species has one row and each forest type its own column
comm_guild_supp <- pivot_wider(detections_long,
                               names_from = FLII_status,
                               values_from = total_detections)
rm(detections_long)

#A species never detected in one forest type has NA there; replace NA with 0
comm_guild_supp[is.na(comm_guild_supp)] <- 0

#Calculate total detections
comm_guild_supp <- mutate(comm_guild_supp, total_detection = Degraded + Intact)

Next, let’s include the common_names of each species.

#Rename the columns of common_names so that it has a 'Species' column to merge on later
names(common_names) <- c('common_name', 'Species')

#Where a species is listed more than once, keep only its first entry
is_repeat <- duplicated(common_names$Species)
common_names <- common_names[!is_repeat, ]
rm(is_repeat)

#Check which species in the captures are missing from the common_names dataframe
setdiff(caps$Species, common_names$Species)# We are missing Ratufa_bicolor and Arctonyx_hoevenii

## [1] "Ratufa_bicolor"    "Arctonyx_hoevenii"

#Common names for the two species that the common_names dataframe does not cover
missing_names <- data.frame(
  common_name = c("Black_giant_squirrel", "Sumatran_hog_badger"),
  Species = c("Ratufa_bicolor", "Arctonyx_hoevenii")
)

#Append them to the common_names dataframe
common_names <- rbind(common_names, missing_names)

#Attach the common names to the comm_guild_supp dataframe
comm_guild_supp <- merge(x = comm_guild_supp, y = common_names, by = 'Species')

#Keep environment clean!
rm(missing_names)

To be more thorough, let’s also include the guild information, conservation status and the number of landscapes detected for each species. We will first extract the guild information from our captures dataset.

#Extract guild info from the captures dataset. caps holds one row per detection, so keep a
#single row (the first) per species BEFORE merging. Merging the full table first would build
#one merged row per detection only to discard almost all of them again.
guild_info <- select(caps, c('Species', 'Guild', 'Body_mass', 'trophic_guild'))
guild_info <- guild_info[!duplicated(guild_info$Species), ]

#Merge it with the comm_guild_supp dataset (at most one guild row per species)
comm_guild_supp <- merge(comm_guild_supp, guild_info, by = 'Species')

#Cheap safeguard: make sure each species still appears only once
comm_guild_supp <- comm_guild_supp[!duplicated(comm_guild_supp$Species),]

#Keep environment clean
rm(guild_info)

Next, let’s include the conservation status of each species.

#Read the species information dataset, which holds the conservation status, and list its columns
iucn <- read.csv('Species_info-20220512.csv')
names(iucn)

## [1] "genus_species" "Type"          "Family"        "Genus"        
## [5] "species"       "IUCN"          "Guild"         "Notes"        
## [9] "Body_mass"

#Keep only the species name and its IUCN status
iucn <- iucn %>% select(genus_species, IUCN)

#Species in the supplementary table that have no row in the iucn dataset
setdiff(comm_guild_supp$Species, iucn$genus_species) #All good!

## character(0)

#Rename genus_species to Species so that both datasets share the merge column
names(iucn) <- c('Species', 'IUCN')

#Attach the conservation status to the supplementary table
comm_guild_supp <- merge(x = comm_guild_supp, y = iucn, by = 'Species')

#There are NAs in the IUCN column. Set the status of the affected species by hand
iucn_by_hand <- c('Lophura_bulweri' = 'VU',
                  'Nisaetus_cirrhatus' = 'LC',
                  'Tragulus_sp.' = 'LC',
                  'Trichys_fasciculata' = 'LC',
                  'Varanus_nebulosus' = 'NT',
                  'Varanus_salvator' = 'LC')
needs_fix <- comm_guild_supp$Species %in% names(iucn_by_hand)
comm_guild_supp$IUCN[needs_fix] <- unname(iucn_by_hand[as.character(comm_guild_supp$Species[needs_fix])])
#If you want to check whether your dataset has any NAs, you can use the is.na() function

#Keep environment clean!
rm(iucn, iucn_by_hand, needs_fix)

Here, we will include the number of landscapes detected for each species.

#Start the landscape lookup from the survey and species of every capture
land <- caps %>% select(survey_id, Species)

#Placeholder text until each survey has been assigned to a landscape
land$landscape <- 'NA'

#List the surveys that need a landscape
sort(unique(land$survey_id))

##  [1] "BBS"                "Danum_Valley_2019a" "Danum2018"         
##  [4] "Kerinci"            "KhaoChong2018"      "KhaoYai2019"       
##  [7] "Lambir2017"         "Leuser"             "Pasoh_TEAM_2013"   
## [10] "Pasoh_TEAM_2014"    "Pasoh_TEAM_2015"    "Pasoh_TEAM_2017"   
## [13] "Singapore"          "Ulu_Muda_2015a"     "Ulu_Muda_2015b"    
## [16] "Ulu_Muda_2015c"     "Ulu_Muda_2015d"     "Ulu_Muda_2016a"    
## [19] "Ulu_Muda_2016b"     "Ulu_Muda_2016c"

#Assign every survey to its landscape (surveys repeated at one site share a landscape)
land$landscape[land$survey_id %in% c('Pasoh_TEAM_2013', 'Pasoh_TEAM_2014', 'Pasoh_TEAM_2015', 'Pasoh_TEAM_2017')] = 'Pasoh_forest_reserve'
land$landscape[land$survey_id %in% c('Ulu_Muda_2015a', 'Ulu_Muda_2015b', 'Ulu_Muda_2016b', 'Ulu_Muda_2016c','Ulu_Muda_2016a', 'Ulu_Muda_2015c', 'Ulu_Muda_2015d')] = 'Ulu_Muda_forest_reserve'
land$landscape[land$survey_id %in% c('Danum_Valley_2019a', 'Danum2018')] = 'Danum_Valley'
land$landscape[land$survey_id == 'BBS'] = 'Bukit_barisan_selatan'
land$landscape[land$survey_id == 'Kerinci'] = 'Kerinci_seblat'
land$landscape[land$survey_id == 'Leuser'] = 'Gunung_leuser'
land$landscape[land$survey_id == 'Lambir2017'] = 'Lambir_hills'
land$landscape[land$survey_id == 'KhaoChong2018'] = 'Khao_Chong'
land$landscape[land$survey_id == 'KhaoYai2019'] = 'Khao_Yai'
#The Singapore survey was previously left on the 'NA' placeholder, so the tenth landscape
#showed up as "NA"; name it explicitly so that no survey is left unassigned
land$landscape[land$survey_id == 'Singapore'] = 'Singapore'

#Check the number of landscapes present (*There should be 10 landscapes and no "NA")
sort(unique(land$landscape))

##  [1] "Bukit_barisan_selatan"   "Danum_Valley"           
##  [3] "Gunung_leuser"           "Kerinci_seblat"         
##  [5] "Khao_Chong"              "Khao_Yai"               
##  [7] "Lambir_hills"            "Pasoh_forest_reserve"   
##  [9] "Singapore"               "Ulu_Muda_forest_reserve"

#Count, for every species, the number of distinct landscapes it was detected in (plyr::ddply)
landscape_detected <- plyr::ddply(land, "Species", plyr::summarise,
                                  landscape_detected = length(unique(landscape)))

#Attach the landscape counts to the comm_guild_supp dataset
comm_guild_supp <- merge(x = comm_guild_supp, y = landscape_detected, by = 'Species')

#Keep environment clean!
rm(land, landscape_detected)

Finally, we can save the dataframe.

#Write the supplementary table without row names
#(FALSE is spelled out because F is an ordinary variable that can be reassigned)
write.csv(comm_guild_supp, 'SEA_Activity_community_guild-level_supplementary_20230307.csv', row.names = FALSE)

Create a dataset for species-level temporal and species overlap analyses

Now, for species-level temporal analyses, we will need to select species with >= 20 detections in both intact and degraded forests. This is to prevent zero errors and provide accuracy to our analyses. First, we will check which species is going to be selected by using the comm_guild_supp dataframe from before.

#Keep only the species with at least 20 detections in degraded forests AND at least 20
#detections in intact forests
sp_supp <- filter(comm_guild_supp, Degraded >= 20, Intact >= 20)

#Save this for the species-level
#(FALSE is spelled out because F is an ordinary variable that can be reassigned)
write.csv(sp_supp, 'SEA_Activity_species-level_supplementary_20230307.csv', row.names = FALSE)

Now, we can create a dataframe for our species-level analyses

#Check which species are present
#(i.e. the species left in sp_supp after the detection filter, in alphabetical order)
sort(unique(sp_supp$Species))

##  [1] "Argusianus_argus"           "Atherurus_macrourus"       
##  [3] "Echinosorex_gymnura"        "Helarctos_malayanus"       
##  [5] "Hemigalus_derbyanus"        "Hystrix_brachyura"         
##  [7] "Hystrix_crassispinis"       "Lophura_ignita"            
##  [9] "Macaca_nemestrina"          "Martes_flavigula"          
## [11] "Muntiacus_muntjak"          "Neofelis_nebulosa"         
## [13] "Paguma_larvata"             "Panthera_tigris"           
## [15] "Paradoxurus_hermaphroditus" "Prionailurus_bengalensis"  
## [17] "Prionodon_linsang"          "Rusa_unicolor"             
## [19] "Sus_barbatus"               "Sus_scrofa"                
## [21] "Tapirus_indicus"            "Tragulus_sp."              
## [23] "Trichys_fasciculata"

#Select the species that meet the requirement. The species are taken directly from sp_supp
#rather than from a hand-typed copy of its contents, so the two can never drift apart if the
#threshold or the data change.
sp <- filter(comm_guild, Species %in% sp_supp$Species)

#Let's save this as our species-level dataset
#(FALSE is spelled out because F is an ordinary variable that can be reassigned)
write.csv(sp, 'SEA_Activity_dataset_for_species-level_analyses_20230307.csv', row.names = FALSE)

#Keep environment clean
rm(comm_guild, comm_guild_supp, sp, sp_supp)

In conclusion, after filtering out all the unwanted species, we are left with 31138 total detections, encompassing 63 different species. For our species-level analyses, we are left with 29879 total detections, encompassing 23 different species. Now, we can begin to analyse the respective datasets.

Dataset Preparation for Temporal Activity Analyses

Samuel Xin Tham Lee

2023-02-28