RAI (Relative Abundance Index)

Author

Owen King

1 Step 1 Load the data into R

# Deployment metadata: camera locations, start/end dates, and settings
deployment <- read.csv("deployments.csv",
                 header = TRUE, 
                 sep = ",", 
                 stringsAsFactors = FALSE)
# Image-level records: one row per classified photograph
camtraps <- read.csv("images.csv", 
                 header = TRUE, 
                 sep = ",", 
                 stringsAsFactors = FALSE)

2 Step 2 Ensure necessary packages are installed

# creates a list of packages needed for the following analysis
list.of.packages <- c(
                      "leaflet",       
                      "plotly",          
                      "kableExtra",    
                      "tidyr",         
                      "dplyr",         
                      "viridis",      
                      "corrplot",      
                      "lubridate",    
                      "taxize",        
                      "sf",           
                      "reshape",       
                      "vegan",         
                      "plotrix",      
                      "chron",         
                      "knitr")        
                              

# Check the vector above against the packages already installed: any that are missing are installed first, then all of them are loaded from the library

new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages,repos = "http://cran.us.r-project.org")
Installing package into 'C:/Users/Owen King/AppData/Local/R/win-library/4.4'
(as 'lib' is unspecified)
Warning: package 'taxize' is not available for this version of R

A version of this package for your version of R might be available elsewhere,
see the ideas at
https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
lapply(list.of.packages, require, character.only = TRUE)
Loading required package: leaflet
Warning: package 'leaflet' was built under R version 4.4.2
Loading required package: plotly
Warning: package 'plotly' was built under R version 4.4.2
Loading required package: ggplot2

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
Loading required package: kableExtra
Warning: package 'kableExtra' was built under R version 4.4.2
Loading required package: tidyr
Loading required package: dplyr
Warning: package 'dplyr' was built under R version 4.4.2

Attaching package: 'dplyr'
The following object is masked from 'package:kableExtra':

    group_rows
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
Loading required package: viridis
Warning: package 'viridis' was built under R version 4.4.2
Loading required package: viridisLite
Loading required package: corrplot
Warning: package 'corrplot' was built under R version 4.4.2
corrplot 0.95 loaded
Loading required package: lubridate

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
Loading required package: taxize
Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
logical.return = TRUE, : there is no package called 'taxize'
Loading required package: sf
Warning: package 'sf' was built under R version 4.4.2
Linking to GEOS 3.12.2, GDAL 3.9.3, PROJ 9.4.1; sf_use_s2() is TRUE
Loading required package: reshape
Warning: package 'reshape' was built under R version 4.4.2

Attaching package: 'reshape'
The following object is masked from 'package:lubridate':

    stamp
The following object is masked from 'package:dplyr':

    rename
The following objects are masked from 'package:tidyr':

    expand, smiths
The following object is masked from 'package:plotly':

    rename
Loading required package: vegan
Warning: package 'vegan' was built under R version 4.4.2
Loading required package: permute
Warning: package 'permute' was built under R version 4.4.2
Loading required package: lattice
This is vegan 2.6-8
Loading required package: plotrix
Loading required package: chron
Warning: package 'chron' was built under R version 4.4.2

Attaching package: 'chron'
The following objects are masked from 'package:lubridate':

    days, hours, minutes, seconds, years
Loading required package: knitr
[[1]]
[1] TRUE

[[2]]
[1] TRUE

[[3]]
[1] TRUE

[[4]]
[1] TRUE

[[5]]
[1] TRUE

[[6]]
[1] TRUE

[[7]]
[1] TRUE

[[8]]
[1] TRUE

[[9]]
[1] FALSE

[[10]]
[1] TRUE

[[11]]
[1] TRUE

[[12]]
[1] TRUE

[[13]]
[1] TRUE

[[14]]
[1] TRUE

[[15]]
[1] TRUE

3 Step 3 Check the data loaded correctly

#displays variables within the data set
names(deployment)
 [1] "project_id"                 "deployment_id"             
 [3] "longitude"                  "latitude"                  
 [5] "start_date"                 "end_date"                  
 [7] "bait_type"                  "bait_description"          
 [9] "feature_type"               "feature_type_methodology"  
[11] "camera_id"                  "camera_name"               
[13] "quiet_period"               "camera_functioning"        
[15] "sensor_height"              "height_other"              
[17] "sensor_orientation"         "orientation_other"         
[19] "plot_treatment"             "plot_treatment_description"
[21] "detection_distance"         "subproject_name"           
[23] "subproject_design"          "event_name"                
[25] "event_description"          "event_type"                
[27] "recorded_by"                "fuzzed"                    
names(camtraps)
 [1] "project_id"              "deployment_id"          
 [3] "image_id"                "sequence_id"            
 [5] "filename"                "location"               
 [7] "is_blank"                "identified_by"          
 [9] "wi_taxon_id"             "class"                  
[11] "order"                   "family"                 
[13] "genus"                   "species"                
[15] "common_name"             "uncertainty"            
[17] "timestamp"               "age"                    
[19] "sex"                     "animal_recognizable"    
[21] "individual_id"           "number_of_objects"      
[23] "individual_animal_notes" "behavior"               
[25] "highlighted"             "markings"               
[27] "cv_confidence"           "license"                
[29] "fuzzed"                 
#displays the structure of the data set
str(deployment)
'data.frame':   300 obs. of  28 variables:
 $ project_id                : int  2001294 2001294 2001294 2001294 2001294 2001294 2001294 2001294 2001294 2001294 ...
 $ deployment_id             : chr  "03B-19" "06B-19" "08B-19" "11A-19" ...
 $ longitude                 : num  -123 -123 -123 -123 -123 ...
 $ latitude                  : num  42.1 42 42 42 42 ...
 $ start_date                : chr  "12/09/2019 00:00" "13/09/2019 00:00" "13/09/2019 00:00" "13/09/2019 00:00" ...
 $ end_date                  : chr  "09/10/2019 00:00" "06/11/2019 00:00" "06/11/2019 00:00" "06/11/2019 00:00" ...
 $ bait_type                 : chr  "None" "None" "None" "None" ...
 $ bait_description          : logi  NA NA NA NA NA NA ...
 $ feature_type              : chr  "forest" "creek" "creek" "forest" ...
 $ feature_type_methodology  : logi  NA NA NA NA NA NA ...
 $ camera_id                 : int  NA NA NA NA NA NA NA NA NA NA ...
 $ camera_name               : chr  "" "" "" "" ...
 $ quiet_period              : int  0 0 0 0 0 0 0 0 0 0 ...
 $ camera_functioning        : chr  "Camera Functioning" "Camera Functioning" "Camera Functioning" "Camera Functioning" ...
 $ sensor_height             : chr  "Knee height" "Knee height" "Knee height" "Knee height" ...
 $ height_other              : logi  NA NA NA NA NA NA ...
 $ sensor_orientation        : chr  "Parallel" "Parallel" "Parallel" "Parallel" ...
 $ orientation_other         : logi  NA NA NA NA NA NA ...
 $ plot_treatment            : logi  NA NA NA NA NA NA ...
 $ plot_treatment_description: logi  NA NA NA NA NA NA ...
 $ detection_distance        : logi  NA NA NA NA NA NA ...
 $ subproject_name           : chr  "" "" "" "" ...
 $ subproject_design         : chr  "" "" "" "" ...
 $ event_name                : logi  NA NA NA NA NA NA ...
 $ event_description         : logi  NA NA NA NA NA NA ...
 $ event_type                : logi  NA NA NA NA NA NA ...
 $ recorded_by               : logi  NA NA NA NA NA NA ...
 $ fuzzed                    : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
str(camtraps)
'data.frame':   547189 obs. of  29 variables:
 $ project_id             : int  2001294 2001294 2001294 2001294 2001294 2001294 2001294 2001294 2001294 2001294 ...
 $ deployment_id          : chr  "34A-19" "01A-19" "01B-19" "15B-19" ...
 $ image_id               : chr  "700d0357-4313-4724-9ae5-cd29a5cc0589" "ffc7f4b4-1551-43e6-b359-e149cd174975" "74bd6673-1586-4bc1-be5b-4023e7dcde5c" "aca78142-2d78-442d-8287-3b017c17c4c1" ...
 $ sequence_id            : logi  NA NA NA NA NA NA ...
 $ filename               : chr  "09260186.JPG" "10230418.JPG" "09300423.JPG" "11050556.JPG" ...
 $ location               : chr  "https://app.wildlifeinsights.org/download/2007276/project/2001294/data-files/700d0357-4313-4724-9ae5-cd29a5cc0589" "https://app.wildlifeinsights.org/download/2007276/project/2001294/data-files/ffc7f4b4-1551-43e6-b359-e149cd174975" "https://app.wildlifeinsights.org/download/2007276/project/2001294/data-files/74bd6673-1586-4bc1-be5b-4023e7dcde5c" "https://app.wildlifeinsights.org/download/2007276/project/2001294/data-files/aca78142-2d78-442d-8287-3b017c17c4c1" ...
 $ is_blank               : logi  NA NA NA NA NA NA ...
 $ identified_by          : chr  "Ashley Newcomb" "Sierra Rose" "Sierra Rose" "Ashley Newcomb" ...
 $ wi_taxon_id            : chr  "d01f67fd-836b-4b25-89fc-2239e59f56b0" "f1856211-cfb7-4a5b-9158-c0f72fd09ee6" "febff896-db40-4ac8-bcfe-5bb99a600950" "f67c4346-bbbd-43d5-9b19-b541d493019b" ...
 $ class                  : chr  "Mammalia" "" "Mammalia" "Mammalia" ...
 $ order                  : chr  "Rodentia" "" "Cetartiodactyla" "Rodentia" ...
 $ family                 : chr  "Sciuridae" "" "Cervidae" "Sciuridae" ...
 $ genus                  : chr  "Otospermophilus" "" "Odocoileus" "Tamiasciurus" ...
 $ species                : chr  "beecheyi" "" "hemionus" "douglasii" ...
 $ common_name            : chr  "California Ground Squirrel" "Blank" "Mule Deer" "Douglas's Squirrel" ...
 $ uncertainty            : logi  NA NA NA NA NA NA ...
 $ timestamp              : chr  "26/09/2019 13:22" "23/10/2019 10:25" "30/09/2019 00:18" "05/11/2019 10:09" ...
 $ age                    : chr  "" "" "" "" ...
 $ sex                    : chr  "" "" "" "" ...
 $ animal_recognizable    : logi  NA NA NA NA NA NA ...
 $ individual_id          : logi  NA NA NA NA NA NA ...
 $ number_of_objects      : int  1 1 2 1 2 1 1 1 1 1 ...
 $ individual_animal_notes: logi  NA NA NA NA NA NA ...
 $ behavior               : logi  NA NA NA NA NA NA ...
 $ highlighted            : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
 $ markings               : logi  NA NA NA NA NA NA ...
 $ cv_confidence          : logi  NA NA NA NA NA NA ...
 $ license                : chr  "CC-BY-NC" "CC-BY-NC" "CC-BY-NC" "CC-BY-NC" ...
 $ fuzzed                 : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...

4 Step 4 Calculate the number of photographs and the number of blanks

# Count the number of photographs, i.e. the number of rows in the camtraps data set (547189)
number <- nrow(camtraps)
print(paste("Number of photographs:", number))
[1] "Number of photographs: 547189"
# Count the unique species in the data set. The result (71) is likely an overestimate because humans appear under several labels (e.g. Human, Human researcher) and some records are identified only to genus.

camtraps$full_species <- paste(camtraps$genus, camtraps$species, sep = " ")
unique_species <- unique(camtraps$full_species)
print(paste("Number of unique species:",length(unique_species)))
[1] "Number of unique species: 71"
# We can also list the unique species. This makes the issue visible: human captures and genus-only records inflate the count of species that were truly detected.
print(unique_species)
 [1] "Otospermophilus beecheyi"    " "                          
 [3] "Odocoileus hemionus"         "Tamiasciurus douglasii"     
 [5] "Homo sapiens"                "Sciurus griseus"            
 [7] "Homo "                       "Ursus americanus"           
 [9] "Cervus canadensis"           "Glaucomys sabrinus"         
[11] "Bassariscus astutus"         "Neotamias "                 
[13] "Sylvilagus bachmani"         "Cyanocitta stelleri"        
[15] "Procyon lotor"               "Meleagris gallopavo"        
[17] "Oreortyx pictus"             "Bos taurus"                 
[19] "Pekania pennanti"            "Urocyon cinereoargenteus"   
[21] "Ixoreus naevius"             "Colaptes cafer"             
[23] "Catharus "                   "Turdus migratorius"         
[25] "Callospermophilus lateralis" "Lynx rufus"                 
[27] "Puma concolor"               "Canis familiaris"           
[29] "Peromyscus "                 "Canis latrans"              
[31] "Melanerpes formicivorus"     "Hylatomus pileatus"         
[33] "Neotoma "                    "Bonasa umbellus"            
[35] "Junco hyemalis"              "Colaptes auratus"           
[37] "Mustela erminea"             "Sialia mexicana"            
[39] "Felis catus"                 "Leuconotopicus villosus"    
[41] "Spilogale gracilis"          "Tamias "                    
[43] "Dryobates "                  "Glaucomys "                 
[45] "Lepus californicus"          "Catharus guttatus"          
[47] "Catharus swainsoni"          "Neotoma fuscipes"           
[49] "Lepus americanus"            "Dendragapus fuliginosus"    
[51] "Mephitis mephitis"           "Sceloporus "                
[53] "Patagioenas fasciata"        "Neotoma cinerea"            
[55] "Aphelocoma californica"      "Odocoileus "                
[57] "Mustela frenata"             "Eutamias "                  
[59] "Dryobates pubescens"         "Aquila chrysaetos"          
[61] "Mustela "                    "Accipiter gentilis"         
[63] "Neovison vison"              "Didelphis virginiana"       
[65] "Euphagus cyanocephalus"      "Turdus "                    
[67] "Corvus corax"                "Sciurus "                   
[69] "Ammodramus savannarum"       "Lepus "                     
[71] "Poecile gambeli"            
# It is useful to know the number of blank images, which indicates how many misfires the camera traps had. The output was [1] 364922.

empty_genus <- sum(camtraps$genus == "")
print(empty_genus)
[1] 364922
# Roughly 66.69% of photographs contain no animal (the raw output is 66.6903026193875; rounding to two decimal places is easier to interpret).
# This is a large share of the data set, but because the data set is so large the remaining captures are still usable.

proportion <- (empty_genus/nrow(camtraps))*100
print(paste("Percentage of photographs without animal captures:",proportion))
[1] "Percentage of photographs without animal captures: 66.6903026193875"

5 Step 5 Check how many deployments had no captures

#first count the number of deployments
unique_deployments <- unique(deployment$deployment_id)
num_all_deployments <- length(unique_deployments)

#then work out the number of deployments that were successful, i.e. appear in the image data with captured animals
successful_deployments<- unique(camtraps$deployment_id)
num_successful_deployments <-length(successful_deployments)

#subtract the successful deployments from the total to find how many recorded no captures. The output "Number of deployments with no captures: 0" indicates every deployment was successful.
total <- num_all_deployments - num_successful_deployments
print(paste("Number of deployments with no captures:", total))
[1] "Number of deployments with no captures: 0"

6 Step 6 Look at the start and end of sampling

#display the first few values of each date column. The format is day/month/year hours:minutes, with the seconds missing.
head(deployment$start_date)
[1] "12/09/2019 00:00" "13/09/2019 00:00" "13/09/2019 00:00" "13/09/2019 00:00"
[5] "18/09/2019 00:00" "18/09/2019 00:00"
head(deployment$end_date)
[1] "09/10/2019 00:00" "06/11/2019 00:00" "06/11/2019 00:00" "06/11/2019 00:00"
[5] "07/11/2019 00:00" "07/11/2019 00:00"
#as seen in the previous step, the seconds are missing from the date strings, so we append ":00" before parsing.
#the ifelse function checks the length of each date string (16 characters) to detect values without seconds.
# Add ":00" to start_date if seconds are missing
deployment$start_date <- ifelse(nchar(deployment$start_date) == 16, 
                                paste0(deployment$start_date, ":00"), 
                                deployment$start_date)

# Add ":00" to end_date if seconds are missing
deployment$end_date <- ifelse(nchar(deployment$end_date) == 16, 
                              paste0(deployment$end_date, ":00"), 
                              deployment$end_date)
library(lubridate)

# Convert start_date and end_date to POSIXct directly in Pacific Time (America/Los_Angeles); dmy_hms() matches the day/month/year hours:minutes:seconds layout of the data set.
deployment$start_date <- dmy_hms(deployment$start_date, tz = "America/Los_Angeles")
deployment$end_date <- dmy_hms(deployment$end_date, tz = "America/Los_Angeles")

# Check for NAs after conversion to ensure it has been successful
print(sum(is.na(deployment$start_date)))
[1] 0
print(sum(is.na(deployment$end_date)))
[1] 0
# Find the earliest start date
earliest_start_date <- min(deployment$start_date, na.rm = TRUE)

# Find the latest end date
latest_end_date <- max(deployment$end_date, na.rm = TRUE)

# View the results
print(paste("Earliest start date:", earliest_start_date))
[1] "Earliest start date: 2019-08-10"
print(paste("Latest end date:", latest_end_date))
[1] "Latest end date: 2021-11-05"

7 Step 7 Check the intervals over which the samples were taken

# work out how long each deployment was deployed for.
deployment$interval <- interval(deployment$start_date, deployment$end_date)

# convert the interval to days (the interval length is in seconds, so divide by 60 * 60 * 24)
deployment$duration_days <- as.numeric(deployment$interval) / (60 * 60 * 24)  


# display the start date, end date, and the deployment duration in days for the first five rows
print(head(deployment[, c("start_date", "end_date", "duration_days")],5))
  start_date   end_date duration_days
1 2019-09-12 2019-10-09      27.00000
2 2019-09-13 2019-11-06      54.04167
3 2019-09-13 2019-11-06      54.04167
4 2019-09-13 2019-11-06      54.04167
5 2019-09-18 2019-11-07      50.04167
#we can also summarise the durations with the summary command: min = 6, 1st quartile = 35, median = 44, mean = 42.77, 3rd quartile = 51.04, max = 88.04 days.
#This is a useful check for mistakes in the conversion or during data collection, such as swapped start and end dates (which would produce negative durations).
summary(deployment$duration_days)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   6.00   35.00   44.00   42.77   51.04   88.04 
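# As an extra check for swapped or mistyped dates (a sketch, not required for the analysis), flag any
# deployments with a duration of less than one day; given the minimum of 6 days above, this should return zero rows.
suspect <- deployment[deployment$duration_days < 1,
                      c("deployment_id", "start_date", "end_date", "duration_days")]
print(suspect)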

8 Step 8 Calculate the sampling effort

# Sum the duration_days to calculate total camera days (sampling effort)
total_camera_days <- sum(deployment$duration_days, na.rm = TRUE)

# round the effort to a whole number, as fractional days are not useful in further analysis
total_camera_days_rounded <- round(total_camera_days, 0)

# View the result: the total sampling effort is 12831 camera days
print(paste("Total camera days (sampling effort):", total_camera_days_rounded))
[1] "Total camera days (sampling effort): 12831"

9 Step 9 Plot the camera traps on a map

# the red dots indicate where the cameras were deployed; the map should appear in the Viewer pane.

library(leaflet)

m <- leaflet() %>%
  addProviderTiles(providers$Esri.WorldImagery) %>%
  addCircleMarkers(lng = deployment$longitude, lat = deployment$latitude,
                   color = "red", radius = 5, fill = TRUE,
                   fillColor = "red", fillOpacity = 0.7) %>%
  addScaleBar(position = "topleft",
              options = scaleBarOptions(metric = TRUE, imperial = FALSE))

#this will display the map that was previously created
m
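# Optionally, each marker can be labelled with its deployment ID using leaflet's popup argument, which makes it
# easier to relate points on the map back to the data; a sketch:
m_labelled <- leaflet() %>%
  addProviderTiles(providers$Esri.WorldImagery) %>%
  addCircleMarkers(lng = deployment$longitude, lat = deployment$latitude,
                   color = "red", radius = 5, fillColor = "red", fillOpacity = 0.7,
                   popup = deployment$deployment_id) %>%
  addScaleBar(position = "topleft", options = scaleBarOptions(metric = TRUE, imperial = FALSE))
m_labelled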

10 Step 10 Calculate the Relative Abundance Index (RAI)

library(dplyr)
# Define a custom function, event.sp, to calculate trapping events and the Relative Abundance Index (RAI)
event.sp <- function(camtraps, 
                     deployment, 
                     total_camera_days_rounded, 
                     time_threshold = NULL) {
  
  # The function takes four parameters: camtraps and deployment are the two data sets used in the RAI calculation;
  # total_camera_days_rounded is the sampling effort rounded to the nearest whole day;
  # time_threshold (optional, in seconds) sets a buffer after an observation. Without it every row counts as a
  # separate event, even though a single animal (e.g. a bear sitting in front of the camera for 20 minutes) may
  # trigger the camera several times.
  
  
  # Genus is used because some animals could not be identified to species level; grouping by genus keeps those records in the RAI rather than discarding them.
  
  # Checks for missing values in the column genus
  missing_genus <- sum(is.na(camtraps$genus) | camtraps$genus == "")
  
  # Print count of missing genus values
  if (missing_genus > 0) {
    cat("Warning: Found", missing_genus, "missing or empty values in 'genus'. Rows with missing genus will be excluded.\n")
  }
  
  # Filter out rows with missing or empty genus
  camtraps <- camtraps %>%
    filter(!is.na(genus) & genus != "")
  
  # Combine 'genus' and 'species' into a new 'full_species' column in the camtraps dataset
  camtraps <- camtraps %>%
    mutate(
      full_species = paste(genus, species, sep = " "),  
      full_species = trimws(full_species)  
    ) 
  
  # Convert the timestamp column in camtraps to POSIXct; the raw timestamps are in day/month/year hours:minutes format
  camtraps$timestamp <- as.POSIXct(camtraps$timestamp, format = "%d/%m/%Y %H:%M")
  
  # If no time_threshold is supplied, every classified image counts as a separate trapping event
  
  if (is.null(time_threshold)) {
    trapping_events <- camtraps %>% 
      group_by(full_species, deployment_id) %>% 
      summarise(trapping_event_count = n(), .groups = 'drop') 
   
  } else {
    camtraps <- camtraps %>%
      arrange(deployment_id, full_species, timestamp)
    
   
    trapping_events <- camtraps %>%
      group_by(full_species, deployment_id) %>%
      mutate(time_diff = c(NA, diff(as.numeric(timestamp)))) %>%
    
      mutate(new_event = ifelse(is.na(time_diff) | time_diff > time_threshold, 1, 0)) %>%
     
      summarise(trapping_event_count = sum(new_event), .groups = 'drop')
  }  
    
  
  # Calculate the RAI: trapping events per species divided by the total number of camera days
  trapping_events <- trapping_events %>%
    mutate(RIA = trapping_event_count / total_camera_days_rounded)

  # Return the trapping events summary with RIA
  return(trapping_events)
}

#The output lists the species, the deployment, the trapping event count, and the RAI. The RAI expresses the number of trapping events for a species per camera-trap day across the whole survey.
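# To make the threshold logic concrete, here is a minimal sketch with hypothetical timestamps (not taken from
# the data set): three triggers within 30 minutes of each other collapse into a single event, so four
# photographs become two trapping events.
toy_times <- as.POSIXct(c("2019-09-12 10:00:00", "2019-09-12 10:05:00",
                          "2019-09-12 10:12:00", "2019-09-12 12:00:00"),
                        tz = "America/Los_Angeles")
toy_diff <- c(NA, diff(as.numeric(toy_times)))                  # seconds between consecutive triggers
toy_event <- ifelse(is.na(toy_diff) | toy_diff > 30 * 60, 1, 0) # 1 marks the start of a new event
sum(toy_event)                                                  # 2 events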

11 Step 11 Run the function without a time threshold

# Without time threshold
trapping_events_no_threshold <- event.sp(camtraps, deployment, total_camera_days_rounded)
Warning: Found 364922 missing or empty values in 'genus'. Rows with missing genus will be excluded.
#save the RAI output as a csv (uncomment the line below) so it can be used to inform the naive occupancy and the occupancy model

#write.csv(trapping_events_no_threshold, file = "trapping_events_no_threshold.csv", row.names = FALSE)

12 Step 12 Run the function with a 30-minute time threshold

# With a 30-minute threshold (in seconds)
trapping_events_with_threshold <- event.sp(camtraps, deployment, total_camera_days_rounded, time_threshold = 30 * 60)
Warning: Found 364922 missing or empty values in 'genus'. Rows with missing genus will be excluded.
#save the RAI output as a csv (uncomment the line below) so it can be used to inform the naive occupancy and the occupancy model

#write.csv(trapping_events_with_threshold, file = "trapping_events_with_threshold.csv", row.names = FALSE)

13 Step 13 Overall RAI without the threshold

library(knitr)
# this creates an overall RAI without the threshold, treating every capture as a single event

# Summarize results across all deployments
species_summary <- trapping_events_no_threshold %>%
  group_by(full_species) %>%
  summarise(
    total_trapping_event_count = sum(trapping_event_count, na.rm = TRUE),  # Sum of trapping events
    total_RAI = sum(RIA, na.rm = TRUE),  # Sum of RAI
    .groups = 'drop'  # Drop grouping after summarization
  ) %>%
  arrange(desc(total_trapping_event_count))  # Optional: sort by total trapping events

# Print a nicely formatted table

kable(species_summary, 
      format = "html",   # C
      col.names = c("Species", "Total Trapping Events", "Total RAI"), 
      caption = "Summary of Trapping Events and RAI by Species")
Summary of Trapping Events and RAI by Species
Species Total Trapping Events Total RAI
Tamiasciurus douglasii 35673 2.7802198
Homo sapiens 34349 2.6770322
Odocoileus hemionus 31781 2.4768919
Homo 16411 1.2790118
Sciurus griseus 13545 1.0556465
Ursus americanus 13442 1.0476190
Sceloporus 7260 0.5658172
Neotamias 5689 0.4433793
Otospermophilus beecheyi 4589 0.3576494
Bos taurus 2863 0.2231315
Urocyon cinereoargenteus 2430 0.1893851
Ixoreus naevius 1658 0.1292183
Glaucomys sabrinus 1454 0.1133193
Pekania pennanti 1095 0.0853402
Peromyscus 817 0.0636739
Oreortyx pictus 764 0.0595433
Cyanocitta stelleri 679 0.0529187
Tamias 654 0.0509703
Neotoma 615 0.0479308
Bassariscus astutus 577 0.0449692
Neotoma cinerea 482 0.0375653
Cervus canadensis 479 0.0373315
Neotoma fuscipes 434 0.0338243
Turdus migratorius 417 0.0324994
Lynx rufus 414 0.0322656
Meleagris gallopavo 383 0.0298496
Puma concolor 383 0.0298496
Lepus californicus 353 0.0275115
Sylvilagus bachmani 351 0.0273556
Eutamias 298 0.0232250
Canis latrans 240 0.0187047
Catharus 182 0.0141844
Procyon lotor 154 0.0120022
Colaptes cafer 149 0.0116125
Spilogale gracilis 146 0.0113787
Mephitis mephitis 139 0.0108331
Glaucomys 112 0.0087289
Catharus guttatus 101 0.0078716
Lepus 98 0.0076378
Bonasa umbellus 77 0.0060011
Felis catus 76 0.0059232
Ammodramus savannarum 66 0.0051438
Canis familiaris 64 0.0049879
Colaptes auratus 63 0.0049100
Callospermophilus lateralis 55 0.0042865
Patagioenas fasciata 30 0.0023381
Junco hyemalis 29 0.0022602
Hylatomus pileatus 25 0.0019484
Lepus americanus 21 0.0016367
Aphelocoma californica 16 0.0012470
Didelphis virginiana 15 0.0011690
Mustela erminea 10 0.0007794
Mustela 7 0.0005456
Dendragapus fuliginosus 6 0.0004676
Dryobates pubescens 6 0.0004676
Catharus swainsoni 5 0.0003897
Accipiter gentilis 3 0.0002338
Aquila chrysaetos 3 0.0002338
Corvus corax 3 0.0002338
Dryobates 3 0.0002338
Euphagus cyanocephalus 3 0.0002338
Leuconotopicus villosus 3 0.0002338
Melanerpes formicivorus 3 0.0002338
Mustela frenata 3 0.0002338
Neovison vison 3 0.0002338
Turdus 3 0.0002338
Poecile gambeli 2 0.0001559
Sialia mexicana 2 0.0001559
Odocoileus 1 0.0000779
Sciurus 1 0.0000779
#save the RAI summary as a csv (uncomment the line below) so it can be used to inform the naive occupancy and the occupancy model

#write.csv(species_summary, file = "species_summary.csv", row.names = FALSE)
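# As a quick visual summary (a sketch, not part of the original workflow), the taxa with the highest RAI can be
# plotted from species_summary using ggplot2, which was loaded earlier as a dependency of plotly:
library(ggplot2)
top_species <- head(species_summary, 10)
ggplot(top_species, aes(x = reorder(full_species, total_RAI), y = total_RAI)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(x = "Species", y = "Total RAI", title = "Ten highest RAI values (no threshold)") +
  theme_minimal()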

14 Step 14 Overall RAI with the threshold

# the same code as above, but with the thresholded output as the source data, so that repeated triggers within 30 minutes are not double-counted

# Summarize results across all deployments
species_summary_threshold <- trapping_events_with_threshold %>%
  group_by(full_species) %>%
  summarise(
    total_trapping_event_count = sum(trapping_event_count, na.rm = TRUE),  # Sum of trapping events
    total_RAI = sum(RIA, na.rm = TRUE),  # Sum of RAI
    .groups = 'drop'  # Drop grouping after summarization
  ) %>%
  arrange(desc(total_trapping_event_count))  # Optional: sort by total trapping events to make the data more interpretable.

# print the output in a formatted table using the kable() function

kable(species_summary_threshold, 
      format = "html",   # C
      col.names = c("Species", "Total Trapping Events", "Total RAI"), 
      caption = "Summary of Trapping Events and RAI by Species")
Summary of Trapping Events and RAI by Species
Species Total Trapping Events Total RAI
Tamiasciurus douglasii 35673 2.7802198
Homo sapiens 34349 2.6770322
Odocoileus hemionus 31781 2.4768919
Homo 16411 1.2790118
Sciurus griseus 13545 1.0556465
Ursus americanus 13442 1.0476190
Sceloporus 7260 0.5658172
Neotamias 5689 0.4433793
Otospermophilus beecheyi 4589 0.3576494
Bos taurus 2863 0.2231315
Urocyon cinereoargenteus 2430 0.1893851
Ixoreus naevius 1658 0.1292183
Glaucomys sabrinus 1454 0.1133193
Pekania pennanti 1095 0.0853402
Peromyscus 817 0.0636739
Oreortyx pictus 764 0.0595433
Cyanocitta stelleri 679 0.0529187
Tamias 654 0.0509703
Neotoma 615 0.0479308
Bassariscus astutus 577 0.0449692
Neotoma cinerea 482 0.0375653
Cervus canadensis 479 0.0373315
Neotoma fuscipes 434 0.0338243
Turdus migratorius 417 0.0324994
Lynx rufus 414 0.0322656
Meleagris gallopavo 383 0.0298496
Puma concolor 383 0.0298496
Lepus californicus 353 0.0275115
Sylvilagus bachmani 351 0.0273556
Eutamias 298 0.0232250
Canis latrans 240 0.0187047
Catharus 182 0.0141844
Procyon lotor 154 0.0120022
Colaptes cafer 149 0.0116125
Spilogale gracilis 146 0.0113787
Mephitis mephitis 139 0.0108331
Glaucomys 112 0.0087289
Catharus guttatus 101 0.0078716
Lepus 98 0.0076378
Bonasa umbellus 77 0.0060011
Felis catus 76 0.0059232
Ammodramus savannarum 66 0.0051438
Canis familiaris 64 0.0049879
Colaptes auratus 63 0.0049100
Callospermophilus lateralis 55 0.0042865
Patagioenas fasciata 30 0.0023381
Junco hyemalis 29 0.0022602
Hylatomus pileatus 25 0.0019484
Lepus americanus 21 0.0016367
Aphelocoma californica 16 0.0012470
Didelphis virginiana 15 0.0011690
Mustela erminea 10 0.0007794
Mustela 7 0.0005456
Dendragapus fuliginosus 6 0.0004676
Dryobates pubescens 6 0.0004676
Catharus swainsoni 5 0.0003897
Accipiter gentilis 3 0.0002338
Aquila chrysaetos 3 0.0002338
Corvus corax 3 0.0002338
Dryobates 3 0.0002338
Euphagus cyanocephalus 3 0.0002338
Leuconotopicus villosus 3 0.0002338
Melanerpes formicivorus 3 0.0002338
Mustela frenata 3 0.0002338
Neovison vison 3 0.0002338
Turdus 3 0.0002338
Poecile gambeli 2 0.0001559
Sialia mexicana 2 0.0001559
Odocoileus 1 0.0000779
Sciurus 1 0.0000779
#save the RAI summary as a csv (uncomment the line below) so it can be used to inform the naive occupancy and the occupancy model

#write.csv(species_summary_threshold, file = "species_summary_threshold.csv", row.names = FALSE)
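# To quantify how much the 30-minute threshold reduces the counts, the two summaries can be joined and compared.
# A sketch using the objects from Steps 13 and 14 (dplyr functions are prefixed with dplyr:: to avoid the
# masking by other loaded packages noted in the package-loading messages):
comparison <- species_summary %>%
  dplyr::select(full_species, events_no_threshold = total_trapping_event_count) %>%
  dplyr::left_join(
    dplyr::select(species_summary_threshold,
                  full_species, events_with_threshold = total_trapping_event_count),
    by = "full_species") %>%
  dplyr::mutate(reduction = events_no_threshold - events_with_threshold) %>%
  dplyr::arrange(dplyr::desc(reduction))
head(comparison)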

15 Step 15 Species accumulation curve

#Ensure the deployment dates are stored as Date objects so that comparisons and calculations are accurate
deployment <- deployment %>%
  mutate(start_date = as.Date(start_date),
         end_date = as.Date(end_date))
#Create a daily date sequence for each deployment, running from its start date to its end date
deployment_dates <- deployment %>%
  select(deployment_id, start_date, end_date) %>%
  
  rowwise() %>%
  do(data.frame(deployment_id = .$deployment_id, 
                 date = seq(.$start_date, .$end_date, by = "day"))) %>%
  ungroup()  
# Create a full_species variable by combining the genus and species columns, and convert each capture timestamp to a Date. The camtraps timestamps are stored as day/month/year text, whereas the deployment dates are already Date objects, so converting them is essential for the join below.
camtraps1 <- camtraps %>%
  mutate(full_species = paste(genus, species, sep = " "),
         timestamp = as.Date(timestamp, format = "%d/%m/%Y %H:%M"))
#Create a list of the species captured on each day of each deployment
species_per_day <- deployment_dates %>%
  left_join(camtraps1, by = c("deployment_id", "date" = "timestamp")) %>%
  group_by(deployment_id, date) %>%
  summarise(species_detected = list(unique(full_species)), .groups = 'drop') %>%
  ungroup()
Warning in left_join(., camtraps1, by = c("deployment_id", date = "timestamp")): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 54464 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
# Calculate the cumulative number of species detected per day, using a loop that tracks how many new species appear each day

cumulative_species <- numeric(nrow(species_per_day))

# go through each day, keeping a running set of every species detected so far
detected_species <- character(0)  

for (i in seq_len(nrow(species_per_day))) {
  # Get newly detected species for the day
  new_species <- species_per_day$species_detected[[i]]
  
  # add the day's species to the running set of detected species
  detected_species <- union(detected_species, new_species)
  
  # store the cumulative number of species 
  cumulative_species[i] <- length(detected_species)
}

# Attach the cumulative counts back to the daily records
species_per_day <- species_per_day %>%
  mutate(cumulative_species = cumulative_species)

# give each daily record a running camera-day index so the cumulative species count can be plotted against it
camera_days <- species_per_day %>%
  mutate(camera_day = row_number())
# Inspect the first few rows of species_per_day to check that the accumulation worked correctly; the cumulative_species column should never decrease.
head(species_per_day)
# A tibble: 6 × 4
  deployment_id date       species_detected cumulative_species
  <chr>         <date>     <list>                        <dbl>
1 01A-19        2019-09-12 <chr [3]>                         3
2 01A-19        2019-09-13 <chr [1]>                         4
3 01A-19        2019-09-14 <chr [1]>                         4
4 01A-19        2019-09-15 <chr [1]>                         4
5 01A-19        2019-09-16 <chr [1]>                         4
6 01A-19        2019-09-17 <chr [1]>                         4
# Check if species_detected contains useful data
species_per_day %>%
  filter(lengths(species_detected) > 0) %>%
  head()
# A tibble: 6 × 4
  deployment_id date       species_detected cumulative_species
  <chr>         <date>     <list>                        <dbl>
1 01A-19        2019-09-12 <chr [3]>                         3
2 01A-19        2019-09-13 <chr [1]>                         4
3 01A-19        2019-09-14 <chr [1]>                         4
4 01A-19        2019-09-15 <chr [1]>                         4
5 01A-19        2019-09-16 <chr [1]>                         4
6 01A-19        2019-09-17 <chr [1]>                         4
library(ggplot2)
# plot the species accumulation curve calculated above using ggplot2
ggplot(camera_days, aes(x = camera_day, y = cumulative_species)) +
  geom_line(color = "blue", linewidth = 1) +  
  geom_point(color = "red") +
  labs(
    x = "Number of Camera Days",
    y = "Cumulative Number of New Species Detected",
    title = "Species Accumulation Curve"
  ) +
  theme_minimal()
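# As an independent cross-check of the curve above, the vegan package (loaded in Step 2) can build a
# deployment-based species accumulation curve from a deployment-by-species table. A sketch, assuming the
# camtraps1 object created earlier in this step; blank (unidentified) records are dropped first:
identified <- camtraps1[camtraps1$genus != "", ]
comm <- unclass(table(identified$deployment_id, identified$full_species))
sac <- specaccum(comm, method = "random")   # accumulate species over randomly ordered deployments
plot(sac, xlab = "Number of deployments", ylab = "Cumulative species richness",
     main = "Species accumulation over deployments (vegan)")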