Introduction

This file covers:
- How to set up AI assistants like GitHub Copilot and chattr to assist with coding
- Sorting data with conditional statements
- How to set up classic loops
- How to set up purrr loops
- How to use functions

Setting Up AI assistants

Set up GitCopilot in Tools > Global Options > Assistant

Install chattr AI and other packages

#install.packages("ellmer")
#install.packages("chattr")
#install.packages("palmerpenguins")
library("knitr")
library(usethis)

Test git copilot

# Load tidyverse and palmerpenguins libraries 
library("tidyverse")
library("palmerpenguins")
# Create a ggplot scatter plot using the penguins dataset. 
# Plot bill_length_mm on the x-axis and bill_depth_mm on the y-axis.
# Color points by species and use geom_point with size 3.
# Add a minimal theme and clean labels.   
ggplot(penguins, aes(x = bill_length_mm, y = bill_depth_mm, color = species)) +
  geom_point(size = 3) +
  theme_minimal() +
  labs(title = "Penguin Bill Dimensions",
       x = "Bill Length (mm)",
       y = "Bill Depth (mm)",
       color = "Species")

# Create boxplot using penguins dataset 
# Plot flipper_length_mm across different islands
# Color by species and use geom_point with size 3.
# Add a minimal theme and clean labels.  
ggplot(penguins, aes(x = island, y = flipper_length_mm, color = species)) +
  geom_boxplot() +
  geom_point(position = position_jitter(width = 0.2), size = 3) +
  theme_minimal() +
  labs(title = "Penguin Flipper Length by Island",
       x = "Island",
       y = "Flipper Length (mm)",
       color = "Species")

Check API Key is Recognized

# Confirm the key is set WITHOUT printing the secret into the rendered document.
# nzchar() returns TRUE when the environment variable holds a non-empty string.
# NOTE: a key that has already been echoed into a shared or published document
# should be treated as compromised: revoke it and generate a new one.
nzchar(Sys.getenv("GEMINI_API_KEY"))
## [1] TRUE

Set up chat

library("chattr")
library("ellmer")
library(ggplot2)
my_chat <- ellmer::chat_google_gemini(model = "gemini-2.5-flash")
chattr_use(my_chat)
chattr::chattr("say hello")
## # hello
## 
## library(readr)
## library(ggplot2)
## library(dplyr)

Start a Chat

chattr_app()
Shiny applications not supported in static R Markdown documents
#Execute Key shortcut: Ctrl + Alt + c

Test chattr AI with ggplot2 prompt: “Show me how to load the built-in iris dataset and use ggplot2 to build a scatter plot of Sepal.Length versus Sepal.Width colored by Species.”

data(iris)

ggplot(iris, aes(x = Sepal.Width, y = Sepal.Length, color = Species)) +
  geom_point()

Sorting Data

if statements: if a condition is met, run this command.

Change object if it equates to something

x <- 1
x # what is x?
## [1] 1
if (x == 1) {
  x <- 2
}
x # what is x now?
## [1] 2
x <- 3
x # what is x?
## [1] 3
if (x == 1) {
  x <- 2
}
x # what is x now?
## [1] 3

if-else statements: used to sort data; if the condition is not met, a second set of commands runs instead.

Example:

if (animal_data_received) { 1. save statement in log file that the animal has been updated } else { 1. save statement saying no data received }

elseif statements: allows additional conditional commands between the if and else

Example of ifelse code:

name <- "Nick"
if (name == "Nick"){
  last.name <- "Murray"
}
print (paste(name, last.name)) 
## [1] "Nick Murray"
# try changing name to Bob, what happens?

# now handle that case
name <- "Bob"
if (name == "Nick"){
  last.name <- "Murray"
} else {
  last.name <- "Jones"
}
print (paste(name, last.name))
## [1] "Bob Jones"
# chaining together
name <- "Jim"
if (name == "Nick"){
  last.name <- "Murray"
} else if (name == "Bob") {
  last.name <- "Jones"
} else {
  last.name <- "... last name is unknown"
}
print (paste(name, last.name))
## [1] "Jim ... last name is unknown"

Simple Example: Checking Sea Surface Temperature (SST)

#() set the logical condition and curly braces {} used for the action if that condition is true
sst <- 30.2

if (sst > 29.5) {
  print("Warning: Marine heatwave threshold exceeded!")
} else {
  print("SST remains within baseline parameters.")
}
## [1] "Warning: Marine heatwave threshold exceeded!"

if_else()

The if_else(condition, true_output, false_output) function evaluates an entire column item-by-item:

# Sample coral data
coral_monitoring <- tibble(
  site = c("Site_A", "Site_B", "Site_C"),
  depth_m = c(12, 35, 8)
)

# Classify depth using if_else
coral_monitoring <- coral_monitoring %>% 
  mutate(zone = if_else(depth_m > 30, "Deep Reef", "Shallow Reef"))

coral_monitoring
## # A tibble: 3 × 3
##   site   depth_m zone        
##   <chr>    <dbl> <chr>       
## 1 Site_A      12 Shallow Reef
## 2 Site_B      35 Deep Reef   
## 3 Site_C       8 Shallow Reef
## %>%: used to chain functions together (passes the result of the expression on its left as the first argument to the function on its right)
## Mutate: used to edit columns in a data frame

case_when()

When you have more than two possibilities, stacking multiple if-else blocks becomes unreadable. case_when() is better and uses formula tildes (~):

# case_when() checks the conditions top to bottom and uses the first one that
# is TRUE, so "depth_m <= 30" only ever sees depths of 10 or more.
coral_monitoring <- coral_monitoring %>% 
  mutate(reef_category = case_when(
    depth_m < 10  ~ "Lagoon / Flats",
    depth_m <= 30 ~ "Crest / Slope",
    depth_m > 30  ~ "Mesophotic / Deep",
    TRUE          ~ "Unclassified" # Catch-all: only reached when depth_m is NA
  ))
# create a tibble named marine_stations containing a column for salinity with values 35, 28, 32, and 12
# Use the pipe operator (%>%) and mutate() combined with case_when() to create a new column named environment_type
# Classify values below 15 as "Estuarine", values between 15 and 30 as "Brackish", and values above 30 as "Marine"
marine_stations <- tibble(
  salinity = c(35, 28, 32, 12)
)
marine_stations <- marine_stations %>% 
  mutate(environment_type = case_when(
    salinity < 15 ~ "Estuarine",
    salinity >= 15 & salinity <= 30 ~ "Brackish",
    salinity > 30 ~ "Marine"
  ))

marine_stations
## # A tibble: 4 × 2
##   salinity environment_type
##      <dbl> <chr>           
## 1       35 Marine          
## 2       28 Brackish        
## 3       32 Marine          
## 4       12 Estuarine

Classic Looping

On every run of the loop, R creates an object with the name you chose (here `value`) that holds the current element of the vector

for (value in c("My", "first", "for", "loop")) {
  print("one run")
}
## [1] "one run"
## [1] "one run"
## [1] "one run"
## [1] "one run"

The loop variable can have any name you like; here it is passed to print() so each element is shown

for (word in c("My", "second", "for", "loop")) {
  print(word)
}
## [1] "My"
## [1] "second"
## [1] "for"
## [1] "loop"
for (string in c("My", "second", "for", "loop")) {
  print(string)
}
## [1] "My"
## [1] "second"
## [1] "for"
## [1] "loop"
for (i in c("My", "second", "for", "loop")) {
  print(i)
}
## [1] "My"
## [1] "second"
## [1] "for"
## [1] "loop"

Classic loop anatomy

for (year in 2020:2024) {
  print(paste("Processing climate data for year:", year))
}
## [1] "Processing climate data for year: 2020"
## [1] "Processing climate data for year: 2021"
## [1] "Processing climate data for year: 2022"
## [1] "Processing climate data for year: 2023"
## [1] "Processing climate data for year: 2024"

Loop across index sequences

transect_lengths <- c(50, 100, 25, 75)

for (i in seq_along(transect_lengths)) {
  print(paste("Transect number", i, "measures", transect_lengths[i], "metres."))
}
## [1] "Transect number 1 measures 50 metres."
## [1] "Transect number 2 measures 100 metres."
## [1] "Transect number 3 measures 25 metres."
## [1] "Transect number 4 measures 75 metres."

Make your own for loop: A list of your top 3 favourite animals

#Make a for loop that lists my top 3 favorite animals using the print() function and paste() to combine text and the loop value

favorite_animals <- c("Snow Leopard", "Octopus", "Sperm Whale")
# Loop over the positions of the vector itself, so the ranking always matches
# the number of animals (no separate counter vector to keep in sync)
for (i in seq_along(favorite_animals)) {
  print(paste("My number", i, "favorite animal is the", favorite_animals[i]))
}
## [1] "My number 1 favorite animal is the Snow Leopard"
## [1] "My number 2 favorite animal is the Octopus"
## [1] "My number 3 favorite animal is the Sperm Whale"

Create a vector to be looped over and compactly display it

my_list <- 1:50 
print (my_list)
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
str(my_list)
##  int [1:50] 1 2 3 4 5 6 7 8 9 10 ...

Run the loop

# make a for loop (i is typically used)
for (i in my_list) {                                    # Head of for-loop
  out <- paste0("I am printing loop number: ", i, ".")  # Build this iteration's message
  print(out)                                            # Show it in the console
}  # end of loop
## [1] "I am printing loop number: 1."
## [1] "I am printing loop number: 2."
## [1] "I am printing loop number: 3."
## [1] "I am printing loop number: 4."
## [1] "I am printing loop number: 5."
## [1] "I am printing loop number: 6."
## [1] "I am printing loop number: 7."
## [1] "I am printing loop number: 8."
## [1] "I am printing loop number: 9."
## [1] "I am printing loop number: 10."
## [1] "I am printing loop number: 11."
## [1] "I am printing loop number: 12."
## [1] "I am printing loop number: 13."
## [1] "I am printing loop number: 14."
## [1] "I am printing loop number: 15."
## [1] "I am printing loop number: 16."
## [1] "I am printing loop number: 17."
## [1] "I am printing loop number: 18."
## [1] "I am printing loop number: 19."
## [1] "I am printing loop number: 20."
## [1] "I am printing loop number: 21."
## [1] "I am printing loop number: 22."
## [1] "I am printing loop number: 23."
## [1] "I am printing loop number: 24."
## [1] "I am printing loop number: 25."
## [1] "I am printing loop number: 26."
## [1] "I am printing loop number: 27."
## [1] "I am printing loop number: 28."
## [1] "I am printing loop number: 29."
## [1] "I am printing loop number: 30."
## [1] "I am printing loop number: 31."
## [1] "I am printing loop number: 32."
## [1] "I am printing loop number: 33."
## [1] "I am printing loop number: 34."
## [1] "I am printing loop number: 35."
## [1] "I am printing loop number: 36."
## [1] "I am printing loop number: 37."
## [1] "I am printing loop number: 38."
## [1] "I am printing loop number: 39."
## [1] "I am printing loop number: 40."
## [1] "I am printing loop number: 41."
## [1] "I am printing loop number: 42."
## [1] "I am printing loop number: 43."
## [1] "I am printing loop number: 44."
## [1] "I am printing loop number: 45."
## [1] "I am printing loop number: 46."
## [1] "I am printing loop number: 47."
## [1] "I am printing loop number: 48."
## [1] "I am printing loop number: 49."
## [1] "I am printing loop number: 50."
print ("For loop complete!")
## [1] "For loop complete!"
print ("Program complete!")
## [1] "Program complete!"

Add a number to every item in the list being looped

add_me <- 500     #Establish added value
for(i in my_list) {                              # Head of for-loop
  result <- i + add_me # add to the i value
  out <- paste0("Result of iteration: ", i, " = ", result, ".") 
  print(out)    
}   #End of loop
## [1] "Result of iteration: 1 = 501."
## [1] "Result of iteration: 2 = 502."
## [1] "Result of iteration: 3 = 503."
## [1] "Result of iteration: 4 = 504."
## [1] "Result of iteration: 5 = 505."
## [1] "Result of iteration: 6 = 506."
## [1] "Result of iteration: 7 = 507."
## [1] "Result of iteration: 8 = 508."
## [1] "Result of iteration: 9 = 509."
## [1] "Result of iteration: 10 = 510."
## [1] "Result of iteration: 11 = 511."
## [1] "Result of iteration: 12 = 512."
## [1] "Result of iteration: 13 = 513."
## [1] "Result of iteration: 14 = 514."
## [1] "Result of iteration: 15 = 515."
## [1] "Result of iteration: 16 = 516."
## [1] "Result of iteration: 17 = 517."
## [1] "Result of iteration: 18 = 518."
## [1] "Result of iteration: 19 = 519."
## [1] "Result of iteration: 20 = 520."
## [1] "Result of iteration: 21 = 521."
## [1] "Result of iteration: 22 = 522."
## [1] "Result of iteration: 23 = 523."
## [1] "Result of iteration: 24 = 524."
## [1] "Result of iteration: 25 = 525."
## [1] "Result of iteration: 26 = 526."
## [1] "Result of iteration: 27 = 527."
## [1] "Result of iteration: 28 = 528."
## [1] "Result of iteration: 29 = 529."
## [1] "Result of iteration: 30 = 530."
## [1] "Result of iteration: 31 = 531."
## [1] "Result of iteration: 32 = 532."
## [1] "Result of iteration: 33 = 533."
## [1] "Result of iteration: 34 = 534."
## [1] "Result of iteration: 35 = 535."
## [1] "Result of iteration: 36 = 536."
## [1] "Result of iteration: 37 = 537."
## [1] "Result of iteration: 38 = 538."
## [1] "Result of iteration: 39 = 539."
## [1] "Result of iteration: 40 = 540."
## [1] "Result of iteration: 41 = 541."
## [1] "Result of iteration: 42 = 542."
## [1] "Result of iteration: 43 = 543."
## [1] "Result of iteration: 44 = 544."
## [1] "Result of iteration: 45 = 545."
## [1] "Result of iteration: 46 = 546."
## [1] "Result of iteration: 47 = 547."
## [1] "Result of iteration: 48 = 548."
## [1] "Result of iteration: 49 = 549."
## [1] "Result of iteration: 50 = 550."
print ("For loop complete!")
## [1] "For loop complete!"
print ("Program complete!")
## [1] "Program complete!"

Nested loops: useful when you need to take a value from one data file and combine it with each value from another, and so on

# Note we have i and j for each loop
for (i in c(1, 2, 3, 4, 5)) {
  print (paste ("I am base loop number: ",i))
  Sys.sleep(0.8) # slow it down for example purposes
  for (j in c(1, 2, 3)){ 
    print (paste ("... I am inside loop number: ",j))
  }
}
## [1] "I am base loop number:  1"
## [1] "... I am inside loop number:  1"
## [1] "... I am inside loop number:  2"
## [1] "... I am inside loop number:  3"
## [1] "I am base loop number:  2"
## [1] "... I am inside loop number:  1"
## [1] "... I am inside loop number:  2"
## [1] "... I am inside loop number:  3"
## [1] "I am base loop number:  3"
## [1] "... I am inside loop number:  1"
## [1] "... I am inside loop number:  2"
## [1] "... I am inside loop number:  3"
## [1] "I am base loop number:  4"
## [1] "... I am inside loop number:  1"
## [1] "... I am inside loop number:  2"
## [1] "... I am inside loop number:  3"
## [1] "I am base loop number:  5"
## [1] "... I am inside loop number:  1"
## [1] "... I am inside loop number:  2"
## [1] "... I am inside loop number:  3"

While loops: continue to loop while a condition remains true

Repeat loops: continue until you tell them to stop

Purrr Looping

Purrr Functions

map(): applies a function to each element of a list or vector and returns a list of the same length

map_dbl(): applies a function to each element of a list or vector and returns a double (numeric) vector

map_chr(): applies a function to each element of a list or vector and returns a character vector

map_df(): applies a function to each element of a list or vector and returns a data frame

Standard for Loop to change values on list with sqrt function vs…

site_areas <- c(144, 400, 625)
results <- numeric(length(site_areas)) # Must build an empty container first

for(i in seq_along(site_areas)) {
  results[i] <- sqrt(site_areas[i])
}
results
## [1] 12 20 25

…purrr equivalent

library(purrr)
# Map directly and define your expected output type explicitly
mapped_results <- map_dbl(site_areas, sqrt)
mapped_results
## [1] 12 20 25

Combine purrr pipelines inside data frames to split analysis across separate groups

# typical summary
summary(iris) 
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Use pipeline to split data into species
iris %>%  
  split(.$Species) %>%  # Splits the iris data set into a list of data frames based on the Species column (insert iris into dot)
  map(~summary(.x)) # Finds summary of species
## $setosa
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.300   Min.   :1.000   Min.   :0.100  
##  1st Qu.:4.800   1st Qu.:3.200   1st Qu.:1.400   1st Qu.:0.200  
##  Median :5.000   Median :3.400   Median :1.500   Median :0.200  
##  Mean   :5.006   Mean   :3.428   Mean   :1.462   Mean   :0.246  
##  3rd Qu.:5.200   3rd Qu.:3.675   3rd Qu.:1.575   3rd Qu.:0.300  
##  Max.   :5.800   Max.   :4.400   Max.   :1.900   Max.   :0.600  
##        Species  
##  setosa    :50  
##  versicolor: 0  
##  virginica : 0  
##                 
##                 
##                 
## 
## $versicolor
##   Sepal.Length    Sepal.Width     Petal.Length   Petal.Width          Species  
##  Min.   :4.900   Min.   :2.000   Min.   :3.00   Min.   :1.000   setosa    : 0  
##  1st Qu.:5.600   1st Qu.:2.525   1st Qu.:4.00   1st Qu.:1.200   versicolor:50  
##  Median :5.900   Median :2.800   Median :4.35   Median :1.300   virginica : 0  
##  Mean   :5.936   Mean   :2.770   Mean   :4.26   Mean   :1.326                  
##  3rd Qu.:6.300   3rd Qu.:3.000   3rd Qu.:4.60   3rd Qu.:1.500                  
##  Max.   :7.000   Max.   :3.400   Max.   :5.10   Max.   :1.800                  
## 
## $virginica
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.900   Min.   :2.200   Min.   :4.500   Min.   :1.400  
##  1st Qu.:6.225   1st Qu.:2.800   1st Qu.:5.100   1st Qu.:1.800  
##  Median :6.500   Median :3.000   Median :5.550   Median :2.000  
##  Mean   :6.588   Mean   :2.974   Mean   :5.552   Mean   :2.026  
##  3rd Qu.:6.900   3rd Qu.:3.175   3rd Qu.:5.875   3rd Qu.:2.300  
##  Max.   :7.900   Max.   :3.800   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    : 0  
##  versicolor: 0  
##  virginica :50  
##                 
##                 
## 

Using Chattr to compare methods when calculating means for fish count data

fish_counts <- list(
  river_a = c(12, 15, 10, 18, 13),
  river_b = c(25, 22, 28, 30, 24),
  river_c = c(8, 9, 7, 10, 6)
)

# Using a for-loop
mean_counts_for_loop <- numeric(length(fish_counts))
for (i in seq_along(fish_counts)) {
  mean_counts_for_loop[i] <- mean(fish_counts[[i]])
}
print(mean_counts_for_loop)
## [1] 13.6 25.8  8.0
# Using purrr::map_dbl
mean_counts_purrr <- map_dbl(fish_counts, mean)
print(mean_counts_purrr)
## river_a river_b river_c 
##    13.6    25.8     8.0

Functions

Functions are useful for avoiding repeated code

Function consists of a Name, Arguments (the input variables), and a Body (the execution logic wrapped inside curly braces)

Good practice to only use arguments passed directly into them, rather than accidentally grabbing random background objects in your global R environment

# Define Function
# Calculate the proportional mortality of a coral population.
#
# Args:
#   initial_count:   count(s) at the start; every value must be greater than
#                    zero and must not be missing.
#   surviving_count: count(s) still alive at the end.
#
# Returns:
#   (initial_count - surviving_count) / initial_count, computed element-wise.
#
# Raises:
#   An error if initial_count contains NA, zero or a negative value.
calculate_coral_mortality <- function(initial_count, surviving_count) {

  # Logic safety switch using our conditional tools!
  # A missing starting count cannot be validated, so fail with a clear message
  if (anyNA(initial_count)) {
    stop("Initial count must not contain missing values.")
  }
  # any() collapses the check to a single TRUE/FALSE, so a whole column of
  # counts can be passed in, not just one number
  if (any(initial_count <= 0)) {
    stop("Initial count must be greater than zero.")
  }

  # Calculate mortality rate (last expression is the return value)
  (initial_count - surviving_count) / initial_count
}

# Test custom function
calculate_coral_mortality(initial_count = 120, surviving_count = 84)
## [1] 0.3

Other Examples include converting temperature units: temp_C <- (temp_F - 32) * 5 / 9

Can use a function to do this for every value in a dataset

# Convert temperature(s) from degrees Fahrenheit to degrees Celsius.
# temp_F: temperature value(s) in Fahrenheit. Returns the Celsius equivalent(s).
fahrenheit_to_celsius <- function(temp_F) {
  degrees_above_freezing <- temp_F - 32   # 32 F is the freezing point of water
  degrees_above_freezing * 5 / 9          # rescale Fahrenheit steps to Celsius
}

# run the function on 2 lines
fahrenheit_to_celsius(100)
## [1] 37.77778
fahrenheit_to_celsius(200)
## [1] 93.33333

Celsius to fahrenheit

# Convert temperature(s) from degrees Celsius to degrees Fahrenheit.
# temp_C: temperature value(s) in Celsius. Returns the Fahrenheit equivalent(s).
celsius_to_fahrenheit <- function(temp_C) {
  rescaled <- temp_C * 9 / 5   # rescale Celsius steps to Fahrenheit steps
  rescaled + 32                # shift so that 0 C lands on 32 F
}

celsius_to_fahrenheit(0)
## [1] 32
celsius_to_fahrenheit(20)
## [1] 68
celsius_to_fahrenheit(40)
## [1] 104

Temp conversion w/ safety stop for absolute zero

# Convert temperature(s) from Celsius to Fahrenheit, refusing impossible input.
#
# Args:
#   temp_C: temperature value(s) in degrees Celsius; NA values are allowed and
#           come back as NA.
#
# Returns:
#   The Fahrenheit equivalent(s), same length as temp_C.
#
# Raises:
#   An error if any non-missing value is below absolute zero (-273.15).
convert_temp_c_to_f <- function(temp_C) {
  # any() collapses the check to one TRUE/FALSE so a vector of readings works;
  # na.rm = TRUE lets missing readings pass through instead of breaking the if
  if (any(temp_C < -273.15, na.rm = TRUE)) {
    stop("Temperature cannot be below absolute zero (-273.15°C).")
  }
  (temp_C * 9 / 5) + 32
}

convert_temp_c_to_f(25)
## [1] 77
#convert_temp_c_to_f(-300) # Triggers the safety check and stops execution with an error message

Advanced problems

# Create custom function named clean_and_classify_survey that accepts a data frame and a column name threshold parameter as inputs
# Safely check if the specified column name exists within the passed data frame; if it does not, trigger a user-friendly error message using stop()
# Filter out rows containing missing values (NA) within that specific column
# Use the pipe operator and mutate() along with a vectorised conditional (if_else() or case_when()) to append a status string variable classification based on your threshold parameter.
# Return the cleaned, altered data frame to the global environment.
# Test your custom pipeline using a mock tibble that contains at least one missing value to confirm the code handles anomalies without crashing.

# Drop rows with a missing measurement and label the rest against a threshold.
#
# Args:
#   df:          data frame (or tibble) holding the survey data.
#   column_name: single string naming the column to clean and classify.
#   threshold:   single non-missing value the column is compared against.
#
# Returns:
#   `df` without the rows where `column_name` is NA, plus a `status` column:
#   "Above Threshold" when the value is strictly greater than `threshold`,
#   otherwise "Below Threshold" (so values equal to the threshold are
#   labelled "Below Threshold").
#
# Raises:
#   An error if `df` is not a data frame, if `column_name` or `threshold` is
#   not a single non-missing value, or if the column does not exist in `df`.
clean_and_classify_survey <- function(df, column_name, threshold) {
  if (!is.data.frame(df)) {
    stop("df must be a data frame.")
  }
  # A vector of names would make the `if` below fail with a cryptic message
  if (!is.character(column_name) || length(column_name) != 1 || is.na(column_name)) {
    stop("column_name must be a single character string.")
  }
  # An NA threshold would silently turn every status into NA
  if (length(threshold) != 1 || is.na(threshold)) {
    stop("threshold must be a single non-missing value.")
  }

  # Check if the specified column exists in the data frame
  if (!column_name %in% colnames(df)) {
    stop(paste("Error: Column", column_name, "not found in the data frame."))
  }

  # .data[[column_name]] looks the column up by its string name inside dplyr verbs
  df %>%
    filter(!is.na(.data[[column_name]])) %>%
    mutate(
      status = if_else(
        .data[[column_name]] > threshold,
        "Above Threshold",
        "Below Threshold"
      )
    )
}
# Test the function with a mock tibble
test_data <- tibble(
  id = 1:10,
  measurement = c(5, 7, NA, 3, 8,
                 2, 9, NA, 4, 6)
)
result <- clean_and_classify_survey(test_data, "measurement", threshold = 5)
print(result)
## # A tibble: 8 × 3
##      id measurement status         
##   <int>       <dbl> <chr>          
## 1     1           5 Below Threshold
## 2     2           7 Above Threshold
## 3     4           3 Below Threshold
## 4     5           8 Above Threshold
## 5     6           2 Below Threshold
## 6     7           9 Above Threshold
## 7     9           4 Below Threshold
## 8    10           6 Above Threshold
#Given a vector of raw survey counts counts <- c(15, 24, 8, 42) and a vector of species-specific scaling factors factors <- c(1.2, 0.8, 2.5, 1.1), write a mapping statement that iterates across both vectors simultaneously to return a single vector of scaled abundance values. Do not use a manual index loop.

library(purrr)

counts <- c(15, 24, 8, 42)
factors <- c(1.2, 0.8, 2.5, 1.1)

scaled_abundance <- map2_dbl(counts, factors, ~ .x * .y)
print(scaled_abundance)
## [1] 18.0 19.2 20.0 46.2