# This file covers:
# - How to set up AI assistants such as GitHub Copilot and chattr to assist with coding
# - Sorting data with conditional statements
# - How to set up classic for loops
# - How to set up purrr loops
# - How to write and use functions
#install.packages("ellmer")
#install.packages("chattr")
#install.packages("palmerpenguins")
library("knitr")
library(usethis)
# Load tidyverse and palmerpenguins libraries
library("tidyverse")
library("palmerpenguins")
# Create a ggplot scatter plot using the penguins dataset.
# Plot bill_length_mm on the x-axis and bill_depth_mm on the y-axis.
# Color points by species and use geom_point with size 3.
# Add a minimal theme and clean labels.
# Scatter plot of bill length against bill depth, one colour per species.
ggplot(
  data = penguins,
  mapping = aes(x = bill_length_mm, y = bill_depth_mm, color = species)
) +
  geom_point(size = 3) +
  labs(
    title = "Penguin Bill Dimensions",
    x = "Bill Length (mm)",
    y = "Bill Depth (mm)",
    color = "Species"
  ) +
  theme_minimal()
# Create boxplot using penguins dataset
# Plot flipper_length_mm across different islands
# Color by species and use geom_point with size 3.
# Add a minimal theme and clean labels.
# Box plot of flipper length per island, split by species, with the raw
# observations overlaid as jittered points.
ggplot(penguins, aes(x = island, y = flipper_length_mm, color = species)) +
  # outlier.shape = NA hides the boxplot's own outlier markers. Every
  # observation is already drawn by geom_point() below, so leaving them on
  # would plot each outlier twice (once un-jittered, once jittered).
  geom_boxplot(outlier.shape = NA) +
  geom_point(position = position_jitter(width = 0.2), size = 3) +
  theme_minimal() +
  labs(
    title = "Penguin Flipper Length by Island",
    x = "Island",
    y = "Flipper Length (mm)",
    color = "Species"
  )
## run in console
# usethis::edit_r_environ()
## Enter into new window
# GEMINI_API_KEY=your_actual_key_here
## Restart R session for changes to take effect
## Ensure R uses the preferred model each session
# edit_r_profile() opens the .Rprofile file in an editor, which only makes
# sense when a person is at the keyboard. The guard lets the script be sourced
# or knitted without trying to launch an editor.
if (interactive()) {
  usethis::edit_r_profile()
}
## Enter the following line into the file that opens, then save it
options(.chattr_chat = ellmer::chat_google_gemini())
## Restart R session for changes to take effect
# Confirm the key is available WITHOUT printing it: echoing the value returned
# by Sys.getenv() writes the secret to the console and to any knitted output.
# Any key that has ever been printed or committed should be revoked and
# regenerated.
nzchar(Sys.getenv("GEMINI_API_KEY"))
## [1] TRUE
library("chattr")
library("ellmer")
library(ggplot2)
# Create a Gemini chat object with ellmer, naming the model explicitly
my_chat <- ellmer::chat_google_gemini(model = "gemini-2.5-flash")
# Hand that chat object to chattr so it is used for subsequent chattr calls
chattr_use(my_chat)
# Send a short test prompt (an example reply is recorded in the lines below)
chattr::chattr("say hello")
## # hello
##
## library(readr)
## library(ggplot2)
## library(dplyr)
# Launch the chattr chat app
chattr_app()
# Execute key shortcut: Ctrl + Alt + C
# Load the built-in iris data set, then plot sepal width against sepal length
# with one colour per species.
data(iris)
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Width, y = Sepal.Length, color = Species)
) +
  geom_point()
# Case 1: the condition is TRUE, so the body runs and x is reassigned.
x <- 1
x # what is x?
## [1] 1
if (x == 1) {
  x <- 2
}
x # what is x now?
## [1] 2
# Case 2: the condition is FALSE, so the body is skipped and x is unchanged.
x <- 3
x # what is x?
## [1] 3
if (x == 1) {
  x <- 2
}
x # what is x now?
## [1] 3
# Pseudocode showing the general shape of an if / else block:
# if (animal_data_received) {
#   1. save a statement in the log file that the animal has been updated
# } else {
#   1. save a statement saying no data was received
# }
# Plain if: last.name is only created when the condition is TRUE.
name <- "Nick"
if (name == "Nick") {
  last.name <- "Murray"
}
print(paste(name, last.name))
## [1] "Nick Murray"
# try changing name to Bob, what happens?
# now handle that case with an else branch
name <- "Bob"
if (name == "Nick") {
  last.name <- "Murray"
} else {
  last.name <- "Jones"
}
print(paste(name, last.name))
## [1] "Bob Jones"
# chaining together: conditions are tested in order and the first TRUE one wins
name <- "Jim"
if (name == "Nick") {
  last.name <- "Murray"
} else if (name == "Bob") {
  last.name <- "Jones"
} else {
  last.name <- "... last name is unknown"
}
print(paste(name, last.name))
## [1] "Jim ... last name is unknown"
# The parentheses () hold the logical condition; the curly braces {} hold the
# action that runs when that condition is TRUE. The braces after else hold the
# action that runs otherwise.
sst <- 30.2
if (sst > 29.5) {
  print("Warning: Marine heatwave threshold exceeded!")
} else {
  print("SST remains within baseline parameters.")
}
## [1] "Warning: Marine heatwave threshold exceeded!"
# Sample coral data: one row per site with its depth_m value
coral_monitoring <- tibble(
  site = c("Site_A", "Site_B", "Site_C"),
  depth_m = c(12, 35, 8)
)
# Classify depth using if_else, the vectorised if/else: every row is tested
# and receives one of the two labels
coral_monitoring <- coral_monitoring %>%
  mutate(
    zone = if_else(depth_m > 30, "Deep Reef", "Shallow Reef")
  )
coral_monitoring
## # A tibble: 3 × 3
## site depth_m zone
## <chr> <dbl> <chr>
## 1 Site_A 12 Shallow Reef
## 2 Site_B 35 Deep Reef
## 3 Site_C 8 Shallow Reef
## %>%: the pipe operator chains functions together (it passes the result of the expression on its left as the first argument to the function on its right)
## mutate(): adds new columns to a data frame or modifies existing ones
# Assign each site a reef category from its depth. case_when() tests the
# conditions from top to bottom and uses the first one that is TRUE, which is
# why the second condition does not need to repeat the lower bound.
coral_monitoring <- coral_monitoring %>%
  mutate(
    reef_category = case_when(
      depth_m < 10 ~ "Lagoon / Flats",
      depth_m <= 30 ~ "Crest / Slope",
      depth_m > 30 ~ "Mesophotic / Deep",
      TRUE ~ "Unclassified" # Catch-all for rows matched by nothing above
    )
  )
# Create a tibble named marine_stations containing a salinity column with the
# values 35, 28, 32, and 12.
# Use the pipe operator (%>%) and mutate() combined with case_when() to create
# a new column named environment_type.
# Classify values below 15 as "Estuarine", values between 15 and 30 as
# "Brackish", and values above 30 as "Marine".
marine_stations <- tibble(
  salinity = c(35, 28, 32, 12)
)
marine_stations <- marine_stations %>%
  mutate(
    environment_type = case_when(
      salinity < 15 ~ "Estuarine",
      between(salinity, 15, 30) ~ "Brackish", # both ends are included
      salinity > 30 ~ "Marine"
    )
  )
marine_stations
## # A tibble: 4 × 2
## salinity environment_type
## <dbl> <chr>
## 1 35 Marine
## 2 28 Brackish
## 3 32 Marine
## 4 12 Estuarine
# The body runs once per element, even when the loop variable is never used.
for (value in c("My", "first", "for", "loop")) {
  print("one run")
}
## [1] "one run"
## [1] "one run"
## [1] "one run"
## [1] "one run"
# On each pass the loop variable holds the current element.
for (value in c("My", "second", "for", "loop")) {
  print(value)
}
## [1] "My"
## [1] "second"
## [1] "for"
## [1] "loop"
# The loop variable can have any name: word ...
for (word in c("My", "second", "for", "loop")) {
  print(word)
}
## [1] "My"
## [1] "second"
## [1] "for"
## [1] "loop"
# ... or string ...
for (string in c("My", "second", "for", "loop")) {
  print(string)
}
## [1] "My"
## [1] "second"
## [1] "for"
## [1] "loop"
# ... or i.
for (i in c("My", "second", "for", "loop")) {
  print(i)
}
## [1] "My"
## [1] "second"
## [1] "for"
## [1] "loop"
# Loop over a sequence of years.
for (year in 2020:2024) {
  print(paste("Processing climate data for year:", year))
}
## [1] "Processing climate data for year: 2020"
## [1] "Processing climate data for year: 2021"
## [1] "Processing climate data for year: 2022"
## [1] "Processing climate data for year: 2023"
## [1] "Processing climate data for year: 2024"
# Loop over positions rather than values: seq_along() yields 1, 2, ... up to
# the length of the vector, so i can be used both as a label and as an index.
transect_lengths <- c(50, 100, 25, 75)
for (i in seq_along(transect_lengths)) {
  print(
    paste("Transect number", i, "measures", transect_lengths[i], "metres.")
  )
}
## [1] "Transect number 1 measures 50 metres."
## [1] "Transect number 2 measures 100 metres."
## [1] "Transect number 3 measures 25 metres."
## [1] "Transect number 4 measures 75 metres."
# Make a for loop that lists my top 3 favorite animals, using print() and
# paste() to combine fixed text with the loop value.
favorite_animals <- c("Snow Leopard", "Octopus", "Sperm Whale")
# Loop over the positions of the vector itself so the index can never run past
# its end (a separate counter vector could get out of step with it).
for (i in seq_along(favorite_animals)) {
  print(paste("My number", i, "favorite animal is the", favorite_animals[i]))
}
## [1] "My number 1 favorite animal is the Snow Leopard"
## [1] "My number 2 favorite animal is the Octopus"
## [1] "My number 3 favorite animal is the Sperm Whale"
# Build the values 1 to 50 and inspect them. Despite its name, my_list is an
# integer vector rather than a list, as the str() output shows.
my_list <- 1:50
print(my_list)
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
str(my_list)
## int [1:50] 1 2 3 4 5 6 7 8 9 10 ...
# Make a for loop (i is the conventional name for the loop variable)
for (i in my_list) { # head of the for loop
  # Build the message for this iteration, then print it
  out <- paste0("I am printing loop number: ", i, ".")
  print(out)
} # end of the loop
## [1] "I am printing loop number: 1."
## [1] "I am printing loop number: 2."
## [1] "I am printing loop number: 3."
## [1] "I am printing loop number: 4."
## [1] "I am printing loop number: 5."
## [1] "I am printing loop number: 6."
## [1] "I am printing loop number: 7."
## [1] "I am printing loop number: 8."
## [1] "I am printing loop number: 9."
## [1] "I am printing loop number: 10."
## [1] "I am printing loop number: 11."
## [1] "I am printing loop number: 12."
## [1] "I am printing loop number: 13."
## [1] "I am printing loop number: 14."
## [1] "I am printing loop number: 15."
## [1] "I am printing loop number: 16."
## [1] "I am printing loop number: 17."
## [1] "I am printing loop number: 18."
## [1] "I am printing loop number: 19."
## [1] "I am printing loop number: 20."
## [1] "I am printing loop number: 21."
## [1] "I am printing loop number: 22."
## [1] "I am printing loop number: 23."
## [1] "I am printing loop number: 24."
## [1] "I am printing loop number: 25."
## [1] "I am printing loop number: 26."
## [1] "I am printing loop number: 27."
## [1] "I am printing loop number: 28."
## [1] "I am printing loop number: 29."
## [1] "I am printing loop number: 30."
## [1] "I am printing loop number: 31."
## [1] "I am printing loop number: 32."
## [1] "I am printing loop number: 33."
## [1] "I am printing loop number: 34."
## [1] "I am printing loop number: 35."
## [1] "I am printing loop number: 36."
## [1] "I am printing loop number: 37."
## [1] "I am printing loop number: 38."
## [1] "I am printing loop number: 39."
## [1] "I am printing loop number: 40."
## [1] "I am printing loop number: 41."
## [1] "I am printing loop number: 42."
## [1] "I am printing loop number: 43."
## [1] "I am printing loop number: 44."
## [1] "I am printing loop number: 45."
## [1] "I am printing loop number: 46."
## [1] "I am printing loop number: 47."
## [1] "I am printing loop number: 48."
## [1] "I am printing loop number: 49."
## [1] "I am printing loop number: 50."
# These run once, after the loop has finished
print("For loop complete!")
## [1] "For loop complete!"
print("Program complete!")
## [1] "Program complete!"
# Constant that is added to every value of my_list
add_me <- 500
for (i in my_list) { # head of the for loop
  # Shift the current value by the constant
  result <- i + add_me
  out <- paste0("Result of iteration: ", i, " = ", result, ".")
  print(out)
} # end of the loop
## [1] "Result of iteration: 1 = 501."
## [1] "Result of iteration: 2 = 502."
## [1] "Result of iteration: 3 = 503."
## [1] "Result of iteration: 4 = 504."
## [1] "Result of iteration: 5 = 505."
## [1] "Result of iteration: 6 = 506."
## [1] "Result of iteration: 7 = 507."
## [1] "Result of iteration: 8 = 508."
## [1] "Result of iteration: 9 = 509."
## [1] "Result of iteration: 10 = 510."
## [1] "Result of iteration: 11 = 511."
## [1] "Result of iteration: 12 = 512."
## [1] "Result of iteration: 13 = 513."
## [1] "Result of iteration: 14 = 514."
## [1] "Result of iteration: 15 = 515."
## [1] "Result of iteration: 16 = 516."
## [1] "Result of iteration: 17 = 517."
## [1] "Result of iteration: 18 = 518."
## [1] "Result of iteration: 19 = 519."
## [1] "Result of iteration: 20 = 520."
## [1] "Result of iteration: 21 = 521."
## [1] "Result of iteration: 22 = 522."
## [1] "Result of iteration: 23 = 523."
## [1] "Result of iteration: 24 = 524."
## [1] "Result of iteration: 25 = 525."
## [1] "Result of iteration: 26 = 526."
## [1] "Result of iteration: 27 = 527."
## [1] "Result of iteration: 28 = 528."
## [1] "Result of iteration: 29 = 529."
## [1] "Result of iteration: 30 = 530."
## [1] "Result of iteration: 31 = 531."
## [1] "Result of iteration: 32 = 532."
## [1] "Result of iteration: 33 = 533."
## [1] "Result of iteration: 34 = 534."
## [1] "Result of iteration: 35 = 535."
## [1] "Result of iteration: 36 = 536."
## [1] "Result of iteration: 37 = 537."
## [1] "Result of iteration: 38 = 538."
## [1] "Result of iteration: 39 = 539."
## [1] "Result of iteration: 40 = 540."
## [1] "Result of iteration: 41 = 541."
## [1] "Result of iteration: 42 = 542."
## [1] "Result of iteration: 43 = 543."
## [1] "Result of iteration: 44 = 544."
## [1] "Result of iteration: 45 = 545."
## [1] "Result of iteration: 46 = 546."
## [1] "Result of iteration: 47 = 547."
## [1] "Result of iteration: 48 = 548."
## [1] "Result of iteration: 49 = 549."
## [1] "Result of iteration: 50 = 550."
# These run once, after the loop has finished
print("For loop complete!")
## [1] "For loop complete!"
print("Program complete!")
## [1] "Program complete!"
# Nested loops: the outer loop uses i and the inner loop uses j. The inner
# loop runs from start to finish on every pass of the outer loop.
for (i in c(1, 2, 3, 4, 5)) {
  print(paste("I am base loop number: ", i))
  Sys.sleep(0.8) # slow it down for example purposes
  for (j in c(1, 2, 3)) {
    print(paste("... I am inside loop number: ", j))
  }
}
## [1] "I am base loop number: 1"
## [1] "... I am inside loop number: 1"
## [1] "... I am inside loop number: 2"
## [1] "... I am inside loop number: 3"
## [1] "I am base loop number: 2"
## [1] "... I am inside loop number: 1"
## [1] "... I am inside loop number: 2"
## [1] "... I am inside loop number: 3"
## [1] "I am base loop number: 3"
## [1] "... I am inside loop number: 1"
## [1] "... I am inside loop number: 2"
## [1] "... I am inside loop number: 3"
## [1] "I am base loop number: 4"
## [1] "... I am inside loop number: 1"
## [1] "... I am inside loop number: 2"
## [1] "... I am inside loop number: 3"
## [1] "I am base loop number: 5"
## [1] "... I am inside loop number: 1"
## [1] "... I am inside loop number: 2"
## [1] "... I am inside loop number: 3"
site_areas <- c(144, 400, 625)
# A for loop needs somewhere to store its answers, so an empty numeric
# container of the right length is created before the loop starts.
results <- numeric(length(site_areas))
for (i in seq_along(site_areas)) {
  results[i] <- sqrt(site_areas[i])
}
results
## [1] 12 20 25
library(purrr)
# Map directly over the vector. The _dbl suffix states the expected output
# type up front: the result is a double vector, so no container has to be
# built beforehand.
mapped_results <- map_dbl(.x = site_areas, .f = sqrt)
mapped_results
## [1] 12 20 25
# Typical summary: one set of statistics per column for the whole iris data
# set, with all species pooled together
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Use a pipeline to summarise each species separately
iris %>%
  split(.$Species) %>% # one data frame per Species level (the dot stands for iris)
  map(summary) # apply summary() to each per-species data frame
## $setosa
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.300 Min. :1.000 Min. :0.100
## 1st Qu.:4.800 1st Qu.:3.200 1st Qu.:1.400 1st Qu.:0.200
## Median :5.000 Median :3.400 Median :1.500 Median :0.200
## Mean :5.006 Mean :3.428 Mean :1.462 Mean :0.246
## 3rd Qu.:5.200 3rd Qu.:3.675 3rd Qu.:1.575 3rd Qu.:0.300
## Max. :5.800 Max. :4.400 Max. :1.900 Max. :0.600
## Species
## setosa :50
## versicolor: 0
## virginica : 0
##
##
##
##
## $versicolor
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## Min. :4.900 Min. :2.000 Min. :3.00 Min. :1.000 setosa : 0
## 1st Qu.:5.600 1st Qu.:2.525 1st Qu.:4.00 1st Qu.:1.200 versicolor:50
## Median :5.900 Median :2.800 Median :4.35 Median :1.300 virginica : 0
## Mean :5.936 Mean :2.770 Mean :4.26 Mean :1.326
## 3rd Qu.:6.300 3rd Qu.:3.000 3rd Qu.:4.60 3rd Qu.:1.500
## Max. :7.000 Max. :3.400 Max. :5.10 Max. :1.800
##
## $virginica
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.900 Min. :2.200 Min. :4.500 Min. :1.400
## 1st Qu.:6.225 1st Qu.:2.800 1st Qu.:5.100 1st Qu.:1.800
## Median :6.500 Median :3.000 Median :5.550 Median :2.000
## Mean :6.588 Mean :2.974 Mean :5.552 Mean :2.026
## 3rd Qu.:6.900 3rd Qu.:3.175 3rd Qu.:5.875 3rd Qu.:2.300
## Max. :7.900 Max. :3.800 Max. :6.900 Max. :2.500
## Species
## setosa : 0
## versicolor: 0
## virginica :50
##
##
##
# Named list holding one vector of fish counts per river
fish_counts <- list(
  river_a = c(12, 15, 10, 18, 13),
  river_b = c(25, 22, 28, 30, 24),
  river_c = c(8, 9, 7, 10, 6)
)
# Using a for-loop: create the output vector first, then fill it one slot at
# a time. [[i]] pulls out the i-th vector itself rather than a one-item list.
mean_counts_for_loop <- numeric(length(fish_counts))
for (i in seq_along(fish_counts)) {
  mean_counts_for_loop[i] <- mean(fish_counts[[i]])
}
print(mean_counts_for_loop)
## [1] 13.6 25.8 8.0
# Using purrr::map_dbl: one call replaces the container and the loop, and the
# names of the list are carried over to the result
mean_counts_purrr <- map_dbl(.x = fish_counts, .f = mean)
print(mean_counts_purrr)
## river_a river_b river_c
## 13.6 25.8 8.0
# Define Function
# Calculate the coral mortality rate as the fraction of colonies lost.
#   initial_count:   numeric count(s) at the start; every value must be > 0
#   surviving_count: numeric count(s) still alive at the follow-up
#   returns:         (initial_count - surviving_count) / initial_count
#   errors:          if either argument is not numeric, or if any
#                    initial_count is zero or negative
calculate_coral_mortality <- function(initial_count, surviving_count) {
  if (!is.numeric(initial_count) || !is.numeric(surviving_count)) {
    stop("Counts must be numeric.")
  }
  # Logic safety switch using our conditional tools!
  # any() collapses the comparison to a single TRUE/FALSE, so the check also
  # works when whole vectors of counts are passed in. Missing values are
  # skipped here and simply come back as NA in the result.
  if (any(initial_count <= 0, na.rm = TRUE)) {
    stop("Initial count must be greater than zero.")
  }
  # Calculate mortality rate
  (initial_count - surviving_count) / initial_count
}
# Test custom function
calculate_coral_mortality(initial_count = 120, surviving_count = 84)
## [1] 0.3
# Convert a temperature from degrees Fahrenheit to degrees Celsius.
#   temp_F:  numeric temperature(s) in Fahrenheit
#   returns: the same temperature(s) expressed in Celsius
fahrenheit_to_celsius <- function(temp_F) {
  # Remove the 32-degree offset first, then rescale by 5/9
  offset_removed <- temp_F - 32
  offset_removed * 5 / 9
}
# run the function on 2 lines
fahrenheit_to_celsius(100)
## [1] 37.77778
fahrenheit_to_celsius(200)
## [1] 93.33333
# Convert a temperature from degrees Celsius to degrees Fahrenheit.
#   temp_C:  numeric temperature(s) in Celsius
#   returns: the same temperature(s) expressed in Fahrenheit
celsius_to_fahrenheit <- function(temp_C) {
  # Rescale by 9/5 first, then add the 32-degree offset
  rescaled <- temp_C * 9 / 5
  rescaled + 32
}
celsius_to_fahrenheit(0)
## [1] 32
celsius_to_fahrenheit(20)
## [1] 68
celsius_to_fahrenheit(40)
## [1] 104
# Convert Celsius to Fahrenheit, refusing physically impossible input.
#   temp_C:  numeric temperature(s) in Celsius
#   returns: the same temperature(s) expressed in Fahrenheit
#   errors:  if any supplied temperature is below absolute zero
convert_temp_c_to_f <- function(temp_C) {
  # any() reduces the comparison to a single TRUE/FALSE so the safety check
  # also works for a whole vector of temperatures; na.rm = TRUE lets missing
  # values pass through and come back as NA instead of breaking the if().
  if (any(temp_C < -273.15, na.rm = TRUE)) {
    stop("Temperature cannot be below absolute zero (-273.15°C).")
  }
  (temp_C * 9 / 5) + 32
}
convert_temp_c_to_f(25)
## [1] 77
#convert_temp_c_to_f(-300) # Triggers the safety check and stops execution with an error message
# Create a custom function named clean_and_classify_survey that accepts a data frame, a column name, and a threshold parameter as inputs
# Safely check if the specified column name exists within the passed data frame; if it does not, trigger a user-friendly error message using stop()
# Filter out rows containing missing values (NA) within that specific column
# Use the pipe operator and mutate() along with a vectorised conditional (if_else() or case_when()) to append a status string variable classification based on your threshold parameter.
# Return the cleaned, altered data frame to the global environment.
# Test your custom pipeline using a mock tibble that contains at least one missing value to confirm the code handles anomalies without crashing.
# Drop rows with a missing value in one column, then label the remaining rows
# by comparing that column against a threshold.
#   df:          a data frame or tibble
#   column_name: a single string naming the column to clean and classify
#   threshold:   value the column is compared against
#   returns:     df without the NA rows of that column, plus a `status` column
#                that is "Above Threshold" where the value is strictly greater
#                than threshold and "Below Threshold" otherwise (so a value
#                equal to the threshold is labelled "Below Threshold")
#   errors:      if df is not a data frame, if column_name is not a single
#                string, or if the column does not exist in df
clean_and_classify_survey <- function(df, column_name, threshold) {
  if (!is.data.frame(df)) {
    stop("`df` must be a data frame.", call. = FALSE)
  }
  # The existence check below needs exactly one name to give one TRUE/FALSE
  if (!is.character(column_name) || length(column_name) != 1 ||
      is.na(column_name)) {
    stop("`column_name` must be a single string.", call. = FALSE)
  }
  # Check if the specified column exists in the data frame
  if (!column_name %in% colnames(df)) {
    stop(
      "Column '", column_name, "' not found in the data frame.",
      call. = FALSE
    )
  }
  # Filter out rows with NA values in the specified column and classify based
  # on threshold; .data[[column_name]] looks the column up by its string name
  df %>%
    filter(!is.na(.data[[column_name]])) %>%
    mutate(
      status = if_else(
        .data[[column_name]] > threshold,
        "Above Threshold",
        "Below Threshold"
      )
    )
}
# Test the function with a mock tibble whose measurement column contains two
# missing values
test_data <- tibble(
  id = 1:10,
  measurement = c(5, 7, NA, 3, 8, 2, 9, NA, 4, 6)
)
result <- clean_and_classify_survey(
  df = test_data,
  column_name = "measurement",
  threshold = 5
)
print(result)
## # A tibble: 8 × 3
## id measurement status
## <int> <dbl> <chr>
## 1 1 5 Below Threshold
## 2 2 7 Above Threshold
## 3 4 3 Below Threshold
## 4 5 8 Above Threshold
## 5 6 2 Below Threshold
## 6 7 9 Above Threshold
## 7 9 4 Below Threshold
## 8 10 6 Above Threshold
# Given a vector of raw survey counts, counts <- c(15, 24, 8, 42), and a vector
# of species-specific scaling factors, factors <- c(1.2, 0.8, 2.5, 1.1), write a
# mapping statement that iterates across both vectors simultaneously to return
# a single vector of scaled abundance values. Do not use a manual index loop.
library(purrr)
counts <- c(15, 24, 8, 42)
factors <- c(1.2, 0.8, 2.5, 1.1)
# map2_dbl() steps through both vectors in parallel and multiplies each pair,
# returning the products as one double vector
scaled_abundance <- map2_dbl(counts, factors, `*`)
print(scaled_abundance)
## [1] 18.0 19.2 20.0 46.2