wild boar optimal gps fix rate interval analysis

List of necessary packages

Set the directory where your CSV files are stored

csv_dir <- "/Users/ykwon/Library/CloudStorage/GoogleDrive-youngclick@gmail.com/My Drive/ykwon/research/2022 PDA research/KOREA/data2023/trackingdata/Rproject_tracking/trackingdatav20"

List all CSV files in the directory

csv_files <- list.files(csv_dir, pattern = ".csv$", full.names = TRUE)
csv_files

Read and process CSV files

read_csv_data <- function(file) {
  data <- read.csv(file)
  data$timestamp <- as.POSIXct(data$timestamp, format = "%Y-%m-%d %H:%M")
  
  # Sort the data by timestamp in ascending order
  data <- data[order(data$timestamp), ]
  
  # Calculate the time difference and add it as a new column 'timediff'
  data <- data %>%
    mutate(timediff = as.numeric(timestamp - lag(timestamp), units = "mins"))
  
  return(data)
}

Read all the CSV files into a named list of dataframes

data_list <- lapply(setNames(csv_files, basename(csv_files)), read_csv_data)
data_list

Convert the list of dataframes to individual dataframes in the global environment

list2env(data_list, envir = .GlobalEnv)
df_names_csv <- paste0(c("nibr2002", "nibr2003", "nibr2004", "nibr2005", "nibr2006", "nibr2007", "nibr2008", "nibr2009", "nibr2010", "nibr2102", "nibr2104", "nibr2105", "nibr2106", "nibr2108", "nibr2201"), ".csv")
df_names_csv
combined_df <- do.call(rbind, mget(df_names_csv))
grouped_data <- combined_df %>%
  group_by(id)
grouped_data

Examine the summary statistics of timediff by id - timediff is in minute unit

timediff_summary <- grouped_data %>%
  summarize(
    count = n(),
    min_timediff = min(timediff, na.rm = TRUE),
    max_timediff = max(timediff, na.rm = TRUE),
    mean_timediff = mean(timediff, na.rm = TRUE),
    median_timediff = median(timediff, na.rm = TRUE),
    sd_timediff = sd(timediff, na.rm = TRUE)
  )
timediff_summary #check which species to pick
## # A tibble: 15 × 7
##    id       count min_timediff max_timediff mean_timediff median_timediff
##    <chr>    <int>        <dbl>        <dbl>         <dbl>           <dbl>
##  1 nibr2002    64          179         3240        320.               180
##  2 nibr2003    82          179         2341        256.               180
##  3 nibr2004    91          179          360        184.               180
##  4 nibr2005    72          178          361        195.               180
##  5 nibr2006    64          177         1440        380.               359
##  6 nibr2007   105            0         1080        190.               180
##  7 nibr2008   110           61          360        181.               180
##  8 nibr2009    53          179          361        190.               180
##  9 nibr2010    76          178          361        187.               180
## 10 nibr2102   698            1         3596         27.5                6
## 11 nibr2104  1710            1          267         11.3                4
## 12 nibr2105  1557            1         2696         18.8                3
## 13 nibr2106  2354            1          330          8.72               4
## 14 nibr2108    65            4           32          7.45               5
## 15 nibr2201  1070            0        22124        147.               120
## # ℹ 1 more variable: sd_timediff <dbl>

select list of species for subsquent analysis

species21_list <- list(nibr2102.csv, nibr2104.csv, nibr2105.csv, nibr2106.csv)  

below is function to calculate the basic movement information (speed and distance) and reproject

forest is shape file that I matched the projection

import forest shape file as forest object

forest <- st_read("/Users/ykwon/Library/CloudStorage/GoogleDrive-youngclick@gmail.com/My Drive/ykwon/research/2022 PDA research/KOREA/data2023/trackingdata/Rproject_tracking/map_jinju/forest.shp")
## Reading layer `forest' from data source 
##   `/Users/ykwon/Library/CloudStorage/GoogleDrive-youngclick@gmail.com/My Drive/ykwon/research/2022 PDA research/KOREA/data2023/trackingdata/Rproject_tracking/map_jinju/forest.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 28487 features and 16 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 281378.6 ymin: 273387.5 xmax: 324613.7 ymax: 306662.6
## Projected CRS: Korea 2000 / Central Belt 2010
combined_function1 <- function(df, forest) {
  
  # Convert the timestamp column to a proper datetime format
  df$timestamp <- as.POSIXct(df$timestamp)
  
  # Create a SpatialPoints object from the dataframe
  points <- SpatialPoints(df[, c("long", "lat")], proj4string = CRS("+proj=longlat +datum=WGS84"))
  
  # Reproject coordinates to match the forest object CRS
  points_reproj <- spTransform(points, crs(forest))
  
  # Add reprojected coordinates as x and y columns to the data frame
  df$x <- coordinates(points_reproj)[, 1]
  df$y <- coordinates(points_reproj)[, 2]
  
  # Calculate the distance between consecutive points (using the reprojected coordinates)
  df$distance <- c(0, sqrt(diff(df$x)^2 + diff(df$y)^2))
  
  # Calculate the speed between consecutive points (distance / time difference)
  df$speed <- df$distance / df$timediff
  
  # Add a new column indicating day or night based on timestamp
  df$day_night <- ifelse(as.POSIXlt(df$timestamp)$hour >= 6 & as.POSIXlt(df$timestamp)$hour < 20, "Day", "Night")
  
  # Create a move object from the reprojected coordinates
  move_obj <- move(df$x, df$y, time = df$timestamp, proj = crs(forest))
  
  return(df)
}

Loop through each species data frame

for (i in 1:length(species21_list)) {
  species21_list[[i]] <- combined_function1(species21_list[[i]], forest)
}

After careful examination, I decide to select only three species: 2104,2105,2106

species21_list <- species21_list[-1] #now there are three species
# Create an empty list to store the plots
plot_list <- list()

Loop through each species data frame

for (i in 1:length(species21_list)) {
  species_df <- species21_list[[i]]  # Get the current species data frame
  
  # Plot 1: Movement patterns over time (speed)
  plot1 <- ggplot(species_df, aes(x = timestamp, y = speed)) +
    geom_line(color = "blue") +
    labs(title = "Movement Patterns", x = "Timestamp", y = "Speed (m/s)") +
    theme_minimal()
  
  # Plot 2: Movement patterns over time with day and night intervals
  plot2 <- ggplot(species_df, aes(x = timestamp, y = speed, color = day_night)) +
    geom_line() +
    labs(title = "Movement Patterns", x = "Timestamp", y = "Speed (m/s)", color = "Day/Night") +
    theme_minimal()
  
  # Add the plots to the list
  plot_list[[i]] <- plot1
  plot_list[[i+length(species21_list)]] <- plot2
}

Create a grid of plots

grid.arrange(grobs = plot_list, ncol = 2) #movement pattern

it looks like species2105 needs data clean up upto Jun07

species21_list[[2]] <- species21_list[[2]] %>%
  filter(timestamp <= as.POSIXct("2021-06-07"))  # replace YYYY with the desired year

now you can rerun the plot above to confirm the data clean-up Create a list to store the data subsets for each minute interval

telemetry_data_subsets <- list()

Iterate over data frames in species21_list

for (j in 1:length(species21_list)) {
  # Get the current data frame
  telemetry_data <- species21_list[[j]]
  
  # Add a column for the cumulative time difference
  telemetry_data$cumulative_timediff <- cumsum(ifelse(is.na(telemetry_data$timediff), 0, telemetry_data$timediff)) 
  # Set na.rm = TRUE to remove NA values
  
  # Iterate over minute intervals from 5 to 120, in steps of 5
  for (interval in seq(5, 120, by = 5)) {
    
    # Initialize the subsetted data frame
    telemetry_data_subset <- data.frame()
    
    # Initialize the index of the previously selected row
    prev_index <- 1
    
    # Loop through the telemetry data starting from the third row
    for (i in 3:nrow(telemetry_data)) {
      # Calculate the time difference from the previously selected row
      time_diff <- telemetry_data$cumulative_timediff[i] - telemetry_data$cumulative_timediff[prev_index]
      
      # Check if the time difference is greater than or equal to the desired interval
      if (time_diff >= interval) {
        # Append the current row to the subsetted data frame
        telemetry_data_subset <- rbind(telemetry_data_subset, telemetry_data[i, ])
        
        # Update the index of the previously selected row
        prev_index <- i
      }
    }
    
    # Store the subsetted data frame in the list, using the minute interval and the data frame index as the name
    telemetry_data_subsets[[paste0("df_", j, "_interval_", interval, "min")]] <- telemetry_data_subset
  }
}
summary(telemetry_data_subsets)
rows_count <- sapply(telemetry_data_subsets, nrow)
rows_count

Convert the named numeric vector into a dataframe

df <- data.frame(name = names(rows_count), count = rows_count, stringsAsFactors = FALSE)

Extract species and interval from the name

df <- df %>%
  separate(name, into = c("species", "interval"), sep = "_interval_") %>%
  mutate(species = gsub("^df_", "species", species), # Replace "df_" with "species"
         interval = as.integer(gsub("min$", "", interval))) # Remove "min" and convert to integer
gpsfix_nrow <- ggplot(df, aes(x = interval, y = count, color = species, group = species)) +
  geom_line() +
  labs(title = "Number of Rows by Increasing GPS Fix Rates",
       x = "Interval (minutes)",
       y = "Number of Rows") +
  theme_minimal() +
  scale_color_discrete(name = "Species")

gpsfix_nrow

##Function to run BBMM model for a given species data frame

create_bbmm <- function(df) {
  # Remove rows where timediff is NA
  df <- df[!is.na(df$timediff), ]
  
  bbmm_model <- brownian.bridge(x = df$x, y = df$y, time.lag = df$timediff,
                                location.error = 10, cell.size = 10)
  return(bbmm_model)
}

Apply create_bbmm to each data frame in species21_list

bbmm_results <- lapply(telemetry_data_subsets, create_bbmm)
bbmm_results
load("bbmm_results.RData")

Function to plot BBMM contours for a given species data frame

plot_bbmm <- function(bbmm_model, df) {
  bbmm.contour(bbmm_model, levels = c(seq(50, 90, by = 10), 95, 99), locations = df, plot = TRUE)
}

Apply plot_bbmm to each BBMM model and its corresponding data subset # PLOTS are saved as PDF here

dev.off()
## null device 
##           1
pdf("bbmm_plotsAUG9_10m.pdf", width=10, height=10)
par(mfrow=c(4, 4))  # set up a 4x4 plotting grid

for (i in seq_along(bbmm_results)) {
  plot_name <- names(telemetry_data_subsets)[i]
  plot_bbmm(bbmm_results[[i]], telemetry_data_subsets[[i]])
  title(main = plot_name)  # This line adds the title after plotting
}

dev.off()  # close the PDF device
## null device 
##           1

area with compactness and complexity: acc

create_bbmm.area.cc <- function(df, percentile) {
  df <- df[!is.na(df$timediff), ]
  bbmm_model <- brownian.bridge(x = df$x, y = df$y, time.lag = df$timediff,
                                location.error = 10, cell.size = 10)
  
  # Calculate the contour levels
  levels <- c(seq(50,90, by = 10),95,99)
  
  # Get the index of the desired percentile in the levels vector
  percentile_index <- which(levels == percentile)
  
  contours <- bbmm.contour(bbmm_model, levels = levels)
  
  # Lists to store results
  area_list = numeric(length(levels))
  compactness_list = numeric(length(levels))
  complexity_list = numeric(length(levels))
  
  for (i in 1:length(levels)) {
    bbmm.contour <- data.frame(x = bbmm_model$x, y = bbmm_model$y, probability = bbmm_model$probability)
    bbmm.contour <- bbmm.contour[bbmm.contour$probability >= contours$Z[i],]
    
    m = SpatialPixelsDataFrame(points = bbmm.contour[c("x","y")], data = bbmm.contour)
    
    # Convert SpatialPixelsDataFrame to raster
    r = raster(m)
    
    # Convert raster to polygons
    poly_subset = rasterToPolygons(r, dissolve = TRUE)
    
    poly_area = gArea(poly_subset)
    poly_perimeter = gLength(poly_subset)
    
    # Area
    area_list[i] = poly_area
    
    # Compactness
    compactness_list[i] = (4 * pi * poly_area) / (poly_perimeter^2)
    
    # Complexity
    complexity_list[i] = poly_perimeter / sqrt(poly_area)
  }
  
  result = list(area = area_list, compactness = compactness_list, complexity = complexity_list)
  return(result)
}
percentiles <- c(seq(50, 90, by = 10), 95, 99)
results.acc <- data.frame()
for(i in 1:length(telemetry_data_subsets)){
  for(j in 1:length(percentiles)){
    metrics <- create_bbmm.area.cc(telemetry_data_subsets[[i]], percentile = percentiles[j])
    
    temp_result <- data.frame(
      dataset_index = i,
      percentile = j,
      area = metrics$area[j],
      compactness = metrics$compactness[j],
      complexity = metrics$complexity[j]
    )
    
    results.acc <- rbind(results.acc, temp_result)
  }
}
results.acc

instead running the code (takes several hours) import the results

results.acc <- read.csv("results.acc.csv")

Number of species, number of intervals, and number of percentiles

num_species <- 3
num_intervals <- length(seq(5, 120, by = 5))
num_percentiles <- length(c(seq(50, 90, by = 10), 95, 99))

Create a mapping for species based on dataset_index

species_mapping <- paste0("species", 1:num_species)

Map dataset_index to species

results.acc$species <- rep(species_mapping, each = num_intervals * num_percentiles)

# Create a mapping for interval based on the pattern you mentioned
intervals <- rep(seq(5, 120, by = 5), times = num_species, each = num_percentiles)
results.acc$interval <- intervals[1:nrow(results.acc)]

# Map percentile numbers to actual percentiles
percentile_mapping <- c(seq(50, 90, by = 10), 95, 99)
results.acc$percentile <- rep(percentile_mapping, times = num_species * num_intervals)

# Rearrange columns and display
results.acc <- results.acc[, c("species", "interval", "percentile", "area", "compactness", "complexity")]
head(results.acc)

Print the cleaned dataframe

head(results.acc)
#write.csv(results.acc,"results_acc_aug9.csv")

Define the plots

results.acc$percentile <- as.factor(results.acc$percentile)

color_palette <- brewer.pal(length(unique(results.acc$percentile)), "Set1")

area_plot <- ggplot(results.acc, aes(x = interval, y = area, color = percentile, group = percentile)) +
  geom_line() +
  facet_wrap(~species, scales = "free_y") + 
  labs(title = "Area change by increasing GPS fix intervals",
       x = "Interval Minutes",
       y = "Area",
       color = "Percentile") +
  scale_color_manual(values = color_palette) +
  theme_minimal()

compactness_plot <- ggplot(results.acc, aes(x = interval, y = compactness, color = percentile, group = percentile)) +
  geom_line() +
  facet_wrap(~species, scales = "free_y") + 
  labs(title = "Compactness change by increasing GPS fix intervals",
       x = "Interval Minutes",
       y = "Compactness",
       color = "Percentile") +
  scale_color_manual(values = color_palette) +
  theme_minimal()

complexity_plot <- ggplot(results.acc, aes(x = interval, y = complexity, color = percentile, group = percentile)) +
  geom_line() +
  facet_wrap(~species, scales = "free_y") + 
  labs(title = "Complexity change by increasing GPS fix intervals",
       x = "Interval Minutes",
       y = "Complexity",
       color = "Percentile") +
  scale_color_manual(values = color_palette) +
  theme_minimal()

Save plots to PDF # PLOTS are saved as PDF here

dev.off()
## null device 
##           1
pdf("acc_file_aug9.pdf", width = 10, height = 7)

print(area_plot)
print(compactness_plot)
print(complexity_plot)

dev.off()
## null device 
##           1

result in wide format

results_wide <- results.acc %>%
  pivot_wider(
    id_cols = c(species, interval), 
    names_from = percentile, 
    values_from = c(area, compactness, complexity),
    names_sep = "_"
  )

head(results_wide)
## # A tibble: 6 × 23
##   species  interval area_50 area_60 area_70 area_80 area_90 area_95 area_99
##   <chr>       <dbl>   <int>   <int>   <int>   <int>   <int>   <int>   <int>
## 1 species1        5   47100   72800  109400  168300  276900  384800  633500
## 2 species1       10   62900   95400  142000  215500  353000  502000  873800
## 3 species1       15   74200  111800  165100  246400  403400  584000 1027600
## 4 species1       20   83600  123400  180700  267900  442100  648300 1132600
## 5 species1       25  126900  189500  280600  432000  750700 1086400 1754300
## 6 species1       30  102300  151300  220500  331000  558900  827500 1384800
## # ℹ 14 more variables: compactness_50 <dbl>, compactness_60 <dbl>,
## #   compactness_70 <dbl>, compactness_80 <dbl>, compactness_90 <dbl>,
## #   compactness_95 <dbl>, compactness_99 <dbl>, complexity_50 <dbl>,
## #   complexity_60 <dbl>, complexity_70 <dbl>, complexity_80 <dbl>,
## #   complexity_90 <dbl>, complexity_95 <dbl>, complexity_99 <dbl>

statistical approach

Step 1: Calculate absolute change in area for each GPS fix interval

Define a function to calculate the absolute changes

calculate_absolute_change <- function(df) {
  df %>%
    arrange(species, interval) %>%
    group_by(species) %>%
    mutate(across(starts_with("area_"), ~ c(0, diff(.)),
                  .names = "change_{.col}"),
           across(starts_with("compactness_"), ~ c(0, diff(.)),
                  .names = "change_{.col}"),
           across(starts_with("complexity_"), ~ c(0, diff(.)),
                  .names = "change_{.col}"))
}

Apply the function

results_with_changes <- calculate_absolute_change(results_wide)
head(results_with_changes)
## # A tibble: 6 × 44
## # Groups:   species [1]
##   species  interval area_50 area_60 area_70 area_80 area_90 area_95 area_99
##   <chr>       <dbl>   <int>   <int>   <int>   <int>   <int>   <int>   <int>
## 1 species1        5   47100   72800  109400  168300  276900  384800  633500
## 2 species1       10   62900   95400  142000  215500  353000  502000  873800
## 3 species1       15   74200  111800  165100  246400  403400  584000 1027600
## 4 species1       20   83600  123400  180700  267900  442100  648300 1132600
## 5 species1       25  126900  189500  280600  432000  750700 1086400 1754300
## 6 species1       30  102300  151300  220500  331000  558900  827500 1384800
## # ℹ 35 more variables: compactness_50 <dbl>, compactness_60 <dbl>,
## #   compactness_70 <dbl>, compactness_80 <dbl>, compactness_90 <dbl>,
## #   compactness_95 <dbl>, compactness_99 <dbl>, complexity_50 <dbl>,
## #   complexity_60 <dbl>, complexity_70 <dbl>, complexity_80 <dbl>,
## #   complexity_90 <dbl>, complexity_95 <dbl>, complexity_99 <dbl>,
## #   change_area_50 <dbl>, change_area_60 <dbl>, change_area_70 <dbl>,
## #   change_area_80 <dbl>, change_area_90 <dbl>, change_area_95 <dbl>, …

Define a function to calculate the relative changes and significant changes

calculate_relative_change_and_significance <- function(df, significance_threshold = 10) {
  percentile_cols <- grep("^(area|compactness|complexity)_\\d+$", names(df), value = TRUE)
  
  df %>%
    group_by(species) %>%
    mutate(across(all_of(percentile_cols), 
                  list(relative_change = ~ (./lag(.))*100 - 100, 
                       is_significant = ~ abs((./lag(.))*100 - 100) > significance_threshold), 
                  .names = "{.col}_{.fn}"))
}

Using the function on the given data

results_with_changes_updated <- calculate_relative_change_and_significance(results_with_changes)
head(results_with_changes_updated)
## # A tibble: 6 × 86
## # Groups:   species [1]
##   species  interval area_50 area_60 area_70 area_80 area_90 area_95 area_99
##   <chr>       <dbl>   <int>   <int>   <int>   <int>   <int>   <int>   <int>
## 1 species1        5   47100   72800  109400  168300  276900  384800  633500
## 2 species1       10   62900   95400  142000  215500  353000  502000  873800
## 3 species1       15   74200  111800  165100  246400  403400  584000 1027600
## 4 species1       20   83600  123400  180700  267900  442100  648300 1132600
## 5 species1       25  126900  189500  280600  432000  750700 1086400 1754300
## 6 species1       30  102300  151300  220500  331000  558900  827500 1384800
## # ℹ 77 more variables: compactness_50 <dbl>, compactness_60 <dbl>,
## #   compactness_70 <dbl>, compactness_80 <dbl>, compactness_90 <dbl>,
## #   compactness_95 <dbl>, compactness_99 <dbl>, complexity_50 <dbl>,
## #   complexity_60 <dbl>, complexity_70 <dbl>, complexity_80 <dbl>,
## #   complexity_90 <dbl>, complexity_95 <dbl>, complexity_99 <dbl>,
## #   change_area_50 <dbl>, change_area_60 <dbl>, change_area_70 <dbl>,
## #   change_area_80 <dbl>, change_area_90 <dbl>, change_area_95 <dbl>, …

Reshape the data to a long format

results_change_long <- results_with_changes_updated %>%
  pivot_longer(
    cols = -c(species, interval), # Exclude 'species' and 'interval' from the reshaping
    names_to = c("metric", "percentile", ".value"), # Define the new columns created from the original column names
    names_pattern = "(area|compactness|complexity)_(\\d+)_(relative_change|is_significant)" # Regex pattern to match column names
  )

head(results_change_long)
## # A tibble: 6 × 6
## # Groups:   species [1]
##   species  interval metric percentile relative_change is_significant
##   <chr>       <dbl> <chr>  <chr>                <dbl> <lgl>         
## 1 species1        5 <NA>   <NA>                    NA NA            
## 2 species1        5 <NA>   <NA>                    NA NA            
## 3 species1        5 <NA>   <NA>                    NA NA            
## 4 species1        5 <NA>   <NA>                    NA NA            
## 5 species1        5 <NA>   <NA>                    NA NA            
## 6 species1        5 <NA>   <NA>                    NA NA
results_change_long <- results_change_long %>%
  na.omit()
head(results_change_long)
## # A tibble: 6 × 6
## # Groups:   species [1]
##   species  interval metric percentile relative_change is_significant
##   <chr>       <dbl> <chr>  <chr>                <dbl> <lgl>         
## 1 species1       10 area   50                    33.5 TRUE          
## 2 species1       10 area   60                    31.0 TRUE          
## 3 species1       10 area   70                    29.8 TRUE          
## 4 species1       10 area   80                    28.0 TRUE          
## 5 species1       10 area   90                    27.5 TRUE          
## 6 species1       10 area   95                    30.5 TRUE

Plot the data

ggplot(results_change_long, aes(x = interval, y = relative_change, color = percentile)) +
  geom_point() +
  facet_grid(species ~ metric) + # Facet by species and metric
  labs(x = "Interval", y = "Relative Change", color = "Percentile") +
  theme_bw()

### now statistical approach to examine the tipping point Add a numerical interval variable

library(segmented)

# run a initial linear model
init_model.50 <- lm(`area_50_relative_change` ~ interval, data = results_with_changes_updated)
init_model.99 <- lm(`area_99_relative_change` ~ interval, data = results_with_changes_updated)

summary(init_model.50)
## 
## Call:
## lm(formula = area_50_relative_change ~ interval, data = results_with_changes_updated)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.197 -21.074  -2.264  14.270  72.131 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  20.5624     7.3672   2.791  0.00684 **
## interval     -0.1287     0.1010  -1.275  0.20678   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.81 on 67 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.02368,    Adjusted R-squared:  0.009109 
## F-statistic: 1.625 on 1 and 67 DF,  p-value: 0.2068
summary(init_model.99)
## 
## Call:
## lm(formula = area_99_relative_change ~ interval, data = results_with_changes_updated)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -54.786 -11.202  -0.575  15.338  48.743 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 21.03558    5.54254   3.795  0.00032 ***
## interval    -0.19064    0.07595  -2.510  0.01450 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.93 on 67 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.08594,    Adjusted R-squared:  0.0723 
## F-statistic:   6.3 on 1 and 67 DF,  p-value: 0.0145

apply segmented regression

seg_model.50 <- segmented(init_model.50, seg.Z = ~interval, psi=list(interval=c(30,70)))
seg_model.99 <- segmented(init_model.99, seg.Z = ~interval, psi=list(interval=c(30,70)))

# view the results
summary(seg_model.50)
## 
##  ***Regression Model with Segmented Relationship(s)***
## 
## Call: 
## segmented.lm(obj = init_model.50, seg.Z = ~interval, psi = list(interval = c(30, 
##     70)))
## 
## Estimated Break-Point(s):
##                   Est. St.Err
## psi1.interval  68.133 15.376
## psi2.interval 105.000 11.256
## 
## Meaningful coefficients of the linear terms:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  30.3852    11.0963   2.738  0.00802 **
## interval     -0.4203     0.2688  -1.564  0.12289   
## U1.interval   0.9511     0.5641   1.686       NA   
## U2.interval  -2.3677     2.3264  -1.018       NA   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.84 on 63 degrees of freedom
## Multiple R-Squared: 0.08045,  Adjusted R-squared: 0.007474 
## 
## Boot restarting based on 6 samples. Last fit:
## Convergence attained in 5 iterations (rel. change 6.8959e-09)
summary(seg_model.99)
## 
##  ***Regression Model with Segmented Relationship(s)***
## 
## Call: 
## segmented.lm(obj = init_model.99, seg.Z = ~interval, psi = list(interval = c(30, 
##     70)))
## 
## Estimated Break-Point(s):
##                  Est. St.Err
## psi1.interval 16.505  5.269
## psi2.interval 55.000 46.275
## 
## Meaningful coefficients of the linear terms:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  79.6078    43.2118   1.842   0.0701 .
## interval     -3.8256     3.3898  -1.129   0.2634  
## U1.interval   3.5348     3.4199   1.034       NA  
## U2.interval   0.2553     0.4801   0.532       NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.76 on 63 degrees of freedom
## Multiple R-Squared: 0.1542,  Adjusted R-squared: 0.08704 
## 
## Boot restarting based on 10 samples. Last fit:
## Convergence attained in 2 iterations (rel. change -6.8716e-08)

Extract breakpoints

breakpoints.50 <- seg_model.50$psi[, 1]

# Plot data
plot(results_with_changes_updated$interval, results_with_changes_updated$area_50_relative_change,
     xlab = "Interval", ylab = "Area 50 Relative Change",
     main = "Segmented Regression for 50th Percentile", col = "blue", pch = 20)

coefs <- coef(seg_model.50)

# Plot initial segment
abline(coefs[1], coefs[2], col = "red")

Determine the y-value at the breakpoint for plotting the second segment

y_at_breakpoint <- coefs[1] + coefs[2] * breakpoints.50 + coefs[3] * (results_with_changes_updated$interval - breakpoints.50)