List of necessary packages
Set the directory where your CSV files are stored
csv_dir <- "/Users/ykwon/Library/CloudStorage/GoogleDrive-youngclick@gmail.com/My Drive/ykwon/research/2022 PDA research/KOREA/data2023/trackingdata/Rproject_tracking/trackingdatav20"
List all CSV files in the directory
csv_files <- list.files(csv_dir, pattern = ".csv$", full.names = TRUE)
csv_files
Read and process CSV files
read_csv_data <- function(file) {
data <- read.csv(file)
data$timestamp <- as.POSIXct(data$timestamp, format = "%Y-%m-%d %H:%M")
# Sort the data by timestamp in ascending order
data <- data[order(data$timestamp), ]
# Calculate the time difference and add it as a new column 'timediff'
data <- data %>%
mutate(timediff = as.numeric(timestamp - lag(timestamp), units = "mins"))
return(data)
}
Read all the CSV files into a named list of dataframes
data_list <- lapply(setNames(csv_files, basename(csv_files)), read_csv_data)
data_list
Convert the list of dataframes to individual dataframes in the global environment
list2env(data_list, envir = .GlobalEnv)
df_names_csv <- paste0(c("nibr2002", "nibr2003", "nibr2004", "nibr2005", "nibr2006", "nibr2007", "nibr2008", "nibr2009", "nibr2010", "nibr2102", "nibr2104", "nibr2105", "nibr2106", "nibr2108", "nibr2201"), ".csv")
df_names_csv
combined_df <- do.call(rbind, mget(df_names_csv))
grouped_data <- combined_df %>%
group_by(id)
grouped_data
Examine the summary statistics of timediff by id - timediff is in minute unit
timediff_summary <- grouped_data %>%
summarize(
count = n(),
min_timediff = min(timediff, na.rm = TRUE),
max_timediff = max(timediff, na.rm = TRUE),
mean_timediff = mean(timediff, na.rm = TRUE),
median_timediff = median(timediff, na.rm = TRUE),
sd_timediff = sd(timediff, na.rm = TRUE)
)
timediff_summary #check which species to pick
## # A tibble: 15 × 7
## id count min_timediff max_timediff mean_timediff median_timediff
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 nibr2002 64 179 3240 320. 180
## 2 nibr2003 82 179 2341 256. 180
## 3 nibr2004 91 179 360 184. 180
## 4 nibr2005 72 178 361 195. 180
## 5 nibr2006 64 177 1440 380. 359
## 6 nibr2007 105 0 1080 190. 180
## 7 nibr2008 110 61 360 181. 180
## 8 nibr2009 53 179 361 190. 180
## 9 nibr2010 76 178 361 187. 180
## 10 nibr2102 698 1 3596 27.5 6
## 11 nibr2104 1710 1 267 11.3 4
## 12 nibr2105 1557 1 2696 18.8 3
## 13 nibr2106 2354 1 330 8.72 4
## 14 nibr2108 65 4 32 7.45 5
## 15 nibr2201 1070 0 22124 147. 120
## # ℹ 1 more variable: sd_timediff <dbl>
species21_list <- list(nibr2102.csv, nibr2104.csv, nibr2105.csv, nibr2106.csv)
import forest shape file as forest object
forest <- st_read("/Users/ykwon/Library/CloudStorage/GoogleDrive-youngclick@gmail.com/My Drive/ykwon/research/2022 PDA research/KOREA/data2023/trackingdata/Rproject_tracking/map_jinju/forest.shp")
## Reading layer `forest' from data source
## `/Users/ykwon/Library/CloudStorage/GoogleDrive-youngclick@gmail.com/My Drive/ykwon/research/2022 PDA research/KOREA/data2023/trackingdata/Rproject_tracking/map_jinju/forest.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 28487 features and 16 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 281378.6 ymin: 273387.5 xmax: 324613.7 ymax: 306662.6
## Projected CRS: Korea 2000 / Central Belt 2010
combined_function1 <- function(df, forest) {
# Convert the timestamp column to a proper datetime format
df$timestamp <- as.POSIXct(df$timestamp)
# Create a SpatialPoints object from the dataframe
points <- SpatialPoints(df[, c("long", "lat")], proj4string = CRS("+proj=longlat +datum=WGS84"))
# Reproject coordinates to match the forest object CRS
points_reproj <- spTransform(points, crs(forest))
# Add reprojected coordinates as x and y columns to the data frame
df$x <- coordinates(points_reproj)[, 1]
df$y <- coordinates(points_reproj)[, 2]
# Calculate the distance between consecutive points (using the reprojected coordinates)
df$distance <- c(0, sqrt(diff(df$x)^2 + diff(df$y)^2))
# Calculate the speed between consecutive points (distance / time difference)
df$speed <- df$distance / df$timediff
# Add a new column indicating day or night based on timestamp
df$day_night <- ifelse(as.POSIXlt(df$timestamp)$hour >= 6 & as.POSIXlt(df$timestamp)$hour < 20, "Day", "Night")
# Create a move object from the reprojected coordinates
move_obj <- move(df$x, df$y, time = df$timestamp, proj = crs(forest))
return(df)
}
Loop through each species data frame
for (i in 1:length(species21_list)) {
species21_list[[i]] <- combined_function1(species21_list[[i]], forest)
}
After careful examination, I decide to select only three species: 2104,2105,2106
species21_list <- species21_list[-1] #now there are three species
# Create an empty list to store the plots
plot_list <- list()
Loop through each species data frame
for (i in 1:length(species21_list)) {
species_df <- species21_list[[i]] # Get the current species data frame
# Plot 1: Movement patterns over time (speed)
plot1 <- ggplot(species_df, aes(x = timestamp, y = speed)) +
geom_line(color = "blue") +
labs(title = "Movement Patterns", x = "Timestamp", y = "Speed (m/s)") +
theme_minimal()
# Plot 2: Movement patterns over time with day and night intervals
plot2 <- ggplot(species_df, aes(x = timestamp, y = speed, color = day_night)) +
geom_line() +
labs(title = "Movement Patterns", x = "Timestamp", y = "Speed (m/s)", color = "Day/Night") +
theme_minimal()
# Add the plots to the list
plot_list[[i]] <- plot1
plot_list[[i+length(species21_list)]] <- plot2
}
Create a grid of plots
grid.arrange(grobs = plot_list, ncol = 2) #movement pattern
it looks like species2105 needs data clean up upto Jun07
species21_list[[2]] <- species21_list[[2]] %>%
filter(timestamp <= as.POSIXct("2021-06-07")) # replace YYYY with the desired year
now you can rerun the plot above to confirm the data clean-up Create a list to store the data subsets for each minute interval
telemetry_data_subsets <- list()
Iterate over data frames in species21_list
for (j in 1:length(species21_list)) {
# Get the current data frame
telemetry_data <- species21_list[[j]]
# Add a column for the cumulative time difference
telemetry_data$cumulative_timediff <- cumsum(ifelse(is.na(telemetry_data$timediff), 0, telemetry_data$timediff))
# Set na.rm = TRUE to remove NA values
# Iterate over minute intervals from 5 to 120, in steps of 5
for (interval in seq(5, 120, by = 5)) {
# Initialize the subsetted data frame
telemetry_data_subset <- data.frame()
# Initialize the index of the previously selected row
prev_index <- 1
# Loop through the telemetry data starting from the third row
for (i in 3:nrow(telemetry_data)) {
# Calculate the time difference from the previously selected row
time_diff <- telemetry_data$cumulative_timediff[i] - telemetry_data$cumulative_timediff[prev_index]
# Check if the time difference is greater than or equal to the desired interval
if (time_diff >= interval) {
# Append the current row to the subsetted data frame
telemetry_data_subset <- rbind(telemetry_data_subset, telemetry_data[i, ])
# Update the index of the previously selected row
prev_index <- i
}
}
# Store the subsetted data frame in the list, using the minute interval and the data frame index as the name
telemetry_data_subsets[[paste0("df_", j, "_interval_", interval, "min")]] <- telemetry_data_subset
}
}
summary(telemetry_data_subsets)
rows_count <- sapply(telemetry_data_subsets, nrow)
rows_count
Convert the named numeric vector into a dataframe
df <- data.frame(name = names(rows_count), count = rows_count, stringsAsFactors = FALSE)
Extract species and interval from the name
df <- df %>%
separate(name, into = c("species", "interval"), sep = "_interval_") %>%
mutate(species = gsub("^df_", "species", species), # Replace "df_" with "species"
interval = as.integer(gsub("min$", "", interval))) # Remove "min" and convert to integer
gpsfix_nrow <- ggplot(df, aes(x = interval, y = count, color = species, group = species)) +
geom_line() +
labs(title = "Number of Rows by Increasing GPS Fix Rates",
x = "Interval (minutes)",
y = "Number of Rows") +
theme_minimal() +
scale_color_discrete(name = "Species")
gpsfix_nrow
##Function to run BBMM model for a given species data frame
create_bbmm <- function(df) {
# Remove rows where timediff is NA
df <- df[!is.na(df$timediff), ]
bbmm_model <- brownian.bridge(x = df$x, y = df$y, time.lag = df$timediff,
location.error = 10, cell.size = 10)
return(bbmm_model)
}
Apply create_bbmm to each data frame in species21_list
bbmm_results <- lapply(telemetry_data_subsets, create_bbmm)
bbmm_results
load("bbmm_results.RData")
Function to plot BBMM contours for a given species data frame
plot_bbmm <- function(bbmm_model, df) {
bbmm.contour(bbmm_model, levels = c(seq(50, 90, by = 10), 95, 99), locations = df, plot = TRUE)
}
Apply plot_bbmm to each BBMM model and its corresponding data subset # PLOTS are saved as PDF here
dev.off()
## null device
## 1
pdf("bbmm_plotsAUG9_10m.pdf", width=10, height=10)
par(mfrow=c(4, 4)) # set up a 4x4 plotting grid
for (i in seq_along(bbmm_results)) {
plot_name <- names(telemetry_data_subsets)[i]
plot_bbmm(bbmm_results[[i]], telemetry_data_subsets[[i]])
title(main = plot_name) # This line adds the title after plotting
}
dev.off() # close the PDF device
## null device
## 1
create_bbmm.area.cc <- function(df, percentile) {
df <- df[!is.na(df$timediff), ]
bbmm_model <- brownian.bridge(x = df$x, y = df$y, time.lag = df$timediff,
location.error = 10, cell.size = 10)
# Calculate the contour levels
levels <- c(seq(50,90, by = 10),95,99)
# Get the index of the desired percentile in the levels vector
percentile_index <- which(levels == percentile)
contours <- bbmm.contour(bbmm_model, levels = levels)
# Lists to store results
area_list = numeric(length(levels))
compactness_list = numeric(length(levels))
complexity_list = numeric(length(levels))
for (i in 1:length(levels)) {
bbmm.contour <- data.frame(x = bbmm_model$x, y = bbmm_model$y, probability = bbmm_model$probability)
bbmm.contour <- bbmm.contour[bbmm.contour$probability >= contours$Z[i],]
m = SpatialPixelsDataFrame(points = bbmm.contour[c("x","y")], data = bbmm.contour)
# Convert SpatialPixelsDataFrame to raster
r = raster(m)
# Convert raster to polygons
poly_subset = rasterToPolygons(r, dissolve = TRUE)
poly_area = gArea(poly_subset)
poly_perimeter = gLength(poly_subset)
# Area
area_list[i] = poly_area
# Compactness
compactness_list[i] = (4 * pi * poly_area) / (poly_perimeter^2)
# Complexity
complexity_list[i] = poly_perimeter / sqrt(poly_area)
}
result = list(area = area_list, compactness = compactness_list, complexity = complexity_list)
return(result)
}
percentiles <- c(seq(50, 90, by = 10), 95, 99)
results.acc <- data.frame()
for(i in 1:length(telemetry_data_subsets)){
for(j in 1:length(percentiles)){
metrics <- create_bbmm.area.cc(telemetry_data_subsets[[i]], percentile = percentiles[j])
temp_result <- data.frame(
dataset_index = i,
percentile = j,
area = metrics$area[j],
compactness = metrics$compactness[j],
complexity = metrics$complexity[j]
)
results.acc <- rbind(results.acc, temp_result)
}
}
results.acc
instead running the code (takes several hours) import the results
results.acc <- read.csv("results.acc.csv")
Number of species, number of intervals, and number of percentiles
num_species <- 3
num_intervals <- length(seq(5, 120, by = 5))
num_percentiles <- length(c(seq(50, 90, by = 10), 95, 99))
Create a mapping for species based on dataset_index
species_mapping <- paste0("species", 1:num_species)
Map dataset_index to species
results.acc$species <- rep(species_mapping, each = num_intervals * num_percentiles)
# Create a mapping for interval based on the pattern you mentioned
intervals <- rep(seq(5, 120, by = 5), times = num_species, each = num_percentiles)
results.acc$interval <- intervals[1:nrow(results.acc)]
# Map percentile numbers to actual percentiles
percentile_mapping <- c(seq(50, 90, by = 10), 95, 99)
results.acc$percentile <- rep(percentile_mapping, times = num_species * num_intervals)
# Rearrange columns and display
results.acc <- results.acc[, c("species", "interval", "percentile", "area", "compactness", "complexity")]
head(results.acc)
Print the cleaned dataframe
head(results.acc)
#write.csv(results.acc,"results_acc_aug9.csv")
Define the plots
results.acc$percentile <- as.factor(results.acc$percentile)
color_palette <- brewer.pal(length(unique(results.acc$percentile)), "Set1")
area_plot <- ggplot(results.acc, aes(x = interval, y = area, color = percentile, group = percentile)) +
geom_line() +
facet_wrap(~species, scales = "free_y") +
labs(title = "Area change by increasing GPS fix intervals",
x = "Interval Minutes",
y = "Area",
color = "Percentile") +
scale_color_manual(values = color_palette) +
theme_minimal()
compactness_plot <- ggplot(results.acc, aes(x = interval, y = compactness, color = percentile, group = percentile)) +
geom_line() +
facet_wrap(~species, scales = "free_y") +
labs(title = "Compactness change by increasing GPS fix intervals",
x = "Interval Minutes",
y = "Compactness",
color = "Percentile") +
scale_color_manual(values = color_palette) +
theme_minimal()
complexity_plot <- ggplot(results.acc, aes(x = interval, y = complexity, color = percentile, group = percentile)) +
geom_line() +
facet_wrap(~species, scales = "free_y") +
labs(title = "Complexity change by increasing GPS fix intervals",
x = "Interval Minutes",
y = "Complexity",
color = "Percentile") +
scale_color_manual(values = color_palette) +
theme_minimal()
Save plots to PDF # PLOTS are saved as PDF here
dev.off()
## null device
## 1
pdf("acc_file_aug9.pdf", width = 10, height = 7)
print(area_plot)
print(compactness_plot)
print(complexity_plot)
dev.off()
## null device
## 1
result in wide format
results_wide <- results.acc %>%
pivot_wider(
id_cols = c(species, interval),
names_from = percentile,
values_from = c(area, compactness, complexity),
names_sep = "_"
)
head(results_wide)
## # A tibble: 6 × 23
## species interval area_50 area_60 area_70 area_80 area_90 area_95 area_99
## <chr> <dbl> <int> <int> <int> <int> <int> <int> <int>
## 1 species1 5 47100 72800 109400 168300 276900 384800 633500
## 2 species1 10 62900 95400 142000 215500 353000 502000 873800
## 3 species1 15 74200 111800 165100 246400 403400 584000 1027600
## 4 species1 20 83600 123400 180700 267900 442100 648300 1132600
## 5 species1 25 126900 189500 280600 432000 750700 1086400 1754300
## 6 species1 30 102300 151300 220500 331000 558900 827500 1384800
## # ℹ 14 more variables: compactness_50 <dbl>, compactness_60 <dbl>,
## # compactness_70 <dbl>, compactness_80 <dbl>, compactness_90 <dbl>,
## # compactness_95 <dbl>, compactness_99 <dbl>, complexity_50 <dbl>,
## # complexity_60 <dbl>, complexity_70 <dbl>, complexity_80 <dbl>,
## # complexity_90 <dbl>, complexity_95 <dbl>, complexity_99 <dbl>
Define a function to calculate the absolute changes
calculate_absolute_change <- function(df) {
df %>%
arrange(species, interval) %>%
group_by(species) %>%
mutate(across(starts_with("area_"), ~ c(0, diff(.)),
.names = "change_{.col}"),
across(starts_with("compactness_"), ~ c(0, diff(.)),
.names = "change_{.col}"),
across(starts_with("complexity_"), ~ c(0, diff(.)),
.names = "change_{.col}"))
}
Apply the function
results_with_changes <- calculate_absolute_change(results_wide)
head(results_with_changes)
## # A tibble: 6 × 44
## # Groups: species [1]
## species interval area_50 area_60 area_70 area_80 area_90 area_95 area_99
## <chr> <dbl> <int> <int> <int> <int> <int> <int> <int>
## 1 species1 5 47100 72800 109400 168300 276900 384800 633500
## 2 species1 10 62900 95400 142000 215500 353000 502000 873800
## 3 species1 15 74200 111800 165100 246400 403400 584000 1027600
## 4 species1 20 83600 123400 180700 267900 442100 648300 1132600
## 5 species1 25 126900 189500 280600 432000 750700 1086400 1754300
## 6 species1 30 102300 151300 220500 331000 558900 827500 1384800
## # ℹ 35 more variables: compactness_50 <dbl>, compactness_60 <dbl>,
## # compactness_70 <dbl>, compactness_80 <dbl>, compactness_90 <dbl>,
## # compactness_95 <dbl>, compactness_99 <dbl>, complexity_50 <dbl>,
## # complexity_60 <dbl>, complexity_70 <dbl>, complexity_80 <dbl>,
## # complexity_90 <dbl>, complexity_95 <dbl>, complexity_99 <dbl>,
## # change_area_50 <dbl>, change_area_60 <dbl>, change_area_70 <dbl>,
## # change_area_80 <dbl>, change_area_90 <dbl>, change_area_95 <dbl>, …
Define a function to calculate the relative changes and significant changes
calculate_relative_change_and_significance <- function(df, significance_threshold = 10) {
percentile_cols <- grep("^(area|compactness|complexity)_\\d+$", names(df), value = TRUE)
df %>%
group_by(species) %>%
mutate(across(all_of(percentile_cols),
list(relative_change = ~ (./lag(.))*100 - 100,
is_significant = ~ abs((./lag(.))*100 - 100) > significance_threshold),
.names = "{.col}_{.fn}"))
}
Using the function on the given data
results_with_changes_updated <- calculate_relative_change_and_significance(results_with_changes)
head(results_with_changes_updated)
## # A tibble: 6 × 86
## # Groups: species [1]
## species interval area_50 area_60 area_70 area_80 area_90 area_95 area_99
## <chr> <dbl> <int> <int> <int> <int> <int> <int> <int>
## 1 species1 5 47100 72800 109400 168300 276900 384800 633500
## 2 species1 10 62900 95400 142000 215500 353000 502000 873800
## 3 species1 15 74200 111800 165100 246400 403400 584000 1027600
## 4 species1 20 83600 123400 180700 267900 442100 648300 1132600
## 5 species1 25 126900 189500 280600 432000 750700 1086400 1754300
## 6 species1 30 102300 151300 220500 331000 558900 827500 1384800
## # ℹ 77 more variables: compactness_50 <dbl>, compactness_60 <dbl>,
## # compactness_70 <dbl>, compactness_80 <dbl>, compactness_90 <dbl>,
## # compactness_95 <dbl>, compactness_99 <dbl>, complexity_50 <dbl>,
## # complexity_60 <dbl>, complexity_70 <dbl>, complexity_80 <dbl>,
## # complexity_90 <dbl>, complexity_95 <dbl>, complexity_99 <dbl>,
## # change_area_50 <dbl>, change_area_60 <dbl>, change_area_70 <dbl>,
## # change_area_80 <dbl>, change_area_90 <dbl>, change_area_95 <dbl>, …
Reshape the data to a long format
results_change_long <- results_with_changes_updated %>%
pivot_longer(
cols = -c(species, interval), # Exclude 'species' and 'interval' from the reshaping
names_to = c("metric", "percentile", ".value"), # Define the new columns created from the original column names
names_pattern = "(area|compactness|complexity)_(\\d+)_(relative_change|is_significant)" # Regex pattern to match column names
)
head(results_change_long)
## # A tibble: 6 × 6
## # Groups: species [1]
## species interval metric percentile relative_change is_significant
## <chr> <dbl> <chr> <chr> <dbl> <lgl>
## 1 species1 5 <NA> <NA> NA NA
## 2 species1 5 <NA> <NA> NA NA
## 3 species1 5 <NA> <NA> NA NA
## 4 species1 5 <NA> <NA> NA NA
## 5 species1 5 <NA> <NA> NA NA
## 6 species1 5 <NA> <NA> NA NA
results_change_long <- results_change_long %>%
na.omit()
head(results_change_long)
## # A tibble: 6 × 6
## # Groups: species [1]
## species interval metric percentile relative_change is_significant
## <chr> <dbl> <chr> <chr> <dbl> <lgl>
## 1 species1 10 area 50 33.5 TRUE
## 2 species1 10 area 60 31.0 TRUE
## 3 species1 10 area 70 29.8 TRUE
## 4 species1 10 area 80 28.0 TRUE
## 5 species1 10 area 90 27.5 TRUE
## 6 species1 10 area 95 30.5 TRUE
Plot the data
ggplot(results_change_long, aes(x = interval, y = relative_change, color = percentile)) +
geom_point() +
facet_grid(species ~ metric) + # Facet by species and metric
labs(x = "Interval", y = "Relative Change", color = "Percentile") +
theme_bw()
### now statistical approach to examine the tipping point Add a
numerical interval variable
library(segmented)
# run a initial linear model
init_model.50 <- lm(`area_50_relative_change` ~ interval, data = results_with_changes_updated)
init_model.99 <- lm(`area_99_relative_change` ~ interval, data = results_with_changes_updated)
summary(init_model.50)
##
## Call:
## lm(formula = area_50_relative_change ~ interval, data = results_with_changes_updated)
##
## Residuals:
## Min 1Q Median 3Q Max
## -62.197 -21.074 -2.264 14.270 72.131
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.5624 7.3672 2.791 0.00684 **
## interval -0.1287 0.1010 -1.275 0.20678
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27.81 on 67 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.02368, Adjusted R-squared: 0.009109
## F-statistic: 1.625 on 1 and 67 DF, p-value: 0.2068
summary(init_model.99)
##
## Call:
## lm(formula = area_99_relative_change ~ interval, data = results_with_changes_updated)
##
## Residuals:
## Min 1Q Median 3Q Max
## -54.786 -11.202 -0.575 15.338 48.743
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.03558 5.54254 3.795 0.00032 ***
## interval -0.19064 0.07595 -2.510 0.01450 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.93 on 67 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.08594, Adjusted R-squared: 0.0723
## F-statistic: 6.3 on 1 and 67 DF, p-value: 0.0145
apply segmented regression
seg_model.50 <- segmented(init_model.50, seg.Z = ~interval, psi=list(interval=c(30,70)))
seg_model.99 <- segmented(init_model.99, seg.Z = ~interval, psi=list(interval=c(30,70)))
# view the results
summary(seg_model.50)
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = init_model.50, seg.Z = ~interval, psi = list(interval = c(30,
## 70)))
##
## Estimated Break-Point(s):
## Est. St.Err
## psi1.interval 68.133 15.376
## psi2.interval 105.000 11.256
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.3852 11.0963 2.738 0.00802 **
## interval -0.4203 0.2688 -1.564 0.12289
## U1.interval 0.9511 0.5641 1.686 NA
## U2.interval -2.3677 2.3264 -1.018 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27.84 on 63 degrees of freedom
## Multiple R-Squared: 0.08045, Adjusted R-squared: 0.007474
##
## Boot restarting based on 6 samples. Last fit:
## Convergence attained in 5 iterations (rel. change 6.8959e-09)
summary(seg_model.99)
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = init_model.99, seg.Z = ~interval, psi = list(interval = c(30,
## 70)))
##
## Estimated Break-Point(s):
## Est. St.Err
## psi1.interval 16.505 5.269
## psi2.interval 55.000 46.275
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 79.6078 43.2118 1.842 0.0701 .
## interval -3.8256 3.3898 -1.129 0.2634
## U1.interval 3.5348 3.4199 1.034 NA
## U2.interval 0.2553 0.4801 0.532 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.76 on 63 degrees of freedom
## Multiple R-Squared: 0.1542, Adjusted R-squared: 0.08704
##
## Boot restarting based on 10 samples. Last fit:
## Convergence attained in 2 iterations (rel. change -6.8716e-08)
Extract breakpoints
breakpoints.50 <- seg_model.50$psi[, 1]
# Plot data
plot(results_with_changes_updated$interval, results_with_changes_updated$area_50_relative_change,
xlab = "Interval", ylab = "Area 50 Relative Change",
main = "Segmented Regression for 50th Percentile", col = "blue", pch = 20)
coefs <- coef(seg_model.50)
# Plot initial segment
abline(coefs[1], coefs[2], col = "red")
Determine the y-value at the breakpoint for plotting the second segment
y_at_breakpoint <- coefs[1] + coefs[2] * breakpoints.50 + coefs[3] * (results_with_changes_updated$interval - breakpoints.50)