Notice this is for seedlings only
See the full code for the break down for the importing and cleaning open the documents of the same name. Or here: - 2024 cleaning code and - 2025 cleaning code
Purl (or stitch) together R code from the markdown cleaning files. These stitched files are stored in ‘purl’ folder with the date of the code purled.
# library(knitr)
#
# purl("cleaning_2024_health_form_data.Rmd",
# output =
# paste("purl/cleaning_2024_health_form_data",
# Sys.Date(),".R"))
#
# purl("cleaning_2025_health_form_data.Rmd",
# output =
# paste("purl/cleaning_2025_health_form_data",
# Sys.Date(),".R"))
Next, we can run those extracted R files to actually import and clean the data.
## 2024 data
source(paste("purl/cleaning_2024_health_form_data", Sys.Date(), ".R"))
## 2025 data
source(paste("purl/cleaning_2025_health_form_data", Sys.Date(), ".R"))
library(dplyr)
# Add a 'year' column to each dataset to allow grouping/plotting by year
health_assess_2024 <- health_assess_2024 %>%
mutate(year = 2024)
health_assess_2025 <- health_assess_2025 %>%
mutate(year = 2025)
# Keep only the necessary columns for this hypothesis
sliced_2024 <- health_assess_2024 %>%
select(year, dbh_cm, percent_live_canopy, base_canker_area, trunk_canker_area)
sliced_2025 <- health_assess_2025 %>%
select(year, dbh_cm, percent_live_canopy, base_canker_area, trunk_canker_area)
# Combine the two data frames
combined_2024_2025 <- bind_rows(sliced_2024, sliced_2025)
This chunk defines a re-usable function to run a regression across multiple input datas.
do_hypoth7_analysis <- function(data, predictor, response, xlabel, ylabel, color = "black") {
# EXPLICIT REMOVING NAs
na.omit(data)
# Define linear model
model <- lm(reformulate(predictor, response), data = data)
coefs <- coef(model)
r_squared <- summary(model)$r.squared
# Create equation and R^2 text
equation <- paste0("y = ", round(coefs[2], 2), "x + ", round(coefs[1], 2))
r2_text <- paste0("italic(R)^2 == ", round(r_squared, 4))
# Get plotting ranges
max_x <- max(data[[predictor]], na.rm = TRUE)
max_y <- max(data[[response]], na.rm = TRUE)
min_y <- min(data[[response]], na.rm = TRUE)
range_y <- max_y - min_y
# Annotate sample size near the bottom-right
sample_size_y_position <- min_y + 0.08 * range_y
# Annotate equation and r2 near the top-right
eq_y_position <- max_y - 0.05 * range_y
r2_y_position <- max_y - 0.12 * range_y
# PLOT ------------------------------------------------------
ggplot(data, aes_string(x = predictor, y = response)) +
# Points
geom_point(color = color) +
# Line of best fit
geom_smooth(method = "lm", color = color) +
# Axes Label & Theme
labs(x = xlabel, y = ylabel) + # Set the y-axis label
theme(
axis.title.y = element_text(color = color) # Change y-axis label color
) +
# Samples size text
annotate(
"text",
x = max_x,
y = sample_size_y_position,
label = paste("n = ", count(data)),
hjust = 1,
size = 6,
color = "black"
) +
# Equation and R^2 text
annotate(
"text",
x = max_x,
y = eq_y_position,
label = equation,
hjust = 1,
size = 4,
color = color
) +
annotate(
"text",
x = max_x,
y = r2_y_position,
label = r2_text,
hjust = 1,
size = 4,
color = color,
parse = TRUE # Ensures that italics styling works
)
}
Here are the major delineations of questions across this hypotheses:
Note: - Can’t investigate across years because only took densiometer measures in 2025 - Can’t investigate across age class because only took densiometer measures for seedlings
library(scales)
hex <- hue_pal()(3)
seedlings_2025 <- health_assess_2025 %>% filter(is.na(dbh_cm))
patterns_across_seedling_base <- do_hypoth7_analysis(
data = seedlings_2025,
predictor = "densio_average",
response = "base_canker_area",
xlabel = "seedling % canopy open",
ylabel = "% of base area with canker",
color = hex[1]
)
patterns_across_seedling_base
patterns_across_seedling_trunk <- do_hypoth7_analysis(
data = seedlings_2025,
predictor = "densio_average",
response = "trunk_canker_area",
xlabel = "seedling % canopy open",
ylabel = "% of trunk area with canker",
color = hex[2]
)
patterns_across_seedling_trunk
library(patchwork)
(patterns_across_seedling_base + patterns_across_seedling_trunk) + plot_layout(guide = "collect")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).
library(scales)
hex <- hue_pal()(3)
seedlings_2025 <- health_assess_2025 %>% filter(is.na(dbh_cm))
patterns_across_north_base <- do_hypoth7_analysis(
data = seedlings_2025,
predictor = "densio_north",
response = "base_canker_area",
xlabel = "northern canopy density (% covered with canopy)",
ylabel = "% of base area with canker",
color = hex[1]
)
patterns_across_north_base
patterns_across_north_trunk <- do_hypoth7_analysis(
data = seedlings_2025,
predictor = "densio_north",
response = "trunk_canker_area",
xlabel = "northern canopy density (% covered with canopy)",
ylabel = "% of trunk area with canker",
color = hex[2]
)
patterns_across_north_trunk
library(scales)
hex <- hue_pal()(3)
seedlings_2025 <- health_assess_2025 %>% filter(is.na(dbh_cm))
patterns_across_west_base <- do_hypoth7_analysis(
data = seedlings_2025,
predictor = "densio_west",
response = "base_canker_area",
xlabel = "western canopy density (% covered with canopy)",
ylabel = "% of base area with canker",
color = hex[1]
)
patterns_across_west_base
patterns_across_west_trunk <- do_hypoth7_analysis(
data = seedlings_2025,
predictor = "densio_west",
response = "trunk_canker_area",
xlabel = "western canopy density (% covered with canopy)",
ylabel = "% of trunk area with canker",
color = hex[2]
)
patterns_across_west_trunk
library(scales)
hex <- hue_pal()(3)
seedlings_2025 <- health_assess_2025 %>% filter(is.na(dbh_cm))
patterns_across_south_base <- do_hypoth7_analysis(
data = seedlings_2025,
predictor = "densio_south",
response = "base_canker_area",
xlabel = "southern canopy density (% covered with canopy)",
ylabel = "% of base area with canker",
color = hex[1]
)
patterns_across_north_base
patterns_across_south_trunk <- do_hypoth7_analysis(
data = seedlings_2025,
predictor = "densio_south",
response = "trunk_canker_area",
xlabel = "southern canopy density (% covered with canopy)",
ylabel = "% of trunk area with canker",
color = hex[2]
)
patterns_across_south_trunk
library(scales)
hex <- hue_pal()(3)
seedlings_2025 <- health_assess_2025 %>% filter(is.na(dbh_cm))
patterns_across_east_base <- do_hypoth7_analysis(
data = seedlings_2025,
predictor = "densio_east",
response = "base_canker_area",
xlabel = "eastern canopy density (% covered with canopy)",
ylabel = "% of base area with canker",
color = hex[1]
)
patterns_across_east_base
patterns_across_east_trunk <- do_hypoth7_analysis(
data = seedlings_2025,
predictor = "densio_east",
response = "trunk_canker_area",
xlabel = "eastern canopy density (% covered with canopy)",
ylabel = "% of trunk area with canker",
color = hex[2]
)
patterns_across_east_trunk
(patterns_across_north_base + patterns_across_east_base) /
(patterns_across_south_base + patterns_across_west_base)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 7 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).
(patterns_across_north_trunk + patterns_across_east_trunk) /
(patterns_across_south_trunk + patterns_across_west_trunk)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 7 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).
library(tidyverse)
seedlings_2025 <- health_assess_2025 %>% filter(is.na(dbh_cm))
seedling_health_long <- seedlings_2025 %>%
mutate(case_id = row_number()) %>%
pivot_longer(
cols = starts_with("densio_"),
names_to = "direction",
values_to = "percent_cover"
) %>%
mutate(
quartile = ntile(percent_cover, 4) # 4 = quartiles
)
seedling_health_long <- seedling_health_long %>%
mutate(line_type = "Individual") # add a dummy column to map linetype
seedling_health_long <- seedling_health_long %>%
filter(direction != 'densio_average')
individual_spagetti <- seedling_health_long %>% ggplot(aes(x = direction, y = percent_cover, group = case_id)) +
# Spaghetti lines with linetype
geom_line(aes(linetype = line_type, color = factor(quartile)), alpha = 0.3) +
# Points
geom_point(aes(color = factor(quartile)), size = 2, alpha = 0.8) +
# Mean line
stat_summary(
fun = mean,
geom = "line",
aes(group = 1, color = "Mean", linetype = "Mean"),
size = 1.2
) +
# Mean points
stat_summary(
fun = mean,
geom = "point",
aes(group = 1, color = "Mean"),
size = 3
) +
# Manual color and linetype scales
scale_color_manual(
name = "Legend",
values = c(
"1" = "#E41A1C",
"2" = "#377EB8",
"3" = "#4DAF4A",
"4" = "#984EA3"
),
labels = c("Q1", "Q2", "Q3", "Q4")
) +
scale_linetype_manual(
name = "Line Type",
values = c("Individual" = "solid", "Mean" = "solid"),
labels = c("Individual", "Mean")
) +
theme_minimal() +
labs(
title = "% Open on Densiometer by Cardinal Direction",
subtitle = "Thin lines represent individual seedlings across directions",
x = "Cardinal Direction",
y = "% Open on Densiometer"
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
individual_spagetti
## Warning: Removed 31 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 31 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 31 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 31 rows containing missing values or values outside the scale range
## (`geom_point()`).