library(tidyverse)
library(readr)
library(styler)
# Load the 2024 and 2025 butternut health assessment form exports.
health_assess_2024 <- read_csv("july_3_2024_butternut_health_assessment_forms.csv")
# view(health_assess_2024)
# Only need first 66 in 2024 (these are WCP)
# health_assess_2024 <- health_assess_2024 %>% slice_head(n = 66)

# The earliest 2025 responses pre-date major fixes to the google form, so they
# are discarded. Rows 17 onward are kept, i.e. the first 16 rows are dropped,
# exactly as the previous `slice(17:n())` did when at least 17 rows exist.
# A negative index is used instead of `17:n()` because `17:n()` counts
# DOWNWARD when the file has fewer than 17 rows and would then keep the last
# row(s) instead of returning an empty table.
# NOTE(review): the original comment said "ignore the first 17", but the code
# only ever dropped 16 rows -- confirm whether row 17 should be dropped too.
health_assess_2025 <- read_csv("jun_2025_butternut_health_assessment_form.csv") %>%
  slice(-seq_len(16))
# Drop the 2025 form questions that are not used in this analysis. The columns
# are addressed by their exact form headers; all_of() errors if any of them is
# missing from the export.
health_assess_2025 <- select(
  health_assess_2025,
  -all_of(c(
    # Seed production
    "Producing seed?",
    "Roughly how many seeds are on the tree?",
    "How many seed are in each bunch (average estimate)?",
    # Collections
    "What did you collect?",
    "If VOUCHERS were collected, how many?",
    "If LEAVES were collected, how many?",
    "If CUTTINGS were collected, how many?",
    "If SEEDS were collected, how many?",
    "If other collections were made, please describe them here including the number collected.",
    # Bark
    "How deep are the furrows in the bark?",
    "What shade (from light/white to dark) is the tree bark?",
    # Editing
    "Edited after field collection? (Y/N)",
    "If edited, what date:",
    "If edited, what:"
  ))
)
# Give the 2024 form columns short snake_case names (new_name = `form header`).
health_assess_2024 <- rename(
  health_assess_2024,
  # Size measurements
  plant_height_ft = `Plant height (ft)`,
  dbh_cm = `DBH (cm)`,
  # Canopy and epicormic sprouting
  percent_live_canopy = `Percent live canopy`,
  base_epicormics = `Number of epicormic branches/ sprouts from the base`,
  trunk_epicormics = `Number of epicormic branches/ sprouts from the trunk`,
  # Canker presence, callousing and extent
  has_canker = `Visible cankers? (according to health assessment form)`,
  has_callous = `If large cankers present, are they being calloused over?`,
  trunk_canker_area = `Area of trunk infected by canker (%)`,
  base_canker_area = `Area of base/ root flare infected by canker (%)`,
  purdue_severity_based_on_canker = `If trees are infected, severity of infection`,
  # Seedling flag
  seedling_y_n = `Seedling (Y/N)`
)
# Give the 2025 form columns short snake_case names (new_name = `form header`),
# matching the 2024 names where the same quantity is recorded.
#
# rename() must be given column NAMES. The previous code passed
# `health_assess_2025$...` for plant height and DBH, which hands rename() the
# column's values instead of its name; both now use the backticked header.
health_assess_2025 <- health_assess_2025 %>%
  rename(
    # The canopy header embeds line breaks, written here as \r\n escapes.
    percent_live_canopy = `Percent live canopy (estimate to the nearest 10% increment, being sure to only include live branches in assessment)\r\n\r\nNote: This is a measure of crown density. In order to estimate this, first envision the amount of canopy there would be if the tree were fully healthy. Butternuts do not typically have a tightly formed canopy even when healthy so be sure to evaluate based on branch presence and location. Then estimate what percent of the envisioned canopy is actually present. This will be your estimate of percent live canopy.`,
    # Size measurements
    plant_height_ft = `Plant Height (in FEET)`,
    dbh_cm = `DBH (in CENTIMETERS)`,
    # Crown class
    crown_class = `What is the crown class of this individual?`,
    # Epicormic sprouting
    base_epicormics = `Number of epicormic branches / sprouts from the base`,
    trunk_epicormics = `Number of epicormic branches / sprouts from the trunk`,
    # Canker presence, callousing and extent
    has_canker = `Visible cankers?`,
    has_callous = `If large cankers are present, are they being calloused over?`,
    trunk_canker_area = `How much area of the trunk below first main branch is infected by canker, measured as a percentage of total trunk with cankers visible (including cankering visible underneath uplifted bark)?`,
    girdled_circum = `At the part of the trunk that appears most girdled by canker, what portion of the circumference of the trunk is girdled?`,
    base_canker_area = `How much area of the base/ root flare is infected by canker, e.g. as a percentage of root flare (up to 10 cm above soil) with cankers visible (including underneath bark)?`,
    # Severity ratings
    purdue_severity_based_on_canker = `Assess severity of infection. Focus on the bottom 10 feet of the tree when assessing the number and size of cankers, noting that cankers can be hard to see on old trees with thick bark. CANKERS:`,
    purdue_severity_based_on_canopy = `Assess severity of infection. CANOPY:`
  )
# Coerce the 2024 measurement columns to numeric in one pass. Entries that
# cannot be parsed as numbers become NA and as.numeric() raises
# "NAs introduced by coercion" warnings, as in the recorded run.
# (plant_height_ft was previously coerced twice; once is sufficient.)
health_assess_2024 <- health_assess_2024 %>%
  mutate(
    across(
      all_of(c(
        "plant_height_ft",
        "dbh_cm",
        "percent_live_canopy",
        "base_canker_area",
        "trunk_canker_area"
      )),
      as.numeric
    )
  )
# Number of 2024 records per seedling (Y/N) value, one coloured bar per value.
bar_seedlings <- ggplot(
  health_assess_2024,
  aes(x = seedling_y_n, fill = seedling_y_n)
) +
  geom_bar()
bar_seedlings
library(patchwork)
# 2024 size distributions, filled by seedling status (30 bins each).
# Plant height (ft)
hist_plant_height <- ggplot(
  health_assess_2024,
  aes(x = plant_height_ft, fill = seedling_y_n)
) +
  geom_histogram(bins = 30)
# DBH (cm)
hist_dbh <- ggplot(
  health_assess_2024,
  aes(x = dbh_cm, fill = seedling_y_n)
) +
  geom_histogram(bins = 30)
# patchwork's `/` stacks the height histogram above the DBH histogram.
# stat_bin() drops non-finite values with a warning; the recorded run reported
# 33 and 76 rows removed.
hist_plant_height / hist_dbh
# Distribution of percent live canopy in 2024, filled by seedling status.
hist_percent_live_canopy <- ggplot(
  health_assess_2024,
  aes(x = percent_live_canopy, fill = seedling_y_n)
) +
  geom_histogram(bins = 30) +
  xlab("% Live Canopy in 2024")
# stat_bin() drops non-finite values with a warning; the recorded run reported
# 56 rows removed.
hist_percent_live_canopy
# Distributions of epicormic sprout counts in 2024 (from the base and from the
# trunk), filled by seedling status.
# `bins = 30` is spelled out to match the other histograms in this script; it
# is the value stat_bin() was already defaulting to, so the plots are
# unchanged but the "Pick better value with `binwidth`" message goes away.
hist_epicormic_base <- health_assess_2024 %>%
  ggplot(aes(x = base_epicormics)) +
  geom_histogram(bins = 30, aes(fill = seedling_y_n))
hist_epicormic_trunk <- health_assess_2024 %>%
  ggplot(aes(x = trunk_epicormics)) +
  geom_histogram(bins = 30, aes(fill = seedling_y_n))
# patchwork's `/` stacks the base histogram above the trunk histogram.
# stat_bin() drops non-finite values with a warning; the recorded run reported
# 47 and 49 rows removed.
hist_epicormic_base / hist_epicormic_trunk
library(viridis)
## Loading required package: viridisLite
# Counts of 2024 records by canker presence, filled by callousing response.
bar_has_canker <- ggplot(
  health_assess_2024,
  aes(x = has_canker, fill = has_callous)
) +
  geom_bar()
# Counts by callousing response, filled by canker-based severity rating.
bar_has_callous <- ggplot(
  health_assess_2024,
  aes(x = has_callous, fill = purdue_severity_based_on_canker)
) +
  geom_bar()
# patchwork's `+` places the two bar charts next to each other.
bar_has_canker + bar_has_callous
# Distributions of cankered area in 2024 (base/root flare and trunk), filled by
# callousing response.
# `bins = 30` is spelled out to match the other histograms in this script; it
# is the value stat_bin() was already defaulting to, so the plots are
# unchanged but the "Pick better value with `binwidth`" message goes away.
hist_base_canker <- health_assess_2024 %>%
  ggplot(aes(x = base_canker_area)) +
  geom_histogram(bins = 30, aes(fill = has_callous))
hist_trunk_canker <- health_assess_2024 %>%
  ggplot(aes(x = trunk_canker_area)) +
  geom_histogram(bins = 30, aes(fill = has_callous))
# patchwork's `/` stacks the base histogram above the trunk histogram.
# stat_bin() drops non-finite values with a warning; the recorded run reported
# 57 and 38 rows removed.
hist_base_canker / hist_trunk_canker
# Cankered area against DBH (cm) for 2024, coloured by canker-based severity.
# Base / root flare
point_base_canker <- ggplot(
  health_assess_2024,
  aes(
    x = dbh_cm,
    y = base_canker_area,
    color = purdue_severity_based_on_canker
  )
) +
  geom_point()
# Trunk
point_trunk_canker <- ggplot(
  health_assess_2024,
  aes(
    x = dbh_cm,
    y = trunk_canker_area,
    color = purdue_severity_based_on_canker
  )
) +
  geom_point()
# patchwork's `/` stacks the base scatter above the trunk scatter.
# geom_point() drops rows with missing values with a warning; the recorded run
# reported 89 and 79 rows removed.
point_base_canker / point_trunk_canker
# Number of 2024 records per canker-based severity rating, one coloured bar
# per rating.
bar_canker_severity <- ggplot(
  health_assess_2024,
  aes(
    x = purdue_severity_based_on_canker,
    fill = purdue_severity_based_on_canker
  )
) +
  geom_bar()
bar_canker_severity