Below is a summary of the pretreatment data taken from the TridentataMasterData Excel file, updated for 2024.
## plot plantID height diameter
## Min. :801.0 Min. : 17.0 Min. : 15.00 Min. : 16.00
## 1st Qu.:809.0 1st Qu.:144.5 1st Qu.: 60.50 1st Qu.: 73.08
## Median :815.0 Median :562.0 Median : 74.00 Median : 98.00
## Mean :814.6 Mean :426.5 Mean : 73.45 Mean :102.78
## 3rd Qu.:821.0 3rd Qu.:690.5 3rd Qu.: 86.00 3rd Qu.:132.00
## Max. :827.0 Max. :900.0 Max. :124.00 Max. :250.00
## NA's :5 NA's :5 NA's :5
## diamter2 stem# gallindex
## Min. : 9.0 Min. : 1.00 Min. :0.0000
## 1st Qu.: 50.0 1st Qu.: 2.00 1st Qu.:0.0000
## Median : 72.0 Median : 3.00 Median :0.0000
## Mean : 75.9 Mean : 3.36 Mean :0.0404
## 3rd Qu.: 95.5 3rd Qu.: 4.00 3rd Qu.:0.0000
## Max. :229.0 Max. :13.00 Max. :1.0000
## NA's :5 NA's :5 NA's :5
Cleaning up data
# Per-plant measurements pulled from the master sheet under tidier names.
# The columns are already vectors, so they are used directly rather than
# being re-wrapped in c().
plant_data <- tibble(
  plant_ID = TridentataMasterData$plantID,
  height = TridentataMasterData$height,
  diameter = TridentataMasterData$diameter,
  # `diamter2` is the (misspelled) column name in the source sheet.
  diameter2 = TridentataMasterData$diamter2,
  # Backticks are required because `stem#` is not a syntactic R name.
  stem_num = TridentataMasterData$`stem#`
)
# Clip measurements keyed by plot and corner letter. The columns are
# already vectors, so they are used directly rather than re-wrapped in c().
clip_data <- tibble(
  plot = Clipdata$Plot,
  corner = Clipdata$Letter,
  # NOTE(review): the source column is `GrossMass`; confirm the values are
  # in grams and whether they still include any container weight.
  mass_grams = Clipdata$GrossMass
)
Clean data: rename variables, find/correct/remove missing values, and identify outliers
# Rename the measurement columns and derive a per-plant mean diameter.
cleaned_data <- plant_data %>%
  rename(
    ID = plant_ID,
    Height_cm = height,
    Diameter_cm = diameter,
    Diameter2_cm = diameter2,
    Stem_Count = stem_num
  ) %>%
  mutate(
    # Row-wise mean of the two diameter measurements, using whichever one
    # is present when the other is missing.
    Diameter_avg = rowMeans(cbind(Diameter_cm, Diameter2_cm), na.rm = TRUE),
    # rowMeans(na.rm = TRUE) returns NaN when both diameters are missing;
    # store NA so missing averages look like every other missing value.
    Diameter_avg = replace(Diameter_avg, is.nan(Diameter_avg), NA_real_),
    # NOTE(review): missing stem counts become 0, so a row with no
    # measurements at all reads as a plant with zero stems. Confirm this
    # is intended rather than dropping those rows.
    Stem_Count = replace_na(Stem_Count, 0)
  )
#' Identify outlying rows in a numeric column using IQR fences
#'
#' A row is flagged when its value lies below `Q1 - multiplier * IQR` or
#' above `Q3 + multiplier * IQR`. Quartiles are computed with missing
#' values removed, and rows whose value is missing are never flagged.
#'
#' @param data A data frame or tibble.
#' @param column Name of a numeric column in `data`, as a single string.
#' @param multiplier Fence width in IQR units. Defaults to 1.5.
#' @return The rows of `data` whose `column` value falls outside the
#'   fences, with the same columns and class as `data`.
identify_outliers <- function(data, column, multiplier = 1.5) {
  stopifnot(
    is.data.frame(data),
    is.character(column), length(column) == 1L, !is.na(column),
    is.numeric(multiplier), length(multiplier) == 1L, !is.na(multiplier)
  )
  if (!column %in% names(data)) {
    stop("Column `", column, "` not found in `data`.", call. = FALSE)
  }

  values <- data[[column]]
  if (!is.numeric(values)) {
    stop("Column `", column, "` must be numeric.", call. = FALSE)
  }

  quartiles <- quantile(
    values,
    probs = c(0.25, 0.75),
    na.rm = TRUE,
    names = FALSE
  )
  fence_width <- multiplier * (quartiles[2] - quartiles[1])
  lower_bound <- quartiles[1] - fence_width
  upper_bound <- quartiles[2] + fence_width

  # which() discards NA comparisons, so rows with a missing value (or a
  # column with no usable quartiles) are never reported as outliers.
  flagged <- which(values < lower_bound | values > upper_bound)
  data[flagged, , drop = FALSE]
}
# Flag plants whose height falls outside the IQR fences and show them.
height_outliers <- cleaned_data %>%
  identify_outliers("Height_cm")
print(height_outliers)
## # A tibble: 1 × 6
## ID Height_cm Diameter_cm Diameter2_cm Stem_Count Diameter_avg
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 560 15 16 9 1 12.5
# Flag clip samples whose mass falls outside the IQR fences and show them.
mass_outliers <- clip_data %>%
  identify_outliers("mass_grams")
print(mass_outliers)
## # A tibble: 2 × 3
## plot corner mass_grams
## <dbl> <chr> <dbl>
## 1 806 A 42.1
## 2 813 A 52.6
Summary statistics on clean data