STAT 210 - Final Project

Author

Cheyanne Bunnell

# Clean the raw `edible_plants` table into `clean_plants`.
#
# Steps, in order:
#   1. Normalise whitespace in every character column (trim both ends and
#      collapse internal runs to a single space).
#   2. Turn the "Currently no data available." placeholder into NA.
#   3. Set `energy` to NA where it is 0 and `nutritional_info` is missing.
#   4. Coerce every column whose name starts with "preferred" to numeric.
#   5. Remove all spaces from `temperature_growing`.
clean_plants <- edible_plants |>
  as_tibble() |>
  # Whitespace is normalised BEFORE the placeholder is matched, so a
  # placeholder padded with stray whitespace is still recognised. Running this
  # first also covers the trimming of `soil` (assuming `soil` is a character
  # column), so no separate trim or trailing squish pass is needed.
  mutate(across(where(is.character), \(x) str_squish(x))) |>
  mutate(
    across(
      where(is.character),
      \(x) na_if(x, "Currently no data available.")
    )
  ) |>
  mutate(
    # A zero with no accompanying nutritional text is replaced by NA
    # (presumably "not recorded" rather than a measured zero -- confirm).
    energy = if_else(is.na(nutritional_info) & energy == 0, NA, energy)
  ) |>
  mutate(across(starts_with("preferred"), as.numeric)) |>
  mutate(temperature_growing = str_remove_all(temperature_growing, " "))
# Build the analysis table `plants` from `clean_plants`:
#   * lower-case the categorical text columns,
#   * make sure both pH bounds are numeric,
#   * merge a few spelling variants into a single label,
#   * store `water` as a factor with levels listed from "very low" to
#     "very high" (any value outside that list becomes NA).
plants <- clean_plants |>
  mutate(
    across(
      c(water, temperature_class, cultivation, nutrients),
      str_to_lower
    ),
    across(c(preferred_ph_lower, preferred_ph_upper), as.numeric)
  ) |>
  mutate(
    # "very hard" is folded into "very hardy".
    temperature_class = if_else(
      temperature_class == "very hard",
      "very hardy",
      temperature_class
    ),
    # The plural "brassicas" is folded into "brassica".
    cultivation = if_else(
      cultivation == "brassicas",
      "brassica",
      cultivation
    ),
    # Every condition is tested against the lower-cased input value, so
    # "medium to high" ends up as "high" and is NOT promoted again.
    nutrients = case_when(
      nutrients == "high potassium fertiliser every 2 weeks." ~ "fertiliser",
      nutrients == "high" ~ "very high",
      nutrients == "medium to high" ~ "high",
      TRUE ~ nutrients
    ),
    water = factor(
      water,
      levels = c("very low", "low", "medium", "high", "very high")
    )
  )

# Dodged bar chart: number of plants at each water level, with one bar per
# cultivation type. Rows whose cultivation is "miscellaneous" are left out;
# the `!=` comparison inside filter() also drops rows where it is NA.
ggplot(
  data = filter(plants, cultivation != "miscellaneous"),
  mapping = aes(x = water, fill = cultivation)
) +
  geom_bar(position = position_dodge()) +
  # Unnamed palette of 11 colours, matched to the cultivation types in order.
  scale_fill_manual(
    values = c(
      "#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#66a61e",
      "#e6ab02", "#a6761d", "#666666", "#1f78b4", "#b2df8a", "#fb9a99"
    )
  ) +
  labs(
    title = "Water Requirements by Cultivation Type",
    x = "Water Level",
    y = "Count",
    fill = "Cultivation Type"
  )

# Store `temperature_class` as a factor whose levels are listed from
# "very tender" to "very hardy"; any value outside this list becomes NA.
plants <- plants |>
  mutate(
    temperature_class = factor(
      temperature_class,
      levels = c("very tender", "tender", "half hardy", "hardy", "very hardy")
    )
  )

# Dodged bar chart: number of plants in each temperature class, with one bar
# per cultivation type. Rows whose cultivation is "miscellaneous" are left
# out; the `!=` comparison inside filter() also drops rows where it is NA.
plants |>
  filter(cultivation != "miscellaneous") |>
  ggplot(aes(x = temperature_class, fill = cultivation)) +
  geom_bar(position = "dodge") +
  labs(
    title = "Temperature Range by Cultivation",
    x = "Temperature Class",
    y = "Count",
    # Give the legend a readable title instead of the raw column name,
    # matching the water-requirements chart.
    fill = "Cultivation Type"
  ) +
  # Unnamed palette of 11 colours, matched to the cultivation types in order.
  scale_fill_manual(
    values = c(
      "#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#66a61e",
      "#e6ab02", "#a6761d", "#666666", "#1f78b4", "#b2df8a", "#fb9a99"
    )
  )

# Horizontal boxplots of the LOWER preferred pH bound, one box per cultivation
# type. Rows whose cultivation is "miscellaneous" (or NA) are left out.
plants |>
  filter(cultivation != "miscellaneous") |>
  ggplot(aes(x = preferred_ph_lower, y = cultivation)) +
  geom_boxplot() +
  labs(
    # The title names the bound so this figure can be told apart from the
    # companion upper-bound boxplot.
    title = "Lower Soil pH Bound by Cultivation Type",
    x = "Preferred pH (Lower Bound)",
    y = "Cultivation"
  )

# Horizontal boxplots of the UPPER preferred pH bound, one box per cultivation
# type. Rows whose cultivation is "miscellaneous" (or NA) are left out.
plants |>
  filter(cultivation != "miscellaneous") |>
  ggplot(aes(x = preferred_ph_upper, y = cultivation)) +
  geom_boxplot() +
  labs(
    # The title names the bound so this figure can be told apart from the
    # companion lower-bound boxplot.
    title = "Upper Soil pH Bound by Cultivation Type",
    x = "Preferred pH (Upper Bound)",
    y = "Cultivation"
  )

# Scatter plot of each plant's preferred pH range: lower bound on x, upper
# bound on y, coloured by cultivation type. Rows whose cultivation is
# "miscellaneous" (or NA) are left out.
plants |>
  filter(cultivation != "miscellaneous") |>
  ggplot(
    aes(x = preferred_ph_lower, y = preferred_ph_upper, color = cultivation)
  ) +
  geom_point(size = 3) +
  labs(
    title = "Soil pH Range by Cultivation",
    x = "Lower pH",
    y = "Upper pH",
    # Readable legend title instead of the raw column name.
    color = "Cultivation Type"
  ) +
  # Unnamed palette of 11 colours, matched to the cultivation types in order.
  scale_color_manual(
    values = c(
      "#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#66a61e",
      "#e6ab02", "#a6761d", "#666666", "#1f78b4", "#b2df8a", "#fb9a99"
    )
  )

# Scatter plot with one point per cultivation type: mean lower pH bound on x,
# mean upper pH bound on y. Rows whose cultivation is "miscellaneous" (or NA)
# are left out before averaging.
plants |>
  filter(cultivation != "miscellaneous") |>
  group_by(cultivation) |>
  summarise(
    # Missing bounds are ignored when averaging.
    avg_ph_low = mean(preferred_ph_lower, na.rm = TRUE),
    avg_ph_up = mean(preferred_ph_upper, na.rm = TRUE)
  ) |>
  ggplot(aes(x = avg_ph_low, y = avg_ph_up, color = cultivation)) +
  geom_point(size = 3) +
  labs(
    title = "Average Soil pH Range by Cultivation",
    x = "Average Lower pH",
    y = "Average Upper pH",
    # Readable legend title instead of the raw column name.
    color = "Cultivation Type"
  ) +
  theme_minimal() +
  # Unnamed palette of 11 colours, matched to the cultivation types in order.
  scale_color_manual(
    values = c(
      "#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#66a61e",
      "#e6ab02", "#a6761d", "#666666", "#1f78b4", "#b2df8a", "#fb9a99"
    )
  )

# Dodged bar chart: number of plants at each water level, with one bar per
# nutrient level. All rows of `plants` are used (no cultivation filter here).
ggplot(data = plants, mapping = aes(x = water, fill = nutrients)) +
  geom_bar(position = position_dodge()) +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Water Requirements by Nutrient Needs",
    x = "Water Level",
    y = "Count",
    fill = "Nutrient Level"
  )

# Scatter plot of each plant's preferred pH range: lower bound on x, upper
# bound on y, coloured by cultivation type. Rows whose cultivation is
# "miscellaneous" (or NA) are left out.
# NOTE(review): this draws the same figure as the earlier "Soil pH Range by
# Cultivation" scatter plot in this script -- confirm the repeat is intended.
plants |>
  filter(cultivation != "miscellaneous") |>
  ggplot(
    aes(x = preferred_ph_lower, y = preferred_ph_upper, color = cultivation)
  ) +
  geom_point(size = 3) +
  labs(
    title = "Soil pH Range by Cultivation",
    x = "Lower pH",
    y = "Upper pH",
    # Readable legend title instead of the raw column name.
    color = "Cultivation Type"
  ) +
  # Unnamed palette of 11 colours, matched to the cultivation types in order.
  scale_color_manual(
    values = c(
      "#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#66a61e",
      "#e6ab02", "#a6761d", "#666666", "#1f78b4", "#b2df8a", "#fb9a99"
    )
  )