knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(tidyverse)
library(dplyr)
library(here)
library(broom)
library(GGally)
library(jtools)
library(kableExtra)
# read in data
palmetto <- read_csv(here("data", "palmetto.csv"),
col_types = cols(.default = 'c')) %>%
mutate(height = as.numeric(height))
# assign species 1 & 2 names, make species numeric
# 1 1 = s repens, 1 2 = s etonia
palmetto_tidy <- palmetto %>%
filter(plant == "1") %>%
mutate(species = as.numeric(species)) %>%
mutate(species_name = case_when(species == 1 ~ "repens",
species == 2 ~ "etonia",
TRUE ~ "none")) %>%
select(year, species_name, height, length, width, green_lvs) %>%
mutate(width = as.numeric(width)) %>%
mutate(length = as.numeric(length)) %>%
mutate(green_lvs = as.numeric(green_lvs))
# Directions: use binary logistic regression to test feasibility of using variables plant height (height), canopy length (length), canopy width (width), and number of green leaves (green_lvs) to classify whether a palmetto is species Serenoa repens or Sabal etonia. Use code folding and hide all messages & warnings in your knitted HTML.
# exploratory graphs
palmetto_tidy %>%
select(species_name, height:green_lvs) %>%
ggpairs(aes(color=species_name))
# Figure 1: Height v Green Leaves
height_grnlvs_figure <- ggplot(data = palmetto_tidy, aes(x = height, y = green_lvs)) +
geom_point(aes(color = species_name)) +
facet_wrap(~species_name) +
theme(legend.position = 'none') +
labs(x = "plant height", y = "# of green leaves")
height_grnlvs_figure
Figure 1. Height of plant vs number of green leaves on S. etonia and S. repens plant species.
## --------------------------------------------
# etonia_height <- palmetto_tidy %>%
# filter(species_name == "etonia") %>%
# mutate(green_lvs = as.numeric(green_lvs))
#
# mean(etonia_height$height) #79.26316
# mean(etonia_height$green_lvs) #4.263158
## --------------------------------------------
# repens_height <- palmetto_tidy %>%
# filter(species_name == "repens") %>%
# mutate(green_lvs = as.numeric(green_lvs))
#
# mean(repens_height$height) #73.31579
# mean(repens_height$green_lvs) #6.842105
Figure 1 shows that overall, S. etonia plants are a little taller (mean height = 79 cm) than S. repens, whose average height is 73 cm. However, S. repens averaged a higher number of green leaves (mean = 6.8) while S. etonia averaged approximately 4 green leaves per plant.
# Figure 2: canopy length vs width
canopy_length_height <- ggplot(data=palmetto_tidy, aes(x=width, y=length)) +
geom_point(aes(color = species_name)) +
scale_x_continuous(limits =c (55,143),
breaks = c(60, 80, 100, 120, 140)) +
scale_y_continuous(limits = c(65, 190),
breaks = c(60, 90, 120, 150, 180)) +
labs(x = "Canopy Width", y = "Canopy Length") +
scale_color_discrete(name = "Species")
canopy_length_height
Figure 2. Width of plant canopy vs length of plant canopy for S. etonia and S. repens plant species.
S. etonia, on average, has a much larger canopy width and length compared to S. repens. Some individuals (n=2) of S. etonia are relatively smaller and closer in size to S. repens.
# Figure 3: Height vs canopy width
height_canopy_width <- ggplot(data=palmetto_tidy, aes(x=height, y=width)) +
geom_point(aes(color = species_name)) +
labs(x = "Plant Height", y = "Canopy Width") +
scale_color_discrete(name = "Species")
height_canopy_width
Figure 3. Height of plant vs canopy width for S. entonia and S. repens plant species.
S. entonia, overall, has a greater canopy width than S. repens, but is close to the same height of S. repens. As seen in Figure 1, S. entonia has a mean plant height of approximately 79 cm and S. repens has a mean plant height of approximately 73 cm.
# read in data
palmetto_new <- read_csv(here("data", "palmetto_two.csv")) %>%
mutate(species = case_when(species == 1 ~ "Serenoa_repens",
species == 2 ~ "Sabal_etonia")) %>%
select(species, length, width, height, green_lvs)
palmetto_new$species <- as.factor(palmetto_new$species)
palmetto_blr_new <- glm(species ~ height + length + width + green_lvs,
data = palmetto_new,
family = "binomial")
# summary(palmetto_blr_new)
blr_kable_new_tidy <- palmetto_blr_new %>%
broom::tidy ()
palmetto_table <- blr_kable_new_tidy %>%
kable(col.names = c("term",
"estimate",
"std error",
"statistic",
"p-value")) %>%
kable_styling(bootstrap_options = "bordered",
full_width = F,
position = "left")
palmetto_table
| term | estimate | std error | statistic | p-value |
|---|---|---|---|---|
| (Intercept) | -3.2266851 | 0.1420708 | -22.71180 | 0 |
| height | 0.0292173 | 0.0023061 | 12.66984 | 0 |
| length | -0.0458233 | 0.0018661 | -24.55600 | 0 |
| width | -0.0394434 | 0.0021000 | -18.78227 | 0 |
| green_lvs | 1.9084747 | 0.0388634 | 49.10728 | 0 |
palmetto_blr_fitted <- palmetto_blr_new %>%
broom::augment(type.predict = "response")
# palmetto_blr_fitted
palmetto_blr_50 <- palmetto_blr_fitted %>%
mutate(prob_50 = case_when(.fitted >= .5 ~ "Serenoa_repens",
.fitted < .5 ~ "Sabal_etonia")) %>%
mutate(class_correctly = case_when(species == prob_50 ~ "Correct",
species != prob_50 ~ "Incorrect"))
palmetto_count <- palmetto_blr_50 %>%
count(species, class_correctly) %>%
pivot_wider(names_from = class_correctly, values_from = n) %>%
mutate(total = rowSums(across(where(is.numeric)))) %>%
mutate(percent_correct = (Correct/total*100)) %>%
mutate(species = case_when(species == "Serenoa_repens" ~ "Serenoa repens",
species == "Sabal_etonia" ~ "Sabal etonia"))
palmetto_count_table <- kable(palmetto_count, col.names = c("Species", "Classified Correctly (n)",
"Classified Incorrectly (n)",
"Total Species (n)",
"Percent Correctly Classified")) %>%
kable_styling(bootstrap_options = "striped")
palmetto_count_table
| Species | Classified Correctly (n) | Classified Incorrectly (n) | Total Species (n) | Percent Correctly Classified |
|---|---|---|---|---|
| Sabal etonia | 5701 | 454 | 6155 | 92.62388 |
| Serenoa repens | 5548 | 564 | 6112 | 90.77225 |
Table 1. Percentage of correctly classified binomial regression for Sabal etonia and Serenoa repens.