library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.4.3

## Warning: package 'tibble' was built under R version 4.4.3

## Warning: package 'tidyr' was built under R version 4.4.3

## Warning: package 'purrr' was built under R version 4.4.3

## Warning: package 'stringr' was built under R version 4.4.3

## Warning: package 'forcats' was built under R version 4.4.3

## Warning: package 'lubridate' was built under R version 4.4.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(smatr)
library(readr)
Sal <- read_csv("SrResearch/Copy of BIOL_451.csv")

## Rows: 328 Columns: 68
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): Month, Date, Section, Sal_1_Sex, Sal_1_Morph, Sal_2_Sex, Sal_2_Mor...
## dbl (53): Row, Column, Ageratina.altissima.Nat, Alliaria.petiolata.Inv, Amph...
## lgl  (5): Sal_1_BC, Sal_2_BC, Sal_3_BC, Sal_4_BC, Sal_4_Sex
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

my_theme <- function() {
  ggthemes::theme_hc(
    base_family = "serif"
  ) +
    theme(
      plot.title.position = "plot",
      plot.title = element_text(
        face = "bold"
      ),
      panel.grid.major = element_line(
        linewidth = 0.1
      ),
      plot.caption.position = "plot",
      axis.title.x = element_text(
        hjust = .5,
        size = 12,
        face = "bold"
      ),
      axis.title.y = element_text(
        hjust = .5,
        size = 12,
        face = "bold"
      ),
      plot.subtitle = element_text(
        face = "italic"
      ),
      axis.text = element_text(
        face = "bold"
      ),
      legend.title.position = "top",
      legend.title = element_text(
        hjust = .5,
        face = "italic"
      )
    ) 
}

Creating SMI function

SMI <- function(Mi, Lo, Li, bmax) {
  `Mi`*(`Lo`/`Li`)^`bmax`
}

Alliaria petiolata

Histogram of Alpe

Sal |>
  group_by(Row, Column, Section)|>
  ggplot()+
  aes(Alliaria.petiolata.Inv)+
  geom_histogram()

## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Splitting Dataframes

Salamander data with Alpe Present

Sal |>
  select("Month", "Date", "Row", "Column", "Section", "Alliaria.petiolata.Inv", 48:68) |>
  select("Month", "Date", "Row", "Column", "Section", "Alliaria.petiolata.Inv",c(8,9,13,14,18,19,23,24)) |>
  pivot_longer(
    cols = c(7,9,11,13), names_to = "Sal_nmb", values_to = "Length"
  )|>
  mutate(Mass = case_when(
   Sal_nmb == "Sal_1_Length" ~ Sal_1_Mass,
    Sal_nmb == "Sal_2_Length" ~ Sal_2_Mass,
    Sal_nmb == "Sal_3_Length" ~ Sal_3_Mass,
    Sal_nmb == "Sal_4_Length" ~ Sal_4_Mass
  ))|>
  drop_na(Length) |>
  select(1:6, 11, 12, 13) |>
  filter(Alliaria.petiolata.Inv > 0) -> Sal_Measures_Alpe_Present

Salamander data with Alpe absent

Sal |>
  select("Month", "Date", "Row", "Column", "Section", "Alliaria.petiolata.Inv", 48:68) |>
  select("Month", "Date", "Row", "Column", "Section", "Alliaria.petiolata.Inv",c(8,9,13,14,18,19,23,24)) |>
  pivot_longer(
    cols = c(7,9,11,13), names_to = "Sal_nmb", values_to = "Length"
  )|>
  mutate(Mass = case_when(
   Sal_nmb == "Sal_1_Length" ~ Sal_1_Mass,
    Sal_nmb == "Sal_2_Length" ~ Sal_2_Mass,
    Sal_nmb == "Sal_3_Length" ~ Sal_3_Mass,
    Sal_nmb == "Sal_4_Length" ~ Sal_4_Mass
  ))|>
  drop_na(Length) |>
  select(1:6, 11, 12, 13) |>
  filter(Alliaria.petiolata.Inv == 0) -> Sal_Measures_Alpe_Absent

Alpe Absent Body Condition

Step 1: Find any outliers

Sal_Measures_Alpe_Absent |>
  ggplot()+
  aes(x = Length, y = Mass)+
  geom_point()

Step 2: Find bmax

Sal_Measures_Alpe_Absent |>
  ggplot()+
  aes(x = log(Length), y = log(Mass))+
  geom_point()

sma(Mass ~ Length, log = 'xy', data = Sal_Measures_Alpe_Absent)

## Call: sma(formula = Mass ~ Length, data = Sal_Measures_Alpe_Absent, 
##     log = "xy") 
## 
## Fit using Standardized Major Axis 
## 
## These variables were log-transformed before fitting: xy 
## 
## Confidence intervals (CI) are at 95%
## 
## ------------------------------------------------------------
## Coefficients:
##             elevation    slope
## estimate    -3.628971 2.241237
## lower limit -4.151737 1.933468
## upper limit -3.106206 2.597996
## 
## H0 : variables uncorrelated
## R-squared : 0.5674766 
## P-value : 7.5484e-16

mean(Sal_Measures_Alpe_Absent$Length)

## [1] 37.7825

bmax = 2.241237

Step 3: Apply SMI to all datapoints

Sal_Measures_Alpe_Absent |>
  mutate(SMI = SMI(Mi = Mass, Lo = mean(Length), Li = Length, bmax = 2.241237),
         ALPE = "Absent") -> Sal_Measures_Alpe_Absent

SMI for Alpe Present

Step 1

Sal_Measures_Alpe_Present |>
  ggplot()+
  aes(x = Length, y = Mass)+
  geom_point()

Step 2

Sal_Measures_Alpe_Present |>
  ggplot()+
  aes(x = log(Length), y = log(Mass))+
  geom_point()

sma(Mass ~ Length, log = 'xy', data = Sal_Measures_Alpe_Present)

## Call: sma(formula = Mass ~ Length, data = Sal_Measures_Alpe_Present, 
##     log = "xy") 
## 
## Fit using Standardized Major Axis 
## 
## These variables were log-transformed before fitting: xy 
## 
## Confidence intervals (CI) are at 95%
## 
## ------------------------------------------------------------
## Coefficients:
##             elevation    slope
## estimate    -3.416338 2.112347
## lower limit -3.836498 1.863885
## upper limit -2.996179 2.393930
## 
## H0 : variables uncorrelated
## R-squared : 0.7147998 
## P-value : < 2.22e-16

bmax = 2.112347

Step 3

Sal_Measures_Alpe_Present |>
  mutate(SMI = SMI(Mi = Mass, Lo = mean(Length), Li = Length, bmax = 2.112347),
         ALPE = "Present") ->Sal_Measures_Alpe_Present

Comparing SMIs

ALPE <- c(Sal_Measures_Alpe_Absent$ALPE, Sal_Measures_Alpe_Present$ALPE)
smi_alpe <- c(Sal_Measures_Alpe_Absent$SMI, Sal_Measures_Alpe_Present$SMI)
df_alpe <- data.frame(ALPE, smi_alpe)

ggplot(df_alpe)+
  aes(x = ALPE, y = smi_alpe)+
  geom_boxplot()+
  labs(
    x = "Alpe",
    y = "SMI"
  )+
  my_theme()

T Test

t.test(smi_alpe ~ ALPE)

## 
##  Welch Two Sample t-test
## 
## data:  smi_alpe by ALPE
## t = -1.5987, df = 142.57, p-value = 0.1121
## alternative hypothesis: true difference in means between group Absent and group Present is not equal to 0
## 95 percent confidence interval:
##  -0.11722446  0.01239407
## sample estimates:
##  mean in group Absent mean in group Present 
##             0.8351983             0.8876135

Lonicera maackii

Histogram of Loma

ggplot(Sal)+
  aes(x = Lonicera.maackii.Inv)+
  geom_histogram()

## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Selecting Data: Loma Present

Sal |>
  select("Month", "Date", "Row", "Column", "Section", "Lonicera.maackii.Inv", 48:68) |>
  select("Month", "Date", "Row", "Column", "Section", "Lonicera.maackii.Inv",c(8,9,13,14,18,19,23,24)) |>
  pivot_longer(
    cols = c(7,9,11,13), names_to = "Sal_nmb", values_to = "Length"
  )|>
  mutate(Mass = case_when(
   Sal_nmb == "Sal_1_Length" ~ Sal_1_Mass,
    Sal_nmb == "Sal_2_Length" ~ Sal_2_Mass,
    Sal_nmb == "Sal_3_Length" ~ Sal_3_Mass,
    Sal_nmb == "Sal_4_Length" ~ Sal_4_Mass
  ))|>
  drop_na(Length) |>
  select(1:6, 11, 12, 13) |>
  filter(Lonicera.maackii.Inv > 0) -> Sal_Measures_Loma_Present

Selecting Data: Loma Absent

Sal |>
  select("Month", "Date", "Row", "Column", "Section", "Lonicera.maackii.Inv", 48:68) |>
  select("Month", "Date", "Row", "Column", "Section", "Lonicera.maackii.Inv",c(8,9,13,14,18,19,23,24)) |>
  pivot_longer(
    cols = c(7,9,11,13), names_to = "Sal_nmb", values_to = "Length"
  )|>
  mutate(Mass = case_when(
   Sal_nmb == "Sal_1_Length" ~ Sal_1_Mass,
    Sal_nmb == "Sal_2_Length" ~ Sal_2_Mass,
    Sal_nmb == "Sal_3_Length" ~ Sal_3_Mass,
    Sal_nmb == "Sal_4_Length" ~ Sal_4_Mass
  ))|>
  drop_na(Length) |>
  select(1:6, 11, 12, 13) |>
  filter(Lonicera.maackii.Inv == 0) -> Sal_Measures_Loma_Absent

Loma Present SMI

Find Outliers

Sal_Measures_Loma_Present |>
  ggplot()+
  aes(x = Length, y = Mass)+
  geom_point()

Find bmax

Sal_Measures_Loma_Present |>
  ggplot()+
  aes(x = log(Length), y = log(Mass))+
  geom_point()

sma(Mass ~ Length, log = 'xy', data = Sal_Measures_Loma_Present)

## Call: sma(formula = Mass ~ Length, data = Sal_Measures_Loma_Present, 
##     log = "xy") 
## 
## Fit using Standardized Major Axis 
## 
## These variables were log-transformed before fitting: xy 
## 
## Confidence intervals (CI) are at 95%
## 
## ------------------------------------------------------------
## Coefficients:
##             elevation    slope
## estimate    -4.145240 2.565993
## lower limit -4.865578 2.147080
## upper limit -3.424901 3.066640
## 
## H0 : variables uncorrelated
## R-squared : 0.5853791 
## P-value : 1.6276e-11

bmax = 2.565993

Step 3

Sal_Measures_Loma_Present |>
  mutate(SMI = SMI(Mi = Mass, Lo = mean(Length), Li = Length, bmax = 2.565993),
         LOMA = "Present") ->Sal_Measures_Loma_Present

Loma Absent

Step 1

Sal_Measures_Loma_Absent |>
  ggplot()+
  aes(x = Length, y = Mass)+
  geom_point()

Step 2

Sal_Measures_Loma_Absent |>
  ggplot()+
  aes(x = log(Length), y = log(Mass))+
  geom_point()

sma(Mass ~ Length, log = 'xy', data = Sal_Measures_Loma_Absent)

## Call: sma(formula = Mass ~ Length, data = Sal_Measures_Loma_Absent, 
##     log = "xy") 
## 
## Fit using Standardized Major Axis 
## 
## These variables were log-transformed before fitting: xy 
## 
## Confidence intervals (CI) are at 95%
## 
## ------------------------------------------------------------
## Coefficients:
##             elevation    slope
## estimate    -3.325241 2.056086
## lower limit -3.712898 1.826112
## upper limit -2.937583 2.315022
## 
## H0 : variables uncorrelated
## R-squared : 0.648234 
## P-value : < 2.22e-16

bmax = 2.056086

Step 3

Sal_Measures_Loma_Absent |>
  mutate(SMI = SMI(Mi = Mass, Lo = mean(Length), Li = Length, bmax = 2.056086),
         LOMA = "Absent") ->Sal_Measures_Loma_Absent

Comparing SMIs

LOMA <- c(Sal_Measures_Loma_Absent$LOMA, Sal_Measures_Loma_Present$LOMA)
smi_loma <- c(Sal_Measures_Loma_Absent$SMI, Sal_Measures_Loma_Present$SMI)
df <- data.frame(LOMA, smi_loma)

ggplot(df)+
  aes(x = LOMA, y = smi_loma)+
  geom_boxplot()+
  labs(
    x = "Loma",
    y = "SMI"
  )+
  my_theme()

T Test

t.test(smi_loma ~ LOMA)

## 
##  Welch Two Sample t-test
## 
## data:  smi_loma by LOMA
## t = 3.9083, df = 123.73, p-value = 0.0001521
## alternative hypothesis: true difference in means between group Absent and group Present is not equal to 0
## 95 percent confidence interval:
##  0.0617326 0.1884188
## sample estimates:
##  mean in group Absent mean in group Present 
##             0.9043437             0.7792680

Rosa multiflora

Histogram

ggplot(Sal)+
  aes(x = Rosa.multiflora.Inv)+
  geom_histogram()

## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Splitting Dataframes

Romu present

Sal |>
  select("Month", "Date", "Row", "Column", "Section", "Rosa.multiflora.Inv", 48:68) |>
  select("Month", "Date", "Row", "Column", "Section", "Rosa.multiflora.Inv",c(8,9,13,14,18,19,23,24)) |>
  pivot_longer(
    cols = c(7,9,11,13), names_to = "Sal_nmb", values_to = "Length"
  )|>
  mutate(Mass = case_when(
   Sal_nmb == "Sal_1_Length" ~ Sal_1_Mass,
    Sal_nmb == "Sal_2_Length" ~ Sal_2_Mass,
    Sal_nmb == "Sal_3_Length" ~ Sal_3_Mass,
    Sal_nmb == "Sal_4_Length" ~ Sal_4_Mass
  ))|>
  drop_na(Length) |>
  select(1:6, 11, 12, 13) |>
  filter(Rosa.multiflora.Inv > 0) -> Sal_Measures_Romu_Present

Romu absent

Sal |>
  select("Month", "Date", "Row", "Column", "Section", "Rosa.multiflora.Inv", 48:68) |>
  select("Month", "Date", "Row", "Column", "Section", "Rosa.multiflora.Inv",c(8,9,13,14,18,19,23,24)) |>
  pivot_longer(
    cols = c(7,9,11,13), names_to = "Sal_nmb", values_to = "Length"
  )|>
  mutate(Mass = case_when(
   Sal_nmb == "Sal_1_Length" ~ Sal_1_Mass,
    Sal_nmb == "Sal_2_Length" ~ Sal_2_Mass,
    Sal_nmb == "Sal_3_Length" ~ Sal_3_Mass,
    Sal_nmb == "Sal_4_Length" ~ Sal_4_Mass
  ))|>
  drop_na(Length) |>
  select(1:6, 11, 12, 13) |>
  filter(Rosa.multiflora.Inv == 0) -> Sal_Measures_Romu_Absent

Romu Present

Step 1

ggplot(Sal_Measures_Romu_Present)+
  aes(x = Length, y = Mass)+
  geom_point()

Step 2

ggplot(Sal_Measures_Romu_Present)+
  aes(x = log(Length), y = log(Mass))+
  geom_point()

sma(Mass ~ Length, log = 'xy', data = Sal_Measures_Romu_Present)

## Call: sma(formula = Mass ~ Length, data = Sal_Measures_Romu_Present, 
##     log = "xy") 
## 
## Fit using Standardized Major Axis 
## 
## These variables were log-transformed before fitting: xy 
## 
## Confidence intervals (CI) are at 95%
## 
## ------------------------------------------------------------
## Coefficients:
##             elevation    slope
## estimate    -3.581728 2.213128
## lower limit -4.219992 1.843898
## upper limit -2.943465 2.656294
## 
## H0 : variables uncorrelated
## R-squared : 0.5737301 
## P-value : 5.2208e-11

bmax = 2.213128

Step 3

Sal_Measures_Romu_Present |>
  mutate(SMI = SMI(Mi = Mass, Lo = mean(Length), Li = Length, bmax = 2.213128),
         ROMU = "Present") ->Sal_Measures_Romu_Present

Romu Absent

ggplot(Sal_Measures_Romu_Absent)+
  aes(x = Length, y = Mass)+
  geom_point()

Step 2

ggplot(Sal_Measures_Romu_Absent)+
  aes(x = log(Length), y = log(Mass))+
  geom_point()

sma(Mass ~ Length, log = 'xy', data = Sal_Measures_Romu_Absent)

## Call: sma(formula = Mass ~ Length, data = Sal_Measures_Romu_Absent, 
##     log = "xy") 
## 
## Fit using Standardized Major Axis 
## 
## These variables were log-transformed before fitting: xy 
## 
## Confidence intervals (CI) are at 95%
## 
## ------------------------------------------------------------
## Coefficients:
##             elevation    slope
## estimate    -3.526124 2.179104
## lower limit -3.930018 1.938878
## upper limit -3.122230 2.449094
## 
## H0 : variables uncorrelated
## R-squared : 0.6553729 
## P-value : < 2.22e-16

bmax = 2.179104

Step 3

Sal_Measures_Romu_Absent |>
  mutate(SMI = SMI(Mi = Mass, Lo = mean(Length), Li = Length, bmax = 2.179104),
         ROMU = "Absent") ->Sal_Measures_Romu_Absent

Comparing SMI

ROMU <- c(Sal_Measures_Romu_Absent$ROMU, Sal_Measures_Romu_Present$ROMU)
smi_romu <- c(Sal_Measures_Romu_Absent$SMI, Sal_Measures_Romu_Present$SMI)
df_romu <- data.frame(ROMU, smi_romu)

ggplot(df_romu)+
  aes(x = ROMU, y = smi_romu)+
  geom_boxplot()+
  labs(
    x = "Romu",
    y = "SMI"
  )+
  my_theme()

T Test

t.test(smi_romu ~ ROMU)

## 
##  Welch Two Sample t-test
## 
## data:  smi_romu by ROMU
## t = 1.6445, df = 108.58, p-value = 0.103
## alternative hypothesis: true difference in means between group Absent and group Present is not equal to 0
## 95 percent confidence interval:
##  -0.01165274  0.12520912
## sample estimates:
##  mean in group Absent mean in group Present 
##             0.8798491             0.8230710

SMIs

Henry Joniak

2026-01-10

Creating SMI function

Alliaria petiolata

Histogram of Alpe

Splitting Dataframes

Salamander data with Alpe Present

Salamander data with Alpe absent

Alpe Absent Body Condition

Step 1: Find any outliers

Step 2: Find bmax

Step 3: Apply SMI to all datapoints

SMI for Alpe Present

Step 1

Step 2

Step 3

Comparing SMIs

T Test

Lonicera maackii

Histogram of Loma

Selecting Data: Loma Present

Selecting Data: Loma Absent

Loma Present SMI

Find Outliers

Find bmax

Step 3

Loma Absent

Step 1

Step 2

Step 3

Comparing SMIs

T Test

Rosa multiflora

Histogram

Splitting Dataframes

Romu present

Romu absent

Romu Present

Step 1

Step 2

Step 3

Romu Absent

Step 2

Step 3

Comparing SMI

T Test