Snail Data Summary Stats, Bar Graph, and T.test

# load packages 
library(ggplot2)
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
library(kableExtra)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
## 
##     group_rows
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggtext)
library(stringr)

# read in data 
snaildata_ttest <- read.csv("snails t test for R.csv")
snaildata_anova <- read.csv("snails ANOVA for R.csv")

# view the data 
head(snaildata_anova)
##      ZONE EGGS ZONE_NUM
## 1 lowtide    9        1
## 2 lowtide    8        1
## 3 lowtide   12        1
## 4 lowtide    9        1
## 5 lowtide   11        1
## 6 lowtide   11        1
head(snaildata_ttest)
##      ZONE EGGS
## 1 lowtide    9
## 2 lowtide    8
## 3 lowtide   12
## 4 lowtide    9
## 5 lowtide   11
## 6 lowtide   11
# use the summarySE() function to collect summary statistics 
snailsumdata <- summarySE(data = snaildata_ttest, measurevar = "EGGS", groupvars = "ZONE", na.rm = TRUE)

# view the summary statistics 
snailsumdata
##      ZONE  N    EGGS       sd        se       ci
## 1 lowtide 16  9.1875 2.040221 0.5100551 1.087157
## 2 midtide 16 12.3750 2.918333 0.7295832 1.555070
# create a table with summary stats
snailsumdata %>% select(ZONE, N, EGGS, sd, se, ci) %>% 
  kable(caption = "Summary Statistics for Snail Data", 
        col.names = c("Zone", "Samples n", "Mean Eggs", "SD", "SE", "CI"), align = "c") %>% 
        kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE) 
Summary Statistics for Snail Data
Zone Samples n Mean Eggs SD SE CI
lowtide 16 9.1875 2.040221 0.5100551 1.087157
midtide 16 12.3750 2.918333 0.7295832 1.555070
# create bar graph
ggplot(snailsumdata, aes(x = ZONE, y = EGGS)) + 
  geom_col(aes(fill = c("lightblue", "darkgreen"))) + 
  # add error bars 
  geom_errorbar(aes(ymin = EGGS - se, ymax = EGGS + se), width = 0.5, size = 2) + 
    # add labels 
    labs(title = "Mean Snail Egg Production by Zone", 
         X = "Shore Zone", 
         y = "Number of Eggs", 
         caption = ("**Figure 1.** *Mean egg production (n = 16) by snails in both low and mid-tide shore zones with SE.*")) +

    # set theme 
    theme_classic() + 
  theme(plot.title = element_text(face = "bold", size = 15), 
        axis.title.x = element_text(face = "bold", size = 13), 
        axis.title.y = element_text(face = "bold", size = 13),
        plot.caption = element_markdown(hjust = 0, size = 11, lineheight = 1.5),
        legend.position = "none")

# conduct t-test 

ttest <- t.test(snaildata_ttest$EGGS~snaildata_ttest$ZONE)
print(ttest)
## 
##  Welch Two Sample t-test
## 
## data:  snaildata_ttest$EGGS by snaildata_ttest$ZONE
## t = -3.5807, df = 26.835, p-value = 0.001335
## alternative hypothesis: true difference in means between group lowtide and group midtide is not equal to 0
## 95 percent confidence interval:
##  -5.014555 -1.360445
## sample estimates:
## mean in group lowtide mean in group midtide 
##                9.1875               12.3750

Snail Part 2 ANOVA test, kable

# libraries 
library(ggplot2)
library(Rmisc)
library(dplyr)
library(kableExtra)
library(ggtext)
library(stringr)

# read in data 
snaildata_anova <- read.csv("snails ANOVA for R.csv")

head(snaildata_anova)
##      ZONE EGGS ZONE_NUM
## 1 lowtide    9        1
## 2 lowtide    8        1
## 3 lowtide   12        1
## 4 lowtide    9        1
## 5 lowtide   11        1
## 6 lowtide   11        1
# summary stats 
sumstatsanova <- summarySE(data = snaildata_anova, measurevar = "EGGS", groupvars = "ZONE", na.rm = TRUE)
sumstatsanova
##       ZONE  N    EGGS       sd        se       ci
## 1 hightide 16 16.5000 2.065591 0.5163978 1.100676
## 2  lowtide 16  9.1875 2.040221 0.5100551 1.087157
## 3  midtide 16 12.3750 2.918333 0.7295832 1.555070
# re-ordering the bars for the figure
sumstatsanova$ZONE <- factor(sumstatsanova$ZONE, levels = c("lowtide", "midtide", "hightide"))

# create bar graph 

p2 <- ggplot(sumstatsanova, aes(x = ZONE, y = EGGS)) + 
  geom_col(aes(fill = ZONE)) + 
  scale_fill_manual(values = c("lightblue", "green4", "red3")) +
  geom_errorbar(aes(ymin = EGGS - se, ymax = EGGS + se), width = 0.6, size = 1.8) +
  labs(title = "Mean Snail Egg Production by Zone", 
       x = "Zone", 
       y = "Eggs") +
# theme
  theme_classic() + 
  theme(plot.title = element_text(face = "bold", size = 15), 
        axis.title.x = element_text(face = "bold", size = 13), 
        axis.title.y = element_text(face = "bold", size = 13),
      
        legend.position = "none")

p2

# ANOVA test 
model <- aov(snaildata_anova$EGGS~snaildata_anova$ZONE)
# show ANOVA summary
summary(model)
##                      Df Sum Sq Mean Sq F value  Pr(>F)    
## snaildata_anova$ZONE  2  430.1  215.06   38.07 2.1e-10 ***
## Residuals            45  254.2    5.65                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# post hoc test (tukeys test) 

tukeytestmodel <- TukeyHSD(model)
tukeytestmodel
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = snaildata_anova$EGGS ~ snaildata_anova$ZONE)
## 
## $`snaildata_anova$ZONE`
##                     diff       lwr       upr     p adj
## lowtide-hightide -7.3125 -9.349023 -5.275977 0.0000000
## midtide-hightide -4.1250 -6.161523 -2.088477 0.0000366
## midtide-lowtide   3.1875  1.150977  5.224023 0.0012591
df_model <- as.data.frame(tukeytestmodel$`snaildata_anova$ZONE`) %>% 
  mutate(
    diff = round(diff, 3), 
    lwr = round(lwr, 3), 
    upr = round(upr, 3), 
    `p adj` = round(`p adj`, 3))

# table 3. 
table3 <- kable(x = df_model, 
      caption = "Table 3. post-hoc test of snailanova data by zone", 
      col.name = c("zone", "difference of means", "lwr bound confidence interval", "upr bound confidence interval", "adjusted p-value"), align = "c") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = TRUE)

table3
Table 3. post-hoc test of snailanova data by zone
zone difference of means lwr bound confidence interval upr bound confidence interval adjusted p-value
lowtide-hightide -7.313 -9.349 -5.276 0.000
midtide-hightide -4.125 -6.162 -2.088 0.000
midtide-lowtide 3.188 1.151 5.224 0.001

Marsh Snail part 3, ANOVA, tables, column graph

# libraries
library(ggplot2)
library(Rmisc)
library(dplyr)
library(kableExtra)
library(ggtext)
library(stringr)
library(readr)
library(ggtext)

# load in the data for the marsh snail  

marshsnaildata <- read.csv("Marsh_Snail_Data_ 2 .csv")
View(marshsnaildata)

# summary stats 

marshsum <- summarySE(data = marshsnaildata, measurevar = "Height", groupvars = "State", na.rm = TRUE)
marshsum
##           State  N    Height       sd        se       ci
## 1       Florida 10  6.716126 1.840858 0.5821305 1.316871
## 2       Georgia 10  7.443790 2.959360 0.9358317 2.116998
## 3 NorthCarolina 10 10.624127 2.669881 0.8442905 1.909918
## 4 SouthCarolina 10  7.367641 2.013503 0.6367257 1.440374
# graph 
marshplot<- ggplot(data = marshsum, aes(x = State, y = Height)) +
geom_col(aes(fill = State)) + 
  geom_errorbar(aes(ymin = Height - se, ymax = Height + se), width = 0.6, size = 1.8) +
  
#labels
  labs(title = "Mean Body Size of *Littoriaria irrorate* Across U.S.States", #make sure you're using ggtext package and are use the command "element_markdown()" in theme()
       x = "State", 
       y = "Body Size (height)") +
  
# theme 
  theme_classic() +
  theme(plot.title = element_markdown(face = "bold", size = 15), 
        axis.title.x = element_text(face = "bold", size = 13), 
        axis.title.y = element_text(face = "bold", size = 13))

marshplot

# ANOVA test 
anova_marsh <- aov(data = marshsnaildata, Height~State)
summary(anova_marsh)
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## State        3  92.38  30.793    5.28 0.00403 **
## Residuals   36 209.96   5.832                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# post-hoc tukey test 
marshtukey <- TukeyHSD(anova_marsh)
marshtukey
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Height ~ State, data = marshsnaildata)
## 
## $State
##                                    diff        lwr        upr     p adj
## Georgia-Florida              0.72766399 -2.1810856  3.6364135 0.9063288
## NorthCarolina-Florida        3.90800075  0.9992512  6.8167503 0.0047882
## SouthCarolina-Florida        0.65151552 -2.2572340  3.5602651 0.9303762
## NorthCarolina-Georgia        3.18033676  0.2715872  6.0890863 0.0275720
## SouthCarolina-Georgia       -0.07614847 -2.9848980  2.8326011 0.9998718
## SouthCarolina-NorthCarolina -3.25648523 -6.1652348 -0.3477357 0.0231860
# extract tuley test as data frame
df_marshtukey <- as.data.frame(marshtukey$State) %>% 
  mutate(
    diff = round(diff, 3), 
    lwr = round(lwr, 3), 
    upr = round(upr, 3), 
    `p adj` = round(`p adj`, 3))

# make table here because there is no way i'm copy and pasting everything into word 
# table 4
table5 <- kable(
  x = df_marshtukey, 
  caption = "Table 5. Post-hoc test for marsh snail data.", 
      col.names = c("State", "difference of means", "lwr bound confidence interval", "upr bound confidence interval", "adjusted p-value"), align = "c") %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
table5
Table 5. Post-hoc test for marsh snail data.
State difference of means lwr bound confidence interval upr bound confidence interval adjusted p-value
Georgia-Florida 0.728 -2.181 3.636 0.906
NorthCarolina-Florida 3.908 0.999 6.817 0.005
SouthCarolina-Florida 0.652 -2.257 3.560 0.930
NorthCarolina-Georgia 3.180 0.272 6.089 0.028
SouthCarolina-Georgia -0.076 -2.985 2.833 1.000
SouthCarolina-NorthCarolina -3.256 -6.165 -0.348 0.023