Snail Data Summary Stats, Bar Graph, and T.test
# load packages
library(ggplot2)
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
library(kableExtra)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
##
## group_rows
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggtext)
library(stringr)
# read in data
snaildata_ttest <- read.csv("snails t test for R.csv")
snaildata_anova <- read.csv("snails ANOVA for R.csv")
# view the data
head(snaildata_anova)
## ZONE EGGS ZONE_NUM
## 1 lowtide 9 1
## 2 lowtide 8 1
## 3 lowtide 12 1
## 4 lowtide 9 1
## 5 lowtide 11 1
## 6 lowtide 11 1
head(snaildata_ttest)
## ZONE EGGS
## 1 lowtide 9
## 2 lowtide 8
## 3 lowtide 12
## 4 lowtide 9
## 5 lowtide 11
## 6 lowtide 11
# use the summarySE() function to collect summary statistics
snailsumdata <- summarySE(data = snaildata_ttest, measurevar = "EGGS", groupvars = "ZONE", na.rm = TRUE)
# view the summary statistics
snailsumdata
## ZONE N EGGS sd se ci
## 1 lowtide 16 9.1875 2.040221 0.5100551 1.087157
## 2 midtide 16 12.3750 2.918333 0.7295832 1.555070
# create a table with summary stats
snailsumdata %>% select(ZONE, N, EGGS, sd, se, ci) %>%
kable(caption = "Summary Statistics for Snail Data",
col.names = c("Zone", "Samples n", "Mean Eggs", "SD", "SE", "CI"), align = "c") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Summary Statistics for Snail Data
Zone
|
Samples n
|
Mean Eggs
|
SD
|
SE
|
CI
|
lowtide
|
16
|
9.1875
|
2.040221
|
0.5100551
|
1.087157
|
midtide
|
16
|
12.3750
|
2.918333
|
0.7295832
|
1.555070
|
# create bar graph
ggplot(snailsumdata, aes(x = ZONE, y = EGGS)) +
geom_col(aes(fill = c("lightblue", "darkgreen"))) +
# add error bars
geom_errorbar(aes(ymin = EGGS - se, ymax = EGGS + se), width = 0.5, size = 2) +
# add labels
labs(title = "Mean Snail Egg Production by Zone",
X = "Shore Zone",
y = "Number of Eggs",
caption = ("**Figure 1.** *Mean egg production (n = 16) by snails in both low and mid-tide shore zones with SE.*")) +
# set theme
theme_classic() +
theme(plot.title = element_text(face = "bold", size = 15),
axis.title.x = element_text(face = "bold", size = 13),
axis.title.y = element_text(face = "bold", size = 13),
plot.caption = element_markdown(hjust = 0, size = 11, lineheight = 1.5),
legend.position = "none")

# conduct t-test
ttest <- t.test(snaildata_ttest$EGGS~snaildata_ttest$ZONE)
print(ttest)
##
## Welch Two Sample t-test
##
## data: snaildata_ttest$EGGS by snaildata_ttest$ZONE
## t = -3.5807, df = 26.835, p-value = 0.001335
## alternative hypothesis: true difference in means between group lowtide and group midtide is not equal to 0
## 95 percent confidence interval:
## -5.014555 -1.360445
## sample estimates:
## mean in group lowtide mean in group midtide
## 9.1875 12.3750
Snail Part 2 ANOVA test, kable
# libraries
library(ggplot2)
library(Rmisc)
library(dplyr)
library(kableExtra)
library(ggtext)
library(stringr)
# read in data
snaildata_anova <- read.csv("snails ANOVA for R.csv")
head(snaildata_anova)
## ZONE EGGS ZONE_NUM
## 1 lowtide 9 1
## 2 lowtide 8 1
## 3 lowtide 12 1
## 4 lowtide 9 1
## 5 lowtide 11 1
## 6 lowtide 11 1
# summary stats
sumstatsanova <- summarySE(data = snaildata_anova, measurevar = "EGGS", groupvars = "ZONE", na.rm = TRUE)
sumstatsanova
## ZONE N EGGS sd se ci
## 1 hightide 16 16.5000 2.065591 0.5163978 1.100676
## 2 lowtide 16 9.1875 2.040221 0.5100551 1.087157
## 3 midtide 16 12.3750 2.918333 0.7295832 1.555070
# re-ordering the bars for the figure
sumstatsanova$ZONE <- factor(sumstatsanova$ZONE, levels = c("lowtide", "midtide", "hightide"))
# create bar graph
p2 <- ggplot(sumstatsanova, aes(x = ZONE, y = EGGS)) +
geom_col(aes(fill = ZONE)) +
scale_fill_manual(values = c("lightblue", "green4", "red3")) +
geom_errorbar(aes(ymin = EGGS - se, ymax = EGGS + se), width = 0.6, size = 1.8) +
labs(title = "Mean Snail Egg Production by Zone",
x = "Zone",
y = "Eggs") +
# theme
theme_classic() +
theme(plot.title = element_text(face = "bold", size = 15),
axis.title.x = element_text(face = "bold", size = 13),
axis.title.y = element_text(face = "bold", size = 13),
legend.position = "none")
p2

# ANOVA test
model <- aov(snaildata_anova$EGGS~snaildata_anova$ZONE)
# show ANOVA summary
summary(model)
## Df Sum Sq Mean Sq F value Pr(>F)
## snaildata_anova$ZONE 2 430.1 215.06 38.07 2.1e-10 ***
## Residuals 45 254.2 5.65
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# post hoc test (tukeys test)
tukeytestmodel <- TukeyHSD(model)
tukeytestmodel
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = snaildata_anova$EGGS ~ snaildata_anova$ZONE)
##
## $`snaildata_anova$ZONE`
## diff lwr upr p adj
## lowtide-hightide -7.3125 -9.349023 -5.275977 0.0000000
## midtide-hightide -4.1250 -6.161523 -2.088477 0.0000366
## midtide-lowtide 3.1875 1.150977 5.224023 0.0012591
df_model <- as.data.frame(tukeytestmodel$`snaildata_anova$ZONE`) %>%
mutate(
diff = round(diff, 3),
lwr = round(lwr, 3),
upr = round(upr, 3),
`p adj` = round(`p adj`, 3))
# table 3.
table3 <- kable(x = df_model,
caption = "Table 3. post-hoc test of snailanova data by zone",
col.name = c("zone", "difference of means", "lwr bound confidence interval", "upr bound confidence interval", "adjusted p-value"), align = "c") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = TRUE)
table3
Table 3. post-hoc test of snailanova data by zone
zone
|
difference of means
|
lwr bound confidence interval
|
upr bound confidence interval
|
adjusted p-value
|
lowtide-hightide
|
-7.313
|
-9.349
|
-5.276
|
0.000
|
midtide-hightide
|
-4.125
|
-6.162
|
-2.088
|
0.000
|
midtide-lowtide
|
3.188
|
1.151
|
5.224
|
0.001
|
Marsh Snail part 3, ANOVA, tables, column graph
# libraries
library(ggplot2)
library(Rmisc)
library(dplyr)
library(kableExtra)
library(ggtext)
library(stringr)
library(readr)
library(ggtext)
# load in the data for the marsh snail
marshsnaildata <- read.csv("Marsh_Snail_Data_ 2 .csv")
View(marshsnaildata)
# summary stats
marshsum <- summarySE(data = marshsnaildata, measurevar = "Height", groupvars = "State", na.rm = TRUE)
marshsum
## State N Height sd se ci
## 1 Florida 10 6.716126 1.840858 0.5821305 1.316871
## 2 Georgia 10 7.443790 2.959360 0.9358317 2.116998
## 3 NorthCarolina 10 10.624127 2.669881 0.8442905 1.909918
## 4 SouthCarolina 10 7.367641 2.013503 0.6367257 1.440374
# graph
marshplot<- ggplot(data = marshsum, aes(x = State, y = Height)) +
geom_col(aes(fill = State)) +
geom_errorbar(aes(ymin = Height - se, ymax = Height + se), width = 0.6, size = 1.8) +
#labels
labs(title = "Mean Body Size of *Littoriaria irrorate* Across U.S.States", #make sure you're using ggtext package and are use the command "element_markdown()" in theme()
x = "State",
y = "Body Size (height)") +
# theme
theme_classic() +
theme(plot.title = element_markdown(face = "bold", size = 15),
axis.title.x = element_text(face = "bold", size = 13),
axis.title.y = element_text(face = "bold", size = 13))
marshplot

# ANOVA test
anova_marsh <- aov(data = marshsnaildata, Height~State)
summary(anova_marsh)
## Df Sum Sq Mean Sq F value Pr(>F)
## State 3 92.38 30.793 5.28 0.00403 **
## Residuals 36 209.96 5.832
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# post-hoc tukey test
marshtukey <- TukeyHSD(anova_marsh)
marshtukey
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Height ~ State, data = marshsnaildata)
##
## $State
## diff lwr upr p adj
## Georgia-Florida 0.72766399 -2.1810856 3.6364135 0.9063288
## NorthCarolina-Florida 3.90800075 0.9992512 6.8167503 0.0047882
## SouthCarolina-Florida 0.65151552 -2.2572340 3.5602651 0.9303762
## NorthCarolina-Georgia 3.18033676 0.2715872 6.0890863 0.0275720
## SouthCarolina-Georgia -0.07614847 -2.9848980 2.8326011 0.9998718
## SouthCarolina-NorthCarolina -3.25648523 -6.1652348 -0.3477357 0.0231860
# extract tuley test as data frame
df_marshtukey <- as.data.frame(marshtukey$State) %>%
mutate(
diff = round(diff, 3),
lwr = round(lwr, 3),
upr = round(upr, 3),
`p adj` = round(`p adj`, 3))
# make table here because there is no way i'm copy and pasting everything into word
# table 4
table5 <- kable(
x = df_marshtukey,
caption = "Table 5. Post-hoc test for marsh snail data.",
col.names = c("State", "difference of means", "lwr bound confidence interval", "upr bound confidence interval", "adjusted p-value"), align = "c") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
table5
Table 5. Post-hoc test for marsh snail data.
State
|
difference of means
|
lwr bound confidence interval
|
upr bound confidence interval
|
adjusted p-value
|
Georgia-Florida
|
0.728
|
-2.181
|
3.636
|
0.906
|
NorthCarolina-Florida
|
3.908
|
0.999
|
6.817
|
0.005
|
SouthCarolina-Florida
|
0.652
|
-2.257
|
3.560
|
0.930
|
NorthCarolina-Georgia
|
3.180
|
0.272
|
6.089
|
0.028
|
SouthCarolina-Georgia
|
-0.076
|
-2.985
|
2.833
|
1.000
|
SouthCarolina-NorthCarolina
|
-3.256
|
-6.165
|
-0.348
|
0.023
|