Make a code chunk below and read in any packages you may need.
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
Ocean_Point_Data <- read.csv("https://raw.githubusercontent.com/jbaumann3/BIOL234_Biostats_MHC/main/Ocean_Point_Data%202018.csv")
head(Ocean_Point_Data)
#drop columns we don't need!
algae_graze <- Ocean_Point_Data[, -c(6, 33,34)]
head(algae_graze)
## Number trans_description year tidal_ht_m tidal_ht_cat_num calothr semibal
## 1 0 a_cobble_protected 2017 -0.13 1 0 0
## 2 0 a_cobble_protected 2017 -0.13 1 0 0
## 3 0 a_cobble_protected 2017 -0.13 1 0 0
## 4 0 a_cobble_protected 2018 0.38 2 12 8
## 5 0 a_cobble_protected 2018 0.38 2 93 7
## 6 0 a_cobble_protected 2018 0.78 2 65 30
## mytilus Halichondria Spongomorpha chaetomo ulva_lac ascophyl lamin_di
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 5 0 0
## 3 0 0 0 0 0 0 0
## 4 0 0 0 0 0 10 0
## 5 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## lamin_sa fucus_spir fucus_vesic fucus_dist fucus_sp chondrus mastocar
## 1 0 0 0 0 0 0 40
## 2 0 0 5 0 0 0 20
## 3 0 0 5 0 0 0 20
## 4 0 0 60 0 0 0 0
## 5 0 0 0 0 0 0 0
## 6 0 0 5 0 0 0 0
## petrocel corralina ceramium lithothamn Enteromorpha tectura litt_sax litt_lit
## 1 0 2 0 58 0 0 0 64
## 2 0 5 0 60 0 0 0 0
## 3 0 15 0 55 0 0 0 0
## 4 0 0 0 0 0 0 0 112
## 5 0 0 0 0 0 0 0 112
## 6 0 0 0 0 0 0 0 0
## litt_obt nucella
## 1 0 0
## 2 0 0
## 3 16 0
## 4 80 0
## 5 16 0
## 6 0 0
Pivot the data to long format - one column for names and one for values
long_algae <- algae_graze %>%
pivot_longer(cols = calothr:nucella, names_to = "species", values_to = "count")
head(long_algae)
## # A tibble: 6 x 7
## Number trans_description year tidal_ht_m tidal_ht_cat_num species count
## <int> <chr> <int> <dbl> <int> <chr> <dbl>
## 1 0 a_cobble_protected 2017 -0.13 1 calothr 0
## 2 0 a_cobble_protected 2017 -0.13 1 semibal 0
## 3 0 a_cobble_protected 2017 -0.13 1 mytilus 0
## 4 0 a_cobble_protected 2017 -0.13 1 Halichondria 0
## 5 0 a_cobble_protected 2017 -0.13 1 Spongomorpha 0
## 6 0 a_cobble_protected 2017 -0.13 1 chaetomo 0
species <- long_algae%>%
select(species)%>%
group_by(species)%>%
summarise(n = n())
head(species)
## # A tibble: 6 x 2
## species n
## <chr> <int>
## 1 ascophyl 155
## 2 calothr 155
## 3 ceramium 155
## 4 chaetomo 155
## 5 chondrus 155
## 6 corralina 155
long_algae$type <-
ifelse(long_algae$species=="tectura" | long_algae$species=="litt_sax" | long_algae$species=="litt_lit" | long_algae$species=="litt_obt" | long_algae$species=="nucella", "abundance", "%cover")
long_algae$cat <-
ifelse(long_algae$species=="tectura" | long_algae$species=="litt_sax" | long_algae$species=="litt_lit" | long_algae$species=="litt_obt" | long_algae$species=="mytilus" | long_algae$species=="nucella" | long_algae$species=="semibal" | long_algae$species=="tectura", "grazer", "algae")
head(long_algae)
## # A tibble: 6 x 9
## Number trans_description year tidal_ht_m tidal_ht_cat_num species count type
## <int> <chr> <int> <dbl> <int> <chr> <dbl> <chr>
## 1 0 a_cobble_protect~ 2017 -0.13 1 calothr 0 %cov~
## 2 0 a_cobble_protect~ 2017 -0.13 1 semibal 0 %cov~
## 3 0 a_cobble_protect~ 2017 -0.13 1 mytilus 0 %cov~
## 4 0 a_cobble_protect~ 2017 -0.13 1 Halich~ 0 %cov~
## 5 0 a_cobble_protect~ 2017 -0.13 1 Spongo~ 0 %cov~
## 6 0 a_cobble_protect~ 2017 -0.13 1 chaeto~ 0 %cov~
## # ... with 1 more variable: cat <chr>
make 2 new columns, one grazer_count, one algal_diversity
summary <- long_algae %>%
group_by(tidal_ht_cat_num, cat)%>%
summarise(n = n()) %>%
mutate(tidal_ht_cat_num = as.character(tidal_ht_cat_num))%>%
pivot_wider(names_from = cat,
values_from = n)
## `summarise()` has grouped output by 'tidal_ht_cat_num'. You can override using the `.groups` argument.
head(summary)
## # A tibble: 6 x 3
## # Groups: tidal_ht_cat_num [6]
## tidal_ht_cat_num algae grazer
## <chr> <int> <int>
## 1 1 399 147
## 2 2 247 91
## 3 3 209 77
## 4 4 665 245
## 5 5 228 84
## 6 6 228 84
ggplot(summary, aes(x = grazer, y = algae, colour = tidal_ht_cat_num))+
geom_point()+
geom_jitter()
Grazer abundance and algal diversity are proportional, meaning greater grazer abundance corresponds with greater algal diversity.
In the following chunk, starting with the raw data from github, build a dplyr/tidyverse pipeline that calculates mean coral cover by reef type. Don’t forget to calculate a relevant and useful error term for plotting
coral_cover <- read.csv("https://raw.githubusercontent.com/jbaumann3/BIOL234_Biostats_MHC/main/coralcover.csv")
head(coral_cover)
## site type lat transect diver cc_percent
## 1 1 Back Reef 3 1 4 5.8435758
## 2 1 Back Reef 3 2 4 0.9505263
## 3 1 Back Reef 3 3 4 5.2423389
## 4 1 Back Reef 3 4 5 5.0040475
## 5 1 Back Reef 3 5 5 5.8954916
## 6 2 Patch Reef 3 1 4 5.2826190
2.Make a visual that allows us to assess the following Null Hypothesis: There is no difference in coral cover by reef type. You should make a graph that shows relevant means, error bars, and raw data
meancoral <- coral_cover %>%
group_by(type)%>%
summarise(mean=mean(cc_percent), sd=sd(cc_percent), n=n(), se= sd/sqrt(n))
ggplot(meancoral, aes(x=type, y=mean))+
geom_jitter(data=coral_cover, aes(x=type, y=cc_percent, colour=type), width=0.3)+
geom_point(size = 2)+
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width = 0.2, colour = "black", alpha = 0.7)+
theme_bw() +
theme(panel.border = element_rect(colour = "black", size = .5), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+
theme(legend.position = "none")
aov1 <- aov(cc_percent ~ type, data = coral_cover)
summary(aov1)
## Df Sum Sq Mean Sq F value Pr(>F)
## type 2 1172 585.9 6.683 0.00215 **
## Residuals 74 6487 87.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(aov1)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = cc_percent ~ type, data = coral_cover)
##
## $type
## diff lwr upr p adj
## Nearshore-Back Reef -8.7600964 -15.479776 -2.040416 0.0072335
## Patch Reef-Back Reef 0.8217506 -5.010031 6.653533 0.9393628
## Patch Reef-Nearshore 9.5818470 2.905196 16.258498 0.0027941
The ANOVA showed that we do have sufficient evidence to reject the null. There is significant difference in coral cover between reef types (p<0.01). A post-hoc analysis showed that nearshore had significantly less coral cover than patch reef and back reef (p<0.01). Difference in coral cover was not significantly different between back and patch reef.
library(palmerpenguins)
## Warning: package 'palmerpenguins' was built under R version 4.1.3
pens <- penguins%>%
drop_na()
meanpens <- pens %>%
group_by(species, sex)%>%
summarise(mean = mean(bill_length_mm), sd = sd(bill_length_mm), n=n(), se= sd/sqrt(n))
## `summarise()` has grouped output by 'species'. You can override using the `.groups` argument.
#Graph goes here
ggplot(meanpens, aes(x = species, y = mean, colour = sex))+
geom_jitter(data = pens, aes(x = species, y = bill_length_mm, colour = sex))+
geom_point(size = 2)+
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width = 0.2, colour = "black", alpha = 0.8)+
theme_bw() +
theme(panel.border = element_rect(colour = "black", size = .5), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
What does it look like we see in the graph?
The graph appears to show sex difference within species for bill length. There is also difference between species, with apparent significant difference between Adelie penguins and Chinstrap and Gentoo penguins. Bill length for Chinstrap and Gentoo penguins doesn’t appear to be different.
#stats go here!
aov2 <- aov(bill_length_mm ~ species * sex, data = pens)
summary(aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## species 2 7015 3508 654.189 <2e-16 ***
## sex 1 1136 1136 211.807 <2e-16 ***
## species:sex 2 24 12 2.284 0.103
## Residuals 327 1753 5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(aov2)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = bill_length_mm ~ species * sex, data = pens)
##
## $species
## diff lwr upr p adj
## Chinstrap-Adelie 10.009851 9.209424 10.8102782 0.0000000
## Gentoo-Adelie 8.744095 8.070779 9.4174106 0.0000000
## Gentoo-Chinstrap -1.265756 -2.094535 -0.4369773 0.0010847
##
## $sex
## diff lwr upr p adj
## male-female 3.693368 3.194089 4.192646 0
##
## $`species:sex`
## diff lwr upr p adj
## Chinstrap:female-Adelie:female 9.315995 7.937732 10.6942588 0.0000000
## Gentoo:female-Adelie:female 8.306259 7.138639 9.4738788 0.0000000
## Adelie:male-Adelie:female 3.132877 2.034137 4.2316165 0.0000000
## Chinstrap:male-Adelie:female 13.836583 12.458320 15.2148470 0.0000000
## Gentoo:male-Adelie:female 12.216236 11.064727 13.3677453 0.0000000
## Gentoo:female-Chinstrap:female -1.009736 -2.443514 0.4240412 0.3338130
## Adelie:male-Chinstrap:female -6.183118 -7.561382 -4.8048548 0.0000000
## Chinstrap:male-Chinstrap:female 4.520588 2.910622 6.1305547 0.0000000
## Gentoo:male-Chinstrap:female 2.900241 1.479553 4.3209291 0.0000002
## Adelie:male-Gentoo:female -5.173382 -6.341002 -4.0057622 0.0000000
## Chinstrap:male-Gentoo:female 5.530325 4.096547 6.9641020 0.0000000
## Gentoo:male-Gentoo:female 3.909977 2.692570 5.1273846 0.0000000
## Chinstrap:male-Adelie:male 10.703707 9.325443 12.0819703 0.0000000
## Gentoo:male-Adelie:male 9.083360 7.931851 10.2348686 0.0000000
## Gentoo:male-Chinstrap:male -1.620347 -3.041035 -0.1996591 0.0149963
Looking at stats and graph, what do we see?
There is a significant difference in bill length between species and sex (p<0.001). There is no significant interaction effect. The results of the post-hoc Tukey’s test shows us that each species is significantly different from another (p<0.01). There is also a significant difference between sex, with male penguins having larger bills than females (p=0). Males and females of each species were significantly different both within and between species, with the exception of female Gentoo and female Chinstrap, which were not significantly different.
Generally speaking, R has a tough time dealing with time formatting. This is a common issue in programming. There are special data formats called Datetime that we can use to work through this.
time_series <- read.csv("https://raw.githubusercontent.com/jbaumann3/Belize-RT-Baumann-et-al-2021/main/temperature%20and%20light/RT_in_situ_temp.csv")
head(time_series)
## Date Month Day Year Time..GMT. temp site
## 1 12/11/2017 12 11 2017 3:00:00 PM 26.524 False Caye
## 2 12/11/2017 12 11 2017 3:15:00 PM 26.549 False Caye
## 3 12/11/2017 12 11 2017 3:30:00 PM 26.598 False Caye
## 4 12/11/2017 12 11 2017 3:45:00 PM 26.671 False Caye
## 5 12/11/2017 12 11 2017 4:00:00 PM 26.744 False Caye
## 6 12/11/2017 12 11 2017 4:15:00 PM 26.769 False Caye
str(time_series)
## 'data.frame': 59864 obs. of 7 variables:
## $ Date : chr "12/11/2017" "12/11/2017" "12/11/2017" "12/11/2017" ...
## $ Month : int 12 12 12 12 12 12 12 12 12 12 ...
## $ Day : int 11 11 11 11 11 11 11 11 11 11 ...
## $ Year : int 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
## $ Time..GMT.: chr "3:00:00 PM" "3:15:00 PM" "3:30:00 PM" "3:45:00 PM" ...
## $ temp : num 26.5 26.5 26.6 26.7 26.7 ...
## $ site : chr "False Caye" "False Caye" "False Caye" "False Caye" ...
These data are from an experiment I did recently on coral reefs in Belize. You should see temperature data from 2 sites across a range of dates.
new_time <- time_series %>%
unite(datetime, c("Date", "Time..GMT."))
head(new_time)
## datetime Month Day Year temp site
## 1 12/11/2017_3:00:00 PM 12 11 2017 26.524 False Caye
## 2 12/11/2017_3:15:00 PM 12 11 2017 26.549 False Caye
## 3 12/11/2017_3:30:00 PM 12 11 2017 26.598 False Caye
## 4 12/11/2017_3:45:00 PM 12 11 2017 26.671 False Caye
## 5 12/11/2017_4:00:00 PM 12 11 2017 26.744 False Caye
## 6 12/11/2017_4:15:00 PM 12 11 2017 26.769 False Caye
new_time$datetime <- mdy_hms(new_time$datetime)
str(new_time)
## 'data.frame': 59864 obs. of 6 variables:
## $ datetime: POSIXct, format: "2017-12-11 15:00:00" "2017-12-11 15:15:00" ...
## $ Month : int 12 12 12 12 12 12 12 12 12 12 ...
## $ Day : int 11 11 11 11 11 11 11 11 11 11 ...
## $ Year : int 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
## $ temp : num 26.5 26.5 26.6 26.7 26.7 ...
## $ site : chr "False Caye" "False Caye" "False Caye" "False Caye" ...
ggplot(new_time, aes(x = datetime, y = temp, colour= site))+
geom_line(alpha = 0.5)+
geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'