This data come from work I did with a few undergraduate students in 2022. Fouling communities are marine communities that foul or cover hard surfaces (like dock pilings, boats, rocks, etc.). They are comprised of sessile (non-moving) invertebrates. They also happen to contain a lot of invasive species. This project collected data on fouling communities that adhered themselves to plastic plates hanging in 2 locations in MA - Beverly Harbor and Gloucester Harbor. Plates were left in the water for 1 month, 2 months, 6 months, or 12 months. Plates were assessed for species present at each location and time point (there are multiple plates for each location and time point). Data was organized for us to make statistical comparisons across time and space.
The data is set up as follows: “UniqueID” - this was an ID created to represent individual plates, they are given a “G” or a “B” indicating Gloucester or Beverly, a number for months in water (1,2,6, or 12), and a final letter (A,B,C,D,E, or F) indicating a single plate.
“Location” - another indicator of where the plate was deployed (Beverly or Gloucester)
All of the other columns are species observed on the plates and the values below them indicate abundance/count
NOTE: for each prompt, I need to see code in order to give you credit! NOTE: you can perform all of these tasks in one code chunk or many code chunks, this is up to you
The scree plot shows similar levels of variance between some of the dimensions. The influence of variance is spread out between many species. The two principle components are not as separate from the other species as we have seen in previous examples.
The overlapping of the ellipse tells us that Beverly is very similar to Gloucester, though Gloucester has some outliers (data points 33 and 36) which expand the ellipse a bit further to accommodate for greater variability among the data points associated with Gloucester.
fouling <- read.table('fouling.csv',sep=',',header=T)
fouling
## Unique.ID Location D..vexillum Anemone Semibalanus Mytilus Styela
## 1 G1A Gloucester 0 0 0 0 0
## 2 G1B Gloucester 0 0 0 0 0
## 3 G1C Gloucester 0 0 1 0 0
## 4 G1D Gloucester 0 0 0 0 0
## 5 B1A Beverly 0 0 0 0 0
## 6 B1B Beverly 0 0 0 0 0
## 7 B1C Beverly 0 0 0 0 0
## 8 B1D Beverly 0 0 0 0 0
## 9 G2A Gloucester 0 0 0 0 0
## 10 G2B Gloucester 0 0 0 0 0
## 11 G2C Gloucester 0 0 0 0 0
## 12 G2D Gloucester 0 0 0 0 0
## 13 G2E Beverly 0 0 0 0 0
## 14 B2A Beverly 0 0 14 0 0
## 15 B2B Beverly 0 0 7 0 0
## 16 B2C Beverly 0 0 9 0 0
## 17 B2D Gloucester 0 0 8 0 0
## 18 B2E Gloucester 0 0 15 0 0
## 19 G6A Gloucester 0 0 0 1 0
## 20 G6B Gloucester 0 0 0 0 0
## 21 G6C Beverly 0 0 0 0 0
## 22 G6D Beverly 0 0 0 0 0
## 23 G6E Beverly 0 0 0 0 0
## 24 B6A Beverly 0 0 4 0 0
## 25 B6B Gloucester 0 0 3 0 0
## 26 B6C Gloucester 0 0 6 0 0
## 27 B6D Gloucester 0 0 0 0 0
## 28 B6E Gloucester 0 0 6 0 0
## 29 G12A Beverly 0 0 24 47 0
## 30 G12B Beverly 0 0 12 51 0
## 31 G12C Beverly 0 0 0 18 0
## 32 G12D Beverly 0 0 25 32 0
## 33 G12E Gloucester 0 0 32 19 0
## 34 G12F Gloucester 0 0 8 28 0
## 35 B12A Gloucester 5 2 13 3 1
## 36 B12B Gloucester 12 0 25 5 5
## 37 B12C Beverly 0 0 0 2 0
## 38 B12D Beverly 0 1 10 7 1
## 39 B12E Beverly 2 0 12 5 1
## 40 B12F Beverly 1 0 8 3 0
## Juv..Mussel Diplosoma Encrusting.bryozoan Oyster Ascidiella Botryllus
## 1 0 2 0 0 19 0
## 2 0 10 0 0 25 0
## 3 0 18 0 0 64 2
## 4 0 7 0 0 13 0
## 5 0 0 1 0 25 0
## 6 0 2 0 0 32 0
## 7 0 0 0 0 11 0
## 8 0 0 0 0 20 0
## 9 0 9 0 0 4 0
## 10 0 16 0 0 13 0
## 11 0 9 0 0 18 0
## 12 0 12 0 0 17 2
## 13 0 10 0 0 23 0
## 14 0 0 2 0 17 0
## 15 0 3 1 2 16 0
## 16 0 0 4 1 18 2
## 17 0 1 2 0 20 0
## 18 0 3 1 0 10 0
## 19 0 0 0 0 9 0
## 20 0 0 0 0 22 0
## 21 0 0 0 0 27 0
## 22 0 0 0 0 12 0
## 23 0 0 0 0 20 0
## 24 0 0 6 1 20 0
## 25 125 3 5 0 12 5
## 26 0 0 9 0 15 7
## 27 0 5 7 0 23 9
## 28 0 4 15 3 12 14
## 29 0 0 0 0 0 0
## 30 0 0 0 0 0 0
## 31 0 0 0 0 0 0
## 32 0 0 0 0 0 30
## 33 0 1 0 0 1 0
## 34 0 0 0 0 0 0
## 35 0 2 1 6 46 2
## 36 0 5 0 11 52 16
## 37 0 2 0 15 48 7
## 38 0 10 0 0 22 0
## 39 0 3 0 0 28 0
## 40 0 0 0 8 0 1
## Eudendrium Limpet Corella Bugula Ribbed.Mussel Ciona Botrylloides
## 1 29 0 0 20 0 40 0
## 2 58 0 0 20 0 53 6
## 3 0 0 0 7 0 53 0
## 4 58 0 0 4 0 38 1
## 5 145 0 0 60 0 10 2
## 6 145 0 0 40 0 12 4
## 7 319 0 0 0 0 0 0
## 8 261 0 0 0 0 0 1
## 9 58 0 0 20 0 69 2
## 10 29 0 0 20 0 70 3
## 11 29 0 0 0 0 65 3
## 12 29 0 0 0 0 61 9
## 13 58 0 0 0 0 63 8
## 14 261 0 0 40 0 2 0
## 15 203 0 0 60 0 3 0
## 16 174 0 0 40 0 6 0
## 17 232 0 0 40 0 5 0
## 18 290 0 0 40 0 1 0
## 19 87 0 0 40 0 90 0
## 20 29 0 0 0 0 115 6
## 21 29 0 0 20 0 126 8
## 22 116 0 0 20 0 158 4
## 23 87 0 0 40 0 116 11
## 24 232 0 0 0 0 0 0
## 25 261 0 0 0 0 4 0
## 26 290 0 0 0 0 0 0
## 27 203 0 0 0 0 2 0
## 28 203 0 0 0 0 6 3
## 29 0 0 4 24 1 30 6
## 30 0 0 0 12 2 12 4
## 31 0 0 11 32 14 30 10
## 32 0 1 10 40 1 53 0
## 33 0 0 20 32 1 42 19
## 34 0 1 0 13 0 41 13
## 35 0 0 2 24 2 2 5
## 36 0 2 2 20 3 2 6
## 37 0 1 1 13 4 6 10
## 38 0 15 3 60 0 8 10
## 39 0 3 0 50 6 4 3
## 40 0 0 0 0 1 6 0
library("factoextra")
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("FactoMineR")
pca.data <- PCA(fouling[,-(1:2)], scale.unit = TRUE, graph = FALSE)
fviz_eig(pca.data, addlabels = TRUE, ylim = c(0, 70))
fviz_pca_var(pca.data, col.var = "cos2",
gradient.cols = c("#FFCC00", "#CC9933", "#660033", "#330033"),
repel = TRUE)
pca.data <- PCA(t(fouling[,-(1:2)]), scale.unit = TRUE, graph = FALSE)
fviz_pca_ind(pca.data, col.ind = "cos2",
gradient.cols = c("#FFCC00", "#CC9933", "#660033", "#330033"),
repel = TRUE)
library(ggpubr)
a <- fviz_pca_ind(pca.data, col.ind = "cos2",
gradient.cols = c("#FFCC00", "#CC9933", "#660033", "#330033"),
repel = TRUE)
ggpar(a,
title = "Principal Component Analysis",
xlab = "PC1", ylab = "PC2",
legend.title = "Cos2", legend.position = "top",
ggtheme = theme_minimal())
pca.data <- PCA(fouling[,-(1:2)], scale.unit = TRUE,ncp = 2, graph = FALSE)
fouling$Location <- as.factor(fouling$Location)
library(RColorBrewer)
nb.cols <- 3
mycolors <- colorRampPalette(brewer.pal(3, "Set1"))(nb.cols)
a <- fviz_pca_ind(pca.data, col.ind = fouling$Location,
palette = mycolors, addEllipses = TRUE)
ggpar(a,
title = "Principal Component Analysis",
xlab = "PC1", ylab = "PC2",
legend.title = "Location", legend.position = "top",
ggtheme = theme_minimal())
Average Ciona abundance is just slightly higher in Gloucester than Beverly.
library(ggplot2)
mean_Ciona <- aggregate(fouling$Ciona, by = list(fouling$Location), mean)
sd_Ciona <- aggregate(fouling$Ciona, by = list(fouling$Location), sd)
total_Ciona <- merge(mean_Ciona, sd_Ciona, by = "Group.1")
colnames(total_Ciona) <- c("Location", "Mean", "SD")
ggplot(total_Ciona) +
geom_bar( aes(x=Location, y=Mean, fill=Location), stat="identity", alpha=0.5) + scale_fill_manual(values = c("red", "green")) + theme_bw() +
geom_errorbar( aes(x=Location, ymin=Mean-SD, ymax=Mean+SD), width=0.1, colour="black", alpha=0.9, size=1.5) +
labs( x = "Location", y = "Mean")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Based on the scatter plot I would say there is no clear relationship between Ciona and Ascidiella abundance. There are data points all over the graph and the linear line is horizontal suggesting little correlation between the two. The CI on either side of the trend line indicates variability in Ascidiella values with higher amounts of Ciona.
BONUS: Ciona and Ascidiella are both scientific names and therefore should be written in italics! You will need to look up how to do this, but if you can create the plot above and have the axes labels show the italicized names you can earn 3 bonus points!
library(ggplot2)
ggplot(fouling, aes(x=Ciona, y=Ascidiella)) +
geom_point(color = "skyblue", shape = 15, size = 2 ) + labs( x = expression(italic("Ciona")), y = expression(italic("Ascidiella"))) +
geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE)
## `geom_smooth()` using formula = 'y ~ x'
Add a column to the fouling.csv file, this column should represent the number of months a plate has been in the water. Remember, the UniqueID column contains information about number of months - use this to determine the value needed for the new column (should be a 1, 2, 6, or 12). You can add this column in R or using sheets/excel (whatever is easiest for you)
With the column added, make sure the updated dataset is back in R.
Code: convert the new column to a factor
Code: run a two-way ANOVA that looks at how the interaction of the new column AND Location influence the abundance of Ciona.
Question: what does your output tell you about Ciona abundance? The output tells us that months has a p-value of 0.0531 and location has a p-value of 0.9990. Therefore, separately months and location effect Ciona abundance. But the two variables together have a p-value of 0.0166 meaning that the variables do not significantly interact with each other.
Code: create a visual that helps represent how the new column and Location relate to abundance.
fouling2 <- read.table('fouling2.csv',sep=',',header=T)
fouling2
## Unique.ID Location months D..vexillum Anemone Semibalanus Mytilus Styela
## 1 G1A Gloucester 1 0 0 0 0 0
## 2 G1B Gloucester 1 0 0 0 0 0
## 3 G1C Gloucester 1 0 0 1 0 0
## 4 G1D Gloucester 1 0 0 0 0 0
## 5 B1A Beverly 1 0 0 0 0 0
## 6 B1B Beverly 1 0 0 0 0 0
## 7 B1C Beverly 1 0 0 0 0 0
## 8 B1D Beverly 1 0 0 0 0 0
## 9 G2A Gloucester 2 0 0 0 0 0
## 10 G2B Gloucester 2 0 0 0 0 0
## 11 G2C Gloucester 2 0 0 0 0 0
## 12 G2D Gloucester 2 0 0 0 0 0
## 13 G2E Beverly 2 0 0 0 0 0
## 14 B2A Beverly 2 0 0 14 0 0
## 15 B2B Beverly 2 0 0 7 0 0
## 16 B2C Beverly 2 0 0 9 0 0
## 17 B2D Gloucester 2 0 0 8 0 0
## 18 B2E Gloucester 2 0 0 15 0 0
## 19 G6A Gloucester 6 0 0 0 1 0
## 20 G6B Gloucester 6 0 0 0 0 0
## 21 G6C Beverly 6 0 0 0 0 0
## 22 G6D Beverly 6 0 0 0 0 0
## 23 G6E Beverly 6 0 0 0 0 0
## 24 B6A Beverly 6 0 0 4 0 0
## 25 B6B Gloucester 6 0 0 3 0 0
## 26 B6C Gloucester 6 0 0 6 0 0
## 27 B6D Gloucester 6 0 0 0 0 0
## 28 B6E Gloucester 6 0 0 6 0 0
## 29 G12A Beverly 12 0 0 24 47 0
## 30 G12B Beverly 12 0 0 12 51 0
## 31 G12C Beverly 12 0 0 0 18 0
## 32 G12D Beverly 12 0 0 25 32 0
## 33 G12E Gloucester 12 0 0 32 19 0
## 34 G12F Gloucester 12 0 0 8 28 0
## 35 B12A Gloucester 12 5 2 13 3 1
## 36 B12B Gloucester 12 12 0 25 5 5
## 37 B12C Beverly 12 0 0 0 2 0
## 38 B12D Beverly 12 0 1 10 7 1
## 39 B12E Beverly 12 2 0 12 5 1
## 40 B12F Beverly 12 1 0 8 3 0
## Juv..Mussel Diplosoma Encrusting.bryozoan Oyster Ascidiella Botryllus
## 1 0 2 0 0 19 0
## 2 0 10 0 0 25 0
## 3 0 18 0 0 64 2
## 4 0 7 0 0 13 0
## 5 0 0 1 0 25 0
## 6 0 2 0 0 32 0
## 7 0 0 0 0 11 0
## 8 0 0 0 0 20 0
## 9 0 9 0 0 4 0
## 10 0 16 0 0 13 0
## 11 0 9 0 0 18 0
## 12 0 12 0 0 17 2
## 13 0 10 0 0 23 0
## 14 0 0 2 0 17 0
## 15 0 3 1 2 16 0
## 16 0 0 4 1 18 2
## 17 0 1 2 0 20 0
## 18 0 3 1 0 10 0
## 19 0 0 0 0 9 0
## 20 0 0 0 0 22 0
## 21 0 0 0 0 27 0
## 22 0 0 0 0 12 0
## 23 0 0 0 0 20 0
## 24 0 0 6 1 20 0
## 25 125 3 5 0 12 5
## 26 0 0 9 0 15 7
## 27 0 5 7 0 23 9
## 28 0 4 15 3 12 14
## 29 0 0 0 0 0 0
## 30 0 0 0 0 0 0
## 31 0 0 0 0 0 0
## 32 0 0 0 0 0 30
## 33 0 1 0 0 1 0
## 34 0 0 0 0 0 0
## 35 0 2 1 6 46 2
## 36 0 5 0 11 52 16
## 37 0 2 0 15 48 7
## 38 0 10 0 0 22 0
## 39 0 3 0 0 28 0
## 40 0 0 0 8 0 1
## Eudendrium Limpet Corella Bugula Ribbed.Mussel Ciona Botrylloides
## 1 29 0 0 20 0 40 0
## 2 58 0 0 20 0 53 6
## 3 0 0 0 7 0 53 0
## 4 58 0 0 4 0 38 1
## 5 145 0 0 60 0 10 2
## 6 145 0 0 40 0 12 4
## 7 319 0 0 0 0 0 0
## 8 261 0 0 0 0 0 1
## 9 58 0 0 20 0 69 2
## 10 29 0 0 20 0 70 3
## 11 29 0 0 0 0 65 3
## 12 29 0 0 0 0 61 9
## 13 58 0 0 0 0 63 8
## 14 261 0 0 40 0 2 0
## 15 203 0 0 60 0 3 0
## 16 174 0 0 40 0 6 0
## 17 232 0 0 40 0 5 0
## 18 290 0 0 40 0 1 0
## 19 87 0 0 40 0 90 0
## 20 29 0 0 0 0 115 6
## 21 29 0 0 20 0 126 8
## 22 116 0 0 20 0 158 4
## 23 87 0 0 40 0 116 11
## 24 232 0 0 0 0 0 0
## 25 261 0 0 0 0 4 0
## 26 290 0 0 0 0 0 0
## 27 203 0 0 0 0 2 0
## 28 203 0 0 0 0 6 3
## 29 0 0 4 24 1 30 6
## 30 0 0 0 12 2 12 4
## 31 0 0 11 32 14 30 10
## 32 0 1 10 40 1 53 0
## 33 0 0 20 32 1 42 19
## 34 0 1 0 13 0 41 13
## 35 0 0 2 24 2 2 5
## 36 0 2 2 20 3 2 6
## 37 0 1 1 13 4 6 10
## 38 0 15 3 60 0 8 10
## 39 0 3 0 50 6 4 3
## 40 0 0 0 0 1 6 0
fouling2$months <- as.factor(fouling2$months)
str(fouling2)
## 'data.frame': 40 obs. of 21 variables:
## $ Unique.ID : chr "G1A" "G1B" "G1C" "G1D" ...
## $ Location : chr "Gloucester" "Gloucester" "Gloucester" "Gloucester" ...
## $ months : Factor w/ 4 levels "1","2","6","12": 1 1 1 1 1 1 1 1 2 2 ...
## $ D..vexillum : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Anemone : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Semibalanus : int 0 0 1 0 0 0 0 0 0 0 ...
## $ Mytilus : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Styela : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Juv..Mussel : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Diplosoma : int 2 10 18 7 0 2 0 0 9 16 ...
## $ Encrusting.bryozoan: int 0 0 0 0 1 0 0 0 0 0 ...
## $ Oyster : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Ascidiella : int 19 25 64 13 25 32 11 20 4 13 ...
## $ Botryllus : int 0 0 2 0 0 0 0 0 0 0 ...
## $ Eudendrium : int 29 58 0 58 145 145 319 261 58 29 ...
## $ Limpet : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Corella : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Bugula : int 20 20 7 4 60 40 0 0 20 20 ...
## $ Ribbed.Mussel : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Ciona : int 40 53 53 38 10 12 0 0 69 70 ...
## $ Botrylloides : int 0 6 0 1 2 4 0 1 2 3 ...
MLvC_model <- aov(Ciona ~ months * Location, data = fouling2)
summary(MLvC_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## months 3 10637 3546 2.845 0.0531 .
## Location 1 0 0 0.000 0.9990
## months:Location 3 14792 4931 3.956 0.0166 *
## Residuals 32 39882 1246
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(ggpubr)
library(ggplot2)
fouling_means <- fouling2 %>%
group_by(months, Location) %>%
summary(Ciona_mean = mean(Ciona))
ggplot(fouling2, aes(x = months, y = Ciona, color = Location)) +
geom_line() + geom_point() + facet_wrap(~ Location) + theme_minimal() +
labs(x = "Number of Months", y = "Ciona Abundance")