library(MASS)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-6
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
It is ok to see these warnings! R is just telling you that it has read the packages correctly but they were built in different versions of R than the one you are currently using, it will not effect the function of the package!
Before we can begin our analysis, we first need to import and transform the data from long format (each species in each sample on a new line) to wide format (each sample as a row with species as columns). We have done this for you but if you would like to know how to do it yourself some extra code is provided at the end.
data <- read.csv(file = "LL.data.transformed.csv", header = T, stringsAsFactors = F)
head(data)
## Sample.ID Location Method Acari Amphipoda Aranaea Archaeognatha
## 1 A1 Riparian Funnel 2 1 1 0
## 2 A2 Riparian Funnel 0 0 1 0
## 3 A3 Riparian Hand 0 0 1 0
## 4 B1 Riparian Funnel 0 0 1 0
## 5 B2 Riparian Hand 0 0 1 0
## 6 B3 Riparian Funnel 0 0 1 0
## Blattodea Coletoptera Collembola Cycloptera Dermptera Diotocardia
## 1 0 1 3 0 0 1
## 2 0 2 0 0 0 0
## 3 1 0 4 0 0 0
## 4 2 0 0 0 0 0
## 5 1 0 0 0 0 0
## 6 1 1 0 0 0 0
## Diplura Diptera Formicidae Gastropoda Hemiptera Hymenoptera Isopoda
## 1 1 0 0 0 0 1 0
## 2 0 0 2 0 0 0 0
## 3 0 0 1 0 0 0 0
## 4 0 3 2 0 0 0 0
## 5 0 0 0 0 0 0 0
## 6 0 0 5 0 0 0 0
## Isoptera Ispotera larvae Lepidoptera Nematoda Neuroptera Orthoptera
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0
## 6 0 0 0 0 0 1 0
## Pselaphidae Pseudoscorpion Psocoptera Pseudosorpion Siphonoptera
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 4
## Thysanoptera Thysanura Trichoptera
## 1 0 0 1
## 2 0 0 0
## 3 0 0 0
## 4 1 0 0
## 5 0 0 0
## 6 1 0 0
We can now calculate the species richness and abundance for each sample and save into a new data frame called data.summary
data.summary <- data %>%
mutate(Abundance = rowSums(.[4:35] ), Richness = rowSums(.[4:35]>0))%>%
select(Sample.ID, Location, Method, Abundance, Richness)
head(data.summary)
## Sample.ID Location Method Abundance Richness
## 1 A1 Riparian Funnel 12 9
## 2 A2 Riparian Funnel 5 3
## 3 A3 Riparian Hand 7 4
## 4 B1 Riparian Funnel 9 5
## 5 B2 Riparian Hand 2 2
## 6 B3 Riparian Funnel 14 7
To be able to calcuate species diversity and evenness we need a dataframe that does no include the extra information (site, method etc) that is in the current dataset.
data.simple <- data[, 4:35] #this tells R that we want to make a new data frame using all rows but only the information from columns 4 to 35 which contain the species data
Now that we have this we can use a similar method above to calculate H (shannon diversity) and J (evenness) Lickily for us, in the vegan package there is a function {diversity} which calculates shannon diversity without having to go through all the steps
data.summary <- data.summary%>%
mutate(H = diversity(data.simple, index = "shannon", MARGIN = 1, base = exp(1)), J = (H/log(Richness)))%>%
select(Sample.ID, Location, Method, Abundance, Richness, H, J)
head(data.summary)
## Sample.ID Location Method Abundance Richness H J
## 1 A1 Riparian Funnel 12 9 2.0947290 0.9533523
## 2 A2 Riparian Funnel 5 3 1.0549202 0.9602297
## 3 A3 Riparian Hand 7 4 1.1537419 0.8322489
## 4 B1 Riparian Funnel 9 5 1.5229551 0.9462652
## 5 B2 Riparian Hand 2 2 0.6931472 1.0000000
## 6 B3 Riparian Funnel 14 7 1.6681740 0.8572718
If you compare these values to the ones calculated in the excel spreadsheet you can see they are the same!
To see how effective our sampling methods were we can use species accumulation curves which plots the curve of cumulative species richness versus sampling effort (number of samples). In the vegan package we have a function {specaccum} which calculates this for us. Let’s begin by looking at the total species accumulation over all sites and sampling methods
totAccum <- specaccum(data.simple, method = "exact")
plot(totAccum, xlab="Number of quadrats", ylab = "Number of Species", ci = 0)
From this plot, does it look like we have sufficient sampling to capture all the species present in the leaf litter?
To see how species are accumulated in the different habitats and using different collection methods we need to add subset into the specaccum function. For the report we need a curve for each of;
You can do this by changing out the values in (subset = (data.summary\(Location == "" & data.summary\)Method == “”)). To add all the lines to the same plot you need to include ‘add=TRUE’ into the plot function and you can change the colour of each line. We will also add a legend so we know which line is which.
RidgeFunnel <- specaccum(data.simple, method = "exact", subset = (data.summary$Location == "Ridge" & data.summary$Method == "Funnel"))# selects rows which are from ridge sites and sorted by Funnel
plot(RidgeFunnel,xlab="Number of quadrats", ylab = "Number of Species", ci = 0, col = "blue")# the ci = 0 here tells R not to plot the error bars
RidgeHand<- specaccum(data.simple, method = "exact", subset = (data.summary$Location == "Ridge" & data.summary$Method == "Hand"))
plot(RidgeHand, ci = 0, col = "red", add = TRUE)
legend("bottomright", c("Ridge, Funnel", "Ridge, Hand", "Riparian, Funnel", "Riparian, Hand"), col = c("blue", "red", "yellow", "black"), pch = 15)
What do these plots tell us about the adequacy of our sampling methods for each habitat?
To compare the diversity of the two communities/habitats we will be using t-tests to calculate the significance of the difference between the means of each diversity indicie we calculated, i.e. how does the average species diversity (H) differ between the two habitats. Again we could do this in excel or by hand but the code is provided below.
First we need to subset the whole dataset so that we are only using the data from the Tullgren Funnel sorting method.
summary.funnel <- data.summary%>% filter(Method == "Funnel")
head(summary.funnel)
## Sample.ID Location Method Abundance Richness H J
## 1 A1 Riparian Funnel 12 9 2.094729 0.9533523
## 2 A2 Riparian Funnel 5 3 1.054920 0.9602297
## 3 B1 Riparian Funnel 9 5 1.522955 0.9462652
## 4 B3 Riparian Funnel 14 7 1.668174 0.8572718
## 5 C1 Ridge Funnel 9 7 1.889159 0.9708358
## 6 C2 Ridge Funnel 1 1 0.000000 NaN
We can now calculate the t-tests for each indicie. Make sure to take note of the t-value and the p-value we need these to state our statistical conclusions
rich.test <- t.test(Richness ~ Location, data = summary.funnel)
rich.test
##
## Welch Two Sample t-test
##
## data: Richness by Location
## t = -0.60614, df = 21.924, p-value = 0.5506
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.948082 1.614749
## sample estimates:
## mean in group Ridge mean in group Riparian
## 3.833333 4.500000
abund.test <- t.test(Abundance ~ Location, data = summary.funnel)
abund.test
H.test <- t.test(H~Location, data = summary.funnel)
H.test
J.test <- t.test(J~Location, data = summary.funnel)
J.test
What do these conclusions tell us about the data we collected? Are the communities different?
But first we need to make a data frame with the relevant data
df.summary <- summary.funnel %>%
group_by(Location)%>%
summarise(Abund = mean(Abundance), Abund.sd = sd(Abundance), Rich=mean(Richness), rich.sd = sd(Richness), H1 = mean(H), H.sd = sd(H), J1 = mean(!is.na(J)), J.sd = sd(!is.na(J))) # we have to use !is.na() here because J was calculated using H and some values were divided by zero which resulted in the Na. !is.na() uses all the other values and excludes the Nas
head(df.summary)
## # A tibble: 2 x 9
## Location Abund Abund.sd Rich rich.sd H1 H.sd J1 J.sd
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ridge 6.92 6.16 3.83 2.98 0.996 0.754 0.75 0.452
## 2 Riparian 7.79 5.48 4.5 2.56 1.14 0.742 0.786 0.426
Now we can make our plots using ggplot2
ggplot(df.summary, aes(x=Location, y=Abund, ymin=(Abund-Abund.sd), ymax=(Abund+Abund.sd), fill = Location))+
geom_col()+
geom_errorbar()+
labs(x=NULL, y = "Average Species Abundance per plot")+
theme_classic()
** You can do the same for each other plot required for the report by changing the y, ymin, and ymax values**
LL.data <- read.csv(file = "2051ENV raw data.csv", header = T, stringsAsFactors = F)
head(LL.data)
## Sample.ID Species Abundance
## 1 A1 Acari 2
## 2 C1 Acari 1
## 3 D2 Acari 1
## 4 E1 Acari 1
## 5 E3 Acari 2
## 6 G1 Acari 1
After reading in our data we can see there is many rows with data for the same species but from different samples, we now need to transform it into wide format. To do that we need to create a matrix that is as long as the number of samples we have and as wide as the number of species observed.
taxa <- unique(LL.data$Species) #creates list of each unique species
samples <- sort(unique(LL.data$Sample.ID)) #creates list of each unique site or sample
#make empty matrix "data1" ready to fill in
data1 <- matrix(nrow = length(samples), ncol = length(taxa), dimnames = list(samples,taxa))
for(r in 1:nrow(LL.data)){
samp <- LL.data[r, 1]
tax <- LL.data[r, 2]
data1[samp,tax] <- LL.data[r, 3]
}
data1[is.na(data1)] <- 0 #convert NA's to 0
head(data1)
## Acari Amphipoda Aranaea Archaeognatha Blattodea Coletoptera Collembola
## A1 2 1 1 0 0 1 3
## A2 0 0 1 0 0 2 0
## A3 0 0 1 0 1 0 4
## B1 0 0 1 0 2 0 0
## B2 0 0 1 0 1 0 0
## B3 0 0 1 0 1 1 0
## Cycloptera Dermptera Diotocardia Diplura Diptera Formicidae Gastropoda
## A1 0 0 1 1 0 0 0
## A2 0 0 0 0 0 2 0
## A3 0 0 0 0 0 1 0
## B1 0 0 0 0 3 2 0
## B2 0 0 0 0 0 0 0
## B3 0 0 0 0 0 5 0
## Hemiptera Hymenoptera Isopoda Isoptera Ispotera larvae Lepidoptera
## A1 0 1 0 0 0 0 0
## A2 0 0 0 0 0 0 0
## A3 0 0 0 0 0 0 0
## B1 0 0 0 0 0 0 0
## B2 0 0 0 0 0 0 0
## B3 0 0 0 0 0 0 0
## Nematoda Neuroptera Orthoptera Pselaphidae Pseudoscorpion Psocoptera
## A1 0 0 0 0 0 0
## A2 0 0 0 0 0 0
## A3 0 0 0 0 0 0
## B1 0 0 0 0 0 0
## B2 0 0 0 0 0 0
## B3 0 1 0 0 0 0
## Pseudosorpion Siphonoptera Thysanoptera Thysanura Trichoptera
## A1 0 0 0 0 1
## A2 0 0 0 0 0
## A3 0 0 0 0 0
## B1 0 0 1 0 0
## B2 0 0 0 0 0
## B3 0 4 1 0 0
Now we have a dataset that is in wide format where each sample is its own row and each species is its own column but we need to save it as a data frame to do anyfurther analysis
data <- as.data.frame(data1)