Diversity Statistical Analysis

library(vegan)

## Loading required package: permute

## Loading required package: lattice

## This is vegan 2.5-4

communityI <- c(10,1,1,1,1,1,1,1,1,1)
communityII <- c(5,5,5,5,5,5,5,5,5,5)

#Shannon Diversity
#compute relative abundance of each species in sample
Com1pI <- communityI/sum(communityI)
Com1H <- -sum(Com1pI*log(Com1pI))

Com2pI <- communityII/sum(communityI)
Com2H <- -sum(Com2pI*log(Com2pI))

#Simpson Diversity on paper

##Question 1
#Using the Shannon Diversity Index, Community 1 had a diversity index of 1.73 which indicates a lower diversity, while community 2 had a 3.51 diversity index which indicates higher diversity in that community. 
#Using the Simpson Diveristy Index, Community 1 had a diversity index of 0.56 which indicates it has a lower diversity than community 2 that had a diversity index of 1.

##Question 2
#Using the limited information and a quick internet search, I was able to write three hypotheses for habitat type regarding this experiment. My first hypothesis (species richess) is that there will be a greater species richness of beetles in woodland habitats than grasses because there is more shelter under bark of trees, under logs and amoung rocks. My second hypothesis (the Shannon Index) is that there will be a larger shannon index diversity in woodlands than grasslands.My third hypothesis (the Simpson Index) is that the relative abundance of beetle will be greater in woodlands because there are more habitat places for beetles to live.

library(reshape)
beetleImport <- read.csv("C:\\Users\\Gabriela Krochmal\\Desktop\\ConBio_R\\groundBeetleAbundanceSP19.csv")
head(beetleImport) #viewing first six rows of dataset

##                  species speciesCode abundance habitatCode replicate
## 1  Abax parallelepipedus     Aba.par       388           E         1
## 2      Bembidion lampros     Bem.lam         1           E         1
## 3 Bembidion mannerheimii     Bem.man         2           E         1
## 4 Calathus rotundicollis     Cal.rot        13           E         1
## 5      Carabus violaceus     Cal.vio         3           E         1
## 6    Leistus fulvibarbis     Lei.ful         1           E         1

str(beetleImport) #viewing data types

## 'data.frame':    323 obs. of  5 variables:
##  $ species    : Factor w/ 48 levels "Abax parallelepipedus",..: 1 15 16 21 22 29 32 35 37 38 ...
##  $ speciesCode: Factor w/ 48 levels "Aba.par","Acu.dub",..: 1 15 16 21 22 28 32 35 37 38 ...
##  $ abundance  : num  388 1 2 13 3 1 59 3 2 2 ...
##  $ habitatCode: Factor w/ 3 levels "E","G","W": 1 1 1 1 1 1 1 1 1 1 ...
##  $ replicate  : int  1 1 1 1 1 1 1 1 1 1 ...

#pasting two columns together
beetleImport$ID <- paste(beetleImport$habitatCode, beetleImport$replicate, sep = "_")

#reshaping long form data into a matrix with cast()
beetle <- cast(beetleImport, ID ~ speciesCode, value= "abundance")

rownames(beetle) <- beetle$ID #name rows with ID
beetle$Id <- NULL #drop ID column

beetle[is.na(beetle)] <- 0 #replace NA with 0

##Estimating alpha diversity with the vegan package
library(vegan)
spBeetle <- specnumber(beetle) #species richness
HBeetle <- diversity(beetle, index="shannon") #Shannon(H')
DBeetle <- diversity(beetle, index="invsimpson") #simpson(1/D)

#making a data frame of the results
diversityResult <- data.frame(spBeetle ,HBeetle, DBeetle)

#importing and merging habitat data
habitat <- read.csv("C:\\Users\\Gabriela Krochmal\\Desktop\\ConBio_R\\groundBeetleHabitatSP19.csv")

#paste two columns together with an underscore
habitat$ID <- paste(habitat$habitatCode, habitat$replicate, sep= "_")

#create data frame
diversityResult <- data.frame(spBeetle, HBeetle, DBeetle)

#create ID column
diversityResult$ID <- rownames(diversityResult)

#merge the two data frames by ID column
beetleDiversity <- merge(diversityResult, habitat, by=c("ID"))

library(vegan)
spBeetle <- specnumber(beetle) #species richness
HBeetle <- diversity(beetle, index="shannon") #shannon (H')
DBeetle <- diversity(beetle, index="invsimpson") #simpson (1/D)

#making a dataframe of the results
diversityResult <- data.frame(spBeetle, HBeetle, DBeetle)

diversityResult$ID <- rownames(diversityResult) #rownames to column

habitat <- read.csv("C:\\Users\\Gabriela Krochmal\\Desktop\\ConBio_R\\groundBeetleHabitatSP19.csv")

#paste two columns together with an underscore
habitat$ID <- paste(habitat$habitatCode, habitat$replicate, sep="_")

#merge the two dataframes by ID column (same as before)
beetleDiversity <- merge(diversityResult, habitat, by=c("ID"))

head(beetleDiversity) #first six rows of data

##    ID spBeetle  HBeetle  DBeetle habitatCode replicate vegHgt habitat
## 1 E_1       18 1.267096 2.571619           E         1    5.5    Edge
## 2 E_2       15 1.424505 3.179289           E         2   15.0    Edge
## 3 E_3       16 1.412179 3.106648           E         3   15.0    Edge
## 4 E_4       26 1.737146 3.871034           E         4    3.0    Edge
## 5 E_5       22 1.636923 3.684945           E         5    2.5    Edge
## 6 E_6       18 1.557477 3.448558           E         6    2.0    Edge

#ANOVA for Species Richness

hist(beetleDiversity$spBeetle)

qqnorm(beetleDiversity$spBeetle)
qqline(beetleDiversity$spBeetle)

anova <- aov(beetleDiversity$spBeetle~ habitatCode, data=beetleDiversity)

plot(anova)

summary(anova)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## habitatCode  2  507.4   253.7   23.28 2.52e-05 ***
## Residuals   15  163.5    10.9                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(anova)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = beetleDiversity$spBeetle ~ habitatCode, data = beetleDiversity)
## 
## $habitatCode
##           diff       lwr       upr     p adj
## G-E   6.166667   1.21555 11.117784 0.0144428
## W-E  -6.833333 -11.78445 -1.882216 0.0071810
## W-G -13.000000 -17.95112 -8.048883 0.0000164

#The results of the Tukey test provide information that the grasslands have a larger diversity than woodlands. My p value was less than 0.5 so I reject my null hypothesis.

#ANOVA for Simpson

hist(beetleDiversity$HBeetle)

qqnorm(beetleDiversity$HBeetle)
qqline(beetleDiversity$HBeetle)

anova2 <- aov(beetleDiversity$HBeetle~ habitatCode, data=beetleDiversity)

plot(anova2)

summary(anova2)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## habitatCode  2 1.2957  0.6479   30.03 5.69e-06 ***
## Residuals   15 0.3236  0.0216                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(anova2)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = beetleDiversity$HBeetle ~ habitatCode, data = beetleDiversity)
## 
## $habitatCode
##            diff        lwr        upr     p adj
## G-E  0.58908809  0.3688140  0.8093622 0.0000132
## W-E  0.04223582 -0.1780383  0.2625099 0.8733261
## W-G -0.54685227 -0.7671264 -0.3265782 0.0000309

#The Tukey test provides data that the difference betwen woodlands and grass was negative, so from this I can infer that the grasslands had a larger diversity. Again, my pvalue was less than 0.05 so I reject my null hypothesis

#ANOVA for hypothesis Shannon

hist(beetleDiversity$DBeetle)

qqnorm(beetleDiversity$DBeetle)
qqline(beetleDiversity$DBeetle)

anova3 <- aov(beetleDiversity$DBeetle~ habitatCode, data=beetleDiversity)

plot(anova3)

summary(anova3)

##             Df Sum Sq Mean Sq F value  Pr(>F)   
## habitatCode  2  8.408   4.204   9.509 0.00215 **
## Residuals   15  6.632   0.442                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(anova3)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = beetleDiversity$DBeetle ~ habitatCode, data = beetleDiversity)
## 
## $habitatCode
##          diff        lwr        upr     p adj
## G-E  1.628969  0.6318303  2.6261083 0.0019247
## W-E  0.479944 -0.5171950  1.4770831 0.4434382
## W-G -1.149025 -2.1461643 -0.1518863 0.0233026

#This Tukey test has a negative difference between woodlands and grasses so I can infer that in this index the grasslands are more diverse than grasslands. I rejected my null hypothesis here as well.

#Linear Model

LM <- lm(beetleDiversity$spBeetle ~ beetleDiversity$vegHgt, data=beetleDiversity)

plot(LM)

summary(LM)

## 
## Call:
## lm(formula = beetleDiversity$spBeetle ~ beetleDiversity$vegHgt, 
##     data = beetleDiversity)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.9838 -1.4385  0.2056  1.5231  4.8670 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            25.47574    1.11459  22.857 1.21e-13 ***
## beetleDiversity$vegHgt -0.74596    0.09808  -7.605 1.06e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.014 on 16 degrees of freedom
## Multiple R-squared:  0.7833, Adjusted R-squared:  0.7698 
## F-statistic: 57.84 on 1 and 16 DF,  p-value: 1.062e-06

#My data generally fit the qqline and was not a funnel shape. My p-value is less than 0.05 so I can confidently reject my null hypothesis. My R squared value was 0.7833, since the value falls close to 1 the data falls within the best fit line. I think that running the linear model for species richness and vegetation height is appropriate because the differing habitats also have different plants, so this way we can see what type of vegetation beetles like most.

#Question 5
##Estimating alpha diversity with the vegan package

communitytrait <- read.csv("C:\\Users\\Gabriela Krochmal\\Desktop\\ConBio_R\\kembellabCommunityTrait.csv")
metadata <- read.csv("C:\\Users\\Gabriela Krochmal\\Desktop\\ConBio_R\\kembellabPlotMetadata.csv")

library(ggplot2) 

#reshaping long form data into a matrix with cast()
plant <- cast(communitytrait, plot ~ species, value= "abundance")

rownames(plant) <- plant$plot #name rows with ID
plant$plot <- NULL #drop ID column

plant[is.na(plant)] <- 0 #replace NA with 0


##Estimating alpha diversity with the vegan package
library(vegan)
spplant <- specnumber(plant) #species richness
Hplant <- diversity(plant, index="shannon") #Shannon(H')
Dplant <- diversity(plant, index="invsimpson") #simpson(1/D)

#making a data frame of the results
diversityResult <- data.frame(spplant ,Hplant, Dplant)

#create ID column
diversityResult$plot <- rownames(diversityResult)

#merge the two data frames by ID column
plantDiversity <- merge(diversityResult, metadata, by=c("plot"))

hist(plantDiversity$spplant)

qqnorm(plantDiversity$spplant)
qqline(plantDiversity$spplant)

plantanova <- aov(plantDiversity$spplant~ habitatCode, data=plantDiversity)

plot(plantanova)

summary(plantanova)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## habitatCode  1  216.7  216.75   5.701 0.0381 *
## Residuals   10  380.2   38.02                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#My assumptions were met. The data fit within the residuals, and the data points were along the best fit line. My pvalue was less than 0.05 so I reject my null hypothesis. 

graph <- ggplot(data=plantDiversity, aes(x=habitat, y=plantDiversity$spplant))

graph +
  theme_classic() +
  geom_boxplot()

Diversity Statistical Analysis

Gabriela Krochmal

March 21st, 2019