## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-4
communityI <- c(10,1,1,1,1,1,1,1,1,1)
communityII <- c(5,5,5,5,5,5,5,5,5,5)
#Shannon Diversity
#compute relative abundance of each species in sample
Com1pI <- communityI/sum(communityI)
Com1H <- -sum(Com1pI*log(Com1pI))
Com2pI <- communityII/sum(communityI)
Com2H <- -sum(Com2pI*log(Com2pI))
#Simpson Diversity on paper
##Question 1
#Using the Shannon Diversity Index, Community 1 had a diversity index of 1.73 which indicates a lower diversity, while community 2 had a 3.51 diversity index which indicates higher diversity in that community.
#Using the Simpson Diveristy Index, Community 1 had a diversity index of 0.56 which indicates it has a lower diversity than community 2 that had a diversity index of 1.
##Question 2
#Using the limited information and a quick internet search, I was able to write three hypotheses for habitat type regarding this experiment. My first hypothesis (species richess) is that there will be a greater species richness of beetles in woodland habitats than grasses because there is more shelter under bark of trees, under logs and amoung rocks. My second hypothesis (the Shannon Index) is that there will be a larger shannon index diversity in woodlands than grasslands.My third hypothesis (the Simpson Index) is that the relative abundance of beetle will be greater in woodlands because there are more habitat places for beetles to live.
library(reshape)
beetleImport <- read.csv("C:\\Users\\Gabriela Krochmal\\Desktop\\ConBio_R\\groundBeetleAbundanceSP19.csv")
head(beetleImport) #viewing first six rows of dataset
## species speciesCode abundance habitatCode replicate
## 1 Abax parallelepipedus Aba.par 388 E 1
## 2 Bembidion lampros Bem.lam 1 E 1
## 3 Bembidion mannerheimii Bem.man 2 E 1
## 4 Calathus rotundicollis Cal.rot 13 E 1
## 5 Carabus violaceus Cal.vio 3 E 1
## 6 Leistus fulvibarbis Lei.ful 1 E 1
str(beetleImport) #viewing data types
## 'data.frame': 323 obs. of 5 variables:
## $ species : Factor w/ 48 levels "Abax parallelepipedus",..: 1 15 16 21 22 29 32 35 37 38 ...
## $ speciesCode: Factor w/ 48 levels "Aba.par","Acu.dub",..: 1 15 16 21 22 28 32 35 37 38 ...
## $ abundance : num 388 1 2 13 3 1 59 3 2 2 ...
## $ habitatCode: Factor w/ 3 levels "E","G","W": 1 1 1 1 1 1 1 1 1 1 ...
## $ replicate : int 1 1 1 1 1 1 1 1 1 1 ...
#pasting two columns together
beetleImport$ID <- paste(beetleImport$habitatCode, beetleImport$replicate, sep = "_")
#reshaping long form data into a matrix with cast()
beetle <- cast(beetleImport, ID ~ speciesCode, value= "abundance")
rownames(beetle) <- beetle$ID #name rows with ID
beetle$Id <- NULL #drop ID column
beetle[is.na(beetle)] <- 0 #replace NA with 0
##Estimating alpha diversity with the vegan package
library(vegan)
spBeetle <- specnumber(beetle) #species richness
HBeetle <- diversity(beetle, index="shannon") #Shannon(H')
DBeetle <- diversity(beetle, index="invsimpson") #simpson(1/D)
#making a data frame of the results
diversityResult <- data.frame(spBeetle ,HBeetle, DBeetle)
#importing and merging habitat data
habitat <- read.csv("C:\\Users\\Gabriela Krochmal\\Desktop\\ConBio_R\\groundBeetleHabitatSP19.csv")
#paste two columns together with an underscore
habitat$ID <- paste(habitat$habitatCode, habitat$replicate, sep= "_")
#create data frame
diversityResult <- data.frame(spBeetle, HBeetle, DBeetle)
#create ID column
diversityResult$ID <- rownames(diversityResult)
#merge the two data frames by ID column
beetleDiversity <- merge(diversityResult, habitat, by=c("ID"))
library(vegan)
spBeetle <- specnumber(beetle) #species richness
HBeetle <- diversity(beetle, index="shannon") #shannon (H')
DBeetle <- diversity(beetle, index="invsimpson") #simpson (1/D)
#making a dataframe of the results
diversityResult <- data.frame(spBeetle, HBeetle, DBeetle)
diversityResult$ID <- rownames(diversityResult) #rownames to column
habitat <- read.csv("C:\\Users\\Gabriela Krochmal\\Desktop\\ConBio_R\\groundBeetleHabitatSP19.csv")
#paste two columns together with an underscore
habitat$ID <- paste(habitat$habitatCode, habitat$replicate, sep="_")
#merge the two dataframes by ID column (same as before)
beetleDiversity <- merge(diversityResult, habitat, by=c("ID"))
head(beetleDiversity) #first six rows of data
## ID spBeetle HBeetle DBeetle habitatCode replicate vegHgt habitat
## 1 E_1 18 1.267096 2.571619 E 1 5.5 Edge
## 2 E_2 15 1.424505 3.179289 E 2 15.0 Edge
## 3 E_3 16 1.412179 3.106648 E 3 15.0 Edge
## 4 E_4 26 1.737146 3.871034 E 4 3.0 Edge
## 5 E_5 22 1.636923 3.684945 E 5 2.5 Edge
## 6 E_6 18 1.557477 3.448558 E 6 2.0 Edge
#ANOVA for Species Richness
hist(beetleDiversity$spBeetle)

qqnorm(beetleDiversity$spBeetle)
qqline(beetleDiversity$spBeetle)

anova <- aov(beetleDiversity$spBeetle~ habitatCode, data=beetleDiversity)
plot(anova)




## Df Sum Sq Mean Sq F value Pr(>F)
## habitatCode 2 507.4 253.7 23.28 2.52e-05 ***
## Residuals 15 163.5 10.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = beetleDiversity$spBeetle ~ habitatCode, data = beetleDiversity)
##
## $habitatCode
## diff lwr upr p adj
## G-E 6.166667 1.21555 11.117784 0.0144428
## W-E -6.833333 -11.78445 -1.882216 0.0071810
## W-G -13.000000 -17.95112 -8.048883 0.0000164
#The results of the Tukey test provide information that the grasslands have a larger diversity than woodlands. My p value was less than 0.5 so I reject my null hypothesis.
#ANOVA for Simpson
hist(beetleDiversity$HBeetle)

qqnorm(beetleDiversity$HBeetle)
qqline(beetleDiversity$HBeetle)

anova2 <- aov(beetleDiversity$HBeetle~ habitatCode, data=beetleDiversity)
plot(anova2)




## Df Sum Sq Mean Sq F value Pr(>F)
## habitatCode 2 1.2957 0.6479 30.03 5.69e-06 ***
## Residuals 15 0.3236 0.0216
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = beetleDiversity$HBeetle ~ habitatCode, data = beetleDiversity)
##
## $habitatCode
## diff lwr upr p adj
## G-E 0.58908809 0.3688140 0.8093622 0.0000132
## W-E 0.04223582 -0.1780383 0.2625099 0.8733261
## W-G -0.54685227 -0.7671264 -0.3265782 0.0000309
#The Tukey test provides data that the difference betwen woodlands and grass was negative, so from this I can infer that the grasslands had a larger diversity. Again, my pvalue was less than 0.05 so I reject my null hypothesis
#ANOVA for hypothesis Shannon
hist(beetleDiversity$DBeetle)

qqnorm(beetleDiversity$DBeetle)
qqline(beetleDiversity$DBeetle)

anova3 <- aov(beetleDiversity$DBeetle~ habitatCode, data=beetleDiversity)
plot(anova3)




## Df Sum Sq Mean Sq F value Pr(>F)
## habitatCode 2 8.408 4.204 9.509 0.00215 **
## Residuals 15 6.632 0.442
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = beetleDiversity$DBeetle ~ habitatCode, data = beetleDiversity)
##
## $habitatCode
## diff lwr upr p adj
## G-E 1.628969 0.6318303 2.6261083 0.0019247
## W-E 0.479944 -0.5171950 1.4770831 0.4434382
## W-G -1.149025 -2.1461643 -0.1518863 0.0233026
#This Tukey test has a negative difference between woodlands and grasses so I can infer that in this index the grasslands are more diverse than grasslands. I rejected my null hypothesis here as well.
#Linear Model
LM <- lm(beetleDiversity$spBeetle ~ beetleDiversity$vegHgt, data=beetleDiversity)
plot(LM)




##
## Call:
## lm(formula = beetleDiversity$spBeetle ~ beetleDiversity$vegHgt,
## data = beetleDiversity)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.9838 -1.4385 0.2056 1.5231 4.8670
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.47574 1.11459 22.857 1.21e-13 ***
## beetleDiversity$vegHgt -0.74596 0.09808 -7.605 1.06e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.014 on 16 degrees of freedom
## Multiple R-squared: 0.7833, Adjusted R-squared: 0.7698
## F-statistic: 57.84 on 1 and 16 DF, p-value: 1.062e-06
#My data generally fit the qqline and was not a funnel shape. My p-value is less than 0.05 so I can confidently reject my null hypothesis. My R squared value was 0.7833, since the value falls close to 1 the data falls within the best fit line. I think that running the linear model for species richness and vegetation height is appropriate because the differing habitats also have different plants, so this way we can see what type of vegetation beetles like most.
#Question 5
##Estimating alpha diversity with the vegan package
communitytrait <- read.csv("C:\\Users\\Gabriela Krochmal\\Desktop\\ConBio_R\\kembellabCommunityTrait.csv")
metadata <- read.csv("C:\\Users\\Gabriela Krochmal\\Desktop\\ConBio_R\\kembellabPlotMetadata.csv")
library(ggplot2)
#reshaping long form data into a matrix with cast()
plant <- cast(communitytrait, plot ~ species, value= "abundance")
rownames(plant) <- plant$plot #name rows with ID
plant$plot <- NULL #drop ID column
plant[is.na(plant)] <- 0 #replace NA with 0
##Estimating alpha diversity with the vegan package
library(vegan)
spplant <- specnumber(plant) #species richness
Hplant <- diversity(plant, index="shannon") #Shannon(H')
Dplant <- diversity(plant, index="invsimpson") #simpson(1/D)
#making a data frame of the results
diversityResult <- data.frame(spplant ,Hplant, Dplant)
#create ID column
diversityResult$plot <- rownames(diversityResult)
#merge the two data frames by ID column
plantDiversity <- merge(diversityResult, metadata, by=c("plot"))
hist(plantDiversity$spplant)

qqnorm(plantDiversity$spplant)
qqline(plantDiversity$spplant)

plantanova <- aov(plantDiversity$spplant~ habitatCode, data=plantDiversity)
plot(plantanova)




## Df Sum Sq Mean Sq F value Pr(>F)
## habitatCode 1 216.7 216.75 5.701 0.0381 *
## Residuals 10 380.2 38.02
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#My assumptions were met. The data fit within the residuals, and the data points were along the best fit line. My pvalue was less than 0.05 so I reject my null hypothesis.
graph <- ggplot(data=plantDiversity, aes(x=habitat, y=plantDiversity$spplant))
graph +
theme_classic() +
geom_boxplot()
