Tessler and Clark (2016) analysis: 1. Paired t test 2. NMDS of community
library(vegan)
library(ggplot2)
library(dplyr)
library(tidyr)
library(ggpubr) #for combining ggplot figures
library(PairedData)
You could also just work with your indic data set if you were continuing code.
note For future analysis, it might be important to have a way to seperate pairs. I added a PairName column. Not important yet but maybe later.
indic <- read.csv("practice3_kdata.csv", header = TRUE)
into long format using gather function from dplyr.
rich_long <- gather(indic, key= "Taxa", value ="richness",
AllRichness)
head(rich_long)
## X PairName location set boulder route grade face_or_top
## 1 1 1C Brick 1 Owl Farm South Street 4- face
## 2 2 1C Brick 1 Owl Farm South Street 4- face
## 3 3 1U Brick 1 Owl Farm South Street 4- face
## 4 4 1U Brick 1 Owl Farm South Street 4- face
## 5 5 2C Brick 2 Backyard Grotesque Old Woman 7 face
## 6 6 2C Brick 2 Backyard Grotesque Old Woman 7 face
## feature feature_size aspect incline sun climbed_or_unclimbed plot_height
## 1 ledge small S vertical 1 climbed 2
## 2 ledge small S vertical 1 climbed 2
## 3 ledge small S vertical 1 unclimbed 2
## 4 ledge small S vertical 1 unclimbed 2
## 5 flake small E overhang 2 climbed 2
## 6 flake small E overhang 2 climbed 2
## AllShannon AllSimpson AllCover LRichness LShannon LSimpson LCover BRichness
## 1 0.6931472 0.5000000 6.00 5 0.8336596 0.4854788 113.10 2
## 2 0.8336596 0.4854788 113.10 5 0.8336596 0.4854788 113.10 2
## 3 0.0000000 0.0000000 0.55 7 0.8711364 0.4797421 112.15 1
## 4 0.8711364 0.4797421 112.15 7 0.8711364 0.4797421 112.15 1
## 5 0.0000000 0.0000000 30.00 3 0.3424462 0.1762646 83.05 1
## 6 0.3424462 0.1762646 83.05 3 0.3424462 0.1762646 83.05 1
## BShannon BSimpson BCover Taxa richness
## 1 0.6931472 0.5 6.00 AllRichness 2
## 2 0.6931472 0.5 6.00 AllRichness 5
## 3 0.0000000 0.0 0.55 AllRichness 1
## 4 0.0000000 0.0 0.55 AllRichness 7
## 5 0.0000000 0.0 30.00 AllRichness 1
## 6 0.0000000 0.0 30.00 AllRichness 3
sum<-
group_by(rich_long, climbed_or_unclimbed) %>%
summarise(
count = n(),
mean = mean(richness, na.rm = TRUE),
sd = sd(richness, na.rm = TRUE)
)
sum
## # A tibble: 2 x 4
## climbed_or_unclimbed count mean sd
## <chr> <int> <dbl> <dbl>
## 1 climbed 7 2.86 1.95
## 2 unclimbed 7 4.14 3.02
Not super pretty, but good way to look at things quickly to get an idea of the data. If you want to make nicer figures, use ggplot.
ggboxplot(rich_long, x = "climbed_or_unclimbed", y = "richness",
color = "climbed_or_unclimbed", palette = c("#00AFBB", "#E7B800"),
order = c("climbed", "unclimbed"),
ylab = "number of species", xlab = "climbing?")
using package PairedData
# Subset climbed data
cl <- subset(rich_long, climbed_or_unclimbed == "climbed", richness,
drop = TRUE)
# subset unclimbed data
uncl <- subset(rich_long, climbed_or_unclimbed == "unclimbed", richness,
drop = TRUE)
# Plot paired data
pd <- paired(cl, uncl)
plot(pd, type = "profile") + theme_bw()
# compute the difference
d <- with(rich_long,
richness[climbed_or_unclimbed == "climbed"] - richness[climbed_or_unclimbed == "unclimbed"])
# Shapiro-Wilk normality test for the differences
shapiro.test(d) # => p-value = 0.4226
##
## Shapiro-Wilk normality test
##
## data: d
## W = 0.91378, p-value = 0.4226
From the output, the p-value is greater than the significance level 0.05 implying that the distribution of the differences (d) are not significantly different from normal distribution. In other words, we can assume the normality. + The null hypothesis of these tests is that “sample distribution is normal”. If the test is significant, the distribution is non-normal. How to interpret: if the p-value > 0.05 implying that the distribution of the data are not significantly different from normal distribution. In other words, we can assume the normality.
with a density plot and Q-Q plot. Q-Q plot draws the correlation between a given sample and the normal distribution. A 45-degree reference line is also plotted. If all the dots fell perfectly on the 45 degree line, the data would be perfectly normally distributed
ggdensity(indic$AllRichness,
main = "Density plot of species richness",
xlab = "Number of species")
#Q-Q plot
ggqqplot(indic$AllRichness)
option one: data saved in two difference numeric vectors
res <- t.test(cl, uncl, paired = TRUE)
res
##
## Paired t-test
##
## data: cl and uncl
## t = -1.7215, df = 6, p-value = 0.136
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.1132530 0.5418244
## sample estimates:
## mean of the differences
## -1.285714
option two: data are saved in one data frame
res <- t.test(richness ~ climbed_or_unclimbed, data = rich_long, paired = TRUE)
res
##
## Paired t-test
##
## data: richness by climbed_or_unclimbed
## t = -1.7215, df = 6, p-value = 0.136
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.1132530 0.5418244
## sample estimates:
## mean of the differences
## -1.285714
Both options should give you the same result, which one you use depends on how your data is set up.
t.test() contains the following output: * Statistic: the value of the t test statistics * parameter: the degrees of freedom for the t test statistics * p.value: the p-value for the test * conf.int: a confidence interval for the mean appropriate to the specified alternative hypothesis. * estimate: the means of the two groups being compared (in the case of independent t test) or difference in means (in the case of paired t test).
# printing the p-value
res$p.value
## [1] 0.1359504
# printing the mean
res$estimate
## mean of the differences
## -1.285714
# printing the confidence interval
res$conf.int
## [1] -3.1132530 0.5418244
## attr(,"conf.level")
## [1] 0.95
Okay so the nmds stuff is tricky. It is weird with just a subset of the data, so we will need to mess around with it for the full data. I have the code as a text here just so you can take a look, but really do not worry about it for now. The main point and big findings of your analysis will be the paired t tests.
kdata = read.csv("practicedata1.csv", header=TRUE)
#group_by command not necessary with following code
#kdata = kdata %>%
# group_by(taxa)
which(colnames(kdata)=="cover")
## [1] 16
kdata[,16][kdata[,16]==6] = 75
kdata[,16][kdata[,16]==5] = 30
kdata[,16][kdata[,16]==4] = 7.5
kdata[,16][kdata[,16]==3] = 3
kdata[,16][kdata[,16]==2] = 0.55
kdata[,16][kdata[,16]==1] = 0.05
kdata[,16][kdata[,16]==0] = 0
lichen = kdata[kdata$taxa=="L",]
moss = kdata[kdata$taxa=="B",]
wide = spread(kdata, vegcode, cover)
lichen_wide = spread(lichen, vegcode, cover)
moss_wide = spread(moss, vegcode, cover)
total_abundance_matrix = wide[,15:27]
total_abundance_matrix[is.na(total_abundance_matrix)] =0
lichen_abundance_matrix = lichen_wide[,15:25]
lichen_abundance_matrix[is.na(lichen_abundance_matrix)] =0
moss_abundance_matrix = moss_wide[,15:16]
moss_abundance_matrix[is.na(moss_abundance_matrix)] =0
head(wide)
## location set boulder route grade face_or_top feature
## 1 Brick 1 Owl Farm South Street 4- face ledge
## 2 Brick 1 Owl Farm South Street 4- face ledge
## 3 Brick 1 Owl Farm South Street 4- face ledge
## 4 Brick 1 Owl Farm South Street 4- face ledge
## 5 Brick 2 Backyard Grotesque Old Woman 7 face flake
## 6 Brick 2 Backyard Grotesque Old Woman 7 face flake
## feature_size aspect incline sun climbed_or_unclimbed plot_height taxa 10 13
## 1 small S vertical 1 climbed 2 B NA NA
## 2 small S vertical 1 climbed 2 L 75 30
## 3 small S vertical 1 unclimbed 2 B NA NA
## 4 small S vertical 1 unclimbed 2 L 75 3
## 5 small E overhang 2 climbed 2 B NA NA
## 6 small E overhang 2 climbed 2 L 75 NA
## 15 17 18 19 21 3 5 6 7 Y Z
## 1 NA NA NA NA NA NA NA NA NA 3.00 3
## 2 0.05 NA NA 7.5 NA 0.55 NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA 0.55 NA
## 4 0.05 NA NA 30.0 0.55 NA NA 0.55 3 NA NA
## 5 NA NA NA NA NA NA NA NA NA 30.00 NA
## 6 NA NA NA NA 7.50 NA NA 0.55 NA NA NA
#community data set - only the species
com = wide[,15:27] #community data - all species
com[is.na(com)] =0
m_com = as.matrix(com) #convert com to a matrix
#environmental data set - only abio factors. For now, onyl categorical
which(colnames(wide)=="boulder") #3
which(colnames(wide)=="climbed_or_unclimbed") #12
env = wide[,c(3,12)]
set.seed(123) #so same results with repeated iterations
c_nmds = metaMDS(m_com, distance = "bray")
stressplot(c_nmds)
c_nmds
en = envfit(c_nmds, env, permutations = 999, na.rm = TRUE)
en #only factors so far
plot(c_nmds)
plot(en)
#extract values to plot with ggplot
c.data.scores = as.data.frame(scores(c_nmds))
c.data.scores$boulder = wide$boulder
c.data.scores$climbing = wide$climbed_or_unclimbed
#from tutorial
gg = ggplot(data = c.data.scores, aes(x = NMDS1, y = NMDS2)) +
geom_point(data = c.data.scores, aes(colour = climbing), size = 3, alpha = 0.5) +
scale_colour_manual(values = c("orange", "steelblue")) +
theme(axis.title = element_text(size = 10, face = "bold", colour = "grey30"),
panel.background = element_blank(), panel.border = element_rect(fill = NA, colour = "grey30"),
axis.ticks = element_blank(), axis.text = element_blank(), legend.key = element_blank(),
legend.title = element_text(size = 10, face = "bold", colour = "grey30"),
legend.text = element_text(size = 9, colour = "grey30")) +
labs(colour = "Climbing")
gg
#from old thesis code
#I can make these look nicer
ggplot(c.data.scores, aes(x=NMDS1, y=NMDS2, col=boulder)) +
geom_point(size=2) +
stat_ellipse(size=2) +
theme_bw() +
scale_color_brewer(palette = "Dark2") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.text = element_text(size=16, colour = "black"),
axis.line = element_line(colour = "black"), axis.title = element_text(size=16, colour = "black"),
legend.title = element_text(size = 16, colour = "black"), legend.text = element_text(size=16, colour= "black"))
ggplot(c.data.scores, aes(x=NMDS1, y=NMDS2, col=climbing)) +
geom_point(size=2) +
stat_ellipse(size=2) +
theme_bw() +
scale_color_brewer(palette = "Dark2") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.text = element_text(size=16, colour = "black"),
axis.line = element_line(colour = "black"), axis.title = element_text(size=16, colour = "black"),
legend.title = element_text(size = 16, colour = "black"), legend.text = element_text(size=16, colour= "black"))
ggplot(c.data.scores, aes(x=NMDS1, y=NMDS2, shape=factor(boulder), col=climbing)) +
geom_point(size=2) +
stat_ellipse(size=2) +
theme_bw() +
scale_color_brewer(palette = "Dark2")+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.text = element_text(size=16, colour = "black"),
axis.line = element_line(colour = "black"), axis.title = element_text(size=16, colour = "black"),
legend.title = element_text(size = 16, colour = "black"), legend.text = element_text(size=16, colour= "black"))
#use the same m_com data set as before
#create an environment data set.
env <- wide[,c("climbed_or_unclimbed", "feature", "aspect", "incline", "sun")]
#nmds code
set.seed(123)
e1_nmds = metaMDS(m_com, distance = "bray")
stressplot(e1_nmds)
e1_nmds
en1 = envfit(e1_nmds, env, permutations = 999, na.rm = TRUE)
en1
#extract values to plot with ggplot
e1.data.scores = as.data.frame(scores(e1_nmds))
e1.data.scores$boulder = wide$boulder
en_coord_cont = as.data.frame(scores(en1, "vectors")) * ordiArrowMul(en1)
en_coord_cat = as.data.frame(scores(en1, "factors")) * ordiArrowMul(en1)
#from tutorial
gg = ggplot(data = e1.data.scores, aes(x = NMDS1, y = NMDS2)) +
geom_point(data = e1.data.scores, aes(colour = boulder), size = 3, alpha = 0.5) +
scale_colour_manual(values = c("orange", "steelblue")) +
theme(axis.title = element_text(size = 10, face = "bold", colour = "grey30"),
panel.background = element_blank(), panel.border = element_rect(fill = NA, colour = "grey30"),
axis.ticks = element_blank(), axis.text = element_blank(), legend.key = element_blank(),
legend.title = element_text(size = 10, face = "bold", colour = "grey30"),
legend.text = element_text(size = 9, colour = "grey30")) +
labs(colour = "boulder")
gg
gg = ggplot(data = e1.data.scores, aes(x = NMDS1, y = NMDS2)) +
geom_point(data = e1.data.scores, aes(colour = Site), size = 3, alpha = 0.5) +
scale_colour_manual(values = c("orange", "steelblue")) +
geom_segment(aes(x = 0, y = 0, xend = NMDS1, yend = NMDS2),
data = en_coord_cont, size =1, alpha = 0.5, colour = "grey30") +
geom_text(data = en_coord_cont, aes(x = NMDS1, y = NMDS2), check_overlap = TRUE,
colour = "grey30", fontface = "bold", label = row.names(en_coord_cont), vjust="inward"
)+
geom_point(data = en_coord_cat, aes(x = NMDS1, y = NMDS2),
shape = "diamond", size = 4, alpha = 0.6, colour = "navy") +
geom_text(data = en_coord_cat, aes(x = NMDS1, y = NMDS2), check_overlap = TRUE,
label = row.names(en_coord_cat), colour = "navy", fontface = "bold") +
theme(axis.title = element_text(size = 10, face = "bold", colour = "grey30"),
panel.background = element_blank(), panel.border = element_rect(fill = NA, colour = "grey30"),
axis.ticks = element_blank(), axis.text = element_blank(), legend.key = element_blank(),
legend.title = element_text(size = 10, face = "bold", colour = "grey30"),
legend.text = element_text(size = 9, colour = "grey30"))+
labs(colour = "Site")
gg