Rationale
I conducted this study specifically to explore how habitat alteration affects zooplankton fauna using Cladocera as a model community and a small Tropical water reservoir (Pashan lake) as a study system.
Assess the faunistics and species richness of cladocerans in the reservoir prior to and post modification of the habitat.
Checking changes in a few water parameters due to the habitat modification
Look at how the fauna has changed with respect to its functional traits.
Libraries used
Two groups were be used in the analysis, one for 2016 samples which symbolized the samples collected after beautification and 2009-2010 samples which represented the samples collected before the beautification. These years have been selected since the sampling strategy was similar to the 2016 sampling.
# Data file path (It is assumed that the sample data is saved in the working directory and saved as an excel file)
data_path<-"C:/Users/samee/Desktop/R data/sample_datasets/pashan_data.xlsx"
#Pashan pre 2016 data.
pashan_pre_2016<-read_excel(data_path,
sheet=1)
#Pashan 2016 collection data
pashan_2016<-read_excel(data_path,
sheet=5)pashan_pre_2016_final<-pashan_pre_2016%>%
dplyr::select('Species','Family','2009_1':'2010_10')
#Pashan full data
pashan_full_data<-pashan_pre_2016_final%>%
left_join(pashan_2016%>%
dplyr::select(-Family),
by='Species')%>%
replace(., is.na(.), 0)
head(pashan_full_data,5)| Species | Family | 2009_1 | 2009_3 | 2010_1 | 2010_4 | 2010_7 | 2010_10 | 2016_2_1 | 2016_2_2 | 2016_2_3 | 2016_2_4 | 2016_2_5 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| D_excisum | Sididae | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| D_sarsi | Sididae | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| C_cornuta | Daphniidae | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| C_quadrangula | Daphniidae | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| D_lumholtzi | Daphniidae | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
The dataframe was then converted from wide to a long format using both datasets (pre 2016 and 2016 respectivley)
| Species | Family | Years | values |
|---|---|---|---|
| D_excisum | Sididae | 2009_1 | 0 |
| D_sarsi | Sididae | 2009_1 | 0 |
| C_cornuta | Daphniidae | 2009_1 | 0 |
| C_quadrangula | Daphniidae | 2009_1 | 0 |
| D_lumholtzi | Daphniidae | 2009_1 | 0 |
| S_kingi | Daphniidae | 2009_1 | 1 |
For the long dataset, the same two groups were created based on the samples collected viz. pre (2009-2010) and post (2016) samples beautification respectivley
For the long dataset, two groups were be used, one for after beautification and one for before beautification
#1. Generating a index to select the samples collected in the year 2016
index_2016<-str_detect(pashan_long$Years,'2016')
# finding out the number of rows with 2016 index (and pre 2016 rows respectivley)
post_no<-nrow(pashan_long[index_2016,])
pre_no=nrow(pashan_long)-post_no
#2. Updating the Pashan dataset with the new grouping column
pashan_long<-pashan_long%>%
dplyr::mutate(collection_grp=rep(c("early","late"),
times=c(pre_no,
post_no)))
# Re-order the collection period groups for based on collection periods (i.e. pre and post respectivley)
pashan_long$collection_grp<-fct_relevel(pashan_long$collection_grp,"pre")
head(pashan_long,5)| Species | Family | Years | values | collection_grp |
|---|---|---|---|---|
| D_excisum | Sididae | 2009_1 | 0 | early |
| D_sarsi | Sididae | 2009_1 | 0 | early |
| C_cornuta | Daphniidae | 2009_1 | 0 | early |
| C_quadrangula | Daphniidae | 2009_1 | 0 | early |
| D_lumholtzi | Daphniidae | 2009_1 | 0 | early |
pashan_long%>%
ggplot(aes(x=Years,
y=Species)) +
geom_tile(aes(fill = as.factor(values)))+
scale_fill_manual(values = c("black",
"orange")) +
theme_bw(base_size = 11)+
theme(legend.title = element_text(size = 11),
legend.text = element_text(size = 11),
legend.position="top",
axis.title = element_text(size = 11),
axis.text.x = element_text(size = 11,
angle = 90,
hjust = 1),
axis.text.y = element_text(size = 11))+
labs(fill='Species Presence/Absence')The pashan_long data contains multiple entries of the same species based on the samples collected. Hence, the data are modified to have uniqiue species identities with corresponding presence/absence (0/1) based on the collection groups
#Obtaining the summarized data
pashan_sp_data<-pashan_long%>%
group_by(Family,
Species,
collection_grp)%>%
summarise(species_tot=sum(values))%>%
mutate(sp_pre_abs=ifelse(species_tot>0,1,0))
head(pashan_sp_data)| Family | Species | collection_grp | species_tot | sp_pre_abs |
|---|---|---|---|---|
| Bosminidae | B_dietersi | early | 1 | 1 |
| Bosminidae | B_dietersi | late | 0 | 0 |
| Bosminidae | B_longirostris | early | 2 | 1 |
| Bosminidae | B_longirostris | late | 0 | 0 |
| Chydoridae | C_eurynotus | early | 6 | 1 |
| Chydoridae | C_eurynotus | late | 1 | 1 |
#plot
pashan_sp_data%>%
group_by(collection_grp)%>%
summarise(sp_tot=sum(sp_pre_abs))%>%
plot_ly(x = ~collection_grp,
y=~ sp_tot,
type = "bar",
hoverinfo = 'text',
text = ~ paste ("collection_period:",collection_grp,"<br>",
"total_species:",sp_tot))%>%
layout(title = "Total species observed during two periods of collection",
xaxis = list(title = "Collection period"),
yaxis = list(title = "Total species"))pashan_sp_data%>%
ggplot(aes(x=Family,
y=sp_pre_abs))+
geom_col(fill='orange')+
facet_wrap(~collection_grp)+
theme_bw(base_size = 12)+
labs(x="Familes",
y="Total species",
title = "Total species observed in Pashan")+
coord_flip()pashan_species<-pashan_long%>%
mutate_if(is.character,
as.factor)%>%
group_by(Family,
Species,
collection_grp)%>%
summarize(total_occ=sum(values))%>%
arrange(desc(total_occ))
#Re-order the species based on total occurrences
pashan_species$Species<-fct_reorder(pashan_species$Species,
pashan_species$total_occ)
#plot
pashan_species%>%
ggplot(aes(x=Species,
y=total_occ))+
geom_col(fill='orange')+
facet_wrap(~collection_grp)+
theme_bw(base_size = 13)+
labs(x="Familes",
y="Occurrences",
title = "Species observed in Pashan")+
coord_flip()# Obtaining the data for analysis.Since the analysis would require the pre and post levels, pashan_long dataset is used
pashan_nmds_data<-pashan_long%>%
dplyr::select(Species,
Years:collection_grp)%>%
tidyr::spread(Species,
values)%>%
column_to_rownames('Years')
#nMDS
pashan_nmds<-metaMDS(pashan_nmds_data%>%
dplyr::select_if(is.numeric), # selecting the numeric data
distance = 'jaccard', #jaccard is used here in order
k=2,
trymax = 99)## Run 0 stress 0.08300893
## Run 1 stress 0.08300893
## ... Procrustes: rmse 4.786036e-06 max resid 9.283231e-06
## ... Similar to previous best
## Run 2 stress 0.08500196
## Run 3 stress 0.08300893
## ... Procrustes: rmse 2.910096e-06 max resid 5.621881e-06
## ... Similar to previous best
## Run 4 stress 0.08500196
## Run 5 stress 0.09650476
## Run 6 stress 0.08300893
## ... Procrustes: rmse 5.398223e-06 max resid 9.68622e-06
## ... Similar to previous best
## Run 7 stress 0.09650613
## Run 8 stress 0.08751628
## Run 9 stress 0.1233733
## Run 10 stress 0.3275258
## Run 11 stress 0.08262652
## ... New best solution
## ... Procrustes: rmse 0.06079008 max resid 0.1506624
## Run 12 stress 0.08300893
## ... Procrustes: rmse 0.06079064 max resid 0.1486553
## Run 13 stress 0.1225824
## Run 14 stress 0.09650486
## Run 15 stress 0.08500196
## Run 16 stress 0.1143242
## Run 17 stress 0.1216703
## Run 18 stress 0.08262651
## ... New best solution
## ... Procrustes: rmse 1.378107e-05 max resid 2.604741e-05
## ... Similar to previous best
## Run 19 stress 0.1119093
## Run 20 stress 0.08300893
## ... Procrustes: rmse 0.06078592 max resid 0.148649
## *** Solution reached
## [1] 0.02255403 0.01211035 0.02092718 0.02645381 0.03859315 0.03138005
## [7] 0.02301601 0.01635017 0.03186306 0.02174562 0.01657451
# nMDS plot
# to obtain the number of observations of pre and post period respectivley
table(pashan_nmds_data$collection_grp)##
## early late
## 6 5
# Defining the colors based on the levels
color_vector<-c("forestgreen","grey20")
# Defining the symbols based on the levels
symbol_vectors<-c(21,22)
#plot
#1. blank plot
ordiplot(pashan_nmds,
type="n")
#2. sites data
# Plotting the sites data using points function and adding color and symbols based on the groups
#points
points(pashan_nmds,
display = "sites",
col = "black",
pch = symbol_vectors[pashan_nmds_data$collection_grp],
bg = color_vector[pashan_nmds_data$collection_grp],
cex=1.4)
#names
orditorp(pashan_nmds,
display="sites",
cex=1,
air=0.5,
col = color_vector[pashan_nmds_data$collection_grp]
)
# Hulls for the two collection periods
ordihull(
pashan_nmds,
groups=pashan_nmds_data$collection_grp,
display = "sites",
draw = c("polygon"),
col = NULL,
border = color_vector,
lty = c(1, 2),
lwd = 2.5,
label = TRUE
)
#Adding title
title("nMDS Pashan Plantkon")
#4. species data
orditorp(pashan_nmds,
display="species",
cex=0.9,
air=0.01,
col= "black"
)# Before performing PERMANOVA, the data are converted into distances
pashan_perm_dist<-vegdist(subset(pashan_nmds_data,
select=-collection_grp),
method = "jaccard",
binary = T,
na.rm = T)
#This is followed by checking the beta dispersion of the distance data for multivariate spread (dispersion) of data using the pre and post beautification groups within the data
data_betadis<-betadisper(pashan_perm_dist,
pashan_nmds_data$collection_grp)%>%
anova(.)
#results of the beta dispersion
paste0("The P value for beta dispersion is:", data_betadis$`Pr(>F)`[1])## [1] "The P value for beta dispersion is:0.680973256080187"
#Since the p value is more than 0.05, null hypothesis cannot be rejected (which means data have a homogeneous multivariate spread (equivalent to homogeneity of variances))
#PERMANOVA (Using Jaccard index)
data_permanova<-adonis(subset(pashan_nmds_data,
select=-collection_grp) ~ collection_grp,
data = pashan_nmds_data,
permutations = 5000,
method = "jaccard")
#PERMANOVA results
data_permanova$aov.tab| Df | SumsOfSqs | MeanSqs | F.Model | R2 | Pr(>F) | |
|---|---|---|---|---|---|---|
| collection_grp | 1 | 0.7353871 | 0.7353871 | 3.456215 | 0.2774691 | 0.0023995 |
| Residuals | 9 | 1.9149512 | 0.2127724 | NA | 0.7225309 | NA |
| Total | 10 | 2.6503383 | NA | NA | 1.0000000 | NA |
Pashan nMDS data are used
#For overall beta diversity values which consider all the pond samples
fauna_beta_multi<-betapart::betapart.core(pashan_beta_data)%>%
betapart::beta.multi(.,index.family="jaccard")
fauna_beta_multi## $beta.JTU
## [1] 0.7916667
##
## $beta.JNE
## [1] 0.1008065
##
## $beta.JAC
## [1] 0.8924731
#For pairwise beta diversity values which consider faunal dissimilaritybetween 2 samples
fauna_beta_pairs<-betapart::betapart.core(pashan_beta_data)%>%
betapart::beta.pair(.)
fauna_beta_pairs## $beta.sim
## 2009_1 2009_3 2010_1 2010_10 2010_4 2010_7 2016_2_1
## 2009_3 0.4285714
## 2010_1 0.2857143 0.4444444
## 2010_10 0.4285714 0.5000000 0.2222222
## 2010_4 0.4285714 0.3000000 0.2222222 0.4545455
## 2010_7 0.4285714 0.3000000 0.3333333 0.3636364 0.5000000
## 2016_2_1 0.3333333 0.0000000 0.3333333 0.6666667 0.3333333 0.3333333
## 2016_2_2 0.2000000 0.0000000 0.2000000 0.4000000 0.2000000 0.2000000 0.0000000
## 2016_2_3 0.5000000 0.2500000 0.5000000 0.5000000 0.2500000 0.5000000 0.3333333
## 2016_2_4 0.4000000 0.2000000 0.4000000 0.4000000 0.4000000 0.2000000 0.0000000
## 2016_2_5 0.7500000 0.5000000 0.7500000 1.0000000 0.5000000 0.2500000 0.6666667
## 2016_2_2 2016_2_3 2016_2_4
## 2009_3
## 2010_1
## 2010_10
## 2010_4
## 2010_7
## 2016_2_1
## 2016_2_2
## 2016_2_3 0.2500000
## 2016_2_4 0.2000000 0.2500000
## 2016_2_5 0.7500000 0.7500000 0.7500000
##
## $beta.sne
## 2009_1 2009_3 2010_1 2010_10 2010_4 2010_7
## 2009_3 0.10084034
## 2010_1 0.08928571 0.02923977
## 2010_10 0.12698413 0.02380952 0.07777778
## 2010_4 0.19047619 0.11666667 0.16908213 0.06545455
## 2010_7 0.20779221 0.14000000 0.16666667 0.09790210 0.01724138
## 2016_2_1 0.26666667 0.53846154 0.33333333 0.19047619 0.43137255 0.44444444
## 2016_2_2 0.13333333 0.33333333 0.22857143 0.22500000 0.37894737 0.40000000
## 2016_2_3 0.13636364 0.32142857 0.19230769 0.23333333 0.41666667 0.28947368
## 2016_2_4 0.10000000 0.26666667 0.17142857 0.22500000 0.28421053 0.40000000
## 2016_2_5 0.06818182 0.21428571 0.09615385 0.00000000 0.27777778 0.43421053
## 2016_2_1 2016_2_2 2016_2_3 2016_2_4
## 2009_3
## 2010_1
## 2010_10
## 2010_4
## 2010_7
## 2016_2_1
## 2016_2_2 0.25000000
## 2016_2_3 0.09523810 0.08333333
## 2016_2_4 0.25000000 0.00000000 0.08333333
## 2016_2_5 0.04761905 0.02777778 0.00000000 0.02777778
##
## $beta.sor
## 2009_1 2009_3 2010_1 2010_10 2010_4 2010_7 2016_2_1
## 2009_3 0.5294118
## 2010_1 0.3750000 0.4736842
## 2010_10 0.5555556 0.5238095 0.3000000
## 2010_4 0.6190476 0.4166667 0.3913043 0.5200000
## 2010_7 0.6363636 0.4400000 0.5000000 0.4615385 0.5172414
## 2016_2_1 0.6000000 0.5384615 0.6666667 0.8571429 0.7647059 0.7777778
## 2016_2_2 0.3333333 0.3333333 0.4285714 0.6250000 0.5789474 0.6000000 0.2500000
## 2016_2_3 0.6363636 0.5714286 0.6923077 0.7333333 0.6666667 0.7894737 0.4285714
## 2016_2_4 0.5000000 0.4666667 0.5714286 0.6250000 0.6842105 0.6000000 0.2500000
## 2016_2_5 0.8181818 0.7142857 0.8461538 1.0000000 0.7777778 0.6842105 0.7142857
## 2016_2_2 2016_2_3 2016_2_4
## 2009_3
## 2010_1
## 2010_10
## 2010_4
## 2010_7
## 2016_2_1
## 2016_2_2
## 2016_2_3 0.3333333
## 2016_2_4 0.2000000 0.3333333
## 2016_2_5 0.7777778 0.7500000 0.7777778
#Importing data
pashan_env_data<-read_excel(data_path,
sheet=4)%>%
mutate_if(is.character,
as.factor)
# Converting the wide dataset to a long dataframe
pashan_envdata_long<-pashan_env_data%>%
dplyr::rename('salinity'='sal')%>%
gather(env_variables,
values,
pH,
temperature,
salinity)
# Re-order the collection period groups for based on collection periods (i.e. pre and post respectivley)
pashan_envdata_long$collection_grp<-fct_relevel(pashan_envdata_long$collection_grp,"pre")
head(pashan_envdata_long,5)| Years | collection_grp | conductivity | TDS | env_variables | values |
|---|---|---|---|---|---|
| 2010 | pre | 421 | 298 | pH | 9.10 |
| 2010 | pre | 449 | 317 | pH | 8.40 |
| 2010 | pre | 430 | 305 | pH | 8.10 |
| 2010 | pre | 457 | 324 | pH | 7.90 |
| 2016 | post | 792 | 568 | pH | 8.28 |
#Summary
pashan_envdata_summ<-pashan_envdata_long%>%
group_by(env_variables,
collection_grp)%>%
summarise(mean_val=mean(values),
median_val=median(values),
std_dev=sd(values))
pashan_envdata_summ| env_variables | collection_grp | mean_val | median_val | std_dev |
|---|---|---|---|---|
| pH | pre | 8.375 | 8.25 | 0.5251984 |
| pH | post | 8.326 | 8.34 | 0.7119551 |
| salinity | pre | 210.750 | 210.00 | 8.8081402 |
| salinity | post | 426.200 | 432.00 | 64.9669147 |
| temperature | pre | 26.425 | 26.80 | 1.9737865 |
| temperature | post | 26.160 | 27.00 | 2.8201064 |