Rationale

I conducted this study specifically to explore how habitat alteration affects zooplankton fauna using Cladocera as a model community and a small Tropical water reservoir (Pashan lake) as a study system.

Specific objectives:

  1. Assess the faunistics and species richness of cladocerans in the reservoir prior to and post modification of the habitat.

  2. Checking changes in a few water parameters due to the habitat modification

  3. Look at how the fauna has changed with respect to its functional traits.

Libraries used

Importing the species diversity data

Two groups were be used in the analysis, one for 2016 samples which symbolized the samples collected after beautification and 2009-2010 samples which represented the samples collected before the beautification. These years have been selected since the sampling strategy was similar to the 2016 sampling.

# Data file path (It is assumed that the sample data is saved in the working directory and saved as an excel file)

data_path<-"C:/Users/samee/Desktop/R data/sample_datasets/pashan_data.xlsx"


#Pashan pre 2016 data.

pashan_pre_2016<-read_excel(data_path,
                          sheet=1)


#Pashan 2016 collection data

pashan_2016<-read_excel(data_path,
                        sheet=5)

Final dataset

pashan_pre_2016_final<-pashan_pre_2016%>%
    dplyr::select('Species','Family','2009_1':'2010_10')


#Pashan full data

pashan_full_data<-pashan_pre_2016_final%>%
    left_join(pashan_2016%>%
                  dplyr::select(-Family),
              by='Species')%>%
    replace(., is.na(.), 0)

head(pashan_full_data,5)
Species Family 2009_1 2009_3 2010_1 2010_4 2010_7 2010_10 2016_2_1 2016_2_2 2016_2_3 2016_2_4 2016_2_5
D_excisum Sididae 0 1 0 1 1 0 0 0 0 0 0
D_sarsi Sididae 0 0 0 0 0 1 0 0 0 0 0
C_cornuta Daphniidae 0 1 1 1 1 1 1 1 1 1 0
C_quadrangula Daphniidae 0 0 0 0 0 0 0 0 0 0 0
D_lumholtzi Daphniidae 0 0 0 0 1 0 0 0 0 0 1

Wide to long format

The dataframe was then converted from wide to a long format using both datasets (pre 2016 and 2016 respectivley)

pashan_long<-pashan_full_data%>%
    gather(Years,
           values,
           '2009_1':'2016_2_5')

head(pashan_long)
Species Family Years values
D_excisum Sididae 2009_1 0
D_sarsi Sididae 2009_1 0
C_cornuta Daphniidae 2009_1 0
C_quadrangula Daphniidae 2009_1 0
D_lumholtzi Daphniidae 2009_1 0
S_kingi Daphniidae 2009_1 1

Adding group labels to the long dataset

For the long dataset, the same two groups were created based on the samples collected viz. pre (2009-2010) and post (2016) samples beautification respectivley

For the long dataset, two groups were be used, one for after beautification and one for before beautification

#1. Generating a index to select the samples collected in the year 2016

index_2016<-str_detect(pashan_long$Years,'2016')

# finding out the number of rows with 2016 index (and pre 2016 rows respectivley)

post_no<-nrow(pashan_long[index_2016,])

pre_no=nrow(pashan_long)-post_no


#2. Updating the Pashan dataset with the new grouping column

pashan_long<-pashan_long%>%
    dplyr::mutate(collection_grp=rep(c("early","late"),
                                     times=c(pre_no,
                                             post_no)))


# Re-order the collection period groups for based on collection periods (i.e. pre and post respectivley)

pashan_long$collection_grp<-fct_relevel(pashan_long$collection_grp,"pre")

head(pashan_long,5)
Species Family Years values collection_grp
D_excisum Sididae 2009_1 0 early
D_sarsi Sididae 2009_1 0 early
C_cornuta Daphniidae 2009_1 0 early
C_quadrangula Daphniidae 2009_1 0 early
D_lumholtzi Daphniidae 2009_1 0 early

Pashan data exploration and visualization

Visualizing the species found in Pashan in all samples (using the long dataset)using a heatmap which shows species presence/absence for all the samples.

pashan_long%>%
    ggplot(aes(x=Years,
               y=Species)) + 
    geom_tile(aes(fill = as.factor(values)))+
    scale_fill_manual(values = c("black", 
                                 "orange")) + 
    theme_bw(base_size = 11)+
    theme(legend.title = element_text(size = 11), 
          legend.text = element_text(size = 11),
          legend.position="top",
          axis.title = element_text(size = 11), 
          axis.text.x = element_text(size = 11, 
                                     angle = 90, 
                                     hjust = 1), 
          axis.text.y = element_text(size = 11))+
    labs(fill='Species Presence/Absence')

Total species in the pre and post beautification collections

The pashan_long data contains multiple entries of the same species based on the samples collected. Hence, the data are modified to have uniqiue species identities with corresponding presence/absence (0/1) based on the collection groups

#Obtaining the summarized data

pashan_sp_data<-pashan_long%>%
    group_by(Family,
             Species,
             collection_grp)%>%
    summarise(species_tot=sum(values))%>%
    mutate(sp_pre_abs=ifelse(species_tot>0,1,0))

head(pashan_sp_data)
Family Species collection_grp species_tot sp_pre_abs
Bosminidae B_dietersi early 1 1
Bosminidae B_dietersi late 0 0
Bosminidae B_longirostris early 2 1
Bosminidae B_longirostris late 0 0
Chydoridae C_eurynotus early 6 1
Chydoridae C_eurynotus late 1 1
#plot 

pashan_sp_data%>%
    group_by(collection_grp)%>%
    summarise(sp_tot=sum(sp_pre_abs))%>%
    plot_ly(x = ~collection_grp,
            y=~ sp_tot,
            type = "bar",
            hoverinfo = 'text',
            text = ~ paste ("collection_period:",collection_grp,"<br>",
                            "total_species:",sp_tot))%>%
    layout(title = "Total species observed during two periods of collection",
           xaxis = list(title = "Collection period"),
           yaxis = list(title = "Total species"))

Family wise distribution of species in pre and post beautification periods

pashan_sp_data%>%
    ggplot(aes(x=Family,
               y=sp_pre_abs))+
    geom_col(fill='orange')+
    facet_wrap(~collection_grp)+
    theme_bw(base_size = 12)+
    labs(x="Familes",
         y="Total species",
         title = "Total species observed in Pashan")+
    coord_flip()

Individual species occurrences (counts) in the pre and post collections

pashan_species<-pashan_long%>%
    mutate_if(is.character,
              as.factor)%>%
    group_by(Family,
             Species,
             collection_grp)%>%
    summarize(total_occ=sum(values))%>%
    arrange(desc(total_occ))

#Re-order the species based on total occurrences

pashan_species$Species<-fct_reorder(pashan_species$Species,
                                    pashan_species$total_occ)

#plot

pashan_species%>%
        ggplot(aes(x=Species,
               y=total_occ))+
    geom_col(fill='orange')+
    facet_wrap(~collection_grp)+
    theme_bw(base_size = 13)+
    labs(x="Familes",
         y="Occurrences",
         title = "Species observed in Pashan")+
    coord_flip()

Non metric dimensional scaling to assess the species association of the pre and post collection periods of Pashan

# Obtaining the data for analysis.Since the analysis would require the pre and post levels, pashan_long dataset is used

pashan_nmds_data<-pashan_long%>%
    dplyr::select(Species,
                  Years:collection_grp)%>%
    tidyr::spread(Species,
                  values)%>%
    column_to_rownames('Years')


#nMDS 

pashan_nmds<-metaMDS(pashan_nmds_data%>%
                         dplyr::select_if(is.numeric), # selecting the numeric data
                     distance = 'jaccard', #jaccard is used here in order 
                     k=2,
                     trymax = 99)
## Run 0 stress 0.08300893 
## Run 1 stress 0.08300893 
## ... Procrustes: rmse 4.786036e-06  max resid 9.283231e-06 
## ... Similar to previous best
## Run 2 stress 0.08500196 
## Run 3 stress 0.08300893 
## ... Procrustes: rmse 2.910096e-06  max resid 5.621881e-06 
## ... Similar to previous best
## Run 4 stress 0.08500196 
## Run 5 stress 0.09650476 
## Run 6 stress 0.08300893 
## ... Procrustes: rmse 5.398223e-06  max resid 9.68622e-06 
## ... Similar to previous best
## Run 7 stress 0.09650613 
## Run 8 stress 0.08751628 
## Run 9 stress 0.1233733 
## Run 10 stress 0.3275258 
## Run 11 stress 0.08262652 
## ... New best solution
## ... Procrustes: rmse 0.06079008  max resid 0.1506624 
## Run 12 stress 0.08300893 
## ... Procrustes: rmse 0.06079064  max resid 0.1486553 
## Run 13 stress 0.1225824 
## Run 14 stress 0.09650486 
## Run 15 stress 0.08500196 
## Run 16 stress 0.1143242 
## Run 17 stress 0.1216703 
## Run 18 stress 0.08262651 
## ... New best solution
## ... Procrustes: rmse 1.378107e-05  max resid 2.604741e-05 
## ... Similar to previous best
## Run 19 stress 0.1119093 
## Run 20 stress 0.08300893 
## ... Procrustes: rmse 0.06078592  max resid 0.148649 
## *** Solution reached
#Stressplot of the nMDS

stressplot(pashan_nmds)

#goodness of fit for both of the collection periods

goodness(pashan_nmds)
##  [1] 0.02255403 0.01211035 0.02092718 0.02645381 0.03859315 0.03138005
##  [7] 0.02301601 0.01635017 0.03186306 0.02174562 0.01657451
# nMDS plot

# to obtain the number of observations of pre and post period respectivley

table(pashan_nmds_data$collection_grp)
## 
## early  late 
##     6     5
# Defining the colors based on the levels

color_vector<-c("forestgreen","grey20")

# Defining the symbols based on the levels

symbol_vectors<-c(21,22)


#plot

#1. blank plot

ordiplot(pashan_nmds,
         type="n")

#2. sites data

# Plotting the sites data using points function and adding color and symbols based on the groups

#points

points(pashan_nmds,
       display = "sites",
       col = "black",
       pch = symbol_vectors[pashan_nmds_data$collection_grp],
       bg = color_vector[pashan_nmds_data$collection_grp],
       cex=1.4)

#names

orditorp(pashan_nmds,
         display="sites",
         cex=1,
         air=0.5,
         col = color_vector[pashan_nmds_data$collection_grp]
)

# Hulls for the two collection periods

ordihull(
    pashan_nmds,
    groups=pashan_nmds_data$collection_grp,
    display = "sites",
    draw = c("polygon"),
    col = NULL,
    border = color_vector,
    lty = c(1, 2),
    lwd = 2.5,
    label = TRUE
)

#Adding title

title("nMDS Pashan Plantkon")

#4. species data

orditorp(pashan_nmds,
         display="species",
         cex=0.9,
         air=0.01,
         col= "black"
)

PERMANOVA to check significance in the species communities in the pre and post beautificationp periods

# Before performing PERMANOVA, the data are converted into distances

pashan_perm_dist<-vegdist(subset(pashan_nmds_data,
                                 select=-collection_grp), 
                          method = "jaccard",
                          binary = T,
                          na.rm = T)


#This is followed by checking the beta dispersion of the distance data for multivariate spread (dispersion) of data using the pre and post beautification groups within the data

data_betadis<-betadisper(pashan_perm_dist, 
                         pashan_nmds_data$collection_grp)%>%
    anova(.)


#results of the beta dispersion

paste0("The P value for beta dispersion is:", data_betadis$`Pr(>F)`[1])
## [1] "The P value for beta dispersion is:0.680973256080187"
#Since the p value is more than 0.05, null hypothesis cannot be rejected (which means data have a homogeneous multivariate spread (equivalent to homogeneity of variances))


#PERMANOVA (Using Jaccard index)

data_permanova<-adonis(subset(pashan_nmds_data,
                              select=-collection_grp) ~ collection_grp,
                       data = pashan_nmds_data, 
                       permutations = 5000, 
                       method = "jaccard")

#PERMANOVA results

data_permanova$aov.tab
Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
collection_grp 1 0.7353871 0.7353871 3.456215 0.2774691 0.0023995
Residuals 9 1.9149512 0.2127724 NA 0.7225309 NA
Total 10 2.6503383 NA NA 1.0000000 NA

Beta Diversity

Modifying data for beta diversity analyses.

Pashan nMDS data are used

require(betapart)

pashan_beta_data<-pashan_nmds_data%>%
    dplyr::select(-collection_grp)%>%
    dplyr::select(which(!colSums(., 
                          na.rm=TRUE) == 0))

Calculating the beta diversity (and the Beta sim and Beta nes partitions)

#For overall beta diversity values which consider all the pond samples 
fauna_beta_multi<-betapart::betapart.core(pashan_beta_data)%>%
    betapart::beta.multi(.,index.family="jaccard")

fauna_beta_multi
## $beta.JTU
## [1] 0.7916667
## 
## $beta.JNE
## [1] 0.1008065
## 
## $beta.JAC
## [1] 0.8924731
#For pairwise beta diversity values which consider faunal dissimilaritybetween 2 samples 
fauna_beta_pairs<-betapart::betapart.core(pashan_beta_data)%>%
    betapart::beta.pair(.)

fauna_beta_pairs
## $beta.sim
##             2009_1    2009_3    2010_1   2010_10    2010_4    2010_7  2016_2_1
## 2009_3   0.4285714                                                            
## 2010_1   0.2857143 0.4444444                                                  
## 2010_10  0.4285714 0.5000000 0.2222222                                        
## 2010_4   0.4285714 0.3000000 0.2222222 0.4545455                              
## 2010_7   0.4285714 0.3000000 0.3333333 0.3636364 0.5000000                    
## 2016_2_1 0.3333333 0.0000000 0.3333333 0.6666667 0.3333333 0.3333333          
## 2016_2_2 0.2000000 0.0000000 0.2000000 0.4000000 0.2000000 0.2000000 0.0000000
## 2016_2_3 0.5000000 0.2500000 0.5000000 0.5000000 0.2500000 0.5000000 0.3333333
## 2016_2_4 0.4000000 0.2000000 0.4000000 0.4000000 0.4000000 0.2000000 0.0000000
## 2016_2_5 0.7500000 0.5000000 0.7500000 1.0000000 0.5000000 0.2500000 0.6666667
##           2016_2_2  2016_2_3  2016_2_4
## 2009_3                                
## 2010_1                                
## 2010_10                               
## 2010_4                                
## 2010_7                                
## 2016_2_1                              
## 2016_2_2                              
## 2016_2_3 0.2500000                    
## 2016_2_4 0.2000000 0.2500000          
## 2016_2_5 0.7500000 0.7500000 0.7500000
## 
## $beta.sne
##              2009_1     2009_3     2010_1    2010_10     2010_4     2010_7
## 2009_3   0.10084034                                                       
## 2010_1   0.08928571 0.02923977                                            
## 2010_10  0.12698413 0.02380952 0.07777778                                 
## 2010_4   0.19047619 0.11666667 0.16908213 0.06545455                      
## 2010_7   0.20779221 0.14000000 0.16666667 0.09790210 0.01724138           
## 2016_2_1 0.26666667 0.53846154 0.33333333 0.19047619 0.43137255 0.44444444
## 2016_2_2 0.13333333 0.33333333 0.22857143 0.22500000 0.37894737 0.40000000
## 2016_2_3 0.13636364 0.32142857 0.19230769 0.23333333 0.41666667 0.28947368
## 2016_2_4 0.10000000 0.26666667 0.17142857 0.22500000 0.28421053 0.40000000
## 2016_2_5 0.06818182 0.21428571 0.09615385 0.00000000 0.27777778 0.43421053
##            2016_2_1   2016_2_2   2016_2_3   2016_2_4
## 2009_3                                              
## 2010_1                                              
## 2010_10                                             
## 2010_4                                              
## 2010_7                                              
## 2016_2_1                                            
## 2016_2_2 0.25000000                                 
## 2016_2_3 0.09523810 0.08333333                      
## 2016_2_4 0.25000000 0.00000000 0.08333333           
## 2016_2_5 0.04761905 0.02777778 0.00000000 0.02777778
## 
## $beta.sor
##             2009_1    2009_3    2010_1   2010_10    2010_4    2010_7  2016_2_1
## 2009_3   0.5294118                                                            
## 2010_1   0.3750000 0.4736842                                                  
## 2010_10  0.5555556 0.5238095 0.3000000                                        
## 2010_4   0.6190476 0.4166667 0.3913043 0.5200000                              
## 2010_7   0.6363636 0.4400000 0.5000000 0.4615385 0.5172414                    
## 2016_2_1 0.6000000 0.5384615 0.6666667 0.8571429 0.7647059 0.7777778          
## 2016_2_2 0.3333333 0.3333333 0.4285714 0.6250000 0.5789474 0.6000000 0.2500000
## 2016_2_3 0.6363636 0.5714286 0.6923077 0.7333333 0.6666667 0.7894737 0.4285714
## 2016_2_4 0.5000000 0.4666667 0.5714286 0.6250000 0.6842105 0.6000000 0.2500000
## 2016_2_5 0.8181818 0.7142857 0.8461538 1.0000000 0.7777778 0.6842105 0.7142857
##           2016_2_2  2016_2_3  2016_2_4
## 2009_3                                
## 2010_1                                
## 2010_10                               
## 2010_4                                
## 2010_7                                
## 2016_2_1                              
## 2016_2_2                              
## 2016_2_3 0.3333333                    
## 2016_2_4 0.2000000 0.3333333          
## 2016_2_5 0.7777778 0.7500000 0.7777778

Environmental data summary for the two collection periods

#Importing data

pashan_env_data<-read_excel(data_path,
                            sheet=4)%>%
    mutate_if(is.character,
              as.factor)

# Converting the wide dataset to a long dataframe

pashan_envdata_long<-pashan_env_data%>%
    dplyr::rename('salinity'='sal')%>%
    gather(env_variables,
           values,
           pH,
           temperature,
           salinity)

# Re-order the collection period groups for based on collection periods (i.e. pre and post respectivley)

pashan_envdata_long$collection_grp<-fct_relevel(pashan_envdata_long$collection_grp,"pre")

head(pashan_envdata_long,5)
Years collection_grp conductivity TDS env_variables values
2010 pre 421 298 pH 9.10
2010 pre 449 317 pH 8.40
2010 pre 430 305 pH 8.10
2010 pre 457 324 pH 7.90
2016 post 792 568 pH 8.28

Summarizing the environmental data

#Summary

pashan_envdata_summ<-pashan_envdata_long%>%
    group_by(env_variables,
             collection_grp)%>%
    summarise(mean_val=mean(values),
              median_val=median(values),
              std_dev=sd(values))

pashan_envdata_summ
env_variables collection_grp mean_val median_val std_dev
pH pre 8.375 8.25 0.5251984
pH post 8.326 8.34 0.7119551
salinity pre 210.750 210.00 8.8081402
salinity post 426.200 432.00 64.9669147
temperature pre 26.425 26.80 1.9737865
temperature post 26.160 27.00 2.8201064

Plots for visualizing the environmental data

pashan_envdata_long%>%
    ggplot(aes(collection_grp,
               values))+
    geom_boxplot(fill='orange')+
    facet_wrap(~env_variables,
               scales = 'free')+
    theme_bw(base_size = 18)

WIP