First steps

Load packages and data

require(readr)
sikkimdata <- read_csv("~/Desktop/sikkimdata.csv")

require(dplyr)
require(ggplot2)
require(tidyr)
require(EcoSimR)

Test out the methods and plot out what the “niches” are

#break the DOY data into categorical "periods"
cuts<-seq(115,400,15)

#convert % into species-proportional and plot to make sure it looks ok
sikkimdata %>%
  filter(stage=="3") %>% #first let's just look at flowering, stage 3
  mutate(period=cut(DOY,cuts,labels=1:19)) %>%
  group_by(species) %>%
    mutate(abundance=`% flower or fruit`/sum(`% flower or fruit`)) %>%
  ggplot(aes(species,abundance)) +
    geom_point(pch=1,aes(size=abundance,colour=species)) +
    geom_text(aes(species,abundance+.2,label=substr(species,3,4),color=species)) +
    facet_grid(elevation~period) +
    theme_bw() +
    scale_x_discrete("",breaks=c()) +
    scale_y_continuous("",breaks=c())

One issue I see is that I’d like to ignore the cases when “start” and “peak” are both on the same date. I’ll do this by grouping by species/elevation/DOY to find unique values

sikkimdata %>% filter(stage=="3") %>% dim()
## [1] 102   8
#the full data has 102 records total

sikkimdata %>% filter(stage=="3") %>%
   group_by(species,DOY,elevation) %>%
   summarize(abundance=(max(`% flower or fruit`))) %>%
   dim() 
## [1] 96  4
#okay our data now correctly has 96 records total

#so I'll include that in the dpylr block

sikkimdata %>% filter(stage=="3") %>%
  group_by(species,DOY,elevation) %>%
  summarize(abundance=(max(`% flower or fruit`))) %>%
  mutate(period=cut(DOY,cuts,labels=1:19)) %>%
  ungroup() %>%
  group_by(species,elevation) %>%
  mutate(abundancep=`abundance`/sum(`abundance`)) %>%
  filter(abundance>0) %>%
  ungroup() %>%
  group_by(species,elevation,period) %>%
  summarize(niche=paste(elevation,period),abundancep=unique(abundancep)) ->nicheddata

  nicheddata %>% data.frame() %>% dplyr::select(species,abundancep,niche) %>%
  spread(niche,abundancep,fill=0) -> nicheddata
  nicheddata[,-1]->nicheddata

Define a new function to normalize and get Czekanowski overlaps

nicheoverlaps<-function(matrix){
  
  #define overlap function
  czoverlap<-function(normalized_vector1,normalized_vector2){
    1-.5*sum((abs(normalized_vector1-normalized_vector2)))}
  
  #normalize matrix
  normalized<-sweep(matrix, 1, rowSums(matrix), FUN="/")
  
  #compute pairwise overlaps
  length<-dim(normalized)[1]
  combs<-combn(1:length,2)
  overlaps<-c()
  for(each_comb in 1:dim(combs)[2]){
    i<-combs[,each_comb][1]
    j<-combs[,each_comb][2]
    each_overlap<-czoverlap(normalized[i,],normalized[j,])
    overlaps<-c(overlaps,each_overlap)
  }
  
  #report the mean
  mean(overlaps)
}

#randomize the matrix
nichepermute<-function(matrix,iterations){
  meanoverlaps<-c()
  for(i in 1:iterations){
    meanoverlap<-nicheoverlaps(t(apply(matrix,1,sample))) #so this is randomizing WITHIN species
    meanoverlaps<-c(meanoverlaps,meanoverlap)
  }
  meanoverlaps
}

Apply the methods

actual<-nicheoverlaps(nicheddata)
permuted<-nichepermute(nicheddata,10000)

Do a basic plot

qplot(permuted,binwidth=0.001)+geom_segment(aes(x=mean(permuted),xend=mean(permuted),y=0,yend=200),colour='white')+geom_segment(aes(x=actual,xend=actual,y=0,yend=200),colour='red')

A nicer looking plot, with confidence intervals:

actualseason<-nicheoverlaps(nicheddata)*100 #this doesn't change b/t year
permutedseason<-as.data.frame(overlaps<-nichepermute(nicheddata,1000)*100)
confints<-quantile(permutedseason$overlaps,probs=c(.05,.95))

ggplot(permutedseason,aes(x=overlaps))+
  theme_bw()+
  geom_histogram(colour="black",fill="white",binwidth=1)+
  #geom_segment(aes(x=confints[1],xend=confints[1],y=0,yend=250))+
  #geom_segment(aes(x=confints[2],xend=confints[2],y=0,yend=250))+
  geom_segment(aes(x=actualseason,xend=actualseason,y=0,yend=250),colour='red')+
  scale_x_continuous("overlap (%)")+
  ggtitle("Observed (red) and simulated niche (white) niche overlap Sikkim data")

Using EcoSimR gives similar results

# run null model with czekanowski index and ra3, 5000 replications
myRandomModel <- niche_null_model(speciesData=nicheddata, 
                                  algo="ra3", metric="czekanowski", 
                                  suppressProg=TRUE,nReps=5000)

# print summary of model and plot histogram
summary(myRandomModel)
## Time Stamp:  Tue Feb  6 20:28:23 2018 
## Reproducible:  FALSE 
## Number of Replications:  5000 
## Elapsed Time:  5.6 secs 
## Metric:  czekanowski 
## Algorithm:  ra3 
## Observed Index:  0.072997 
## Mean Of Simulated Index:  0.10453 
## Variance Of Simulated Index:  0.00023889 
## Lower 95% (1-tail):  0.081871 
## Upper 95% (1-tail):  0.13183 
## Lower 95% (2-tail):  0.078289 
## Upper 95% (2-tail):  0.13908 
## Lower-tail P =  0.0078 
## Upper-tail P =  0.9922 
## Observed metric > 39 simulated metrics 
## Observed metric < 4961 simulated metrics 
## Observed metric = 0 simulated metrics 
## Standardized Effect Size (SES):  -2.0403
plot(myRandomModel,type="hist")

Testing the hypotheses

First Hypothesis: budding and flowering are more staggered (ie less overlap), whereas fruiting and dehiscence are more overlapping

First I’ll make a function to do this quickly

basic_plot<-function(phenology_stage,habitat_type=c("alpine","subalpine")){
  
  sikkimdata %>% filter(stage==phenology_stage,habitat %in% habitat_type) %>%
    group_by(species,DOY,elevation) %>%
    summarize(abundance=(max(`% flower or fruit`))) %>%
    mutate(period=cut(DOY,cuts,labels=1:19)) %>%
    ungroup() %>%
    group_by(species,elevation) %>%
    mutate(abundancep=`abundance`/sum(`abundance`)) %>%
    filter(abundance>0) %>%
    ungroup() %>%
    group_by(species,elevation,period) %>%
    summarize(niche=paste(elevation,period),abundancep=unique(abundancep)) ->nicheddata
  
  nicheddata %>% data.frame() %>% dplyr::select(species,abundancep,niche) %>%
    spread(niche,abundancep,fill=0) -> nicheddata
  nicheddata[,1]->rownames(nicheddata)
  nicheddata[,-1]->nicheddata
  
  actualseason<-nicheoverlaps(nicheddata)*100 #this doesn't change b/t year
  permutedseason<-as.data.frame(overlaps<-nichepermute(nicheddata,1000)*100)
  confints<-quantile(permutedseason$overlaps,probs=c(.05,.95))
  
  #plot
  ggplot(permutedseason,aes(x=overlaps))+
    theme_bw()+
    geom_histogram(colour="black",fill="white",binwidth=1)+
    geom_segment(aes(x=confints[[1]],xend=confints[[1]],y=0,yend=250),alpha=I(1/2))+
    geom_segment(aes(x=confints[[2]],xend=confints[[2]],y=0,yend=250),alpha=I(1/2))+
    geom_segment(aes(x=actualseason,xend=actualseason,y=0,yend=250),colour='red')+
    scale_x_continuous("overlap (%)")+
    ggtitle("Observed (red) and simulated niche (white) niche overlap")
}

Then I’ll use the function to see that:

1 Budding looks similar to flowering

basic_plot(phenology_stage=2)

2 Fruiting shows way more overlap:

basic_plot(4)

basic_plot(5)

3 And the end of fruiting much more overlap than expected by chance

(but note that our method might not be most robust for that finding)

basic_plot(6)

basic_plot(7)

The same pattern is evident using EcoSim, for what it’s worth

ecosim_plot<-function(phenology_stage){
  
  sikkimdata %>% filter(stage==phenology_stage) %>%
    group_by(species,DOY,elevation) %>%
    summarize(abundance=(max(`% flower or fruit`))) %>%
    mutate(period=cut(DOY,cuts,labels=1:19)) %>%
    ungroup() %>%
    group_by(species,elevation) %>%
    mutate(abundancep=`abundance`/sum(`abundance`)) %>%
    filter(abundance>0) %>%
    ungroup() %>%
    group_by(species,elevation,period) %>%
    summarize(niche=paste(elevation,period),abundancep=unique(abundancep)) ->nicheddata
  
  nicheddata %>% data.frame() %>% dplyr::select(species,abundancep,niche) %>%
    spread(niche,abundancep,fill=0) -> nicheddata
  nicheddata[,1]->rownames(nicheddata)
  nicheddata[,-1]->nicheddata
  
  niche_null_model(speciesData=nicheddata, 
                                    algo="ra3", metric="czekanowski", 
                                    suppressProg=TRUE,nReps=5000)
}

budding similar to flowering – less overlap:

ecosim_plot(2)->ESmodel_2
plot(ESmodel_2,type="hist")

ecosim_plot(3)->ESmodel_3
plot(ESmodel_3,type="hist")

the two early fruiting stages similar more overlap (similar to random)

ecosim_plot(4)->ESmodel_4
plot(ESmodel_4,type="hist")

ecosim_plot(5)->ESmodel_5
plot(ESmodel_5,type="hist")

late flowering and dehiscense show even more overlap.

ecosim_plot(6)->ESmodel_6
plot(ESmodel_6,type="hist")

ecosim_plot(7)->ESmodel_7
plot(ESmodel_7,type="hist")

Second hypothesis: differences in alpine / subalpine species?

budding fairly similar

basic_plot(2,habitat_type="alpine")

basic_plot(2,habitat_type="subalpine")

flowering quite different

basic_plot(3,habitat_type="alpine")

basic_plot(3,habitat_type="subalpine")

later phenological stages quite similar

basic_plot(4,habitat_type="alpine")

basic_plot(4,habitat_type="subalpine")

basic_plot(5,habitat_type="alpine")

basic_plot(5,habitat_type="subalpine")

basic_plot(6,habitat_type="alpine")

basic_plot(6,habitat_type="subalpine")

basic_plot(7,habitat_type="alpine")

basic_plot(7,habitat_type="subalpine")