Getting started


In addition to MCSim, we will be using the following packages:

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.1
## v tidyr   1.1.1     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## Warning: package 'ggplot2' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ade4)
library(vegan)
## Warning: package 'vegan' was built under R version 4.0.4
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-7
library(sars)
## Warning: package 'sars' was built under R version 4.0.5
library(MCSim)

Read in empirical data, calculate niches


We will use the mite dataset available in the vegan package.

d.comm <- get(data("mite"))

# Here I'm using the entire mite dataset
d.comm <- d.comm[,order(colSums(d.comm), decreasing = TRUE)]

# Here I'm only using continuous environemntal variables
d.env <- scale(get(data("mite.env"))[,c("SubsDens","WatrCont")])

# Here are the xy-coordinates
d.geo <- get(data("mite.xy"))

Here we will Make a matrix with site information, which will be useful for plotting later on. Then we will use the ade4 package to calculate the niches for the species in the dataset based on where they occur, their relative abundances, and available site-level environmental data.

d.siteinfo <- data.frame(
  Site.code = paste0('site',c(1:nrow(d.env))),
  get(data("mite.env")))

# -- calculate RAs from densities
d.comm.ra <- d.comm / rowSums(d.comm)

# -- calculate niches for species
dudi.pca.env <- dudi.pca(d.env, scale = TRUE, scan = FALSE, nf=1)
niche.species <- niche(dudi.pca.env, Y = d.comm.ra, scann = FALSE)
d.niche <- data.frame(
  niche.pos=niche.species$li,
  as.data.frame(niche.param(niche.species)))

# -- calculate niche positions for each of the sites
d.site.niche  <-  data.frame(
  d.siteinfo,
  dudi.pca.env$li)

# -- make 1-Dimensional axis for arranging sites in a plot along the environmental gradient
mod.pca.geo <- princomp(d.geo)
d.site.1D <- data.frame(
  site.name = as.character(d.siteinfo$Site.code),
  region = as.character(d.siteinfo$Topo),
  pca.score = mod.pca.geo$scores[,1],
  pca.rank = rank(mod.pca.geo$scores[,1]))

Next, we will plot coenoclines to visualize the species’ niche positoins along the environmental gradient. Note that the “rug” marks along the x-axis denote the sites’ locations along the environmental gradient.

env.axis <- d.site.niche$Axis1

niche.factor <- 0.5 #rescale niche breadths

species.niche <- d.niche %>%
  select(Axis1, Tol) %>%
  rename(
    trait.Ef = Axis1,
    trait.Ef.sd = Tol) %>%
  mutate(
    trait.Ef.sd.rescaled = trait.Ef.sd * niche.factor)

# Plot the coenoclines to view species' habitat preferences and niche breadths
# along the environmental gradient (Ef)
plot.coenoclines(
  Ef = env.axis,
  trait.Ef = species.niche$trait.Ef,
  trait.Ef.sd = species.niche$trait.Ef.sd.rescaled)


Simulate a metacommunity


# -------------------------------------------------------------
# -- Metacommunity simulation using MCSim
# -- SCENARIO: Species sorting based on habitat preferences
# --------------------------

set.seed(1)

# -- make the landscape
# -- I arbitrarily chose m = 0.15
simulation.landscape <- make.landscape(
  site.coords = d.geo,
  Ef = d.site.niche$Axis1,
  m = 0.15,
  JL = rowSums(d.comm)*100)
## [1] "success!!"
# -- plot a map of the landscape
simulation.landscape$site.coords %>%
  ggplot(aes(x,y,size = simulation.landscape$site.info$JL)) +
  geom_point()

# explore dispersal kernel
plot.standardized.disp.kernel(landscape = simulation.landscape,
                                     w = 500)

# -- IMPORTANT NOTE on the simulation
# -- W.r = 500 produces a relatively steep dispersal kernel such that dispersal 
# --   occurs mostly between adjacent sites.
simoutput <- MCSim::metasim(
  landscape = simulation.landscape,
  output.dir.path = 'SIM_RESULTS',
  scenario.ID = 'mites_niche_model',  
  trait.Ef = species.niche$trait.Ef,
  trait.Ef.sd = species.niche$trait.Ef.sd.rescaled,
  J.t0 = d.comm.ra,
  n.timestep = 100,
  W.r = 500,
  nu = 0,
  speciation.limit = 0,
  save.sim = FALSE
)
## [1] "Timestep: 2"
## [1] "Timestep: 3"
## [1] "Timestep: 4"
## [1] "Timestep: 5"
## [1] "Timestep: 6"
## [1] "Timestep: 7"
## [1] "Timestep: 8"
## [1] "Timestep: 9"
## [1] "Timestep: 10"
## [1] "Timestep: 11"
## [1] "Timestep: 12"
## [1] "Timestep: 13"
## [1] "Timestep: 14"
## [1] "Timestep: 15"
## [1] "Timestep: 16"
## [1] "Timestep: 17"
## [1] "Timestep: 18"
## [1] "Timestep: 19"
## [1] "Timestep: 20"
## [1] "Timestep: 21"
## [1] "Timestep: 22"
## [1] "Timestep: 23"
## [1] "Timestep: 24"
## [1] "Timestep: 25"
## [1] "Timestep: 26"
## [1] "Timestep: 27"
## [1] "Timestep: 28"
## [1] "Timestep: 29"
## [1] "Timestep: 30"
## [1] "Timestep: 31"
## [1] "Timestep: 32"
## [1] "Timestep: 33"
## [1] "Timestep: 34"
## [1] "Timestep: 35"
## [1] "Timestep: 36"
## [1] "Timestep: 37"
## [1] "Timestep: 38"
## [1] "Timestep: 39"
## [1] "Timestep: 40"
## [1] "Timestep: 41"
## [1] "Timestep: 42"
## [1] "Timestep: 43"
## [1] "Timestep: 44"
## [1] "Timestep: 45"
## [1] "Timestep: 46"
## [1] "Timestep: 47"
## [1] "Timestep: 48"
## [1] "Timestep: 49"
## [1] "Timestep: 50"
## [1] "Timestep: 51"
## [1] "Timestep: 52"
## [1] "Timestep: 53"
## [1] "Timestep: 54"
## [1] "Timestep: 55"
## [1] "Timestep: 56"
## [1] "Timestep: 57"
## [1] "Timestep: 58"
## [1] "Timestep: 59"
## [1] "Timestep: 60"
## [1] "Timestep: 61"
## [1] "Timestep: 62"
## [1] "Timestep: 63"
## [1] "Timestep: 64"
## [1] "Timestep: 65"
## [1] "Timestep: 66"
## [1] "Timestep: 67"
## [1] "Timestep: 68"
## [1] "Timestep: 69"
## [1] "Timestep: 70"
## [1] "Timestep: 71"
## [1] "Timestep: 72"
## [1] "Timestep: 73"
## [1] "Timestep: 74"
## [1] "Timestep: 75"
## [1] "Timestep: 76"
## [1] "Timestep: 77"
## [1] "Timestep: 78"
## [1] "Timestep: 79"
## [1] "Timestep: 80"
## [1] "Timestep: 81"
## [1] "Timestep: 82"
## [1] "Timestep: 83"
## [1] "Timestep: 84"
## [1] "Timestep: 85"
## [1] "Timestep: 86"
## [1] "Timestep: 87"
## [1] "Timestep: 88"
## [1] "Timestep: 89"
## [1] "Timestep: 90"
## [1] "Timestep: 91"
## [1] "Timestep: 92"
## [1] "Timestep: 93"
## [1] "Timestep: 94"
## [1] "Timestep: 95"
## [1] "Timestep: 96"
## [1] "Timestep: 97"
## [1] "Timestep: 98"
## [1] "Timestep: 99"
## [1] "Timestep: 100"
# plot dot plots
plot.dot.plots(simoutput)


How does the “SAR” change during the course of the simulation?


Note that I have SAR in quotes because we’re not looking at true SARs here. I am fitting SAR models to sampling effort curves.

# assemble data for SAR analysis
# NOTE: not a true SAR because I'm using sample number as a proxy for area
# This could be improved with some more sophisticated coding. 
my_timestep <- 1
n_samples <- 100
min_sample_size <- 2
max_sample_size <- 50

sars_data <- data.frame()

Jt_wide <- simoutput$J.long %>% 
  dplyr::filter(timestep == my_timestep) %>%
  dplyr::select(-timestep) %>%
  tidyr::pivot_wider(id_cols = site,names_from = "spp",values_from = "count") %>%
  tibble::column_to_rownames(var = "site")

for(sample_size in sample(min_sample_size:max_sample_size, size = n_samples, replace = TRUE)){
  sars_data_i <- data.frame()
  sars_data_i <- data.frame(
    a = sample_size,
    s = sum(
      colSums(
        Jt_wide[
          sample(1:nrow(Jt_wide), size = sample_size, replace = TRUE),]) > 0))
  
  sars_data <- dplyr::bind_rows(
    sars_data,
    sars_data_i)
}


# fit a SAR model
mod_fit_1 <- sars::sar_loga(
  data = sars_data, 
  grid_start = "partial" )

# model summary
summary(mod_fit_1)
## 
## Model:
## Logarithmic
## 
## Call: 
## S == c + z * log(A)
## 
## Did the model converge: TRUE
## 
## Residuals:
##           0%          25%          50%          75%         100% 
## -12.21796100  -1.13521195   0.06069497   1.21292858   3.73555597 
## 
## Parameters:
##     Estimate Std. Error    t value   Pr(>|t|)       2.5%   97.5%
## c 2.2759e+01 9.7078e-01 2.3445e+01 5.6829e-42 2.0818e+01 24.7010
## z 3.5468e+00 3.0893e-01 1.1481e+01 7.6797e-20 2.9290e+00  4.1647
## 
## R-squared: 0.57, Adjusted R-squared: 0.56
## AIC: 443.76, AICc: 444.01, BIC: 451.58
## Observed shape: convex up, Asymptote: FALSE
# plot SAR fit
plot(mod_fit_1)

Now we will compare the “SAR” from timestep 1 to the “SAR” for timestep 2:

######################
# SAR for timestep 95
#####################

# assemble data for SAR analysis
# NOTE: not a true SAR because I'm using sample number as a proxy for area
# This could be improved with some more sophisticated coding. 
my_timestep <- 95
n_samples <- 100
min_sample_size <- 2
max_sample_size <- 50

sars_data <- data.frame()

Jt_wide <- simoutput$J.long %>% 
  dplyr::filter(timestep == my_timestep) %>%
  dplyr::select(-timestep) %>%
  tidyr::pivot_wider(id_cols = site,names_from = "spp",values_from = "count") %>%
  tibble::column_to_rownames(var = "site")

for(sample_size in sample(min_sample_size:max_sample_size, size = n_samples, replace = TRUE)){
  sars_data_i <- data.frame()
  sars_data_i <- data.frame(
    a = sample_size,
    s = sum(
      colSums(
        Jt_wide[
          sample(1:nrow(Jt_wide), size = sample_size, replace = TRUE),]) > 0))
  
  sars_data <- dplyr::bind_rows(
    sars_data,
    sars_data_i)
}


# fit a SAR model
mod_fit_95 <- sars::sar_loga(
  data = sars_data, 
  grid_start = "partial" )

# model summary
summary(mod_fit_95)
## 
## Model:
## Logarithmic
## 
## Call: 
## S == c + z * log(A)
## 
## Did the model converge: TRUE
## 
## Residuals:
##         0%        25%        50%        75%       100% 
## -8.1103203 -1.2050736  0.1744391  1.3628114  5.3936701 
## 
## Parameters:
##     Estimate Std. Error    t value   Pr(>|t|)       2.5%   97.5%
## c 9.7278e+00 1.2239e+00 7.9484e+00 3.2675e-12 7.2801e+00 12.1755
## z 4.8800e+00 3.8004e-01 1.2841e+01 1.0135e-22 4.1199e+00  5.6401
## 
## R-squared: 0.63, Adjusted R-squared: 0.62
## AIC: 480, AICc: 480.25, BIC: 487.82
## Observed shape: convex up, Asymptote: FALSE
# Compare plots of SAR fits
par(mfcol = c(1,2))
plot(mod_fit_1)
title(main = "Timestep 1")
plot(mod_fit_95)
title(main = "Timestep 95")

Note change in the SAR curve after 95 timesteps of an enviornmental filter acting on the community.