1. AMBI

Calculate maximum Shannon diversity and richness for the whole dataset for using it in the M-AMBI index.

Hmax  <-  max(diversity(mtm[, -1]))
Smax  <-  max(specnumber(mtm[, -1]))

basic_indices <- mtm %>%
  transmute(
    cesymr =  cesymr,
    H = diversity(mtm[,-1]),
    S = specnumber(mtm[,-1]),
    N = rowSums(mtm[,-1]),
    J = H / log(S),
    d = (S - 1) / log(N)
  )

Calculate EG related indices using three diffrent EG classicifaction:

library(knitr)
kable(EG)
taxa EG_Keeley EG_Robertson_Keeley EG_Robertson BOPA ITI
abarenicola I I I Polychaeta IV
acanthochitona zelandica I I NA Polyplacophora NA
acari NA NA NA Arachnida NA
actiniidae II II NA NA I
alpheidae II II NA NA NA
amalda NA NA NA Gastropoda NA
ampharetidae NA II II Polychaeta II
amphibola crenata NA III III Gastropoda III
amphipoda NA II II Amphipoda III
annelida NA NA NA NA NA
anthozoa II II II NA I
antisolarium egenum NA NA NA Gastropoda III
aonides III I I Polychaeta III
araneae NA NA NA NA NA
arcuatula senhousia III III NA Bivalvia II
armandia maculata III II II Polychaeta IV
arthritica bifurca III IV IV Bivalvia III
asteroidea I I NA Asteroidea NA
austrofusus glans NA NA NA Gastropoda NA
austrohelice crassa II V V NA NA
austrominius modestus II II II Maxillopoda II
austrovenus stutchburyi NA II II Bivalvia II
biffarius filholi I I I NA II
bivalvia NA NA NA Bivalvia NA
boccardia III II II Polychaete III
borniola reniformis NA NA NA Bivalvia III
buccinulum II II NA Gastropoda NA
calyptraeidae I I NA Gastropoda III
capitellidae V IV IV OP IV
chaetognatha NA NA NA Sagittoidea NA
chiton II II NA Polyplacophora NA
cidaridae juvenile NA NA NA NA NA
cirratulidae II III III Polychaeta III
cirripedia NA NA NA Maxillopoda II
cominella adspersa NA III III Gastropoda NA
cominella glandiformis NA III III Gastropoda NA
cominella maculosa NA III III Gastropoda NA
copepoda NA II II Maxillopoda NA
corbula zelandica IV IV NA Bivalvia II
cossura consimilis I I NA Polychaeta IV
crangonidae I I NA NA NA
crustacea NA NA NA Arthropoda NA
cumacea II I I Cumacea III
cyclograpsus lavauxi NA NA NA NA NA
cyclomactra ovata NA II II Bivalvia II
daphnia NA NA NA NA NA
diloma II II II Gastropoda III
diplodonta II II NA Bivalvia II
divalucina cumingi NA NA NA Bivalvia NA
dorvilleidae IV II II Polychaeta NA
dosinia subrosea I I NA Bivalvia II
eatoniella I I I Gastropoda III
edwardsia II II II Anthozoa IV
enteropneusta II II NA NA II
epitoniidae I I NA Gastropoda NA
eunicidae II II NA Polychaeta NA
euterebra tristis NA NA NA Gastropoda III
fellaster zelandiae NA NA NA NA NA
flabelligeridae II II NA Polychaeta II
gastropoda NA NA NA Gastropoda NA
glyceridae III II II Polychaeta NA
goniadidae I II II Polychaeta NA
halicarcinus II III III Decapoda NA
halopyrgus pupoides II III III Gastropoda III
haminoea zelandiae II I I Gastropoda NA
haustrum scobina NA NA NA Gastropoda NA
hemigrapsus II II NA NA NA
hemiplax hirtipes I V V NA NA
hesionidae IV I I Polychaeta NA
heteromastus filiformis II III III Polychaeta NA
hiatula NA NA NA Bivalvia II
hirudinea IV IV NA Clitellata IV
holothuroidea I I NA Holothuroidea III
hunkydora australica NA NA NA Bivalvia NA
hydrozoa I I NA Hydrozoa NA
insecta NA NA NA NA NA
ischnochiton maorianus II II NA Polyplacophora NA
isopoda NA NA NA Isopoda NA
lasaea II II NA Bivalvia II
leptomya retiaria retiaria II II NA Bivalvia IV
lumbrineridae II II NA Polychaeta NA
lunella smaragda NA NA NA Gastropoda NA
macomona liliana NA II II Bivalvia III
mactridae I I NA Bivalvia III
magelona I III III Polychaeta IV
maldanidae I I I Polychaeta IV
manayunkia III III NA Polychaeta I
melanopsis NA NA NA Gastropoda NA
melliteryx parva NA NA NA Bivalvia II
micrelenchus NA I I Gastropoda III
myadora NA NA NA Bivalvia II
myllitella vivens vivens NA NA NA Bivalvia III
mysella III III NA Bivalvia II
mysida II I I Mysidacea III
mytilidae III III NA Bivalvia II
nassarius burchardi II II NA Gastropoda NA
neanthes III III NA Polychaeta NA
nebaliacea V V NA NA II
nematoda IV II II Nematoda IV
nemertea III III III Nemertea NA
neoguraleus NA NA NA Gastropoda III
nephtyidae II II NA Polychaeta NA
nepinnotheres atrinicola IV IV NA NA NA
nepinnotheres novaezelandiae IV IV NA NA NA
nereididae NA III III Polychaeta III
nereis III III III Polychaeta NA
nerita melanotragus I I NA Gastropoda NA
nicon aestuariensis III III III Polychaeta NA
notoacmea NA II II Gastropoda NA
notomastus III III NA Polychaeta IV
nuculidae I II II Bivalvia III
odostomia II II NA Gastropoda NA
oenonidae NA NA NA Polychaeta NA
oligochaeta V III III NA NA
onuphidae II II NA Polychaeta NA
opheliidae I I NA Polychaeta IV
ophiuroidea II II NA Ophiuroidea III
opisthobranchia NA NA NA Gastropoda III
orbinia papillosa I I I Polychaeta IV
orbiniidae NA NA NA Polychaeta IV
ostracoda II I I Ostracoda NA
ostreidae I I NA Bivalvia I
ovalipes catharus NA NA NA NA NA
owenia I II II Polychaeta III
oweniidae NA II II Polychaeta III
paguridae II II NA NA III
palaemonidae I I NA NA NA
paphies australis NA II II Bivalvia II
paphies donacina NA II II Bivalvia II
paraonidae II III III Polychaeta IV
paratya curvirostris NA NA NA NA NA
patiriella regularis NA NA NA Asteroidea NA
pectinaria I III III Polychaeta IV
perinereis III III III Polychaeta NA
peronaea gaimardi NA NA NA Bivalvia NA
perrierina turneri NA NA NA Bivalvia I
phoronida II II NA Phoronida II
phyllochaetopterus socialis I I NA Polychaeta II
phyllodocidae III II II Polychaeta NA
pisinna zosterophila NA NA NA Gastropoda III
platyhelminthes II II NA Platyhelminthes III
platynereis III I I Polychaeta NA
polychaeta NA NA NA Polychaeta NA
polydora IV III III Polychaeta IV
polynoidae I I I Polychaeta NA
potamopyrgus II III III Gastropoda III
pradoxa NA NA NA Gastropoda NA
priapulidae III III NA NA IV
prionospio II II II Polychaeta III
pseudarcopagia NA NA NA Bivalvia NA
pycnogonida II II NA NA NA
rhyssoplax NA NA NA Polyplacophora NA
risellopsis varia NA NA NA Gastropoda NA
rissoidae I I NA Gastropoda III
sabellariidae II I I Polychaeta II
sabellidae I I I Polychaeta II
scalibregmatidae NA NA NA Polychaeta IV
scolecolepides NA I I Polychaeta III
scolelepis III I I Polychaeta IV
scoloplos I I I Polychaeta IV
scoloplos cylindrifer I I I Polychaeta IV
serpulidae I I NA Polychaeta I
sigalionidae I I NA Polychaeta NA
sipuncula I II II Sipuncula III
solemya NA NA NA Bivalvia NA
sphaerodoridae NA NA NA Polychaeta NA
spionidae III III III Polychaeta IV
stomatopoda NA NA NA NA NA
syllidae II II II Polychaeta NA
sypharochiton pelliserpentis NA NA NA Polyplacophora NA
tanaidacea II I I Tanaidacea NA
terebellidae II II NA Polychaeta III
theora lubrica III III III Bivalvia IV
travisia I I NA Polychaeta IV
trichobranchidae II II NA Polychaeta III
trichoptera NA NA NA NA NA
trochustiaratus I I NA Gastropoda III
turbonilla I I NA Gastropoda NA
turridae NA NA NA Gastropoda NA
upogebia (acutigebia) danai I I NA Decapoda II
venerida I I NA Bivalvia II
xymene NA I I Gastropoda NA
zalipais lissa NA NA NA Gastropoda NA
zeacumantus NA II II Gastropoda IV
zethalia zelandica NA NA NA Gastropoda II
EG_K_AZTI_AB <- mtm %>%
  gather(sp, abund, -cesymr) %>%
  left_join(eg, by = 'sp') %>%
  group_by(cesymr) %>%
  mutate(
    N = sum(abund),
    H = diversity(abund),
    S = specnumber(abund),
    UA = sum(is.na(EG_K_AZTI_AB) & abund > 0) / S * 100) %>%
  drop_na(EG_K_AZTI_AB) %>%
  mutate(N1 = sum(abund),
         S1 = specnumber(abund)) %>%
  group_by(cesymr, EG_K_AZTI_AB) %>%
  mutate(prop_n = sum(abund) / N1,
         prop_s = specnumber(abund) / S1) %>%
  summarise(
    n = mean(prop_n, na.rm = T) * 100,
    s = mean(prop_s, na.rm = T) * 100,
    H = mean(H),
    S = mean(S),
    N = mean(N),
    N1 = mean(N1),
    S1 = mean(S1),
    UA = mean(UA)
  ) %>%
  gather(key, value, n, s) %>%
  unite(EG_comb, EG_K_AZTI_AB, key) %>%
  spread(EG_comb, value) %>%
  mutate(
    AMBI   = (1.5 * II_n + 3 * III_n + 4.5 * IV_n + 6 * V_n) / 100,
    M_AMBI = (S / Smax + H / Hmax + (1 - AMBI / 6)) / 3,
    AMBI_S = (1.5 * (II_n + II_s) + 3 * (III_n + III_s) + 4.5 * (IV_n + IV_s) + 6 * (V_n + V_s)) / 200,
    BENTIX = (6 * (I_n + II_n) + 2 * (III_n + IV_n + V_n)) / 100,
    MEDOCC = (2 * II_n + 4 * III_n + 6 * (IV_n + V_n)) / 100
  ) %>%
  select(cesymr,UA,AMBI:MEDOCC) %>% 
  mutate(EG = 'EG_K_AZTI_AB') %>% 
  gather(index, value, AMBI:MEDOCC) %>%
  unite(index1, index, EG)

####EG_R_K_AZTI_AB####
EG_R_K_AZTI_AB <- mtm %>%
  gather(sp, abund, -cesymr) %>%
  left_join(eg, by = 'sp') %>%
  group_by(cesymr) %>%
  mutate(
    N = sum(abund),
    H = diversity(abund),
    S = specnumber(abund),
    UA = sum(is.na(EG_R_K_AZTI_AB) & abund > 0) / S * 100
  ) %>%
  drop_na(EG_R_K_AZTI_AB) %>%
  mutate(N1 = sum(abund),
         S1 = specnumber(abund)) %>%
  group_by(cesymr, EG_R_K_AZTI_AB) %>%
  mutate(prop_n = sum(abund) / N1,
         prop_s = specnumber(abund) / S1) %>%
  summarise(
    n = mean(prop_n, na.rm = T) * 100,
    s = mean(prop_s, na.rm = T) * 100,
    H = mean(H),
    S = mean(S),
    N = mean(N),
    N1 = mean(N1),
    S1 = mean(S1),
    UA = mean(UA)
  ) %>%
  gather(key, value, n, s) %>%
  unite(EG_comb, EG_R_K_AZTI_AB, key) %>%
  spread(EG_comb, value) %>%
  mutate(
    AMBI   = (1.5 * II_n + 3 * III_n + 4.5 * IV_n + 6 * V_n) / 100,
    M_AMBI = (S / Smax + H / Hmax + (1 - AMBI / 6)) / 3,
    AMBI_S = (1.5 * (II_n + II_s) + 3 * (III_n + III_s) + 4.5 * (IV_n + IV_s) + 6 * (V_n + V_s)) / 200,
    BENTIX = (6 * (I_n + II_n) + 2 * (III_n + IV_n + V_n)) / 100,
    MEDOCC = (2 * II_n + 4 * III_n + 6 * (IV_n + V_n)) / 100
  ) %>%
  select(cesymr,UA,AMBI:MEDOCC) %>% 
  mutate(EG = 'EG_R_K_AZTI_AB') %>% 
  gather(index, value, AMBI:MEDOCC) %>%
  unite(index1, index, EG)

###EG_R_AB####
EG_R_AB <- mtm %>%
  gather(sp, abund, -cesymr) %>%
  left_join(eg, by = 'sp') %>%
  group_by(cesymr) %>%
  mutate(
    N = sum(abund),
    H = diversity(abund),
    S = specnumber(abund),
    UA = sum(is.na(EG_R_AB) & abund > 0) / S * 100
  ) %>%
  drop_na(EG_R_AB) %>%
  mutate(N1 = sum(abund),
         S1 = specnumber(abund)) %>%
  group_by(cesymr, EG_R_AB) %>%
  mutate(prop_n = sum(abund) / N1,
         prop_s = specnumber(abund) / S1) %>%
  summarise(
    n = mean(prop_n, na.rm = T) * 100,
    s = mean(prop_s, na.rm = T) * 100,
    H = mean(H),
    S = mean(S),
    N = mean(N),
    N1 = mean(N1),
    S1 = mean(S1),
    UA = mean(UA)
  ) %>%
  gather(key, value, n, s) %>%
  unite(EG_comb, EG_R_AB, key) %>%
  spread(EG_comb, value) %>%
  mutate(
    AMBI   = (1.5 * II_n + 3 * III_n + 4.5 * IV_n + 6 * V_n) / 100,
    M_AMBI = (S / Smax + H / Hmax + (1 - AMBI / 6)) / 3,
    AMBI_S = (1.5 * (II_n + II_s) + 3 * (III_n + III_s) + 4.5 * (IV_n + IV_s) + 6 * (V_n + V_s)) / 200,
    BENTIX = (6 * (I_n + II_n) + 2 * (III_n + IV_n + V_n)) / 100,
    MEDOCC = (2 * II_n + 4 * III_n + 6 * (IV_n + V_n)) / 100
  ) %>%
  select(cesymr,UA,AMBI:MEDOCC) %>% 
  mutate(EG = 'EG_R_AB') %>% 
  gather(index, value, AMBI:MEDOCC) %>%
  unite(index1, index, EG)

Based on correlations and Anna comments select 2 EG classifications:

EG_K_AZTI_AB <- EG_K_AZTI_AB %>% mutate(EG = 'K_AZTI_AB')
EG_R_K_AZTI_AB <- EG_R_K_AZTI_AB %>% mutate(EG = 'EG_R_K_AZTI_AB')
all_ambi_long <- bind_rows(EG_K_AZTI_AB,EG_R_K_AZTI_AB)

Percentage of unassigned taxa for easch EG classification

theme_set(theme_bw(base_size = 8))
ggplot(all_ambi_long, aes(x = value)) +
  geom_histogram(position = 'identity',color=1,fill='gray80',bins=20) +
  facet_wrap(~ index1, scales = 'free')
## Warning: Removed 1110 rows containing non-finite values (stat_bin).

ggplot(all_ambi_long, aes(x = UA)) +
  geom_histogram(position = 'identity',color=1,fill='gray80') +
  facet_wrap(~ index1, scales = 'free')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 130 rows containing non-finite values (stat_bin).

AMBI_K_wide <- EG_K_AZTI_AB %>% spread(index1,value) %>% rename(UA_K = UA) %>% select(-EG)
AMBI_R_wide <- EG_R_K_AZTI_AB %>% spread(index1,value) %>% rename(UA_R = UA) %>% select(-EG)
## Warning: Removed 13 rows containing non-finite values (stat_density).

## Warning: Removed 13 rows containing non-finite values (stat_density).

2. BQI

Calculate sensitivity for taxa that ocurr in >5% of the samples

sp <-
  ceiling(mtm[,-1]) #to round-up decimal point in abundance of samples with abundance corrected for core diameter

f <- data.frame(
  taxa = colnames(sp),
  freq = colSums(sp != 0),
  row.names = 1:ncol(sp)
  )#get the number of cores were each taxon occur #
  

f5 <- f %>%
  dplyr::filter(freq > nrow(sp) * .05) ##list of taxa taxa that occur in >5% cores

sp2 <- sp %>%
  dplyr::filter(rowSums(.) >= 50) %>% ##remove core with < 50 individuals
  mutate(ES50 = rarefy(., 50)) %>%  #get the ES50 rarefied richness based on Hurlbert's (1971) formulation.
  select(match(f5$taxa, names(sp)), ES50) %>% ##select taxa that occur in >5% of samples
  gather(sp, abund, -ES50) ##convert into long format

sensi5 <-
  sp2 %>%
  group_by(sp) %>%
  dplyr::filter(abund > 0) %>%
  arrange(ES50) %>%
  mutate(p = percent_rank(cumsum(abund))) %>%
  slice(which.min(abs(p - .05))) %>%
  data.frame(.) %>%
  select(ES50, sp) %>%
  rename(sensitivity = ES50) %>%
  arrange(sensitivity)

kable(sensi5)
sensitivity sp
3.513288 potamopyrgus
3.743073 amphibola crenata
3.892110 amphipoda
3.944333 austrohelice crassa
4.089359 hemiplax hirtipes
4.121021 spionidae all
4.150406 nereididae
4.362826 arthritica bifurca
4.366324 isopoda
4.533346 oligochaeta
4.599141 cossura consimilis
4.651881 paphies australis
4.724596 halicarcinus
4.727273 capitellidae
4.814693 nematoda
4.946660 mysida
5.234194 nephtyidae
5.236645 austrovenus stutchburyi
5.255044 heteromastus filiformis
5.443262 nemertea
5.498591 orbiniidae all
5.502914 paraonidae
5.524700 cominella glandiformis
5.608283 glyceridae
6.000000 edwardsia
6.026442 macomona liliana
6.051962 cumacea
6.342854 notoacmea
6.465517 actiniidae
6.629071 diloma
6.641215 magelona
6.699195 syllidae
6.882395 goniadidae
7.058335 maldanidae
7.336834 zeacumantus
7.363923 nuculidae
8.604653 hiatula

BQI calculations

####Calculate richness and abundace
dat1 <- mtm %>%
  mutate(S = rowSums(.[, -1] > 0), ##richness
         N = rowSums(.[, -1])) %>% ###abundance
  select(N, S, cesymr)###collate cesymr data

###Convert data into long format and select taxa with sensitivity values only
BQI_data <- mtm %>%
  dplyr::select(match(f5$taxa, names(mtm)), cesymr) %>%
  gather(sp, abund, -cesymr) %>%
  left_join(sensi5, by = 'sp') %>%
  left_join(dat1, by = 'cesymr')

###BQI calculation #####
BQI <-
  BQI_data %>%
  group_by(cesymr) %>%
  mutate(v1 = (abund / #abundace of a given taxa
                 sum(abund)) * ##total abundance of taxa with sensitivity values
           sensitivity) %>%
  summarise(BQI = sum(v1) * ##sum of part 1 at the replicate level
              dplyr::first(log10(S + 1)) * #log of total ichness at replicate level
              (dplyr::first(N) / (dplyr::first(N) + 5)),
            ##abundance correction factor
            N = mean(N),
            S = mean(S)) %>% 
  select(cesymr,BQI)

3. ITI

iti <- mtm %>% 
  gather(sp,abund,-cesymr) %>% 
  left_join(eg,by='sp') %>% 
  group_by(cesymr) %>%
  mutate(N = sum(abund),
         S = specnumber(abund),
         UA = sum(is.na(FFG_KNRAB)&abund>0)/S*100) %>% 
  drop_na(FFG_KNRAB) %>%
  mutate(N1= sum(abund)) %>% 
  group_by(cesymr,FFG_KNRAB) %>% 
  mutate(prop_n=sum(abund)/N1) %>% 
  summarise(eg_n_prop = mean(prop_n)*100,
            UA_ITI = mean(UA)) %>%
  spread(FFG_KNRAB,eg_n_prop) %>%
  mutate(ITI   = 100 - (33.33 * ( (II + 2*III + 3*IV) / (I + II + III +IV) ) )) %>% 
  select(cesymr, UA_ITI,ITI)

4. BOPA

###BOPA calculation####
bopa <- mtm %>% 
  gather(sp,abund,-cesymr) %>% 
  left_join(eg,by='sp') %>% 
  group_by(cesymr) %>%
  mutate(N = sum(abund),
         S = specnumber(abund),
         UA = sum(is.na(BOPA_EG_R_K_AZTI_AB)&abund>0)/S*100) %>% 
  group_by(cesymr,BOPA_EG_R_K_AZTI_AB) %>% 
  mutate(prop_n=sum(abund)) %>% 
  summarise(prop_n = mean(prop_n),
            UA = mean(UA),
            N = mean(N)) %>%
  spread(BOPA_EG_R_K_AZTI_AB,prop_n) %>%
  select(cesymr,Amphipoda,OP,N) %>% 
  mutate(BOPA   = log10((OP/N)/((Amphipoda/N) + 1)+1)) %>% 
  select(cesymr,BOPA)