###read latest data ######
library(readxl)
## Warning: package 'readxl' was built under R version 3.1.3
mtm <- suppressMessages(
suppressWarnings(read_excel('LumpedAggedInfPCMet2017-01-19_BI.xlsx',na='empty')))
setwd("P:/MTM2 Oranga Taiao/EMP Data/EMP Data/Biotic Index calculator/BQI")
suppressMessages(
suppressWarnings(
library(tidyverse)))
####Calculate sensitivity values ####
###get only spp variables
sp <- mtm %>%
select(dorvilleidae:`lunella smaragda`)
sp <- ceiling(sp) #to round-up decimal point in abundance of samples with abundance corrected for core diameter
f <- data.frame(taxa=colnames(sp),
freq=colSums(sp!=0),
row.names = 1:ncol(sp))#get the number of cores were each taxon occur #
####Sensi10: Sensitivity scores (ES50_0.05) for taxa that occur in 10% of samples
f10 <- f %>%
dplyr::filter(freq>nrow(sp)*.1) ##list of taxa taxa that occur in >10% samples
suppressMessages(suppressWarnings(library(vegan)))#for rarefy function
sp1 <- sp %>%
dplyr::filter(rowSums(.)>=50) %>% ##remove core with < 50 individuals
mutate(ES50 = rarefy(.,50)) %>% #get the ES50 rarefied richness based on Hurlbert's (1971) formulation.
select(match(f10$taxa,names(sp)),ES50) %>% ##select taxa that occur in >10% samples
gather(sp,abund,-ES50) ##convert into long format
sensi10 <-
sp1 %>%
group_by(sp) %>%
dplyr::filter(abund>0) %>%
arrange(ES50) %>%
mutate(p=percent_rank(cumsum(abund))) %>%
slice(which.min(abs(p - .05))) %>%
data.frame(.) %>%
select(ES50,sp) %>%
rename(sensitivity = ES50) %>%
arrange(sensitivity) %>%
print()
## sensitivity sp
## 1 3.513288 potamopyrgus
## 2 3.892110 amphipoda
## 3 3.944333 austrohelice crassa
## 4 4.089359 hemiplax hirtipes
## 5 4.121021 spionidae all
## 6 4.150406 nereididae
## 7 4.362826 arthritica bifurca
## 8 4.366324 isopoda
## 9 4.533346 oligochaeta
## 10 4.599141 cossura consimilis
## 11 4.651881 paphies australis
## 12 4.724596 halicarcinus
## 13 4.727273 capitellidae
## 14 5.234194 nephtyidae
## 15 5.236645 austrovenus stutchburyi
## 16 5.255044 heteromastus filiformis
## 17 5.443262 nemertea
## 18 5.498591 orbiniidae all
## 19 5.502914 paraonidae
## 20 5.524700 cominella glandiformis
## 21 5.608283 glyceridae
## 22 6.000000 edwardsia
## 23 6.026442 macomona liliana
## 24 6.051962 cumacea
## 25 6.342854 notoacmea
## 26 6.465517 actiniidae
## 27 6.629071 diloma
## 28 6.641215 magelona
## 29 6.699195 syllidae
## 30 7.058335 maldanidae
## 31 7.336834 zeacumantus
## 32 7.363923 nuculidae
write.csv(sensi10,'sensitivity_10%.csv',row.names = F)
###Sensi5: Calculate sensitivity for taxa that ocurr in >5% of the samples#####
f5 <- f %>%
dplyr::filter(freq>nrow(sp)*.05) ##list of taxa taxa that occur in >20 cores
sp2 <- sp %>%
dplyr::filter(rowSums(.)>=50) %>% ##remove core with < 50 individuals
mutate(ES50 = rarefy(.,50)) %>% #get the ES50 rarefied richness based on Hurlbert's (1971) formulation.
select(match(f5$taxa,names(sp)),ES50) %>% ##select taxa that occur in >5% of samples
gather(sp,abund,-ES50) ##convert into long format
sensi5 <-
sp2 %>%
group_by(sp) %>%
dplyr::filter(abund>0) %>%
arrange(ES50) %>%
mutate(p=percent_rank(cumsum(abund))) %>%
slice(which.min(abs(p - .05))) %>%
data.frame(.) %>%
select(ES50,sp) %>%
rename(sensitivity = ES50) %>%
arrange(sensitivity) %>%
print()
## sensitivity sp
## 1 3.513288 potamopyrgus
## 2 3.743073 amphibola crenata
## 3 3.892110 amphipoda
## 4 3.944333 austrohelice crassa
## 5 4.089359 hemiplax hirtipes
## 6 4.121021 spionidae all
## 7 4.150406 nereididae
## 8 4.362826 arthritica bifurca
## 9 4.366324 isopoda
## 10 4.533346 oligochaeta
## 11 4.599141 cossura consimilis
## 12 4.651881 paphies australis
## 13 4.724596 halicarcinus
## 14 4.727273 capitellidae
## 15 4.814693 nematoda
## 16 4.946660 mysida
## 17 5.234194 nephtyidae
## 18 5.236645 austrovenus stutchburyi
## 19 5.255044 heteromastus filiformis
## 20 5.443262 nemertea
## 21 5.498591 orbiniidae all
## 22 5.502914 paraonidae
## 23 5.524700 cominella glandiformis
## 24 5.608283 glyceridae
## 25 6.000000 edwardsia
## 26 6.026442 macomona liliana
## 27 6.051962 cumacea
## 28 6.342854 notoacmea
## 29 6.465517 actiniidae
## 30 6.629071 diloma
## 31 6.641215 magelona
## 32 6.699195 syllidae
## 33 6.882395 goniadidae
## 34 7.058335 maldanidae
## 35 7.336834 zeacumantus
## 36 7.363923 nuculidae
## 37 8.604653 hiatula
write.csv(sensi5,'sensitivity_5%.csv',row.names = F)###write to csv file
###Sensi5_all:dont remove samples with <50 and use taxa in 5% of samples#####
f5_all <- f %>%
dplyr::filter(freq>nrow(sp)*.05) ##list of taxa taxa that occur in >5% cores
sp3<- sp %>%
dplyr::filter(rowSums(.)>0) %>% ##remove core with < 0 individuals
mutate(ES50 = rarefy(.,50)) %>% #get the ES50 rarefied richness based on Hurlbert's (1971) formulation.
select(match(f5_all$taxa,names(sp)),ES50) %>% ##select taxa that occur in >5% of samples
gather(sp,abund,-ES50) ##convert into long format
## Warning in rarefy(., 50): Requested 'sample' was larger than smallest site
## maximum (1)
sensi5_all <-
sp3 %>%
group_by(sp) %>%
dplyr::filter(abund>0) %>%
arrange(ES50) %>%
mutate(p=percent_rank(cumsum(abund))) %>%
slice(which.min(abs(p - .05))) %>%
data.frame(.) %>%
select(ES50,sp) %>%
rename(sensitivity = ES50) %>%
arrange(sensitivity) %>%
print()
## sensitivity sp
## 1 2.976488 austrohelice crassa
## 2 3.419637 potamopyrgus
## 3 3.660779 nereididae
## 4 3.769189 spionidae all
## 5 3.805448 amphibola crenata
## 6 3.880534 amphipoda
## 7 3.974684 hemiplax hirtipes
## 8 4.000000 arthritica bifurca
## 9 4.000000 austrovenus stutchburyi
## 10 4.000000 oligochaeta
## 11 4.000000 paphies australis
## 12 4.139791 isopoda
## 13 4.401274 cossura consimilis
## 14 4.629846 capitellidae
## 15 4.649613 halicarcinus
## 16 4.651881 mysida
## 17 4.874798 edwardsia
## 18 4.959281 paraonidae
## 19 5.000000 cominella glandiformis
## 20 5.000000 diloma
## 21 5.000000 heteromastus filiformis
## 22 5.000000 macomona liliana
## 23 5.000000 nemertea
## 24 5.000000 nephtyidae
## 25 5.112769 nematoda
## 26 5.203340 orbiniidae all
## 27 5.396116 glyceridae
## 28 5.777435 notoacmea
## 29 6.000000 cumacea
## 30 6.000000 maldanidae
## 31 6.000000 zeacumantus
## 32 6.136017 syllidae
## 33 6.302414 actiniidae
## 34 6.333013 magelona
## 35 6.833064 hiatula
## 36 6.866387 goniadidae
## 37 7.000000 nuculidae
write.csv(sensi5_all,'sensitivity_5%_all.csv',row.names = F)##write to csv file
###Calculate BQI and compare results to Nigel's calculator####
###Get data on species abundance only and cesymr for Tasman disctrict only
dat <- mtm %>% select(cesymr,council,dorvilleidae:`lunella smaragda`) %>%
dplyr::filter(council=='tasmandistrictcouncil') %>%
select(-council)
####Calculate richness and abundace
dat1 <- dat %>%
mutate(S = rowSums(.[,-1] > 0),##richness
N = rowSums(.[,-1])) %>% ###abundace
select(N,S,cesymr)###collate cesymr data
###Convert data into long format and select taxa with sensitivity values only
BQI_data_tas <- dat %>%
dplyr::select(match(f10$taxa,names(dat)),cesymr) %>%
gather(sp,abund,-cesymr) %>%
left_join(sensi10,by='sp') %>%
left_join(dat1,by='cesymr')
BQI <-
BQI_data_tas %>%
group_by(cesymr) %>%
mutate(v1 = (abund/ #abundace of a given taxa
sum(abund))*##total abundance of taxa with sensitivity values
sensitivity) %>%
summarise(BQI = sum(v1)* ##sum of part 1 at the replicate level
dplyr::first(log10(S+1))*#log of total ichness at replicate level
(dplyr::first(N) / (dplyr::first(N)+5)),##abundance correction factor
N=mean(N))
write.csv(BQI,'bqi_tasman.csv',row.names = F)
##Add BQI index to original dataset
mtm1 <- left_join(dat,BQI,by = 'cesymr')
theme_set(theme_bw(base_size = 12))
###Read Anna's results from Nigel's calculator
library(readxl)
tas=read_excel('tas.xlsx') %>%
bind_cols(BQI)
ggplot(tas,aes(x=`BQI-new`,y=BQI))+
geom_point()+
xlab('BQI-R calculator')+
ylab('BQI-Excel calculator')+
geom_abline(intercept = 0,slope = 1,col=2)

##there is a small discrepancy between the two indices calculator, maybe a mismatch in the number of dp?
ggplot(tas,aes(x=N,y=`Total abundance (N)`))+
geom_point()+
xlab('N-R calculator')+
ylab('N-Excel calculator')+
geom_abline(intercept = 0,slope = 1,col=2)

##Total abundance match exactly
####Calculate BQI for the whole dataset and using different sensitivity values: sensi5_all, sensi10 and sensi5####
##BQI sensi10####
dat_all <- mtm %>% select(cesymr,dorvilleidae:`lunella smaragda`)
####Calculate richness and abundace
dat_div_all<- dat_all %>%
mutate(S = rowSums(.[,-1] > 0),##richness
N = rowSums(.[,-1])) %>% ###abundace
select(N,S,cesymr)###collate cesymr data
###Convert data into long format and select taxa with sensitivity values only
BQI_dat_10 <- dat_all %>%
dplyr::select(match(f10$taxa,names(dat_all)),cesymr) %>%
gather(sp,abund,-cesymr) %>%
left_join(sensi10,by='sp') %>%
left_join(dat_div_all,by='cesymr')
BQI_all_10 <-
BQI_dat_10 %>%
group_by(cesymr) %>%
mutate(v1 = (abund/ #abundace of a given taxa
sum(abund))*##total abundance of taxa with sensitivity values
sensitivity) %>%
summarise(BQI_10 = sum(v1)* ##sum of part 1 at the replicate level
dplyr::first(log10(S+1))*#log of total ichness at replicate level
(dplyr::first(N) / (dplyr::first(N)+5)))
####Sensi5#####
###Convert data into long format and select taxa with sensitivity values only
BQI_dat_5 <- dat_all %>%
dplyr::select(match(f5$taxa,names(dat_all)),cesymr) %>%
gather(sp,abund,-cesymr) %>%
left_join(sensi5,by='sp') %>%
left_join(dat_div_all,by='cesymr')
BQI_all_5<-
BQI_dat_5 %>%
group_by(cesymr) %>%
mutate(v1 = (abund/ #abundace of a given taxa
sum(abund))*##total abundance of taxa with sensitivity values
sensitivity) %>%
summarise(BQI_5 = sum(v1)* ##sum of part 1 at the replicate level
dplyr::first(log10(S+1))*#log of total ichness at replicate level
(dplyr::first(N) / (dplyr::first(N)+5)))
####Sensi5_all#####
###Convert data into long format and select taxa with sensitivity values only
BQI_dat_5all <- dat_all %>%
dplyr::select(match(f5_all$taxa,names(dat_all)),cesymr) %>%
gather(sp,abund,-cesymr) %>%
left_join(sensi5_all,by='sp') %>%
left_join(dat_div_all,by='cesymr')
BQI_all_5all<-
BQI_dat_5all %>%
group_by(cesymr) %>%
mutate(v1 = (abund/ #abundace of a given taxa
sum(abund))*##total abundance of taxa with sensitivity values
sensitivity) %>%
summarise(BQI_5all = sum(v1)* ##sum of part 1 at the replicate level
dplyr::first(log10(S+1))*#log of total ichness at replicate level
(dplyr::first(N) / (dplyr::first(N)+5)))
BQI <- full_join(BQI_all_10,BQI_all_5,by='cesymr') %>%
full_join(BQI_all_5all,by='cesymr')
names(BQI)
## [1] "cesymr" "BQI_10" "BQI_5" "BQI_5all"
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- abs(cor(x, y,use='complete.obs'))
txt <- format(c(r, 0.123456789), digits = digits)[1]
txt <- paste0(prefix, txt)
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex = cex.cor * r)
}
pairs(BQI[,-1],upper.panel = panel.cor)

mtm_BQI <- full_join(mtm,BQI,by='cesymr') %>%
select(sedlt63:As,BQI_10:BQI_5all) %>%
gather(var,value,-c(BQI_10:BQI_5all))
theme_set(theme_bw(base_size = 8))
ggplot(mtm_BQI,aes(x=value,y=BQI_10))+
geom_point(alpha=.3)+
geom_smooth()+
facet_wrap(~var,scales = 'free')
## Warning: Removed 60627 rows containing non-finite values (stat_smooth).
## Warning: Removed 60627 rows containing missing values (geom_point).

ggplot(mtm_BQI,aes(x=value,y=BQI_5))+
geom_point(alpha=.3)+
geom_smooth()+
facet_wrap(~var,scales = 'free')
## Warning: Removed 60627 rows containing non-finite values (stat_smooth).
## Warning: Removed 60627 rows containing missing values (geom_point).

ggplot(mtm_BQI,aes(x=value,y=BQI_5all))+
geom_point(alpha=.3)+
geom_smooth()+
facet_wrap(~var,scales = 'free')
## Warning: Removed 60627 rows containing non-finite values (stat_smooth).
## Warning: Removed 60627 rows containing missing values (geom_point).

library(broom)
## Warning: package 'broom' was built under R version 3.1.3
BQI_fit <- full_join(mtm,BQI,by='cesymr') %>%
select(sedlt63,BQI_10:BQI_5all) %>%
gather(var,value,-sedlt63) %>%
group_by(var) %>%
do(fit = lm(value ~ sedlt63, data = .))
tidy(BQI_fit, fit)
## Source: local data frame [6 x 6]
## Groups: var [3]
##
## var term estimate std.error statistic p.value
## (chr) (chr) (dbl) (dbl) (dbl) (dbl)
## 1 BQI_10 (Intercept) 5.11950517 0.0217597008 235.27461 0.000000e+00
## 2 BQI_10 sedlt63 -0.02856288 0.0007024158 -40.66378 0.000000e+00
## 3 BQI_5 (Intercept) 5.13555946 0.0218196437 235.36404 0.000000e+00
## 4 BQI_5 sedlt63 -0.02885671 0.0007043508 -40.96923 0.000000e+00
## 5 BQI_5all (Intercept) 4.63914925 0.0202129928 229.51323 0.000000e+00
## 6 BQI_5all sedlt63 -0.02555916 0.0006524872 -39.17190 9.417516e-305
glance(BQI_fit, fit)
## Source: local data frame [3 x 12]
## Groups: var [3]
##
## var r.squared adj.r.squared sigma statistic p.value df
## (chr) (dbl) (dbl) (dbl) (dbl) (dbl) (int)
## 1 BQI_10 0.1849488 0.1848370 1.360661 1653.543 0.000000e+00 2
## 2 BQI_5 0.1872156 0.1871041 1.364409 1678.478 0.000000e+00 2
## 3 BQI_5all 0.1739442 0.1738308 1.263944 1534.438 9.417516e-305 2
## Variables not shown: logLik (dbl), AIC (dbl), BIC (dbl), deviance (dbl),
## df.residual (int)
BQI5_mud <- full_join(mtm,BQI,by='cesymr') %>%
select(sedlt63,BQI_5,council)
library(viridis)
## Warning: package 'viridis' was built under R version 3.1.3
ggplot(BQI5_mud,aes(sedlt63,BQI_5)) +
geom_point(alpha=.5,aes(color=council)) +
geom_smooth(method='lm')+
scale_color_viridis(discrete=T,name = 'Council') +
xlab(~ "Percentage mud (<63 "*mu * "m)") +
ylab('BQI')
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing missing values (geom_point).
