setwd("C:/Users/javiera/Dropbox/BQI")
suppressMessages(suppressWarnings(library(readxl)))
suppressMessages(suppressWarnings(library(tidyverse)))

##Read spp abund data####
ambi_dat <- suppressWarnings(read_excel('LumpedAggedInfPCMet2017-01-19_BI.xlsx',na='empty')) %>%
  select(cesymr,dorvilleidae:`lunella smaragda`)

###read EG data###
eg <- read_excel('EG_FFG_Javier.xlsx')

ambi_dat1 <- ambi_dat %>% 
  gather(sp,abund,-cesymr) %>% 
  left_join(eg,by='sp') %>% 
  group_by(cesymr) %>%
  mutate(N = sum(abund)) %>% 
  group_by(cesymr,AMBI_R) %>% 
  mutate(prop=sum(abund)/N) %>% 
  group_by(cesymr,AMBI_R) %>% 
  summarise(eg_prop = mean(prop,na.rm=T)*100) %>% 
  spread(AMBI_R,eg_prop) %>% 
  mutate(AMBI = (0*I + 1.5*II + 3*III + 4.5*IV + 6*V) / 100)


ggplot(ambi_dat1,aes(AMBI))+
  geom_histogram(color=1,fill='gray80')+
  theme_bw()+
  geom_vline(xintercept = c(1.2,3.3,5,6,7),lty=2,col=2)+
  annotate(geom = 'text',x = 0.4, y=900, label = 'Unpolluted')+
  annotate(geom = 'text',x = 2.25, y=900, label = 'Slight')+
  annotate(geom = 'text',x =  4, y=900, label = 'Mean')+
  annotate(geom = 'text',x = 5.5, y=900, label = 'Heavy')+
  annotate(geom = 'text',x = 6.5, y=900, label = 'Bad')+
  ylab('Number of samples')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 13 rows containing non-finite values (stat_bin).

theme_set(theme_bw())
ggplot(ambi_dat1,aes(`<NA>`))+
  geom_histogram(color=1,fill='gray80')+
  theme_bw()+
  xlab('Percentage of unassigned taxa (%)')+
  ylab('Number of samples')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 13 rows containing non-finite values (stat_bin).

ambi_dat2 <- suppressWarnings(read_excel('LumpedAggedInfPCMet2017-01-19_BI.xlsx',na='empty')) %>% 
  dplyr::select(cesymr,sedlt63OOlt2mm,council) %>% 
  left_join(ambi_dat1,by = 'cesymr')

library(viridis)
## Warning: package 'viridis' was built under R version 3.2.5
ggplot(ambi_dat2,aes(x=sedlt63OOlt2mm,y=AMBI)) +
  geom_point(alpha=.5,aes(color=council)) +
  geom_smooth(method='lm')+
  scale_color_viridis(discrete=T,name = 'Council') +
  xlab(~ "Percentage mud (<63 "*mu * "m)") +
  ylab('BQI')+
  geom_hline(yintercept = c(1.2,3.3,5,6,7),lty=2,col=2)+
  annotate(geom = 'text',y = 0.4, x=90, label = 'Unpolluted')+
  annotate(geom = 'text',y = 2.25, x=90, label = 'Slight')+
  annotate(geom = 'text',y =  4, x=90, label = 'Mean')+
  annotate(geom = 'text',y = 5.5, x=90, label = 'Heavy')+
  annotate(geom = 'text',y = 6.5, x=90, label = 'Bad')+
  ylab('AMBI')
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).