rm(list = ls())
library(readxl)
library(marelac)
library(oce)
library(dplyr)
library(ggplot2)
library(cowplot)
library(ggpubr)
library(tidyr)
library(RColorBrewer)
library(reshape2)
library(tidyverse)
library(openxlsx)
library(metR)
library(scales)
library(gridExtra)
library(patchwork)
######## read in data ##########
t = read_delim("Randomized_1h_Ricart.txt", delim= "\t")
t=data.frame(t)
t$dep_date = as.factor(colsplit(t$deployment, '_', names = c('Dates','Rates','sdf'))[[2]])
######## AOU calculation ##########
t$SG_salinity[is.na(t$SG_salinity)] = mean(t$SG_salinity,na.rm = TRUE) # Replace NA salinity with average Sal
t$NV_salinity[is.na(t$NV_salinity)] = mean(t$NV_salinity,na.rm = TRUE) # Replace NA salinity with average Sal
######## Make delta_X variables ##########
t$delta_DO = t$SG_DO_saturation- t$NV_DO_saturation
t$delta_T = t$SG_temperature_C- t$NV_temperature_C
t$delta_S = t$SG_salinity- t$NV_salinity
t$site = as.factor(t$site)
#### Fix dates ########
t$datetime=as.POSIXct(t$date,format = "%Y-%M-%D") + (t$time_UTC*60*60)
attr(t$datetime, "tzone") <- "UTC"
t$datetime_local = t$datetime
attr(t$datetime_local, "tzone") <- "America/Los_Angeles"
t$hour_local = as.numeric(format(as.POSIXct(t$datetime_local,format="%Y-%M-%D H:%M:%S"),"%H"))
t$daytime <- ifelse(t$datetime >= t$sunrise_time_UTC & t$datetime < t$sunset_time_UTC,"Day","Night")
t$season <- season(t$datetime_local)
t$region <- ifelse(t$site == "NP" | t$site == "MB",("Southern Sites"),("Northern Sites"))
#### Shorten Variable names ########
t <- rename(t,c("SG_TA"='SG_total_alkalinity_mols_kg.3',"NV_TA"='NV_Total_Alkalinity_mols_kg.3',"RMS_vel"='SG_RMS_velocity_m_s.1',"WCH"="Water_height_m_calibrated_with_ADV"))
######## calculate delta[H+] ########
t$SG_H <- 10^-(t$SG_pH) # mol/kg
t$NV_H <- 10^-(t$NV_pH) # mol/kg
t$SG_H <- t$SG_H * 10^9 # nmol/kg
t$NV_H <- t$NV_H * 10^9 # nmol/kg
t$delta_H <- t$SG_H - t$NV_H # nmol/kg
####### Filter data by thresholds ##########
t$delta_H_thresh <- ifelse(t$delta_H > 0 ,("Δ[H+] > 0"),("Δ[H+] < 0")) # binary variable of delta_H > 0
t$delta_DO_thresh <- ifelse(t$delta_DO > 0 ,("ΔDO% > 0"),("ΔDO% < 0")) # binary variable of delta_DO% > 0
t$delta_S_thresh <- ifelse(abs(t$delta_S) < 0.1 ,"|ΔS| < 0.1","|ΔS| > 0.1") # binary variable of |delta_S| < 0.1 (i.e. minimal mixing effect)
t$thresh <- paste(t$delta_DO_thresh ,"&",t$delta_S_thresh) # = 1 if only one condition met, =2 if both conditions met
t$thresh[t$thresh =="NA & |ΔS| < 0.1" | t$thresh =="NA & |ΔS| > 0.1"] = NA
a <- t[complete.cases(t$thresh),] # New data frame, only including rows with Δ[H+], ΔDO, and Δsalinity
# bar chart for threshold (split by region)
summ <- a %>%
group_by(delta_H_thresh,thresh,region) %>%
#group_by(delta_H_thresh,thresh) %>%
summarise(count = n_distinct(delta_H, na.rm = TRUE))
p1 <- ggplot(summ,aes(x=as.factor(delta_H_thresh),y=count,fill= as.factor(thresh))) + facet_wrap(~region,dir="v",scales="fixed")+
geom_bar(position = 'dodge', stat='identity') +
geom_text(aes(label=count),position=position_dodge(width=0.9),vjust=-.25)+
theme_bw()+theme(legend.box.background = element_rect(),legend.position = c(0.7,0.3),panel.border = element_rect(colour = "black", fill=NA),panel.grid = element_blank()) +
geom_hline(yintercept = 0)+
labs(x=NULL,fill="Criteria",y="Count") + ylim(0,750)
p1

# bar chart for threshold (split by season and region)
summ_b <- a %>%
group_by(delta_H_thresh,thresh,season,region) %>%
summarise(count = n_distinct(delta_H, na.rm = TRUE))
p1a <- ggplot(summ_b,aes(x=as.factor(delta_H_thresh),y=count,fill= as.factor(thresh))) + facet_wrap(~ region + season, ncol=4)+
geom_bar(position = 'dodge', stat='identity') +
geom_text(aes(label=count),position=position_dodge(width=0.9),vjust=-.25)+
theme_bw()+theme(legend.box.background = element_rect(),legend.position = c(0.75,0.3),panel.border = element_rect(colour = "black", fill=NA),panel.grid = element_blank()) +
geom_hline(yintercept = 0) + ylim(0,400)+
labs(x=NULL,fill="Criteria",y="Count")
p1a

#### salinity vs [H+]
p2 <- ggplot(data=t)+
geom_point(size=0.7,aes(x=(SG_salinity),y=(SG_H),color=site,shape="SG"))+
geom_point(size=0.7,aes(x=(NV_salinity),y=(NV_H),color=site,shape="NV"))+
theme_bw()+theme(legend.direction = "vertical",legend.box = "vertical",legend.position = "right",panel.border = element_rect(colour = "black", fill=NA),panel.grid = element_blank(),
legend.spacing.y = unit(0, 'cm')) + labs(x="Salinity",y=expression(paste("[H"^{'+'},"]",' (nmol kg'^{-1},')')),color="Site",shape=NULL)+
facet_wrap(nrow=2,~region,scales = "free")
p2

######## Hourly climatology screened by thresholds (for [H+]) ########
hourly_screened <- a %>%
group_by(hour_local,thresh) %>%
dplyr::summarize(mean_SG_H = mean(SG_H, na.rm=TRUE),sd_SG_H = sd(SG_H, na.rm=TRUE),mean_NV_H = mean(NV_H, na.rm=TRUE),sd_NV_H = sd(NV_H, na.rm=TRUE),mean_dH = mean(delta_H, na.rm=TRUE),med_dH = median(delta_H, na.rm=TRUE),sd_dH = sd(delta_H, na.rm=TRUE),n_dH = sum(!is.na(delta_H)))
hourly_screened$se_dH <- hourly_screened$sd_dH/sqrt(hourly_screened$n_dH)
p3 <- ggplot(data=hourly_screened,aes(x=hour_local))+
geom_pointrange(aes(y=mean_dH,ymin=mean_dH-se_dH,ymax=mean_dH+se_dH))+
geom_smooth(aes(y=mean_dH))+
geom_point(aes(y=mean_dH,size=(n_dH)))+
geom_hline(yintercept=0)+
theme_bw()+ theme(legend.position = c(.78,0.36),legend.direction = "vertical",panel.border = element_rect(colour = "black", fill=NA),panel.grid = element_blank()) +
labs(x="Hour",color = NULL,y=expression(paste("Δ[H"^{'+'},"]",' (nmol kg'^{-1},')'))) +
guides(size = 'none')+
facet_wrap(~thresh)
p3

# pH vs delta pH
p4 <- ggplot()+
geom_point(size=0.5,shape=20,aes(x=t$NV_pH,y=-t$delta_pH,color="NV"))+
geom_smooth(method=lm,aes(x=t$NV_pH,y=-t$delta_pH,color="NV"))+
geom_point(size=0.5,shape=1,aes(x=t$SG_pH,y=t$delta_pH,color="SG"))+
geom_smooth(method=lm,aes(x=t$SG_pH,y=t$delta_pH,color="SG"))+ geom_hline(yintercept=0)+
theme_bw()+ theme(legend.position = c(.80,0.15),legend.direction = "vertical",panel.grid = element_blank()) +
labs(x="pH (Either NV or SG)",y="ΔpH",color = NULL) +
geom_text(aes(x=7.8,1),label=expression(paste("NV: slope=0.58 R"^{2},"=0.21")))+
geom_text(aes(x=7.8,0.85),label=expression(paste("SG: slope=0.71 R"^{2},"=0.45")))
p4

# Regression statistics
pval_SG <- summary(lm(t$delta_pH ~ t$SG_pH))$coefficients[2,4]
r2_SG <- summary(lm(t$delta_pH ~ t$SG_pH))$r.squared
slope_SG <- summary(lm(t$delta_pH ~ t$SG_pH))$coefficients[2]
pval_NV <- summary(lm(-t$delta_pH ~ t$NV_pH))$coefficients[2,4]
r2_NV <- summary(lm(-t$delta_pH ~ t$NV_pH))$r.squared
slope_NV <- summary(lm(-t$delta_pH ~ t$NV_pH))$coefficients[2]
######## compile figure ########
ggsave("fig1.png",width=8 , height = 9,dpi=600,plot=ggarrange(p4,p2,p1,p3,
ncol = 2,
nrow = 2,
labels = c("A","B","C","D"),
label.x = 0,
label.y = 1,
hjust = -0.5,
vjust = 1.5,
font.label = list(size = 14, color = "black", face = "bold", family = NULL),
align = c( "v"),
widths = c(1,1,1,1),
heights = c(1,1.2,1.2,1),
legend = NULL,
common.legend = FALSE,
legend.grob = NULL
))