# Load required libraries
pacman::p_load(pacman, dplyr, ggplot2, lubridate, gridExtra, viridis, tidyr, maps)

# Load the cruise dataset (path is relative to the working directory)
data <- read.csv("SAOcean20190107_20190122.csv")

# Restore the colon-separated name of the time column.
# NOTE(review): presumably read.csv() sanitised "TIME_UTC_hh:mm:ss" to
# "TIME_UTC_hh.mm.ss" on import -- confirm against the CSV header.
names(data)[names(data) == "TIME_UTC_hh.mm.ss"] <- "TIME_UTC_hh:mm:ss"

# Build a real UTC date-time (POSIXct) from the separate date and time columns.
# dmy(..., tz = "UTC") returns POSIXct at midnight UTC; adding the parsed
# hh:mm:ss period gives the full timestamp. This keeps `Datetime` a genuine
# date-time (not a Date with a fractional-day offset) and uses only lubridate,
# which is already loaded above.
data$Datetime <- lubridate::dmy(data$DATE_UTC__ddmmyyyy, tz = "UTC") +
  lubridate::hms(data$`TIME_UTC_hh:mm:ss`)

# Coerce the measurement columns to numeric under short working names, then
# keep only rows where SST, salinity and seawater pCO2 are all present.
data <- data %>%
  mutate(
    # Sea surface temperature
    SST = as.numeric(SST..deg.C.),
    # Salinity
    SSS = as.numeric(Sal),
    # Partial pressure of CO2 in seawater
    PCO2_WAT_SST = as.numeric(pCO2_sw.uatm.),
    # CO2 mole fraction at the equilibrator
    xCO2_equ = as.numeric(xCO2_equ.umol.mol.),
    # Equilibrator temperature
    Tequ = as.numeric(Tequ..deg.C.)
  ) %>%
  filter(!is.na(SST), !is.na(SSS), !is.na(PCO2_WAT_SST))

# Pearson correlations quoted in the scatter-plot titles
correlation_sst_sss <- with(data, cor(SST, SSS, use = "complete.obs"))
correlation_sst_pco2 <- with(data, cor(SST, PCO2_WAT_SST, use = "complete.obs"))
correlation_tequ_xco2 <- with(data, cor(Tequ, xCO2_equ, use = "complete.obs"))

# Plot 1: World map with the survey bounding box highlighted, plus a caption
# carrying the dataset metadata.
p1 <- ggplot() +
  borders("world", colour = "#cccccc", fill = "#669900") +
  coord_fixed(ratio = 1) +
  # Semi-transparent rectangle marking the survey's spatial extent
  annotate(
    "rect",
    xmin = -62.2689, xmax = -35.573, ymin = -62.6268, ymax = -51.9062,
    alpha = 0.3, fill = "#f00000", color = "#800000"
  ) +
  # One caption line per element, joined with "\n". Building the caption this
  # way avoids a raw line break inside the string literal, which leaked the
  # source indentation into the rendered caption.
  labs(caption = paste(
    "NCEI Accession: 0208364(a)",
    "Study Area Location: Southern Ocean",
    "Spatial:N=-51.9062,E=-35.573,S=-62.6268,W=-62.2689",
    "Temporal Coverage: 2019-01-07 to 2019-01-22",
    "SurveyType: Research Cruise",
    "Vessel Name/Code: James Clark Ross/74JC",
    "Vessel Owner: UK-Natural Environment Research Council",
    "Data Visualisation: Patrick Ford 2025",
    sep = "\n"
  )) +
  theme_void()


# Plot 2: Sea Surface Temperature (SST) Across Latitude
# NOTE(review): geom_line() connects observations in order of the x variable
# (SST here), not in latitude or time order -- confirm this is intended.
p2 <- ggplot(data, aes(x = SST, y = LAT_dec_degree)) +
  geom_line(color = "#3399cc") +
  labs(title = "Sea Surface Temperature (SST) Across Latitude",
       y = "Latitude",
       x = "Sea Surface Temperature (°C)\n(at intake depth: 6 m)") +
  theme_minimal() +
  # labs() has no `face` argument; a bold title is set through the theme,
  # after theme_minimal() so the complete theme does not override it.
  theme(plot.title = element_text(face = "bold"))

# Plot 3: Spatial Distribution of Sea Surface Temperature (SST)
# Each observation is drawn at its longitude/latitude and coloured by SST.
p3 <- ggplot(data, aes(x = LONG_dec_degree, y = LAT_dec_degree, color = SST)) +
  geom_point(size = 2) +
  scale_color_viridis_c() +
  labs(title = "Spatial Distribution of Sea Surface Temperature (SST)",
       x = "Longitude",
       y = "Latitude",
       color = "SST (\u00B0C)") +
  theme_minimal() +
  # labs() has no `face` argument; a bold title is set through the theme.
  theme(plot.title = element_text(face = "bold"))

# Plot 4: Sea Surface Temperature (SST) Over Time
p4 <- ggplot(data, aes(x = Datetime, y = SST)) +
  geom_line(color = "#3399cc") +
  labs(title = "Sea Surface Temperature (SST) Over Time",
       x = "Datetime (UTC)",
       y = "Sea Surface Temperature (°C)\n(at intake depth: 6 m)") +
  theme_minimal() +
  # labs() has no `face` argument; a bold title is set through the theme.
  theme(plot.title = element_text(face = "bold"))

# Plot 5: SST against salinity, with a straight-line (lm) fit and the
# correlation coefficient quoted in the title.
p5 <- ggplot(data = data, mapping = aes(SST, SSS)) +
  geom_point(color = "#ff9933", alpha = 0.5) +
  geom_smooth(method = "lm", color = "#f00000", se = FALSE) +
  ggtitle(
    paste0("Correlation btwn SST & Salinity (r = ", round(correlation_sst_sss, 2), ")")
  ) +
  xlab("Sea Surface Temperature (°C)\n(at intake depth: 6 m)") +
  ylab("Salinity (PSU)") +
  theme_minimal()

# Plot 6: Spatial Distribution of Seawater pCO2
# Colour uses PCO2_WAT_SST, the numeric-coerced copy of pCO2_sw.uatm., so the
# continuous viridis scale always receives numeric values.
p6 <- ggplot(data, aes(x = LONG_dec_degree, y = LAT_dec_degree, color = PCO2_WAT_SST)) +
  geom_point(size = 2) +
  scale_color_viridis_c() +
  # "\u2082" is the subscript-two character; written as an escape so the label
  # survives file re-encoding (it had been corrupted to mojibake).
  labs(title = "Spatial Distribution of Seawater pCO\u2082",
       x = "Longitude",
       y = "Latitude",
       color = "pCO\u2082") +
  theme_minimal() +
  # labs() has no `face` argument; a bold title is set through the theme.
  theme(plot.title = element_text(face = "bold"))

# Plot 7: Comparison of Atmospheric and Seawater CO2 over time
p7 <- ggplot(data) +
  # Seawater series uses the numeric-coerced copy of pCO2_sw.uatm.
  geom_line(aes(x = Datetime, y = PCO2_WAT_SST, color = "Seawater CO2")) +
  # The atmospheric column is not coerced earlier in the script, so coerce it
  # here; this is a no-op when the column is already numeric.
  geom_line(aes(x = Datetime, y = as.numeric(pCO2_atm.uatm.), color = "Atmospheric CO2")) +
  scale_color_manual(values = c("Seawater CO2" = "#0033ff", "Atmospheric CO2" = "#f00000")) +
  # "\u2082" is the subscript-two character; written as an escape so the title
  # survives file re-encoding (it had been corrupted to mojibake).
  labs(title = "Comparison of Atmospheric & Seawater CO\u2082",
       x = "Datetime (UTC)",
       y = "Partial Pressure of CO₂ (μatm)",
       color = "Variable") +
  theme_minimal() +
  # labs() has no `face` argument; a bold title is set through the theme.
  theme(legend.position = "top",
        plot.title = element_text(face = "bold"))

# Plot 8: Correlation Between Seawater pCO2 and SST
# Scatter of the two variables with a straight-line (lm) fit; the correlation
# coefficient computed earlier is quoted in the title.
p8 <- ggplot(data, aes(x = SST, y = PCO2_WAT_SST)) +
  geom_point(alpha = 0.6, color = "#006600") +
  geom_smooth(method = "lm", se = FALSE, color = "#f00000") +
  labs(
    # "\u2082" is the subscript-two character; written as an escape so the
    # title survives file re-encoding (it had been corrupted to mojibake).
    title = paste0("Correlation btwn Seawater pCO\u2082 & SST (r = ", round(correlation_sst_pco2, 2), ")"),
    x = "Sea Surface Temperature (°C)\n(at intake depth: 6 m)",
    y = "Seawater pCO₂ (μatm)\n(SST-corrected, wet)"
  ) +
  theme_minimal()

# Plot 9: Temporal Variation of Salinity and Seawater pCO2
# Both series share one panel: pCO2 is divided by 10 to sit on the salinity
# axis, and the secondary axis multiplies by 10 to show the original values.
p9 <- ggplot(data, aes(x = Datetime)) +
  geom_line(aes(y = SSS, color = "Salinity"), linewidth = 1) +
  # The legend key below must match the name used in scale_color_manual().
  # "\u2082" is the subscript-two character, written as an escape so the label
  # survives file re-encoding (it had been corrupted to mojibake).
  geom_line(aes(y = PCO2_WAT_SST / 10, color = "Seawater pCO\u2082"), linewidth = 1) +
  scale_color_manual(values = c("Salinity" = "#33cc99", "Seawater pCO\u2082" = "#0033ff")) +
  scale_y_continuous(
    name = "Salinity (PSU)",
    sec.axis = sec_axis(~ . * 10, name = expression(paste("Seawater ", CO[2], " (", mu, "atm)")))
  ) +
  labs(title = "Temporal Variation of Salinity & Seawater pCO\u2082",
       x = "Datetime (UTC)",
       color = "Variable") +
  theme_minimal() +
  # labs() has no `face` argument; a bold title is set through the theme.
  theme(legend.position = "top",
        plot.title = element_text(face = "bold"))

# Plot 10: Temporal Variation of Salinity
# Uses SSS, the numeric-coerced copy of Sal, consistent with the other
# salinity plots.
p10 <- ggplot(data, aes(x = Datetime, y = SSS)) +
  geom_line(color = "#33cc99") +
  labs(title = "Temporal Variation of Salinity",
       x = "Datetime (UTC)",
       y = "Salinity (PSU)") +
  theme_minimal() +
  # labs() has no `face` argument; a bold title is set through the theme.
  theme(plot.title = element_text(face = "bold"))

# Plot 11: Correlation Between Equilibrium Temperature and CO2
# Scatter of Tequ against xCO2_equ with a straight-line (lm) fit; the
# correlation coefficient computed earlier is quoted in the title.
p11 <- ggplot(data, aes(x = Tequ, y = xCO2_equ)) +
  geom_point(alpha = 0.5, color = "#003399") +
  geom_smooth(method = "lm", se = FALSE, color = "#f00000") +
  labs(
    # "\u2082" is the subscript-two character; written as an escape so the
    # title survives file re-encoding (it had been corrupted to mojibake).
    title = paste0("Correlation btwn Equilibrium Temperature & CO\u2082 (r = ", round(correlation_tequ_xco2, 2), ")"),
    x = "Equilibrium Temperature (°C)",
    y = expression(paste("CO"[2], " (", mu, "mol/mol)"))
  ) +
  theme_minimal()

# Plot 12: SST and latitude over time, each min-max rescaled to [0, 1] so the
# two series can share one axis.
# na.rm = TRUE keeps a single missing value from turning the whole rescaled
# column into NA.
sst_range <- range(data$SST, na.rm = TRUE)
lat_range <- range(data$LAT_dec_degree, na.rm = TRUE)
data$SST_norm <- (data$SST - sst_range[1]) / diff(sst_range)
data$LAT_norm <- (data$LAT_dec_degree - lat_range[1]) / diff(lat_range)

# Combined plot of the rescaled series. The rescaled values are unitless, so
# the labels no longer claim degrees Celsius.
p12 <- ggplot(data, aes(x = Datetime)) +
  geom_line(aes(y = SST_norm, color = "SST"), linewidth = 1) +
  geom_line(aes(y = LAT_norm, color = "Latitude"), linewidth = 1, linetype = "dashed") +
  scale_color_manual(values = c("SST" = "#330099", "Latitude" = "#3399cc")) +
  labs(title = "SST & Latitude Over Time (Normalised)",
       x = "Datetime (UTC)",
       y = "Normalised Value (0-1, unitless)",
       color = "Variable") +
  theme_minimal() +
  theme(legend.position = "top")
  

# Plot 13: Distribution of Key Variables (built but not placed in the final
# grid below).
# Reshape SST, seawater pCO2 and salinity into long form: one row per
# (observation, variable) pair. values_drop_na = TRUE drops missing values
# during the reshape, replacing the separate filter step; the three columns
# are already numeric, so no further coercion is needed.
data_long <- data %>%
  pivot_longer(cols = c(SST, PCO2_WAT_SST, SSS),
               names_to = "Variable", values_to = "Value",
               values_drop_na = TRUE)

p13 <- ggplot(data_long, aes(x = Variable, y = Value, fill = Variable)) +
  geom_boxplot(outlier.color = "#f00000", alpha = 0.7) +
  scale_fill_viridis_d() +
  labs(title = "Distribution of Key Variables",
       x = "Variable",
       y = "Value") +
  theme_minimal() +
  # labs() has no `face` argument; a bold title is set through the theme.
  theme(plot.title = element_text(face = "bold"))

# Draw the twelve panels on one page, filled row by row
# (4 rows x 3 columns).
grid.arrange(
  grobs = list(
    p1, p4, p2,
    p3, p12, p11,
    p6, p7, p8,
    p9, p10, p5
  ),
  nrow = 4,
  ncol = 3
)
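# Console output recorded when the grid was rendered: one message per
# geom_smooth() layer (p5, p8 and p11), none of which sets `formula`.
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'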