# Load required libraries
pacman::p_load(pacman, dplyr, ggplot2, lubridate, gridExtra, viridis, tidyr, maps)

# Read the survey measurements from the CSV export
data <- read.csv(file = "SAOcean20191125_20191220.csv")

# Give the time-of-day column the colon-separated name that the datetime
# conversion below refers to (the dotted form is presumably what read.csv()
# made of the original CSV header -- confirm against the file)
colnames(data)[colnames(data) == "TIME_UTC_hh.mm.ss"] <- "TIME_UTC_hh:mm:ss"
  
# Combine the UTC date (day-month-year) and time-of-day columns into a single
# POSIXct timestamp in UTC. Passing `tz` makes dmy() return POSIXct rather than
# Date, so the time of day is kept as real seconds instead of fractional days,
# and lubridate::hms() avoids depending on the separate `hms` package, which is
# not in the p_load() list above.
data$Datetime <- lubridate::dmy(data$DATE_UTC__ddmmyyyy, tz = "UTC") +
  lubridate::hms(data$`TIME_UTC_hh:mm:ss`)
  
# Coerce the measurement columns to numeric under short analysis names
data <- mutate(
  data,
  SST = as.numeric(SST..deg.C.),               # Sea Surface Temperature
  SSS = as.numeric(Sal),                       # Salinity
  PCO2_WAT_SST = as.numeric(pCO2_sw.uatm.),    # Partial pressure of CO2 in water
  xCO2_equ = as.numeric(xCO2_equ.umol.mol.),   # Equilibrium CO2
  Tequ = as.numeric(Tequ..deg.C.)              # Equilibrium temperature
)

# Keep only rows where SST, salinity and seawater pCO2 are all present
data <- filter(data, !is.na(SST), !is.na(SSS), !is.na(PCO2_WAT_SST))
  
  # Correlation coefficients quoted in the titles of the scatter plots;
  # "complete.obs" drops any pair in which either value is missing
  correlation_sst_sss <- with(data, cor(SST, SSS, use = "complete.obs"))
  correlation_sst_pco2 <- with(data, cor(SST, PCO2_WAT_SST, use = "complete.obs"))
  correlation_tequ_xco2 <- with(data, cor(Tequ, xCO2_equ, use = "complete.obs"))
  
  # Plot 1: World map with the survey area highlighted, plus survey metadata
  p1 <- ggplot() +
    borders("world", colour = "#cccccc", fill = "#669900") +
    coord_fixed(ratio = 1) +
    # Bounding box of the survey area: xmin is the western edge, xmax the eastern
    annotate(
      "rect",
      xmin = -58.4753, xmax = -36.4324, ymin = -58.4359, ymax = -51.9413,
      alpha = 0.3, fill = "#f00000", color = "#800000"
    ) +
    # One metadata item per caption line. The E/W values follow the rectangle
    # above (east = xmax, west = xmin) and the dates are written as yyyy-mm-dd.
    labs(
      caption = paste(
        "NCEI Accession: 0208364(d)",
        "Study Area Location: South Atlantic Ocean",
        "Spatial: N=-51.9413, E=-36.4324, S=-58.4359, W=-58.4753",
        "Temporal Coverage: 2019-11-25 to 2019-12-20",
        "Survey Type: Research Cruise",
        "Vessel Name/Code: James Clark Ross/74JC",
        "Vessel Owner: UK-Natural Environment Research Council",
        "Data Visualisation: Patrick Ford 2025",
        sep = "\n"
      )
    ) +
    theme_void()
  
  # Plot 2: Sea Surface Temperature (SST) Across Latitude
  # NOTE(review): geom_line() joins points in order of the x variable (SST
  # here); if the trace should follow latitude instead, consider
  # geom_line(orientation = "y").
  p2 <- ggplot(data, aes(x = SST, y = LAT_dec_degree)) +
    geom_line(color = "#3399cc") +
    labs(
      title = "Sea Surface Temperature (SST) Across Latitude",
      y = "Latitude",
      x = "Sea Surface Temperature (°C)\n(at intake depth: 6 m)"
    ) +
    theme_minimal() +
    # `face` is not a label that labs() understands; a bold title has to be
    # requested through the theme, after the complete theme_minimal()
    theme(plot.title = element_text(face = "bold"))
  
  # Plot 3: Spatial Distribution of Sea Surface Temperature (SST)
  # One point per observation at its longitude/latitude, coloured by SST
  p3 <- ggplot(data, aes(x = LONG_dec_degree, y = LAT_dec_degree, color = SST)) +
    geom_point(size = 2) +
    scale_color_viridis_c() +
    labs(
      title = "Spatial Distribution of Sea Surface Temperature (SST)",
      x = "Longitude",
      y = "Latitude",
      color = "SST (\u00B0C)"
    ) +
    theme_minimal() +
    # `face` is not a label that labs() understands; a bold title has to be
    # requested through the theme, after the complete theme_minimal()
    theme(plot.title = element_text(face = "bold"))
  
  # Plot 4: Sea Surface Temperature (SST) Over Time
  p4 <- ggplot(data, aes(x = Datetime, y = SST)) +
    geom_line(color = "#3399cc") +
    labs(
      title = "Sea Surface Temperature (SST) Over Time",
      x = "Datetime (UTC)",
      y = "Sea Surface Temperature (°C)\n(at intake depth: 6 m)"
    ) +
    theme_minimal() +
    # `face` is not a label that labs() understands; a bold title has to be
    # requested through the theme, after the complete theme_minimal()
    theme(plot.title = element_text(face = "bold"))
  
  # Plot 5: Correlation Between SST and Salinity
  # Scatter of salinity against SST with a straight-line fit; the correlation
  # coefficient computed earlier is quoted in the title.
  p5 <- ggplot(data, aes(x = SST, y = SSS)) +
    geom_point(alpha = 0.5, color = "#ff9933") +
    # Spell out the model formula so the fit is explicit and geom_smooth()
    # does not print its "using formula" message when the plot is drawn
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "#f00000") +
    labs(
      title = paste0(
        "Correlation btwn SST & Salinity (r = ",
        round(correlation_sst_sss, 2), ")"
      ),
      x = "Sea Surface Temperature (°C)\n(at intake depth: 6 m)",
      y = "Salinity (PSU)"
    ) +
    theme_minimal()
  
  # Plot 6: Spatial Distribution of Seawater pCO2
  # One point per observation at its longitude/latitude, coloured by the
  # numeric seawater pCO2 column (PCO2_WAT_SST) created during type coercion.
  p6 <- ggplot(
    data,
    aes(x = LONG_dec_degree, y = LAT_dec_degree, color = PCO2_WAT_SST)
  ) +
    geom_point(size = 2) +
    scale_color_viridis_c() +
    # "\u2082" is the subscript two; written as an escape so the label
    # survives the file being re-saved in a different encoding
    labs(
      title = "Spatial Distribution of Seawater pCO\u2082",
      x = "Longitude",
      y = "Latitude",
      color = "pCO\u2082"
    ) +
    theme_minimal() +
    # `face` is not a label that labs() understands; a bold title has to be
    # requested through the theme, after the complete theme_minimal()
    theme(plot.title = element_text(face = "bold"))
  
  # Plot 7: Comparison of Atmospheric and Seawater CO2
  # Two time series on one axis; the constant strings mapped to `color`
  # become the legend keys matched by scale_color_manual().
  p7 <- ggplot(data) +
    geom_line(aes(x = Datetime, y = PCO2_WAT_SST, color = "Seawater CO2")) +
    geom_line(aes(x = Datetime, y = pCO2_atm.uatm., color = "Atmospheric CO2")) +
    scale_color_manual(
      values = c("Seawater CO2" = "#0033ff", "Atmospheric CO2" = "#f00000")
    ) +
    # "\u2082" is the subscript two; written as an escape so the title
    # survives the file being re-saved in a different encoding
    labs(
      title = "Comparison of Atmospheric & Seawater CO\u2082",
      x = "Datetime (UTC)",
      y = "Partial Pressure of CO₂ (μatm)",
      color = "Variable"
    ) +
    theme_minimal() +
    # `face` is not a label that labs() understands; a bold title has to be
    # requested through the theme, after the complete theme_minimal()
    theme(
      legend.position = "top",
      plot.title = element_text(face = "bold")
    )
  
  # Plot 8: Correlation Between Seawater pCO2 and SST
  # Scatter of seawater pCO2 against SST with a straight-line fit; the
  # correlation coefficient computed earlier is quoted in the title.
  p8 <- ggplot(data, aes(x = SST, y = PCO2_WAT_SST)) +
    geom_point(alpha = 0.6, color = "#006600") +
    # Spell out the model formula so the fit is explicit and geom_smooth()
    # does not print its "using formula" message when the plot is drawn
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "#f00000") +
    # "\u2082" is the subscript two; written as an escape so the title
    # survives the file being re-saved in a different encoding
    labs(
      title = paste0(
        "Correlation btwn Seawater pCO\u2082 & SST (r = ",
        round(correlation_sst_pco2, 2), ")"
      ),
      x = "Sea Surface Temperature (°C)\n(at intake depth: 6 m)",
      y = "Seawater pCO₂ (μatm)\n(SST-corrected, wet)"
    ) +
    theme_minimal()
  
  # Plot 9: Temporal Variation of Salinity and Seawater pCO2
  # Seawater pCO2 is divided by 10 so it can share the salinity axis; the
  # secondary axis multiplies by 10 again so it reads in the original units.
  p9 <- ggplot(data, aes(x = Datetime)) +
    geom_line(aes(y = SSS, color = "Salinity"), linewidth = 1) +
    # The legend key string must match the name used in scale_color_manual()
    geom_line(
      aes(y = PCO2_WAT_SST / 10, color = "Seawater pCO\u2082"),
      linewidth = 1
    ) +
    scale_color_manual(
      values = c("Salinity" = "#33cc99", "Seawater pCO\u2082" = "#0033ff")
    ) +
    scale_y_continuous(
      name = "Salinity (PSU)",
      sec.axis = sec_axis(
        ~ . * 10,
        name = expression(paste("Seawater ", CO[2], " (", mu, "atm)"))
      )
    ) +
    # "\u2082" is the subscript two; written as an escape so the labels
    # survive the file being re-saved in a different encoding
    labs(
      title = "Temporal Variation of Salinity & Seawater pCO\u2082",
      x = "Datetime (UTC)",
      color = "Variable"
    ) +
    theme_minimal() +
    # `face` is not a label that labs() understands; a bold title has to be
    # requested through the theme, after the complete theme_minimal()
    theme(
      legend.position = "top",
      plot.title = element_text(face = "bold")
    )
  
  # Plot 10: Temporal Variation of Salinity
  # Uses SSS, the numeric salinity column created during type coercion,
  # in line with the other salinity plots (rather than the raw `Sal` column).
  p10 <- ggplot(data, aes(x = Datetime, y = SSS)) +
    geom_line(color = "#33cc99") +
    labs(
      title = "Temporal Variation of Salinity",
      x = "Datetime (UTC)",
      y = "Salinity (PSU)"
    ) +
    theme_minimal() +
    # `face` is not a label that labs() understands; a bold title has to be
    # requested through the theme, after the complete theme_minimal()
    theme(plot.title = element_text(face = "bold"))
  
  # Plot 11: Correlation Between Equilibrium Temperature and CO2
  # Scatter of xCO2_equ against Tequ with a straight-line fit; the
  # correlation coefficient computed earlier is quoted in the title.
  p11 <- ggplot(data, aes(x = Tequ, y = xCO2_equ)) +
    geom_point(alpha = 0.5, color = "#003399") +
    # Spell out the model formula so the fit is explicit and geom_smooth()
    # does not print its "using formula" message when the plot is drawn
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "#f00000") +
    # "\u2082" is the subscript two; written as an escape so the title
    # survives the file being re-saved in a different encoding
    labs(
      title = paste0(
        "Correlation btwn Equilibrium Temperature & CO\u2082 (r = ",
        round(correlation_tequ_xco2, 2), ")"
      ),
      x = "Equilibrium Temperature (°C)",
      y = expression(paste("CO"[2], " (", mu, "mol/mol)"))
    ) +
    theme_minimal()
  
  # Plot 12: SST and latitude over time on a shared, unitless axis
  # Each series is min-max rescaled to the interval [0, 1]. na.rm = TRUE keeps
  # a single missing value from turning the whole rescaled column into NA.
  data$SST_norm <- (data$SST - min(data$SST, na.rm = TRUE)) /
    (max(data$SST, na.rm = TRUE) - min(data$SST, na.rm = TRUE))
  data$LAT_norm <- (data$LAT_dec_degree - min(data$LAT_dec_degree, na.rm = TRUE)) /
    (max(data$LAT_dec_degree, na.rm = TRUE) -
       min(data$LAT_dec_degree, na.rm = TRUE))

  # Combined plot of the two rescaled series; the constant strings mapped to
  # `color` become the legend keys matched by scale_color_manual(). The
  # rescaled values carry no unit, so the labels do not mention degrees Celsius.
  p12 <- ggplot(data, aes(x = Datetime)) +
    geom_line(aes(y = SST_norm, color = "SST"), linewidth = 1) +
    geom_line(
      aes(y = LAT_norm, color = "Latitude"),
      linewidth = 1, linetype = "dashed"
    ) +
    scale_color_manual(values = c("SST" = "#330099", "Latitude" = "#3399cc")) +
    labs(
      title = "SST & Latitude Over Time (Normalised)",
      x = "Datetime (UTC)",
      y = "Normalised Value (0-1)",
      color = "Variable"
    ) +
    theme_minimal() +
    theme(legend.position = "top")
  
  # Plot 13: Distribution of Key Variables (built but not placed in the grid)
  # Reshape SST, seawater pCO2 and salinity into long form, one row per
  # (observation, variable) pair; values_drop_na discards missing values
  # during the reshape, so no separate filter step is needed.
  data_long <- data %>%
    pivot_longer(
      cols = c(SST, PCO2_WAT_SST, SSS),
      names_to = "Variable",
      values_to = "Value",
      values_drop_na = TRUE
    )

  # One box per variable, with outliers drawn in red
  p13 <- ggplot(data_long, aes(x = Variable, y = Value, fill = Variable)) +
    geom_boxplot(outlier.color = "#f00000", alpha = 0.7) +
    scale_fill_viridis_d() +
    labs(
      title = "Distribution of Key Variables",
      x = "Variable",
      y = "Value"
    ) +
    theme_minimal() +
    # `face` is not a label that labs() understands; a bold title has to be
    # requested through the theme, after the complete theme_minimal()
    theme(plot.title = element_text(face = "bold"))
  
  # Draw twelve of the plots on one page: 4 rows by 3 columns,
  # listed below one grid row per line
  grid.arrange(
    grobs = list(
      p1, p4, p2,
      p3, p12, p11,
      p6, p7, p8,
      p9, p10, p5
    ),
    nrow = 4, ncol = 3
  )
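## Console output recorded from a run of the script
## (one message per geom_smooth() layer drawn in the grid):
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'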