1 Overview

This report explores high-frequency sensor data from two datasets: ATL (Atlanta, `dataATL.csv`) and BOS (`dataBOS2.csv`).

The objective is to examine how CDOM (colored dissolved organic matter) relates to other sensor-based water quality variables using:

  1. Correlation analysis
  2. Interactive time series visualization
  3. Site-level CDOM trend comparison

This document is structured for reproducibility and portfolio presentation.

Limitations


2 Project Structure

Place files in the following structure:

project/
  sensor_cdom_atl_bos_portfolio.Rmd
  data/
    dataATL.csv
    dataBOS2.csv

3 Load Data

# Read both sensor exports from the project's data/ folder (the layout listed
# under "Project Structure") rather than a machine-specific absolute path, so
# the report renders on any checkout of the project.
dataATL <- read.csv(file.path("data", "dataATL.csv"))
dataBOS <- read.csv(file.path("data", "dataBOS2.csv"))

# Preview the ATL column names and types.
glimpse(dataATL)
## Rows: 58,999
## Columns: 16
## $ City         <chr> "Atlanta", "Atlanta", "Atlanta", "Atlanta", "Atlanta", "A…
## $ Site         <chr> "SR02", "SR02", "SR02", "SR02", "SR02", "SR02", "SR02", "…
## $ DATE         <chr> "2021-12-17T00:00:00Z", "2021-12-17T00:00:00Z", "2021-12-…
## $ TIME         <chr> "1899-12-31T17:15:01Z", "1899-12-31T17:30:01Z", "1899-12-…
## $ Time_cor     <chr> "1899-12-31T17:15:00Z", "1899-12-31T17:30:00Z", "1899-12-…
## $ DT           <chr> "2021-12-17T17:15:00Z", "2021-12-17T17:30:00Z", "2021-12-…
## $ Temp_deg_C   <dbl> 14.06, 14.07, 14.07, 14.07, 14.06, 14.04, 14.03, 14.02, 1…
## $ pH_units     <dbl> 7.46, 7.42, 7.42, 7.42, 7.42, 7.42, 7.42, 7.42, 7.43, 7.4…
## $ SpCond_uS.cm <dbl> 143.3, 143.4, 143.3, 143.3, 143.3, 143.3, 143.2, 143.3, 1…
## $ HDO_mg.l     <dbl> 9.21, 9.19, 9.17, 9.13, 9.13, 9.11, 9.09, 9.07, 9.06, 9.0…
## $ HDO_.Sat     <dbl> 91.8, 91.7, 91.5, 91.1, 91.0, 90.8, 90.6, 90.4, 90.3, 90.…
## $ CDOM_ppb     <dbl> 15.34, 15.44, 15.61, 15.66, 15.82, 15.96, 16.02, 16.12, 1…
## $ OB_ppb       <dbl> 0.12, 0.14, 0.14, 0.14, 0.17, 0.17, 0.16, 0.17, 0.17, 0.1…
## $ Int_Batt_V   <dbl> 7.46, 7.39, 7.26, 7.17, 7.12, 7.04, 7.09, 7.12, 7.06, 7.0…
## $ Turb_NTU     <dbl> 4.46, 4.36, 4.40, 4.96, 4.31, 4.38, 4.35, 4.68, 4.57, 4.3…
## $ Turb_FNU     <dbl> 4.46, 4.36, 4.40, 4.96, 4.31, 4.38, 4.35, 4.68, 4.57, 4.3…
# Preview the BOS column names and types; they are named differently from the
# ATL columns and are harmonised in the cleaning step.
dplyr::glimpse(dataBOS)
## Rows: 93,481
## Columns: 12
## $ site     <chr> "StonyBrook", "StonyBrook", "StonyBrook", "StonyBrook", "Ston…
## $ datetime <chr> "2021-11-23T12:00:00Z", "2021-11-23T12:15:00Z", "2021-11-23T1…
## $ date     <chr> "2021-11-23T00:00:00Z", "2021-11-23T00:00:00Z", "2021-11-23T0…
## $ time     <dbl> 0.5000000, 0.5104167, 0.5208333, 0.5312500, 0.5416667, 0.5520…
## $ temp_c   <dbl> 5.39, 5.35, 5.36, 5.36, 5.36, 5.35, 5.35, 5.34, 5.33, 5.33, 5…
## $ pH       <dbl> 7.22, 7.28, 7.21, 7.18, 7.18, 7.18, 7.18, 7.18, 7.18, 7.17, 7…
## $ spcond   <dbl> 299.5, 298.9, 298.9, 299.0, 299.0, 298.9, 298.9, 298.9, 299.0…
## $ do_mgl   <dbl> 12.09, 11.90, 11.81, 11.82, 11.82, 11.82, 11.81, 11.82, 11.80…
## $ do_sat   <dbl> 96.0, 94.4, 93.7, 93.8, 93.8, 93.8, 93.7, 93.7, 93.6, 93.4, 9…
## $ cdom_ppb <dbl> 92.88, 96.15, 96.13, 95.83, 96.08, 95.60, 95.88, 95.76, 95.96…
## $ ob_ppb   <dbl> 6.54, 7.20, 7.12, 7.12, 7.17, 7.12, 7.12, 7.14, 7.16, 7.12, 7…
## $ BP_mmHg  <dbl> 757.5, 757.5, 757.5, 757.5, 757.5, 757.5, 757.5, 757.5, 757.5…

4 Data Cleaning

Standardize dates and rename key variables for clarity.

# Parse a character column holding either full timestamps
# ("2021-12-17T00:00:00Z") or bare dates ("2021-12-17") into POSIXct (UTC).
#
# ymd_hms() returns NA -- never NULL -- for strings it cannot parse, so the
# date-only fallback has to be merged element-wise with coalesce(). The
# null-default operator `%||%` would always keep the left-hand side and the
# fallback would never run.
parse_sensor_date <- function(x) {
  dplyr::coalesce(
    ymd_hms(x, quiet = TRUE),
    as_datetime(ymd(x, quiet = TRUE))
  )
}

# ATL: parse the date column and give the sensor variables short names that
# are shared with BOS.
# NOTE(review): `Time` is taken from DATE, whose previewed values all carry
# 00:00:00 as the time of day, while DT appears to hold the full timestamp.
# Confirm which column the time-series plots are meant to use.
atl <- dataATL %>%
  dplyr::mutate(DATE = parse_sensor_date(DATE)) %>%
  dplyr::rename(
    CDOM = CDOM_ppb,
    Conductivity = SpCond_uS.cm,
    Temp = Temp_deg_C,
    HDO = HDO_.Sat,
    OB = OB_ppb,
    Time = DATE
  )

# BOS: same treatment. `pH` already has its final name, so it is not renamed.
# NOTE(review): as for ATL, `Time` is taken from the date-only column while
# `datetime` appears to hold the full timestamp -- confirm.
bos <- dataBOS %>%
  dplyr::mutate(date = parse_sensor_date(date)) %>%
  dplyr::rename(
    CDOM = cdom_ppb,
    Conductivity = spcond,
    Temp = temp_c,
    HDO = do_sat,
    OB = ob_ppb,
    Time = date
  )

5 Correlation Analysis

5.1 Correlation Plot Function

#' Plot a correlation matrix for selected columns
#'
#' Coerces the chosen columns to numeric, keeps only the rows that are
#' complete across all of them, and draws the upper triangle of their
#' correlation matrix with `corrplot()`.
#'
#' @param df Data frame containing every column named in `vars`.
#' @param vars Character vector of column names to correlate.
#' @param title Title drawn above the plot.
#' @return The correlation matrix, invisibly (the plot is the side effect).
make_corrplot <- function(df, vars, title = "Correlation plot") {
  # Coerce before dropping NAs so values that fail numeric conversion are
  # removed together with the other incomplete rows.
  df_sel <- df %>%
    dplyr::select(dplyr::all_of(vars)) %>%
    dplyr::mutate(dplyr::across(dplyr::everything(), as.numeric)) %>%
    tidyr::drop_na()

  if (nrow(df_sel) < 2) {
    stop(
      "make_corrplot(): fewer than two complete rows for the selected variables.",
      call. = FALSE
    )
  }

  R <- cor(df_sel)

  # Hierarchical-clustering order needs a fully defined matrix. A constant
  # column produces NA correlations, so keep the input order in that case
  # instead of failing inside the clustering step.
  var_order <- if (anyNA(R)) "original" else "hclust"

  corrplot::corrplot(
    R,
    method = "color",
    type = "upper",
    order = var_order,
    addCoef.col = "black",
    tl.cex = 0.8,
    number.cex = 0.7,
    title = title,
    mar = c(0, 0, 2, 0) # top margin leaves room for the title
  )

  invisible(R)
}

5.2 ATL Correlations

# Cleaned ATL columns that enter the correlation matrix.
vars_atl <- c("CDOM", "Conductivity", "Temp", "HDO", "OB")

# Draw the ATL heat map and keep the correlation matrix for later use.
R_atl <- atl %>%
  make_corrplot(
    vars = vars_atl,
    title = "ATL: Correlation among CDOM and Sensor Variables"
  )

5.3 BOS Correlations

# Cleaned BOS columns that enter the correlation matrix.
vars_bos <- c("CDOM", "Conductivity", "pH", "HDO")

# Draw the BOS heat map and keep the correlation matrix for later use.
R_bos <- bos %>%
  make_corrplot(
    vars = vars_bos,
    title = "BOS: Correlation among CDOM and Sensor Variables"
  )


6 Interactive Time Series

6.1 ATL Time Series

# ----------------------------
# ATL multi-axis time series
# ----------------------------
# CDOM and four companion variables are drawn against a shared date axis, each
# with its own left-hand y-axis.
# NOTE(review): apart from the data frame and titles, this chunk repeats the
# BOS time-series chunk; a shared function would remove the duplication.

# Keep only the plotted columns and drop rows missing any of them.
data <- atl %>%
  dplyr::select(Time, CDOM, Conductivity, Temp, HDO, OB) %>%
  na.omit()

# Without rows, min()/max() below would return +/-Inf and the chart would be
# silently empty; fail with a clear message instead.
stopifnot("No complete ATL rows left to plot after na.omit()" = nrow(data) > 0)

# axis properties: integer bounds enclosing each series
# NOTE(review): the CDOM axis is additionally forced to cover [-1.12, 119];
# where those two constants come from is not visible here -- confirm.
minyaxis1 <- floor(min(-1.12, data$CDOM))
maxyaxis1 <- ceiling(max(119.0, data$CDOM))
minyaxis2 <- floor(min(data$Conductivity))
maxyaxis2 <- ceiling(max(data$Conductivity))
minyaxis3 <- floor(min(data$Temp))
maxyaxis3 <- ceiling(max(data$Temp))
minyaxis4 <- floor(min(data$HDO))
maxyaxis4 <- ceiling(max(data$HDO))
minyaxis5 <- floor(min(data$OB))
maxyaxis5 <- ceiling(max(data$OB))

dateRange <- c(min(data$Time), max(data$Time))

plotColors <- c("darkgreen", "orange", "black", "red", "blue")
pAxisSpacer <- 0.07

# Add one line trace bound to the y-axis `axis_id`. add_lines() already
# implies type = "scatter" and mode = "lines", so they are not repeated.
add_series <- function(p, y, name, axis_id, color) {
  add_lines(
    p,
    y = y,
    name = name,
    yaxis = axis_id,
    line = list(color = color, width = 1)
  )
}

# Build a free-floating left-hand y-axis. Every axis except the base one
# overlays "y" so that all traces share a single plotting area.
left_axis <- function(title, color, lim, dtick, position, overlay = TRUE) {
  axis <- list(
    title = title,
    side = "left",
    color = color,
    range = lim,
    ticks = "outside",
    dtick = dtick,
    tick0 = lim[1],
    tickmode = "linear",
    position = position,
    anchor = "free",
    zeroline = FALSE,
    showline = TRUE
  )
  if (overlay) {
    axis$overlaying <- "y"
  }
  axis
}

plot <- plot_ly(data, x = ~Time) %>%
  add_series(~CDOM, "CDOM", "y", plotColors[1]) %>%
  add_series(~Conductivity, "Conductivity", "y2", plotColors[2]) %>%
  add_series(~Temp, "Temperature", "y3", plotColors[3]) %>%
  add_series(~HDO, "HDO%", "y4", plotColors[4]) %>%
  add_series(~OB, "OB", "y5", plotColors[5]) %>%
  layout(
    xaxis = list(
      title = "Date",
      # the left 3 * pAxisSpacer of the width is reserved for the y-axes
      domain = c(pAxisSpacer * 3, 1),
      type = "date",
      range = dateRange,
      ticks = "outside",
      zeroline = TRUE,
      showline = TRUE
    ),
    yaxis = left_axis(
      "CDOM", plotColors[1], c(minyaxis1, maxyaxis1),
      dtick = 2, position = 0, overlay = FALSE
    ),
    yaxis2 = left_axis(
      "Conductivity", plotColors[2], c(minyaxis2, maxyaxis2),
      dtick = 10, position = pAxisSpacer
    ),
    yaxis3 = left_axis(
      "Temperature", plotColors[3], c(minyaxis3, maxyaxis3),
      dtick = 10, position = pAxisSpacer * 2
    ),
    # NOTE(review): 2.5 breaks the otherwise even axis spacing and puts this
    # axis half a step from its neighbours -- confirm this is intended.
    yaxis4 = left_axis(
      "HDO%", plotColors[4], c(minyaxis4, maxyaxis4),
      dtick = 10, position = pAxisSpacer * 2.5
    ),
    # NOTE(review): the previewed ATL OB values are around 0.1-0.2, so a tick
    # step of 10 leaves this axis with at most one tick -- confirm.
    yaxis5 = left_axis(
      "OB", plotColors[5], c(minyaxis5, maxyaxis5),
      dtick = 10, position = pAxisSpacer * 3
    ),
    legend = list(x = pAxisSpacer * 3.5, y = 1),
    showlegend = TRUE,
    title = list(text = "Correlated ATL variables")
  )

# Published version: legend hidden, range slider and quick-range buttons added.
plot2 <- plot %>%
  layout(
    showlegend = FALSE,
    title = "Time Series ATL",
    xaxis = list(
      rangeslider = list(visible = TRUE),
      rangeselector = list(
        buttons = list(
          list(count = 1, label = "1m", step = "month", stepmode = "backward"),
          list(count = 6, label = "6m", step = "month", stepmode = "backward"),
          list(count = 1, label = "YTD", step = "year", stepmode = "todate"),
          list(step = "all")
        )
      )
    )
  )
plot2

6.2 BOS Time Series

# ----------------------------
# BOS multi-axis time series (ATL-style)
# ----------------------------
# CDOM and four companion variables are drawn against a shared date axis, each
# with its own left-hand y-axis.
# NOTE(review): apart from the data frame and titles, this chunk repeats the
# ATL time-series chunk; a shared function would remove the duplication.

# Keep only the plotted columns and drop rows missing any of them.
data2 <- bos %>%
  dplyr::select(Time, CDOM, Conductivity, Temp, HDO, OB) %>%
  na.omit()

# Without rows, min()/max() below would return +/-Inf and the chart would be
# silently empty; fail with a clear message instead.
stopifnot("No complete BOS rows left to plot after na.omit()" = nrow(data2) > 0)

# axis properties: integer bounds enclosing each series
# NOTE(review): the CDOM axis is additionally forced to cover [-1.12, 119];
# where those two constants come from is not visible here -- confirm.
minyaxis1 <- floor(min(-1.12, data2$CDOM))
maxyaxis1 <- ceiling(max(119.0, data2$CDOM))
minyaxis2 <- floor(min(data2$Conductivity))
maxyaxis2 <- ceiling(max(data2$Conductivity))
minyaxis3 <- floor(min(data2$Temp))
maxyaxis3 <- ceiling(max(data2$Temp))
minyaxis4 <- floor(min(data2$HDO))
maxyaxis4 <- ceiling(max(data2$HDO))
minyaxis5 <- floor(min(data2$OB))
maxyaxis5 <- ceiling(max(data2$OB))

dateRange <- c(min(data2$Time), max(data2$Time))

plotColors <- c("darkgreen", "orange", "black", "red", "blue")
pAxisSpacer <- 0.07

# Helpers are defined locally so this chunk runs on its own.

# Add one line trace bound to the y-axis `axis_id`. add_lines() already
# implies type = "scatter" and mode = "lines", so they are not repeated.
add_series <- function(p, y, name, axis_id, color) {
  add_lines(
    p,
    y = y,
    name = name,
    yaxis = axis_id,
    line = list(color = color, width = 1)
  )
}

# Build a free-floating left-hand y-axis. Every axis except the base one
# overlays "y" so that all traces share a single plotting area.
left_axis <- function(title, color, lim, dtick, position, overlay = TRUE) {
  axis <- list(
    title = title,
    side = "left",
    color = color,
    range = lim,
    ticks = "outside",
    dtick = dtick,
    tick0 = lim[1],
    tickmode = "linear",
    position = position,
    anchor = "free",
    zeroline = FALSE,
    showline = TRUE
  )
  if (overlay) {
    axis$overlaying <- "y"
  }
  axis
}

plot3 <- plot_ly(data2, x = ~Time) %>%
  add_series(~CDOM, "CDOM", "y", plotColors[1]) %>%
  add_series(~Conductivity, "Conductivity", "y2", plotColors[2]) %>%
  add_series(~Temp, "Temperature", "y3", plotColors[3]) %>%
  add_series(~HDO, "HDO%", "y4", plotColors[4]) %>%
  add_series(~OB, "OB", "y5", plotColors[5]) %>%
  layout(
    xaxis = list(
      title = "Date",
      # the left 3 * pAxisSpacer of the width is reserved for the y-axes
      domain = c(pAxisSpacer * 3, 1),
      type = "date",
      range = dateRange,
      ticks = "outside",
      zeroline = TRUE,
      showline = TRUE
    ),
    yaxis = left_axis(
      "CDOM", plotColors[1], c(minyaxis1, maxyaxis1),
      dtick = 2, position = 0, overlay = FALSE
    ),
    yaxis2 = left_axis(
      "Conductivity", plotColors[2], c(minyaxis2, maxyaxis2),
      dtick = 10, position = pAxisSpacer
    ),
    yaxis3 = left_axis(
      "Temperature", plotColors[3], c(minyaxis3, maxyaxis3),
      dtick = 10, position = pAxisSpacer * 2
    ),
    # NOTE(review): 2.5 breaks the otherwise even axis spacing and puts this
    # axis half a step from its neighbours -- confirm this is intended.
    yaxis4 = left_axis(
      "HDO%", plotColors[4], c(minyaxis4, maxyaxis4),
      dtick = 10, position = pAxisSpacer * 2.5
    ),
    # NOTE(review): the previewed BOS OB values are around 7, so a tick step
    # of 10 may leave this axis with very few ticks -- confirm.
    yaxis5 = left_axis(
      "OB", plotColors[5], c(minyaxis5, maxyaxis5),
      dtick = 10, position = pAxisSpacer * 3
    ),
    legend = list(x = pAxisSpacer * 3.5, y = 1),
    showlegend = TRUE,
    title = list(text = "Correlated BOS variables")
  )

# Published version: legend hidden, range slider and quick-range buttons added.
plot4 <- plot3 %>%
  layout(
    showlegend = FALSE,
    title = "Time Series BOS",
    xaxis = list(
      rangeslider = list(visible = TRUE),
      rangeselector = list(
        buttons = list(
          list(count = 1, label = "1m", step = "month", stepmode = "backward"),
          list(count = 6, label = "6m", step = "month", stepmode = "backward"),
          list(count = 1, label = "YTD", step = "year", stepmode = "todate"),
          list(step = "all")
        )
      )
    )
  )
plot4

7 Reproducibility

# Record the R version, platform and package versions used for this render.
utils::sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.2
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] plotly_4.11.0   corrplot_0.95   lubridate_1.9.4 forcats_1.0.1  
##  [5] stringr_1.6.0   dplyr_1.1.4     purrr_1.2.0     readr_2.1.6    
##  [9] tidyr_1.3.2     tibble_3.3.0    ggplot2_4.0.1   tidyverse_2.0.0
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       jsonlite_2.0.0     compiler_4.5.2     tidyselect_1.2.1  
##  [5] jquerylib_0.1.4    scales_1.4.0       yaml_2.3.12        fastmap_1.2.0     
##  [9] R6_2.6.1           generics_0.1.4     knitr_1.51         htmlwidgets_1.6.4 
## [13] bslib_0.9.0        pillar_1.11.1      RColorBrewer_1.1-3 tzdb_0.5.0        
## [17] rlang_1.1.6        stringi_1.8.7      cachem_1.1.0       xfun_0.55         
## [21] sass_0.4.10        S7_0.2.1           lazyeval_0.2.2     otel_0.2.0        
## [25] viridisLite_0.4.2  timechange_0.3.0   cli_3.6.5          withr_3.0.2       
## [29] magrittr_2.0.4     crosstalk_1.2.2    digest_0.6.39      grid_4.5.2        
## [33] rstudioapi_0.17.1  hms_1.1.4          lifecycle_1.0.4    vctrs_0.6.5       
## [37] data.table_1.18.0  evaluate_1.0.5     glue_1.8.0         farver_2.1.2      
## [41] httr_1.4.7         rmarkdown_2.30     tools_4.5.2        pkgconfig_2.0.3   
## [45] htmltools_0.5.9