This report explores high-frequency water-quality sensor data from two monitoring sites: SR02 in Atlanta (ATL) and StonyBrook in Boston (BOS).
The objective is to examine how CDOM relates to other sensor-based water quality variables using correlation matrices and an interactive multi-axis time-series plot.
This document is structured for reproducibility and portfolio presentation.
Limitations
Place files in the following structure:
project/
sensor_cdom_atl_bos_portfolio.Rmd
data/
dataATL.csv
dataBOS2.csv
# Load the raw sensor exports from the project's data/ folder instead of a
# user-specific absolute path, so the report can be run on any machine that
# follows the documented project layout. The paths are resolved against the
# working directory (knitr uses the document's own folder by default).
dataATL <- read.csv(file.path("data", "dataATL.csv"))
dataBOS <- read.csv(file.path("data", "dataBOS2.csv"))
# Print a compact column-by-column overview of the ATL table.
dplyr::glimpse(dataATL)
## Rows: 58,999
## Columns: 16
## $ City <chr> "Atlanta", "Atlanta", "Atlanta", "Atlanta", "Atlanta", "A…
## $ Site <chr> "SR02", "SR02", "SR02", "SR02", "SR02", "SR02", "SR02", "…
## $ DATE <chr> "2021-12-17T00:00:00Z", "2021-12-17T00:00:00Z", "2021-12-…
## $ TIME <chr> "1899-12-31T17:15:01Z", "1899-12-31T17:30:01Z", "1899-12-…
## $ Time_cor <chr> "1899-12-31T17:15:00Z", "1899-12-31T17:30:00Z", "1899-12-…
## $ DT <chr> "2021-12-17T17:15:00Z", "2021-12-17T17:30:00Z", "2021-12-…
## $ Temp_deg_C <dbl> 14.06, 14.07, 14.07, 14.07, 14.06, 14.04, 14.03, 14.02, 1…
## $ pH_units <dbl> 7.46, 7.42, 7.42, 7.42, 7.42, 7.42, 7.42, 7.42, 7.43, 7.4…
## $ SpCond_uS.cm <dbl> 143.3, 143.4, 143.3, 143.3, 143.3, 143.3, 143.2, 143.3, 1…
## $ HDO_mg.l <dbl> 9.21, 9.19, 9.17, 9.13, 9.13, 9.11, 9.09, 9.07, 9.06, 9.0…
## $ HDO_.Sat <dbl> 91.8, 91.7, 91.5, 91.1, 91.0, 90.8, 90.6, 90.4, 90.3, 90.…
## $ CDOM_ppb <dbl> 15.34, 15.44, 15.61, 15.66, 15.82, 15.96, 16.02, 16.12, 1…
## $ OB_ppb <dbl> 0.12, 0.14, 0.14, 0.14, 0.17, 0.17, 0.16, 0.17, 0.17, 0.1…
## $ Int_Batt_V <dbl> 7.46, 7.39, 7.26, 7.17, 7.12, 7.04, 7.09, 7.12, 7.06, 7.0…
## $ Turb_NTU <dbl> 4.46, 4.36, 4.40, 4.96, 4.31, 4.38, 4.35, 4.68, 4.57, 4.3…
## $ Turb_FNU <dbl> 4.46, 4.36, 4.40, 4.96, 4.31, 4.38, 4.35, 4.68, 4.57, 4.3…
# Print a compact column-by-column overview of the BOS table.
dplyr::glimpse(dataBOS)
## Rows: 93,481
## Columns: 12
## $ site <chr> "StonyBrook", "StonyBrook", "StonyBrook", "StonyBrook", "Ston…
## $ datetime <chr> "2021-11-23T12:00:00Z", "2021-11-23T12:15:00Z", "2021-11-23T1…
## $ date <chr> "2021-11-23T00:00:00Z", "2021-11-23T00:00:00Z", "2021-11-23T0…
## $ time <dbl> 0.5000000, 0.5104167, 0.5208333, 0.5312500, 0.5416667, 0.5520…
## $ temp_c <dbl> 5.39, 5.35, 5.36, 5.36, 5.36, 5.35, 5.35, 5.34, 5.33, 5.33, 5…
## $ pH <dbl> 7.22, 7.28, 7.21, 7.18, 7.18, 7.18, 7.18, 7.18, 7.18, 7.17, 7…
## $ spcond <dbl> 299.5, 298.9, 298.9, 299.0, 299.0, 298.9, 298.9, 298.9, 299.0…
## $ do_mgl <dbl> 12.09, 11.90, 11.81, 11.82, 11.82, 11.82, 11.81, 11.82, 11.80…
## $ do_sat <dbl> 96.0, 94.4, 93.7, 93.8, 93.8, 93.8, 93.7, 93.7, 93.6, 93.4, 9…
## $ cdom_ppb <dbl> 92.88, 96.15, 96.13, 95.83, 96.08, 95.60, 95.88, 95.76, 95.96…
## $ ob_ppb <dbl> 6.54, 7.20, 7.12, 7.12, 7.17, 7.12, 7.12, 7.14, 7.16, 7.12, 7…
## $ BP_mmHg <dbl> 757.5, 757.5, 757.5, 757.5, 757.5, 757.5, 757.5, 757.5, 757.5…
Standardize dates and rename key variables for clarity.
# ATL
# Parse DATE into a date-time and give the key sensor columns short names.
#
# `%||%` only falls back when its left-hand side is NULL. ymd_hms() returns a
# vector (NA for entries it cannot parse), never NULL, so the ymd() fallback
# could not run. coalesce() applies the fallback element by element instead;
# as_datetime() converts the Date result so both candidates are date-times.
#
# NOTE(review): in the glimpse output DATE carries only the calendar day
# (T00:00:00Z) while DT appears to hold the full timestamp -- confirm which
# column should become Time for the time-series plot.
atl <- dataATL %>%
  dplyr::mutate(
    DATE = dplyr::coalesce(
      lubridate::ymd_hms(DATE, quiet = TRUE),
      lubridate::as_datetime(lubridate::ymd(DATE, quiet = TRUE))
    )
  ) %>%
  dplyr::rename(
    CDOM = CDOM_ppb,
    Conductivity = SpCond_uS.cm,
    Temp = Temp_deg_C,
    HDO = HDO_.Sat,
    OB = OB_ppb,
    Time = DATE
  )
# BOS
# Parse date into a date-time and give the key sensor columns short names.
#
# `%||%` only falls back when its left-hand side is NULL. ymd_hms() returns a
# vector (NA for entries it cannot parse), never NULL, so the ymd() fallback
# could not run. coalesce() applies the fallback element by element instead;
# as_datetime() converts the Date result so both candidates are date-times.
#
# The pH column already has its final name, so it needs no rename entry.
bos <- dataBOS %>%
  dplyr::mutate(
    date = dplyr::coalesce(
      lubridate::ymd_hms(date, quiet = TRUE),
      lubridate::as_datetime(lubridate::ymd(date, quiet = TRUE))
    )
  ) %>%
  dplyr::rename(
    CDOM = cdom_ppb,
    Conductivity = spcond,
    HDO = do_sat,
    Time = date,
    Temp = temp_c,
    OB = ob_ppb
  )
# Draw a clustered upper-triangle correlation heat map for selected columns.
#
# df:    data frame holding the columns to correlate.
# vars:  character vector of column names; all must exist in `df`.
# title: text shown above the plot.
#
# Rows with a missing value in any selected column are dropped, the remaining
# columns are coerced to numeric, and the correlation matrix is plotted.
# Returns that correlation matrix invisibly.
make_corrplot <- function(df, vars, title = "Correlation plot") {
  complete_rows <- drop_na(select(df, all_of(vars)))
  numeric_cols <- mutate(complete_rows, across(everything(), as.numeric))

  cor_mat <- cor(numeric_cols, use = "complete.obs")

  corrplot(
    cor_mat,
    method = "color",
    type = "upper",
    order = "hclust",
    addCoef.col = "black",
    tl.cex = 0.8,
    number.cex = 0.7,
    title = title,
    mar = c(0, 0, 2, 0)
  )

  invisible(cor_mat)
}
# ATL: plot the correlations among the selected sensor variables and store
# the returned correlation matrix in R_atl.
vars_atl <- c("CDOM", "Conductivity", "Temp", "HDO", "OB")
R_atl <- atl %>%
  make_corrplot(
    vars = vars_atl,
    title = "ATL: Correlation among CDOM and Sensor Variables"
  )
# BOS: plot the correlations among the selected sensor variables and store
# the returned correlation matrix in R_bos.
vars_bos <- c("CDOM", "Conductivity", "pH", "HDO")
R_bos <- bos %>%
  make_corrplot(
    vars = vars_bos,
    title = "BOS: Correlation among CDOM and Sensor Variables"
  )
# create plot
# Interactive ATL time series: five sensor variables drawn against Time, each
# on its own colour-coded y axis stacked to the left of the plotting area.

# Keep only the plotted columns and drop rows with any missing value.
data <- atl %>%
  select("Time", "CDOM", "Conductivity", "Temp", "HDO", "OB") %>%
  na.omit()

# axis properties
# Whole-number limits enclosing each variable. The CDOM limits are widened so
# that they always include -1.12 and 119.0.
minyaxis1 <- floor(min(-1.12, data$CDOM))
maxyaxis1 <- ceiling(max(119.0, data$CDOM))
minyaxis2 <- floor(min(data$Conductivity))
maxyaxis2 <- ceiling(max(data$Conductivity))
minyaxis3 <- floor(min(data$Temp))
maxyaxis3 <- ceiling(max(data$Temp))
minyaxis4 <- floor(min(data$HDO))
maxyaxis4 <- ceiling(max(data$HDO))
minyaxis5 <- floor(min(data$OB))
maxyaxis5 <- ceiling(max(data$OB))

dateRange <- c(min(data$Time), max(data$Time))
plotColors <- c("darkgreen", "orange", "black", "red", "blue")
# Horizontal gap between neighbouring y axes, as a fraction of plot width.
pAxisSpacer <- 0.07

# Build the layout spec for one free-floating, left-hand y axis.
#   title, color: axis label and colour.
#   limits:       c(min, max) axis range; ticks start at the minimum.
#   dtick:        distance between ticks.
#   position:     horizontal position of the axis line (fraction of width).
#   overlaying:   id of the axis to overlay, or NULL for the base axis.
make_y_axis <- function(title, color, limits, dtick, position,
                        overlaying = NULL) {
  axis <- list(
    title = title,
    side = "left",
    color = color,
    range = limits,
    ticks = "outside",
    dtick = dtick,
    tick0 = limits[1],
    tickmode = "linear",
    position = position,
    anchor = "free",
    zeroline = FALSE,
    showline = TRUE
  )
  if (!is.null(overlaying)) {
    axis$overlaying <- overlaying
  }
  axis
}

# add_lines() already creates scatter traces in "lines" mode, so `type` and
# `mode` are not repeated on each trace.
plot <- plot_ly(data, x = ~Time) %>%
  add_lines(
    y = ~CDOM,
    name = "CDOM",
    line = list(color = plotColors[1], width = 1)
  ) %>%
  add_lines(
    y = ~Conductivity,
    name = "Conductivity",
    yaxis = "y2",
    line = list(color = plotColors[2], width = 1)
  ) %>%
  add_lines(
    y = ~Temp,
    name = "Temperature",
    yaxis = "y3",
    line = list(color = plotColors[3], width = 1)
  ) %>%
  add_lines(
    y = ~HDO,
    name = "HDO%",
    yaxis = "y4",
    line = list(color = plotColors[4], width = 1)
  ) %>%
  add_lines(
    y = ~OB,
    name = "OB",
    yaxis = "y5",
    line = list(color = plotColors[5], width = 1)
  ) %>%
  layout(
    # The x axis starts three spacers in, leaving room for the y axes.
    xaxis = list(
      title = "Date",
      domain = c(pAxisSpacer * 3, 1),
      type = "date",
      range = dateRange,
      ticks = "outside",
      zeroline = TRUE,
      showline = TRUE
    ),
    # NOTE(review): the fourth axis sits at 2.5 spacers, so axes 4 and 5 are
    # half a spacer apart while the others are a full spacer apart -- confirm
    # this spacing is intended.
    yaxis = make_y_axis(
      "CDOM", plotColors[1], c(minyaxis1, maxyaxis1),
      dtick = 2, position = 0
    ),
    yaxis2 = make_y_axis(
      "Conductivity", plotColors[2], c(minyaxis2, maxyaxis2),
      dtick = 10, position = pAxisSpacer, overlaying = "y"
    ),
    yaxis3 = make_y_axis(
      "Temperature", plotColors[3], c(minyaxis3, maxyaxis3),
      dtick = 10, position = pAxisSpacer * 2, overlaying = "y"
    ),
    yaxis4 = make_y_axis(
      "HDO%", plotColors[4], c(minyaxis4, maxyaxis4),
      dtick = 10, position = pAxisSpacer * 2.5, overlaying = "y"
    ),
    yaxis5 = make_y_axis(
      "OB", plotColors[5], c(minyaxis5, maxyaxis5),
      dtick = 10, position = pAxisSpacer * 3, overlaying = "y"
    ),
    legend = list(x = pAxisSpacer * 3.5, y = 1),
    showlegend = TRUE,
    title = list(text = "Correlated ATL variables")
  )

# Final figure: hide the legend, replace the title, and add a range slider
# plus quick-select buttons to the x axis.
plot2 <- plot %>%
  layout(
    showlegend = FALSE,
    title = "Time Series ATL",
    xaxis = list(
      rangeslider = list(visible = TRUE),
      rangeselector = list(
        buttons = list(
          list(count = 1, label = "1m", step = "month", stepmode = "backward"),
          list(count = 6, label = "6m", step = "month", stepmode = "backward"),
          list(count = 1, label = "YTD", step = "year", stepmode = "todate"),
          list(step = "all")
        )
      )
    )
  )
plot2