Data was obtained from the PacIOOS buoy in Hilo, Hawaii.
# Load the two raw exports as data frames. Both paths are relative to the
# current working directory.
start_2010 <- read.csv(file = "wqb_04_73f9_6371_d07e.csv")
start_2018 <- read.csv(file = "wqb_05_8418_122b_46c2.csv")

# Keep the first row of the 2010 export for reference and show it; the
# printed row lists the unit of each column (UTC, Celsius, ug/L, ...).
units <- start_2010[1, ]
print(units)
## time temperature salinity turbidity chlorophyll oxygen oxygen_saturation
## 1 UTC Celsius 1 1 ug/L mg/L %
# Tidy one raw buoy export.
#
# raw: data frame as returned by read.csv(), whose first row is dropped
#      (for the 2010 export that row is what `units` captures) and whose
#      `time` column is split on the characters "-", "T", ":" and "Z".
#
# Returns a data frame with the columns
#   year (character), date (Date), time ("HH:MM:SS" character),
#   temperature, salinity, turbidity, chlorophyll, oxygen, oxygen_saturation
# (measurement columns are passed through unchanged).
clean_buoy_export <- function(raw) {
  # Drop the leading non-data row.
  readings <- dplyr::filter(raw, dplyr::row_number() > 1)

  # One character vector of timestamp pieces per row.
  pieces <- str_split(readings$time, "[-T:Z]")

  # vapply() pins the result to one string per row, so empty or ragged input
  # cannot silently change the column type the way sapply() can.
  piece <- function(i) vapply(pieces, `[`, character(1), i)

  readings %>%
    mutate(
      year = piece(1),
      # An explicit format yields NA for a malformed timestamp instead of
      # aborting the whole conversion.
      date = as.Date(
        paste(piece(1), piece(2), piece(3), sep = "-"),
        format = "%Y-%m-%d"
      ),
      time = paste(piece(4), piece(5), piece(6), sep = ":")
    ) %>%
    dplyr::select(
      year, date, time, temperature, salinity, turbidity,
      chlorophyll, oxygen, oxygen_saturation
    )
}

clean_2010 <- clean_buoy_export(start_2010)
clean_2018 <- clean_buoy_export(start_2018)
# Stack both cleaned exports into one table, then convert the year to
# integer, the date to Date, and every measurement column to numeric.
buoy <- bind_rows(clean_2010, clean_2018) %>%
  mutate(
    year = as.integer(year),
    date = as.Date(date)
  ) %>%
  mutate(
    temperature       = as.numeric(temperature),
    salinity          = as.numeric(salinity),
    turbidity         = as.numeric(turbidity),
    chlorophyll       = as.numeric(chlorophyll),
    oxygen            = as.numeric(oxygen),
    oxygen_saturation = as.numeric(oxygen_saturation)
  )
# Daily mean of every measurement, one row per calendar date.
#
# na.rm = TRUE keeps a day's average when only some of its readings are
# missing; without it a single NA/NaN reading turns the whole day into NA.
# A day with no valid reading at all yields NaN, which is.na() still flags.
average_by_day <- buoy %>%
  dplyr::group_by(date) %>%
  dplyr::summarize(
    temperature = mean(temperature, na.rm = TRUE),
    salinity = mean(salinity, na.rm = TRUE),
    turbidity = mean(turbidity, na.rm = TRUE),
    chlorophyll = mean(chlorophyll, na.rm = TRUE),
    oxygen = mean(oxygen, na.rm = TRUE),
    oxygen_saturation = mean(oxygen_saturation, na.rm = TRUE)
  )
Pick out outliers! Instruments are not perfect.
# Horizontal reference lines at the 25th, 50th and 75th percentiles of
# `values`, spanning the full range of `dates`. The outer two lines are red
# and the median is blue. Returns a list of plotly shape specifications.
quartile_shapes <- function(values, dates) {
  probs <- c(0.25, 0.5, 0.75)
  colours <- c("red", "blue", "red")
  # Compute all three quantiles and the date range once, not once per line.
  levels <- stats::quantile(values, probs, na.rm = TRUE)
  span <- range(dates, na.rm = TRUE)

  lapply(seq_along(probs), function(i) {
    list(
      type = "line",
      y0 = levels[[i]],
      y1 = levels[[i]],
      x0 = span[1],
      x1 = span[2],
      line = list(color = colours[i], width = 1)
    )
  })
}

# Interactive line chart of one daily-average column against date, with a
# range slider on the x axis and the date as hover text.
#
# data:      data frame with a `date` column and the column named `column`.
# column:    name of the measurement column, as a string.
# title:     chart title.
# quartiles: if TRUE, overlay the lines built by quartile_shapes().
#
# Returns the plotly object, so a top-level call displays the chart.
plot_buoy_series <- function(data, column, title, quartiles = FALSE) {
  fig <- plotly::plot_ly(data, type = "scatter", mode = "lines")
  fig <- plotly::add_trace(
    fig,
    x = ~date,
    # Built as a formula so the y axis keeps the column name as its label.
    y = stats::as.formula(paste0("~", column)),
    text = ~format(date, "%Y-%m-%d"),
    name = ""
  )

  slider_axis <- list(rangeslider = list(visible = TRUE))

  if (quartiles) {
    plotly::layout(
      fig,
      showlegend = FALSE,
      title = title,
      xaxis = slider_axis,
      shapes = quartile_shapes(data[[column]], data$date)
    )
  } else {
    plotly::layout(
      fig,
      showlegend = FALSE,
      title = title,
      xaxis = slider_axis
    )
  }
}

plot_buoy_series(average_by_day, "temperature", "Temperature at Buoy",
                 quartiles = TRUE)
plot_buoy_series(average_by_day, "salinity", "Salinity at Buoy",
                 quartiles = TRUE)
plot_buoy_series(average_by_day, "turbidity", "Turbidity at Buoy")
plot_buoy_series(average_by_day, "chlorophyll", "Chlorophyll at Buoy")
plot_buoy_series(average_by_day, "oxygen", "Oxygen at Buoy")
plot_buoy_series(average_by_day, "oxygen_saturation",
                 "Oxygen saturation at Buoy")
# Density curve of one daily-average variable; `mapping` supplies the x
# aesthetic.
density_panel <- function(mapping) {
  ggplot(data = average_by_day, mapping = mapping) + geom_density()
}

temp   <- density_panel(aes(x = temperature))
sal    <- density_panel(aes(x = salinity))
turb   <- density_panel(aes(x = turbidity))
chl    <- density_panel(aes(x = chlorophyll))
o2     <- density_panel(aes(x = oxygen))
o2_sat <- density_panel(aes(x = oxygen_saturation))

# Lay the six panels out on a 2-row by 3-column grid.
grid.arrange(
  temp, sal, turb,
  chl, o2, o2_sat,
  ncol = 3, nrow = 2
)