Import data

Data was obtained from the PacIOOS buoy in Hilo, Hawaii.

# Read the two buoy CSV exports from the working directory.
start_2010 <- read.csv(file = "wqb_04_73f9_6371_d07e.csv")
start_2018 <- read.csv(file = "wqb_05_8418_122b_46c2.csv")

# The first row of the export lists each column's unit (see printed output),
# so keep a copy of it for reference before it is removed during cleaning.
units <- start_2010[1, ]
print(units)
##   time temperature salinity turbidity chlorophyll oxygen oxygen_saturation
## 1  UTC     Celsius        1         1        ug/L   mg/L                 %

Cleaning data

# Clean one raw buoy export.
#
# Args:
#   raw: data frame as read from one of the buoy CSV files. Its first row is
#        the units row, and `time` is a text timestamp whose fields are
#        separated by "-", "T", ":" and "Z" (presumably ISO 8601 such as
#        "2010-10-23T00:00:00Z" -- TODO confirm against the CSV files).
#
# Returns:
#   The measurement rows with columns year (character), date (Date),
#   time ("hour:minute:second" text) followed by temperature, salinity,
#   turbidity, chlorophyll, oxygen and oxygen_saturation, which are passed
#   through unchanged.
clean_buoy_export <- function(raw) {
  # Pick the i-th field of every split timestamp. vapply() pins the result to
  # a character vector, so zero-row input cannot turn the column into a list
  # the way sapply() would.
  field <- function(parts, i) vapply(parts, `[`, character(1), i)

  raw %>%
    # Row 1 holds the units, not a measurement.
    dplyr::filter(row_number() > 1) %>%
    mutate(components = str_split(time, "[-T:Z]")) %>%
    mutate(
      year = field(components, 1),
      month = field(components, 2),
      day = field(components, 3),
      hour = field(components, 4),
      minute = field(components, 5),
      second = field(components, 6)
    ) %>%
    # Drop the raw timestamp so `time` can be rebuilt as time-of-day only.
    dplyr::select(-time, -components) %>%
    mutate(date = as.Date(paste(year, month, day, sep = "-"))) %>%
    mutate(time = paste(hour, minute, second, sep = ":")) %>%
    dplyr::select(
      year, date, time, temperature, salinity, turbidity,
      chlorophyll, oxygen, oxygen_saturation
    )
}

# Both exports share the same layout, so they go through the same cleaning.
clean_2010 <- clean_buoy_export(start_2010)

clean_2018 <- clean_buoy_export(start_2018)

# Stack both cleaned exports, then give every column its working type:
# year as integer, date as Date, and each measurement as numeric.
buoy <- bind_rows(clean_2010, clean_2018) %>%
  mutate(
    year = as.integer(year),
    date = as.Date(date)
  ) %>%
  mutate(
    temperature = as.numeric(temperature),
    salinity = as.numeric(salinity),
    turbidity = as.numeric(turbidity),
    chlorophyll = as.numeric(chlorophyll),
    oxygen = as.numeric(oxygen),
    oxygen_saturation = as.numeric(oxygen_saturation)
  )

# Daily mean of every measurement, one row per calendar date.
#
# na.rm = TRUE averages each variable over the readings that are actually
# present on that day; without it a single missing reading would turn the
# whole day's mean into NA. A day with no valid reading for a variable
# yields NaN for that variable.
average_by_day <- buoy %>%
  dplyr::group_by(date) %>%
  dplyr::summarize(
    temperature = mean(temperature, na.rm = TRUE),
    salinity = mean(salinity, na.rm = TRUE),
    turbidity = mean(turbidity, na.rm = TRUE),
    chlorophyll = mean(chlorophyll, na.rm = TRUE),
    oxygen = mean(oxygen, na.rm = TRUE),
    oxygen_saturation = mean(oxygen_saturation, na.rm = TRUE)
  )

Plot time-series

Pick out outliers! Instruments are not perfect.

# Interactive daily-mean temperature series with horizontal reference lines at
# the 25th (red), 50th (blue) and 75th (red) percentiles.

# Compute the quartiles and the date extent once instead of once per shape.
temperature_quartiles <- quantile(
  average_by_day$temperature, c(0.25, 0.5, 0.75),
  na.rm = TRUE
)
temperature_first_day <- min(average_by_day$date, na.rm = TRUE)
temperature_last_day <- max(average_by_day$date, na.rm = TRUE)

# One horizontal line per quartile, spanning the full date range.
temperature_shapes <- lapply(seq_along(temperature_quartiles), function(i) {
  list(
    type = "line",
    y0 = temperature_quartiles[[i]],
    y1 = temperature_quartiles[[i]],
    x0 = temperature_first_day,
    x1 = temperature_last_day,
    line = list(color = c("red", "blue", "red")[i], width = 1)
  )
})

plot_ly(average_by_day, type = "scatter", mode = "lines") %>%
  add_trace(
    x = ~date, y = ~temperature,
    text = ~format(date, "%Y-%m-%d"), name = ""
  ) %>%
  layout(
    showlegend = FALSE, title = "Temperature at Buoy",
    xaxis = list(rangeslider = list(visible = TRUE)),
    shapes = temperature_shapes
  )
# Interactive daily-mean salinity series with horizontal reference lines at
# the 25th (red), 50th (blue) and 75th (red) percentiles.

# Compute the quartiles and the date extent once instead of once per shape.
salinity_quartiles <- quantile(
  average_by_day$salinity, c(0.25, 0.5, 0.75),
  na.rm = TRUE
)
salinity_first_day <- min(average_by_day$date, na.rm = TRUE)
salinity_last_day <- max(average_by_day$date, na.rm = TRUE)

# One horizontal line per quartile, spanning the full date range.
salinity_shapes <- lapply(seq_along(salinity_quartiles), function(i) {
  list(
    type = "line",
    y0 = salinity_quartiles[[i]],
    y1 = salinity_quartiles[[i]],
    x0 = salinity_first_day,
    x1 = salinity_last_day,
    line = list(color = c("red", "blue", "red")[i], width = 1)
  )
})

plot_ly(average_by_day, type = "scatter", mode = "lines") %>%
  add_trace(
    x = ~date, y = ~salinity,
    text = ~format(date, "%Y-%m-%d"), name = ""
  ) %>%
  layout(
    showlegend = FALSE, title = "Salinity at Buoy",
    xaxis = list(rangeslider = list(visible = TRUE)),
    shapes = salinity_shapes
  )
# Interactive daily-mean series for the remaining variables. Each plot gets a
# range slider and hides the legend; TRUE/FALSE are spelled out because T and
# F are ordinary variables that can be reassigned.

# Turbidity
plot_ly(average_by_day, type = "scatter", mode = "lines") %>%
  add_trace(
    x = ~date, y = ~turbidity,
    text = ~format(date, "%Y-%m-%d"), name = ""
  ) %>%
  layout(
    showlegend = FALSE, title = "Turbidity at Buoy",
    xaxis = list(rangeslider = list(visible = TRUE))
  )

# Chlorophyll
plot_ly(average_by_day, type = "scatter", mode = "lines") %>%
  add_trace(
    x = ~date, y = ~chlorophyll,
    text = ~format(date, "%Y-%m-%d"), name = ""
  ) %>%
  layout(
    showlegend = FALSE, title = "Chlorophyll at Buoy",
    xaxis = list(rangeslider = list(visible = TRUE))
  )

# Dissolved oxygen
plot_ly(average_by_day, type = "scatter", mode = "lines") %>%
  add_trace(
    x = ~date, y = ~oxygen,
    text = ~format(date, "%Y-%m-%d"), name = ""
  ) %>%
  layout(
    showlegend = FALSE, title = "Oxygen at Buoy",
    xaxis = list(rangeslider = list(visible = TRUE))
  )

# Oxygen saturation
plot_ly(average_by_day, type = "scatter", mode = "lines") %>%
  add_trace(
    x = ~date, y = ~oxygen_saturation,
    text = ~format(date, "%Y-%m-%d"), name = ""
  ) %>%
  layout(
    showlegend = FALSE, title = "Oxygen saturation at Buoy",
    xaxis = list(rangeslider = list(visible = TRUE))
  )
# Density curve of one daily-mean variable; `mapping` is an aes() naming the
# column to put on the x axis.
daily_density <- function(mapping) {
  ggplot(data = average_by_day, mapping = mapping) +
    geom_density()
}

# One density panel per measured variable.
temp <- daily_density(aes(x = temperature))
sal <- daily_density(aes(x = salinity))
turb <- daily_density(aes(x = turbidity))
chl <- daily_density(aes(x = chlorophyll))
o2 <- daily_density(aes(x = oxygen))
o2_sat <- daily_density(aes(x = oxygen_saturation))

# Lay the six panels out on a 2 x 3 grid.
grid.arrange(
  grobs = list(temp, sal, turb, chl, o2, o2_sat),
  nrow = 2, ncol = 3
)