Tidal filter

Author

Victor Munoz S.

1 Time series decomposition

The original time series was divided in a time series decomposition, where the information was divided in seasonal, trend and remainder.

Where it is expected that seasonal + reminder will be expected as tidal component.

Trend is expected as the time series without tide effect.

For reference to the time series decomposition check this: https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/stl

Code
tuk_tbl <- read_excel(here::here("data/csv/Tuksuk Butterworth smoothing.xlsx"), 
                                           skip = 1)

#force the time seires to be every 5 minutes
ideal_z<-zoo(,seq.POSIXt(from = tuk_tbl$`date&time`[1],
                         tuk_tbl$`date&time`[nrow(tuk_tbl)],
                         by = "5 min") %>% as.POSIXct()) 

#makes a ideal TS
tuk_z<-merge(zoo(tuk_tbl$discharge,tuk_tbl$`date&time`),ideal_z) %>% 
  na.approx()

tuk_tbl<-fortify.zoo(tuk_z)

#return the information of frequency of 5 min -> 24hr * 12 times = 288
tuk_ts<-ts(tuk_tbl$tuk_z,frequency = 288)

# tuk_dec<-decompose(tuk_ts,type="additive")

tuk_dec<-stl(tuk_ts, s.window = "periodic")

tuk_seasonal_component <- tuk_dec$time.series[, "seasonal"] + 
  tuk_dec$time.series[, "remainder"]

plot(tuk_dec)

2 Tide Check

To confirm previous assumptions, the tide from Imuruk Basin was obtained from here:

https://tidesandcurrents.noaa.gov/noaatideannual.html?id=9469146

Where records were captued from years 2022 to 2026, this allowed to create a tidal model, which it is continuos and match the 5 minute records as inputs.

I found the location of the tide station here:

https://tidesandcurrents.noaa.gov/stationhome.html?id=9469146

The tidal model is based on this mathematical model:

https://rdrr.io/cran/oce/man/tidem.html

Code
library(oce)

latitude<- 65+7.1/60

years<-2022:2026

tide_tbl<-lapply(years,FUN=function(year){

  #https://tidesandcurrents.noaa.gov/noaatideannual.html?id=9469146
  tide_tbl <- read_table(here::here(paste0("data/csv/9469146_",year,".txt")), 
                         skip = 12)
  
  tide_tbl<-data.frame(
    time=lubridate::fast_strptime(paste0(tide_tbl$Date," ",
                                         tide_tbl$Time," ",
                                         tide_tbl$`Pred(Ft)`),
                                  format = "%Y-%m-%d %H:%M:%S%p") %>% 
      as.POSIXct(),
    depth=tide_tbl$`High/Low`/100)
  
  
}) %>% 
  do.call(rbind,.)

#create the model
tide_model<-tidem(t=tide_tbl$time,x=tide_tbl$depth,latitude = latitude)


predicted_tide_z<-zoo(predict.tidem(tide_model, 
                                    newdata = time(ideal_z)),time(ideal_z))

merge(model=predicted_tide_z,
      obs=zoo(tide_tbl$depth,tide_tbl$time) %>% 
  window(start=start(tuk_z),end=end(tuk_z))) %>% 
  dygraph(main="Tide at Imuruk Basin [m]",ylab="tide [m]",group=1) %>% 
  dySeries("obs",label="observed",col="red",pointSize=5) %>% 
  dygraphs::dyRangeSelector()

3 Lag identification

Lag identification was implemented with a correlation matrix, where the maximum Pearson correlation was identified.

Code
cor_val<-merge(model=zoo(as.numeric(tuk_seasonal_component),time(tuk_z) %>% 
                           as.POSIXct()),
      obs=stats::lag(predicted_tide_z,seq(-12*6*5,12*6*5,6))
      # obs=stats::lag(-predicted_tide_z,-114)
      
      ) %>% 
  data.frame() %>%
  cor(x=.,use = "pairwise.complete.obs",method = "pearson") %>% 
  .[1,-1]

cor_max<-cor_val[match(max(cor_val),cor_val)]

cor_max_lag<-str_remove_all(names(cor_max),"lag.")

data.frame(max_cor=cor_max,
           lag=cor_max_lag) %>% 
  pander::pandoc.table()
  max_cor lag
lag.336 0.6849 336

Curiously, it was found an optimal lag between the seasonal + reminder component of 28 hours (336 time steps) and the tidal model. The maximum Pearson correlation was of 0.68, which is considered as relevant.

4 Comparison between Seasonal + Reminder with the Tide Prediction

It was found that the decomposition was relevant related with the tidal estimation, when the tidal is lagged (28 hours). This proof relevance of the decomposition method as it is correlated with the tidal model with an \(R^2\) of 0.53.

To compare were scaled, which means it was subtracted the mean and divided by their respective standard deviation.

\[ X= \frac{x - \text{mean}}{\text{standard deviation}} \]

Code
merge("seasonal + reminder"=zoo(as.numeric(tuk_seasonal_component),
                                time(tuk_z) %>% as.POSIXct()),
      predicted_tide_adjusted=stats::lag(-predicted_tide_z,336)) %>% 
  scale() %>% 
  dygraph(main="Comparison - values scaled",group=1) %>% 
  dygraphs::dyRangeSelector()
Code
library(ggpmisc)

merge(decomposition=zoo(as.numeric(tuk_seasonal_component),
                        time(tuk_z) %>% as.POSIXct()),
      predicted_tide=stats::lag(-predicted_tide_z,336)) %>% 
  fortify.zoo() %>% 
  dplyr::filter(complete.cases(decomposition)) %>% 
  ggplot(data=.,aes(x=decomposition,y=predicted_tide))+
  geom_point(alpha=0.1)+
  stat_poly_eq(use_label("eq", "adj.R2"),
               formula = y ~ poly(x, 1, raw = TRUE),size=5)+
  geom_smooth(method="lm")+
  labs(x="seasonal + reminder",y="tide prediction")+
  theme_bw()

5 Final Results

The results are related with the original decomposition “trend” component. Daily values are presented as daily averages from hourly values.

Code
library(hydroTSM)

ts_hly_z<-zoo(tuk_dec$time.series[,"trend"],time(tuk_z))

ts_dly_z<-hydroTSM::subdaily2daily(ts_hly_z,FUN=median)  

ts_dly_tbl<-fortify.zoo(ts_dly_z)

fortify.zoo(ts_hly_z) %>% 
  ggplot(data=.,aes(x=Index,y=ts_hly_z))+
  geom_point(data=ts_dly_tbl,
             aes(x=as.POSIXct(Index)+lubridate::hours(12),
                                 y=ts_dly_z),col="red",size=2)+
  geom_line()+
  theme_bw()+
  scale_x_datetime(date_breaks = "2 weeks", date_labels = "%Y-%b-%d")+
  labs(x=NULL,y="units")

Code
ts_dly_tbl %>% 
  dplyr::rename("daily"="ts_dly_z") %>% 
pander::pandoc.table(.,style="simple",
                     caption="daily estimated values")
daily estimated values
Index daily
2024-07-11 13875
2024-07-12 36315
2024-07-13 24028
2024-07-14 37105
2024-07-15 28319
2024-07-16 38117
2024-07-17 22814
2024-07-18 19872
2024-07-19 20231
2024-07-20 34888
2024-07-21 30271
2024-07-22 37934
2024-07-23 26884
2024-07-24 20641
2024-07-25 14826
2024-07-26 24260
2024-07-27 24783
2024-07-28 28864
2024-07-29 30512
2024-07-30 11439
2024-07-31 3613
2024-08-01 28720
2024-08-02 28428
2024-08-03 6729
2024-08-04 22892
2024-08-05 53571
2024-08-06 49565
2024-08-07 30765
2024-08-08 35350
2024-08-09 33599
2024-08-10 28681
2024-08-11 25741
2024-08-12 35458
2024-08-13 18117
2024-08-14 36176
2024-08-15 34474
2024-08-16 17359
2024-08-17 32195
2024-08-18 4637
2024-08-19 50127
2024-08-20 37761
2024-08-21 3691
2024-08-22 31424
2024-08-23 54126
2024-08-24 51791
2024-08-25 26698
2024-08-26 9494
2024-08-27 25989
2024-08-28 32494
2024-08-29 36467
2024-08-30 33092
2024-08-31 29257
2024-09-01 15570
2024-09-02 5988
2024-09-03 13479
2024-09-04 32336
2024-09-05 35537
2024-09-06 38359
2024-09-07 9715
2024-09-08 4362
2024-09-09 32131
2024-09-10 30651
2024-09-11 34089
2024-09-12 30262
2024-09-13 19475
2024-09-14 37902
2024-09-15 26994
2024-09-16 17506
2024-09-17 45275
2024-09-18 28448
2024-09-19 15952
2024-09-20 34481
2024-09-21 27141
2024-09-22 25038
2024-09-23 35801
2024-09-24 12184
2024-09-25 1428
2024-09-26 11433
2024-09-27 31266
2024-09-28 30234
2024-09-29 2795
2024-09-30 17215
2024-10-01 22478
2024-10-02 9204
2024-10-03 39554
2024-10-04 20343
2024-10-05 24804
2024-10-06 34828
2024-10-07 33083
2024-10-08 2820
2024-10-09 9700
2024-10-10 20725
2024-10-11 55275
2024-10-12 58344
2024-10-13 36949
2024-10-14 -14346
2024-10-15 -14009
2024-10-16 8267
2024-10-17 20525
2024-10-18 19259
2024-10-19 12352
2024-10-20 -209.7
2024-10-21 -28705
2024-10-22 12698
2024-10-23 64392
2024-10-24 63910

6 Comparison of results

When comparing the results the values between the Butterworth and the time series decomposition seems similar in values; however, the Butterworth presents more “sinousoidal noise” on the signal.

Code
library(timetk)

butter_info <- read_excel(here::here("data","csv",
                                     "Tuksuk Butterworth smoothing.xlsx"), 
    skip = 1)

butter_hly_z<-butter_info[,1:9] %>% tk_zoo()

ts_comp_z<-merge(discharge=butter_hly_z$discharge,
      ts_decomposition=ts_hly_z,
      butter_hly_z[,-1]) 

plot.zoo(ts_comp_z[,1:6],
         yax.flip = T,main="Comparison of Time series")

Code
ts_comp_z[,2:3] %>%
  dygraph(group=2) %>% 
  dygraphs::dyRangeSelector()
Code
ts_comp_z[,2:5] %>%
  dygraph(group=2) %>% 
  dygraphs::dyRangeSelector()

When comparing the seasonal + reminder from the time series decomposition and the discharge - butterworth (400); comparing both with the tidal model, the relationship is higher with the time series decomposition; which means more data from the tide is captured from that model.

Then, it is recommended to proceed with these results instead of the Butterworth analysis.

Code
merge("ts_decomposition"=zoo(as.numeric(tuk_seasonal_component),
                        time(tuk_z) %>% as.POSIXct()),
      "discharge - butterworth_400"=butter_hly_z$discharge-butter_hly_z$`400`,
      predicted_tide=stats::lag(-predicted_tide_z,336)) %>% 
  fortify.zoo() %>% 
  dplyr::filter(complete.cases(`ts_decomposition`)) %>% 
  reshape2::melt(id.vars=c("Index","predicted_tide")) %>% 
  ggplot(data=.,aes(x=value,y=predicted_tide))+
  geom_point(alpha=0.1)+
  stat_poly_eq(use_label( "adj.R2"),
               formula = y ~ poly(x, 1, raw = TRUE),size=5)+
  geom_smooth(method="lm")+
  facet_wrap(~variable,ncol=1)+
  labs(x="models(seasonal + reminder / butterworth)",
       y="tide prediction")+
  theme_bw()

7 Outputs

The information is exported as xlsx with hourly and daily time series.

Code
list("hourly_ts"=fortify.zoo(ts_hly_z) %>%
       dplyr::rename("date_time"=Index,flow=ts_hly_z) %>% 
       mutate(date=as.Date(date_time),
              time=str_split(as.character(date_time),
                             " ",simplify = T) %>%.[,2] ) %>% 
       dplyr::select(date_time,date,time,flow),
     
     "daily_ts"=ts_dly_tbl %>% 
       dplyr::rename("date"=Index,flow=ts_dly_z)
) %>% 
  openxlsx::write.xlsx(
    x=.,
    file=here::here("outputs",
                    "filtered_daily-hourly_flows.xlsx"),
    overwrite = T)