Author

Victor Munoz

1 Setup

2 Site Gauges

2.1 Gauge Location

Code
gauge_loc<-tibble::tribble(
        ~Gauge,        ~Lat,        ~Lon,
         "G01", 47.10191667,     75.4545,
         "G02", 47.10069444, 75.45394444,
         "G03", 46.79672222, 75.48727778,
         "G04",      46.796, 75.48763889,
         "G05", 46.98530556, 75.10533333
        )

pander::pandoc.table(gauge_loc)
Gauge Lat Lon
G01 47.1 75.45
G02 47.1 75.45
G03 46.8 75.49
G04 46.8 75.49
G05 46.99 75.11

2.2 Location within watersheds

Code
library(sf)
library(ggrepel)
file_sh<-read_sf(here::here("data","shp","Watersheds_local_v4.shp")) 

wc_sh<-read_sf(here::here("data","shp","flow_gauges.shp")) 

g1<-ggplot()+
        # geom_tile(data=cn_tbl,aes(x=x,y=y,fill=CN),col="grey")+
        # geom_text(data=cn_tbl,aes(x=x,y=y,label=CN),size=2.0)+
        # scale_fill_binned(type =  "viridis",breaks = seq(0,100,5))+
        geom_sf(data=file_sh,aes(fill=as.factor(Id)),alpha=0.8,size=2,col="black")+
        geom_sf_label(data=file_sh,aes(label=Id),
                      alpha=0.6,size=2)+
        geom_point(data=gauge_loc,aes(x=Lon,y=Lat),col="red",size=5)+
        geom_label_repel(data=gauge_loc,aes(x=Lon,y=Lat,label=Gauge),size=2)+

        # geom_sf(data=wc_sh)+
        
        # geom_sf(data=file_sh,aes(),fill="grey",col="black",alpha=0.2,linewidth=1.5)+
        # geom_sf_label(data=file_sh,aes(label=NAME))+
        coord_sf()+
        labs(x="longitude",y="latitude",fill="watershed ID:")+
        theme_light()+
        theme(legend.position="bottom")



g1

Code
g2<-ggplot()+
        # geom_tile(data=cn_tbl,aes(x=x,y=y,fill=CN),col="grey")+
        # geom_text(data=cn_tbl,aes(x=x,y=y,label=CN),size=2.0)+
        # scale_fill_binned(type =  "viridis",breaks = seq(0,100,5))+
        geom_sf(data=file_sh,aes(fill=as.factor(Id)),alpha=0.8,size=2,col="black")+
        # geom_sf_label(data=file_sh,aes(label=Id),
        #               alpha=0.6,size=2)+
        geom_point(data=gauge_loc,aes(x=Lon,y=Lat),col="red",size=3)+
        geom_label_repel(data=gauge_loc,aes(x=Lon,y=Lat,label=Gauge))+

        geom_sf(data=wc_sh%>% slice(1:6))+
        geom_sf_label(data=wc_sh %>% slice(1:6),aes(label=Station_ID),
                      alpha=0.6,size=2)+

        
        # geom_sf(data=file_sh,aes(),fill="grey",col="black",alpha=0.2,linewidth=1.5)+
        # geom_sf_label(data=file_sh,aes(label=NAME))+
        coord_sf()+
        labs(x="longitude",y="latitude",fill="watershed ID:")+
        theme_light()+
        theme(legend.position="bottom")

g2

3 Read Local files

3.1 raw information

Code
csv_files<-dir(here::here("data","csv"),recursive = T,pattern = ".csv",full.names = T)

#filtered by G0 on name
csv_files<-csv_files[grep("G0",csv_files)]

#use the raw records
csv_files<-csv_files[grep("Compensated",csv_files,invert = T)]


site_info_tbl<-lapply(csv_files,FUN=function(x){
        
        # print(x)
        # x<-csv_files[7]
        
        
        st_name<-str_split(basename(x),"_",simplify = T)[1] %>% 
                str_remove_all(.,".csv")
        
        station_info <- read_csv(x, 
                                 col_types = cols(Date = col_date(format = "%d/%m/%Y"), 
                                                  Time = col_time(format = "%H:%M:%S"))) %>% 
                mutate(station=st_name) %>% 
                dplyr::select(Date,Time,ms,LEVEL,TEMPERATURE,station)
        
        # head(station_info) %>% print()
        
        return(station_info)        
        
}) %>% do.call(rbind,.) %>% 
        mutate(date_time=ymd_hms(paste0(Date," ",Time)))

The information is presented in three stages:

1.- As it is or it is shared

2.- It is noted information with different units, then values higher than 50 were divided by 10

3.- At the end of the records, there are irregularities; then records after 2023-07-17 were removed.

Code
ggplot(data=site_info_tbl,aes(x=date_time,y=LEVEL))+
        geom_point()+
        facet_wrap(~station)+
        labs(title="Pressure - Level",subtitle="records: as it is")

Code
site_info_summary<-site_info_tbl %>% 
        mutate(yr=lubridate::year(date_time)) %>% 
        group_by(yr,station) %>% 
        summarize(mean=mean(LEVEL,na.rm=T),
                  sd=sd(LEVEL,na.rm=T))

pander::pandoc.table(site_info_summary,caption="mean and sd for raw records")
mean and sd for raw records
yr station mean sd
2022 G01 97.03 0.7061
2022 G03 97.05 0.6904
2023 G01 9.789 0.2033
2023 G02 9.789 0.2039
2023 G03 9.861 0.2222
2023 G04 9.859 0.2225
2023 G05 9.826 0.2127
Code
site_info_adj_tbl<-site_info_tbl %>% 
        mutate(LEVEL=if_else(LEVEL>50,LEVEL/10,LEVEL))

site_info_adj_tbl%>% 
        ggplot(data=.,aes(x=date_time,y=LEVEL))+
        geom_point()+
        facet_wrap(~station)+ 
        labs(title="Pressure - Level",
             subtitle="1) records higher than value of 50, were divided by 10")

Code
site_info_adj_tbl%>% 
        dplyr::filter(date_time<=as.POSIXct("2023-07-17")) %>% 
        ggplot(data=.,aes(x=date_time,y=LEVEL))+
        geom_point()+
        facet_wrap(~station)+ 
        labs(title="Pressure - Level",
             subtitle="1) records higher than value of 50, were divided by 10, 2) removed records after 2023/07/17")

Code
site_info_adj_tbl<-site_info_adj_tbl%>% 
        dplyr::filter(date_time<=as.POSIXct("2023-07-17"))

site_info_adj_summary<-site_info_adj_tbl %>% 
        mutate(yr=lubridate::year(date_time)) %>% 
        group_by(yr,station) %>% 
        summarize(mean=mean(LEVEL,na.rm=T),
                  sd=sd(LEVEL,na.rm=T))
Code
pander::pandoc.table(site_info_adj_summary,caption="mean and sd for cleaned records")
mean and sd for cleaned records
yr station mean sd
2022 G01 9.703 0.07061
2022 G03 9.705 0.06904
2023 G01 9.842 0.07149
2023 G02 9.842 0.07101
2023 G03 9.92 0.07397
2023 G04 9.917 0.07543
2023 G05 9.882 0.07319

4 Capturing hourly records from MERRA2

4.1 Atmospheric Pressure (PS)

Code
reanalysis_fn(file = here::here("data","MERRA","MERRA2_hly_ps.nc"),
              par = "PS",fun = mean,yrs = 2000:2022)

Code
gauge_ps_z<-pbapply::pblapply(1:nrow(gauge_loc),FUN=function(x){

        ps_z<-nc2z_fn(Longitude = gauge_loc[x,3] %>% pull(),
                      Latitude = gauge_loc[x,2] %>% pull(),
                      source = "MERRA2") %>% 
                udunits2::ud.convert(.,"Pa","kPa")*0.102  #conversion from Kpa to meters of head
        
}) %>% do.call(merge,.)

names(gauge_ps_z)<-gauge_loc$Gauge

plot(gauge_ps_z,main="Hourly records for PS from MERRA2[m Head]")

4.2 Air Temperature

Code
reanalysis_fn(file = here::here("data","MERRA","MERRA2_hly_t2m.nc"),
              par = "T2M",
              fun = mean,yrs = 2000:2022)

Code
gauge_t2m_z<-pbapply::pblapply(1:nrow(gauge_loc),FUN=function(x){

        ps_z<-nc2z_fn(Longitude = gauge_loc[x,3] %>% pull(),
                      Latitude = gauge_loc[x,2] %>% pull(),
                      source = "MERRA2") %>% 
                udunits2::ud.convert(.,"K","degC")
        
}) %>% do.call(merge,.)

names(gauge_t2m_z)<-gauge_loc$Gauge

plot(gauge_t2m_z,main="Hourly records for Air Temperature from MERRA2[degC]")

5 Barologger results

Barrologger results present issues specifically with temperature where no daily oscillations are observed. further, information after 2023-06-30 presents issues and therefore was removed.

Code
baro_tbl <- read_csv(here::here("data","csv","1037A_Baro.csv"), 
    col_types = cols(Date = col_date(format = "%Y-%m-%d"), 
        Time = col_time(format = "%H:%M:%S")), 
    skip = 10)

baro_ps_z<-zoo(baro_tbl$LEVEL,ymd_hms(paste0(baro_tbl$Date," ",baro_tbl$Time))) %>% 
        window(end=as.POSIXct("2023-06-30")) 


plot(baro_ps_z,main="Barometric Pressure measured at Barologger")

Code
baro_t2m_z<-zoo(baro_tbl$TEMPERATURE,ymd_hms(paste0(baro_tbl$Date," ",baro_tbl$Time)))

plot(baro_t2m_z,main="Air Temperature measured at Barologger")

6 Comparison between Barologger and MERRA2

6.1 Atmospheric Pressure

Code
merge(baro_ps_z-4.25,
      "MERRA2@G01"=gauge_ps_z$G01) %>%
        na.approx() %>% 
        window(end=as.POSIXct("2023-06-30")) %>% 
        dygraphs::dygraph(ylab = "m of Head")
Code
merge("baro-4.25m"=baro_ps_z-4.25,
      "MERRA2"=gauge_ps_z) %>% 
        subdaily2daily(FUN=mean) %>% 
        as.data.frame() %>% 
        dplyr::filter(complete.cases(.)) %>% 
        hydroTSM::hydropairs()

6.2 Air Temperature

Code
merge(baro_t2m_z,
      "MERRA2@G01"=gauge_t2m_z$G01) %>%
        na.approx() %>% 
        dygraphs::dygraph(ylab = "m of Head")
Code
merge("air_temp_baro"=baro_t2m_z,
      "MERRA2"=gauge_t2m_z) %>% 
        subdaily2daily(FUN=mean) %>% 
        as.data.frame() %>% 
        dplyr::filter(complete.cases(.)) %>% 
        hydroTSM::hydropairs()

7 Evaluation of Raw Pressure records

Values of raw records, were compared with Atmospheric pressure captured from MERRA2 at different locations.

It is noted observed present pressure issues before and after winter periods.

Code
all_gauges_info_tbl<-full_join(
        site_info_adj_tbl %>% 
                dplyr::select(date_time,station,LEVEL) %>% 
                as.data.frame(), 
        
        gauge_ps_z %>% 
                fortify.zoo() %>% 
                reshape2::melt(id.vars="Index") %>% 
                dplyr::rename("PS"="value") %>% 
                dplyr::filter(Index>=as.POSIXct("2022-03-07")) %>% 
                mutate(Index=Index+hours(6)),
        
        by=c("date_time"="Index","station"="variable")
)

all_gauges_info_lst<-split(all_gauges_info_tbl,all_gauges_info_tbl$station)

all_gauges_info_lst$G01 %>%timetk::tk_zoo() %>% dygraphs::dygraph(group="1",main="G01")
Code
all_gauges_info_lst$G02 %>%timetk::tk_zoo() %>% dygraphs::dygraph(group="1",main="G02")
Code
all_gauges_info_lst$G03 %>%timetk::tk_zoo() %>% dygraphs::dygraph(group="1",main="G03")
Code
all_gauges_info_lst$G04 %>%timetk::tk_zoo() %>% dygraphs::dygraph(group="1",main="G04")
Code
all_gauges_info_lst$G05 %>%timetk::tk_zoo() %>% dygraphs::dygraph(group="1",main="G05")
Code
info_diff_tbl<-all_gauges_info_tbl %>% 
         mutate(yr=lubridate::year(date_time)) %>%
        dplyr::filter(complete.cases(.)) %>% 
        group_by(yr,station) %>% 
        summarize(LEVEL_mean=mean(LEVEL,na.rm=T),
                  LEVEL_sd=sd(LEVEL,na.rm=T),
                  PS_mean=mean(PS,na.rm=T),
                  PS_sd=sd(PS,na.rm=T)) %>% 
        mutate(diff=LEVEL_mean-PS_mean)


pander::pandoc.table(info_diff_tbl)
yr station LEVEL_mean LEVEL_sd PS_mean PS_sd diff
2022 G01 9.703 0.07063 9.842 0.06633 -0.1388
2022 G03 9.705 0.06905 9.842 0.06633 -0.1374
2023 G01 9.842 0.07153 9.864 0.07213 -0.02235
2023 G02 9.842 0.07104 9.864 0.07213 -0.02262
2023 G03 9.92 0.07398 9.864 0.07213 0.05578
2023 G04 9.917 0.07545 9.864 0.07213 0.05302
2023 G05 9.882 0.0732 9.849 0.07181 0.03268

Records were adjusted based on the differences obtained by year per station.

Time series LEVEL were adjusted based on the mean of the differences between LEVEL_mean and PS_mean;then:

Level_{adjusted}=Level-(Level_{mean}-PS_{mean})

Where:

  • Level is from the observed records

  • Level_{mean} is the mean obtained per station per year

  • PS_{mean} is the mean obtained from Atmospheric Pressure from MERRA2

Level and PS mean were obtained when common information is available.

Code
all_gauges_info_adj_tbl<-all_gauges_info_tbl %>% 
        mutate(yr=lubridate::year(date_time)) %>% 
        left_join(.,info_diff_tbl,by=c("station"="station","yr"="yr")) %>% 
        mutate(LEVEL_adj=LEVEL-diff) %>% 
        dplyr::select(date_time,station, LEVEL,LEVEL_adj,PS)

all_gauges_info_adj_lst<-split(all_gauges_info_adj_tbl,all_gauges_info_adj_tbl$station)

all_gauges_info_adj_lst$G01 %>% 
        dplyr::select(date_time,LEVEL,PS, LEVEL_adj)%>%timetk::tk_zoo() %>% dygraphs::dygraph(group="1",main="G01")
Code
all_gauges_info_adj_lst$G02 %>% 
        dplyr::select(date_time,LEVEL,PS, LEVEL_adj)%>%timetk::tk_zoo() %>% dygraphs::dygraph(group="1",main="G02")

WL adjusted will be the difference between Level adjusted and atmospheric pressure from MERRA2.

WL=Level_{adjusted}-PS_{MERRA2@location}

It was noted that “seems” to have a zero close to -0.025 (presented as red line).

Code
all_gauges_info_adj_tbl %>% 
        mutate(WL=LEVEL_adj-PS) %>% 
        dplyr::filter(complete.cases(.)) %>% 
        ggplot(data=.,aes(x=date_time,y=WL))+
        geom_hline(yintercept = -0.025,col="red")+
        geom_point()+
        facet_wrap(~station)+
        scale_y_continuous(breaks=seq(-0.2,0.2,0.025))+
        labs(x="dates",y="WL / Pressure - Atmospheric Pressure (including adjustement) [m]")

Based on this observation, the values were raised on 0.025 m and values below this new zero were removed.

This resultant was considered as relevant water levels were gauge station.

Code
all_gauges_info_adj_tbl %>% 
        mutate(WL=LEVEL_adj-PS) %>% 
        mutate(WL_adj=WL+0.025) %>%
        dplyr::filter(WL_adj>=0) %>%
        mutate(date=as.Date(date_time)) %>% 
        group_by(date,station) %>% 
        summarize(WL_adj=mean(WL_adj)) %>% 
        ggplot(data=.,aes(x=date,y=WL_adj))+
        geom_hline(yintercept = 0,col="red")+
        geom_line()+
        facet_wrap(~station)+
        labs(x="dates",y="Head as Pressure - Atmospheric Pressure [m] ",
             caption="note: values considered values over -0.025 from previous figure")

Code
arrow::write_parquet(x = all_gauges_info_adj_tbl %>% 
        mutate(WL=LEVEL_adj-PS) %>% 
        mutate(WL_adj=WL+0.025) %>%
        dplyr::filter(WL_adj>=0),
                     sink =here::here("outputs","all gauges WL.par") )