Exploratory Test of New FloodScan Pipeline

Published

October 14, 2024

Intro

exploratory code to test new floodscan pipeline

Code
box::use(
  terra[...],
  dplyr[...],
  stringr[...],
  lubridate[...],
  purrr[...],
  AzureStor,
  utils = ../R/utils,
  blob = ../src/utils/blob
)

Sys.setenv(AZURE_SAS = Sys.getenv("DSCI_AZ_SAS_DEV"))
Sys.setenv(AZURE_STORAGE_ACCOUNT = Sys.getenv("DSCI_AZ_STORAGE_ACCOUNT"))
dev_container <- blob$load_containers(containers = c("raster","global"),stage = "dev")




cog_catalogue_old <- utils$floodscan_cog_meta_df(container = dev_container$GLOBAL_CONT, prefix = "raster/cogs/aer_area_300s")
cog_catalogue_new <- utils$floodscan_cog_meta_df(container = dev_container$RASTER_CONT, prefix = "floodscan/v5/processed")

Missing Dates

Code
cog_catalogue_new_proc <- tibble(cog_catalogue_new) |> 
  mutate(
    diff_lag = as.numeric(date -lag(date)),
    diff_lead = as.numeric(lead(date)-date),
    type = str_extract(name,"sfed|mfed|SFED|MFED")
  ) 
cog_catalogue_old_proc <- tibble(cog_catalogue_old) |> 
  mutate(
    diff_lag = as.numeric(date -lag(date)),
    diff_lead = as.numeric(lead(date)-date),
    type = str_extract(name,"sfed|mfed|SFED|MFED")
  ) 


# these are the dates surrounding date gaps
# cog_catalogue_new_proc |> 
#   filter(
#     diff_lag > 1 |diff_lead>1
#   ) |> 
#   select(name,date,year,type)
# cog_catalogue_old_proc |>
#   filter(
#     diff_lag > 1 |diff_lead>1
#   ) |>
#   select(name,date,year,type)

end_date <-  Sys.Date()-2
date_seq <- seq(as_date("1998-01-12"),end_date, by = "day")

ldf_missing <- split(cog_catalogue_new_proc,cog_catalogue_new_proc$type) |> 
  map(
    \(dft){
      date_seq[!date_seq %in% dft$date]
    }
  )
Code
ldf_missing
$MFED
 [1] "2024-02-27" "2024-03-08" "2024-03-15" "2024-05-26" "2024-05-28"
 [6] "2024-05-29" "2024-05-30" "2024-06-14" "2024-06-15" "2024-06-16"
[11] "2024-06-21" "2024-06-22" "2024-06-24" "2024-08-26" "2024-08-27"
[16] "2024-08-28" "2024-08-29" "2024-08-30" "2024-08-31" "2024-09-01"
[21] "2024-09-02" "2024-09-03" "2024-09-04" "2024-09-05" "2024-09-06"
[26] "2024-09-07" "2024-09-08" "2024-09-09" "2024-09-10" "2024-09-11"
[31] "2024-09-12" "2024-09-13" "2024-09-14" "2024-09-15" "2024-09-16"
[36] "2024-09-17" "2024-09-18" "2024-09-19" "2024-09-20" "2024-09-21"
[41] "2024-09-22" "2024-09-23" "2024-09-24" "2024-09-25" "2024-09-26"
[46] "2024-09-27" "2024-09-28" "2024-09-29" "2024-09-30" "2024-10-01"
[51] "2024-10-02" "2024-10-03" "2024-10-04" "2024-10-05" "2024-10-06"
[56] "2024-10-07" "2024-10-08" "2024-10-09" "2024-10-10" "2024-10-11"
[61] "2024-10-12"

$SFED
 [1] "2024-02-27" "2024-08-14" "2024-08-15" "2024-08-16" "2024-08-17"
 [6] "2024-08-18" "2024-08-19" "2024-08-20" "2024-08-21" "2024-08-22"
[11] "2024-08-23" "2024-08-24" "2024-08-25" "2024-08-26" "2024-08-27"
[16] "2024-08-28" "2024-08-29" "2024-08-30" "2024-08-31" "2024-09-01"
[21] "2024-09-02" "2024-09-03" "2024-09-04" "2024-09-05" "2024-09-06"
[26] "2024-09-07" "2024-09-08" "2024-09-09" "2024-09-10" "2024-09-11"
[31] "2024-09-12" "2024-09-13" "2024-09-14" "2024-09-15" "2024-09-16"
[36] "2024-09-17" "2024-09-18" "2024-09-19" "2024-09-20" "2024-09-21"
[41] "2024-09-22" "2024-09-23" "2024-09-24" "2024-09-25" "2024-09-26"
[46] "2024-09-27" "2024-09-28" "2024-09-29" "2024-09-30" "2024-10-01"
[51] "2024-10-02" "2024-10-03" "2024-10-04" "2024-10-05" "2024-10-06"
[56] "2024-10-07" "2024-10-08" "2024-10-09" "2024-10-10" "2024-10-11"
[61] "2024-10-12"

Value Comparison

randomly sample 5 dates in both the old and new pipeline catalogues and compare the values.

All values are exactly the same: 0 difference - very nice!

Code
missing_dates <- ldf_missing |> 
  purrr::list_c() |> 
  unique()

all_dates_present <- date_seq[!date_seq %in% missing_dates]

set.seed(1)
num_samples <- 5
rnd_date <- sample(all_dates_present,num_samples)

cog_catalogue_new_proc <- cog_catalogue_new_proc |> 
  mutate(
    urls = str_replace(urls,"global","raster")
  )


lr <- list(
  OLD = cog_catalogue_old,
  NEW = cog_catalogue_new_proc
) |> 
  imap(
    \(tmp_cat,type){
      if(type == "OLD"){
        tmp_urls <- tmp_cat |> 
          filter(
            date %in% rnd_date
          ) |> 
          pull(urls)
        r <-  rast(tmp_urls)
        r <- r[[names(r)=="SFED"]]
        set.names(r, utils$extract_date(sources( r)))
        return(r)
      }
      if(type == "NEW"){
        tmp_urls <- tmp_cat |> 
          filter(
            type == "SFED",
            date %in% rnd_date
          ) |> 
          pull(urls)
        r <-  rast(tmp_urls)
        set.names(r, utils$extract_date(sources( r)))
        return(r)
      }
      
      
    }
    
  )
Code
lr_compare <- 1:num_samples |> 
  map(
    \(tmp_idx){
      
      r_old <- lr$OLD[[tmp_idx]] 
      r_new <- lr$NEW[[tmp_idx]]
      r_diff <-  r_old - r_new
      r_tmp <- rast(
        list(
          r_old,
          r_new,
          r_diff   
        )
        
      )
      set.names(r_tmp, c("OLD PIPELINE","NEW PIPELINE", "Difference"))
      r_tmp
      
    }
  )


unique_diff_values <- lr_compare |> 
  map(
    \(tmp_r){
      unique(values(tmp_r$Difference))
    }
  )

unique_diff_values |> list_c()
     [,1]
[1,]    0
[2,]    0
[3,]    0
[4,]    0
[5,]    0