Intro
exploratory code to test new floodscan pipeline
Code
box::use(
terra[...],
dplyr[...],
stringr[...],
lubridate[...],
purrr[...],
AzureStor,
utils = ../R/utils,
blob = ../src/utils/blob
)
Sys.setenv(AZURE_SAS = Sys.getenv("DSCI_AZ_SAS_DEV"))
Sys.setenv(AZURE_STORAGE_ACCOUNT = Sys.getenv("DSCI_AZ_STORAGE_ACCOUNT"))
dev_container <- blob$load_containers(containers = c("raster","global"),stage = "dev")
cog_catalogue_old <- utils$floodscan_cog_meta_df(container = dev_container$GLOBAL_CONT, prefix = "raster/cogs/aer_area_300s")
cog_catalogue_new <- utils$floodscan_cog_meta_df(container = dev_container$RASTER_CONT, prefix = "floodscan/v5/processed")
Missing Dates
Code
cog_catalogue_new_proc <- tibble(cog_catalogue_new) |>
mutate(
diff_lag = as.numeric(date -lag(date)),
diff_lead = as.numeric(lead(date)-date),
type = str_extract(name,"sfed|mfed|SFED|MFED")
)
cog_catalogue_old_proc <- tibble(cog_catalogue_old) |>
mutate(
diff_lag = as.numeric(date -lag(date)),
diff_lead = as.numeric(lead(date)-date),
type = str_extract(name,"sfed|mfed|SFED|MFED")
)
# these are the dates surrounding date gaps
# cog_catalogue_new_proc |>
# filter(
# diff_lag > 1 |diff_lead>1
# ) |>
# select(name,date,year,type)
# cog_catalogue_old_proc |>
# filter(
# diff_lag > 1 |diff_lead>1
# ) |>
# select(name,date,year,type)
end_date <- Sys.Date()-2
date_seq <- seq(as_date("1998-01-12"),end_date, by = "day")
ldf_missing <- split(cog_catalogue_new_proc,cog_catalogue_new_proc$type) |>
map(
\(dft){
date_seq[!date_seq %in% dft$date]
}
)
Code
$MFED
[1] "2024-02-27" "2024-03-08" "2024-03-15" "2024-05-26" "2024-05-28"
[6] "2024-05-29" "2024-05-30" "2024-06-14" "2024-06-15" "2024-06-16"
[11] "2024-06-21" "2024-06-22" "2024-06-24" "2024-08-26" "2024-08-27"
[16] "2024-08-28" "2024-08-29" "2024-08-30" "2024-08-31" "2024-09-01"
[21] "2024-09-02" "2024-09-03" "2024-09-04" "2024-09-05" "2024-09-06"
[26] "2024-09-07" "2024-09-08" "2024-09-09" "2024-09-10" "2024-09-11"
[31] "2024-09-12" "2024-09-13" "2024-09-14" "2024-09-15" "2024-09-16"
[36] "2024-09-17" "2024-09-18" "2024-09-19" "2024-09-20" "2024-09-21"
[41] "2024-09-22" "2024-09-23" "2024-09-24" "2024-09-25" "2024-09-26"
[46] "2024-09-27" "2024-09-28" "2024-09-29" "2024-09-30" "2024-10-01"
[51] "2024-10-02" "2024-10-03" "2024-10-04" "2024-10-05" "2024-10-06"
[56] "2024-10-07" "2024-10-08" "2024-10-09" "2024-10-10" "2024-10-11"
[61] "2024-10-12"
$SFED
[1] "2024-02-27" "2024-08-14" "2024-08-15" "2024-08-16" "2024-08-17"
[6] "2024-08-18" "2024-08-19" "2024-08-20" "2024-08-21" "2024-08-22"
[11] "2024-08-23" "2024-08-24" "2024-08-25" "2024-08-26" "2024-08-27"
[16] "2024-08-28" "2024-08-29" "2024-08-30" "2024-08-31" "2024-09-01"
[21] "2024-09-02" "2024-09-03" "2024-09-04" "2024-09-05" "2024-09-06"
[26] "2024-09-07" "2024-09-08" "2024-09-09" "2024-09-10" "2024-09-11"
[31] "2024-09-12" "2024-09-13" "2024-09-14" "2024-09-15" "2024-09-16"
[36] "2024-09-17" "2024-09-18" "2024-09-19" "2024-09-20" "2024-09-21"
[41] "2024-09-22" "2024-09-23" "2024-09-24" "2024-09-25" "2024-09-26"
[46] "2024-09-27" "2024-09-28" "2024-09-29" "2024-09-30" "2024-10-01"
[51] "2024-10-02" "2024-10-03" "2024-10-04" "2024-10-05" "2024-10-06"
[56] "2024-10-07" "2024-10-08" "2024-10-09" "2024-10-10" "2024-10-11"
[61] "2024-10-12"
Value Comparison
randomly sample 5 dates in both the old and new pipeline catalogues and compare the values.
All values are exactly the same: 0 difference - very nice!
Code
missing_dates <- ldf_missing |>
purrr::list_c() |>
unique()
all_dates_present <- date_seq[!date_seq %in% missing_dates]
set.seed(1)
num_samples <- 5
rnd_date <- sample(all_dates_present,num_samples)
cog_catalogue_new_proc <- cog_catalogue_new_proc |>
mutate(
urls = str_replace(urls,"global","raster")
)
lr <- list(
OLD = cog_catalogue_old,
NEW = cog_catalogue_new_proc
) |>
imap(
\(tmp_cat,type){
if(type == "OLD"){
tmp_urls <- tmp_cat |>
filter(
date %in% rnd_date
) |>
pull(urls)
r <- rast(tmp_urls)
r <- r[[names(r)=="SFED"]]
set.names(r, utils$extract_date(sources( r)))
return(r)
}
if(type == "NEW"){
tmp_urls <- tmp_cat |>
filter(
type == "SFED",
date %in% rnd_date
) |>
pull(urls)
r <- rast(tmp_urls)
set.names(r, utils$extract_date(sources( r)))
return(r)
}
}
)
Code
lr_compare <- 1:num_samples |>
map(
\(tmp_idx){
r_old <- lr$OLD[[tmp_idx]]
r_new <- lr$NEW[[tmp_idx]]
r_diff <- r_old - r_new
r_tmp <- rast(
list(
r_old,
r_new,
r_diff
)
)
set.names(r_tmp, c("OLD PIPELINE","NEW PIPELINE", "Difference"))
r_tmp
}
)
unique_diff_values <- lr_compare |>
map(
\(tmp_r){
unique(values(tmp_r$Difference))
}
)
unique_diff_values |> list_c()
[,1]
[1,] 0
[2,] 0
[3,] 0
[4,] 0
[5,] 0