Author

Victor Munoz

1 Sources

1.1 ERA5-Land Records

Capture ERA5-Land at the site.

Code
Longitude= -73.962799
Latitude = 46.632005

ws_ras<-rast(here::here("data","netcdf","era5-land_ws_hly.nc"))

era5_land_xy<-function(Lon,Lat){
        
        # uwind_tbl<-dir(here::here("data","netcdf","era5-land","uwind"),full.names = T)
        # vwind_tbl<-dir(here::here("data","netcdf","era5-land","vwind"),full.names = T)
        # ws_tbl<-dir(here::here("data","netcdf","era5-land","ws"),full.names = T)
        
        # cl<-makeCluster(detectCores())
        
        # clusterExport(cl,varlist = c("uwind_tbl","vwind_tbl","Longitude","Latitude"))
        # 
        # pbapply::pblapply(cl=cl,1:length(uwind_tbl),FUN=function(x){
        #         
        #         library(magrittr)
        #         
        #         ws_ras<-(terra::rast(uwind_tbl[x])^2 + terra::rast(vwind_tbl[x])^2)^0.5
        #         
        #         terra::writeCDF(x = ws_ras,
        #                  filename=stringr::str_replace_all(uwind_tbl[x],"uwind","ws"))
        # })
        
        # clusterExport(cl,varlist = c("ws_tbl","Longitude","Latitude"))
        
        # ws_dt<-pbapply::pblapply(cl=cl,1:length(ws_tbl),FUN=function(x){
        
        # library(magrittr)
        
        if(!exists("ws_ras")){
                ws_ras<-rast(here::here("data","netcdf","era5-land_ws_hly.nc"))
        }
        
        
        ws_info<-terra::extract(ws_ras,
                                terra::vect(cbind(Lon,Lat),
                                            crs="+proj=longlat +datum=WGS84")) %>%
                unlist() %>% .[-1]
        
        
        ws_dt<-data.frame(ws=ws_info,"date_time"=terra::time(ws_ras)) %>%
                data.table::as.data.table()%>% 
                arrange(date_time) %>% 
                distinct()
        
        
        ws_z<-zoo(ws_dt$ws,ws_dt$date_time) %>% 
                window(end=as.Date("2023-10-31"))
        
        return(ws_z)        
}

ws_z<-era5_land_xy(Lon = Longitude,Lat=Latitude)

1.2 ECCC Records

Information available for comparison ERA5-Land vs regional observations

Code
source(here::here("scripts","eccc_download.r"))

# library(weathercan)
# library(tidyverse)
# 
# eccc_file_hly<-here::here("data","csv","eccc_info_2024_02_23_hly.csv")
# 
# station.selection_hly <- stations_search(coords = c(Latitude,Longitude),
#                                          dist = 100, interval ="hour")
# 
# station.selection_hly <- station.selection_hly[station.selection_hly$end -
#                                                  station.selection_hly$start >= 10, ]
# 
# ## Remove stations with last year of data before 1980
# station.selection_hly <- station.selection_hly[station.selection_hly$end>1990,]
# 
# station.selection.idx<-rbind(
#   # station.selection_dly %>% dplyr::select(-interval,-start,-end),
#   station.selection_hly%>% dplyr::select(-interval,-start,-end)) %>%
#   distinct() %>%
#   arrange(distance)
# 
# if(file.exists(eccc_file_hly)){
# 
#   eccc_info_hly.dt<-data.table::fread(eccc_file_hly)
# 
# }else{
# 
#   ## Selecting all stations
# 
#   ##remove stations with less than 10 years of data
#   #write.csv(station.selection, file = "SDH_regional-stations_500km-daily.csv")
#   regional.db <- weather_dl(station.selection_hly$station_id, interval = "hour")
# 
#   data.table::fwrite(x = regional.db,file = eccc_file_hly)
# 
#   eccc_info_hly.dt<-regional.db
# 
# }



ws_eccc_hly_z<-reshape2::dcast(data = eccc_info_hly.dt,formula = time~station_name,
                value.var = "wind_spd") %>% tk_zoo()

#sort for proximity.
ws_eccc_hly_z<-ws_eccc_hly_z[,station.selection_hly$station_name]

ws_eccc_hly_z %>% 
        subdaily2daily(FUN=mean) %>% 
dwi.graph()

1.3 Comparison ERA5-Land with ECCC records

Code
pbapply::pbwalk(1:3,FUN=function(x){
        
        st_sel<-station.selection_hly[x,]
        
        pander::pandoc.p(''); pander::pandoc.p('')

        pander::pandoc.header(st_sel$station_name,level=3)
        
        era5_land_eccc_z<-era5_land_xy(Lon =st_sel$lon,Lat=st_sel$lat)
        
        ws_comb_z<-merge(era5=era5_land_eccc_z,
                         eccc=ud.convert(ws_eccc_hly_z[,x],"km/hr","m/s")
        )
        
        pander::pandoc.p(''); pander::pandoc.p('')
        
        
        g5<-ws_comb_z %>% 
                     daily2monthly.V2(FUN=mean) %>% 
                fortify.zoo() %>% 
                reshape2::melt(id.vars=c("Index")) %>% 
                ggplot(data=.,aes(x=Index,y=value,col=variable))+
                geom_point()+
                geom_line()+
                labs(x=NULL,y="monthly wind speed [m/s]",col="source:")+
                theme_bw()+
                # scale_x_date()+
                theme(legend.position="bottom")
        
        print(g5)
        
        pander::pandoc.p(''); pander::pandoc.p('')
        
        subdaily2daily(ws_comb_z,FUN=mean) %>% 
                daily2annual.avg(FUN=mean) %>% 
                pandoc.table(round=1,caption="Comparison of monthly wind speed [m/s]")
        
        pander::pandoc.p(''); pander::pandoc.p('')
        
})

1.3.1 SAINT-MICHEL-DES-SAINTS

Comparison of monthly wind speed [m/s]
Station Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec Ann
era5 2.5 2.5 2.6 2.7 2.5 2.4 2.3 2.2 2.5 2.6 2.6 2.5 2.5
eccc 1.9 2.1 2.3 2.3 2 1.7 1.6 1.4 1.5 1.7 1.9 1.8 1.8

1.3.2 STE AGATHE DES MONTS

Comparison of monthly wind speed [m/s]
Station Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec Ann
era5 2.3 2.4 2.5 2.6 2.4 2.2 2.1 2 2.2 2.4 2.4 2.3 2.3
eccc 3.1 2.9 3.2 3.2 2.8 2.7 2.4 2.2 2.6 2.8 3.3 3.2 2.9

1.3.3 ST-JOVITE

Comparison of monthly wind speed [m/s]
Station Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec Ann
era5 2.3 2.3 2.4 2.5 2.3 2.2 2 2 2.2 2.4 2.4 2.3 2.3
eccc 1.1 1.2 1.3 1.4 1.3 1 1 0.8 0.9 1 1.1 1 1.1

1.4 Mean Annual Wind Speed

Code
station.selection.idx<-station.selection.idx %>% 
        mutate(ma_ws=daily2annual.avg(
                ud.convert(subdaily2daily(ws_eccc_hly_z,FUN=mean),"km/hr","m/s"),
                 FUN=mean)$Ann)



ma_ws_ras<-mean(tapp(ws_ras,index="years",fun=mean))


ggplot()+
        tidyterra::geom_spatraster(data=ma_ws_ras)+
        # geom_text()
        geom_point(data=station.selection.idx[-4,],aes(x=lon,y=lat,fill=ma_ws),
                   shape=22,size=5)+
        geom_text_repel(data=station.selection.idx[-4,],
                        aes(x=lon,y=lat,
                            label=paste0(station_name,"\n",round(ma_ws,1))),
                        size=2.5)+
        scale_fill_binned(type="viridis",breaks=seq(0,5,0.5))+
        labs(x="longitude [deg]",y="latitude [deg]",fill="mean annual\nwind speed\n[m/s]",
             title="Mean annual wind speed",
             subtitle="Background: ERA5-Land / Points : ECCC records")+
        coord_sf()

2 Local Records

It is noted zero values may affect the bias correction process, then the values less than 0.4 m/s were removed.

Code
library(MBC)

library(readxl)

ws_site_tbl <- read_excel(here::here("data","csv","WindSpeed NMG.xlsx"), 
    col_types = c("date", "numeric")) %>% 
        arrange(`Date Heure`) %>% 
        distinct()

ws_obs_z<-zoo(ws_site_tbl$`Vitesse des rafales, m/s`,ws_site_tbl$`Date Heure`)

zoo::time(ws_obs_z)<-round.POSIXt(time(ws_obs_z),units = "hours") %>% 
        as.POSIXct()

ws_obs_z<-ws_obs_z[!duplicated(time(ws_obs_z)),]

ws_z<-ws_z[!duplicated(time(ws_z)),]

#for simplicity the records were simplified and removed the values less that 0.4 m/s
ws_comb_z<-merge(mod=ws_z,obs=ws_obs_z) %>% 
        fortify.zoo() %>% 
        # mutate(obs=if_else(obs<=0.4,NA,obs)) %>%
        dplyr::filter(complete.cases(.))


subdaily2daily(tk_zoo(ws_comb_z),FUN=mean) %>%as.xts %>% 
        plot.xts(main="Comparison between ERA5-Land and local observed records")

addLegend("topright", on=1, 
          legend.names = c("Observed Records", "ERA5-Land"), 
          lty=c(1, 1), lwd=c(2, 2),
          col=c("black",  "red"))

Code
ws_comb_z<-merge(mod=ws_z,obs=ws_obs_z) %>% 
        fortify.zoo() %>% 
        mutate(obs=if_else(obs<=0.4,NA,obs)) %>%
        dplyr::filter(complete.cases(.))

subdaily2daily(tk_zoo(ws_comb_z),FUN=mean) %>%as.xts %>% 
        plot.xts(main="Comparison between ERA5-Land and local observed records,values <= 0.4 m/s were removed")

addLegend("topright", on=1, 
          legend.names = c("Observed Records", "ERA5-Land"), 
          lty=c(1, 1), lwd=c(2, 2),
          col=c("black",  "red"))

2.1 Bias Correction

Code
#QQ plot
qqplot(ws_comb_z$mod,ws_comb_z$obs,
       xlab="ERA5-Land [m/s]",ylab="Observed [m/s]",main="Q-Q plot for Wind Speed - ERA5-Land vs Observed [m/s]")


abline(a=0,b=1,col="red")

Comparison of the results after quantile delta mapping matching (bias correction)

Code
ws_model_out<-QDM(o.c =ws_comb_z$obs,m.c = ws_comb_z$mod,m.p =ws_z )


ws_result_z<-merge(mod=ws_model_out$mhat.p,
                   obs=ws_obs_z)

ws_result_z$obs[ws_result_z$obs<=0.4]<-NA

subdaily2daily(tk_zoo(ws_result_z),FUN=mean) %>% 
        daily2monthly.V2(FUN=mean) %>% as.xts %>%
        plot.xts(main="Comparison between ERA5-Land (Corrected) and local observed records",type="b")

addLegend("topright", on=1, 
          legend.names = c("Observed Records", "ERA5-Land (Corrected)"), 
          lty=c(1, 1), lwd=c(2, 2),
          col=c("black",  "red"))

Code
g1<-subdaily2daily(tk_zoo(ws_result_z),FUN=mean) %>% 
         daily2monthly.V2(FUN=mean) %>% 
        fortify.zoo() %>% 
        ggplot(data=.,aes(x=mod,y=obs))+
        geom_point()+
        geom_smooth(method="lm")+
        stat_poly_eq(use_label(c("eq", "adj.R2")),
               formula = y ~ poly(x, 1, raw = TRUE))+
        geom_abline(slope=1,col="red")+
        labs(x="ERA5-Land (after bias correction)\nmean monthly wind speed [m/s]",
             y="Observed\nmean monthly wind speed [m/s]",
             title="Wind Speed - Monthly Comparison")+
        theme_bw()


g2<-subdaily2daily(tk_zoo(ws_result_z),FUN=mean) %>% 
        # daily2monthly.V2(FUN=mean) %>% 
        fortify.zoo() %>% 
        ggplot(data=.,aes(x=mod,y=obs))+
        geom_point(alpha=0.6)+
        geom_smooth(method="lm")+
                stat_poly_eq(use_label(c("eq", "adj.R2")),
               formula = y ~ poly(x, 1, raw = TRUE))+

        geom_abline(slope=1,col="red")+
        labs(x="ERA5-Land (after bias correction)\nmean monthly wind speed [m/s]",
             y="Observed\nmean monthly wind speed [m/s]",
             title="Wind Speed - Daily Comparison")+
        theme_bw()


# g3<-tk_zoo(ws_result_z) %>% 
#         # daily2monthly.V2(FUN=mean) %>% 
#         fortify.zoo() %>% 
#         ggplot(data=.,aes(x=mod,y=obs))+
#         geom_point(alpha=0.6)+
#         geom_smooth(method="lm")+
#                 stat_poly_eq(use_label(c("eq", "adj.R2")),
#                formula = y ~ poly(x, 1, raw = TRUE))+
# 
#         geom_abline(slope=1,col="red")+
#         labs(x="ERA5-Land (after bias correction)\nmean monthly wind speed [m/s]",
#              y="Observed\nmean monthly wind speed [m/s]",
#              title="Wind Speed - Daily Comparison")+
#         theme_bw()

library(patchwork)

g1+g2

3 Frequency Analysis

Frequency analysis is a process used to estimate the frequency of occurrence of a value based on a long-term time series. The desired frequency is typically in months or years and the periods are called return periods. The historical information is adjusted into a probabilistic distribution, and the most suitable distribution is selected.

In this case, the frequency analysis was prepared considering the following probabilistic distributions: Normal, Log-Normal, Generalized Extreme Value (GEV), Gumbel, Pearson III, and Log-Pearson III. The selection of the distribution parameters was prepared with the L-moments methodology.

The L-moments approach provides a more resistant estimation to outliers when compared with typical moment estimations. The selection of the best-fit distribution was prepared based on three criteria: Akaike Information Criterion, Anderson-Darling Criterion, and Bayesian Information Criterion, as described by Laio et al. (2009) and implemented in the statistical software, R.

With this information it was selected a Gumbel distribution.

Code
fa_res<-FA(IDF(ws_result_z$mod,1) %>% coredata())$results %>% 
        mutate(RP=RP.text(Prob),
               RP_simple=RP.text(Prob,dry.wet=F)) %>% 
        dplyr::filter(Prob>=0.5,Prob<=0.9990) %>% 
        mutate(RP=str_replace(RP," ", "\n")) %>% 
        dplyr::rename("wind_speed"=2)

[1] 1

Tested distributions: [1] NORM LN GEV GUMBEL P3 LP3
———————— Chosen distributions: AIC AICc BIC ADC
GUMBEL GUMBEL GUMBEL GUMBEL
whose Maximum-Likelihood parameters are: GUMBEL parameters of x: 16.65707 1.520097

Code
ggplot(data=fa_res,aes(x=RP_simple,y=wind_speed))+
        geom_point()+
        geom_line()+
        scale_x_log10(breaks=fa_res$RP_simple,labels = fa_res$RP)+
        labs(x="return period",y="maximum wind speed [m/s]")+
        theme_bw()

Code
pandoc.table(fa_res[,-4] %>% arrange(Prob),caption="frequency analysis of site records extended with ERA5-Land at hourly scale in m/s")
frequency analysis of site records extended with ERA5-Land at hourly scale in m/s
Prob wind_speed RP
0.5 17.23 2 Wet
0.5708 17.58 2.33 Wet
0.8 19.08 5 Wet
0.9 20.3 10 Wet
0.95 21.47 20 Wet
0.96 21.84 25 Wet
0.98 22.98 50 Wet
0.99 24.12 100 Wet
0.995 25.25 200 Wet
0.998 26.74 500 Wet
0.999 27.87 1000 Wet