It is noted zero values may affect the bias correction process, then the values less than 0.4 m/s were removed.
Code
library(MBC)library(readxl)ws_site_tbl <-read_excel(here::here("data","csv","WindSpeed NMG.xlsx"), col_types =c("date", "numeric")) %>%arrange(`Date Heure`) %>%distinct()ws_obs_z<-zoo(ws_site_tbl$`Vitesse des rafales, m/s`,ws_site_tbl$`Date Heure`)zoo::time(ws_obs_z)<-round.POSIXt(time(ws_obs_z),units ="hours") %>%as.POSIXct()ws_obs_z<-ws_obs_z[!duplicated(time(ws_obs_z)),]ws_z<-ws_z[!duplicated(time(ws_z)),]#for simplicity the records were simplified and removed the values less that 0.4 m/sws_comb_z<-merge(mod=ws_z,obs=ws_obs_z) %>%fortify.zoo() %>%# mutate(obs=if_else(obs<=0.4,NA,obs)) %>% dplyr::filter(complete.cases(.))subdaily2daily(tk_zoo(ws_comb_z),FUN=mean) %>%as.xts %>%plot.xts(main="Comparison between ERA5-Land and local observed records")addLegend("topright", on=1, legend.names =c("Observed Records", "ERA5-Land"), lty=c(1, 1), lwd=c(2, 2),col=c("black", "red"))
Code
ws_comb_z<-merge(mod=ws_z,obs=ws_obs_z) %>%fortify.zoo() %>%mutate(obs=if_else(obs<=0.4,NA,obs)) %>% dplyr::filter(complete.cases(.))subdaily2daily(tk_zoo(ws_comb_z),FUN=mean) %>%as.xts %>%plot.xts(main="Comparison between ERA5-Land and local observed records,values <= 0.4 m/s were removed")addLegend("topright", on=1, legend.names =c("Observed Records", "ERA5-Land"), lty=c(1, 1), lwd=c(2, 2),col=c("black", "red"))
2.1 Bias Correction
Code
#QQ plotqqplot(ws_comb_z$mod,ws_comb_z$obs,xlab="ERA5-Land [m/s]",ylab="Observed [m/s]",main="Q-Q plot for Wind Speed - ERA5-Land vs Observed [m/s]")abline(a=0,b=1,col="red")
Comparison of the results after quantile delta mapping matching (bias correction)
Code
ws_model_out<-QDM(o.c =ws_comb_z$obs,m.c = ws_comb_z$mod,m.p =ws_z )ws_result_z<-merge(mod=ws_model_out$mhat.p,obs=ws_obs_z)ws_result_z$obs[ws_result_z$obs<=0.4]<-NAsubdaily2daily(tk_zoo(ws_result_z),FUN=mean) %>%daily2monthly.V2(FUN=mean) %>% as.xts %>%plot.xts(main="Comparison between ERA5-Land (Corrected) and local observed records",type="b")addLegend("topright", on=1, legend.names =c("Observed Records", "ERA5-Land (Corrected)"), lty=c(1, 1), lwd=c(2, 2),col=c("black", "red"))
Frequency analysis is a process used to estimate the frequency of occurrence of a value based on a long-term time series. The desired frequency is typically in months or years and the periods are called return periods. The historical information is adjusted into a probabilistic distribution, and the most suitable distribution is selected.
In this case, the frequency analysis was prepared considering the following probabilistic distributions: Normal, Log-Normal, Generalized Extreme Value (GEV), Gumbel, Pearson III, and Log-Pearson III. The selection of the distribution parameters was prepared with the L-moments methodology.
The L-moments approach provides a more resistant estimation to outliers when compared with typical moment estimations. The selection of the best-fit distribution was prepared based on three criteria: Akaike Information Criterion, Anderson-Darling Criterion, and Bayesian Information Criterion, as described by Laio et al. (2009) and implemented in the statistical software, R.
With this information it was selected a Gumbel distribution.
pandoc.table(fa_res[,-4] %>%arrange(Prob),caption="frequency analysis of site records extended with ERA5-Land at hourly scale in m/s")
frequency analysis of site records extended with ERA5-Land at hourly scale in m/s
Prob
wind_speed
RP
0.5
17.23
2 Wet
0.5708
17.58
2.33 Wet
0.8
19.08
5 Wet
0.9
20.3
10 Wet
0.95
21.47
20 Wet
0.96
21.84
25 Wet
0.98
22.98
50 Wet
0.99
24.12
100 Wet
0.995
25.25
200 Wet
0.998
26.74
500 Wet
0.999
27.87
1000 Wet
Source Code
---title: "Wind Speed Review"author: "Victor Munoz"format: html: toc: true toc-depth: 4 toc-location: left number-sections: true number-depth: 4 code-fold: true code-tools: true theme: paper df-print: paged highlight: "tango" html-math-method: katex encoding: 'UTF-8' embed-resources: trueeditor: visualeditor_options: chunk_output_type: console---```{r setup, include=FALSE}knitr::opts_chunk$set(echo =TRUE, message =FALSE, warning =FALSE,cache=FALSE, dpi=150,results="hide",fig.height=7,fig.width=9,figcap.prefix ="Figure", figcap.sep =":", figcap.prefix.highlight ="**")#Tidyverselibrary(plyr); library(dplyr)library(tidyverse); library(lubridate);library(magrittr)#Time serieslibrary(zoo);library(xts);library(timetk);library(hydroTSM);library(dygraphs)#ggplot supportlibrary(ggmap);library(ggpmisc);library(ggrepel);library(ggsn)#spreadsheetslibrary(openxlsx);library(readxl)#paralel computinglibrary(parallel);library(pbapply)#general supportlibrary(here);library(glue);library(udunits2);library(usethis);library(matrixStats);library(pander);library(reshape2);library(Hydro)library(geosphere);library(nasapower)#data table supportlibrary(data.table);library(dtplyr);library(arrow)#raster supportlibrary(raster);library(sp);library(kgc)library(terra);library(tidyterra)#These directories are implemented to organize the regional analysisuse_directory("data")use_directory("data/csv")use_directory("data/netcdf")use_directory("data/rds")use_directory("graphs")use_directory("reports")use_directory("scripts")use_directory("outputs")# glue::glue("Work directory: {here()}")```# Sources## ERA5-Land RecordsCapture ERA5-Land at the site.```{r}Longitude=-73.962799Latitude =46.632005ws_ras<-rast(here::here("data","netcdf","era5-land_ws_hly.nc"))era5_land_xy<-function(Lon,Lat){# uwind_tbl<-dir(here::here("data","netcdf","era5-land","uwind"),full.names = T)# vwind_tbl<-dir(here::here("data","netcdf","era5-land","vwind"),full.names = T)# ws_tbl<-dir(here::here("data","netcdf","era5-land","ws"),full.names = T)# cl<-makeCluster(detectCores())# clusterExport(cl,varlist = c("uwind_tbl","vwind_tbl","Longitude","Latitude"))# # pbapply::pblapply(cl=cl,1:length(uwind_tbl),FUN=function(x){# # library(magrittr)# # ws_ras<-(terra::rast(uwind_tbl[x])^2 + terra::rast(vwind_tbl[x])^2)^0.5# # terra::writeCDF(x = ws_ras,# filename=stringr::str_replace_all(uwind_tbl[x],"uwind","ws"))# })# clusterExport(cl,varlist = c("ws_tbl","Longitude","Latitude"))# ws_dt<-pbapply::pblapply(cl=cl,1:length(ws_tbl),FUN=function(x){# library(magrittr)if(!exists("ws_ras")){ ws_ras<-rast(here::here("data","netcdf","era5-land_ws_hly.nc")) } ws_info<-terra::extract(ws_ras, terra::vect(cbind(Lon,Lat),crs="+proj=longlat +datum=WGS84")) %>%unlist() %>% .[-1] ws_dt<-data.frame(ws=ws_info,"date_time"=terra::time(ws_ras)) %>% data.table::as.data.table()%>%arrange(date_time) %>%distinct() ws_z<-zoo(ws_dt$ws,ws_dt$date_time) %>%window(end=as.Date("2023-10-31"))return(ws_z) }ws_z<-era5_land_xy(Lon = Longitude,Lat=Latitude)```## ECCC RecordsInformation available for comparison ERA5-Land vs regional observations```{r,fig.width=8,fig.height=4}source(here::here("scripts","eccc_download.r"))# library(weathercan)# library(tidyverse)# # eccc_file_hly<-here::here("data","csv","eccc_info_2024_02_23_hly.csv")# # station.selection_hly <- stations_search(coords = c(Latitude,Longitude),# dist = 100, interval ="hour")# # station.selection_hly <- station.selection_hly[station.selection_hly$end -# station.selection_hly$start >= 10, ]# # ## Remove stations with last year of data before 1980# station.selection_hly <- station.selection_hly[station.selection_hly$end>1990,]# # station.selection.idx<-rbind(# # station.selection_dly %>% dplyr::select(-interval,-start,-end),# station.selection_hly%>% dplyr::select(-interval,-start,-end)) %>%# distinct() %>%# arrange(distance)# # if(file.exists(eccc_file_hly)){# # eccc_info_hly.dt<-data.table::fread(eccc_file_hly)# # }else{# # ## Selecting all stations# # ##remove stations with less than 10 years of data# #write.csv(station.selection, file = "SDH_regional-stations_500km-daily.csv")# regional.db <- weather_dl(station.selection_hly$station_id, interval = "hour")# # data.table::fwrite(x = regional.db,file = eccc_file_hly)# # eccc_info_hly.dt<-regional.db# # }ws_eccc_hly_z<-reshape2::dcast(data = eccc_info_hly.dt,formula = time~station_name,value.var ="wind_spd") %>%tk_zoo()#sort for proximity.ws_eccc_hly_z<-ws_eccc_hly_z[,station.selection_hly$station_name]ws_eccc_hly_z %>%subdaily2daily(FUN=mean) %>%dwi.graph()```## Comparison ERA5-Land with ECCC records```{r,results="asis"}pbapply::pbwalk(1:3,FUN=function(x){ st_sel<-station.selection_hly[x,] pander::pandoc.p(''); pander::pandoc.p('') pander::pandoc.header(st_sel$station_name,level=3) era5_land_eccc_z<-era5_land_xy(Lon =st_sel$lon,Lat=st_sel$lat) ws_comb_z<-merge(era5=era5_land_eccc_z,eccc=ud.convert(ws_eccc_hly_z[,x],"km/hr","m/s") ) pander::pandoc.p(''); pander::pandoc.p('') g5<-ws_comb_z %>%daily2monthly.V2(FUN=mean) %>%fortify.zoo() %>% reshape2::melt(id.vars=c("Index")) %>%ggplot(data=.,aes(x=Index,y=value,col=variable))+geom_point()+geom_line()+labs(x=NULL,y="monthly wind speed [m/s]",col="source:")+theme_bw()+# scale_x_date()+theme(legend.position="bottom")print(g5) pander::pandoc.p(''); pander::pandoc.p('')subdaily2daily(ws_comb_z,FUN=mean) %>%daily2annual.avg(FUN=mean) %>%pandoc.table(round=1,caption="Comparison of monthly wind speed [m/s]") pander::pandoc.p(''); pander::pandoc.p('')})```## Mean Annual Wind Speed```{r,fig.width=8,fig.height=8}station.selection.idx<-station.selection.idx %>%mutate(ma_ws=daily2annual.avg(ud.convert(subdaily2daily(ws_eccc_hly_z,FUN=mean),"km/hr","m/s"),FUN=mean)$Ann)ma_ws_ras<-mean(tapp(ws_ras,index="years",fun=mean))ggplot()+ tidyterra::geom_spatraster(data=ma_ws_ras)+# geom_text()geom_point(data=station.selection.idx[-4,],aes(x=lon,y=lat,fill=ma_ws),shape=22,size=5)+geom_text_repel(data=station.selection.idx[-4,],aes(x=lon,y=lat,label=paste0(station_name,"\n",round(ma_ws,1))),size=2.5)+scale_fill_binned(type="viridis",breaks=seq(0,5,0.5))+labs(x="longitude [deg]",y="latitude [deg]",fill="mean annual\nwind speed\n[m/s]",title="Mean annual wind speed",subtitle="Background: ERA5-Land / Points : ECCC records")+coord_sf()```# Local RecordsIt is noted zero values may affect the bias correction process, then the values less than 0.4 m/s were removed.```{r,fig.height=6,fig.width=11,fig.keep="last"}library(MBC)library(readxl)ws_site_tbl <-read_excel(here::here("data","csv","WindSpeed NMG.xlsx"), col_types =c("date", "numeric")) %>%arrange(`Date Heure`) %>%distinct()ws_obs_z<-zoo(ws_site_tbl$`Vitesse des rafales, m/s`,ws_site_tbl$`Date Heure`)zoo::time(ws_obs_z)<-round.POSIXt(time(ws_obs_z),units ="hours") %>%as.POSIXct()ws_obs_z<-ws_obs_z[!duplicated(time(ws_obs_z)),]ws_z<-ws_z[!duplicated(time(ws_z)),]#for simplicity the records were simplified and removed the values less that 0.4 m/sws_comb_z<-merge(mod=ws_z,obs=ws_obs_z) %>%fortify.zoo() %>%# mutate(obs=if_else(obs<=0.4,NA,obs)) %>% dplyr::filter(complete.cases(.))subdaily2daily(tk_zoo(ws_comb_z),FUN=mean) %>%as.xts %>%plot.xts(main="Comparison between ERA5-Land and local observed records")addLegend("topright", on=1, legend.names =c("Observed Records", "ERA5-Land"), lty=c(1, 1), lwd=c(2, 2),col=c("black", "red"))``````{r,fig.height=6,fig.width=11,fig.keep="last"}ws_comb_z<-merge(mod=ws_z,obs=ws_obs_z) %>%fortify.zoo() %>%mutate(obs=if_else(obs<=0.4,NA,obs)) %>% dplyr::filter(complete.cases(.))subdaily2daily(tk_zoo(ws_comb_z),FUN=mean) %>%as.xts %>%plot.xts(main="Comparison between ERA5-Land and local observed records,values <= 0.4 m/s were removed")addLegend("topright", on=1, legend.names =c("Observed Records", "ERA5-Land"), lty=c(1, 1), lwd=c(2, 2),col=c("black", "red"))```## Bias Correction```{r,fig.height=6,fig.width=6,fig.keep="last"}#QQ plotqqplot(ws_comb_z$mod,ws_comb_z$obs,xlab="ERA5-Land [m/s]",ylab="Observed [m/s]",main="Q-Q plot for Wind Speed - ERA5-Land vs Observed [m/s]")abline(a=0,b=1,col="red")```Comparison of the results after quantile delta mapping matching (bias correction)```{r,fig.height=6,fig.width=11,fig.keep="last"}ws_model_out<-QDM(o.c =ws_comb_z$obs,m.c = ws_comb_z$mod,m.p =ws_z )ws_result_z<-merge(mod=ws_model_out$mhat.p,obs=ws_obs_z)ws_result_z$obs[ws_result_z$obs<=0.4]<-NAsubdaily2daily(tk_zoo(ws_result_z),FUN=mean) %>%daily2monthly.V2(FUN=mean) %>% as.xts %>%plot.xts(main="Comparison between ERA5-Land (Corrected) and local observed records",type="b")addLegend("topright", on=1, legend.names =c("Observed Records", "ERA5-Land (Corrected)"), lty=c(1, 1), lwd=c(2, 2),col=c("black", "red"))``````{r,fig.height=6,fig.width=12}g1<-subdaily2daily(tk_zoo(ws_result_z),FUN=mean) %>%daily2monthly.V2(FUN=mean) %>%fortify.zoo() %>%ggplot(data=.,aes(x=mod,y=obs))+geom_point()+geom_smooth(method="lm")+stat_poly_eq(use_label(c("eq", "adj.R2")),formula = y ~poly(x, 1, raw =TRUE))+geom_abline(slope=1,col="red")+labs(x="ERA5-Land (after bias correction)\nmean monthly wind speed [m/s]",y="Observed\nmean monthly wind speed [m/s]",title="Wind Speed - Monthly Comparison")+theme_bw()g2<-subdaily2daily(tk_zoo(ws_result_z),FUN=mean) %>%# daily2monthly.V2(FUN=mean) %>% fortify.zoo() %>%ggplot(data=.,aes(x=mod,y=obs))+geom_point(alpha=0.6)+geom_smooth(method="lm")+stat_poly_eq(use_label(c("eq", "adj.R2")),formula = y ~poly(x, 1, raw =TRUE))+geom_abline(slope=1,col="red")+labs(x="ERA5-Land (after bias correction)\nmean monthly wind speed [m/s]",y="Observed\nmean monthly wind speed [m/s]",title="Wind Speed - Daily Comparison")+theme_bw()# g3<-tk_zoo(ws_result_z) %>% # # daily2monthly.V2(FUN=mean) %>% # fortify.zoo() %>% # ggplot(data=.,aes(x=mod,y=obs))+# geom_point(alpha=0.6)+# geom_smooth(method="lm")+# stat_poly_eq(use_label(c("eq", "adj.R2")),# formula = y ~ poly(x, 1, raw = TRUE))+# # geom_abline(slope=1,col="red")+# labs(x="ERA5-Land (after bias correction)\nmean monthly wind speed [m/s]",# y="Observed\nmean monthly wind speed [m/s]",# title="Wind Speed - Daily Comparison")+# theme_bw()library(patchwork)g1+g2```# Frequency AnalysisFrequency analysis is a process used to estimate the frequency of occurrence of a value based on a long-term time series. The desired frequency is typically in months or years and the periods are called return periods. The historical information is adjusted into a probabilistic distribution, and the most suitable distribution is selected. In this case, the frequency analysis was prepared considering the following probabilistic distributions: Normal, Log-Normal, Generalized Extreme Value (GEV), Gumbel, Pearson III, and Log-Pearson III. The selection of the distribution parameters was prepared with the L-moments methodology. The L-moments approach provides a more resistant estimation to outliers when compared with typical moment estimations. The selection of the best-fit distribution was prepared based on three criteria: Akaike Information Criterion, Anderson-Darling Criterion, and Bayesian Information Criterion, as described by Laio et al. (2009) and implemented in the statistical software, R.With this information it was selected a Gumbel distribution.```{r,results="asis",fig.width=9,fig.height=6}fa_res<-FA(IDF(ws_result_z$mod,1) %>%coredata())$results %>%mutate(RP=RP.text(Prob),RP_simple=RP.text(Prob,dry.wet=F)) %>% dplyr::filter(Prob>=0.5,Prob<=0.9990) %>%mutate(RP=str_replace(RP," ", "\n")) %>% dplyr::rename("wind_speed"=2)ggplot(data=fa_res,aes(x=RP_simple,y=wind_speed))+geom_point()+geom_line()+scale_x_log10(breaks=fa_res$RP_simple,labels = fa_res$RP)+labs(x="return period",y="maximum wind speed [m/s]")+theme_bw()pandoc.table(fa_res[,-4] %>%arrange(Prob),caption="frequency analysis of site records extended with ERA5-Land at hourly scale in m/s")```