The first step clears the RStudio environment, so you start from a clean slate.
# Clear the environment
rm(list = ls()); cat("\014"); gc(verbose = TRUE)
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 577662 30.9 1321324 70.6 644242 34.5
## Vcells 988573 7.6 8388608 64.0 1634574 12.5
The next step sets the working directory, i.e. the folder your script is saved in. This tells R where to read and write files.
# Set the working directory
setwd(dirname(rstudioapi::getSourceEditorContext()$path)); getwd()
## [1] "C:/Users/mbedinge/OneDrive - University of Edinburgh/Documents"
The last step installs and loads any packages (individual pieces of software) you will need to run the code.
# Load required packages
pacman::p_load(progress, tidyverse, lubridate, extRemes, lfstat)
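Note that pacman itself must already be installed for p_load() to work; if it is not, install it once first:
# One-time setup: install pacman if it is not already available
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")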
To read in your data, change the file name (e.g. from “19010_gdf_edited.csv” to “yourfilename.csv”) to match the gauge station you have chosen. Ensure this file is in the same folder as the R script.
# Read in and reformat data
data =
read_csv("19010_gdf_edited.csv") %>%
mutate(date = as.Date(date, format= "%d/%m/%Y")) %>% # reformat date column for easier processing
arrange(date) %>% # sort rows from earliest to latest date
na.omit() %>% # omit any rows with missing data to keep it simple
mutate(year = as.numeric(format(date, "%Y"))) # add column for year for easier processing
## Rows: 19266 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): date
## dbl (1): Q
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
The next task is to find the AMAX (annual maximum) flow rates within a block of years, in this case three overlapping 30-year blocks (1970-1999, 1980-2009 and 1990-2019). Comparing the three blocks shows how flow rates may be changing over time. By plotting a histogram for each block, you can inspect the data visually.
# Find the annual maximum flow rates for 1970-1999
amax_block1 =
data %>%
filter(year > 1969 & year < 2000) %>%
group_by(year) %>%
summarise(amax = max(Q, na.rm = TRUE)) %>%
na.omit()
# Plot the histogram for 1970-1999
hist(
amax_block1$amax, xlab = "Annual Maximum Flow Rate",
font = 2, main = "1970-1999", font.lab = 2)
# Find the annual maximum flow rates for 1980-2009
amax_block2 =
data %>%
filter(year > 1979 & year < 2010) %>%
group_by(year) %>%
summarise(amax = max(Q, na.rm = TRUE)) %>%
na.omit()
# Plot the histogram for 1980-2009
hist(
amax_block2$amax, xlab = "Annual Maximum Flow Rate",
font = 2, main = "1980-2009", font.lab = 2)
# Find the annual maximum flow rates for 1990-2019
amax_block3 =
data %>%
filter(year > 1989 & year < 2020) %>%
group_by(year) %>%
summarise(amax = max(Q, na.rm = TRUE)) %>%
na.omit()
# Plot the histogram for 1990-2019
hist(
amax_block3$amax, xlab = "Annual Maximum Flow Rate",
font = 2, main = "1990-2019", font.lab = 2)
Did you spot any differences? Do these look like normal distributions?
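If it helps with the comparison, you can also draw the three histograms in a single plotting window; a minimal sketch using base R's par():
# Draw the three histograms side by side for easier comparison
par(mfrow = c(1, 3))
hist(amax_block1$amax, xlab = "Annual Maximum Flow Rate", main = "1970-1999")
hist(amax_block2$amax, xlab = "Annual Maximum Flow Rate", main = "1980-2009")
hist(amax_block3$amax, xlab = "Annual Maximum Flow Rate", main = "1990-2019")
par(mfrow = c(1, 1)) # reset the plotting window afterwards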
Because these annual maxima are extreme values, we can fit a generalized extreme value (GEV) distribution to the flow rate (Q) data for each block.
# Fit a generalized extreme value (GEV) distribution to the flow rate (Q) data for 1970-1999
fitQ_block1 = fevd(amax_block1$amax, type = "GEV")
# Inspect a summary of the results for 1970-1999
summary(fitQ_block1)
##
## fevd(x = amax_block1$amax, type = "GEV")
##
## [1] "Estimation Method used: MLE"
##
##
## Negative Log-Likelihood Value: 47.17853
##
##
## Estimated parameters:
## location scale shape
## 1.58092399 0.99168934 -0.01298812
##
## Standard Error Estimates:
## location scale shape
## 0.2244851 0.1742988 0.2243629
##
## Estimated parameter covariance matrix.
## location scale shape
## location 0.05039356 0.01969783 -0.02666306
## scale 0.01969783 0.03038008 -0.02188589
## shape -0.02666306 -0.02188589 0.05033870
##
## AIC = 100.3571
##
## BIC = 104.5607
# Inspect the plot, though it may be easiest to view after export via RPubs
plot(fitQ_block1)
In particular, pay attention to the two bottom panels (the density comparison and the return level plot). Then repeat for the second and third blocks, as follows.
# Fit a generalized extreme value (GEV) distribution to the flow rate (Q) data for 1980-2009
fitQ_block2 = fevd(amax_block2$amax, type = "GEV")
# Inspect the plot, though it may be easiest to view after export via RPubs
plot(fitQ_block2)
# Fit a generalized extreme value (GEV) distribution to the flow rate (Q) data for 1990-2019
fitQ_block3 = fevd(amax_block3$amax, type = "GEV")
# Inspect the plot, though it may be easiest to view after export via RPubs
plot(fitQ_block3)
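It can also be useful to compare the fitted parameters across the three blocks numerically. A minimal sketch, assuming the default MLE fit (where fevd stores the optimiser output under $results$par):
# Compare the fitted GEV parameters (location, scale, shape) across blocks
rbind(block1 = fitQ_block1$results$par,
block2 = fitQ_block2$results$par,
block3 = fitQ_block3$results$par)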
You can use these plots to understand conditions at different return periods.
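You can also read return levels off numerically rather than from the plots, using extRemes' return.level() function; a minimal sketch for the first block:
# Estimated flow rates for the 2-, 20- and 100-year return periods (1970-1999)
return.level(fitQ_block1, return.period = c(2, 20, 100))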
# Calculate the exceedance probability (P)
results_flowDuration =
data %>%
arrange(desc(Q)) %>% # Arrange rows by flow rate (Q) value in descending order
na.omit() %>% # Omit rows with missing data so that the total n of rows is accurate
mutate(Q_rank = row_number()) %>% # Assign a unique rank for each data point
mutate(P = 100 * (Q_rank / (n() + 1))) # Calculate the exceedance probability (P) as rank / (n + 1)
# Plot the flow rate (Q) against the exceedance probability (P) to give the flow duration curve
plot(results_flowDuration$P, results_flowDuration$Q,
xlab = "Exceedance Probability (%)", ylab = "Flow Rate (Q)")
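As a quick cross-check on the percentile statistics calculated next, you can interpolate the flow at any exceedance probability directly from this table; a minimal sketch using base R's approx():
# Interpolate the flow rate at, for example, the 95% exceedance probability
approx(results_flowDuration$P, results_flowDuration$Q, xout = 95)$y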
The lfstat package can calculate flow percentiles (Qxx statistics) directly, but the data must first be converted into a low-flow object.
# Convert the data into a low-flow object for the lfstat package
firstDate = data$date[1] # first date in the record
values = data$Q # flow rate (Q) values
time = ts(values) # convert the flow rates to a time series
data_qx = createlfobj(time, startdate = firstDate, hyearstart = 10) # hydrological year starts in October
data_qx = createlfobj(data_qx, meta = list(river = "data_qx")) # attach metadata
# Calculate Q5: the flow exceeded 5% of the time
Q5 =
Qxx(data_qx,
Qxx = 5,
year = "any",
monthly = FALSE,
yearly = FALSE,
breakdays = NULL,
na.rm = TRUE)
Q5
## Q5
## 0.474
# Calculate Q10: the flow exceeded 10% of the time
Q10 =
Qxx(data_qx,
Qxx = 10,
year = "any",
monthly = FALSE,
yearly = FALSE,
breakdays = NULL,
na.rm = TRUE)
Q10
## Q10
## 0.354
# Calculate Q90: the flow exceeded 90% of the time
Q90 =
Q90(data_qx,
year = "any",
monthly = FALSE,
yearly = FALSE,
breakdays = NULL,
na.rm = TRUE)
Q90
## Q90
## 0.046
# Calculate Q95: the flow exceeded 95% of the time
Q95 =
Q95(data_qx,
year = "any",
monthly = FALSE,
yearly = FALSE,
breakdays = NULL,
na.rm = TRUE)
Q95
## Q95
## 0.036
These statistics help you understand how frequently a given flow is exceeded. For example, Q95 is the flow exceeded 95% of the time and is typical of a dry summer flow, while Q5 is the flow exceeded 5% of the time and is equivalent to a full spate.
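To compare the four statistics at a glance, you can gather them into a single named vector; a minimal sketch, assuming each call above returned a single named number (as the printed output suggests):
# Gather the flow percentiles into one vector for easy comparison
c(Q5 = unname(Q5), Q10 = unname(Q10), Q90 = unname(Q90), Q95 = unname(Q95))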