The first step clears the RStudio environment and ensures you are starting out with a clean slate.
# Clear the environment
rm(list = ls()); cat("\014"); gc(verbose = TRUE)
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 577718 30.9 1321538 70.6 644242 34.5
## Vcells 989326 7.6 8388608 64.0 1634574 12.5
The next step sets the working directory, i.e. the folder your script is saved in. This tells R which folder to read from and write to.
# Set the working directory
setwd(dirname(rstudioapi::getSourceEditorContext()$path)); getwd()
## [1] "C:/Users/mbedinge/OneDrive - University of Edinburgh/Documents"
The last setup step installs and loads any packages (individual pieces of software) you will need to run the code.
# Load required packages (installing pacman first if it is not already available)
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
pacman::p_load(progress, tidyverse, lubridate, extRemes, lfstat)
To read in your data, you will have to change the name of your file (e.g. from “19010_gdf_edited.csv” to “yourfilename.csv”) depending on which gauge station you have chosen. Ensure this file is in the same folder as the R script.
# Read in and reformat data
data =
read_csv("19010_gdf_edited.csv") %>%
mutate(date = as.Date(date, format = "%d/%m/%Y")) %>% # reformat date column for easier processing
arrange(date) %>% # sort the dataframe from earliest to latest date
na.omit() %>% # omit any rows with missing data to keep it simple
mutate(year = as.numeric(format(date, "%Y"))) # add a column for year for easier processing
## Rows: 19266 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): date
## dbl (1): Q
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Prepare data in three 30-year blocks
# For 1970-1999
data_block1 =
data %>%
filter(year > 1969 & year < 2000)
# For 1980-2009
data_block2 =
data %>%
filter(year > 1979 & year < 2010)
# For 1990-2019
data_block3 =
data %>%
filter(year > 1989 & year < 2020)
The next step is to find the AMAX (annual maximum) flow rates within each block of years, in this case 30-year blocks. We can then compare the three blocks to understand how flow rates may be changing over time. By plotting a histogram for each block, you can inspect the data visually.
# Find the annual maximum flow rates for 1970-1999
amax_block1 =
data_block1 %>%
group_by(year) %>%
summarise(amax = max(Q, na.rm = TRUE)) %>%
na.omit()
# Plot the histogram for 1970-1999
hist(
amax_block1$amax, xlab = "Annual Maximum Flow Rate",
font = 2, main = "1970-1999", font.lab = 2)
# Find the annual maximum flow rates for 1980-2009
amax_block2 =
data_block2 %>%
group_by(year) %>%
summarise(amax = max(Q, na.rm = TRUE)) %>%
na.omit()
# Plot the histogram for 1980-2009
hist(
amax_block2$amax, xlab = "Annual Maximum Flow Rate",
font = 2, main = "1980-2009", font.lab = 2)
# Find the annual maximum flow rates for 1990-2019
amax_block3 =
data_block3 %>%
group_by(year) %>%
summarise(amax = max(Q, na.rm = TRUE)) %>%
na.omit()
# Plot the histogram for 1990-2019
hist(
amax_block3$amax, xlab = "Annual Maximum Flow Rate",
font = 2, main = "1990-2019", font.lab = 2)
Did you spot any differences? Do these look like normal
distributions?
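If you would like a check beyond eyeballing the histograms, one option (not part of the original workflow, shown here for the first block only) is a normal Q-Q plot of the AMAX values, using base R:
# Normal Q-Q plot for the 1970-1999 AMAX values
# Points curving away from the reference line in the tails suggest the data are not normally distributed
qqnorm(amax_block1$amax, main = "Normal Q-Q plot: AMAX 1970-1999")
qqline(amax_block1$amax)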
Because these annual maxima are extreme values, we can fit a generalised extreme value (GEV) distribution to the AMAX flow data for each block.
# Fit a generalised extreme value (GEV) distribution to the AMAX flow data for 1970-1999
fitQ_block1 = fevd(amax_block1$amax, type = "GEV")
# Inspect a summary of the results for 1970-1999
summary(fitQ_block1)
##
## fevd(x = amax_block1$amax, type = "GEV")
##
## [1] "Estimation Method used: MLE"
##
##
## Negative Log-Likelihood Value: 47.17853
##
##
## Estimated parameters:
## location scale shape
## 1.58092399 0.99168934 -0.01298812
##
## Standard Error Estimates:
## location scale shape
## 0.2244851 0.1742988 0.2243629
##
## Estimated parameter covariance matrix.
## location scale shape
## location 0.05039356 0.01969783 -0.02666306
## scale 0.01969783 0.03038008 -0.02188589
## shape -0.02666306 -0.02188589 0.05033870
##
## AIC = 100.3571
##
## BIC = 104.5607
# Inspect the plot, though it may be easiest to view after export via RPubs
plot(fitQ_block1)
In particular, pay attention to the two plots on the bottom. Then
repeat for the second and third blocks, as follows.
# Fit a generalised extreme value (GEV) distribution to the AMAX flow data for 1980-2009
fitQ_block2 = fevd(amax_block2$amax, type = "GEV")
# Inspect the plot, though it may be easiest to view after export via RPubs
plot(fitQ_block2)
# Fit a generalised extreme value (GEV) distribution to the AMAX flow data for 1990-2019
fitQ_block3 = fevd(amax_block3$amax, type = "GEV")
# Inspect the plot, though it may be easiest to view after export via RPubs
plot(fitQ_block3)
You can use these plots to understand conditions at different
return periods.
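If you prefer numbers to reading values off the plots, the extRemes package can also report estimated return levels from the fitted GEV. A minimal sketch for the first block (the return periods chosen here are only examples):
# Estimate the flow rates associated with selected return periods (in years) for 1970-1999
return.level(fitQ_block1, return.period = c(2, 10, 50, 100))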
# Calculate the exceedance probability (P) for 1970-1999
flowDuration_block1 =
data_block1 %>%
arrange(desc(Q)) %>% # Arrange rows by flow rate (Q) value in descending order
na.omit() %>% # Omit rows with missing data so that the total number of rows is accurate
mutate(Q_rank = row_number()) %>% # Assign a unique rank to each data point
mutate(P = 100 * (Q_rank / (n() + 1))) # Calculate the exceedance probability (P) as rank / (n + 1)
# Calculate the exceedance probability (P) for 1980-2009
flowDuration_block2 =
data_block2 %>%
arrange(desc(Q)) %>% # Arrange rows by flow rate (Q) value in descending order
na.omit() %>% # Omit rows with missing data so that the total number of rows is accurate
mutate(Q_rank = row_number()) %>% # Assign a unique rank to each data point
mutate(P = 100 * (Q_rank / (n() + 1))) # Calculate the exceedance probability (P) as rank / (n + 1)
# Calculate the exceedance probability (P) for 1990-2019
flowDuration_block3 =
data_block3 %>%
arrange(desc(Q)) %>% # Arrange rows by flow rate (Q) value in descending order
na.omit() %>% # Omit rows with missing data so that the total number of rows is accurate
mutate(Q_rank = row_number()) %>% # Assign a unique rank to each data point
mutate(P = 100 * (Q_rank / (n() + 1))) # Calculate the exceedance probability (P) as rank / (n + 1)
# Plot the flow rate (Q) against the exceedance probability (P)
plot(flowDuration_block1$P, flowDuration_block1$Q, log = 'y',
xlab = "P (exceedance probability)", ylab = "Q (flow rate in m3/s)",
col = "black", cex = 0.4)
points(flowDuration_block2$P, flowDuration_block2$Q,
col = "blue", cex = 0.4)
points(flowDuration_block3$P, flowDuration_block3$Q,
col = "green", cex = 0.4)
legend(40, 4, legend = c("1970-1999", "1980-2009", "1990-2019"),
fill = c("black", "blue", "green"), cex = 0.8)
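To read a specific value off a flow duration curve rather than estimating it by eye, one option (not part of the original workflow) is linear interpolation with base R's approx(); for example, the flow exceeded roughly 50% of the time in 1970-1999:
# Interpolate the flow rate at a chosen exceedance probability (here P = 50%)
approx(flowDuration_block1$P, flowDuration_block1$Q, xout = 50)$y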
# Function to return Qx values in a table
function_Qx <-
function(data) {
# Setting up a low flow object as per R package lfstat
firstDate = data$date[1]
values = data$Q
time = ts(values)
data_qx = createlfobj(time, startdate = firstDate, hyearstart = 1)
data_qx = createlfobj(data_qx, meta = list(river = "data_qx"))
# Finding the Q5, Q10, Q90, and Q95 values for the input data
Q5 =
Qxx(data_qx, Qxx = 5, year = "any", monthly = FALSE,
yearly = FALSE, breakdays = NULL, na.rm = TRUE)
Q10 =
Qxx(data_qx, Qxx = 10, year = "any", monthly = FALSE,
yearly = FALSE, breakdays = NULL, na.rm = TRUE)
Q90 =
Q90(data_qx, year = "any", monthly = FALSE,
yearly = FALSE, breakdays = NULL, na.rm = TRUE)
Q95 =
Q95(data_qx, year = "any", monthly = FALSE,
yearly = FALSE, breakdays = NULL, na.rm = TRUE)
# Formatting results into a table
results =
c(Q5, Q10, Q90, Q95) %>%
as.data.frame() %>%
rownames_to_column()
colnames(results) =
c("Qx", "Q")
# Finding the total number of days with a flow rate Q under a Qx value
# Then the total number of days with a flow rate Q over a Qx value
Qvals <- results %>% pull(Q)
ndays_under <- c()
for (value in Qvals) {
output_val <-
data %>% filter(Q < value) %>% nrow() %>% as.numeric()
ndays_under <- c(ndays_under, output_val)
}
ndays_over <- c()
for (value in Qvals) {
output_val <-
data %>% filter(Q > value) %>% nrow() %>% as.numeric()
ndays_over <- c(ndays_over, output_val)
}
# Appending to the results table and returning the output
ndays <- cbind(ndays_under, ndays_over)
results <- cbind(results, ndays)
return(results)
}
# Find Qx values and total number of days under/over for whole dataset
Qx_all = function_Qx(data = data)
Qx_all
## Qx Q ndays_under ndays_over
## 1 Q5 0.474 18030 947
## 2 Q10 0.354 17076 1893
## 3 Q90 0.046 1845 17059
## 4 Q95 0.036 947 17948
# Find Qx values and total number of days under/over for 1970-1999
Qx_block1 = function_Qx(data = data_block1)
Qx_block1
## Qx Q ndays_under ndays_over
## 1 Q5 0.440 10202 536
## 2 Q10 0.320 9662 1072
## 3 Q90 0.038 1027 9615
## 4 Q95 0.030 502 10166
# Find Qx values and total number of days under/over for 1980-2009
Qx_block2 = function_Qx(data = data_block2)
Qx_block2
## Qx Q ndays_under ndays_over
## 1 Q5 0.522 10136 533
## 2 Q10 0.392 9599 1064
## 3 Q90 0.047 1045 9596
## 4 Q95 0.034 500 10135
# Find Qx values and total number of days under/over for 1990-2019
Qx_block3 = function_Qx(data = data_block3)
Qx_block3
## Qx Q ndays_under ndays_over
## 1 Q5 0.5187 10342 545
## 2 Q10 0.4000 9797 1080
## 3 Q90 0.0690 1074 9782
## 4 Q95 0.0500 544 10315
These values help you understand how frequently a given flow is exceeded. For example, Q95 is the flow exceeded 95% of the time and is typical of a dry summer flow, while Q5 is the flow exceeded only 5% of the time and is equivalent to a full spate. You can then find the total number of days under or over a specific Q value, for example the total number of days in the block with a flow rate (Q) above the Q5 value for that block.
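To make the comparison across blocks easier to see, you could also stack the three tables into one; a minimal sketch using the objects created above (the block labels are purely illustrative):
# Combine the Qx tables for the three 30-year blocks into a single comparison table
Qx_compare = bind_rows(
Qx_block1 %>% mutate(block = "1970-1999"),
Qx_block2 %>% mutate(block = "1980-2009"),
Qx_block3 %>% mutate(block = "1990-2019"))
Qx_compare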