These are the codes I used to run to extract and analyze discharge (Q) data collected from the Gwynns Falls - Villa Nova USGS gauging station. All credit goes to the EGRET package developed by USGS and also to Dr. Joel Moore, my thesis committee member, who modified the sample codes and provided me the codes.
## load necessary libraries
library(lubridate) # Manipulate dates
# USGS library
library(EGRET)
# Required for some of functions below that EGRET uses and to run the functions pasted in at the bottom
library(rkt)
library(zyp)
library(Kendall)
# Set gauging site number
siteNumber <- '01589300' # Gwynns Falls at Villa Nova MD
# Set start & end dates for period
startDate <- '1957-02-28'
endDate <- '2019-12-31' # through end of the study period, 2019
# Read in daily discharge (Q) data from USGS National Water Info System (NWIS)
Daily <- readNWISDaily(siteNumber, '00060', startDate, endDate)
## There are 20030 data points, and 22952 days.
##
## discharge data jumps from 1988-09-30 to 1996-10-01
# 00060 is the USGS parameter code for stream discharge
# Read in information about site & set short name - note ** interactive
INFO <- readNWISInfo(siteNumber, '00060')
## Your site for streamflow data is:
## 01589300 .
## Your site name is GWYNNS FALLS AT VILLA NOVA, MD
## but you can modify this to a short name in a style you prefer.
## This name will be used to label graphs and tables.
## If you want the program to use the name given above, just do a carriage return,
## otherwise enter the preferred short name(no quotes):
##
## The latitude and longitude of the site are: 39.34589 , -76.73319 (degrees north and west).
##
## The drainage area at this site is 32.5 square miles
## which is being stored as 84.17461 square kilometers.
##
## It is helpful to set up a station abbreviation when doing multi-site studies,
## enter a unique id (three or four characters should work). It is case sensitive.
## Even if you don't feel you need an abbreviation for your site you need to enter something(no quotes):
##
## Your water quality data are for parameter number:
## 00060
## which has the name:' Discharge, cubic feet per second '.
## Typically you will want a shorter name to be used in graphs and tables.
## The suggested short name is:' Stream flow, mean. daily '.
## If you would like to change the short name, enter it here,
## otherwise just hit enter (no quotes):
## The units for the water quality data are: ft3/s .
## It is helpful to set up a constiuent abbreviation, enter a unique id
## three or four characters should work something like tn or tp or NO3).
## Even if you don't feel you need an abbreviation you need to enter something (no quotes):
##
## Required concentration units are mg/l.
## The INFO dataframe indicates: ft3/s
## Flux calculations will be wrong if units are not consistent.
# Set short name to 'Gwynns Falls-Villa Nova'
# Set station abbreviation to 'GWVP'
# Set 'Stream flow, mean. daily' to 'DailyQ_cfs'
# Set abbreviation for discharge units (ft3/s) to 'cfs'
# Create eList -- EGRET variable
eList <- as.egret(INFO, Daily, NA, NA)
# Plot mean daily Q per year
plotFlowSingle(eList, istat=5, qunit='cfs')
# plot standard deviation
plotSDLogQ(eList)
# plot days with daily Q >1 cfs
plotQTimeDaily(eList, qLower=1, qunit = 'cfs')
# plot various daily Q stats
plotFour(eList, qUnit = 'cfs')
In order to plot low, high, mean daily Q for year or season and to plot quantile Kendall plot, different functions provided by the USGS need to be run.
### 99. Functions from EGRET vignette page for Flow Trends ####
# http://usgs-r.github.io/EGRET/articles/streamflow_trend.html
########## this is the function you will use to make a single trend graph ##############
plotFlowTrend <- function (eList, istat, startDate = NA, endDate = NA,
paStart = 4, paLong = 12, window = 30, qMax = NA,
printTitle = TRUE, tinyPlot = FALSE,
customPar = FALSE, runoff = FALSE,
qUnit = 2, printStaName = TRUE, printPA = TRUE,
printIstat = TRUE, cex = 0.8, cex.axis = 1.1,
cex.main = 1.1, lwd = 2, col = "black", ...){
localDaily <- getDaily(eList)
localINFO <- getInfo(eList)
localINFO$paStart <- paStart
localINFO$paLong <- paLong
localINFO$window <- window
start <- as.Date(startDate)
end <- as.Date(endDate)
if(is.na(startDate)){
start <- as.Date(localDaily$Date[1])
}
if(is.na(endDate)){
end <- as.Date(localDaily$Date[length(localDaily$Date)])
}
localDaily <- subset(localDaily, Date >= start & Date <= end)
eList <- as.egret(localINFO,localDaily)
localAnnualSeries <- makeAnnualSeries(eList)
qActual <- localAnnualSeries[2, istat, ]
qSmooth <- localAnnualSeries[3, istat, ]
years <- localAnnualSeries[1, istat, ]
Q <- qActual
time <- years
LogQ <- log(Q)
mktFrame <- data.frame(time,LogQ)
mktFrame <- na.omit(mktFrame)
mktOut <- rkt::rkt(mktFrame$time,mktFrame$LogQ)
zypOut <- zyp::zyp.zhang(mktFrame$LogQ,mktFrame$time)
slope <- mktOut$B
slopePct <- 100 * (exp(slope)) - 100
slopePct <- format(slopePct,digits=2)
pValue <- zypOut[6]
pValue <- format(pValue,digits = 3)
if (is.numeric(qUnit)) {
qUnit <- qConst[shortCode = qUnit][[1]]
} else if (is.character(qUnit)) {
qUnit <- qConst[qUnit][[1]]
}
qFactor <- qUnit@qUnitFactor
yLab <- qUnit@qUnitTiny
if (runoff) {
qActual <- qActual * 86.4/localINFO$drainSqKm
qSmooth <- qSmooth * 86.4/localINFO$drainSqKm
yLab <- "Runoff in mm/day"
} else {
qActual <- qActual * qFactor
qSmooth <- qSmooth * qFactor
}
localSeries <- data.frame(years, qActual, qSmooth)
yInfo <- generalAxis(x = qActual, maxVal = qMax, minVal = 0,
tinyPlot = tinyPlot)
xInfo <- generalAxis(x = localSeries$years, maxVal = decimal_date(end),
minVal = decimal_date(start), padPercent = 0, tinyPlot = tinyPlot)
line1 <- localINFO$shortName
nameIstat <- c("minimum day", "7-day minimum", "30-day minimum",
"median daily", "mean daily", "30-day maximum", "7-day maximum",
"maximum day")
line2 <- paste0("\n", setSeasonLabelByUser(paStartInput = paStart,
paLongInput = paLong), " ", nameIstat[istat])
line3 <- paste0("\nSlope estimate is ",slopePct,"% per year, Mann-Kendall p-value is ",pValue)
if(tinyPlot){
title <- paste(nameIstat[istat])
} else {
title <- paste(line1, line2, line3)
}
if (!printTitle){
title <- ""
}
genericEGRETDotPlot(x = localSeries$years, y = localSeries$qActual,
xlim = c(xInfo$bottom, xInfo$top), ylim = c(yInfo$bottom,
yInfo$top), xlab = "", ylab = yLab, customPar = customPar,
xTicks = xInfo$ticks, yTicks = yInfo$ticks, cex = cex,
plotTitle = title, cex.axis = cex.axis, cex.main = cex.main,
tinyPlot = tinyPlot, lwd = lwd, col = col, ...)
lines(localSeries$years, localSeries$qSmooth, lwd = lwd,
col = col)
}
#########################################################################################
###### this the the function you will use to make the Quantile Kendall Plot #############
#########################################################################################
plotQuantileKendall <- function(eList, startDate = NA, endDate = NA,
paStart = 4, paLong = 12,
legendLocation = "topleft", legendSize = 1.0,
yMax = NA, yMin = NA) {
localDaily <- eList$Daily
localINFO <- eList$INFO
localINFO$paStart <- paStart
localINFO$paLong <- paLong
start <- as.Date(startDate)
end <- as.Date(endDate)
if(is.na(startDate)){
start <- as.Date(localDaily$Date[1])
}
if(is.na(endDate)){
end <- as.Date(localDaily$Date[length(localDaily$Date)])
}
localDaily <- subset(localDaily, Date >= start & Date <= end)
eList <- as.egret(localINFO,localDaily)
eList <- setPA(eList, paStart=paStart, paLong=paLong)
v <- makeSortQ(eList)
sortQ <- v[[1]]
time <- v[[2]]
results <- trendSortQ(sortQ, time)
pvals <- c(0.001,0.01,0.05,0.1,0.25,0.5,0.75,0.9,0.95,0.99,0.999)
zvals <- qnorm(pvals)
name <- eList$INFO$shortName
# ymax <- trunc(max(results$slopePct)*10)
# ymax <- max(ymax + 2, 5)
# ymin <- floor(min(results$slopePct)*10)
# ymin <- min(ymin - 2, -5)
# yrange <- c(ymin/10, ymax/10)
# yticks <- axisTicks(yrange, log = FALSE)
ymax <- max(results$slopePct + 0.5, yMax, na.rm = TRUE)
ymin <- min(results$slopePct - 0.5, yMin, na.rm = TRUE)
yrange <- c(ymin, ymax)
yticks <- axisTicks(yrange, log = FALSE, nint =7)
p <- results$pValueAdj
color <- ifelse(p <= 0.1,"black","snow3")
color <- ifelse(p < 0.05, "red", color)
pvals <- c(0.001,0.01,0.05,0.1,0.25,0.5,0.75,0.9,0.95,0.99,0.999)
zvals <- qnorm(pvals)
name <- paste0("\n", eList$INFO$shortName,"\n",
start," through ", end, "\n",
setSeasonLabelByUser(paStartInput = paStart, paLongInput = paLong))
plot(results$z,results$slopePct,col = color, pch = 20, cex = 1.0,
xlab = "Daily non-exceedance probability",
ylab = "Trend slope in percent per year",
xlim = c(-3.2, 3.2), ylim = yrange, yaxs = "i",
las = 1, tck = 0.02, cex.lab = 1.2, cex.axis = 1.2,
axes = FALSE, frame.plot=TRUE)
mtext(name, side =3, line = 0.2, cex = 1.2)
axis(1,at=zvals,labels=pvals, las = 1, tck = 0.02)
axis(2,at=yticks,labels = TRUE, las = 1, tck = 0.02)
axis(3,at=zvals,labels=FALSE, las = 1, tck=0.02)
axis(4,at=yticks,labels = FALSE, tick = TRUE, tck = 0.02)
abline(h=0,col="blue")
legend(legendLocation,c("> 0.1","0.05 - 0.1","< 0.05"),col = c("snow3", "black","red"),pch = 20, title = "p-value",
pt.cex=1.0, cex = legendSize * 1.5)
}
#########################################################################################
############ This next function combines four individual trend graphs (for mimimum day,
########### median day, mean day, and maximum day) along with the quantile kendall graph
#########################################################################################
plotFiveTrendGraphs <- function(eList, startDate = NA, endDate = NA,
paStart = 4, paLong = 12, qUnit = 2, window = 30,
legendLocation = "topleft", legendSize = 1.0) {
localDaily <- eList$Daily
localINFO <- eList$INFO
localINFO$paStart <- paStart
localINFO$paLong <- paLong
localINFO$window <- window
start <- as.Date(startDate)
end <- as.Date(endDate)
if(is.na(startDate)){
start <- as.Date(localDaily$Date[1])
}
if(is.na(endDate)){
end <- as.Date(localDaily$Date[length(localDaily$Date)])
}
localDaily <- subset(localDaily, Date >= start & Date <= end)
eList <- as.egret(localINFO,localDaily)
eList <- setPA(eList, paStart=paStart, paLong=paLong, window=window)
# this next line of code is inserted so that when paLong = 12, we always use the
# climate year when looking at the trends in the annual minimum flow
paStart1 <- if(paLong == 12) 4 else paStart
plotFlowTrend(eList, istat = 1, qUnit = qUnit, paStart = paStart1, paLong = paLong, window = window)
plotFlowTrend(eList, istat = 4, qUnit = qUnit, paStart = paStart, paLong = paLong, window = window)
plotFlowTrend(eList, istat = 8, qUnit = qUnit, paStart = paStart, paLong = paLong, window = window)
plotFlowTrend(eList, istat = 5, qUnit = qUnit, paStart = paStart, paLong = paLong, window = window)
# now the quantile kendall
plotQuantileKendall(eList, startDate = startDate, endDate = endDate, paStart = paStart,
paLong = paLong, legendLocation = legendLocation, legendSize = legendSize)
}
#########################################################################################
########### makeSortQ creates a matrix called Qsort.
############It sorted from smallest to largest over dimDays
############(if working with full year dimDays=365),
#############and also creates other vectors that contain information about this array.
#########################################################################################
makeSortQ <- function(eList){
localINFO <- getInfo(eList)
localDaily <- getDaily(eList)
paStart <- localINFO$paStart
paLong <- localINFO$paLong
# determine the maximum number of days to put in the array
numDays <- length(localDaily$DecYear)
monthSeqFirst <- localDaily$MonthSeq[1]
monthSeqLast <- localDaily$MonthSeq[numDays]
# creating a data frame (called startEndSeq) of the MonthSeq values that go into each year
Starts <- seq(paStart, monthSeqLast, 12)
Ends <- Starts + paLong - 1
startEndSeq <- data.frame(Starts, Ends)
# trim this list of Starts and Ends to fit the period of record
startEndSeq <- subset(startEndSeq, Ends >= monthSeqFirst & Starts <= monthSeqLast)
numYearsRaw <- length(startEndSeq$Ends)
# set up some vectors to keep track of years
good <- rep(0, numYearsRaw)
numDays <- rep(0, numYearsRaw)
midDecYear <- rep(0, numYearsRaw)
Qraw <- matrix(nrow = 366, ncol = numYearsRaw)
for(i in 1: numYearsRaw) {
startSeq <- startEndSeq$Starts[i]
endSeq <- startEndSeq$Ends[i]
startJulian <- getFirstJulian(startSeq)
# startJulian is the first julian day of the first month in the year being processed
# endJulian is the first julian day of the month right after the last month in the year being processed
endJulian <- getFirstJulian(endSeq + 1)
fullDuration <- endJulian - startJulian
yearDaily <- localDaily[localDaily$MonthSeq >= startSeq & (localDaily$MonthSeq <= endSeq), ]
nDays <- length(yearDaily$Q)
if(nDays == fullDuration) {
good[i] <- 1
numDays[i] <- nDays
midDecYear[i] <- (yearDaily$DecYear[1] + yearDaily$DecYear[nDays]) / 2
Qraw[1:nDays,i] <- yearDaily$Q
} else {
numDays[i] <- NA
midDecYear[i] <- NA
}
}
# now we compress the matrix down to equal number of values in each column
j <- 0
numGoodYears <- sum(good)
dayCounts <- ifelse(good==1, numDays, NA)
lowDays <- min(dayCounts, na.rm = TRUE)
highDays <- max(dayCounts, na.rm = TRUE)
dimYears <- numGoodYears
dimDays <- lowDays
sortQ <- matrix(nrow = dimDays, ncol = dimYears)
time <- rep(0,dimYears)
for (i in 1:numYearsRaw){
if(good[i]==1) {
j <- j + 1
numD <- numDays[i]
x <- sort(Qraw[1:numD, i])
# separate odd numbers from even numbers of days
if(numD == lowDays) {
sortQ[1:dimDays,j] <- x
} else {
sortQ[1:dimDays,j] <- if(odd(numD)) leapOdd(x) else leapEven(x)
}
time[j] <- midDecYear[i]
}
}
sortQList = list(sortQ,time)
return(sortQList)
}
#########################################################################################
########## Another function trendSortQ needed for Quantile Kendall
#########################################################################################
trendSortQ <- function(Qsort, time){
# note requires packages zyp and rkt
nFreq <- dim(Qsort)[1]
nYears <- length(time)
results <- as.data.frame(matrix(ncol=9,nrow=nFreq))
colnames(results) <- c("slopeLog","slopePct","pValue","pValueAdj","tau","rho1","rho2","freq","z")
for(iRank in 1:nFreq){
mkOut <- rkt::rkt(time,log(Qsort[iRank,]))
results$slopeLog[iRank] <- mkOut$B
results$slopePct[iRank] <- 100 * (exp(mkOut$B) - 1)
results$pValue[iRank] <- mkOut$sl
outZYP <- zyp.zhang(log(Qsort[iRank,]),time)
results$pValueAdj[iRank] <- outZYP[6]
results$tau[iRank] <- mkOut$tau
# I don't actually use this information in the current outputs, but the code is there
# if one wanted to look at the serial correlation structure of the flow series
serial <- acf(log(Qsort[iRank,]), lag.max = 2, plot = FALSE)
results$rho1[iRank] <- serial$acf[2]
results$rho2[iRank] <- serial$acf[3]
frequency <- iRank / (nFreq + 1)
results$freq[iRank] <- frequency
results$z[iRank] <- qnorm(frequency)
}
return(results)
}
#########################################################################################
################################## getFirstJulian finds the julian date of first day
################################## of a given month
#########################################################################################
getFirstJulian <- function(monthSeq){
year <- 1850 + trunc((monthSeq - 1) / 12)
month <- monthSeq - 12 * (trunc((monthSeq-1)/12))
charMonth <- ifelse(month<10, paste0("0",as.character(month)), as.character(month))
theDate <- paste0(year,"-",charMonth,"-01")
Julian1 <- as.numeric(julian(as.Date(theDate),origin=as.Date("1850-01-01")))
return(Julian1)
}
#########################################################################################
########### leapOdd is a function for deleting one value
############when the period that contains Februaries has a length that is an odd number
#########################################################################################
leapOdd <- function(x){
n <- length(x)
m <- n - 1
mid <- (n + 1) / 2
mid1 <- mid + 1
midMinus <- mid - 1
y <- rep(NA, m)
y[1:midMinus] <- x[1:midMinus]
y[mid:m] <- x[mid1:n]
return(y)}
#########################################################################################
########### leapEven is a function for deleting one value
############when the period that contains Februaries has a length that is an even number
#########################################################################################
leapEven <- function(x){
n <- length(x)
m <- n - 1
mid <- n / 2
y <- rep(NA, m)
mid1 <- mid + 1
mid2 <- mid + 2
midMinus <- mid - 1
y[1:midMinus] <- x[1:midMinus]
y[mid] <- (x[mid] + x[mid1]) / 2
y[mid1:m] <- x[mid2 : n]
return(y)
}
#########################################################################################
####### determines if the length of a vector is an odd number ###########################
#########################################################################################
odd <- function(x) {(!(x %% 2) == 0)}
#########################################################################################
########### calcWY calculates the water year and inserts it into a data frame
#########################################################################################
calcWY <- function (df) {
df$WaterYear <- as.integer(df$DecYear)
df$WaterYear[df$Month >= 10] <- df$WaterYear[df$Month >=
10] + 1
return(df)
}
#########################################################################################
##### calcCY calculates the climate year and inserts it into a data frame
#########################################################################################
calcCY <- function (df){
df$ClimateYear <- as.integer(df$DecYear)
df$ClimateYear[df$Month >= 4] <- df$ClimateYear[df$Month >=
4] + 1
return(df)
}
#########################################################################################
######## smoother is a function does the trend in real discharge units and not logs.
######## It is placed here so that users wanting to run this alternative have it available
######## but it is not actually called by any function in this document
#########################################################################################
smoother <- function(xy, window){
edgeAdjust <- TRUE
x <- xy$x
y <- xy$y
n <- length(y)
z <- rep(0,n)
x1 <- x[1]
xn <- x[n]
for (i in 1:n) {
xi <- x[i]
distToEdge <- min((xi - x1), (xn - xi))
close <- (distToEdge < window)
thisWindow <- if (edgeAdjust & close)
(2 * window) - distToEdge
else window
w <- triCube(x - xi, thisWindow)
mod <- lm(xy$y ~ x, weights = w)
new <- data.frame(x = x[i])
z[i] <- predict(mod, new)
}
return(z)
}
Start <- 1957 # first year of Gwynns Falls - Villa Nova Park record
End <- 2019
# plot daily Q low, mean, high for annual & season
plot15(eList, yearStart = Start, yearEnd = End)
plotFlowTrend(eList, qUnit = 'cfs', paStart = 10, istat = 5) # see stat #s below; mean daily Q
plotFlowTrend(eList, qUnit = 'cfs', paStart = 10, istat = 8) # see stat #s below; 1 day max Q
plotFlowTrend(eList, qUnit = 'cfs', paStart = 10, istat = 4) # see stat #s below; median daily Q
plotFlowTrend(eList, qUnit = 'cfs', paStart = 10, istat = 7) # see stat #s below; 7 day max Q
# create start & end dates
Start.date <- paste(Start, '-04-01', sep='')
End.date <- paste(End, '-09-30', sep='')
plotQuantileKendall(eList, startDate = Start.date, endDate = End.date,
paStart = 10, paLong = 12,
legendLocation = "topright", legendSize = 0.5)
# Plot Five Trend Graphs
plotFiveTrendGraphs(eList)
### 98. iStat & Units names/codes for EGRET ####
# The index of the flow statistics is istat. These statistics are: (1) 1-day minimum, (2) 7-day minimum,
# (3) 30-day minimum, (4) median (5) mean, (6) 30-day maximum, (7) 7-day maximum, and (8) 1-day maximum
# Number ObjectName shortName unitFactor
# 1 cfs Cubic Feet per Second 35.31467
# 2 cms Cubic Meters per Second 1
# 3 thousandCfs Thousand Cubic Feet per Second 0.03531467
# 4 thousandCms Thousand Cubic Meters per Second 0.001
# 5 mmDay mm per day
# 6 mmYear mm per year
#
It is also helpful to investigate whether low and high flows are decreasing or increasing. Low flow is defined by the lowest 7Q value annually (Evett et al., 1994). 7Q value is the minimum average daily Q for any seven consecutive days.
plotFlowTrend(eList, qUnit = 'cfs', paStart = 6, paLong = 3, istat = 2) #flow trend (7 day min) in summer months (JJA); no significance
plotFlowTrend(eList, qUnit = 'cfs', paStart = 6, paLong = 3, istat = 7) #flow trend (7 day max) in summer months (JJA); significant
plotFlowTrend(eList, qUnit = 'cfs', paStart = 12, paLong = 3, istat = 2) #flow trend (7 day min) in winter months (DJF); significant
plotFlowTrend(eList, qUnit = 'cfs', paStart = 12, paLong = 3, istat = 7) #flow trend (7 day max) in winter months (DJF); not significant
plotFlowTrend(eList, qUnit = 'cfs', paStart = 9, paLong = 3, istat = 7) #flow trend (7 day max) in fall months (SON); significant