In this Exercise you will - Load water quality data - Plot Q-C relationships - Use linear models to quantify the Q-C relationships - Plot discharge data on the plot to determine hydrological conditions at time of sampling

0. Water quality data

You can download water quality data from

  1. The CEDEN website (California): http://ceden.waterboards.ca.gov/AdvancedQueryTool

  2. Watershed urban management plans (San Diego County only): http://www.projectcleanwater.org/index.php Go to “Stormwater copermiettees > Work Products” or to: “Watersheds > Your Watershed > Data, Plans and Projects”

I. Load water quality data for San Diego River at Mast Blvd

A. Read in csv:

indir = "Y:/wquality_data/"
fname= "SDR_MLS_2005_to_2010_transpose.csv"
x = read.csv(paste0(indir,fname))

B. Create a formatted Date vector

dates = as.Date(x$Date,format="%Y-%m-%d")

C. Plot C vs Q for Electrical conductivity (EC), Total suspended solids (TSS), turbidity, and Total coliforms

par(mfrow=c(2,2),mar=c(1,4,0,4),oma=c(5,1,2,0))
plot(x$Q.ft3.s,x$EC.umhos.cm,xlab="",ylab="EC umhos/cm",xaxt="n")
axis(side=1,labels=FALSE)
plot(x$Q.ft3.s,x$TSS.mgL,ylab="TSS mg/L",xaxt="n")
axis(side=1,labels=FALSE)
plot(x$Q.ft3.s,x$Turbidity.NTU,,xlab="",ylab="Turbidity, NTU")
mtext(side=1,"Q ft3/s", line=3)
plot(x$Q.ft3.s,x$Total.Colif.MPN.100mL,ylab="Total coliforms, MPU/100mL")
mtext(side=1,"Q ft3/s", line=3)

D. Plot them as log-log:

par(mfrow=c(2,2),mar=c(1,4,0,4),oma=c(5,1,2,0))
plot(x$Q.ft3.s,x$EC.umhos.cm,xlab="",ylab="EC umhos/cm",xaxt="n",log="xy")
axis(side=1,labels=FALSE)
plot(x$Q.ft3.s,x$TSS.mgL,ylab="TSS mg/L",xaxt="n",log="xy")
axis(side=1,labels=FALSE)
plot(x$Q.ft3.s,x$Turbidity.NTU,,xlab="",ylab="Turbidity, NTU",log="xy")
mtext(side=1,"Q ft3/s", line=3)
plot(x$Q.ft3.s,x$Total.Colif.MPN.100mL,ylab="Total coliforms, MPU/100mL",log="xy")
mtext(side=1,"Q ft3/s", line=3)

D. Determine if there is a statistically signifiant relationship, and the exponent on the relationship

Are you surprised by any of the C-Q exponents?
What would be the corresponding exponent for load?

x$EC.log = log(x$EC.umhos.cm)
x$Q.log = log(x$Q.ft3.s)
lm.EC = lm(EC.log ~ Q.log,x) #  do ?lm to see format of input
summary(lm.EC)
## 
## Call:
## lm(formula = EC.log ~ Q.log, data = x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3417 -0.1645 -0.1325  0.2430  0.4397 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.42908    0.30398  27.729 3.09e-09 ***
## Q.log       -0.20220    0.07043  -2.871   0.0208 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2906 on 8 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5075, Adjusted R-squared:  0.4459 
## F-statistic: 8.243 on 1 and 8 DF,  p-value: 0.0208
Qnew = seq(5,400,by=10)
Qnew.log = data.frame(Q.log=log(Qnew))  #  Create a new dataframe that has a variable "Q.log" in it

EC.pred.log = predict.lm(lm.EC,Qnew.log)
# Use exp to turn the log-EC values into non-log values:
EC.pred = exp(EC.pred.log)

Turbidity

x$turb.log = log(x$Turbidity.NTU)
lm.turb = lm(turb.log ~ Q.log,x)
summary(lm.turb)
## 
## Call:
## lm(formula = turb.log ~ Q.log, data = x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83459 -0.31994 -0.00517  0.42493  0.65739 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.5015     0.5445   0.921 0.381064    
## Q.log         0.5996     0.1227   4.887 0.000863 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.532 on 9 degrees of freedom
## Multiple R-squared:  0.7263, Adjusted R-squared:  0.6959 
## F-statistic: 23.88 on 1 and 9 DF,  p-value: 0.000863
turb.pred = exp(predict.lm(lm.turb,Qnew.log))

TSS

x$TSS.log = log(x$TSS.mgL)
lm.TSS = lm(TSS.log ~ Q.log,x)
summary(lm.TSS)
## 
## Call:
## lm(formula = TSS.log ~ Q.log, data = x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.01415 -0.34337 -0.02042  0.32490  0.91866 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.5106     0.5902   0.865 0.409366    
## Q.log         0.6667     0.1330   5.014 0.000725 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5765 on 9 degrees of freedom
## Multiple R-squared:  0.7363, Adjusted R-squared:  0.7071 
## F-statistic: 25.14 on 1 and 9 DF,  p-value: 0.0007254
TSS.pred = exp(predict.lm(lm.TSS,Qnew.log))

Total Coliforms

x$TC.log = log(x$Total.Colif.MPN.100mL)
lm.TC = lm(TC.log ~ Q.log,x)
summary(lm.TC)
## 
## Call:
## lm(formula = TC.log ~ Q.log, data = x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0443 -0.9515 -0.4254  0.9244  3.9137 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   6.5980     2.3146   2.851   0.0191 *
## Q.log         0.8407     0.5216   1.612   0.1414  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.261 on 9 degrees of freedom
## Multiple R-squared:  0.224,  Adjusted R-squared:  0.1378 
## F-statistic: 2.598 on 1 and 9 DF,  p-value: 0.1414
TC.pred = exp(predict.lm(lm.TC,Qnew.log))

E. Redo the log-log plots and add the regression line.

par(mfrow=c(2,2),mar=c(1,4,0,4),oma=c(5,1,2,0))
plot(x$Q.ft3.s,x$EC.umhos.cm,xlab="",ylab="EC umhos/cm",xaxt="n",log="xy",ylim=c(500,5000))
axis(side=1,labels=FALSE)
abline(lm.EC,untf=TRUE)
lines(Qnew,EC.pred)
plot(x$Q.ft3.s,x$TSS.mgL,ylab="TSS mg/L",xaxt="n",log="xy")
axis(side=1,labels=FALSE)
abline(lm.TSS,untf=TRUE,lty=3)
lines(Qnew,TSS.pred)
plot(x$Q.ft3.s,x$Turbidity.NTU,,xlab="",ylab="Turbidity, NTU",log="xy")
mtext(side=1,"Q ft3/s", line=3)
lines(Qnew,turb.pred)
plot(x$Q.ft3.s,x$Total.Colif.MPN.100mL,ylab="Total coliforms, MPU/100mL",log="xy")
lines(Qnew,TC.pred)
mtext(side=1,"Q ft3/s", line=3)

II. Load and plot Q data, and sampling points on top.

A. Load Q data for Fashion Valley

library(dataRetrieval)
parameter.code = "00060"  # this is the code for stream discharge.
start.date = ""  # Blanks get all of the data
end.date = ""
xQ = readNWISdv("11023000",parameter.code,start.date,end.date)
head(xQ)
##   agency_cd  site_no       Date X_00060_00003 X_00060_00003_cd
## 1      USGS 11023000 1982-01-18           7.6                A
## 2      USGS 11023000 1982-01-19           8.8                A
## 3      USGS 11023000 1982-01-20         151.0                A
## 4      USGS 11023000 1982-01-21         574.0                A
## 5      USGS 11023000 1982-01-22         215.0                A
## 6      USGS 11023000 1982-01-23          70.0                A
names(xQ)[c(4,5)] = c("Q.ft.s","QA")
head(xQ)
##   agency_cd  site_no       Date Q.ft.s QA
## 1      USGS 11023000 1982-01-18    7.6  A
## 2      USGS 11023000 1982-01-19    8.8  A
## 3      USGS 11023000 1982-01-20  151.0  A
## 4      USGS 11023000 1982-01-21  574.0  A
## 5      USGS 11023000 1982-01-22  215.0  A
## 6      USGS 11023000 1982-01-23   70.0  A

B. Choose a larger event and plot the annual hydrograph around that point.

par(mfrow=c(1,1))
plot(as.Date(xQ$Date),xQ$Q.ft.s,type="l",xlim=as.Date(c("2006-10-01","2007-04-01")),ylim=c(0,500))
points(dates,x$Q.ft3.s)

III. C-Q plots for hysteresis

A. Define arrowplot function

You can write your own functions in R. Here’s one that plots arrows between consecutive points. Copy, paste and execute this code to define the function (nothing happens…it just puts the function in memory):

arrowLine <- function(x, y, N, ...){
  lengths <- c(0, sqrt(diff(x)^2 + diff(y)^2))
  l <- cumsum(lengths)
  tl <- l[length(l)]
  el <- seq(0, to=tl, length=N+1)[-1]
  plot(x, y, t="l", ...)
  for(ii in el){
    int <- findInterval(ii, l)
    xx <- x[int:(int+1)]
    yy <- y[int:(int+1)]
    ## points(xx,yy, col="grey", cex=0.5)
    dx <- diff(xx)
    dy <- diff(yy)
    new.length <- ii - l[int]
    segment.length <- lengths[int+1]
    ratio <- new.length / segment.length
    xend <- x[int] + ratio * dx
    yend <- y[int] + ratio * dy
    #points(xend,yend, col="black", pch=19)
    arrows(x[int], y[int], xend, yend, length=0.15)
  }
  points(x,y,pch=20)
}

Now use it to plot Q-C for a fake storm (there isn’t enough data to do this with real data)

par(mfrow=c(1,1)) # get back to a single-panel plot

# Make up a fake storm with Q and C
Q = c(1,3,10,15,12,8,3,2,1)
C = c(1,10,50,40,30,10,5,4,1)

# Plot it.
arrowLine(Q,C,N=10,xlab="Q",ylab="C")