In this Exercise you will - Load water quality data - Plot Q-C relationships - Use linear models to quantify the Q-C relationships - Plot discharge data on the plot to determine hydrological conditions at time of sampling
You can download water quality data from
The CEDEN website (California): http://ceden.waterboards.ca.gov/AdvancedQueryTool
Watershed urban management plans (San Diego County only): http://www.projectcleanwater.org/index.php Go to “Stormwater copermiettees > Work Products” or to: “Watersheds > Your Watershed > Data, Plans and Projects”
indir = "Y:/wquality_data/"
fname= "SDR_MLS_2005_to_2010_transpose.csv"
x = read.csv(paste0(indir,fname))
dates = as.Date(x$Date,format="%Y-%m-%d")
par(mfrow=c(2,2),mar=c(1,4,0,4),oma=c(5,1,2,0))
plot(x$Q.ft3.s,x$EC.umhos.cm,xlab="",ylab="EC umhos/cm",xaxt="n")
axis(side=1,labels=FALSE)
plot(x$Q.ft3.s,x$TSS.mgL,ylab="TSS mg/L",xaxt="n")
axis(side=1,labels=FALSE)
plot(x$Q.ft3.s,x$Turbidity.NTU,,xlab="",ylab="Turbidity, NTU")
mtext(side=1,"Q ft3/s", line=3)
plot(x$Q.ft3.s,x$Total.Colif.MPN.100mL,ylab="Total coliforms, MPU/100mL")
mtext(side=1,"Q ft3/s", line=3)
par(mfrow=c(2,2),mar=c(1,4,0,4),oma=c(5,1,2,0))
plot(x$Q.ft3.s,x$EC.umhos.cm,xlab="",ylab="EC umhos/cm",xaxt="n",log="xy")
axis(side=1,labels=FALSE)
plot(x$Q.ft3.s,x$TSS.mgL,ylab="TSS mg/L",xaxt="n",log="xy")
axis(side=1,labels=FALSE)
plot(x$Q.ft3.s,x$Turbidity.NTU,,xlab="",ylab="Turbidity, NTU",log="xy")
mtext(side=1,"Q ft3/s", line=3)
plot(x$Q.ft3.s,x$Total.Colif.MPN.100mL,ylab="Total coliforms, MPU/100mL",log="xy")
mtext(side=1,"Q ft3/s", line=3)
Are you surprised by any of the C-Q exponents?
What would be the corresponding exponent for load?
x$EC.log = log(x$EC.umhos.cm)
x$Q.log = log(x$Q.ft3.s)
lm.EC = lm(EC.log ~ Q.log,x) # do ?lm to see format of input
summary(lm.EC)
##
## Call:
## lm(formula = EC.log ~ Q.log, data = x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3417 -0.1645 -0.1325 0.2430 0.4397
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.42908 0.30398 27.729 3.09e-09 ***
## Q.log -0.20220 0.07043 -2.871 0.0208 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2906 on 8 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5075, Adjusted R-squared: 0.4459
## F-statistic: 8.243 on 1 and 8 DF, p-value: 0.0208
Qnew = seq(5,400,by=10)
Qnew.log = data.frame(Q.log=log(Qnew)) # Create a new dataframe that has a variable "Q.log" in it
EC.pred.log = predict.lm(lm.EC,Qnew.log)
# Use exp to turn the log-EC values into non-log values:
EC.pred = exp(EC.pred.log)
Turbidity
x$turb.log = log(x$Turbidity.NTU)
lm.turb = lm(turb.log ~ Q.log,x)
summary(lm.turb)
##
## Call:
## lm(formula = turb.log ~ Q.log, data = x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.83459 -0.31994 -0.00517 0.42493 0.65739
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5015 0.5445 0.921 0.381064
## Q.log 0.5996 0.1227 4.887 0.000863 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.532 on 9 degrees of freedom
## Multiple R-squared: 0.7263, Adjusted R-squared: 0.6959
## F-statistic: 23.88 on 1 and 9 DF, p-value: 0.000863
turb.pred = exp(predict.lm(lm.turb,Qnew.log))
TSS
x$TSS.log = log(x$TSS.mgL)
lm.TSS = lm(TSS.log ~ Q.log,x)
summary(lm.TSS)
##
## Call:
## lm(formula = TSS.log ~ Q.log, data = x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.01415 -0.34337 -0.02042 0.32490 0.91866
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5106 0.5902 0.865 0.409366
## Q.log 0.6667 0.1330 5.014 0.000725 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5765 on 9 degrees of freedom
## Multiple R-squared: 0.7363, Adjusted R-squared: 0.7071
## F-statistic: 25.14 on 1 and 9 DF, p-value: 0.0007254
TSS.pred = exp(predict.lm(lm.TSS,Qnew.log))
Total Coliforms
x$TC.log = log(x$Total.Colif.MPN.100mL)
lm.TC = lm(TC.log ~ Q.log,x)
summary(lm.TC)
##
## Call:
## lm(formula = TC.log ~ Q.log, data = x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0443 -0.9515 -0.4254 0.9244 3.9137
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.5980 2.3146 2.851 0.0191 *
## Q.log 0.8407 0.5216 1.612 0.1414
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.261 on 9 degrees of freedom
## Multiple R-squared: 0.224, Adjusted R-squared: 0.1378
## F-statistic: 2.598 on 1 and 9 DF, p-value: 0.1414
TC.pred = exp(predict.lm(lm.TC,Qnew.log))
par(mfrow=c(2,2),mar=c(1,4,0,4),oma=c(5,1,2,0))
plot(x$Q.ft3.s,x$EC.umhos.cm,xlab="",ylab="EC umhos/cm",xaxt="n",log="xy",ylim=c(500,5000))
axis(side=1,labels=FALSE)
abline(lm.EC,untf=TRUE)
lines(Qnew,EC.pred)
plot(x$Q.ft3.s,x$TSS.mgL,ylab="TSS mg/L",xaxt="n",log="xy")
axis(side=1,labels=FALSE)
abline(lm.TSS,untf=TRUE,lty=3)
lines(Qnew,TSS.pred)
plot(x$Q.ft3.s,x$Turbidity.NTU,,xlab="",ylab="Turbidity, NTU",log="xy")
mtext(side=1,"Q ft3/s", line=3)
lines(Qnew,turb.pred)
plot(x$Q.ft3.s,x$Total.Colif.MPN.100mL,ylab="Total coliforms, MPU/100mL",log="xy")
lines(Qnew,TC.pred)
mtext(side=1,"Q ft3/s", line=3)
library(dataRetrieval)
parameter.code = "00060" # this is the code for stream discharge.
start.date = "" # Blanks get all of the data
end.date = ""
xQ = readNWISdv("11023000",parameter.code,start.date,end.date)
head(xQ)
## agency_cd site_no Date X_00060_00003 X_00060_00003_cd
## 1 USGS 11023000 1982-01-18 7.6 A
## 2 USGS 11023000 1982-01-19 8.8 A
## 3 USGS 11023000 1982-01-20 151.0 A
## 4 USGS 11023000 1982-01-21 574.0 A
## 5 USGS 11023000 1982-01-22 215.0 A
## 6 USGS 11023000 1982-01-23 70.0 A
names(xQ)[c(4,5)] = c("Q.ft.s","QA")
head(xQ)
## agency_cd site_no Date Q.ft.s QA
## 1 USGS 11023000 1982-01-18 7.6 A
## 2 USGS 11023000 1982-01-19 8.8 A
## 3 USGS 11023000 1982-01-20 151.0 A
## 4 USGS 11023000 1982-01-21 574.0 A
## 5 USGS 11023000 1982-01-22 215.0 A
## 6 USGS 11023000 1982-01-23 70.0 A
par(mfrow=c(1,1))
plot(as.Date(xQ$Date),xQ$Q.ft.s,type="l",xlim=as.Date(c("2006-10-01","2007-04-01")),ylim=c(0,500))
points(dates,x$Q.ft3.s)
You can write your own functions in R. Here’s one that plots arrows between consecutive points. Copy, paste and execute this code to define the function (nothing happens…it just puts the function in memory):
arrowLine <- function(x, y, N, ...){
lengths <- c(0, sqrt(diff(x)^2 + diff(y)^2))
l <- cumsum(lengths)
tl <- l[length(l)]
el <- seq(0, to=tl, length=N+1)[-1]
plot(x, y, t="l", ...)
for(ii in el){
int <- findInterval(ii, l)
xx <- x[int:(int+1)]
yy <- y[int:(int+1)]
## points(xx,yy, col="grey", cex=0.5)
dx <- diff(xx)
dy <- diff(yy)
new.length <- ii - l[int]
segment.length <- lengths[int+1]
ratio <- new.length / segment.length
xend <- x[int] + ratio * dx
yend <- y[int] + ratio * dy
#points(xend,yend, col="black", pch=19)
arrows(x[int], y[int], xend, yend, length=0.15)
}
points(x,y,pch=20)
}
Now use it to plot Q-C for a fake storm (there isn’t enough data to do this with real data)
par(mfrow=c(1,1)) # get back to a single-panel plot
# Make up a fake storm with Q and C
Q = c(1,3,10,15,12,8,3,2,1)
C = c(1,10,50,40,30,10,5,4,1)
# Plot it.
arrowLine(Q,C,N=10,xlab="Q",ylab="C")