## [1] "[@Xie_2021; @Xie_2015; @Xie_2014]"
This file is an outgrowth of How Wet Is It This Year? A History of Santa Cruz Water Years.
Part of that document was a scatterplot showing the relationship of annual precipitation and river flow (reproduced below). The observation was made that although most water years followed a best fit curve fairly closely there were some outliers. Here we look at 1998 and 2017.
Read in the data. See the other document for more detailed commentary. In the other document should save all of the downloaded data in a single file for easier access.
SC_weather <- read.csv("2854946.csv")
SC_weather$date1 <- as.Date(SC_weather$DATE, format="%Y-%m-%d")
SC_weather$YEAR <- lubridate::year(SC_weather$date1)
SC_weather$JULIAN <- lubridate::yday(SC_weather$date1)
SC_weather$month <- lubridate::month(SC_weather$date1, label=TRUE, abbr=TRUE)
#summary(SC_weather)
#glimpse(tail(SC_weather))
# calculate the sum precipitation for each month
SC_daily_precip_month <- SC_weather %>%
group_by(month, YEAR) %>%
summarise(max_precip = sum(PRCP, na.rm = TRUE)) # Note treating NAs as zeroes here!
## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.
SC_daily_precip_month <- SC_daily_precip_month[with(SC_daily_precip_month, order(YEAR, month)),]
#SC_daily_precip_month
#summary(SC_daily_precip_month)
Work with water years. It will turn out below that water years work much better as factors.
water_year <- function(date) {
ifelse(lubridate::month(date) < 10, lubridate::year(date), lubridate::year(date)+1)}
SC_weather$wateryear <- water_year(SC_weather$date1)
SC_daily_precip_water_year <- SC_weather %>%
group_by(wateryear) %>%
summarise(wy_precip = sum(PRCP, na.rm = TRUE)) # Note treating NAs as zeroes here!
#summary(SC_daily_precip_water_year)
Compute day of water year.
# https://stackoverflow.com/questions/55525965/is-there-a-r-function-to-calculate-day-of-water-year
hydro.day.new = function(x, start.month = 10L){
start.yr = year(x) - (month(x) < start.month)
start.date = make_date(start.yr, start.month, 1L)
as.integer(x - start.date + 1L)
}
SC_weather$hydroday <- hydro.day.new(SC_weather$date1)
summary(SC_weather$hydroday)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 91.0 182.0 182.7 274.0 366.0
siteNumber <- "11160500"
SantaCruzInfo <- readNWISsite(siteNumber)
SantaCruzInfo
## agency_cd site_no station_nm site_tp_cd lat_va long_va
## 1 USGS 11160500 SAN LORENZO R A BIG TREES CA ST 370240 1220417
## dec_lat_va dec_long_va coord_meth_cd coord_acy_cd coord_datum_cd
## 1 37.04439 -122.0725 M F NAD27
## dec_coord_datum_cd district_cd state_cd county_cd country_cd land_net_ds
## 1 NAD83 06 06 087 US NA
## map_nm map_scale_fc alt_va alt_meth_cd alt_acy_va alt_datum_cd huc_cd
## 1 FELTON CA NA 227 M 20 NGVD29 18060015
## basin_cd topo_cd instruments_cd construction_dt inventory_dt
## 1 <NA> <NA> NNNNYNNNNNNNNNNNNNNNNNNNNNNNNN NA NA
## drain_area_va contrib_drain_area_va tz_cd local_time_fg reliability_cd
## 1 106 NA PST Y <NA>
## gw_file_cd nat_aqfr_cd aqfr_cd aqfr_type_cd well_depth_va hole_depth_va
## 1 NNNNNNNN <NA> <NA> <NA> NA NA
## depth_src_cd project_no
## 1 <NA> <NA>
#comment(SantaCruzInfo)
pcodes <- readNWISpCode("all")
## Warning: 3 parsing failures.
## row col expected actual file
## 230 -- 1 columns 3 columns 'C:\Users\RICHAR~1\AppData\Local\Temp\RtmpcJlpbJ\file1a602a887752'
## 231 -- 1 columns 3 columns 'C:\Users\RICHAR~1\AppData\Local\Temp\RtmpcJlpbJ\file1a602a887752'
## 232 -- 1 columns 3 columns 'C:\Users\RICHAR~1\AppData\Local\Temp\RtmpcJlpbJ\file1a602a887752'
pcodes_short <- readNWISpCode(c("00060", "00065", "00010", "00400"))
#kable(pcodes_short, row.names=FALSE)
# I don't see a way to get statistics codes from their package, so downloaded and created two CSVs. This short version is much more useful than the complete version.
scodes_short <- read.csv("USGS_statistic_codes_short.csv")
#kable(scodes_short)
siteNumbers <- c("11160500")
siteINFO <- readNWISsite(siteNumbers)
#kable(siteINFO[, c(2, 3, 30)])
# Data type codes listed at https://waterservices.usgs.gov/rest/Site-Service.html#outputDataTypeCd
availableData <- whatNWISdata(siteNumber=siteNumbers, service="dv")
#kable(availableData[, c(2, 3, 14, 15)])
# We are primarily interested in mean daily flow values
availableData <- whatNWISdata(siteNumber=siteNumbers, service="dv", parameterCd = c("00060"), statCd = "00003")
Download the full data set and save as a CSV file. This is usually disabled to prevent burdening the data server.
Note that multiple site data is downloaded in long format (one row per observation). This means there will be multiple observations (one for each site) for each date.
Plotting with ggplot is cleaner with the long data, so try using that even though I find a wide format more intuitive here.
Also save data as a RDS file since the API returns additional useful information in the data structure! See str call below.
pCode <- "00060"
start.date <- "1936-10-01" # Earliest available data for Big Trees which is our focus
end.date <- Sys.Date()
flow.data <- readNWISdv(siteNumbers = siteNumbers,
parameterCd = pCode,
startDate = start.date,
endDate = end.date)
flow.data <- renameNWISColumns(flow.data)
#summary(flow.data)
#kable(head(flow.data[flow.data$site_no == "11160500",]))
flow.data.all <- flow.data
# Check that data looks all right (e.g. no missing data)
flow.data.all %>%
ggplot( aes(x=Date, y=Flow, group=site_no, color=site_no)) +
geom_line() + scale_y_log10()
# Just look at Big Trees
flow.data.all %>%
filter(site_no == "11160500") %>%
ggplot( aes(x=Date, y=Flow, group=site_no, color=site_no)) +
geom_line() + scale_y_log10()
write.csv(flow.data.all, "flow_data_all.csv", row.names=FALSE)
saveRDS(flow.data.all, file="flow_data_all.rds")
Read the saved flow data. Plots look nicer if site_no is a string (not a number, probably would be even better as a factor) and Date is a date (not a string). Decided to handle this by saving as RDS rather than CSV.
#flow.data.all <- read.csv("flow_data_all.csv", colClasses=c("character", "character", "Date", "number", "character"))
flow.data.all <- readRDS("flow_data_all.rds")
flow.data.all$date1 <- flow.data.all$Date # For easier merging
summary(flow.data.all)
## agency_cd site_no Date Flow
## Length:31162 Length:31162 Min. :1936-10-01 Min. : 5.6
## Class :character Class :character 1st Qu.:1958-01-29 1st Qu.: 19.0
## Mode :character Mode :character Median :1979-05-29 Median : 32.0
## Mean :1979-05-29 Mean : 129.0
## 3rd Qu.:2000-09-25 3rd Qu.: 85.0
## Max. :2022-01-24 Max. :17000.0
## Flow_cd date1
## Length:31162 Min. :1936-10-01
## Class :character 1st Qu.:1958-01-29
## Mode :character Median :1979-05-29
## Mean :1979-05-29
## 3rd Qu.:2000-09-25
## Max. :2022-01-24
str(flow.data.all) # Note additional information beyond what is in the CSV
## 'data.frame': 31162 obs. of 6 variables:
## $ agency_cd: chr "USGS" "USGS" "USGS" "USGS" ...
## $ site_no : chr "11160500" "11160500" "11160500" "11160500" ...
## $ Date : Date, format: "1936-10-01" "1936-10-02" ...
## $ Flow : num 12 12 12 12 12 12 12 12 12 12 ...
## $ Flow_cd : chr "A" "A" "A" "A" ...
## $ date1 : Date, format: "1936-10-01" "1936-10-02" ...
## - attr(*, "url")= chr "https://waterservices.usgs.gov/nwis/dv/?site=11160500&format=waterml,1.1&ParameterCd=00060&StatCd=00003&startDT"| __truncated__
## - attr(*, "siteInfo")='data.frame': 1 obs. of 13 variables:
## ..$ station_nm : chr "SAN LORENZO R A BIG TREES CA"
## ..$ site_no : chr "11160500"
## ..$ agency_cd : chr "USGS"
## ..$ timeZoneOffset : chr "-08:00"
## ..$ timeZoneAbbreviation: chr "PST"
## ..$ dec_lat_va : num 37
## ..$ dec_lon_va : num -122
## ..$ srs : chr "EPSG:4326"
## ..$ siteTypeCd : chr "ST"
## ..$ hucCd : chr "18060015"
## ..$ stateCd : chr "06"
## ..$ countyCd : chr "06087"
## ..$ network : chr "NWIS"
## - attr(*, "variableInfo")='data.frame': 1 obs. of 7 variables:
## ..$ variableCode : chr "00060"
## ..$ variableName : chr "Streamflow, ft³/s"
## ..$ variableDescription: chr "Discharge, cubic feet per second"
## ..$ valueType : chr "Derived Value"
## ..$ unit : chr "ft3/s"
## ..$ options : chr "Mean"
## ..$ noDataValue : logi NA
## - attr(*, "disclaimer")= chr "Provisional data are subject to revision. Go to http://waterdata.usgs.gov/nwis/help/?provisional for more information."
## - attr(*, "statisticInfo")='data.frame': 1 obs. of 2 variables:
## ..$ statisticCd : chr "00003"
## ..$ statisticName: chr "Mean"
## - attr(*, "queryTime")= POSIXct[1:1], format: "2022-01-25 19:11:16"
unique(flow.data.all$site_no)
## [1] "11160500"
flow.and.precip <- merge(SC_weather, flow.data.all, by="date1")
#flow.and.precip$wateryear1 <- toString(flow.and.precip$wateryear)
flow.and.precip$wateryear1 <- as.factor(flow.and.precip$wateryear)
#summary(flow.and.precip)
# Calculate the cumulative daily precipitation sums for each day within each water year
flow.and.precip <- flow.and.precip %>% group_by(site_no, wateryear) %>% mutate(wy_PRCP=cumsum(replace_na(PRCP,0)))
summary(flow.and.precip$wy_PRCP)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 8.55 19.98 20.41 30.18 61.29
# Compute daily runoffs in acre feet given flows in cfs
flow.and.precip$daily_runoff_af <- flow.and.precip$Flow * 60 * 60 * 24 / 43560
# Calculate the cumulative daily runoff sums for each day within each water year
flow.and.precip <- flow.and.precip %>% group_by(site_no, wateryear) %>% mutate(wy_runoff_af=cumsum(daily_runoff_af))
summary(flow.and.precip$wy_runoff_af)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.73 6967.74 32539.54 54747.43 81704.13 283196.03
First look at all of the water years.
flow.and.precip.totals <- flow.and.precip[flow.and.precip$hydroday == 365,]
flow.and.precip.totals.big.trees <- flow.and.precip.totals[flow.and.precip.totals$site_no == "11160500",]
#flow.and.precip.totals.big.trees$wateryear
car::scatterplot(wy_runoff_af ~ wy_PRCP, data=flow.and.precip.totals.big.trees,
id=list(labels=flow.and.precip.totals.big.trees$wateryear, n=length(flow.and.precip.totals.big.trees$wateryear)),
main="Runoff vs. Precipitation for Water Years in Santa Cruz",
xlab="Water Year Precipitation (inches)",
ylab="Water Year Runoff (acre feet)")
## 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968
## 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
## 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984
## 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
## 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000
## 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
## 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
## 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 2017 2018 2019 2021
## 81 82 83 84
Look at rainy water year 2017.
Dualplot version seems somewhat useful, though I would like to compare with a log axis for flow.
# Try dualplot version
# https://rdrr.io/github/eloualiche/miscr/man/dualplot.html
# https://web.archive.org/web/20210606023233/http://freerangestats.info/blog/2016/08/18/dualaxes
source("https://gist.githubusercontent.com/ellisp/4002241def4e2b360189e58c3f461b4a/raw/9ab547bff18f73e783aaf30a7e4851c9a2f95b80/dualplot.R")
# hydroday start and ylim choices are finicky
flow.and.precip.subset <- subset(flow.and.precip, (site_no == "11160500") & (wateryear == 2017) & (between(hydroday, 15, 180)))
summary(flow.and.precip.subset[,c("date1", "PRCP", "Flow")])
## date1 PRCP Flow
## Min. :2016-10-15 Min. :0.0000 Min. : 17.0
## 1st Qu.:2016-11-24 1st Qu.:0.0000 1st Qu.: 44.0
## Median :2017-01-03 Median :0.0000 Median : 348.0
## Mean :2017-01-03 Mean :0.3004 Mean : 780.5
## 3rd Qu.:2017-02-12 3rd Qu.:0.3100 3rd Qu.: 888.0
## Max. :2017-03-29 Max. :2.9500 Max. :8180.0
with(flow.and.precip.subset, dualplot(x1 = date1, y1 = PRCP, y2 = Flow, colgrid = "grey90",
main = "Santa Cruz Precipitation and River Flow for Water Year 2017",
ylab1 = "Daily Precipitation (inches)",
ylab2 = "Discharge, cubic feet per second",
yleg1 = "Daily Precipitation (left axis)",
yleg2 = "River Flow (right axis)",
ylim.ref=c(which.max(PRCP), which.max(Flow))))
## The two series will be presented visually as though they had been converted to indexes.
Notice above how it took a while for the runoff to really start. I saw an estimate that most of the soils here can absorb about 10 inches of rain (need to find reference). That looks about right for plot above.
Alternatively, we can look at cumulative precipitation and runoff. Notice how the runoff lags precipitation and only really picks up after about 15" of rain have fallen. Also note that the y axis choices are artificial and primarily intended to show the lag behavior.
with(flow.and.precip.subset, dualplot(x1 = date1, y1 = wy_PRCP, y2 = wy_runoff_af, colgrid = "grey90",
main = "Santa Cruz Cumulative Precipitation and River Flow for Water Year 2017",
ylab1 = "Cumulative Daily Precipitation (inches)",
ylab2 = "Cumulative Discharge, cubic feet per second",
yleg1 = "Cumulative Daily Precipitation (left axis)",
yleg2 = "Cumulative River Flow (right axis)",
ylim.ref=c(length(date1), length(date1))))
## The two series will be presented visually as though they had been converted to indexes.
Now compare the above plots with 1998.
# hydroday start and ylim choices are finicky
flow.and.precip.subset <- subset(flow.and.precip, (site_no == "11160500") & (wateryear == 1998) & (between(hydroday, 41, 210)))
summary(flow.and.precip.subset[,c("date1", "PRCP", "Flow")])
## date1 PRCP Flow
## Min. :1997-11-10 Min. :0.0000 Min. : 25.0
## 1st Qu.:1997-12-22 1st Qu.:0.0000 1st Qu.: 89.5
## Median :1998-02-02 Median :0.0000 Median : 278.0
## Mean :1998-02-02 Mean :0.3248 Mean : 519.2
## 3rd Qu.:1998-03-16 3rd Qu.:0.3375 3rd Qu.: 441.5
## Max. :1998-04-28 Max. :3.2300 Max. :7330.0
with(flow.and.precip.subset, dualplot(x1 = date1, y1 = PRCP, y2 = Flow, colgrid = "grey90",
main = "Santa Cruz Precipitation and River Flow for Water Year 1998",
ylab1 = "Daily Precipitation (inches)",
ylab2 = "Discharge, cubic feet per second",
yleg1 = "Daily Precipitation (left axis)",
yleg2 = "River Flow (right axis)",
ylim.ref=c(which.max(PRCP), which.max(Flow))))
## The two series will be presented visually as though they had been converted to indexes.
with(flow.and.precip.subset, dualplot(x1 = date1, y1 = wy_PRCP, y2 = wy_runoff_af, colgrid = "grey90",
main = "Santa Cruz Cumulative Precipitation and River Flow for Water Year 1998",
ylab1 = "Cumulative Daily Precipitation (inches)",
ylab2 = "Cumulative Discharge, cubic feet per second",
yleg1 = "Cumulative Daily Precipitation (left axis)",
yleg2 = "Cumulative River Flow (right axis)",
ylim.ref=c(length(date1), length(date1))))
## The two series will be presented visually as though they had been converted to indexes.
File Initially created: February 9, 2022
File knitted: Wed Feb 09 21:40:44 2022