2 Data Sources

Read in the data. See the other document for more detailed commentary. In the other document should save all of the downloaded data in a single file for easier access.

SC_weather <- read.csv("2854946.csv")
SC_weather$date1 <- as.Date(SC_weather$DATE, format="%Y-%m-%d")
SC_weather$YEAR <- lubridate::year(SC_weather$date1)
SC_weather$JULIAN <- lubridate::yday(SC_weather$date1)
SC_weather$month <- lubridate::month(SC_weather$date1, label=TRUE, abbr=TRUE)
#summary(SC_weather)
#glimpse(tail(SC_weather))

# calculate the sum precipitation for each month
SC_daily_precip_month <- SC_weather %>%
  group_by(month, YEAR) %>%
  summarise(max_precip = sum(PRCP, na.rm = TRUE))  # Note treating NAs as zeroes here!

## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.

SC_daily_precip_month <- SC_daily_precip_month[with(SC_daily_precip_month, order(YEAR, month)),]
#SC_daily_precip_month
#summary(SC_daily_precip_month)

Work with water years. It will turn out below that water years work much better as factors.

water_year <- function(date) {
               ifelse(lubridate::month(date) < 10, lubridate::year(date), lubridate::year(date)+1)}
SC_weather$wateryear <- water_year(SC_weather$date1)
SC_daily_precip_water_year <- SC_weather %>%
  group_by(wateryear) %>%
  summarise(wy_precip = sum(PRCP, na.rm = TRUE))  # Note treating NAs as zeroes here!
#summary(SC_daily_precip_water_year)

Compute day of water year.

# https://stackoverflow.com/questions/55525965/is-there-a-r-function-to-calculate-day-of-water-year

hydro.day.new = function(x, start.month = 10L){
  start.yr = year(x) - (month(x) < start.month)
  start.date = make_date(start.yr, start.month, 1L)
  as.integer(x - start.date + 1L)
}

SC_weather$hydroday <- hydro.day.new(SC_weather$date1)
summary(SC_weather$hydroday)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0    91.0   182.0   182.7   274.0   366.0

siteNumber <- "11160500" 
SantaCruzInfo <- readNWISsite(siteNumber)
SantaCruzInfo

##   agency_cd  site_no                   station_nm site_tp_cd lat_va long_va
## 1      USGS 11160500 SAN LORENZO R A BIG TREES CA         ST 370240 1220417
##   dec_lat_va dec_long_va coord_meth_cd coord_acy_cd coord_datum_cd
## 1   37.04439   -122.0725             M            F          NAD27
##   dec_coord_datum_cd district_cd state_cd county_cd country_cd land_net_ds
## 1              NAD83          06       06       087         US          NA
##      map_nm map_scale_fc alt_va alt_meth_cd alt_acy_va alt_datum_cd   huc_cd
## 1 FELTON CA           NA    227           M         20       NGVD29 18060015
##   basin_cd topo_cd                 instruments_cd construction_dt inventory_dt
## 1     <NA>    <NA> NNNNYNNNNNNNNNNNNNNNNNNNNNNNNN              NA           NA
##   drain_area_va contrib_drain_area_va tz_cd local_time_fg reliability_cd
## 1           106                    NA   PST             Y           <NA>
##   gw_file_cd nat_aqfr_cd aqfr_cd aqfr_type_cd well_depth_va hole_depth_va
## 1   NNNNNNNN        <NA>    <NA>         <NA>            NA            NA
##   depth_src_cd project_no
## 1         <NA>       <NA>

#comment(SantaCruzInfo)

pcodes <- readNWISpCode("all")

## Warning: 3 parsing failures.
## row col  expected    actual                                                                      file
## 230  -- 1 columns 3 columns 'C:\Users\RICHAR~1\AppData\Local\Temp\RtmpcJlpbJ\file1a602a887752'
## 231  -- 1 columns 3 columns 'C:\Users\RICHAR~1\AppData\Local\Temp\RtmpcJlpbJ\file1a602a887752'
## 232  -- 1 columns 3 columns 'C:\Users\RICHAR~1\AppData\Local\Temp\RtmpcJlpbJ\file1a602a887752'

pcodes_short <- readNWISpCode(c("00060", "00065", "00010", "00400"))
#kable(pcodes_short, row.names=FALSE)

# I don't see a way to get statistics codes from their package, so downloaded and created two CSVs.  This short version is much more useful than the complete version.
scodes_short <- read.csv("USGS_statistic_codes_short.csv")
#kable(scodes_short)

siteNumbers <- c("11160500")
siteINFO <- readNWISsite(siteNumbers)
#kable(siteINFO[, c(2, 3, 30)])

# Data type codes listed at https://waterservices.usgs.gov/rest/Site-Service.html#outputDataTypeCd
availableData <- whatNWISdata(siteNumber=siteNumbers, service="dv")
#kable(availableData[, c(2, 3, 14, 15)])

# We are primarily interested in mean daily flow values
availableData <- whatNWISdata(siteNumber=siteNumbers, service="dv", parameterCd = c("00060"), statCd = "00003")

Download the full data set and save as a CSV file. This is usually disabled to prevent burdening the data server.
Note that multiple site data is downloaded in long format (one row per observation). This means there will be multiple observations (one for each site) for each date.

Plotting with ggplot is cleaner with the long data, so try using that even though I find a wide format more intuitive here.

Also save data as a RDS file since the API returns additional useful information in the data structure! See str call below.

pCode <- "00060"
start.date <- "1936-10-01" # Earliest available data for Big Trees which is our focus
end.date <- Sys.Date()

flow.data <- readNWISdv(siteNumbers = siteNumbers,
                     parameterCd = pCode,
                     startDate = start.date,
                     endDate = end.date)
flow.data <- renameNWISColumns(flow.data)
#summary(flow.data)
#kable(head(flow.data[flow.data$site_no == "11160500",]))
flow.data.all <- flow.data

# Check that data looks all right (e.g. no missing data)
flow.data.all %>%
  ggplot( aes(x=Date, y=Flow, group=site_no, color=site_no)) +
    geom_line() + scale_y_log10()

# Just look at Big Trees
flow.data.all %>%
  filter(site_no == "11160500") %>%
  ggplot( aes(x=Date, y=Flow, group=site_no, color=site_no)) +
    geom_line() + scale_y_log10()

write.csv(flow.data.all, "flow_data_all.csv", row.names=FALSE)
saveRDS(flow.data.all, file="flow_data_all.rds")

Read the saved flow data. Plots look nicer if site_no is a string (not a number, probably would be even better as a factor) and Date is a date (not a string). Decided to handle this by saving as RDS rather than CSV.

#flow.data.all <- read.csv("flow_data_all.csv", colClasses=c("character", "character", "Date", "number", "character"))
flow.data.all <- readRDS("flow_data_all.rds")
flow.data.all$date1 <- flow.data.all$Date # For easier merging
summary(flow.data.all)

##   agency_cd           site_no               Date                 Flow        
##  Length:31162       Length:31162       Min.   :1936-10-01   Min.   :    5.6  
##  Class :character   Class :character   1st Qu.:1958-01-29   1st Qu.:   19.0  
##  Mode  :character   Mode  :character   Median :1979-05-29   Median :   32.0  
##                                        Mean   :1979-05-29   Mean   :  129.0  
##                                        3rd Qu.:2000-09-25   3rd Qu.:   85.0  
##                                        Max.   :2022-01-24   Max.   :17000.0  
##    Flow_cd              date1           
##  Length:31162       Min.   :1936-10-01  
##  Class :character   1st Qu.:1958-01-29  
##  Mode  :character   Median :1979-05-29  
##                     Mean   :1979-05-29  
##                     3rd Qu.:2000-09-25  
##                     Max.   :2022-01-24

str(flow.data.all) # Note additional information beyond what is in the CSV

## 'data.frame':    31162 obs. of  6 variables:
##  $ agency_cd: chr  "USGS" "USGS" "USGS" "USGS" ...
##  $ site_no  : chr  "11160500" "11160500" "11160500" "11160500" ...
##  $ Date     : Date, format: "1936-10-01" "1936-10-02" ...
##  $ Flow     : num  12 12 12 12 12 12 12 12 12 12 ...
##  $ Flow_cd  : chr  "A" "A" "A" "A" ...
##  $ date1    : Date, format: "1936-10-01" "1936-10-02" ...
##  - attr(*, "url")= chr "https://waterservices.usgs.gov/nwis/dv/?site=11160500&format=waterml,1.1&ParameterCd=00060&StatCd=00003&startDT"| __truncated__
##  - attr(*, "siteInfo")='data.frame': 1 obs. of  13 variables:
##   ..$ station_nm          : chr "SAN LORENZO R A BIG TREES CA"
##   ..$ site_no             : chr "11160500"
##   ..$ agency_cd           : chr "USGS"
##   ..$ timeZoneOffset      : chr "-08:00"
##   ..$ timeZoneAbbreviation: chr "PST"
##   ..$ dec_lat_va          : num 37
##   ..$ dec_lon_va          : num -122
##   ..$ srs                 : chr "EPSG:4326"
##   ..$ siteTypeCd          : chr "ST"
##   ..$ hucCd               : chr "18060015"
##   ..$ stateCd             : chr "06"
##   ..$ countyCd            : chr "06087"
##   ..$ network             : chr "NWIS"
##  - attr(*, "variableInfo")='data.frame': 1 obs. of  7 variables:
##   ..$ variableCode       : chr "00060"
##   ..$ variableName       : chr "Streamflow, ft&#179;/s"
##   ..$ variableDescription: chr "Discharge, cubic feet per second"
##   ..$ valueType          : chr "Derived Value"
##   ..$ unit               : chr "ft3/s"
##   ..$ options            : chr "Mean"
##   ..$ noDataValue        : logi NA
##  - attr(*, "disclaimer")= chr "Provisional data are subject to revision. Go to http://waterdata.usgs.gov/nwis/help/?provisional for more information."
##  - attr(*, "statisticInfo")='data.frame':    1 obs. of  2 variables:
##   ..$ statisticCd  : chr "00003"
##   ..$ statisticName: chr "Mean"
##  - attr(*, "queryTime")= POSIXct[1:1], format: "2022-01-25 19:11:16"

unique(flow.data.all$site_no)

## [1] "11160500"

flow.and.precip <- merge(SC_weather, flow.data.all, by="date1")
#flow.and.precip$wateryear1 <- toString(flow.and.precip$wateryear)
flow.and.precip$wateryear1 <- as.factor(flow.and.precip$wateryear)
#summary(flow.and.precip)

# Calculate the cumulative daily precipitation sums for each day within each water year
flow.and.precip <- flow.and.precip %>% group_by(site_no, wateryear) %>% mutate(wy_PRCP=cumsum(replace_na(PRCP,0)))
summary(flow.and.precip$wy_PRCP)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    8.55   19.98   20.41   30.18   61.29

# Compute daily runoffs in acre feet given flows in cfs
flow.and.precip$daily_runoff_af <- flow.and.precip$Flow * 60 * 60 * 24 / 43560

# Calculate the cumulative daily runoff sums for each day within each water year
flow.and.precip <- flow.and.precip %>% group_by(site_no, wateryear) %>% mutate(wy_runoff_af=cumsum(daily_runoff_af))
summary(flow.and.precip$wy_runoff_af)

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##     13.73   6967.74  32539.54  54747.43  81704.13 283196.03

First look at all of the water years.

flow.and.precip.totals <- flow.and.precip[flow.and.precip$hydroday == 365,]
flow.and.precip.totals.big.trees <- flow.and.precip.totals[flow.and.precip.totals$site_no == "11160500",]
#flow.and.precip.totals.big.trees$wateryear

car::scatterplot(wy_runoff_af ~ wy_PRCP, data=flow.and.precip.totals.big.trees,
                 id=list(labels=flow.and.precip.totals.big.trees$wateryear, n=length(flow.and.precip.totals.big.trees$wateryear)),
                 main="Runoff vs. Precipitation for Water Years in Santa Cruz",
                 xlab="Water Year Precipitation (inches)",
                 ylab="Water Year Runoff (acre feet)")

## 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 
##   17   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32 
## 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 
##   33   34   35   36   37   38   39   40   41   42   43   44   45   46   47   48 
## 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 
##   49   50   51   52   53   54   55   56   57   58   59   60   61   62   63   64 
## 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 
##   65   66   67   68   69   70   71   72   73   74   75   76   77   78   79   80 
## 2017 2018 2019 2021 
##   81   82   83   84

Look at rainy water year 2017.

Dualplot version seems somewhat useful, though I would like to compare with a log axis for flow.

# Try dualplot version
# https://rdrr.io/github/eloualiche/miscr/man/dualplot.html
# https://web.archive.org/web/20210606023233/http://freerangestats.info/blog/2016/08/18/dualaxes
source("https://gist.githubusercontent.com/ellisp/4002241def4e2b360189e58c3f461b4a/raw/9ab547bff18f73e783aaf30a7e4851c9a2f95b80/dualplot.R")    

# hydroday start and ylim choices are finicky
flow.and.precip.subset <- subset(flow.and.precip, (site_no == "11160500") & (wateryear == 2017) & (between(hydroday, 15, 180)))
summary(flow.and.precip.subset[,c("date1", "PRCP", "Flow")])

##      date1                 PRCP             Flow       
##  Min.   :2016-10-15   Min.   :0.0000   Min.   :  17.0  
##  1st Qu.:2016-11-24   1st Qu.:0.0000   1st Qu.:  44.0  
##  Median :2017-01-03   Median :0.0000   Median : 348.0  
##  Mean   :2017-01-03   Mean   :0.3004   Mean   : 780.5  
##  3rd Qu.:2017-02-12   3rd Qu.:0.3100   3rd Qu.: 888.0  
##  Max.   :2017-03-29   Max.   :2.9500   Max.   :8180.0

with(flow.and.precip.subset, dualplot(x1 = date1, y1 = PRCP, y2 = Flow, colgrid = "grey90",
                                      main = "Santa Cruz Precipitation and River Flow for Water Year 2017",
                                      ylab1 = "Daily Precipitation (inches)",
                                      ylab2 = "Discharge, cubic feet per second",
                                      yleg1 = "Daily Precipitation (left axis)",
                                      yleg2 = "River Flow (right axis)",
                                      ylim.ref=c(which.max(PRCP), which.max(Flow))))

## The two series will be presented visually as though they had been converted to indexes.

Notice above how it took a while for the runoff to really start. I saw an estimate that most of the soils here can absorb about 10 inches of rain (need to find reference). That looks about right for plot above.

Alternatively, we can look at cumulative precipitation and runoff. Notice how the runoff lags precipitation and only really picks up after about 15" of rain have fallen. Also note that the y axis choices are artificial and primarily intended to show the lag behavior.

with(flow.and.precip.subset, dualplot(x1 = date1, y1 = wy_PRCP, y2 = wy_runoff_af, colgrid = "grey90",
                                      main = "Santa Cruz Cumulative Precipitation and River Flow for Water Year 2017",
                                      ylab1 = "Cumulative Daily Precipitation (inches)",
                                      ylab2 = "Cumulative Discharge, cubic feet per second",
                                      yleg1 = "Cumulative Daily Precipitation (left axis)",
                                      yleg2 = "Cumulative River Flow (right axis)",
                                      ylim.ref=c(length(date1), length(date1))))

## The two series will be presented visually as though they had been converted to indexes.

Now compare the above plots with 1998.

# hydroday start and ylim choices are finicky
flow.and.precip.subset <- subset(flow.and.precip, (site_no == "11160500") & (wateryear == 1998) & (between(hydroday, 41, 210)))
summary(flow.and.precip.subset[,c("date1", "PRCP", "Flow")])

##      date1                 PRCP             Flow       
##  Min.   :1997-11-10   Min.   :0.0000   Min.   :  25.0  
##  1st Qu.:1997-12-22   1st Qu.:0.0000   1st Qu.:  89.5  
##  Median :1998-02-02   Median :0.0000   Median : 278.0  
##  Mean   :1998-02-02   Mean   :0.3248   Mean   : 519.2  
##  3rd Qu.:1998-03-16   3rd Qu.:0.3375   3rd Qu.: 441.5  
##  Max.   :1998-04-28   Max.   :3.2300   Max.   :7330.0

with(flow.and.precip.subset, dualplot(x1 = date1, y1 = PRCP, y2 = Flow, colgrid = "grey90",
                                      main = "Santa Cruz Precipitation and River Flow for Water Year 1998",
                                      ylab1 = "Daily Precipitation (inches)",
                                      ylab2 = "Discharge, cubic feet per second",
                                      yleg1 = "Daily Precipitation (left axis)",
                                      yleg2 = "River Flow (right axis)",
                                      ylim.ref=c(which.max(PRCP), which.max(Flow))))

## The two series will be presented visually as though they had been converted to indexes.

with(flow.and.precip.subset, dualplot(x1 = date1, y1 = wy_PRCP, y2 = wy_runoff_af, colgrid = "grey90",
                                      main = "Santa Cruz Cumulative Precipitation and River Flow for Water Year 1998",
                                      ylab1 = "Cumulative Daily Precipitation (inches)",
                                      ylab2 = "Cumulative Discharge, cubic feet per second",
                                      yleg1 = "Cumulative Daily Precipitation (left axis)",
                                      yleg2 = "Cumulative River Flow (right axis)",
                                      ylim.ref=c(length(date1), length(date1))))

## The two series will be presented visually as though they had been converted to indexes.

Santa Cruz Water Year Comparison of 1998 and 2017

Richard Seiter

Wednesday, February 09, 2022

1 Overview

2 Data Sources

3 File History

4 Bibliography