title: “Lab 2” author: “John King” date: “April 12th, 2024” This script analyzes the water year 2003 in San Jose, CA Install necessary packages

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(seas)
## Warning: package 'seas' was built under R version 4.3.3
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(zoo)
## Warning: package 'zoo' was built under R version 4.3.3
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ggplot2)

Read in precipitation data

prcp_data <- read.csv(file="3659227.csv")

Changing date column to date format and replacing missing data with “NA”

prcp_data$DATE <- as.Date(prcp_data$DATE, format="%Y-%m-%d")
prcp_data$PRCP[prcp_data$PRCP <= -999 | prcp_data$PRCP >= 999 ] = NA
wateryr <- function(d) {
  if (as.numeric(format(d, "%m")) >= 10) {
    wy = as.numeric(format(d, "%Y")) + 1
  } else {
    wy = as.numeric(format(d, "%Y"))
  }
}
prcp_data$wy <- sapply(prcp_data$DATE, wateryr)

Create simplified water data table

waterdata <- data.frame(prcp_data$DATE, prcp_data$wy, prcp_data$PRCP)
colnames(waterdata) <- c("date", "wy", "PRCP")

Creating a variable with assigned year

year_assigned <- 2003

Creating data table with only values from assigned year

AssignedYear <- subset(waterdata, wy==year_assigned)

Making ggplot of assigned year

ggplot(AssignedYear, aes(x=date, y=PRCP)) + 
  geom_bar(stat="identity",color="blue") +
  labs(x="", y="precipitation, inch/day") +
  scale_x_date(date_breaks = "1 month", date_labels =  "%b %d")

Reporting desired statistics

MaxPrecip <- max(AssignedYear$PRCP)
TotalPrecip <- sum(AssignedYear$PRCP)
AvgRainPrecip <- TotalPrecip/count(subset(AssignedYear, PRCP>0.01))
RainyDays <- count(subset(AssignedYear, PRCP>0.01))
SeasTable <- interarrival(AssignedYear, var = "PRCP", p.cut = 0.3, inv = FALSE)
MedianDryDays <- median(na.omit(SeasTable$dry))
MeanWetDays <- mean(na.omit(SeasTable$wet))

Intensity data and calculations

#First extract one water year of data
df.one.year <- subset(waterdata, date>=as.Date("2002-10-01") & 
                        date<=as.Date("2003-09-30"))
#Calculate the running mean value for the defined durations
dur <- c(1,3,7,30)
px <- numeric(length(dur))
for (i in 1:4) {
  px[i] <- max(zoo::rollmean(df.one.year$PRCP,dur[i]))
}
#create the intensity-duration data frame
df.id <- data.frame(duration=dur,intensity=px)

Plot intensity curve

#fit a power curve to the data
fit <- stats::nls(intensity ~ a*duration^b, data=df.id, start=list(a=1,b=-0.5))
print(signif(coef(fit),3))
##      a      b 
##  1.260 -0.407
#>      a      b 
#>  1.850 -0.751
#find estimated y-values using the fit
df.id$intensity_est <- predict(fit, list(x = df.id$duration))
#duration-intensity plot with base graphics
plot(x=df.id$duration,y=df.id$intensity,log='xy', pch=1, xaxt="n",
     xlab="Duration, day" , ylab="Intensity, inches/day")
lines(x=df.id$duration,y=df.id$intensity_est,lty=2)
abline( h = c(seq( 0.1,1,0.1),2.0), lty = 3, col = "lightgray")
abline( v = c(1,2,3,4,5,7,10,15,20,30), lty = 3, col = "lightgray")
axis(side = 1, at =c(1,2,3,4,5,7,10,15,20,30) ,labels = T)
axis(side = 2, at =c(seq( 0.1,1,0.1),2.0) ,labels = T)