library(dataRetrieval)
library(lubridate)
library(tidyverse)
library(plotly)
library(devtools)
library(stringr)
### Write dataframes to files
library(RColorBrewer)
head(DO.data)
weather <- read.csv('X:/transfer/RobW/TNC_DO/44069h2017 (1).txt/wind_2017.txt', header=F, sep=' ', stringsAsFactors = FALSE) %>% select('year'=V1,
'month'=V2,
'day'=V3,
'hour'=V4,
'min'=V5,
'wind_speed'=V8,
'wind_gust'=V10,
'air_temp'=V17,
'water_temp'=V20) %>%
slice(3:n())
# get all into numeric data types
i <- c(1:9)
weather[ , i] <- apply(weather[ , i], 2, # Specify own function within apply
function(x) as.numeric(as.character(x)))
weather$month <- str_pad(weather$month, 2, pad = "0")
weather$day <- str_pad(weather$day,2,pad="0")
weather <- weather %>% unite(monthday, month,day,sep='-', remove=FALSE)
weather$monthday <- as.Date(weather$monthday, format='%m-%d')
# compute daily weather values for 2017 - months June-October (hours midnight to 7am)
weather <-
weather %>%
filter(month==c('06','07','08','09','10'),
hour %in% c(16:24)) %>% group_by(monthday) %>%
summarize('mean_wind_speed'=mean(wind_speed, na.rm=T),
'mean_wind_gust'=mean(wind_gust,na.rm=T),
'mean_air_temp'=mean(air_temp,na.rm=T),
'mean_water_temp'=mean(water_temp,na.rm=T))
# get max day temp
day.2017 <-
DO.2017.all %>%
filter(year==2017, TOD=='day') %>%
group_by(SiteNum, monthday) %>%
summarize('day_max_DO'=max(DO_mgL))
day.2017$monthday <- as.Date(day.2017$monthday, format='%m-%d')
day.2017$seq_1 <- lead(ifelse(difftime(day.2017$monthday, lag(day.2017$monthday), units="days")==1, TRUE, FALSE))
# get DO min night daily values from the next day
night.2017 <-
DO.2017.all %>%
filter(year==2017, TOD=='night') %>%
group_by(SiteNum, monthday) %>%
summarize('night_min_DO'=min(DO_mgL)) %>%
mutate(next_night_minDO= lead(night_min_DO))
night.2017$monthday <- as.Date(night.2017$monthday, format='%m-%d')
night.2017$seq_2 <- lead(ifelse(difftime(night.2017$monthday, lag(night.2017$monthday), units="days")==1, TRUE, FALSE))
#subtract 1 from night.2017$monthday for join purposes
night.2017$monthday <- night.2017$monthday - 1
water_temp <- DO.data %>%filter(year==2017) %>% select(SiteNum, water.temp, monthday)
water_temp$monthday <- as.Date(water_temp$monthday, format='%m-%d')
water_temp$SiteNum <- as.numeric(water_temp$SiteNum)
water_temp <- water_temp %>%
group_by(SiteNum,monthday) %>% summarize(mean_sensor_water_temp=mean(water.temp,na.rm=T))
dnw <- day.2017 %>%
left_join(night.2017, by=c("SiteNum"="SiteNum", "monthday"="monthday")) %>%
left_join(weather, by="monthday") %>%
left_join(site.info, by="SiteNum") %>%
left_join(water_temp, by=c("SiteNum"="SiteNum", "monthday"="monthday")) %>%
filter(seq_1==TRUE, seq_2==TRUE)
# make a plot
dnw %>%
#filter(StationName=="Great South Bay 15 ") %>%
filter(complete.cases(mean_sensor_water_temp)) %>%
#select(SiteNum,StationName, monthday,night_min_DO, day_max_DO, mean_wind_speed) %>%
ggplot(aes(y=next_night_minDO,x=day_max_DO, col=mean_sensor_water_temp)) +
geom_point(size=2) +
geom_abline(slope=1,intercept=0) +
#facet_wrap(~StationName, ncol=4) +
scale_color_gradientn(colours = rainbow(6)) +
labs(title='ALL')
# try some mulitple linear regression -
### fit1 - night as a function of day DO and Station (Station 11,12,14,15 and 7 are the strongest)
fit1 <- lm(next_night_minDO ~ day_max_DO + StationName, data=dnw)
summary(fit1)
##
## Call:
## lm(formula = next_night_minDO ~ day_max_DO + StationName, data = dnw)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.1432 -1.0083 0.3413 1.2920 4.2443
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.33580 0.33706 3.963 7.81e-05 ***
## day_max_DO 0.38471 0.03403 11.306 < 2e-16 ***
## StationNameGreat South Bay 11 0.76309 0.26093 2.925 0.003510 **
## StationNameGreat South Bay 12 0.87654 0.28262 3.102 0.001967 **
## StationNameGreat South Bay 13 0.04183 0.25909 0.161 0.871756
## StationNameGreat South Bay 14 0.85906 0.25076 3.426 0.000632 ***
## StationNameGreat South Bay 15 1.22960 0.25613 4.801 1.77e-06 ***
## StationNameGreat South Bay 4 -0.15437 0.25885 -0.596 0.551029
## StationNameGreat South Bay 5 -0.66548 0.25731 -2.586 0.009811 **
## StationNameGreat South Bay 6 0.16608 0.25214 0.659 0.510200
## StationNameGreat South Bay 7 0.46955 0.25151 1.867 0.062135 .
## StationNameGreat South Bay 8 0.26851 0.26984 0.995 0.319895
## StationNameGreat South Bay 9 0.29135 0.25331 1.150 0.250301
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.849 on 1277 degrees of freedom
## Multiple R-squared: 0.1708, Adjusted R-squared: 0.1631
## F-statistic: 21.93 on 12 and 1277 DF, p-value: < 2.2e-16
plot(fit1)
fit2 <- lm(next_night_minDO ~ mean_wind_speed, data=dnw)
summary(fit2)
##
## Call:
## lm(formula = next_night_minDO ~ mean_wind_speed, data = dnw)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.0034 -1.1587 0.4467 1.4111 5.3824
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.25517 0.14089 30.201 <2e-16 ***
## mean_wind_speed 0.06166 0.02522 2.445 0.0146 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.026 on 1191 degrees of freedom
## (97 observations deleted due to missingness)
## Multiple R-squared: 0.004995, Adjusted R-squared: 0.00416
## F-statistic: 5.979 on 1 and 1191 DF, p-value: 0.01462
plot(fit2)
Figure 1 - Locations of continuous DO sensors in Great South Bay. Map processed in ArcGIS v. 10.7.1.
Notes: Need Bluepoints polygon
AUV survey of GSB performed August 23-26 2016. Products (Sample points and interpolated DO surface) released on ScienceBase at https://www.sciencebase.gov/catalog/item/59284998e4b0c6c925587b35.
The interpolated surface below shows the the spatial distribution of DO in GSB under the conditions associated with 8/23/16 between 1200 and 0500.
The interpolated surface below is a heat map from the continuous sensors. Even with a dense sampling setup, interpolation misses key information.
The map below shows areas where AUV and continuous sensor interpolations vary. Created via ArcMap ‘Raster Calculator’ tool.
Notes: Will be 2 figures - describe conditions of AUV survey ie time of day, tidal stage, weather etc
### Create DO intervals in increments of 0.1
# get daily means from NWIS
daily_means.1617 <- readNWISdv(site.numbers,"00300", "2016-06-01", "2017-11-03", statCd='00003') %>% select(Date,SiteNum=site_no, daily_mean="X_00300_00003" )
daily_means.1617 <- daily_means.1617 %>% filter(daily_mean > 3.0 & daily_mean < 4.9)
DO.data$date <- as.Date(DO.data$date)
DO.data$chronic <- FALSE
for(i in 1:nrow(daily_means.1617)){
# get subset of data from the site that is within a 66 day range
a <- daily_means.1617 %>% filter(Date >= daily_means.1617$Date[i] & Date <= daily_means.1617$Date[i] + days(66), SiteNum==SiteNum[i])
# store variables for start of 66 day window, end of window and site number
start.event <- min(a$Date)
end.event <- max(a$Date)
site <- a$SiteNum
# calculate the statistic - cumulative fraction
b <- a %>% count(daily_mean) %>%
left_join(allowed.chronic, by='daily_mean') %>%
mutate(cum_fraction=n/time_allowed) %>%
summarize(cum_fraction=sum(cum_fraction))
# if cumulative fraction is greater than 1, then the entire 66 day window is labeled as chronic
if(b > 1){
DO.data$chronic[DO.data$date >= start.event & DO.data$date <=end.event & DO.data$SiteNum==site] <- TRUE
}
}
sum(DO.data$chronic)
## [1] 102567
# get daily mean DO for each site in 2017
daily_means.17 <- readNWISdv(site.numbers,"00300", "2017-06-01", "2017-12-03", statCd='00003') %>% select(Date,SiteNum=site_no, daily_mean="X_00300_00003")
# filter to get means between 3.0 and 4.9 only
daily_means.17 <- daily_means.17 %>% filter(daily_mean > 3.0 & daily_mean < 4.9)
# get date into date format
DO.2017.all$date <- as.Date(DO.2017.all$date)
# set chronic variable to NA
DO.2017.all$chronic <- FALSE
# run the for loop.
# Each iteration step is a daily value
#1. retrieve all data from that day within a 66 day window
#2. store the beginning date and end date of the window, and the site number
#3. tally the number of each daily min value (interval frequency)
#4. join in the DEC allowable table
#5. calculate the cumulative fraction
#6. sum the cumulative fraction
#7. if the sum is greater than 1, all dates between the start and end dates(STEP#2) are chronic, otherwise they remain as NA
for(i in 1:nrow(daily_means.17)){
# get subset of data from the site that is within a 66 day range
a <- daily_means.17 %>% filter(Date >= daily_means.17$Date[i], Date <= daily_means.17$Date[i] + days(66), SiteNum==SiteNum[i])
# store variables for start of 66 day window, end of window and site number
start.event <- min(a$Date)
end.event <- max(a$Date)
site <- a$SiteNum
# calculate the statistic - cumulative fraction
b <- a %>% count(daily_mean) %>%
left_join(allowed.chronic, by='daily_mean') %>%
mutate(cum_fraction=n/time_allowed) %>%
summarize(cum_fraction=sum(cum_fraction))
# if cumulative fraction is greater than 1, then the entire 66 day window is labeled as chronic
if(b > 1){
DO.2017.all$chronic[DO.2017.all$date >= start.event & DO.2017.all$date <=end.event & DO.2017.all$SiteNum==site] <- TRUE
}
}
sum(DO.2017.all$chronic)
## [1] 85333
### acute daily
# get daily mean DO for each site in 2017
daily_means.17 <- readNWISdv(site.numbers,"00300", "2017-06-01", "2017-12-03", statCd='00003') %>% select(Date,SiteNum=site_no, daily_mean="X_00300_00003")
# filter to get means between 3.0 and 4.9 only
daily_means.17.lt3 <- daily_means.17 %>% filter( daily_mean < 3.0)
DO.2017.all$dailyacute <- FALSE
# get date into date format
DO.2017.all$date <- as.Date(DO.2017.all$date)
for(i in 1:nrow(daily_means.17.lt3)){
c <- daily_means.17.lt3 %>% filter(Date == daily_means.17.lt3$Date[i], SiteNum==SiteNum[i])
# store variables for start of 66 day window, end of window and site number
start.event <- c$Date
site <- c$SiteNum
DO.2017.all$dailyacute[DO.2017.all$date >= start.event & DO.2017.all$SiteNum==site] <- TRUE
}
Figure 4 - Spatial distribution of acute violations
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
### WT curve - all 2017
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
### USGS daily min from NWIS
daily_mins %>%
filter(year %in% c(2016,2017)) %>%
group_by(monthday, year) %>%
ggplot(aes(x=max_temp,y=daily_min)) +
geom_point(position='jitter') +
geom_smooth() +
xlab('Temp daily max [C]') +
ylab('DO daily min [mg/L') +
facet_grid(~watershed)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
###daily min as a function of max Temp faceting by site (one for each year)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
### Water Temp Curves (2016 and 2017) not daily value
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
### Water Temperature Curves (2017 only)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
### WT 2016 only
DO.data %>%
filter(year==2016) %>%
#filter(TOD == c('day', 'night')) %>%
group_by(monthday,StationName, month) %>%
summarize(minDO = min(DO_mgL), maxWT= max(water.temp, na.rm=T)) %>%
ggplot(aes(x=maxWT,y=minDO)) +
geom_point(aes(color=as.factor(month)),position='jitter') +
geom_smooth() +
xlab('Temp daily max [C]') +
ylab('DO daily min [mg/L') +
guides(color=guide_legend(order=1)) +
facet_wrap(~StationName, ncol=4) +
scale_color_manual(values=c('#999999','#E69F00','#56B4E9','blue'))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
### Water Temperature Curves (2017 only July and August )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
DO.data %>% filter(year==2017) %>% ggplot(aes(x=TOD,y=DO_mgL,color=stage)) + geom_boxplot()
included in map below
Figure 2a - Mean DO for 2016 at each location with interplated surface of mean water temperature.
Figure 2b - Mean DO for 2017 at each location with interpolated surface of mean water temperature.
### DO Cycle
## Selecting by hr_mean
### Day/Night exceedences by site number
### Tide Stage
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
DO.data %>% filter(year==2016,monthday==c('08-24','08-25','08-26'), StationName=='Great South Bay 11 ')
A <-matrix(c(1,3,2,4,1,3,1,0,1), byrow=TRUE, ncol=3)
det(A)
## [1] -4