This code is mostly exploratory analysis of WOAC environmental data at bio stations between 2014-2022.

The report is here: https://rpubs.com/HailaSchultz/1097377

Load Packages

library(dplyr)
library(ggplot2)
library(tidyr)
library(forcats)
library(reshape2)
library(xts)
library(cowplot)

Import Data

WOAC water chemistry data: This dataset was acquired from BethElLee Herrmann and is a compilation of measurements from bottle samples and from the CTD, collected at each depth where bottle water was collected. This sheet is the second tab of the raw data spreadsheet. This dataset includes data on: pH, pCO2, CTD pressure, CTD temperature, CTD salinity, CTD oxygen, oxygen, DIC, total alkalinity, phosphate, silicate, and aragonite saturation. Note that this dataset does not have nitrogen-based nutrients.

WOAC Stations

Read data into R

# Load the WOAC chemistry table (one row per Niskin bottle record) from its
# CSV export into a data frame.
Environmental <- read.csv(
  file = "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-2/data/WOAC_Chem_data_2014-2022_all-niskins.csv"
)

Temperature

The purpose of this section is to explore patterns in temperatures across stations, seasons, and years. I am also interested in seeing if we can detect the marine heatwave from 2015-2017. I may need to look at averages as well as maximum temperature. However, this dataset may not be the best at capturing maximum temperature since it is restricted to the depths where bottle samples were collected. I may need to look at the entire CTD profile from NANOOS files, assuming they have continuous measurements. Another thing to note about this dataset is that the data are separated by month - it would probably be more beneficial to separate by season because, for example, some years may have collections during June or July, but these would both be aimed at being the same cruise.

convert date format

# Parse the text Date column (month/day/two-digit year) into a proper Date
# column, date_convert, so it can be placed on a date axis.
temp <- mutate(
  Environmental,
  date_convert = as.Date(Date, format = "%m/%d/%y")
)

summarize by max temperature

# Highest CTD temperature per station and sampling date (Month is carried
# along as a grouping column).
temp_max <- summarise(
  group_by(temp, Station, Month, date_convert),
  temp = max(CTDTMP_DEG_C_ITS90)
)

plot by max temperature

# Time series of the per-date maximum temperature, one panel per station,
# with one x-axis tick per year.
max_temp_plot <- ggplot(temp_max, aes(x = date_convert, y = temp)) +
  geom_line() +
  geom_point() +
  facet_grid(Station ~ .) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "temperature") +
  theme_classic()
max_temp_plot

summarize by depth category

# Collapse replicate records: mean CTD temperature for every
# station / date / depth-category combination.
temp <- summarise(
  group_by(temp, Station, date_convert, Depth),
  temp = mean(CTDTMP_DEG_C_ITS90)
)

make plot

# Mean temperature through time, coloured by depth category, one panel per
# station.
temp_plot <- ggplot(
  temp,
  aes(x = date_convert, y = temp, color = Depth)
) +
  geom_line() +
  geom_point() +
  facet_grid(Station ~ .) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "temperature") +
  theme_classic()
temp_plot

save plot

# Write the temperature time series to the project output folder. The folder
# is passed to ggsave() through `path` instead of calling setwd(), so saving
# the figure no longer changes the session's working directory.
ggsave(
  filename = "time-series_temperature.png",
  plot = temp_plot,
  path = "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-2/output",
  device = "png",
  width = 10,
  height = 17,
  dpi = 700
)

It looks like P12 and P402 have the highest variability in temperatures, and have especially high summer temperatures. This makes sense because these are the Hood Canal stations.

Try anomaly analysis

library(anomalize)
library(tibble)

P402, 5 m

subset to station, depth

# Build the 5-m temperature series for station P402 and decompose it.
temp_5m <- subset(temp, Depth == "5-m")
temp_5m_P402 <- temp_5m %>%
  # drop the grouping left over from summarise() so the series is a plain table
  ungroup() %>%
  filter(
    Station == "P402",
    # NOTE(review): these two 2022 sampling dates are excluded from the series;
    # the reason is not recorded in this script - confirm before reuse.
    date_convert != as.Date("2022-07-01"),
    date_convert != as.Date("2022-04-29")
  ) %>%
  # keep the date and value columns by name rather than by column position
  select(date_convert, temp) %>%
  arrange(date_convert)
# as_tibble() replaces the deprecated as.tibble()
P402_ts <- as_tibble(temp_5m_P402)

# decomposition: STL split into season / trend / remainder, then flag
# anomalies in the remainder with GESD (at most 20% of points)
P402_ts %>%
  time_decompose(temp, method = "stl", frequency = "auto", trend = "auto") %>%
  anomalize(remainder, method = "gesd", max_anoms = 0.2) %>%
  plot_anomaly_decomposition()

P28, 5 m

subset to station, depth

# Build the 5-m temperature series for station P28 and decompose it.
temp_5m <- subset(temp, Depth == "5-m")
temp_5m_P28 <- temp_5m %>%
  # drop the grouping left over from summarise() so the series is a plain table
  ungroup() %>%
  filter(
    Station == "P28",
    # NOTE(review): these two 2022 sampling dates are excluded from the series;
    # the reason is not recorded in this script - confirm before reuse.
    date_convert != as.Date("2022-07-01"),
    date_convert != as.Date("2022-04-29")
  ) %>%
  # keep the date and value columns by name rather than by column position
  select(date_convert, temp) %>%
  arrange(date_convert)
# as_tibble() replaces the deprecated as.tibble()
P28_ts <- as_tibble(temp_5m_P28)

# decomposition: STL split into season / trend / remainder, then flag
# anomalies in the remainder with GESD (at most 20% of points)
P28_ts %>%
  time_decompose(temp, method = "stl", frequency = "auto", trend = "auto") %>%
  anomalize(remainder, method = "gesd", max_anoms = 0.2) %>%
  plot_anomaly_decomposition()

P12, 5 m

subset to station, depth

# Build the 5-m temperature series for station P12 and decompose it.
temp_5m <- subset(temp, Depth == "5-m")
temp_5m_P12 <- temp_5m %>%
  # drop the grouping left over from summarise() so the series is a plain table
  ungroup() %>%
  filter(
    Station == "P12",
    # NOTE(review): these two 2022 sampling dates are excluded from the series;
    # the reason is not recorded in this script - confirm before reuse.
    date_convert != as.Date("2022-07-01"),
    date_convert != as.Date("2022-04-29")
  ) %>%
  # keep the date and value columns by name rather than by column position
  select(date_convert, temp) %>%
  arrange(date_convert)
# as_tibble() replaces the deprecated as.tibble()
P12_ts <- as_tibble(temp_5m_P12)

# decomposition: STL split into season / trend / remainder, then flag
# anomalies in the remainder with GESD (at most 20% of points)
P12_ts %>%
  time_decompose(temp, method = "stl", frequency = "auto", trend = "auto") %>%
  anomalize(remainder, method = "gesd", max_anoms = 0.2) %>%
  plot_anomaly_decomposition()

Average across stations

# Mean CTD temperature per station / date / depth category, with the text
# date parsed into a Date column.
temp <- Environmental %>%
  group_by(Station, Date, Depth) %>%
  summarise(temp = mean(CTDTMP_DEG_C_ITS90)) %>%
  mutate(date_convert = as.Date(Date, format = "%m/%d/%y"))

# Surface samples only
temp_surf <- subset(temp, Depth == "Surface")

# Calendar year and two-digit month of each sample
temp_surf$year <- format(temp_surf$date_convert, "%Y")
temp_surf$month <- format(temp_surf$date_convert, "%m")

# Season label derived from the month; months not listed below keep their
# two-digit month code.
temp_surf$season <- recode(
  temp_surf$month,
  "04" = "spring",
  "05" = "spring",
  "06" = "summer",
  "07" = "summer",
  "08" = "summer",
  "09" = "autumn",
  "10" = "autumn",
  "11" = "autumn"
)

# Across-station mean and standard deviation for each year and season
temp_surf <- summarise(
  group_by(temp_surf, year, season),
  mean = mean(temp),
  sd = sd(temp)
)

plot

# Year was extracted as text; make it numeric so it plots on a continuous axis.
temp_surf$year <- as.numeric(temp_surf$year)

# Seasonal mean surface temperature by year (mean +/- 1 sd), one panel per
# season.
temp_ave <- ggplot(temp_surf, aes(x = year, y = mean)) +
  geom_point() +
  geom_line() +
  geom_pointrange(aes(ymin = mean - sd, ymax = mean + sd)) +
  facet_grid(season ~ .) +
  labs(x = "Year", y = "temperature") +
  theme_classic()
temp_ave

pH Plot

remove missing rows and remove weird values

# Build the pH table in one pass:
#   - drop rows with no pH and the single implausible reading (2130.2)
#   - summarize by depth category (mean per station / date / depth)
#   - convert date format (text Date -> Date column date_convert)
pH <- Environmental %>%
  drop_na(pH) %>%
  filter(pH != 2130.2) %>%
  group_by(Station, Date, Depth) %>%
  summarise(pH = mean(pH)) %>%
  mutate(date_convert = as.Date(Date, format = "%m/%d/%y"))

make plot

# Mean pH through time, coloured by depth category, one panel per station.
pH_plot <- ggplot(
  pH,
  aes(x = date_convert, y = pH, color = Depth)
) +
  geom_line() +
  geom_point() +
  facet_grid(Station ~ .) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "pH") +
  theme_classic()
pH_plot

save plot

# Write the pH time series to the project output folder. The folder is passed
# to ggsave() through `path` instead of calling setwd(), so saving the figure
# no longer changes the session's working directory.
ggsave(
  filename = "time-series_pH.png",
  plot = pH_plot,
  path = "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-2/output",
  device = "png",
  width = 10,
  height = 17,
  dpi = 700
)

Try anomaly analysis

library(anomalize)
library(tibble)

subset to station, depth

# Build the 5-m pH series for station P402 and run the anomaly analysis.
pH_5m <- subset(pH, Depth == "5-m")
pH_5m_P402 <- pH_5m %>%
  # drop the grouping left over from summarise() so the series is a plain table
  ungroup() %>%
  filter(Station == "P402") %>%
  # keep the value and date columns by name rather than by column position
  select(pH, date_convert) %>%
  arrange(date_convert)
# as_tibble() replaces the deprecated as.tibble()
P402_ts <- as_tibble(pH_5m_P402)

# decomposition: STL split into season / trend / remainder, then flag
# anomalies in the remainder with GESD (alpha 0.05, at most 20% of points)
P402_ts %>%
  time_decompose(pH, method = "stl", frequency = "auto", trend = "auto") %>%
  anomalize(remainder, method = "gesd", alpha = 0.05, max_anoms = 0.2) %>%
  plot_anomaly_decomposition()

# Same series with the package defaults, recomposed so the anomaly bounds are
# drawn around the observed values.
P402_ts %>%
  time_decompose(pH) %>%
  anomalize(remainder) %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.5)

Average across stations

# Surface samples only
pH_surf <- subset(pH, Depth == "Surface")

# Calendar year and two-digit month of each sample
pH_surf$year <- format(pH_surf$date_convert, "%Y")
pH_surf$month <- format(pH_surf$date_convert, "%m")

# Season label derived from the month; months not listed below keep their
# two-digit month code.
pH_surf$season <- recode(
  pH_surf$month,
  "04" = "spring",
  "05" = "spring",
  "06" = "summer",
  "07" = "summer",
  "08" = "summer",
  "09" = "autumn",
  "10" = "autumn",
  "11" = "autumn"
)

# Across-station mean and standard deviation for each year and season
pH_surf <- summarise(
  group_by(pH_surf, year, season),
  mean = mean(pH),
  sd = sd(pH)
)

plot

# Year was extracted as text; make it numeric so it plots on a continuous axis.
pH_surf$year <- as.numeric(pH_surf$year)

# Seasonal mean surface pH by year (mean +/- 1 sd), one panel per season.
pH_ave <- ggplot(pH_surf, aes(x = year, y = mean)) +
  geom_point() +
  geom_line() +
  geom_pointrange(aes(ymin = mean - sd, ymax = mean + sd)) +
  facet_grid(season ~ .) +
  labs(x = "Year", y = "pH") +
  theme_classic()
pH_ave

O2 Plot

remove missing rows and remove weird values

# Build the oxygen table in one pass:
#   - drop rows with no O2 (mg/l) value
#   - summarize by depth category (mean per station / date / depth)
#   - convert date format (text Date -> Date column date_convert)
Oxygen <- Environmental %>%
  drop_na(O2.in.mg.l) %>%
  group_by(Station, Date, Depth) %>%
  summarise(Oxygen = mean(O2.in.mg.l)) %>%
  mutate(date_convert = as.Date(Date, format = "%m/%d/%y"))

make plot

# Mean oxygen through time, coloured by depth category, one panel per station.
Oxygen_plot <- ggplot(
  Oxygen,
  aes(x = date_convert, y = Oxygen, color = Depth)
) +
  geom_line() +
  geom_point() +
  facet_grid(Station ~ .) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "Oxygen") +
  theme_classic()
Oxygen_plot

save plot

# Write the oxygen time series to the project output folder. The folder is
# passed to ggsave() through `path` instead of calling setwd(), so saving the
# figure no longer changes the session's working directory.
ggsave(
  filename = "time-series_Oxygen.png",
  plot = Oxygen_plot,
  path = "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-2/output",
  device = "png",
  width = 10,
  height = 17,
  dpi = 700
)

Average across stations

# Surface samples only
oxygen_surf <- subset(Oxygen, Depth == "Surface")

# Calendar year and two-digit month of each sample
oxygen_surf$year <- format(oxygen_surf$date_convert, "%Y")
oxygen_surf$month <- format(oxygen_surf$date_convert, "%m")

# Season label derived from the month; months not listed below keep their
# two-digit month code.
oxygen_surf$season <- recode(
  oxygen_surf$month,
  "04" = "spring",
  "05" = "spring",
  "06" = "summer",
  "07" = "summer",
  "08" = "summer",
  "09" = "autumn",
  "10" = "autumn",
  "11" = "autumn"
)

# Across-station mean and standard deviation for each year and season
oxygen_surf <- summarise(
  group_by(oxygen_surf, year, season),
  mean = mean(Oxygen),
  sd = sd(Oxygen)
)

plot

# Year was extracted as text; make it numeric so it plots on a continuous axis.
oxygen_surf$year <- as.numeric(oxygen_surf$year)

# Seasonal mean surface oxygen by year (mean +/- 1 sd), one panel per season.
oxygen_ave <- ggplot(oxygen_surf, aes(x = year, y = mean)) +
  geom_point() +
  geom_line() +
  geom_pointrange(aes(ymin = mean - sd, ymax = mean + sd)) +
  facet_grid(season ~ .) +
  labs(x = "Year", y = "oxygen") +
  theme_classic()
oxygen_ave

# Time Series Analysis

I'm struggling with this because of the missing (NA) values, so I am just going to average over the years.

## Detrend temperature

# Surface temperature records only
temp_timeseries <- subset(temp, Depth == "Surface")

# add month column: two-digit month of each sample, relabelled with month
# abbreviations.
# NOTE(review): month "11" is also labelled "Oct" - presumably so a November
# cruise is binned with the October cruises; confirm this is intended.
temp_timeseries$month <- fct_recode(
  format(as.Date(temp_timeseries$Date, format = "%m/%d/%y"), "%m"),
  "Oct" = "10",
  "Apr" = "04",
  "Jul" = "07",
  "Sep" = "09",
  "May" = "05",
  "Jun" = "06",
  "Oct" = "11"
)

# Calendar year of each sample (kept as text)
temp_timeseries$year <- format(
  as.Date(temp_timeseries$Date, format = "%m/%d/%y"),
  "%Y"
)

subset to one station

# Station P12 only, with the second row discarded.
# NOTE(review): the script calls row 2 an "extra row" without saying why it is
# removed - confirm it is still the right row if the input data change.
temp_timeseries_p12 <- subset(temp_timeseries, Station == "P12")[-2, ]

convert to time series object

# Reshape the P12 surface temperatures into a regular monthly series
# (one value per calendar month, NA where there was no sample) and wrap it
# in a ts object.
#remove excess columns (drops columns 1-3 and 5 by position)
# NOTE(review): assumes the column order is Station, Date, Depth, temp,
# date_convert, month, year, so that temp, month and year remain - confirm.
temp_timeseries_p12 <- temp_timeseries_p12[ -c(1:3,5) ]
# wide format: one row per year, one column per sampled month
p12<-dcast(temp_timeseries_p12, year ~ month, value.var = "temp")
#add other months as all-NA columns
# NOTE(review): if dcast already produced a column with one of these names
# (e.g. May), the assignment below overwrites its values with NA - verify.
p12$Jan<-NA
p12$Feb<-NA
p12$Mar<-NA
p12$May<-NA
p12$Aug<-NA
p12$Nov<-NA
p12$Dec<-NA
#reorder columns
# NOTE(review): the positions assume dcast returned year plus exactly five
# month columns, so that the result runs year, Jan, ..., Dec - confirm.
p12 <- p12[, c(1,7,8,9,2,10,3,4,11,5,6,12,13)]
#convert back to long format (columns: year, variable, value)
p12<-melt(p12, id.vars=c("year"))
#sort chronologically: by column 1 (year), then column 2 (month variable)
p12<-p12[
  order( p12[,1], p12[,2] ),
]
#remove year and month columns, leaving only the value column
p12 <- p12[ -c(1:2) ]
str(p12)
## 'data.frame':    108 obs. of  1 variable:
##  $ value: num  NA NA NA NA NA ...
# monthly series (12 observations per year) starting in January 2014
p12_ts<-ts(p12, start=c(2014,1),freq=12)

Because there are missing values, they must be imputed. I followed the tutorial here: https://jtr13.github.io/EDAVold/missingTS.html

# Print the monthly P12 series; the recorded output below is a year-by-month
# table in which NA marks months without a value.
p12_ts
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 2014       NA       NA       NA       NA       NA       NA 20.43800       NA
## 2015       NA       NA       NA 11.63720       NA       NA 20.89510       NA
## 2016       NA       NA       NA 12.18830       NA       NA 17.74860       NA
## 2017       NA       NA       NA 10.20495       NA       NA 19.78030       NA
## 2018       NA       NA       NA 10.36320       NA       NA 20.44650       NA
## 2019       NA       NA       NA 11.60450       NA       NA 17.14440       NA
## 2020       NA       NA       NA       NA       NA       NA 17.56200       NA
## 2021       NA       NA       NA 12.65805       NA       NA 18.60945       NA
## 2022       NA       NA       NA 10.62945       NA 13.65970       NA       NA
##           Sep      Oct      Nov      Dec
## 2014       NA 14.77650       NA       NA
## 2015 13.39490       NA       NA       NA
## 2016 13.30880       NA       NA       NA
## 2017 16.35710       NA       NA       NA
## 2018 12.92045       NA       NA       NA
## 2019 13.64740       NA       NA       NA
## 2020 17.25145       NA       NA       NA
## 2021 16.33925       NA       NA       NA
## 2022       NA       NA       NA       NA

subset to one station

# Station P12 only, with the second row discarded.
# NOTE(review): the script calls row 2 an "extra row" without saying why it is
# removed - confirm it is still the right row if the input data change.
temp_timeseries_p12 <- subset(temp_timeseries, Station == "P12")[-2, ]

# convert to time series object
# Drop columns 1-3 and 6-7 by position.
# NOTE(review): assumes temp and date_convert are columns 4 and 5 - confirm.
temp_timeseries_p12 <- temp_timeseries_p12[-c(1:3, 6:7)]

# Irregularly spaced series indexed by the sampling date, then a quick plot.
xt1 <- xts(
  temp_timeseries_p12$temp,
  order.by = as.Date(temp_timeseries_p12$date_convert)
)
plot(xt1)

Individual station plots

P402

remove missing rows and remove weird values

# Build the combined temperature / pH / oxygen table in one pass:
#   - drop rows with no pH, the single implausible pH reading (2130.2), and
#     rows with no O2 (mg/l) value
#   - summarize by depth category (mean per station / date / depth)
#   - convert date format (text Date -> Date column date_convert)
Environmental_sum <- Environmental %>%
  drop_na(pH) %>%
  filter(pH != 2130.2) %>%
  drop_na(O2.in.mg.l) %>%
  group_by(Station, Date, Depth) %>%
  summarise(
    temp = mean(CTDTMP_DEG_C_ITS90),
    pH = mean(pH),
    Oxygen = mean(O2.in.mg.l)
  ) %>%
  mutate(date_convert = as.Date(Date, format = "%m/%d/%y"))

# subset to station and depth: P402, excluding the Surface and Mid categories
Environmental_sum_P402 <- subset(
  Environmental_sum,
  Station == "P402" & Depth != "Surface" & Depth != "Mid"
)

# convert to long format: one row per station / date / depth / variable
Environmental_sum_P402 <- melt(
  Environmental_sum_P402,
  id.vars = c("Station", "Date", "Depth", "date_convert"),
  na.rm = FALSE
)

make plot

# Fix the order of the two depth categories used for the colour legend.
Environmental_sum_P402$Depth <- factor(
  Environmental_sum_P402$Depth,
  levels = c("5-m", "Deep")
)

# Temperature at P402 through time, coloured by depth category.
P402_temp <- ggplot(
  data = subset(Environmental_sum_P402, variable == "temp"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(5, 10, 15, 20),
    limits = c(5, 20),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "Temperature (C)") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P402_temp

# pH at P402 through time, coloured by depth category.
P402_pH <- ggplot(
  data = subset(Environmental_sum_P402, variable == "pH"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(7, 7.5, 8, 8.5),
    limits = c(7, 8.5),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "pH") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P402_pH

# Oxygen at P402 through time, coloured by depth category.
P402_Oxy <- ggplot(
  data = subset(Environmental_sum_P402, variable == "Oxygen"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(0, 5, 10, 15),
    limits = c(0, 15),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "Oxygen mg/l") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P402_Oxy

save plots

# Save the three P402 panels (5 x 2.5 in, 600 dpi) to the project output
# folder. The folder is passed to ggsave() through `path` instead of calling
# setwd(), so saving no longer changes the session's working directory.
output_dir <- "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-2/output"

ggsave(
  filename = "P402_temp.png", plot = P402_temp, path = output_dir,
  height = 2.5, width = 5, units = "in", device = "png", dpi = 600
)

ggsave(
  filename = "P402_pH.png", plot = P402_pH, path = output_dir,
  height = 2.5, width = 5, units = "in", device = "png", dpi = 600
)

ggsave(
  filename = "P402_Oxy.png", plot = P402_Oxy, path = output_dir,
  height = 2.5, width = 5, units = "in", device = "png", dpi = 600
)

P22

remove missing rows and remove weird values

# Build the combined temperature / pH / oxygen table in one pass:
#   - drop rows with no pH, the single implausible pH reading (2130.2), and
#     rows with no O2 (mg/l) value
#   - summarize by depth category (mean per station / date / depth)
#   - convert date format (text Date -> Date column date_convert)
Environmental_sum <- Environmental %>%
  drop_na(pH) %>%
  filter(pH != 2130.2) %>%
  drop_na(O2.in.mg.l) %>%
  group_by(Station, Date, Depth) %>%
  summarise(
    temp = mean(CTDTMP_DEG_C_ITS90),
    pH = mean(pH),
    Oxygen = mean(O2.in.mg.l)
  ) %>%
  mutate(date_convert = as.Date(Date, format = "%m/%d/%y"))

# subset to station and depth: P22, excluding the Surface and Mid categories
Environmental_sum_P22 <- subset(
  Environmental_sum,
  Station == "P22" & Depth != "Surface" & Depth != "Mid"
)

# convert to long format: one row per station / date / depth / variable
Environmental_sum_P22 <- melt(
  Environmental_sum_P22,
  id.vars = c("Station", "Date", "Depth", "date_convert"),
  na.rm = FALSE
)

make plot

# Fix the order of the two depth categories used for the colour legend.
Environmental_sum_P22$Depth <- factor(
  Environmental_sum_P22$Depth,
  levels = c("5-m", "Deep")
)

# Temperature at P22 through time, coloured by depth category.
P22_temp <- ggplot(
  data = subset(Environmental_sum_P22, variable == "temp"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(5, 10, 15, 20),
    limits = c(5, 20),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "Temperature (C)") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P22_temp

# pH at P22 through time, coloured by depth category.
P22_pH <- ggplot(
  data = subset(Environmental_sum_P22, variable == "pH"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(7, 7.5, 8, 8.5),
    limits = c(7, 8.5),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "pH") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P22_pH

# Oxygen at P22 through time, coloured by depth category.
P22_Oxy <- ggplot(
  data = subset(Environmental_sum_P22, variable == "Oxygen"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(0, 5, 10, 15),
    limits = c(0, 15),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "Oxygen mg/l") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P22_Oxy

save plots

# Save the three P22 panels (5 x 2.5 in, 600 dpi) to the project output
# folder. The folder is passed to ggsave() through `path` instead of calling
# setwd(), so saving no longer changes the session's working directory.
output_dir <- "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-2/output"

ggsave(
  filename = "P22_temp.png", plot = P22_temp, path = output_dir,
  height = 2.5, width = 5, units = "in", device = "png", dpi = 600
)

ggsave(
  filename = "P22_pH.png", plot = P22_pH, path = output_dir,
  height = 2.5, width = 5, units = "in", device = "png", dpi = 600
)

ggsave(
  filename = "P22_Oxy.png", plot = P22_Oxy, path = output_dir,
  height = 2.5, width = 5, units = "in", device = "png", dpi = 600
)

P28

remove missing rows and remove weird values

# Build the combined temperature / pH / oxygen table in one pass:
#   - drop rows with no pH, the single implausible pH reading (2130.2), and
#     rows with no O2 (mg/l) value
#   - summarize by depth category (mean per station / date / depth)
#   - convert date format (text Date -> Date column date_convert)
Environmental_sum <- Environmental %>%
  drop_na(pH) %>%
  filter(pH != 2130.2) %>%
  drop_na(O2.in.mg.l) %>%
  group_by(Station, Date, Depth) %>%
  summarise(
    temp = mean(CTDTMP_DEG_C_ITS90),
    pH = mean(pH),
    Oxygen = mean(O2.in.mg.l)
  ) %>%
  mutate(date_convert = as.Date(Date, format = "%m/%d/%y"))

# subset to station and depth: P28, excluding the Surface and Mid categories
Environmental_sum_P28 <- subset(
  Environmental_sum,
  Station == "P28" & Depth != "Surface" & Depth != "Mid"
)

# convert to long format: one row per station / date / depth / variable
Environmental_sum_P28 <- melt(
  Environmental_sum_P28,
  id.vars = c("Station", "Date", "Depth", "date_convert"),
  na.rm = FALSE
)

make plot

# Fix the order of the two depth categories used for the colour legend.
Environmental_sum_P28$Depth <- factor(
  Environmental_sum_P28$Depth,
  levels = c("5-m", "Deep")
)

# Temperature at P28 through time, coloured by depth category.
P28_temp <- ggplot(
  data = subset(Environmental_sum_P28, variable == "temp"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(5, 10, 15, 20),
    limits = c(5, 20),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "Temperature (C)") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P28_temp

# pH at P28 through time, coloured by depth category.
P28_pH <- ggplot(
  data = subset(Environmental_sum_P28, variable == "pH"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(7, 7.5, 8, 8.5),
    limits = c(7, 8.5),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "pH") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P28_pH

# Oxygen at P28 through time, coloured by depth category.
P28_Oxy <- ggplot(
  data = subset(Environmental_sum_P28, variable == "Oxygen"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(0, 5, 10, 15),
    limits = c(0, 15),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "Oxygen mg/l") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P28_Oxy

save plots

# Save the three P28 panels (5 x 2.5 in, 600 dpi) to the project output
# folder. The folder is passed to ggsave() through `path` instead of calling
# setwd(), so saving no longer changes the session's working directory.
output_dir <- "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-2/output"

ggsave(
  filename = "P28_temp.png", plot = P28_temp, path = output_dir,
  height = 2.5, width = 5, units = "in", device = "png", dpi = 600
)

ggsave(
  filename = "P28_pH.png", plot = P28_pH, path = output_dir,
  height = 2.5, width = 5, units = "in", device = "png", dpi = 600
)

ggsave(
  filename = "P28_Oxy.png", plot = P28_Oxy, path = output_dir,
  height = 2.5, width = 5, units = "in", device = "png", dpi = 600
)

P402 April

remove missing rows and remove weird values

# Build the combined temperature / pH / oxygen table in one pass:
#   - drop rows with no pH, the single implausible pH reading (2130.2), and
#     rows with no O2 (mg/l) value
#   - summarize by depth category (mean per station / date / depth / month)
#   - convert date format (text Date -> Date column date_convert)
Environmental_sum <- Environmental %>%
  drop_na(pH) %>%
  filter(pH != 2130.2) %>%
  drop_na(O2.in.mg.l) %>%
  group_by(Station, Date, Depth, Month) %>%
  summarise(
    temp = mean(CTDTMP_DEG_C_ITS90),
    pH = mean(pH),
    Oxygen = mean(O2.in.mg.l)
  ) %>%
  mutate(date_convert = as.Date(Date, format = "%m/%d/%y"))

# subset to station and depth: P402, excluding the Surface and Mid
# categories, April ("APR") records only
Environmental_sum_P402 <- subset(
  Environmental_sum,
  Station == "P402" & Depth != "Surface" & Depth != "Mid" & Month == "APR"
)

# convert to long format: one row per station / date / depth / variable
Environmental_sum_P402 <- melt(
  Environmental_sum_P402,
  id.vars = c("Station", "Date", "Depth", "date_convert", "Month"),
  na.rm = FALSE
)

make plot

# Fix the order of the two depth categories used for the colour legend.
Environmental_sum_P402$Depth <- factor(
  Environmental_sum_P402$Depth,
  levels = c("5-m", "Deep")
)

# April temperature at P402 through time, coloured by depth category.
P402_temp <- ggplot(
  data = subset(Environmental_sum_P402, variable == "temp"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(5, 10, 15, 20),
    limits = c(5, 20),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "Temperature (C)") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P402_temp

# April pH at P402 through time, coloured by depth category.
P402_pH <- ggplot(
  data = subset(Environmental_sum_P402, variable == "pH"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(7, 7.5, 8, 8.5),
    limits = c(7, 8.5),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "pH") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P402_pH

# April oxygen at P402 through time, coloured by depth category.
P402_Oxy <- ggplot(
  data = subset(Environmental_sum_P402, variable == "Oxygen"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(0, 5, 10, 15),
    limits = c(0, 15),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "Oxygen mg/l") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P402_Oxy

P402 July

remove missing rows and remove weird values

# Build the combined temperature / pH / oxygen table in one pass:
#   - drop rows with no pH, the single implausible pH reading (2130.2), and
#     rows with no O2 (mg/l) value
#   - summarize by depth category (mean per station / date / depth / month)
#   - convert date format (text Date -> Date column date_convert)
Environmental_sum <- Environmental %>%
  drop_na(pH) %>%
  filter(pH != 2130.2) %>%
  drop_na(O2.in.mg.l) %>%
  group_by(Station, Date, Depth, Month) %>%
  summarise(
    temp = mean(CTDTMP_DEG_C_ITS90),
    pH = mean(pH),
    Oxygen = mean(O2.in.mg.l)
  ) %>%
  mutate(date_convert = as.Date(Date, format = "%m/%d/%y"))

# subset to station and depth: P402, excluding the Surface and Mid
# categories, July ("JUL") records only
Environmental_sum_P402 <- subset(
  Environmental_sum,
  Station == "P402" & Depth != "Surface" & Depth != "Mid" & Month == "JUL"
)

# convert to long format: one row per station / date / depth / variable
Environmental_sum_P402 <- melt(
  Environmental_sum_P402,
  id.vars = c("Station", "Date", "Depth", "date_convert", "Month"),
  na.rm = FALSE
)

make plot

# Fix the order of the two depth categories used for the colour legend.
Environmental_sum_P402$Depth <- factor(
  Environmental_sum_P402$Depth,
  levels = c("5-m", "Deep")
)

# July temperature at P402 through time, coloured by depth category.
P402_temp <- ggplot(
  data = subset(Environmental_sum_P402, variable == "temp"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(5, 10, 15, 20),
    limits = c(5, 20),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "Temperature (C)") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P402_temp

# July pH at P402 through time, coloured by depth category.
P402_pH <- ggplot(
  data = subset(Environmental_sum_P402, variable == "pH"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(7, 7.5, 8, 8.5),
    limits = c(7, 8.5),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "pH") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P402_pH

# July oxygen at P402 through time, coloured by depth category.
P402_Oxy <- ggplot(
  data = subset(Environmental_sum_P402, variable == "Oxygen"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(0, 5, 10, 15),
    limits = c(0, 15),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "Oxygen mg/l") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P402_Oxy

P402 Sep

remove missing rows and remove weird values

# Build the combined temperature / pH / oxygen table in one pass:
#   - drop rows with no pH, the single implausible pH reading (2130.2), and
#     rows with no O2 (mg/l) value
#   - summarize by depth category (mean per station / date / depth / month)
#   - convert date format (text Date -> Date column date_convert)
Environmental_sum <- Environmental %>%
  drop_na(pH) %>%
  filter(pH != 2130.2) %>%
  drop_na(O2.in.mg.l) %>%
  group_by(Station, Date, Depth, Month) %>%
  summarise(
    temp = mean(CTDTMP_DEG_C_ITS90),
    pH = mean(pH),
    Oxygen = mean(O2.in.mg.l)
  ) %>%
  mutate(date_convert = as.Date(Date, format = "%m/%d/%y"))

# subset to station and depth: P402, excluding the Surface and Mid
# categories, September ("SEP") records only
Environmental_sum_P402 <- subset(
  Environmental_sum,
  Station == "P402" & Depth != "Surface" & Depth != "Mid" & Month == "SEP"
)

# convert to long format: one row per station / date / depth / variable
Environmental_sum_P402 <- melt(
  Environmental_sum_P402,
  id.vars = c("Station", "Date", "Depth", "date_convert", "Month"),
  na.rm = FALSE
)

make plot

# Fix the order of the two depth categories used for the colour legend.
Environmental_sum_P402$Depth <- factor(
  Environmental_sum_P402$Depth,
  levels = c("5-m", "Deep")
)

# September temperature at P402 through time, coloured by depth category.
P402_temp <- ggplot(
  data = subset(Environmental_sum_P402, variable == "temp"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(5, 10, 15, 20),
    limits = c(5, 20),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "Temperature (C)") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P402_temp

# September pH at P402 through time, coloured by depth category.
P402_pH <- ggplot(
  data = subset(Environmental_sum_P402, variable == "pH"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(7, 7.5, 8, 8.5),
    limits = c(7, 8.5),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "pH") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P402_pH

# September oxygen at P402 through time, coloured by depth category.
P402_Oxy <- ggplot(
  data = subset(Environmental_sum_P402, variable == "Oxygen"),
  mapping = aes(x = date_convert, y = value, color = Depth)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = c(0, 5, 10, 15),
    limits = c(0, 15),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "Oxygen mg/l") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 11),
    axis.title.x = element_blank()
  )
P402_Oxy