This code is mostly exploratory analysis of WOAC environmental data at bio stations between 2014-2022.
The report is here: https://rpubs.com/HailaSchultz/1097377
library(dplyr)
library(ggplot2)
library(tidyr)
library(forcats)
library(reshape2)
library(xts)
library(cowplot)
WOAC water chemistry data: This dataset was acquired from BethElLee Herrmann and is a compilation of measurements from bottle samples and from the CTD collected at each depth where bottle water was collected. This sheet is the second tab of the raw data spreadsheet. This dataset includes data on: - pH - pCO2 - CTD pressure - CTD temperature - CTD salinity - CTD oxygen - oxygen - DIC - total alkalinity - phosphate - silicate - aragonite saturation - note that this dataset does not have nitrogen-based nutrients.
WOAC Stations
Read data into R
# Load the WOAC chemistry table (2014-2022, all Niskin bottles) from disk.
Environmental <- read.csv(
  file = "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-2/data/WOAC_Chem_data_2014-2022_all-niskins.csv"
)
The purpose of this section is to explore patterns in temperatures across stations, seasons, and years. I am also interested in seeing if we can detect the marine heatwave from 2015-2017. I may need to look at averages as well as maximum temperature. However, this dataset may not be the best at capturing maximum temperature since it is restricted to the depths where bottle samples were collected. I may need to look at the entire CTD profile from NANOOS files, assuming they have continuous measurements. Another thing to note about this dataset is that the data are separated by month - it would probably be more beneficial to separate by season because, for example, some years may have collections during June or July, but these would both be aimed at being the same cruise.
# Convert date format: parse the month/day/two-digit-year strings in `Date`
# into a proper Date column.
temp <- Environmental %>%
  mutate(date_convert = as.Date(Date, format = "%m/%d/%y"))

# Summarise by max temperature: highest CTD temperature recorded for each
# station, month and sampling date.
# `.groups = "drop"` returns an ungrouped table so no grouping lingers into
# later steps.
temp_max <- temp %>%
  group_by(Station, Month, date_convert) %>%
  summarise(
    temp = max(CTDTMP_DEG_C_ITS90),
    .groups = "drop"
  )
plot by max temperature
# Line-and-point time series of the per-date maximum temperature, with one
# facet row per station and one x-axis break per year.
max_temp_plot <- ggplot(temp_max, aes(x = date_convert, y = temp)) +
  geom_line() +
  geom_point() +
  facet_grid(Station ~ .) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(x = "Year", y = "temperature") +
  theme_classic()
max_temp_plot
# Summarise by depth category: mean CTD temperature for each station,
# sampling date and depth bin.
# `.groups = "drop"` returns an ungrouped table so the grouping does not leak
# into the subsetting done further down.
temp <- temp %>%
  group_by(Station, date_convert, Depth) %>%
  summarise(
    temp = mean(CTDTMP_DEG_C_ITS90),
    .groups = "drop"
  )

# Make plot: temperature through time, coloured by depth bin, one facet row
# per station.
temp_plot <- ggplot(data = temp, aes(date_convert, temp, color = Depth)) +
  geom_line() +
  geom_point() +
  facet_grid(Station ~ .) +
  theme_classic() +
  xlab("Year") + ylab("temperature") +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y")
temp_plot

# Save plot. The output folder is passed through `path`, so the session's
# working directory is no longer changed as a side effect (was setwd()).
ggsave(
  filename = "time-series_temperature.png",
  plot = temp_plot,
  path = "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-2/output",
  width = 10, height = 17, device = "png", dpi = 700
)
It looks like P12 and P402 have the highest variability in temperatures, and have especially high summer temperatures. This makes sense because these are the Hood Canal stations.
library(anomalize)
library(tibble)
# Subset to station, depth: 5-m temperatures at station P402.
temp_5m <- subset(temp, Depth == "5-m")
temp_5m_P402 <- subset(temp_5m, Station == "P402")
# Exclude the 2022-07-01 and 2022-04-29 samples (the script does not record
# why these two dates are dropped).
temp_5m_P402 <- subset(temp_5m_P402, date_convert != "2022-07-01")
temp_5m_P402 <- subset(temp_5m_P402, date_convert != "2022-04-29")
# Keep the date and temperature columns by name rather than by position, so
# the step does not silently break if the column order of `temp` changes.
temp_5m_P402 <- temp_5m_P402[, c("date_convert", "temp")]
temp_5m_P402 <- temp_5m_P402[order(temp_5m_P402$date_convert), ]
# as_tibble() replaces the deprecated as.tibble(); ungroup() makes sure no
# grouping left over from summarise() reaches time_decompose().
P402_ts <- temp_5m_P402 %>%
  ungroup() %>%
  as_tibble()

# Decomposition: STL decomposition of the temperature series, then GESD
# anomaly detection on the remainder (max_anoms = 0.2 caps flagged points at
# 20% of observations), plotted component by component.
P402_ts %>%
  time_decompose(temp, method = "stl", frequency = "auto", trend = "auto") %>%
  anomalize(remainder, method = "gesd", max_anoms = 0.2) %>%
  plot_anomaly_decomposition()
# Subset to station, depth: 5-m temperatures at station P28.
temp_5m <- subset(temp, Depth == "5-m")
temp_5m_P28 <- subset(temp_5m, Station == "P28")
# Exclude the 2022-07-01 and 2022-04-29 samples (the script does not record
# why these two dates are dropped).
temp_5m_P28 <- subset(temp_5m_P28, date_convert != "2022-07-01")
temp_5m_P28 <- subset(temp_5m_P28, date_convert != "2022-04-29")
# Keep the date and temperature columns by name rather than by position, so
# the step does not silently break if the column order of `temp` changes.
temp_5m_P28 <- temp_5m_P28[, c("date_convert", "temp")]
temp_5m_P28 <- temp_5m_P28[order(temp_5m_P28$date_convert), ]
# as_tibble() replaces the deprecated as.tibble(); ungroup() makes sure no
# grouping left over from summarise() reaches time_decompose().
P28_ts <- temp_5m_P28 %>%
  ungroup() %>%
  as_tibble()

# Decomposition: STL decomposition of the temperature series, then GESD
# anomaly detection on the remainder (max_anoms = 0.2 caps flagged points at
# 20% of observations), plotted component by component.
P28_ts %>%
  time_decompose(temp, method = "stl", frequency = "auto", trend = "auto") %>%
  anomalize(remainder, method = "gesd", max_anoms = 0.2) %>%
  plot_anomaly_decomposition()
# Subset to station, depth: 5-m temperatures at station P12.
temp_5m <- subset(temp, Depth == "5-m")
temp_5m_P12 <- subset(temp_5m, Station == "P12")
# Exclude the 2022-07-01 and 2022-04-29 samples (the script does not record
# why these two dates are dropped).
temp_5m_P12 <- subset(temp_5m_P12, date_convert != "2022-07-01")
temp_5m_P12 <- subset(temp_5m_P12, date_convert != "2022-04-29")
# Keep the date and temperature columns by name rather than by position, so
# the step does not silently break if the column order of `temp` changes.
temp_5m_P12 <- temp_5m_P12[, c("date_convert", "temp")]
temp_5m_P12 <- temp_5m_P12[order(temp_5m_P12$date_convert), ]
# as_tibble() replaces the deprecated as.tibble(); ungroup() makes sure no
# grouping left over from summarise() reaches time_decompose().
P12_ts <- temp_5m_P12 %>%
  ungroup() %>%
  as_tibble()

# Decomposition: STL decomposition of the temperature series, then GESD
# anomaly detection on the remainder (max_anoms = 0.2 caps flagged points at
# 20% of observations), plotted component by component.
P12_ts %>%
  time_decompose(temp, method = "stl", frequency = "auto", trend = "auto") %>%
  anomalize(remainder, method = "gesd", max_anoms = 0.2) %>%
  plot_anomaly_decomposition()
# Mean CTD temperature for each station, raw date string and depth bin.
# `.groups = "drop"` returns an ungrouped table; the column order
# (Station, Date, Depth, temp) is unchanged.
temp <- Environmental %>%
  group_by(Station, Date, Depth) %>%
  summarise(
    temp = mean(CTDTMP_DEG_C_ITS90),
    .groups = "drop"
  )
#convert date format
temp <- temp %>%
  mutate(date_convert = as.Date(Date, format = "%m/%d/%y"))
#select only surface samples
temp_surf <- subset(temp, Depth == "Surface")
#extract year and month from the date parsed above instead of re-parsing
#the raw `Date` string for each one
temp_surf$year <- format(temp_surf$date_convert, "%Y")
temp_surf$month <- format(temp_surf$date_convert, "%m")
#make season column: months 04-05 are spring, 06-08 summer, 09-11 autumn.
#Any other month value is left as its two-digit month string by recode().
temp_surf$season <- recode(
  temp_surf$month,
  "04" = "spring",
  "05" = "spring",
  "06" = "summer",
  "07" = "summer",
  "08" = "summer",
  "09" = "autumn",
  "10" = "autumn",
  "11" = "autumn"
)
#average by year and season (mean and standard deviation across stations/dates)
temp_surf <- temp_surf %>%
  group_by(year, season) %>%
  summarise(
    mean = mean(temp),
    sd = sd(temp),
    .groups = "drop"
  )
plot
# Year is stored as text after format(); make it numeric so it plots on a
# continuous axis.
temp_surf$year <- as.numeric(temp_surf$year)
# Seasonal mean surface temperature per year with +/- 1 SD ranges, one facet
# row per season.
temp_ave <- ggplot(temp_surf, aes(x = year, y = mean)) +
  geom_point() +
  geom_line() +
  geom_pointrange(aes(ymin = mean - sd, ymax = mean + sd)) +
  facet_grid(season ~ .) +
  labs(x = "Year", y = "temperature") +
  theme_classic()
temp_ave
# Remove missing rows and remove weird values: drop rows without a pH value
# and the one reading equal to 2130.2, which cannot be a pH.
pH <- Environmental %>% drop_na(pH)
pH <- subset(pH, pH != 2130.2)

# Summarise by depth category: mean pH for each station, raw date string and
# depth bin. `.groups = "drop"` returns an ungrouped table; the column order
# (Station, Date, Depth, pH) is unchanged.
pH <- pH %>%
  group_by(Station, Date, Depth) %>%
  summarise(
    pH = mean(pH),
    .groups = "drop"
  )

# Convert date format
pH <- pH %>%
  mutate(date_convert = as.Date(Date, format = "%m/%d/%y"))

# Make plot: pH through time, coloured by depth bin, one facet row per station.
pH_plot <- ggplot(data = pH, aes(date_convert, pH, color = Depth)) +
  geom_line() +
  geom_point() +
  facet_grid(Station ~ .) +
  theme_classic() +
  xlab("Year") + ylab("pH") +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y")
pH_plot

# Save plot. The output folder is passed through `path`, so the session's
# working directory is no longer changed as a side effect (was setwd()).
ggsave(
  filename = "time-series_pH.png",
  plot = pH_plot,
  path = "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-2/output",
  width = 10, height = 17, device = "png", dpi = 700
)
library(anomalize)
library(tibble)
# Subset to station, depth: 5-m pH at station P402.
pH_5m <- subset(pH, Depth == "5-m")
pH_5m_P402 <- subset(pH_5m, Station == "P402")
# Keep the pH and date columns by name rather than by position, so the step
# does not silently break if the column order of `pH` changes.
pH_5m_P402 <- pH_5m_P402[, c("pH", "date_convert")]
pH_5m_P402 <- pH_5m_P402[order(pH_5m_P402$date_convert), ]
# as_tibble() replaces the deprecated as.tibble(); ungroup() makes sure no
# grouping left over from summarise() reaches time_decompose().
P402_ts <- pH_5m_P402 %>%
  ungroup() %>%
  as_tibble()

# Decomposition: STL decomposition of the pH series, then GESD anomaly
# detection on the remainder (alpha = 0.05, at most 20% of points flagged),
# plotted component by component.
P402_ts %>%
  time_decompose(pH, method = "stl", frequency = "auto", trend = "auto") %>%
  anomalize(remainder, method = "gesd", alpha = 0.05, max_anoms = 0.2) %>%
  plot_anomaly_decomposition()

# Same series with the package defaults, recomposed so the anomalies are drawn
# on the observed pH scale.
P402_ts %>%
  time_decompose(pH) %>%
  anomalize(remainder) %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.5)
#select only surface samples
pH_surf <- subset(pH, Depth == "Surface")
#extract year and month from the already-parsed `date_convert` column instead
#of re-parsing the raw `Date` string for each one
pH_surf$year <- format(pH_surf$date_convert, "%Y")
pH_surf$month <- format(pH_surf$date_convert, "%m")
#make season column: months 04-05 are spring, 06-08 summer, 09-11 autumn.
#Any other month value is left as its two-digit month string by recode().
pH_surf$season <- recode(
  pH_surf$month,
  "04" = "spring",
  "05" = "spring",
  "06" = "summer",
  "07" = "summer",
  "08" = "summer",
  "09" = "autumn",
  "10" = "autumn",
  "11" = "autumn"
)
#average by year and season (mean and standard deviation across stations/dates)
pH_surf <- pH_surf %>%
  group_by(year, season) %>%
  summarise(
    mean = mean(pH),
    sd = sd(pH),
    .groups = "drop"
  )
plot
# Year is stored as text after format(); make it numeric so it plots on a
# continuous axis.
pH_surf$year <- as.numeric(pH_surf$year)
# Seasonal mean surface pH per year with +/- 1 SD ranges, one facet row per
# season.
pH_ave <- ggplot(pH_surf, aes(x = year, y = mean)) +
  geom_point() +
  geom_line() +
  geom_pointrange(aes(ymin = mean - sd, ymax = mean + sd)) +
  facet_grid(season ~ .) +
  labs(x = "Year", y = "pH") +
  theme_classic()
pH_ave
# Remove missing rows: drop rows without an oxygen (mg/l) value. No other
# values are filtered out in this section.
Oxygen <- Environmental %>% drop_na(O2.in.mg.l)

# Summarise by depth category: mean oxygen for each station, raw date string
# and depth bin. `.groups = "drop"` returns an ungrouped table; the column
# order (Station, Date, Depth, Oxygen) is unchanged.
Oxygen <- Oxygen %>%
  group_by(Station, Date, Depth) %>%
  summarise(
    Oxygen = mean(O2.in.mg.l),
    .groups = "drop"
  )

# Convert date format
Oxygen <- Oxygen %>%
  mutate(date_convert = as.Date(Date, format = "%m/%d/%y"))

# Make plot: oxygen through time, coloured by depth bin, one facet row per
# station.
Oxygen_plot <- ggplot(data = Oxygen, aes(date_convert, Oxygen, color = Depth)) +
  geom_line() +
  geom_point() +
  facet_grid(Station ~ .) +
  theme_classic() +
  xlab("Year") + ylab("Oxygen") +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y")
Oxygen_plot

# Save plot. The output folder is passed through `path`, so the session's
# working directory is no longer changed as a side effect (was setwd()).
ggsave(
  filename = "time-series_Oxygen.png",
  plot = Oxygen_plot,
  path = "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-2/output",
  width = 10, height = 17, device = "png", dpi = 700
)
#select only surface samples
oxygen_surf <- subset(Oxygen, Depth == "Surface")
#extract year and month from the already-parsed `date_convert` column instead
#of re-parsing the raw `Date` string for each one
oxygen_surf$year <- format(oxygen_surf$date_convert, "%Y")
oxygen_surf$month <- format(oxygen_surf$date_convert, "%m")
#make season column: months 04-05 are spring, 06-08 summer, 09-11 autumn.
#Any other month value is left as its two-digit month string by recode().
oxygen_surf$season <- recode(
  oxygen_surf$month,
  "04" = "spring",
  "05" = "spring",
  "06" = "summer",
  "07" = "summer",
  "08" = "summer",
  "09" = "autumn",
  "10" = "autumn",
  "11" = "autumn"
)
#average by year and season (mean and standard deviation across stations/dates)
oxygen_surf <- oxygen_surf %>%
  group_by(year, season) %>%
  summarise(
    mean = mean(Oxygen),
    sd = sd(Oxygen),
    .groups = "drop"
  )
plot
# Year is stored as text after format(); make it numeric so it plots on a
# continuous axis.
oxygen_surf$year <- as.numeric(oxygen_surf$year)
# Seasonal mean surface oxygen per year with +/- 1 SD ranges, one facet row
# per season.
oxygen_ave <- ggplot(oxygen_surf, aes(x = year, y = mean)) +
  geom_point() +
  geom_line() +
  geom_pointrange(aes(ymin = mean - sd, ymax = mean + sd)) +
  facet_grid(season ~ .) +
  labs(x = "Year", y = "oxygen") +
  theme_classic()
oxygen_ave
# Time Series Analysis: I'm struggling with this because of the missing (NA) values, so I am just going to average over the years. ## detrend temperature
#select only surface samples
temp_timeseries <- subset(temp, Depth == "Surface")
#extract month from the already-parsed `date_convert` column instead of
#re-parsing the raw `Date` string
temp_timeseries$month <- format(temp_timeseries$date_convert, "%m")
#add month column: relabel the two-digit month numbers with month
#abbreviations.
# NOTE(review): "11" is mapped to "Oct", so November samples are folded into
# the October level - confirm this is intended (e.g. to treat them as the
# same autumn cruise) and not a typo for "Nov".
temp_timeseries$month <- fct_recode(
  temp_timeseries$month,
  "Oct" = "10", "Apr" = "04", "Jul" = "07", "Sep" = "09",
  "May" = "05", "Jun" = "06", "Oct" = "11"
)
#extract year
temp_timeseries$year <- format(temp_timeseries$date_convert, "%Y")
# Subset to one station
temp_timeseries_p12 <- subset(temp_timeseries, Station == "P12")
#remove extra row (the second row of the P12 subset)
temp_timeseries_p12 <- temp_timeseries_p12[-c(2), ]
# Convert to time series object
#remove excess columns: keep temp, month and year by name so the step does
#not depend on the column order of `temp_timeseries`
temp_timeseries_p12 <- temp_timeseries_p12[, c("temp", "month", "year")]
#one row per year, one column per sampled month
p12 <- dcast(temp_timeseries_p12, year ~ month, value.var = "temp")
#add other months: every calendar month that has no column yet becomes an
#all-NA column; months that were actually sampled are left untouched
for (month_name in setdiff(month.abb, names(p12))) {
  p12[[month_name]] <- NA
}
#reorder columns into calendar order (Jan..Dec) by name instead of by
#hard-coded position
p12 <- p12[, c("year", month.abb)]
#convert back to long format
p12 <- melt(p12, id.vars = c("year"))
#sort chronologically (by year, then by month in calendar order)
p12 <- p12[order(p12[, 1], p12[, 2]), ]
#remove year and month columns
p12 <- p12[-c(1:2)]
str(p12)
## 'data.frame': 108 obs. of 1 variable:
## $ value: num NA NA NA NA NA ...
# Monthly series starting January 2014 (`frequency` spelled out; `freq` relied
# on partial argument matching).
p12_ts <- ts(p12, start = c(2014, 1), frequency = 12)
# Because there are missing values, they must be imputed. I followed the
# tutorial here: https://jtr13.github.io/EDAVold/missingTS.html
p12_ts
## Jan Feb Mar Apr May Jun Jul Aug
## 2014 NA NA NA NA NA NA 20.43800 NA
## 2015 NA NA NA 11.63720 NA NA 20.89510 NA
## 2016 NA NA NA 12.18830 NA NA 17.74860 NA
## 2017 NA NA NA 10.20495 NA NA 19.78030 NA
## 2018 NA NA NA 10.36320 NA NA 20.44650 NA
## 2019 NA NA NA 11.60450 NA NA 17.14440 NA
## 2020 NA NA NA NA NA NA 17.56200 NA
## 2021 NA NA NA 12.65805 NA NA 18.60945 NA
## 2022 NA NA NA 10.62945 NA 13.65970 NA NA
## Sep Oct Nov Dec
## 2014 NA 14.77650 NA NA
## 2015 13.39490 NA NA NA
## 2016 13.30880 NA NA NA
## 2017 16.35710 NA NA NA
## 2018 12.92045 NA NA NA
## 2019 13.64740 NA NA NA
## 2020 17.25145 NA NA NA
## 2021 16.33925 NA NA NA
## 2022 NA NA NA NA
# Subset to one station
temp_timeseries_p12 <- subset(temp_timeseries, Station == "P12")
#remove extra row (the second row of the P12 subset)
temp_timeseries_p12 <- temp_timeseries_p12[-c(2), ]
# Convert to time series object
#remove excess columns: keep temp and date_convert by name so the step does
#not depend on the column order of `temp_timeseries`
temp_timeseries_p12 <- temp_timeseries_p12[, c("temp", "date_convert")]
# Irregularly spaced series indexed by sampling date. `date_convert` is already
# a Date, so it is passed to `order.by` without another as.Date() call.
xt1 <- xts(temp_timeseries_p12$temp, order.by = temp_timeseries_p12$date_convert)
plot(xt1)
# ---- Depth-resolved (5-m vs Deep) temperature, pH and oxygen panels ----
# Builds the same three panels (temperature, pH, oxygen) for stations P402,
# P22 and P28 and saves them as PNGs, then rebuilds them for P402 restricted
# to the APR, JUL and SEP cruise months (displayed only, not saved).
# The six sections were previously copy-pasted; the shared steps now live in
# the helpers below.

# Folder the station PNGs are written to. Passed to ggsave() via `path`, so
# the session's working directory is not changed (previously setwd()).
output_dir <- "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-2/output"

# Remove missing rows and weird values, summarise, and convert the date.
#   data       : the raw chemistry table.
#   group_cols : character vector of columns to average within.
# Drops rows with missing pH or oxygen and the pH reading equal to 2130.2,
# averages temperature, pH and oxygen per group, and adds `date_convert`.
# Returns an ungrouped table.
summarise_chemistry <- function(data, group_cols) {
  data %>%
    drop_na(pH) %>%
    subset(pH != 2130.2) %>%
    drop_na(O2.in.mg.l) %>%
    group_by(across(all_of(group_cols))) %>%
    summarise(
      temp = mean(CTDTMP_DEG_C_ITS90),
      pH = mean(pH),
      Oxygen = mean(O2.in.mg.l),
      .groups = "drop"
    ) %>%
    mutate(date_convert = as.Date(Date, format = "%m/%d/%y"))
}

# Subset to station and depth, then convert to long format.
#   summary_data : output of summarise_chemistry().
#   station_id   : station to keep (e.g. "P402").
#   month_code   : optional `Month` value to keep (e.g. "APR"); when given,
#                  `summary_data` must contain a `Month` column.
# Rows with Depth "Surface" or "Mid" are removed, the measurements are melted
# into `variable`/`value` columns, and Depth becomes a factor ordered
# "5-m", "Deep".
station_long <- function(summary_data, station_id, month_code = NULL) {
  keep <- summary_data$Station == station_id &
    summary_data$Depth != "Surface" &
    summary_data$Depth != "Mid"
  id_cols <- c("Station", "Date", "Depth", "date_convert")
  if (!is.null(month_code)) {
    keep <- keep & summary_data$Month == month_code
    id_cols <- c(id_cols, "Month")
  }
  # which() drops rows where the condition is NA, as subset() did.
  selected <- as.data.frame(summary_data[which(keep), ])
  long <- melt(selected, na.rm = FALSE, id.vars = id_cols)
  long$Depth <- factor(long$Depth, levels = c("5-m", "Deep"))
  long
}

# One time-series panel for a single variable, coloured by depth.
#   long_data     : output of station_long().
#   variable_name : value of `variable` to plot ("temp", "pH" or "Oxygen").
#   y_label       : y-axis title.
#   y_breaks      : y-axis breaks; their range is also used as the axis limits.
plot_depth_series <- function(long_data, variable_name, y_label, y_breaks) {
  series <- long_data[long_data$variable == variable_name, ]
  ggplot(data = series, aes(date_convert, value, color = Depth)) +
    geom_line() +
    geom_point() +
    scale_y_continuous(
      breaks = y_breaks, limits = range(y_breaks), expand = c(0, 0)
    ) +
    theme_classic() +
    xlab("Year") +
    ylab(y_label) +
    scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
    theme(axis.text = element_text(size = 11)) +
    theme(axis.title.x = element_blank())
}

# The three panels for one long table, returned as a list named temp/pH/Oxy
# (the names double as the file-name suffixes used when saving).
plot_station_panels <- function(long_data) {
  list(
    temp = plot_depth_series(long_data, "temp", "Temperature (C)", c(5, 10, 15, 20)),
    pH = plot_depth_series(long_data, "pH", "pH", c(7, 7.5, 8, 8.5)),
    Oxy = plot_depth_series(long_data, "Oxygen", "Oxygen mg/l", c(0, 5, 10, 15))
  )
}

# ---- Per-station panels (all cruises), displayed and saved ----
Environmental_sum <- summarise_chemistry(
  Environmental,
  c("Station", "Date", "Depth")
)

stations <- c("P402", "P22", "P28")
station_long_data <- lapply(
  setNames(stations, stations),
  function(station_id) station_long(Environmental_sum, station_id)
)
station_panels <- lapply(station_long_data, plot_station_panels)

for (station_id in stations) {
  for (panel_name in names(station_panels[[station_id]])) {
    panel <- station_panels[[station_id]][[panel_name]]
    print(panel)
    # Writes e.g. "P402_temp.png", "P402_pH.png", "P402_Oxy.png".
    ggsave(
      filename = paste0(station_id, "_", panel_name, ".png"),
      plot = panel,
      path = output_dir,
      height = 2.5, width = 5, units = "in", device = "png", dpi = 600
    )
  }
}

# Keep the object names the script has always exposed.
Environmental_sum_P22 <- station_long_data$P22
Environmental_sum_P28 <- station_long_data$P28
P22_temp <- station_panels$P22$temp
P22_pH <- station_panels$P22$pH
P22_Oxy <- station_panels$P22$Oxy
P28_temp <- station_panels$P28$temp
P28_pH <- station_panels$P28$pH
P28_Oxy <- station_panels$P28$Oxy

# ---- P402 panels by cruise month (APR, JUL, SEP), displayed only ----
# Month is added to the grouping so it survives the summary and can be
# filtered on.
Environmental_sum <- summarise_chemistry(
  Environmental,
  c("Station", "Date", "Depth", "Month")
)

for (month_code in c("APR", "JUL", "SEP")) {
  # As before, these objects are overwritten for each month, so after the loop
  # they hold the SEP versions.
  Environmental_sum_P402 <- station_long(Environmental_sum, "P402", month_code)
  month_panels <- plot_station_panels(Environmental_sum_P402)
  P402_temp <- month_panels$temp
  P402_pH <- month_panels$pH
  P402_Oxy <- month_panels$Oxy
  print(P402_temp)
  print(P402_pH)
  print(P402_Oxy)
}