# Global knitr chunk defaults: echo the source code, keep warnings in the
# output, and leave messages out of it.
knitr::opts_chunk$set(
  message = FALSE,
  warning = TRUE,
  echo = TRUE
)

# Directory that holds the input data.
# The full path is handed to read.csv() below instead of calling setwd(), so
# running this script does not change the session's working directory as a
# side effect. Edit `data_dir` if the file lives somewhere else.
data_dir <- "~/Downloads/Intro to R/Module 7"

# Load libraries
library(ggplot2)

# Load data
# Fail early with an explicit message when the file is missing, rather than
# relying on the lower-level "cannot open file" error from read.csv().
temp_path <- file.path(data_dir, "Temperature.csv")
if (!file.exists(temp_path)) {
  stop("Data file not found: ", temp_path, call. = FALSE)
}
temp <- read.csv(temp_path)

# Part 1

# Distribution of Salinity over all rows of `temp`, using bins 0.5 wide.
ggplot(data = temp, mapping = aes(Salinity)) +
  geom_histogram(
    colour = "black",
    fill = "steelblue",
    binwidth = 0.5
  ) +
  ggtitle("Histogram of Salinity") +
  xlab("Salinity") +
  ylab("Count")
## Warning: Removed 798 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Salinity distribution (bins 0.5 wide), drawn as one panel per Year.
ggplot(data = temp, mapping = aes(Salinity)) +
  facet_wrap(facets = ~Year) +
  geom_histogram(
    colour = "black",
    fill = "orange",
    binwidth = 0.5
  ) +
  ggtitle("Salinity Histogram by Year") +
  xlab("Salinity") +
  ylab("Count")
## Warning: Removed 798 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Salinity distribution (bins 0.5 wide), drawn as one panel per Month.
ggplot(data = temp, mapping = aes(Salinity)) +
  facet_wrap(facets = ~Month) +
  geom_histogram(
    colour = "black",
    fill = "purple",
    binwidth = 0.5
  ) +
  ggtitle("Salinity Histogram by Month") +
  xlab("Salinity") +
  ylab("Count")
## Warning: Removed 798 rows containing non-finite outside the scale range
## (`stat_bin()`).

# One Temperature boxplot per Station; the x-axis tick labels are rotated
# 90 degrees.
ggplot(data = temp, mapping = aes(Station, Temperature)) +
  geom_boxplot(fill = "skyblue") +
  theme(axis.text.x = element_text(hjust = 1, angle = 90)) +
  ggtitle("Boxplot of Temperature by Station") +
  xlab("Station") +
  ylab("Temperature")
## Warning: Removed 927 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Boxplot Ordered by Median Temperature
# Stations are arranged along the x axis by their median Temperature.
# `na.rm = TRUE` is forwarded by reorder() to median(): without it, every
# station that has at least one missing Temperature gets an NA median and is
# pushed to the end of the axis instead of being placed by the median of its
# observed values.
ggplot(
  temp,
  aes(
    x = reorder(Station, Temperature, FUN = median, na.rm = TRUE),
    y = Temperature
  )
) +
  geom_boxplot(fill = "skyblue") +
  labs(title = "Boxplot Ordered by Median Temperature",
       x = "Station (Ordered by Median)",
       y = "Temperature") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Removed 927 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Part 2

# Add a `decdate` column: Year plus dDay3 divided by 365.
# NOTE(review): presumably dDay3 is the day number within Year, and 365 is
# used for every year (leap years included) — confirm this is acceptable.
temp[["decdate"]] <- temp[["Year"]] + temp[["dDay3"]] / 365

# Temperature against decimal date, drawn as semi-transparent points.
ggplot(data = temp, mapping = aes(decdate, Temperature)) +
  geom_point(alpha = 0.6, colour = "firebrick") +
  ggtitle("Temperature Over Time") +
  xlab("Decimal Date") +
  ylab("Temperature")
## Warning: Removed 927 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Salinity against decimal date, drawn as semi-transparent points.
ggplot(data = temp, mapping = aes(decdate, Salinity)) +
  geom_point(alpha = 0.6, colour = "navy") +
  ggtitle("Salinity Over Time") +
  xlab("Decimal Date") +
  ylab("Salinity")
## Warning: Removed 798 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Salinity against decimal date as semi-transparent points, with one panel
# per Area.
ggplot(data = temp, mapping = aes(decdate, Salinity)) +
  facet_wrap(facets = ~Area) +
  geom_point(colour = "darkgreen", alpha = 0.6) +
  ggtitle("Salinity Over Time by Area") +
  xlab("Decimal Date") +
  ylab("Salinity")
## Warning: Removed 798 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Salinity time series: one line per Station (via the `group` aesthetic),
# with one panel per Area.
ggplot(data = temp, mapping = aes(decdate, Salinity, group = Station)) +
  facet_wrap(facets = ~Area) +
  geom_line(alpha = 0.5) +
  ggtitle("Salinity Time Series by Station and Area") +
  xlab("Decimal Date") +
  ylab("Salinity")
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

# BONUS: Salinity time series restricted to rows whose Area is "OS", drawn
# as one line per Station.
# which() drops rows where the comparison is NA, matching subset().
os_data <- temp[which(temp$Area == "OS"), , drop = FALSE]

ggplot(data = os_data, mapping = aes(decdate, Salinity, group = Station)) +
  geom_line(alpha = 0.7, colour = "blue") +
  ggtitle("Salinity Time Series in Area 'OS'") +
  xlab("Decimal Date") +
  ylab("Salinity")