Module 7 Code and Images

All of the plots below are made from one data set of temperature and salinity measurements taken at several stations over several years.

library(ggplot2)
# Read the measurements from the CSV in the working directory, then print the
# structure (column names, types and first values) as a quick sanity check.
temperature <- read.csv("Temperature.csv")
str(temperature)
## 'data.frame':    8528 obs. of  16 variables:
##  $ Sample     : chr  "DANT.19900110" "DANT.19900206" "DANT.19900308" "DANT.19900404" ...
##  $ Date       : int  19900110 19900206 19900308 19900404 19900509 19900620 19900718 19900815 19900919 19901017 ...
##  $ DateNr     : chr  "10/1/90" "6/2/90" "8/3/90" "4/4/90" ...
##  $ dDay1      : int  7 34 64 91 126 168 196 224 259 287 ...
##  $ dDay2      : int  9 36 66 93 128 170 198 226 261 289 ...
##  $ dDay3      : int  9 36 66 93 128 170 198 226 261 289 ...
##  $ Station    : chr  "DANT" "DANT" "DANT" "DANT" ...
##  $ Area       : chr  "WZ" "WZ" "WZ" "WZ" ...
##  $ X31UE_ED50 : num  681380 681380 681380 681380 681380 ...
##  $ X31UN_ED50 : num  5920571 5920571 5920571 5920571 5920571 ...
##  $ Year       : int  1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 ...
##  $ Month      : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Season     : chr  "winter" "winter" "spring" "spring" ...
##  $ Salinity   : num  29.2 27.4 25 28.8 33.3 ...
##  $ Temperature: num  4 6 7.3 8.2 17.4 18.1 17 21 15.1 13.8 ...
##  $ CHLFa      : num  1.3 NA 21.1 25 10.2 6.2 7.9 7.85 13 11.8 ...

Part 1

Using ggplot to make a histogram of salinity values:

# Distribution of all salinity readings, counted in bins one unit wide.
ggplot(data = temperature, mapping = aes(x = Salinity)) +
  geom_histogram(fill = "orchid4", binwidth = 1)
## Warning: Removed 798 rows containing non-finite outside the scale range
## (`stat_bin()`).

Histograms for each year of study:

# Same salinity histogram, drawn as one panel per value of Year.
ggplot(data = temperature, mapping = aes(x = Salinity)) +
  geom_histogram(fill = "orchid4", binwidth = 1) +
  facet_wrap(vars(Year))
## Warning: Removed 798 rows containing non-finite outside the scale range
## (`stat_bin()`).

Histograms for each month of study:

# Same salinity histogram, drawn as one panel per value of Month (1-12).
ggplot(data = temperature, mapping = aes(x = Salinity)) +
  geom_histogram(fill = "orchid4", binwidth = 1) +
  facet_wrap(vars(Month))
## Warning: Removed 798 rows containing non-finite outside the scale range
## (`stat_bin()`).

Box plot of temperature values for each station:

# Horizontal box plots: one box per station on the y axis, temperature on the
# x axis. The plot is kept in `myplot` so it can be written to disk afterwards.
myplot <- ggplot(
  data = temperature,
  mapping = aes(y = Station, x = Temperature)
) +
  geom_boxplot()
myplot
## Warning: Removed 927 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Saving that last box plot as a png:

# Write the stored box plot to a PNG in the working directory. No width or
# height is given, so ggsave() chooses the size itself and reports it.
ggsave(filename = "temperature_boxplot.png", plot = myplot)
## Saving 7 x 5 in image
## Warning: Removed 927 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Reordering the boxplot from low to high median temperatures:

# Turn Station into a factor whose levels are sorted by median temperature
# (lowest first), so the boxes appear in that order on the y axis.
# `na.rm = TRUE` is forwarded to median(): the Temperature column contains
# missing values, and without it every station with at least one NA gets an NA
# median and is pushed to the end instead of being ranked.
temperature$Station <- with(
  temperature,
  reorder(Station, Temperature, FUN = median, na.rm = TRUE)
)

ggplot(temperature, aes(x = Temperature, y = Station)) +
  geom_boxplot()
## Warning: Removed 927 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Part 2

Scatter plots of temperature and salinity over time:

# Build a continuous time axis: the year plus the fraction of the year elapsed.
# NOTE(review): dividing by 365 ignores leap years, and dDay3 is presumed to
# be a day-of-year count -- confirm against the data set description.
temperature[["decdate"]] <- with(temperature, Year + dDay3 / 365)

# Every temperature reading plotted against that decimal date.
ggplot(data = temperature, mapping = aes(x = decdate, y = Temperature)) +
  geom_point(color = "tomato") +
  labs(title = "Temperature Over Time", x = "Date", y = "Temperature (°C)") +
  theme_minimal()
## Warning: Removed 927 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Every salinity reading plotted against the decimal date.
# `decdate` was already added to `temperature` for the temperature scatter plot
# and its inputs (Year, dDay3) have not changed, so it is reused here instead
# of being recomputed.
ggplot(temperature, aes(x = decdate, y = Salinity)) +
  geom_point(color = "steelblue") +
  theme_minimal() +
  labs(x = "Date", y = "Salinity (ppt)", title = "Salinity Over Time")
## Warning: Removed 798 rows containing missing values or values outside the scale range
## (`geom_point()`).

Scatter plot of salinity grouped by area:

# Salinity against decimal date, one panel per Area. Points are
# semi-transparent (alpha = 0.6) so overlapping readings remain visible.
# `decdate` is reused from the earlier temperature scatter plot; its inputs
# (Year, dDay3) have not changed, so recomputing it would be a no-op.
ggplot(temperature, aes(x = decdate, y = Salinity)) +
  geom_point(color = "blue", alpha = 0.6) +
  facet_wrap(~ Area) +
  theme_minimal() +
  labs(
    x = "Decimal Date",
    y = "Salinity",
    title = "Salinity Over Time by Area"
  )
## Warning: Removed 798 rows containing missing values or values outside the scale range
## (`geom_point()`).

Line plot of salinity over time, with one line per station and one panel per area:

# One salinity line per station (grouped and coloured by Station), with a
# separate panel for each Area.
# `decdate` is reused from the earlier temperature scatter plot; its inputs
# (Year, dDay3) have not changed, so recomputing it would be a no-op.
ggplot(
  temperature,
  aes(x = decdate, y = Salinity, group = Station, color = Station)
) +
  geom_line() +
  facet_wrap(~ Area) +
  theme_minimal() +
  labs(
    x = "Decimal Date",
    y = "Salinity",
    title = "Salinity Over Time by Station, Grouped by Area"
  )
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

Line plot of salinity for each station, but only for area OS:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the rows whose Area is "OS". The `decdate` column added for the
# earlier temperature scatter plot is carried along, so it is not recomputed.
temperature_OS <- temperature %>% filter(Area == "OS")

# One salinity line per station (grouped and coloured by Station) for that
# single area, so no faceting is needed.
ggplot(
  temperature_OS,
  aes(x = decdate, y = Salinity, group = Station, color = Station)
) +
  geom_line() +
  theme_minimal() +
  labs(
    x = "Decimal Date",
    y = "Salinity",
    title = "Salinity Over Time for Each Station in Area 'OS'"
  )

The end.