Module 7

Author

K M

Module 7

Reading in the CSV file

# Load the monitoring table from the course data folder into `temp`, then
# print a compact overview of its columns and their types.
temp <- read.csv(file = "~/Desktop/GEOG 5680/Data/Temperature.csv")
str(object = temp)
'data.frame':   8528 obs. of  16 variables:
 $ Sample     : chr  "DANT.19900110" "DANT.19900206" "DANT.19900308" "DANT.19900404" ...
 $ Date       : int  19900110 19900206 19900308 19900404 19900509 19900620 19900718 19900815 19900919 19901017 ...
 $ DateNr     : chr  "10/1/90" "6/2/90" "8/3/90" "4/4/90" ...
 $ dDay1      : int  7 34 64 91 126 168 196 224 259 287 ...
 $ dDay2      : int  9 36 66 93 128 170 198 226 261 289 ...
 $ dDay3      : int  9 36 66 93 128 170 198 226 261 289 ...
 $ Station    : chr  "DANT" "DANT" "DANT" "DANT" ...
 $ Area       : chr  "WZ" "WZ" "WZ" "WZ" ...
 $ X31UE_ED50 : num  681380 681380 681380 681380 681380 ...
 $ X31UN_ED50 : num  5920571 5920571 5920571 5920571 5920571 ...
 $ Year       : int  1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 ...
 $ Month      : int  1 2 3 4 5 6 7 8 9 10 ...
 $ Season     : chr  "winter" "winter" "spring" "spring" ...
 $ Salinity   : num  29.2 27.4 25 28.8 33.3 ...
 $ Temperature: num  4 6 7.3 8.2 17.4 18.1 17 21 15.1 13.8 ...
 $ CHLFa      : num  1.3 NA 21.1 25 10.2 6.2 7.9 7.85 13 11.8 ...
library(knitr)
Warning: package 'knitr' was built under R version 4.5.2
library(tidyverse)
Warning: package 'ggplot2' was built under R version 4.5.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.2
✔ ggplot2   4.0.3     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(palmerpenguins)

Attaching package: 'palmerpenguins'

The following objects are masked from 'package:datasets':

    penguins, penguins_raw
library(ggplot2)

Part 1

# Base plot object that maps Salinity to the x axis. It is kept in `Sal` so
# that the faceted histograms can reuse it.
Sal <- ggplot(temp, mapping = aes(x = Salinity))

# Overall distribution of salinity, using bins one unit wide.
Sal +
  geom_histogram(binwidth = 1, colour = "black", fill = "lightblue") +
  labs(title = "Histogram of Salinity Values")
Warning: Removed 798 rows containing non-finite outside the scale range
(`stat_bin()`).

# Same salinity histogram, drawn as one panel per value of Year.
Sal +
  geom_histogram(binwidth = 1, colour = "black", fill = "lightblue") +
  facet_wrap(vars(Year)) +
  labs(title = "Salinity Values by Year")
Warning: Removed 798 rows containing non-finite outside the scale range
(`stat_bin()`).

# Same salinity histogram, drawn as one panel per value of Month.
Sal +
  geom_histogram(binwidth = 1, colour = "black", fill = "lightblue") +
  facet_wrap(vars(Month)) +
  labs(title = "Salinity Values by Month")
Warning: Removed 798 rows containing non-finite outside the scale range
(`stat_bin()`).

# Horizontal boxplots of Temperature, one box per Station, filled by Station.
# `na.rm` is an argument of the geom, not an aesthetic: placed inside aes() it
# has no effect, so it is passed to geom_boxplot() to drop missing
# temperatures silently instead of with a warning.
ggplot(data = temp, aes(x = Temperature, y = Station)) +
  geom_boxplot(aes(fill = Station), na.rm = TRUE) +
  ggtitle("Station Temperatures")
Warning: Removed 927 rows containing non-finite outside the scale range
(`stat_boxplot()`).

# Write the most recently created plot to a PNG in the working directory.
# No size is given, so ggsave() falls back to its default dimensions.
ggsave(
  filename = "Temperature_Values_by_Station_Boxplot.png",
  plot = last_plot()
)
Saving 7 x 5 in image
Warning: Removed 927 rows containing non-finite outside the scale range
(`stat_boxplot()`).

Bonus

# Base plot: Temperature (y) by Station (x).
myplot <- ggplot(data = temp, aes(x = Station, y = Temperature))

# Boxplots with stations ordered along the x axis by their median
# temperature. reorder() needs na.rm = TRUE so that stations with missing
# readings still get a median; the geom-level na.rm = TRUE drops those
# missing readings from the boxes without a warning.
myplot +
  geom_boxplot(
    aes(
      x = reorder(Station, Temperature, FUN = median, na.rm = TRUE),
      fill = Station
    ),
    na.rm = TRUE
  ) +
  xlab("Station") +
  ggtitle("Boxplot of Station Temperature in numerical order")
Warning: Removed 927 rows containing non-finite outside the scale range
(`stat_boxplot()`).

Part 2

# Decimal date = year plus the fraction of the year elapsed.
# Only the day count is divided by 365; dividing the whole sum (as in
# `(Year + dDay3) / 365`) would scale the year too and make consecutive
# years overlap on the time axis.
# NOTE(review): dDay3 looks like a 0-based day of year (9 for 10 Jan 1990 in
# the str() output) -- confirm. Leap years are approximated with 365 days.
temp$decdate <- temp$Year + temp$dDay3 / 365

# Scatter plot of Temperature against decimal date, coloured by Area.
# The constant alpha and na.rm are set on the geom rather than inside aes():
# a constant inside aes() is treated as a mapped variable (and gets its own
# legend), and na.rm is not an aesthetic at all.
ggplot(data = temp, aes(x = decdate, y = Temperature)) +
  geom_point(aes(color = Area), size = 0.5, alpha = 0.5, na.rm = TRUE) +
  ggtitle("Temperature over Time")
Warning: Removed 927 rows containing missing values or values outside the scale range
(`geom_point()`).

# Scatter plot of Salinity against decimal date, coloured by Area.
# The constant alpha and na.rm are set on the geom rather than inside aes():
# a constant inside aes() is treated as a mapped variable (and gets its own
# legend), and na.rm is not an aesthetic at all.
ggplot(data = temp, aes(x = decdate, y = Salinity)) +
  geom_point(aes(color = Area), size = 0.5, alpha = 0.5, na.rm = TRUE) +
  ggtitle("Salinity over Time")
Warning: Removed 798 rows containing missing values or values outside the scale range
(`geom_point()`).

# Base plot: Salinity against decimal date. Stored in `myplot1` so later
# chunks can add different layers to it.
myplot1 <- ggplot(data = temp, aes(x = decdate, y = Salinity))

# Salinity points coloured by Area, one panel per Area. na.rm is given to the
# geom (it has no effect inside aes()) so missing salinities are dropped
# without a warning.
myplot1 +
  geom_point(aes(color = Area), size = 0.5, alpha = 0.5, na.rm = TRUE) +
  facet_wrap(~ Area) +
  ggtitle("Salinity over Time by Area")
Warning: Removed 798 rows containing missing values or values outside the scale range
(`geom_point()`).

# One salinity line per Station (grouped and coloured by Station), with one
# panel per Area, built on the shared `myplot1` base.
myplot1 +
  geom_line(mapping = aes(colour = Station, group = Station), alpha = 0.5) +
  facet_wrap(vars(Area)) +
  labs(title = "Station Salinity grouped by Area")
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_line()`).

Bonus

# Salinity over time for the rows whose Area is "OS", drawn as a line with
# the individual observations overlaid as points.
# na.rm is passed to each geom (it has no effect inside aes()) so missing
# salinities are dropped without a warning.
# NOTE(review): no group aesthetic is set, so if "OS" contains several
# stations the line joins their observations in date order -- confirm this
# is intended.
ggplot(subset(temp, Area == "OS"), aes(x = decdate, y = Salinity)) +
  geom_line(alpha = 0.6, na.rm = TRUE) +
  geom_point(size = 0.6, alpha = 0.8, na.rm = TRUE) +
  ggtitle("OS Area Salinity")
Warning: Removed 23 rows containing missing values or values outside the scale range
(`geom_point()`).