Directory and doc rules

knitr::opts_chunk$set(
  echo = TRUE,         # Display code chunks
  eval = TRUE,         # Evaluate code chunks
  warning = FALSE,     # Hide warnings
  message = FALSE,     # Hide messages
  fig.width = 20,       # Set plot width in inches
  fig.height = 9,      # Set plot height in inches
  fig.align = "center" # Align plots to the center
)

Load packages

library(tinytex)
library(tidyr)
library(tidyverse)
library(vegan)

Load data

getwd()

## [1] "/Users/cmantegna/Documents/WDFWmussels/code"

sdata<- read.csv("/Users/cmantegna/Documents/WDFWmussels/data/soddata.csv")

# Data contains numbers below 0 that must be adjusted.These numbers represent samples whose values were below the limit of quantification using the SOD kit. There was some activity in the raw sample, but it is too far below the limit of the standard curve to be accurately quantified.


#replace any SOD values at or below 0 with half of the lower detection limit of .005 (.005*.5). Lower detection limit determined by assay protocol by the manufacturer, Cayman.
sdata$sod[sdata$sod <= 0] <- 0.0025

Check data

summary(sdata)

##     latitude       longitude      reporting_area   site_number   
##  Min.   :47.05   Min.   :-123.5   Min.   : 6.00   Min.   : 1.00  
##  1st Qu.:47.33   1st Qu.:-122.7   1st Qu.: 8.20   1st Qu.:21.00  
##  Median :47.61   Median :-122.6   Median :10.00   Median :40.00  
##  Mean   :47.71   Mean   :-122.6   Mean   :10.01   Mean   :39.74  
##  3rd Qu.:48.02   3rd Qu.:-122.4   3rd Qu.:11.00   3rd Qu.:59.00  
##  Max.   :48.82   Max.   :-122.2   Max.   :13.00   Max.   :77.00  
##   site_name           sample_id          sod              sumPAH       
##  Length:311         Min.   :  1.0   Min.   : 0.0025   Min.   :   97.4  
##  Class :character   1st Qu.: 78.5   1st Qu.: 2.0003   1st Qu.:  176.7  
##  Mode  :character   Median :156.0   Median : 5.8743   Median :  356.9  
##                     Mean   :156.1   Mean   : 7.5653   Mean   :  989.0  
##                     3rd Qu.:233.5   3rd Qu.:10.9163   3rd Qu.:  694.8  
##                     Max.   :312.0   Max.   :73.3449   Max.   :14700.7  
##      hmwPAH            sumPCB      
##  Min.   :  11.61   Min.   : 16.95  
##  1st Qu.:  74.83   1st Qu.: 35.82  
##  Median : 156.65   Median : 49.07  
##  Mean   : 580.13   Mean   : 58.35  
##  3rd Qu.: 284.32   3rd Qu.: 71.36  
##  Max.   :9394.33   Max.   :175.58

Plot data - boxplot, all SOD values by site name

#sod is in unit/ mg protein

pplot<- ggplot(sdata, aes(x = site_name, y = sod)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) 

print(pplot)

ggsave(plot=pplot, filename="/Users/cmantegna/Documents/WDFWmussels/output/figures/SODboxplotSITE.png", width=20, height=9)

Plot data - ranked box plot, all SOD values by site name

#order the sites by value

data_ordered <- sdata[order(sdata$sod),]

#create a factor with the ordered site names
data_ordered$site_name <- factor(data_ordered$site_name, levels = unique(data_ordered$site_name))

#plot with ordered site names
ranked<- ggplot(data_ordered, aes(x = site_name, y = sod)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) # Rotate x labels if needed

print(ranked)

ggsave(plot=ranked, filename="/Users/cmantegna/Documents/WDFWmussels/output/figures/SODboxplotSITEranked.png", width=20, height=9)

All analytes were plotted by site and reporting area in p450 flow.

Plot data - boxplot, all p450 values by reporting area

sdata$reporting_area <- as.character(sdata$reporting_area)

#sod
splot<- ggplot(sdata, aes(x = reporting_area, y = sod)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) 

print(splot)

ggsave(plot=splot, filename="/Users/cmantegna/Documents/WDFWmussels/output/figures/SODboxplotRA.png", width=20, height=9)

Shapiro-Wilkes

#test for normality. Data is not normally distributed.
# *All analytes were plotted by site and reporting area in p450 flow.* 

shapiro.test(sdata$sod)

## 
##  Shapiro-Wilk normality test
## 
## data:  sdata$sod
## W = 0.76932, p-value < 2.2e-16

Kruskal-Wallis

#test for significant interaction

kruskal.test(sod ~ site_name, data = sdata)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  sod by site_name
## Kruskal-Wallis chi-squared = 114.05, df = 73, p-value = 0.001515

kruskal.test(sod ~ reporting_area, data = sdata)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  sod by reporting_area
## Kruskal-Wallis chi-squared = 9.8218, df = 8, p-value = 0.2778

Kruskal-Wallac Multiple Comparisons (post hoc)

Reporting Areas Are: 6 - East Juan de Fuca Strait 7 - San Juan Islands 8.1 - Deception Pass, Hope Island, and Skagit Bay 8.2 - Port Susan and Port Gardner 9 - Admiralty Inlet 10 - Seattle-Bremerton 11 - Tacoma-Vashon 12 - Hood Canal 13 - South Puget Sound

library(pgirmess)

# no significance confirmed between sites
# no significance confirmed between reporting areas

mc_site<- as.data.frame(kruskalmc(sod ~ site_name, data = sdata, method = "bonferroni"))
mc_reporting<- as.data.frame(kruskalmc(sod ~ reporting_area, data = sdata, method = "bonferroni")) 

head(mc_site)

##                                                                                    statistic
## Aiston Preserve-Arroyo Beach                   Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Blair Waterway                 Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Blair Waterway #2              Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Brackenwood Ln                 Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Broad Spit (Fisherman's Point) Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Browns Point Lighthouse        Multiple comparison test after Kruskal-Wallis
##                                                alpha dif.com.obs.dif
## Aiston Preserve-Arroyo Beach                    0.05          81.525
## Aiston Preserve-Blair Waterway                  0.05          63.875
## Aiston Preserve-Blair Waterway #2               0.05          81.125
## Aiston Preserve-Brackenwood Ln                  0.05          63.000
## Aiston Preserve-Broad Spit (Fisherman's Point)  0.05          72.625
## Aiston Preserve-Browns Point Lighthouse         0.05          11.625
##                                                dif.com.critical.dif
## Aiston Preserve-Arroyo Beach                               258.3048
## Aiston Preserve-Blair Waterway                             272.2772
## Aiston Preserve-Blair Waterway #2                          272.2772
## Aiston Preserve-Brackenwood Ln                             272.2772
## Aiston Preserve-Broad Spit (Fisherman's Point)             272.2772
## Aiston Preserve-Browns Point Lighthouse                    272.2772
##                                                dif.com.stat.signif
## Aiston Preserve-Arroyo Beach                                 FALSE
## Aiston Preserve-Blair Waterway                               FALSE
## Aiston Preserve-Blair Waterway #2                            FALSE
## Aiston Preserve-Brackenwood Ln                               FALSE
## Aiston Preserve-Broad Spit (Fisherman's Point)               FALSE
## Aiston Preserve-Browns Point Lighthouse                      FALSE

head(mc_reporting)

##                                            statistic alpha dif.com.obs.dif
## 10-11  Multiple comparison test after Kruskal-Wallis  0.05        4.277778
## 10-12  Multiple comparison test after Kruskal-Wallis  0.05       65.650000
## 10-13  Multiple comparison test after Kruskal-Wallis  0.05       15.491071
## 10-6   Multiple comparison test after Kruskal-Wallis  0.05        4.406250
## 10-7   Multiple comparison test after Kruskal-Wallis  0.05        8.216216
## 10-8.1 Multiple comparison test after Kruskal-Wallis  0.05       30.250000
##        dif.com.critical.dif dif.com.stat.signif
## 10-11              48.83724               FALSE
## 10-12              96.63112               FALSE
## 10-13              50.48812               FALSE
## 10-6               78.98395               FALSE
## 10-7               57.50544               FALSE
## 10-8.1            106.78785               FALSE

Correlation test - SOD and SumPAH

# no correlation
correlation_result <- cor.test(sdata$sod, sdata$sumPAH, method = "pearson")
print(correlation_result)

## 
##  Pearson's product-moment correlation
## 
## data:  sdata$sod and sdata$sumPAH
## t = 1.3246, df = 309, p-value = 0.1863
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03638184  0.18481239
## sample estimates:
##        cor 
## 0.07513952

Correlation test - SOD and hmwPAH

# no correlation
correlation_result <- cor.test(sdata$sod, sdata$hmwPAH, method = "pearson")
print(correlation_result)

## 
##  Pearson's product-moment correlation
## 
## data:  sdata$sod and sdata$hmwPAH
## t = 1.3837, df = 309, p-value = 0.1674
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03303055  0.18805110
## sample estimates:
##        cor 
## 0.07847505

Correlation test - SOD and SumPCB

# correlation= 0.1557987 with a p-value of .0059. This is a significant weak correlation.

correlation_result <- cor.test(sdata$sod, sdata$sumPCB, method = "pearson")
print(correlation_result)

## 
##  Pearson's product-moment correlation
## 
## data:  sdata$sod and sdata$sumPCB
## t = 2.7725, df = 309, p-value = 0.0059
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.04536753 0.26246814
## sample estimates:
##       cor 
## 0.1557987

02- SOD Data Exploration