knitr::opts_chunk$set(
echo = TRUE, # Display code chunks
eval = TRUE, # Evaluate code chunks
warning = FALSE, # Hide warnings
message = FALSE, # Hide messages
fig.width = 20, # Set plot width in inches
fig.height = 9, # Set plot height in inches
fig.align = "center" # Align plots to the center
)
library(tinytex)
library(tidyr)
library(tidyverse)
library(vegan)
getwd()
## [1] "/Users/cmantegna/Documents/WDFWmussels/code"
sdata<- read.csv("/Users/cmantegna/Documents/WDFWmussels/data/soddata.csv")
# Data contains numbers below 0 that must be adjusted.These numbers represent samples whose values were below the limit of quantification using the SOD kit. There was some activity in the raw sample, but it is too far below the limit of the standard curve to be accurately quantified.
#replace any SOD values at or below 0 with half of the lower detection limit of .005 (.005*.5). Lower detection limit determined by assay protocol by the manufacturer, Cayman.
sdata$sod[sdata$sod <= 0] <- 0.0025
summary(sdata)
## latitude longitude reporting_area site_number
## Min. :47.05 Min. :-123.5 Min. : 6.00 Min. : 1.00
## 1st Qu.:47.33 1st Qu.:-122.7 1st Qu.: 8.20 1st Qu.:21.00
## Median :47.61 Median :-122.6 Median :10.00 Median :40.00
## Mean :47.71 Mean :-122.6 Mean :10.01 Mean :39.74
## 3rd Qu.:48.02 3rd Qu.:-122.4 3rd Qu.:11.00 3rd Qu.:59.00
## Max. :48.82 Max. :-122.2 Max. :13.00 Max. :77.00
## site_name sample_id sod sumPAH
## Length:311 Min. : 1.0 Min. : 0.0025 Min. : 97.4
## Class :character 1st Qu.: 78.5 1st Qu.: 2.0003 1st Qu.: 176.7
## Mode :character Median :156.0 Median : 5.8743 Median : 356.9
## Mean :156.1 Mean : 7.5653 Mean : 989.0
## 3rd Qu.:233.5 3rd Qu.:10.9163 3rd Qu.: 694.8
## Max. :312.0 Max. :73.3449 Max. :14700.7
## hmwPAH sumPCB
## Min. : 11.61 Min. : 16.95
## 1st Qu.: 74.83 1st Qu.: 35.82
## Median : 156.65 Median : 49.07
## Mean : 580.13 Mean : 58.35
## 3rd Qu.: 284.32 3rd Qu.: 71.36
## Max. :9394.33 Max. :175.58
#sod is in unit/ mg protein
pplot<- ggplot(sdata, aes(x = site_name, y = sod)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(pplot)
ggsave(plot=pplot, filename="/Users/cmantegna/Documents/WDFWmussels/output/figures/SODboxplotSITE.png", width=20, height=9)
#order the sites by value
data_ordered <- sdata[order(sdata$sod),]
#create a factor with the ordered site names
data_ordered$site_name <- factor(data_ordered$site_name, levels = unique(data_ordered$site_name))
#plot with ordered site names
ranked<- ggplot(data_ordered, aes(x = site_name, y = sod)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) # Rotate x labels if needed
print(ranked)
ggsave(plot=ranked, filename="/Users/cmantegna/Documents/WDFWmussels/output/figures/SODboxplotSITEranked.png", width=20, height=9)
sdata$reporting_area <- as.character(sdata$reporting_area)
#sod
splot<- ggplot(sdata, aes(x = reporting_area, y = sod)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(splot)
ggsave(plot=splot, filename="/Users/cmantegna/Documents/WDFWmussels/output/figures/SODboxplotRA.png", width=20, height=9)
#test for normality. Data is not normally distributed.
# *All analytes were plotted by site and reporting area in p450 flow.*
shapiro.test(sdata$sod)
##
## Shapiro-Wilk normality test
##
## data: sdata$sod
## W = 0.76932, p-value < 2.2e-16
#test for significant interaction
kruskal.test(sod ~ site_name, data = sdata)
##
## Kruskal-Wallis rank sum test
##
## data: sod by site_name
## Kruskal-Wallis chi-squared = 114.05, df = 73, p-value = 0.001515
kruskal.test(sod ~ reporting_area, data = sdata)
##
## Kruskal-Wallis rank sum test
##
## data: sod by reporting_area
## Kruskal-Wallis chi-squared = 9.8218, df = 8, p-value = 0.2778
Reporting Areas Are: 6 - East Juan de Fuca Strait 7 - San Juan Islands 8.1 - Deception Pass, Hope Island, and Skagit Bay 8.2 - Port Susan and Port Gardner 9 - Admiralty Inlet 10 - Seattle-Bremerton 11 - Tacoma-Vashon 12 - Hood Canal 13 - South Puget Sound
library(pgirmess)
# no significance confirmed between sites
# no significance confirmed between reporting areas
mc_site<- as.data.frame(kruskalmc(sod ~ site_name, data = sdata, method = "bonferroni"))
mc_reporting<- as.data.frame(kruskalmc(sod ~ reporting_area, data = sdata, method = "bonferroni"))
head(mc_site)
## statistic
## Aiston Preserve-Arroyo Beach Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Blair Waterway Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Blair Waterway #2 Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Brackenwood Ln Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Broad Spit (Fisherman's Point) Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Browns Point Lighthouse Multiple comparison test after Kruskal-Wallis
## alpha dif.com.obs.dif
## Aiston Preserve-Arroyo Beach 0.05 81.525
## Aiston Preserve-Blair Waterway 0.05 63.875
## Aiston Preserve-Blair Waterway #2 0.05 81.125
## Aiston Preserve-Brackenwood Ln 0.05 63.000
## Aiston Preserve-Broad Spit (Fisherman's Point) 0.05 72.625
## Aiston Preserve-Browns Point Lighthouse 0.05 11.625
## dif.com.critical.dif
## Aiston Preserve-Arroyo Beach 258.3048
## Aiston Preserve-Blair Waterway 272.2772
## Aiston Preserve-Blair Waterway #2 272.2772
## Aiston Preserve-Brackenwood Ln 272.2772
## Aiston Preserve-Broad Spit (Fisherman's Point) 272.2772
## Aiston Preserve-Browns Point Lighthouse 272.2772
## dif.com.stat.signif
## Aiston Preserve-Arroyo Beach FALSE
## Aiston Preserve-Blair Waterway FALSE
## Aiston Preserve-Blair Waterway #2 FALSE
## Aiston Preserve-Brackenwood Ln FALSE
## Aiston Preserve-Broad Spit (Fisherman's Point) FALSE
## Aiston Preserve-Browns Point Lighthouse FALSE
head(mc_reporting)
## statistic alpha dif.com.obs.dif
## 10-11 Multiple comparison test after Kruskal-Wallis 0.05 4.277778
## 10-12 Multiple comparison test after Kruskal-Wallis 0.05 65.650000
## 10-13 Multiple comparison test after Kruskal-Wallis 0.05 15.491071
## 10-6 Multiple comparison test after Kruskal-Wallis 0.05 4.406250
## 10-7 Multiple comparison test after Kruskal-Wallis 0.05 8.216216
## 10-8.1 Multiple comparison test after Kruskal-Wallis 0.05 30.250000
## dif.com.critical.dif dif.com.stat.signif
## 10-11 48.83724 FALSE
## 10-12 96.63112 FALSE
## 10-13 50.48812 FALSE
## 10-6 78.98395 FALSE
## 10-7 57.50544 FALSE
## 10-8.1 106.78785 FALSE
# no correlation
correlation_result <- cor.test(sdata$sod, sdata$sumPAH, method = "pearson")
print(correlation_result)
##
## Pearson's product-moment correlation
##
## data: sdata$sod and sdata$sumPAH
## t = 1.3246, df = 309, p-value = 0.1863
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.03638184 0.18481239
## sample estimates:
## cor
## 0.07513952
# no correlation
correlation_result <- cor.test(sdata$sod, sdata$hmwPAH, method = "pearson")
print(correlation_result)
##
## Pearson's product-moment correlation
##
## data: sdata$sod and sdata$hmwPAH
## t = 1.3837, df = 309, p-value = 0.1674
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.03303055 0.18805110
## sample estimates:
## cor
## 0.07847505
# correlation= 0.1557987 with a p-value of .0059. This is a significant weak correlation.
correlation_result <- cor.test(sdata$sod, sdata$sumPCB, method = "pearson")
print(correlation_result)
##
## Pearson's product-moment correlation
##
## data: sdata$sod and sdata$sumPCB
## t = 2.7725, df = 309, p-value = 0.0059
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.04536753 0.26246814
## sample estimates:
## cor
## 0.1557987