Directory and doc rules

knitr::opts_chunk$set(
  echo = TRUE,         # Display code chunks
  eval = TRUE,         # Evaluate code chunks
  warning = FALSE,     # Hide warnings
  message = FALSE,     # Hide messages
  fig.width = 20,       # Set plot width in inches
  fig.height = 9,      # Set plot height in inches
  fig.align = "center" # Align plots to the center
)

Load packages

library(tinytex)
library(tidyr)
library(tidyverse)
library(vegan)

Load data

getwd()

## [1] "/Users/cmantegna/Documents/WDFWmussels/code"

mdata<- read.csv("/Users/cmantegna/Documents/WDFWmussels/data/morphometricdata.csv")

Check data

summary(mdata)

##     latitude       longitude      reporting_area   site_number   
##  Min.   :47.05   Min.   :-123.5   Min.   : 6.00   Min.   : 1.00  
##  1st Qu.:47.33   1st Qu.:-122.7   1st Qu.: 8.20   1st Qu.:21.00  
##  Median :47.61   Median :-122.6   Median :10.00   Median :40.00  
##  Mean   :47.71   Mean   :-122.6   Mean   :10.01   Mean   :39.66  
##  3rd Qu.:48.02   3rd Qu.:-122.4   3rd Qu.:11.00   3rd Qu.:59.00  
##  Max.   :48.82   Max.   :-122.2   Max.   :13.00   Max.   :77.00  
##   site_name           sample_id            at               cf        
##  Length:312         Min.   :  1.00   Min.   :0.3550   Min.   :0.0430  
##  Class :character   1st Qu.: 78.75   1st Qu.:0.6800   1st Qu.:0.1541  
##  Mode  :character   Median :156.50   Median :0.7975   Median :0.1746  
##                     Mean   :156.50   Mean   :0.7835   Mean   :0.2053  
##                     3rd Qu.:234.25   3rd Qu.:0.8762   3rd Qu.:0.1925  
##                     Max.   :312.00   Max.   :1.2600   Max.   :1.8630  
##      sumPAH            hmwPAH            sumPCB      
##  Min.   :   97.4   Min.   :  11.61   Min.   : 16.95  
##  1st Qu.:  176.7   1st Qu.:  74.95   1st Qu.: 35.82  
##  Median :  360.1   Median : 156.65   Median : 49.07  
##  Mean   :  987.1   Mean   : 578.87   Mean   : 58.32  
##  3rd Qu.:  694.8   3rd Qu.: 284.32   3rd Qu.: 71.36  
##  Max.   :14700.7   Max.   :9394.33   Max.   :175.58

Plot data - boxplot, all condition factor values by site name

All analytes were plotted by site and reporting area in p450 flow.

mplot<- ggplot(mdata, aes(x = site_name, y = cf)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) 

print(mplot)

ggsave(plot=mplot, filename="/Users/cmantegna/Documents/WDFWmussels/output/figures/CFboxplotSITE.png", width=20, height=9)

Plot data - boxplot, all shell thickness values by site name

mplot<- ggplot(mdata, aes(x = site_name, y = at)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) 

print(mplot)

ggsave(plot=mplot, filename="/Users/cmantegna/Documents/WDFWmussels/output/figures/ATboxplotSITE.png", width=20, height=9)

Plot data - ranked box plot, all condition factor by site name

#order the sites by value

data_ordered <- mdata[order(mdata$cf),]

#create a factor with the ordered site names
data_ordered$site_name <- factor(data_ordered$site_name, levels = unique(data_ordered$site_name))

#plot with ordered site names
ranked<- ggplot(data_ordered, aes(x = site_name, y = cf)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) # Rotate x labels if needed

print(ranked)

ggsave(plot=ranked, filename="/Users/cmantegna/Documents/WDFWmussels/output/figures/CFboxplotSITEranked.png", width=20, height=9)

Plot data - ranked box plot, all shell thickness by site name

#order the sites by value

data_ordered <- mdata[order(mdata$at),]

#create a factor with the ordered site names
data_ordered$site_name <- factor(data_ordered$site_name, levels = unique(data_ordered$site_name))

#plot with ordered site names
ranked<- ggplot(data_ordered, aes(x = site_name, y = at)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) # Rotate x labels if needed

print(ranked)

ggsave(plot=ranked, filename="/Users/cmantegna/Documents/WDFWmussels/output/figures/ATboxplotSITE.png", width=20, height=9)

Plot data - boxplot, all condition factor values by reporting area

mdata$reporting_area <- as.character(mdata$reporting_area)


mplot<- ggplot(mdata, aes(x = reporting_area, y = cf)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) 

print(mplot)

ggsave(plot=mplot, filename="/Users/cmantegna/Documents/WDFWmussels/output/figures/CFboxplotRA.png", width=20, height=9)

Plot data - boxplot, all shell thickness values by reporting area

mdata$reporting_area <- as.character(mdata$reporting_area)


mplot<- ggplot(mdata, aes(x = reporting_area, y = at)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) 

print(mplot)

ggsave(plot=mplot, filename="/Users/cmantegna/Documents/WDFWmussels/output/figures/ATboxplotRA.png", width=20, height=9)

Shapiro-Wilkes

#test for normality
# *All analytes were plotted by site and reporting area in p450 flow.* 

# condition factor is not normally distributed
# average shell thickness is normally distributed. 
shapiro.test(mdata$cf)

## 
##  Shapiro-Wilk normality test
## 
## data:  mdata$cf
## W = 0.22769, p-value < 2.2e-16

shapiro.test(mdata$at)

## 
##  Shapiro-Wilk normality test
## 
## data:  mdata$at
## W = 0.99674, p-value = 0.7814

Kruskal-Wallis (non-parametric data)

#test for significant differences 

#cf
kruskal.test(cf ~ site_name, data = mdata)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  cf by site_name
## Kruskal-Wallis chi-squared = 117.36, df = 73, p-value = 0.0007696

kruskal.test(cf ~ reporting_area, data = mdata)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  cf by reporting_area
## Kruskal-Wallis chi-squared = 24.919, df = 8, p-value = 0.001604

Kruskal-Wallac Multiple Comparisons (post hoc)

Reporting Areas Are: 6 - East Juan de Fuca Strait 7 - San Juan Islands 8.1 - Deception Pass, Hope Island, and Skagit Bay 8.2 - Port Susan and Port Gardner 9 - Admiralty Inlet 10 - Seattle-Bremerton 11 - Tacoma-Vashon 12 - Hood Canal 13 - South Puget Sound

library(pgirmess)

# significant differences across the sites in pairwise matches: Blair Waterway #2-Port Angeles Yacht Club, Cap Sante-Port Angeles Yacht Club, Eastsound, Fishing Bay-Port Angeles Yacht Club, Everett Harbor-Port Angeles Yacht Club

# significant differences across reporting area pairwise matches: 6-7, 6-8.2

mc_site<- as.data.frame(kruskalmc(cf ~ site_name, data = mdata, method = "bonferroni"))
mc_reporting<- as.data.frame(kruskalmc(cf ~ reporting_area, data = mdata, method = "bonferroni")) 

head(mc_site)

##                                                                                    statistic
## Aiston Preserve-Arroyo Beach                   Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Blair Waterway                 Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Blair Waterway #2              Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Brackenwood Ln                 Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Broad Spit (Fisherman's Point) Multiple comparison test after Kruskal-Wallis
## Aiston Preserve-Browns Point Lighthouse        Multiple comparison test after Kruskal-Wallis
##                                                alpha dif.com.obs.dif
## Aiston Preserve-Arroyo Beach                    0.05          71.000
## Aiston Preserve-Blair Waterway                  0.05         112.375
## Aiston Preserve-Blair Waterway #2               0.05          49.875
## Aiston Preserve-Brackenwood Ln                  0.05         125.625
## Aiston Preserve-Broad Spit (Fisherman's Point)  0.05          61.000
## Aiston Preserve-Browns Point Lighthouse         0.05         111.000
##                                                dif.com.critical.dif
## Aiston Preserve-Arroyo Beach                               249.3519
## Aiston Preserve-Blair Waterway                             273.1513
## Aiston Preserve-Blair Waterway #2                          273.1513
## Aiston Preserve-Brackenwood Ln                             273.1513
## Aiston Preserve-Broad Spit (Fisherman's Point)             273.1513
## Aiston Preserve-Browns Point Lighthouse                    273.1513
##                                                dif.com.stat.signif
## Aiston Preserve-Arroyo Beach                                 FALSE
## Aiston Preserve-Blair Waterway                               FALSE
## Aiston Preserve-Blair Waterway #2                            FALSE
## Aiston Preserve-Brackenwood Ln                               FALSE
## Aiston Preserve-Broad Spit (Fisherman's Point)               FALSE
## Aiston Preserve-Browns Point Lighthouse                      FALSE

head(mc_reporting)

##                                            statistic alpha dif.com.obs.dif
## 10-11  Multiple comparison test after Kruskal-Wallis  0.05        6.175325
## 10-12  Multiple comparison test after Kruskal-Wallis  0.05       39.124675
## 10-13  Multiple comparison test after Kruskal-Wallis  0.05        2.074675
## 10-6   Multiple comparison test after Kruskal-Wallis  0.05       69.418425
## 10-7   Multiple comparison test after Kruskal-Wallis  0.05       39.553703
## 10-8.1 Multiple comparison test after Kruskal-Wallis  0.05       49.449675
##        dif.com.critical.dif dif.com.stat.signif
## 10-11              48.78305               FALSE
## 10-12              96.94133               FALSE
## 10-13              50.65020               FALSE
## 10-6               79.23751               FALSE
## 10-7               57.69005               FALSE
## 10-8.1            107.13067               FALSE

ANOVA (parametric data)

#at
anova_site <- aov(at ~ site_name, data = mdata)
summary(anova_site)

##              Df Sum Sq Mean Sq F value   Pr(>F)    
## site_name    73  3.443 0.04716   3.971 5.71e-16 ***
## Residuals   238  2.826 0.01188                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova_ra <- aov(at ~ reporting_area, data = mdata)
summary(anova_ra)

##                 Df Sum Sq Mean Sq F value   Pr(>F)    
## reporting_area   8  0.813 0.10164   5.645 1.09e-06 ***
## Residuals      303  5.456 0.01801                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Tukey (post hoc)

library(broom)
library(readr)

#significant differences between 104 site comparisons. Table printed and added to GH for review.
tukey_site<- TukeyHSD(anova_site)
ttable_site<- tidy(tukey_site)
sigttable_site <- ttable_site %>% filter(adj.p.value < 0.05)
write.csv(sigttable_site, "/Users/cmantegna/Documents/WDFWmussels/output/tables/SignificantThicknessSite.csv", row.names = FALSE)

# significant differences between the following reporting area comparisons: 11-10, 12-11, 6-11, 7-11, 8.1-11, 13-12 
tukey_ra<- TukeyHSD(anova_ra)
ttable_ra<- tidy(tukey_ra)
sigttable_ra <- ttable_ra %>% filter(adj.p.value < 0.05)
write.csv(sigttable_ra, "/Users/cmantegna/Documents/WDFWmussels/output/tables/SignificantThicknessRA.csv", row.names = FALSE)

Correlation test - cf and SumPAH

# no correlation
correlation_result <- cor.test(mdata$cf, mdata$sumPAH, method = "pearson")
print(correlation_result)

## 
##  Pearson's product-moment correlation
## 
## data:  mdata$cf and mdata$sumPAH
## t = 0.84873, df = 310, p-value = 0.3967
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.06322793  0.15834095
## sample estimates:
##       cor 
## 0.0481488

Correlation test - at and SumPAH

# # correlation= 0.144847 with a p-value= .01041. This is a significant weak correlation.
correlation_result <- cor.test(mdata$at, mdata$sumPAH, method = "pearson")
print(correlation_result)

## 
##  Pearson's product-moment correlation
## 
## data:  mdata$at and mdata$sumPAH
## t = 2.5775, df = 310, p-value = 0.01041
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03436097 0.25183529
## sample estimates:
##      cor 
## 0.144847

Correlation test - cf and hmwPAH

# no correlation
correlation_result <- cor.test(mdata$cf, mdata$hmwPAH, method = "pearson")
print(correlation_result)

## 
##  Pearson's product-moment correlation
## 
## data:  mdata$cf and mdata$hmwPAH
## t = 0.8103, df = 310, p-value = 0.4184
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.06539903  0.15621476
## sample estimates:
##        cor 
## 0.04597352

Correlation test - at and hmwPAH

# correlation= 0.1297819 with a p-value of .02185. This is a significant weak correlation.
correlation_result <- cor.test(mdata$at, mdata$hmwPAH, method = "pearson")
print(correlation_result)

## 
##  Pearson's product-moment correlation
## 
## data:  mdata$at and mdata$hmwPAH
## t = 2.3045, df = 310, p-value = 0.02185
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.01901726 0.23739947
## sample estimates:
##       cor 
## 0.1297819

Correlation test - cf and SumPCB

# no correlation

correlation_result <- cor.test(mdata$cf, mdata$sumPCB, method = "pearson")
print(correlation_result)

## 
##  Pearson's product-moment correlation
## 
## data:  mdata$cf and mdata$sumPCB
## t = 0.83247, df = 310, p-value = 0.4058
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.06414663  0.15744151
## sample estimates:
##        cor 
## 0.04722847

Correlation test - at and SumPCB

# no correlation

correlation_result <- cor.test(mdata$at, mdata$sumPCB, method = "pearson")
print(correlation_result)

## 
##  Pearson's product-moment correlation
## 
## data:  mdata$at and mdata$sumPCB
## t = 1.4966, df = 310, p-value = 0.1355
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.02659083  0.19391294
## sample estimates:
##        cor 
## 0.08469794

03- Morphometrics Data Exploration