knitr::opts_chunk$set(
echo = TRUE, # Display code chunks
eval = TRUE, # Evaluate code chunks
warning = FALSE, # Hide warnings
message = FALSE, # Hide messages
fig.width = 20, # Set plot width in inches
fig.height = 9, # Set plot height in inches
fig.align = "center" # Align plots to the center
)
library(tidyr)
library(tidyverse)
library(vegan)
Weight = mg
Length, width, height = mm
p450, SOD = activity/ (mg/protein)
Condition factor, economic factor = unitless
getwd()
## [1] "/Users/cmantegna/Documents/WDFWmussels/code"
#data has all sites, coordinates, p450, sod, condition factor, economic factor data
data<- read.csv("/Users/cmantegna/Documents/WDFWmussels/data/biomarkerfull.csv")
#alldata has the site names, biomarkers, condition factor, average thickness and analyte data - each row is an individual sample
alldata<- read.csv("/Users/cmantegna/Documents/WDFWmussels/data/alldata.csv")
# Data contains 0's and must be adjusted in this order to preserve all usable data.
#sod
#replace any SOD values at or below 0 with half of the lower detection limit of .005 (.005*.5). Lower detection limit determined by assay protocol by the manufacturer, Cayman.
data$SOD[data$SOD <= 0] <- 0.0025
alldata$SOD[alldata$SOD <= 0] <- 0.0025
#p450
#remove any p450 values that are 0 - those are true 0's not non-detectable. I am replacing with na so I don't lose the entire row of data, including the SOD values.
data$p450[data$p450 <= 0] <- NA
alldata$p450[alldata$p450 <= 0] <- NA
#write.csv(alldata, "/Users/cmantegna/Documents/WDFWmussels/data/alldata.csv")
# check the data frame
#summary(alldata)
#str(alldata)
library(dplyr)
library(ggplot2)
# Ensure the columns are numeric by converting them explicitly if necessary
data$p450 <- as.numeric(data$p450)
data$SOD <- as.numeric(data$SOD)
data$condition_factor <- as.numeric(data$condition_factor)
data$avg_thickness <- as.numeric(data$avg_thickness)
data$site_number <- as.numeric(data$site_number)
# Fill missing values with median for 'p450' and 'condition_factor'
data$p450[is.na(data$p450)] <- median(data$p450, na.rm = TRUE)
data$condition_factor[is.na(data$condition_factor)] <- median(data$condition_factor, na.rm = TRUE)
# Calculate Spearman's rank correlation matrix for selected variables
selected_vars <- data %>% select(site_number, p450, SOD, condition_factor, avg_thickness)
correlation_matrix <- cor(selected_vars, method = "spearman")
print(correlation_matrix)
## site_number p450 SOD condition_factor
## site_number 1.00000000 0.09682605 -0.09801992 -0.22554332
## p450 0.09682605 1.00000000 -0.23212416 0.02678856
## SOD -0.09801992 -0.23212416 1.00000000 -0.08788349
## condition_factor -0.22554332 0.02678856 -0.08788349 1.00000000
## avg_thickness 0.12109155 0.19029387 -0.07075347 0.13921086
## avg_thickness
## site_number 0.12109155
## p450 0.19029387
## SOD -0.07075347
## condition_factor 0.13921086
## avg_thickness 1.00000000
#write.csv(correlation_matrix, "/Users/cmantegna/Documents/WDFWmussels/output/tables/all_corr.csv")
library(dplyr)
# Define regions based on latitude
data$region <- case_when(
data$latitude >= 47.78184 & data$latitude <= 48.8208 ~ "North",
data$latitude >= 47.38566 & data$latitude <= 47.7296079 ~ "Central",
data$latitude >= 47.052362 & data$latitude <= 47.3539722 ~ "South",
TRUE ~ NA_character_
)
# Filter out rows without a region designation (if any)
data <- data %>% filter(!is.na(region))
# Function to calculate and format Spearman correlation as a data frame
calculate_correlation <- function(data, region_name) {
corr <- cor(data %>% select(site_number, p450, SOD, condition_factor, avg_thickness),
method = "spearman")
corr_df <- as.data.frame(as.table(corr))
names(corr_df) <- c("Variable1", "Variable2", "Correlation")
corr_df$Region <- region_name
return(corr_df)
}
# Calculate correlations for each region and bind them into a single data frame
north_corr <- calculate_correlation(data %>% filter(region == "North"), "North")
central_corr <- calculate_correlation(data %>% filter(region == "Central"), "Central")
south_corr <- calculate_correlation(data %>% filter(region == "South"), "South")
all_corr <- bind_rows(north_corr, central_corr, south_corr)
# Print the consolidated correlation data frame
head(all_corr)
## Variable1 Variable2 Correlation Region
## 1 site_number site_number 1.00000000 North
## 2 p450 site_number 0.08244169 North
## 3 SOD site_number -0.18383185 North
## 4 condition_factor site_number -0.32105058 North
## 5 avg_thickness site_number 0.19111321 North
## 6 site_number p450 0.08244169 North
#write.csv(all_corr, "/Users/cmantegna/Documents/WDFWmussels/output/tables/region_corr.csv")
library(dplyr)
# Split data into North, Central, and South regions based on latitude
data$region <- case_when(
data$latitude >= 47.78184 & data$latitude <= 48.8208 ~ "North",
data$latitude >= 47.38566 & data$latitude <= 47.7296079 ~ "Central",
data$latitude >= 47.052362 & data$latitude <= 47.3539722 ~ "South",
TRUE ~ NA_character_
)
# Filter out rows without a region designation (if any)
data <- data %>% filter(!is.na(region))
# Create a data frame listing sites by region
sites_by_region <- data %>%
select(region, site_name) %>%
distinct() %>%
arrange(region, site_name) # Optional: sort by region and site_name
# Print the data frame
head(sites_by_region)
## region site_name
## 1 Central Arroyo Beach
## 2 Central Brackenwood Ln
## 3 Central Des Moines Marina
## 4 Central Eagle Harbor Dr
## 5 Central Elliot Bay Myrtle Edwards
## 6 Central Elliott Bay, Harbor Island, Pier 17
#write.csv(sites_by_region,"/Users/cmantegna/Documents/WDFWmussels/data/regions.csv" )
library(dplyr)
# Convert all relevant variables to numeric and filter out any NAs
data <- data %>%
mutate(
p450 = as.numeric(p450),
SOD = as.numeric(SOD),
condition_factor = as.numeric(condition_factor),
avg_thickness = as.numeric(avg_thickness),
site_number = as.numeric(site_number),
region = case_when(
latitude >= 47.78184 & latitude <= 48.8208 ~ "North",
latitude >= 47.38566 & latitude <= 47.7296079 ~ "Central",
latitude >= 47.052362 & latitude <= 47.3539722 ~ "South",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(region)) %>%
drop_na(p450, SOD, condition_factor, avg_thickness, site_number) # Drops rows with NA in any of these columns
# Define variables for correlation
variables <- c("site_number", "p450", "SOD", "condition_factor", "avg_thickness")
# Calculate Spearman's rank correlation matrix and p-values
get_correlation_and_pvalues <- function(df, region) {
# Calculate correlation matrix
corr_matrix <- cor(df[variables], method = "spearman")
# Prepare to store p-values
p_values <- matrix(nrow = length(variables), ncol = length(variables), dimnames = list(variables, variables))
# Calculate p-values
for (i in seq_along(variables)) {
for (j in seq_along(variables)) {
if (i != j) {
test_result <- cor.test(df[[variables[i]]], df[[variables[j]]], method = "spearman")
p_values[i, j] <- test_result$p.value
} else {
p_values[i, j] <- NA # Diagonal elements are not applicable
}
}
}
list(correlation = corr_matrix, p_values = p_values, region = region)
}
# Apply the function to each region
north_results <- get_correlation_and_pvalues(data %>% filter(region == "North"), "North")
central_results <- get_correlation_and_pvalues(data %>% filter(region == "Central"), "Central")
south_results <- get_correlation_and_pvalues(data %>% filter(region == "South"), "South")
head(north_results)
## $correlation
## site_number p450 SOD condition_factor
## site_number 1.00000000 0.08244169 -0.18383185 -0.32105058
## p450 0.08244169 1.00000000 -0.26279653 0.07646417
## SOD -0.18383185 -0.26279653 1.00000000 -0.04809044
## condition_factor -0.32105058 0.07646417 -0.04809044 1.00000000
## avg_thickness 0.19111321 0.36623867 -0.40940032 0.17280803
## avg_thickness
## site_number 0.1911132
## p450 0.3662387
## SOD -0.4094003
## condition_factor 0.1728080
## avg_thickness 1.0000000
##
## $p_values
## site_number p450 SOD condition_factor
## site_number NA 3.853384e-01 5.128632e-02 0.0005260579
## p450 0.3853384288 NA 4.921766e-03 0.4208386358
## SOD 0.0512863219 4.921766e-03 NA 0.6129849311
## condition_factor 0.0005260579 4.208386e-01 6.129849e-01 NA
## avg_thickness 0.0425905506 6.623795e-05 6.713738e-06 0.0671982559
## avg_thickness
## site_number 4.259055e-02
## p450 6.623795e-05
## SOD 6.713738e-06
## condition_factor 6.719826e-02
## avg_thickness NA
##
## $region
## [1] "North"
#write.csv(north_results,"/Users/cmantegna/Documents/WDFWmussels/output/tables/north_spearman.csv" )
head(central_results)
## $correlation
## site_number p450 SOD condition_factor
## site_number 1.000000000 -0.07224937 -0.07368597 -0.02921879
## p450 -0.072249365 1.00000000 -0.21051421 -0.04389451
## SOD -0.073685971 -0.21051421 1.00000000 -0.13044439
## condition_factor -0.029218787 -0.04389451 -0.13044439 1.00000000
## avg_thickness -0.005281988 0.08280128 0.15217392 0.14770352
## avg_thickness
## site_number -0.005281988
## p450 0.082801278
## SOD 0.152173917
## condition_factor 0.147703525
## avg_thickness 1.000000000
##
## $p_values
## site_number p450 SOD condition_factor
## site_number NA 0.46390525 0.45504274 0.7673197
## p450 0.4639053 NA 0.03112088 0.6565979
## SOD 0.4550427 0.03112088 NA 0.1847299
## condition_factor 0.7673197 0.65659789 0.18472988 NA
## avg_thickness 0.9573521 0.40105002 0.12121525 0.1326706
## avg_thickness
## site_number 0.9573521
## p450 0.4010500
## SOD 0.1212153
## condition_factor 0.1326706
## avg_thickness NA
##
## $region
## [1] "Central"
#write.csv(central_results,"/Users/cmantegna/Documents/WDFWmussels/output/tables/central_spearman.csv" )
head(south_results)
## $correlation
## site_number p450 SOD condition_factor
## site_number 1.00000000 0.13309113 -0.02303667 0.10318273
## p450 0.13309113 1.00000000 -0.23856887 0.06372156
## SOD -0.02303667 -0.23856887 1.00000000 -0.08617974
## condition_factor 0.10318273 0.06372156 -0.08617974 1.00000000
## avg_thickness -0.01938553 0.03066833 0.09951509 0.11243735
## avg_thickness
## site_number -0.01938553
## p450 0.03066833
## SOD 0.09951509
## condition_factor 0.11243735
## avg_thickness 1.00000000
##
## $p_values
## site_number p450 SOD condition_factor
## site_number NA 0.20096793 0.82556768 0.3223437
## p450 0.2009679 NA 0.02057981 0.5417573
## SOD 0.8255677 0.02057981 NA 0.4088596
## condition_factor 0.3223437 0.54175726 0.40885963 NA
## avg_thickness 0.8528743 0.76919264 0.33993349 0.2806053
## avg_thickness
## site_number 0.8528743
## p450 0.7691926
## SOD 0.3399335
## condition_factor 0.2806053
## avg_thickness NA
##
## $region
## [1] "South"
#write.csv(south_results,"/Users/cmantegna/Documents/WDFWmussels/output/tables/south_spearman.csv" )
# p450
pregion<- ggplot(data, aes(x = region, y = p450)) +
geom_boxplot() +
labs(title = "p450 Levels by Region", x = "Region", y = "p450 Values")
print(pregion)
#ggsave(plot=pregion, filename="/Users/cmantegna/Documents/WDFWmussels/output/figures/p450region.png", width=15, height=8)
# SOD
sregion<- ggplot(data, aes(x = region, y = SOD)) +
geom_boxplot() +
labs(title = "SOD Levels by Region", x = "Region", y = "SOD Values")
print(sregion)
#ggsave(plot=sregion, filename="/Users/cmantegna/Documents/WDFWmussels/output/figures/SODregion.png", width=15, height=8)