Objective: We need to better understand the sediment
contaminant loads at the Kennerdell, PA Superfund Site.
Load Libraries
library(dplyr)
library(stringr)
library(ggplot2)
Load data
# Read the raw sediment results; the relative path is resolved against the
# current working directory.
Sediment <- read.csv(file = "Sediment_Data_for_Test.csv")
Rename a few columns
# Print the column names so the columns to rename can be identified.
names(Sediment)
## [1] "Sample.Name" "Location.Name"
## [3] "x_coord" "y_coord"
## [5] "coord_type_code" "Matrix"
## [7] "Sample.Date" "Start.Depth"
## [9] "End.Depth" "Depth.Interval"
## [11] "Interval.Name" "Depth.Unit"
## [13] "Sample.Type" "Exposure.Area"
## [15] "Drainage.Sequence.Order" "CAS.RN"
## [17] "Chemical.Name" "Result"
## [19] "Result.Unit" "Result..ND.0."
## [21] "Result..ND.QL." "pH"
## [23] "Analytical.Method" "Method.Name"
## [25] "Method.Group" "Qualifier"
## [27] "Detect" "Detect.Number"
## [29] "Quantitation.Limit..QL." "QL.Unit"
# Rename by column name rather than by position, so the intended columns are
# renamed even if the CSV's column order changes. rename() stops with an
# error if either source column is missing instead of silently renaming
# whatever happens to sit at that index.
Sediment <- Sediment %>%
  rename(
    SampleName = Sample.Name,
    ResultNDQL = Result..ND.QL.
  )
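Filter the dataset to keep only the records of interest: Lead and Nickel
results, a start depth of at most 0.20 (i.e. 20 cm, assuming depths are
recorded in metres), and samples that are not field duplicates (taken here
to be the samples whose name contains "N")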
# Keep Lead and Nickel results with a start depth of at most 0.20 whose
# sample name contains "N". filter() combines comma-separated conditions
# with AND, so a row must satisfy all three to be kept.
Sediment_filter <- filter(
  Sediment,
  str_detect(SampleName, "N"),
  Chemical.Name %in% c("Lead", "Nickel"),
  Start.Depth <= 0.20
)
Create a boxplot of Lead and Nickel by Exposure Area
# Boxplots of Lead and Nickel results, one panel per exposure area, with the
# individual results overlaid and coloured by the Detect column.
ggplot(Sediment_filter, aes(x = Chemical.Name, y = ResultNDQL)) +
  geom_point(aes(color = factor(Detect)), size = 3, alpha = 0.5) +
  # geom_point() already draws every result, so suppress the boxplot's own
  # outlier markers; otherwise each outlier is drawn twice and the boxplot's
  # marker overplots the detect colour.
  geom_boxplot(alpha = 0, outlier.shape = NA) +
  facet_wrap(vars(Exposure.Area)) +
  theme_bw() +
  labs(
    y = "Concentration mg/kg",
    x = "Chemical Name",
    title = "Kennerdell Superfund Site",
    color = "Detect"
  )
Export figure
# Save the most recently displayed plot as a 600 dpi JPEG in the working
# directory.
ggsave(
  filename = "Lead_Nickel_by_ExposureArea.jpeg",
  plot = last_plot(),
  device = "jpeg",
  dpi = 600
)