Mystic River TMDL Database Upload 2015

1 Loading EPA lab data

Beginning in April, each month has two (rarely three) events. For each event there is either one lab report that combines all results, or two separate reports one for phos/Ophos data, the other for Chlorophyll.

First, the raw data files are read from Dropbox, and merged into one data frame.

setwd("\\\\psf/Home/Dropbox/")
#setwd("c:/Users/Katrina/Dropbox/")

# Assemble data into one dataframe
options(stringsAsFactors=FALSE)

Apr1 <- read.csv("Phosphorus/raw_grab/20150414/lab/PN15040023CHLA.csv", strip.white=TRUE)
Apr2 <- read.csv("Phosphorus/raw_grab/20150428/lab/PN15040050.csv", strip.white=TRUE)
May1 <- read.csv("Phosphorus/raw_grab/20150512/lab/PN15050020CHLA.csv", strip.white=TRUE)
May2 <- read.csv("Phosphorus/raw_grab/20150526/lab/PN15050041CHLA.csv", strip.white=TRUE)
Jun1 <- read.csv("Phosphorus/raw_grab/20150609/lab/PN15060012CHLA.csv", strip.white=TRUE)
Jun2 <- read.csv("Phosphorus/raw_grab/20150623/lab/15060040_CHLA.csv", strip.white=TRUE)
Jul1_phos <- read.csv("Phosphorus/raw_grab/20150707/lab/15070011.csv", strip.white=TRUE)
Jul1_chl <- read.csv("Phosphorus/raw_grab/20150707/lab/PN15070012CHLA.csv", strip.white=TRUE)
Jul2_phos <- read.csv("Phosphorus/raw_grab/20150721/lab/15070026.csv", strip.white=TRUE)
Jul2_chl <- read.csv("Phosphorus/raw_grab/20150721/lab/PN15070027_MRL_CHLA.csv", strip.white=TRUE)
Aug1_phos <- read.csv("Phosphorus/raw_grab/20150804/lab/15080002.csv", strip.white=TRUE)
Aug1_chl <- read.csv("Phosphorus/raw_grab/20150804/lab/PN15080004_CHLA.csv", strip.white=TRUE)
Aug2_phos <- read.csv("Phosphorus/raw_grab/20150818/lab/15080034.csv", strip.white=TRUE)
Aug2_chl <- read.csv("Phosphorus/raw_grab/20150818/lab/PN15080035_CHLA.csv", strip.white=TRUE)
Sep1_phos <- read.csv("Phosphorus/raw_grab/20150901/lab/15090004.csv", strip.white=TRUE)
Sep1_chl <- read.csv("Phosphorus/raw_grab/20150901/lab/PN15090005CHLA.csv", strip.white=TRUE)
Sep2_phos <- read.csv("Phosphorus/raw_grab/20150915/lab/15090019.csv", strip.white=TRUE)
Sep2_chl <- read.csv("Phosphorus/raw_grab/20150915/lab/PN15090020.csv", strip.white=TRUE)
Sep3_phos <- read.csv("Phosphorus/raw_grab/20150929/lab/15090037.csv", strip.white=TRUE)
Sep3_chl <- read.csv("Phosphorus/raw_grab/20150929/lab/PN15090038_CHLA.csv", strip.white=TRUE)
Oct1_phos <- read.csv("Phosphorus/raw_grab/20151013/lab/15100018.csv", strip.white=TRUE)
Oct1_chl <- read.csv("Phosphorus/raw_grab/20151013/lab/PN15100019CHLA.csv", strip.white=TRUE)
Oct2 <- read.csv("Phosphorus/raw_grab/20151027/lab/15100040.csv", strip.white=TRUE)
Nov1 <- read.csv("Phosphorus/raw_grab/20151110/lab/15110020.csv", strip.white=TRUE)

phos <- rbind(Apr1, Apr2, May1, May2, Jun1, Jun2, Jul1_phos, Jul1_chl, Jul2_phos, Jul2_chl, Aug1_phos,
              Aug1_chl, Aug2_phos, Aug2_chl, Sep1_phos, Sep1_chl, Sep2_phos, Sep2_chl, Sep3_phos,
              Sep3_chl, Oct1_phos, Oct1_chl, Oct2, Nov1)

write.csv(phos, file = "Phosphorus/raw_grab/Phos_lab_Apr_Nov_2015.csv")

2 Cleaning up Sample names and Column headers in EPA lab data

Some sample names in the raw results need correcting. Sites were mis-identified in early surveys (ex. MAR006 should be MAR 003), and there is inconsisency in the labels for duplicates. Some of the code here corrects those errors and others.

Also column names are changed to agree with Field data sheet and Conversion Templates.

phos <- phos %>%
  select(SAMPLE_NUMBER, COLLECTION_DATE, ANALYSIS_CODE, RESULT = COMBINATION_RESULT, ANALYSIS_UNIT)
# phos <- rbind_all(list(phos, chl))

#ALIASES:   
phos$ANALYSIS_CODE <- as.character(phos$ANALYSIS_CODE)
phos$ANALYSIS_CODE[phos$ANALYSIS_CODE == "CLPHYL"] <- "CHLA"
phos$ANALYSIS_CODE <- as.factor(phos$ANALYSIS_CODE)

#CHANGE MAR006 to MAR003; early samples had wrong name; correct some other sample names
phos$SAMPLE_NUMBER <- as.character(phos$SAMPLE_NUMBER)
phos$SAMPLE_NUMBER[phos$SAMPLE_NUMBER == "MAR006"] <- "MAR003"
phos$SAMPLE_NUMBER[phos$SAMPLE_NUMBER == "MYR071-B"] <- "MYR071"
phos$SAMPLE_NUMBER[phos$SAMPLE_NUMBER == "MYR036"] <- "MAR036"
phos$SAMPLE_NUMBER[phos$SAMPLE_NUMBER == "ULPCTR"] <- "UPLCTR"
phos$SAMPLE_NUMBER[phos$SAMPLE_NUMBER == "ULPUPL"] <- "UPLUPL"
phos$SAMPLE_NUMBER[phos$SAMPLE_NUMBER == "MAR071"] <- "MYR071"
phos$SAMPLE_NUMBER[phos$SAMPLE_NUMBER %in% c("CHL_DUP","CHL-DUP", "Chl-dup", "CHL-dup", "CHL_Dup", "CHL-Dup")] <- "Chl_dup"
phos$SAMPLE_NUMBER[phos$SAMPLE_NUMBER %in% c("TP-DUP", "TP-Dup", "TP-dup")] <- "TP_dup"
phos$SAMPLE_NUMBER[phos$SAMPLE_NUMBER %in% c("OPHOS-DUP 1", "OPHOS-Dup 1", "OPHOS-dup 1","OPHOS-DUP1", "OPhos_dup1")] <- "Ophos_dup1"
phos$SAMPLE_NUMBER[phos$SAMPLE_NUMBER %in% c("OPHOS-DUP 2", "OPHOS-Dup 2", "OPHOS-dup 2","OPHOS-DUP2", "OPhos_dup2")] <- "Ophos_dup2"
phos$SAMPLE_NUMBER <- as.factor(phos$SAMPLE_NUMBER)

phos$COLLECTION_DATE <- as.Date(phos$COLLECTION_DATE, "%m/%d/%y")

3 Loading field data

setwd("\\\\psf/Home/Dropbox/")
#setwd("c:/Users/Katrina/Dropbox/")

Apr1F <- read.csv("Phosphorus/raw_grab/20150414/field/Field_data_20150414.csv", strip.white=TRUE)
Apr2F <- read.csv("Phosphorus/raw_grab/20150428/field/Field_data_20150428.csv", strip.white=TRUE)
May1F <- read.csv("Phosphorus/raw_grab/20150512/field/Field_data_20150512.csv", strip.white=TRUE)
May2F <- read.csv("Phosphorus/raw_grab/20150526/field/Field_data_20150526.csv", strip.white=TRUE)
Jun1F <- read.csv("Phosphorus/raw_grab/20150609/field/Field_data_20150609.csv", strip.white=TRUE)
Jun2F <- read.csv("Phosphorus/raw_grab/20150623/field/Field_data_20150623.csv", strip.white=TRUE)
Jul1F <- read.csv("Phosphorus/raw_grab/20150707/field/Field_data_20150707.csv", strip.white=TRUE)
Jul2F <- read.csv("Phosphorus/raw_grab/20150721/field/Field_data_20150721.csv", strip.white=TRUE)
Aug1F <- read.csv("Phosphorus/raw_grab/20150804/field/Field_data_20150804.csv", strip.white=TRUE)
Aug2F <- read.csv("Phosphorus/raw_grab/20150818/field/Field_data_20150818.csv", strip.white=TRUE)
Sep1F <- read.csv("Phosphorus/raw_grab/20150901/field/Field_data_20150901.csv", strip.white=TRUE)
Sep2F <- read.csv("Phosphorus/raw_grab/20150915/field/Field_data_20150915.csv", strip.white=TRUE)
Sep3F <- read.csv("Phosphorus/raw_grab/20150929/field/Field_data_20150929.csv", strip.white=TRUE)
Oct1F <- read.csv("Phosphorus/raw_grab/20151013/field/Field_data_20151013.csv", strip.white=TRUE)
Oct2F <- read.csv("Phosphorus/raw_grab/20151027/field/Field_data_20151027.csv", strip.white=TRUE)
Nov1F <- read.csv("Phosphorus/raw_grab/20151110/field/Field_data_20151110.csv", strip.white=TRUE)

field <- rbind(Apr1F, Apr2F, May1F, May2F, Jun1F, Jun2F, Jul1F, Jul2F, Aug1F,
              Aug2F, Sep1F, Sep2F, Sep3F,Oct1F, Oct2F, Nov1F)

field$SampleID[field$SampleID %in% c("CHL_dup")] <- "Chl_dup"
field$SampleID[field$SampleID == "MAR006"] <- "MAR003"
field$SampleID[field$SampleID == "MAR006"] <- "MAR003"
field$SampleID[field$SampleID == "MYR071-B"] <- "MYR071"
field$SampleID[field$SampleID == "MYR071_B"] <- "MYR071"
field$SampleID[field$SampleID == "MYR036"] <- "MAR036"
field$SampleID[field$SampleID == "ULPCTR"] <- "UPLCTR"
field$SampleID[field$SampleID == "ULPUPL"] <- "UPLUPL"
field$SampleID[field$SampleID == "MAR071"] <- "MYR071"
field$SampleID[field$SampleID == "Orthophos_dup2"] <- "Ophos_dup2"

field$Date <- as.Date(field$Date, "%m/%d/%Y")

write.csv(field, file = "Phosphorus/raw_grab/Phos_field_Apr_Nov_2015.csv")

4 Combining lab and field data

Transform lab data to wide format, and then join to field data.

setwd("\\\\psf/Home/Dropbox/")
#setwd("c:/Users/Katrina/Dropbox/")

phos_spread <-  phos %>%
  filter(ANALYSIS_CODE %in% c("TP", "OPHOS", "CHLA")) %>%
  mutate(Date = COLLECTION_DATE, SampleID = SAMPLE_NUMBER)%>%
  spread(ANALYSIS_CODE, RESULT, fill = NA)

phos_spread$Date <- as.Date(phos_spread$Date, "%m/%d/%y")

merged <-  right_join(phos_spread, field)

## Joining by: c("Date", "SampleID")

## Warning in right_join_impl(x, y, by$x, by$y): joining factor and character
## vector, coercing into character vector

write.csv(merged, file = "Phosphorus/raw_grab/Merged_Apr_Nov_2015.csv")

5 Exporting assembled data

The assembled data set is written to a .csv file, which can be downloaded here:
It is ready to be copied into the General conversion template, where some additional work is required.

6 Additional work done in the Conversion template

6.1 Consolidating Field Dup samples and final station name corrections

Field duplicate sample (TP_dup, CHL_dup, Turbidity_dup, and OPhos_dup) need to be cleaned up and consolidated, such that all TP_dups have turbidity values, and any dups at the same site on a given day get recorded in one row. In addition a few StationIDs did not agree with LocationIDs (two different fields in field sheets; one corrected in code, the other not). These were changed manually. See the Main sheet of the Processed file for the details of changes made.

6.2 Adding flags and comments

A relatively small number of samples have flags (<25) and a handful have additional comments. These are also most easily added manually in the conversion template. The comments and flags can be found in the table of all results exported above: Phosphorus/raw_grab/Phos_lab_Apr_Nov_2015.csv.

6.3 Correcting units and change NDs

First, values for parameters reported in ug/L (TP, PO4) were divided by 1000 to get value in mg/L.

Second, ND values were changed to qualified values (ex. “<.005”), according to parameter, using corrected units.

6.4 Adding necessary locations, flags, and methods to the database

Flags introduced in step 6.2 (J, B) were added to the database Flags table with their complete EPA descriptions. Methods for EPA lab were added. And several locations were added UPLCTR, WEPCTR, UPLUPL, WIPCTR, SPPCTR.

6.3 Manually changing method for temperature

Within a given survey, some temperature is taken with manual thermometer and others with sonde. Assigned a single method. Change others manually in EXPORT_NUMERIC tab.

7 Lab Results: Graphs and Tables

The following graphs and tables are generated from the lab results (phos) above.

7.1 TP and CHLA

#remove sample numbers not needed for graphs
phosGraph <- phos %>%
  filter(ANALYSIS_CODE =="TP" |ANALYSIS_CODE == "CHLA" ) %>%
  filter(!(SAMPLE_NUMBER %in% c("CHL_DUP","CHL_LAB_DUP","TP-DUP", "LABDUP", "CHL-DUP", "TP-blank-new",
                               "TP-blank-old", "TP-Dup",  "HOB002-2", "ABR-C0MP-BLANK",
                               "ABR-COMP-1", "ABR-COMP-2", "ABR-COMP-3", "Chl-dup", "CHL-dup", "CHL_Dup", "CHL-Dup", "COMP-LAB-DUP", "ABR006-COMP", "TP-blank", "CHL-LAB-DUP", "COMP-LABDUP-2", "CHL-L-DUP",
                               "CPMP-LABDUP2", "CPMP-LABDUP3", "CPMP-LABDUP1", "COMP-LABDUP-1")) & RESULT != "ND")

phosGraph$RESULT <- as.numeric(as.character(phosGraph$RESULT))
phosGraph$COLLECTION_DATE <- as.Date(phosGraph$COLLECTION_DATE, "%m/%d/%y")
  
g <- ggplot(phosGraph, aes(COLLECTION_DATE, RESULT, color = ANALYSIS_CODE, 
                           group=interaction(ANALYSIS_CODE, SAMPLE_NUMBER), 
                           fill= ANALYSIS_CODE)) 

g + geom_line(size = 1) + geom_point() + facet_wrap(~ SAMPLE_NUMBER, ncol = 3) + 
  scale_color_manual(values = c("darkolivegreen4",  "violetred3")) +
  theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + ylim(c(0,100))

## geom_path: Each group consist of only one observation. Do you need to adjust the group aesthetic?

## Warning: Removed 2 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_point).

TP results:

#For tables, organize results to display by site and date.
TP_to_date <- tbl_df(phos) %>%
  filter(ANALYSIS_CODE == "TP") 
OPHOS_to_date <- tbl_df(phos) %>%
  filter(ANALYSIS_CODE == "OPHOS")
CHL_to_date <- tbl_df(phos) %>%
  filter(ANALYSIS_CODE == "CHLA") 

PHOS_to_date <- tbl_df(phos) %>%
  filter(ANALYSIS_CODE %in% c("TP", "OPHOS", "CHLA"))

TP_to_date <- dcast(TP_to_date, SAMPLE_NUMBER ~ COLLECTION_DATE, value.var = "RESULT")
OPHOS_to_date <- dcast(OPHOS_to_date, SAMPLE_NUMBER ~ COLLECTION_DATE, value.var = "RESULT")
CHL_to_date <- dcast(CHL_to_date, SAMPLE_NUMBER ~ COLLECTION_DATE, value.var = "RESULT")

#write.csv(TP_to_date, file = "Phosphorus/raw_grab/TP_to_date.csv")

tab <- xtable(TP_to_date, align = rep("c", dim(TP_to_date)[2] + 1))
print(tab, type = "html")

	SAMPLE_NUMBER	2015-04-14	2015-04-28	2015-05-12	2015-05-26	2015-06-09	2015-06-23	2015-07-07	2015-07-21	2015-08-04	2015-08-18	2015-09-01	2015-09-15	2015-09-29	2015-10-13	2015-10-22	2015-10-27	2015-10-29	2015-10-30	2015-11-05	2015-11-10
1	ABR-C0MP-BLANK															15.2
2	ABR-COMP-1																	85
3	ABR-COMP-2																		41
4	ABR-COMP-3																			20
5	ABR006	22	27	21	29	28	44	33	26	9.5	30	19	22	22	20		13.1				17
6	ABR006-COMP																14.9
7	ALB006	69	50	37	41	51	66	58.7	63	50.6	63	53	39	55	53		35.2				36
8	COMP-LABDUP-1																	83
9	COMP-LABDUP-2																		41
10	COMP-SPLIT-BLANK																ND		ND
11	CPMP-LABDUP1																15.5
12	CPMP-LABDUP2																13.6
13	CPMP-LABDUP3																14.1
14	HOB002	12	16	8.1	18	11.0	22	16	34	554	44	36	24	37	350		33.3				25
15	HOB002-2														110
16	MAR003	25	28	14	28	37	34	26	46	37.2	29	28	54	40	36		15.7				43
17	MAR036		29	25	55	55	54	36	40	23.3	24	20	33	42	63		23.6				30
18	MIB0045	34	41	35	25	36	41	26.2	17	15.7	14	12	52	17	160		23.5				25
19	MYR071		14	14	11.0	22	20	13	15	13.9	14	11	11	10	13		12.8				13
20	MYR33	26	24	17	28	27	40	50	62	42.1	39	38	62	40	27		16.8				40
21	MYR43		24	24	24	28	33	26	54	21.5	32	36	41	35	35		17.5				22
22	SPPCTR	23	27	21	20	24	18	16.8	12	ND	13	15	14	17	23		18.4				23
23	TP-blank													ND
24	TP-blank-new					ND
25	TP-blank-old					ND
26	TP_dup		27	7.3	8.4	35	21	57	57	25.4	46	21	23	8.7	160		18.6				37
27	UPLCTR	15	13	6.2	8.2	17	14	11	9.1	ND	9.6	7.2	8.7	9.2	16		10				14
28	UPLUPL	26									26	34	28	24	25		18.3				25
29	WEPCTR	15	17.0	6.9	7.4	21	16	17	21	31.7	27.3	31	19	28	30		21.8				29.3

Orthophosphate results:

tab2 <- xtable(OPHOS_to_date, align = rep("c", dim(OPHOS_to_date)[2] + 1))
print(tab2, type = "html")

	SAMPLE_NUMBER	2015-04-28	2015-05-26	2015-06-23	2015-07-21	2015-08-18	2015-09-15	2015-10-13
1	ABR006	ND	ND	ND	ND	ND	ND	ND
2	ALB006	ND	7.8	8.1	15.2	18	ND	5
3	HOB002	ND	5.15	ND	ND	ND	ND
4	HOB002-2							31
5	MAR003	ND	ND	ND	ND	ND	ND	ND
6	MAR036	ND	ND	ND	ND	ND	ND	ND
7	MIB0045	5.2	ND	6.1	ND	ND	ND	66
8	MYR071	ND	ND	ND	ND	ND	ND	ND
9	MYR33	ND	ND	ND	ND	ND	ND	ND
10	MYR43		ND	ND	ND	ND	ND	ND
11	Ophos_dup1	5.0	ND	ND	ND	ND	ND	ND
12	Ophos_dup2	ND	ND	ND	ND	7.83	ND	ND
13	SPPCTR	ND	ND	ND	ND	ND	ND	ND
14	UPLCTR	ND	ND	ND	ND	ND	ND	ND
15	UPLUPL					ND	ND	ND
16	WEPCTR	ND	ND	ND	ND	ND	ND	ND

Chlorophyll results:

tab3 <- xtable(CHL_to_date, align = rep("c", dim(CHL_to_date)[2] + 1))
print(tab3, type = "html")

	SAMPLE_NUMBER	2015-04-14	2015-04-28	2015-05-12	2015-05-26	2015-06-09	2015-06-23	2015-07-07	2015-07-21	2015-08-04	2015-08-18	2015-09-01	2015-09-15	2015-09-29	2015-10-13
1	CHL-L-DUP												16.59
2	CHL-LAB-DUP			5.27
3	Chl_dup		16.07	11.20	9.32	12.34	20.02	17.17	15.37	5.34	3.04	15.97	19.85	21.4	24.74
4	HOPCTR				8.34			6.56
5	LABDUP	5.06
6	LOLCTR					7.65			8.57
7	MAR003	2.47	21.69	34.04	55.91	21.22	26.45	15.1	18.9	12.61	12.38	17.06	47.48	21.68	20.53
8	MYR33	4.09	22.17	36.94	39.46	20.62	23.65	21.6	32.99	11.52	17.98	24.81	52.41	18.15	14.07
9	MYR43	3.68	13.36	28.09	30.73	11.18	12.07	16.73	30.97	19.68	20.59	18.41	13.45	24.01	15.84
10	SPPCTR	14.12	35.63	9.42	10.17	8.71	11.94	14.1	7.71	5.51	3.28	19.77	7.42	5.9	17.82
11	UPLCTR	2.13	9.42	6.57	16.31	12.47	8.34	8.52	7.56	5.86	8.76	12.38	7.61	6.56	13.84
12	UPLUPL	1.76					4.55			32.96	16.48	28.99	16.38	17.52	15.68
13	WEPCTR	5.06	14.13	6.60	5.31	5.42	8.88	13.81	15.02	16.71	11.55	12.56		26.11	25.46
14	WIPCTR		12.32