#setwd("E:/rpauloo/Documents/GitHub/Contaminant-Mapping")
setwd("/Users/richpauloo/Documents/GitHub/Contaminant Mapping/Contaminant-Mapping")
required.packages <- c("sp", "XML", "rgdal", "rgeos", "raster", "ggplot2",
"dismo", "leaflet", "RColorBrewer", "classInt")
suppressMessages(lapply(required.packages, require, character.only = TRUE))
dat = read.csv(file = "TDS_Features_Tulare_Clip_TableToExcel.csv", stringsAsFactors = FALSE) # 54,171 total observations
dat = dat[, c("Well_ID", "IAZ", "Database", "Samp_Date",
"Year", "Time_perio", "Result", "Latitude",
"Longitude", "Depth_ft", "Well_Type", "DepthClass")]
library(plyr)
str(dat)
# create logical arrays to define sampling time intervals
time1 = ifelse(dat$Year > 1950 & dat$Year <= 1960, 1, NA)
time2 = ifelse(dat$Year > 1960 & dat$Year <= 1970, 1, NA)
time3 = ifelse(dat$Year > 1970 & dat$Year <= 1980, 1, NA)
time4 = ifelse(dat$Year > 1980 & dat$Year <= 1990, 1, NA)
time5 = ifelse(dat$Year > 1990 & dat$Year <= 2000, 1, NA)
time6 = ifelse(dat$Year > 2000 & dat$Year <= 2014, 1, NA)
# substitute logical values with numerics for plyr
time1 = as.numeric(time1)
time2 = as.numeric(sub(1, 2, time2))
time3 = as.numeric(sub(1, 3, time3))
time4 = as.numeric(sub(1, 4, time4))
time5 = as.numeric(sub(1, 5, time5))
time6 = as.numeric(sub(1, 6, time6))
tframe = cbind(time1, time2, time3, time4, time5, time6)
tframe = rowSums(tframe[,1:6], na.rm = TRUE)
# ensure all values are in tframe
unique(tframe)
table(tframe) # 0s correspond to pre 1950 data
# add tframe to dat
dat$tframe = tframe
# subset for Shallow and Deep depth class
unique(dat$DepthClass) # need to get rid of unknown values
d.dat = subset(dat, DepthClass == "Deep" | DepthClass == "Shallow") # subset for deep or shallow depth class
unique(d.dat$DepthClass) # shows that Deep and Shallow are the only depth classes now
# subset for time and depth
t.d.dat = subset(d.dat, tframe != 0) # 30,916 observations after cleaning
unique(t.d.dat$tframe) # shows that tframe 0, pr pre 1950 records have been removed
# ddply by well_ID, year, depthclass, and tframe and take Result means
columns = c("Well_ID", "DepthClass", "tframe", "Latitude", "Longitude", "Database")
plydat = ddply(t.d.dat, columns, summarize, Result = round(mean(Result), digits=2)) # 5,552 observations after reduction
# Data per Decade and Database before Cleaning
ggplot(dat, aes(as.factor(tframe))) +
geom_bar(aes(fill = Database)) +
scale_x_discrete(name = "Decade",
labels = c("Pre-1950","1950-1960","1960-1970","1970-1980", "1980-1990", "1990-2000", "2000-2014")) +
ggtitle("Data per Decade and Database before Cleaning") +
scale_fill_brewer(palette = "Spectral") +
theme_bw()
# Data per Decade and Database after Cleaning
ggplot(t.d.dat, aes(as.factor(tframe))) +
geom_bar(aes(fill = Database)) +
scale_x_discrete(name = "Decade",
labels = c("1950-1960","1960-1970","1970-1980", "1980-1990", "1990-2000", "2000-2014")) +
ggtitle("Data per Decade and Database after Cleaning") +
scale_fill_brewer(palette = "Spectral") +
theme_bw()
# Data per Decade and Database after Cleaning and Averaging
ggplot(plydat, aes(as.factor(tframe))) +
geom_bar(aes(fill = Database)) +
scale_x_discrete(name = "Decade",
labels = c("1950-1960","1960-1970","1970-1980", "1980-1990", "1990-2000", "2000-2014")) +
ggtitle("Data per Decade and Database after Cleaning and Averaging") +
scale_fill_brewer(palette = "Spectral") +
theme_bw()
# plot Metadata
meta.data = data.frame(Stage = c("All Observations", "Cleaned for Time & Depth", "Averaged"),
Count = c(nrow(dat), nrow(t.d.dat), nrow(plydat)))
ggplot(meta.data, aes(Stage, Count)) +
geom_bar(stat = "identity") +
ggtitle("Data Count as Cleaning Occured") +
scale_x_discrete(limits = c("All Observations", "Cleaned for Time & Depth", "Averaged")) +
theme_bw()
ggplot(plydat, aes(Result)) +
geom_histogram(bins = 50) +
xlab("TDS (mg/L)") +
ggtitle("Histogram of TDS observations in the Tulare Basin (1902-2014)") +
theme_bw()
plydat$logResult = log10(plydat$Result)
ggplot(plydat, aes(logResult)) +
geom_histogram(bins = 50) +
xlab("log TDS (mg/L)") +
ggtitle("Log Scale Histogram of TDS observations in the Tulare Basin (1902-2014)") +
theme_bw()
well_coords = cbind(plydat$Longitude, plydat$Latitude)
pts = SpatialPoints(well_coords)
# deinfe color palette to be used. blue values correspond to fresh water, red to salty water
co = c("blue","steelblue2","steelblue1","seashell1","orangered1","red3")
latCenter = mean(subset(plydat, plydat$DepthClass == "Deep" & plydat$tframe == 6)$Latitude) - .23 # .23 is not arbitrary. it centers the data
lonCenter = mean(subset(plydat, plydat$DepthClass == "Deep" & plydat$tframe == 6)$Longitude)
# filter plydat for only observations in time frame t and depth class d
map.TDS = function(t, d){
# create time and depth class (t.d) subset for the t and d arguments
t.d = subset(plydat, DepthClass == d &
tframe == t) # remove lat and long from df, and filter for desired results
# create logical vector for all t and d indexes to extract coordinates of these observations
t.d.pts = ifelse(plydat$DepthClass == d &
plydat$tframe == t, TRUE, FALSE)
# use logical vector to subset well_coords, and create spatial points data frame for time frame t and depth d
ptsdf = SpatialPointsDataFrame(pts[t.d.pts , ], data = t.d)
# define palette based on time frame t and depth class d observations
# set bins with a vector of breaks, so they are fine from 0 - 1,000 and coarse from 1,000 - 50,000
pal = colorBin(palette = co,
domain = t.d$Result, bins = c(0,200,400,600,800,1000,
5000,10000,50000))
# overwite t argument inside the function for the legend to display time frame
if(t == 6){t = "(2000-2014)"}
if(t == 5){t = "(1990-2000)"}
if(t == 4){t = "(1980-1990)"}
# generate map assocaited with time frame t and depth d
map = leaflet(data = ptsdf)
map = addTiles(map)
map %>%
addCircleMarkers(map,
lng = well_coords[t.d.pts,1], # longitude
lat = well_coords[t.d.pts,2], # latitude
radius = 4, # fixed radius size
color = ~pal(Result),
stroke = FALSE,
fillOpacity = 0.8,
popup = paste(ptsdf$Result, " mg/L TDS", "<br>",
"Database: ", ptsdf$Database, "<br>",
"Well ID: ", ptsdf$Well_ID, "<br>",
"Latitude: ", ptsdf$Latitude, "<br>",
"Longitude: ", ptsdf$Longitude)
) %>%
addLegend("topright", pal = pal, # use custom palette
values = ~Result,
title = paste(d, t, sep = " "), # legend displays time frame and depth class as title
labFormat = labelFormat(suffix = " mg/L"),
opacity = 1
) %>%
addProviderTiles(providers$Esri.WorldTerrain # override default map with ESRI world terrain map
) %>%
setView(lng = lonCenter, lat = latCenter, zoom = 8) # iteratively changed zoom until it centered on the data
}
Wells were classified into three depth classes (Shallow, Deep, and Unknown) based on information provided by the original source, as shown in Table 3-2. Most wells in the database did not contain quantitative information on well depth or screened interval; however, other information such as well type was used when available to infer the depth from which a well was sampled. Only the USGS database contained quantitative information regarding well depth. For wells lacking a specified value of well depth, the well type was used to infer the depth (see Table 3-2 for examples). Wells in the DWR, GeotrackerGAMA, and RWQCB WDR Dairy Data databases sometimes contain a description of the well type which enabled categorization of the well into a depth class. All wells from the DPH database were assumed to be drinking water supply wells. Irrigation/agricultural38, industrial, and municipal supply wells were classified as “Deep” whereas domestic wells and monitoring wells were classified as “Shallow”. All DPH wells were therefore classified as “Deep” as these were all assumed to be drinking water supply wells. All other well types were classified as “Unknown”. A large number of USGS wells provided numerical values for well depth; therefore, these were used when provided. USGS wells were assigned a depth class based on the 20-year travel depth for a particular CVHM cell that it was located within. Wells with a depth less than the 20-year travel depth were classified as “Shallow,” and those below the 20-year travel depth were classified as “Deep”. Wells without depth information or a well type were classified as “Unknown”.
Larry Walker Associates. 2013. “Initial Conceptual Model (ICM) Technical Services Tasks 7 and 8 – Salt and Nitrate Analysis for the Central Valley Floor and a Focused Analysis of Modesto and Kings Subregions Final Report.”