## Set wd, load packages
setwd("/Users/richpauloo/Documents/GitHub/Regional_GW_Quality_Stats/geomorph_merged_east_west")

packages <- list('data.table','raster','dplyr','geosphere','maptools', 'rgdal','maptools','spdplyr', 'polyclip', 'broom', 'sp', 'rgeos', 'ggplot2')
lapply(packages, library, character.only = TRUE)

Bring in data

# set wd ----
setwd('/Users/richpauloo/Documents/GitHub/Regional_GW_Quality_Stats/geomorph_merged_east_west')

# east side shapefile generated in ArcMap ----
east_side_shapefile <- "geomorph_tulare_east.shp"
east <- shapefile(east_side_shapefile)
plot(east)

# west side shapefile generated in ArcMap ----
west_side_shapefile <- "geomorph_tulare_west.shp"
west <- shapefile(west_side_shapefile)
plot(west)

# water quality observations ----
dat <- fread('TDS_Features_Tulare_Clip_TableToExcel.csv')
# get coords of dat points ----
dat_pts <- dat %>% select(Longitude, Latitude)
colnames(dat_pts) <- c('x','y')

# bring in east and west outlines from ArcMap ----
temp <- fread('geomorph_east_Feature_TableToExcel.csv')
east_outline <- temp %>% select(POINT_X, POINT_Y)
colnames(east_outline) <- c('x','y')
# west ----
temp <- fread('geomorph_west_Feature_TableToExcel.csv')
west_outline <- temp %>% select(POINT_X, POINT_Y)
colnames(west_outline) <- c('x','y')
# visualize ----
ggplot() + 
  geom_point(data = dat, mapping = aes(x=Longitude,y=Latitude)) +
  geom_point(data = west_outline, mapping = aes(x,y), color = 'red') + 
  geom_point(data = east_outline, mapping = aes(x,y), color = 'blue') +
  coord_fixed(1.3)

# subset dat by east and west by using east and west outlines ----
require(splancs) 
east_pts <- dat[inout(dat_pts, east_outline), ]
west_pts <- dat[inout(dat_pts, west_outline), ]
# visualize outlines and data
ggplot() +
  geom_point(data = east_outline, mapping = aes(x=x,y=y)) +
  geom_point(data = west_outline, mapping = aes(x=x,y=y)) +
  geom_point(data = east_pts, mapping = aes(x=Longitude,y=Latitude), cex = .05, color = 'blue') +
  geom_point(data = west_pts, mapping = aes(x=Longitude,y=Latitude), cex = .05, color = 'red') +
  coord_fixed(1.3)

# lines from east and west alluvial boundaries ----
east_line <- fread('alluvial_east_line.csv')
west_line <- fread('alluvial_west_line.csv')
# visualize ---
ggplot() + 
  geom_point(data = east_line, mapping = aes(x = POINT_X, y = POINT_Y), color = "red") +
  geom_point(data = east_pts, mapping = aes(x = Longitude, y = Latitude), color = "green", alpha = .3) +
  geom_point(data = west_line, mapping = aes(x = POINT_X, y = POINT_Y), color = "blue") +
  geom_point(data = west_pts, mapping = aes(x = Longitude, y = Latitude), color = "orange", alpha = .3) +
  coord_fixed(1.3) 

Calculate Distance between east points and east line, west points and west line

# Haversine (Great Circle distance) for spatial points ----

# define a line by a sequence of points between two points at the ends of a line
# length can be any number. as length approaches infinity, the distance between points and
# the line appraoches a normal (orthogonal) orientation ----

# rename column names of lines and points for spDists ----
colnames(east_line)[10:11] <- c("x","y")
colnames(west_line)[10:11] <- c("x","y")
colnames(east_pts)[20:21] <-c("y","x")
colnames(west_pts)[20:21] <-c("y","x")

# make lines and pts spatial points objects for spDists ----
coordinates(east_line) <- c("x","y")
coordinates(east_pts) <- c("x","y")
coordinates(west_line) <- c("x","y")
coordinates(west_pts) <- c("x","y")
# ensure the conversion occured ----
#lapply(list(east_line,east_pts,west_line,west_pts), class)


# takes a line defined by a set of points along a line, and a set of points, and
# returns the minimum (orthogonal) Great circle distance between all pts and
# the line.
gc_min_d <- function(line, pts){
  d <- vector(mode = "integer", length = nrow(pts))
  for(i in 1:nrow(pts@coords)){
    d[i] = min(spDistsN1(line, pts@coords[i,]))
  }
  return(d)
}

# apply function to east and west subsets ----
west_d <- gc_min_d(west_line, west_pts)
east_d <- gc_min_d(east_line, east_pts)

# convert SPDFs back into dataframes ----
library(sf)
west_pts <- as(west_pts, "data.frame")
east_pts <- as(east_pts, "data.frame")

# add distances to these dataframes
west_pts$d_flowpath <- west_d
east_pts$d_flowpath <- east_d

Examine TDS increase along regional GW flowpaths

# subset for Deep wells to examine relationship between distance and TDS
west_pts %>%
  filter(Depth_ft >= 1000) %>% 
  ggplot() +
  geom_point(aes(d_flowpath, Result)) +
  geom_smooth(aes(d_flowpath, Result), method = 'lm') + 
  scale_y_log10()

east_pts %>% 
  filter(Depth_ft >= 1000) %>% 
  ggplot() +
  geom_point(aes(d_flowpath, Result)) +
  geom_smooth(aes(d_flowpath, Result), method = 'lm') + 
  scale_y_log10()

TDS v well depth for USGS database (PLOTS)

# Plots
library(scales) # for commas in graphs
# Entire TB
dat_plot <- dat %>%
  filter(!is.na(Depth_ft)) %>% 
  ggplot() +
  geom_point(aes(x = Depth_ft, y = Result, color = Time_perio), alpha = .4) +
  geom_smooth(aes(Depth_ft, Result), method = 'lm', color = "black") +
  scale_y_log10(breaks = c(0,10,100,1000,10000,100000), labels = comma(c(0,10,100,1000,10000,100000)), limits = c(1,100000)) +
  scale_x_continuous(limits = c(0,2500)) +
  xlab('Depth (ft)') +
  ylab('TDS (mg/L)') +
  #ggtitle('Entire Tulare Basin') +
  #guides(color = guide_legend(override.aes= list(alpha = 1))) +
  scale_color_discrete(name = "Time Period") +
  theme_light() +
  theme(legend.position = "bottom") + 
  guides(color=guide_legend(nrow=2,byrow=TRUE, override.aes= list(alpha = 1)))

# East Side
east_plot <- east_pts %>% 
  filter(!is.na(Depth_ft)) %>% 
  ggplot() +
  geom_point(aes(x = Depth_ft, y = Result, color = Time_perio), alpha = .4) +
  geom_smooth(aes(Depth_ft, Result), method = 'lm', color = "black") +
  scale_y_log10(breaks = c(0,10,100,1000,10000,100000), labels = comma(c(0,10,100,1000,10000,100000)), limits = c(1,100000)) +
  scale_x_continuous(limits = c(0,2500)) +
  xlab('Depth (ft)') +
  ylab('TDS (mg/L)') +
  #ggtitle('East Side') +
  #guides(color = guide_legend(override.aes= list(alpha = 1))) +
  scale_color_discrete(name = "Time Period") +
  theme_light() +
  theme(legend.position = "bottom") + 
  guides(color=guide_legend(nrow=2,byrow=TRUE, override.aes= list(alpha = 1)))

# West Side
west_plot <- west_pts %>% 
  filter(!is.na(Depth_ft)) %>% 
  ggplot() +
  geom_point(aes(x = Depth_ft, y = Result, color = Time_perio), alpha = .4) +
  geom_smooth(aes(Depth_ft, Result), method = 'lm', color = "black") +
  scale_y_log10(breaks = c(0,10,100,1000,10000,100000), labels = comma(c(0,10,100,1000,10000,100000)), limits = c(1,100000)) +
  scale_x_continuous(limits = c(0,2500)) +
  xlab('Depth (ft)') +
  ylab('TDS (mg/L)') +
  #ggtitle('West Side') +
  scale_color_discrete(name = "Time Period") +
  theme_light() +
  theme(legend.position = "bottom") + 
  guides(color=guide_legend(nrow=2,byrow=TRUE, override.aes= list(alpha = 1)))

TDS v well depth for USGS database (MAPS)

# cities in the Tulare Basin
cities <- data.frame(city = c('Fresno', 'Bakersfield', 'Visalia'),
                     longitude = c(-119.7726, -119.0187, -119.2921), 
                     latitude = c(36.7468, 35.3733, 36.3302)
                     )
# read in tulare_union shapefile generated by Tulare_well_stats.Rmd
temp <- "/Users/richpauloo/Documents/GitHub/Regional_GW_Quality_Stats/tulare_union.shp"
tulare_union <- shapefile(temp)

# create the basemap for plotting points for east and west
tulare_base <- ggplot() + 
  coord_fixed(1.3) + 
  geom_polygon(data = tulare_union, aes(x = long, y = lat, group = group), color = "black", fill = "white") +
  theme_void()

######################

# subset data for EAST+WEST points with depth
dat_d <- dat %>% 
  filter(!is.na(Depth_ft))
# plot those points on the basemap
dat_map <- tulare_base + 
  geom_point(data = dat_d, aes(Longitude,Latitude, color = Time_perio), alpha = .3) +
  geom_point(data = cities, mapping = aes(x = longitude, y = latitude, group = factor(city))) +
  geom_text(data = cities, mapping = aes(x = longitude, y = latitude, group = factor(city), label = city), vjust = 0, nudge_y = -0.1, nudge_x = -0.1) +
  #guides(color = guide_legend(override.aes= list(alpha = 1))) +
  guides(color = FALSE) +
  scale_color_discrete(name = "Time Period")

# subset data for EAST side points with depth
east_pts_d <- east_pts %>% 
  filter(!is.na(Depth_ft))
# plot those points on the basemap
east_map <- tulare_base + 
  geom_point(data = east_pts_d, aes(x,y, color = Time_perio), alpha = .3) +
  geom_point(data = cities, mapping = aes(x = longitude, y = latitude, group = factor(city))) +
  geom_text(data = cities, mapping = aes(x = longitude, y = latitude, group = factor(city), label = city), vjust = 0, nudge_y = -0.1, nudge_x = -0.1) +
  #guides(color = guide_legend(override.aes= list(alpha = 1))) +
  guides(color = FALSE) +
  scale_color_discrete(name = "Time Period") 

# subset data for WEST side points with depth
west_pts_d <- west_pts %>% 
  filter(!is.na(Depth_ft))
# plot those points on the basemap
west_map <- tulare_base + 
  geom_point(data = west_pts_d, aes(x,y, color = Time_perio), alpha = .3) +
  geom_point(data = cities, mapping = aes(x = longitude, y = latitude, group = factor(city))) +
  geom_text(data = cities, mapping = aes(x = longitude, y = latitude, group = factor(city), label = city), vjust = 0, nudge_y = -0.1, nudge_x = -0.1) +
  #guides(color = guide_legend(override.aes= list(alpha = 1))) +
  guides(color = FALSE) +
  scale_color_discrete(name = "Time Period")

Put together Maps and Plots

library(cowplot)
# all data
ggdraw() +
  draw_plot(dat_map, x = 0, y = 0, width = .5, height = 1) +
  draw_plot(dat_plot, x = .5, y = 0, width = .5, height = 1) +
  draw_plot_label(label = c("(A)", "(B)"), x = c(0, .48), y = c(1, 1))

# east side
ggdraw() +
  draw_plot(east_map, x = 0, y = 0, width = .5, height = 1) +
  draw_plot(east_plot, x = .5, y = 0, width = .5, height = 1) +
  draw_plot_label(label = c("(A)", "(B)"), x = c(0, .48), y = c(1, 1))

# west side
ggdraw() +
  draw_plot(west_map, x = 0, y = 0, width = .5, height = 1) +
  draw_plot(west_plot, x = .5, y = 0, width = .5, height = 1) +
  draw_plot_label(label = c("(A)", "(B)"), x = c(0, .48), y = c(1, 1))

Hexbin TDS over time

##########################################################
# All data
# facet by DepthClass and Database emphasizing data on Deep wells
library(viridis)
dat %>% 
  filter(Database %in% c("CDPH", "CDWR", "USGS") &
         DepthClass == "Deep") %>% 
  ggplot() +
  geom_hex(aes(Year, Result)) +
  #geom_smooth(aes(Year, Result), method = 'lm', color = "red") +
  scale_fill_viridis() +
  scale_y_log10(breaks = c(10,100,1000,10000,100000), 
                labels = comma(c(10,100,1000,10000,100000)), 
                limits = c(1, 1000000)) +
  theme_light() +
  facet_grid(DepthClass ~ Database)

# facet by DepthClass and Database emphasizing data on Shallow wells
dat %>% 
  filter(Database %in% c("GAMA-EDF", "Dairy", "USGS") & 
           DepthClass == "Shallow") %>% 
  ggplot() +
  geom_hex(aes(Year, Result)) +
  #geom_smooth(aes(Year, Result), method = 'lm', color = "red") +
  scale_fill_viridis() +
  scale_y_log10(breaks = c(10,100,1000,10000,100000), 
                labels = comma(c(10,100,1000,10000,100000)), 
                limits = c(1, 1000000)) +
  theme_light() +
  facet_grid(DepthClass ~ Database)

##########################################################
# EAST Side
# facet by DepthClass and Database emphasizing data on Deep wells
east_pts %>% 
  filter(Database %in% c("CDPH", "CDWR", "USGS") &
         DepthClass == "Deep") %>% 
  ggplot() +
  geom_hex(aes(Year, Result)) +
  #geom_smooth(aes(Year, Result), method = 'lm', color = "red") +
  scale_fill_viridis() +
  scale_y_log10(breaks = c(10,100,1000,10000,100000), 
                labels = comma(c(10,100,1000,10000,100000)), 
                limits = c(1, 1000000)) +
  theme_light() +
  facet_grid(DepthClass ~ Database)

# facet by DepthClass and Database emphasizing data on Shallow wells
east_pts %>% 
  filter(Database %in% c("GAMA-EDF", "Dairy", "USGS") & 
           DepthClass == "Shallow") %>% 
  ggplot() +
  geom_hex(aes(Year, Result)) +
  #geom_smooth(aes(Year, Result), method = 'lm', color = "red") +
  scale_fill_viridis() +
  scale_y_log10(breaks = c(10,100,1000,10000,100000), 
                labels = comma(c(10,100,1000,10000,100000)), 
                limits = c(1, 1000000)) +
  theme_light() +
  facet_grid(DepthClass ~ Database)

##########################################################
# WEST Side
# facet by DepthClass and Database emphasizing data on Deep wells
west_pts %>% 
  filter(Database %in% c("CDPH", "CDWR", "USGS") &
         DepthClass == "Deep") %>% 
  ggplot() +
  geom_hex(aes(Year, Result)) +
  #geom_smooth(aes(Year, Result), method = 'lm', color = "red") +
  scale_fill_viridis() +
  scale_y_log10(breaks = c(10,100,1000,10000,100000), 
                labels = comma(c(10,100,1000,10000,100000)), 
                limits = c(1, 1000000)) +
  theme_light() +
  facet_grid(DepthClass ~ Database)

# facet by DepthClass and Database emphasizing data on Shallow wells
west_pts %>% 
  filter(Database %in% c("GAMA-EDF", "Dairy", "USGS") & 
           DepthClass == "Shallow") %>% 
  ggplot() +
  geom_hex(aes(Year, Result)) +
  #geom_smooth(aes(Year, Result), method = 'lm', color = "red") +
  scale_fill_viridis() +
  scale_y_log10(breaks = c(10,100,1000,10000,100000), 
                labels = comma(c(10,100,1000,10000,100000)), 
                limits = c(1, 1000000)) +
  theme_light() +
  facet_grid(DepthClass ~ Database)

Boxplots

# mutate decade column
dat <- dat %>% 
  mutate(decade = ifelse(Year >= 1922 & Year <= 1929, 1920,
                  ifelse(Year >= 1930 & Year <= 1939, 1930,
                  ifelse(Year >= 1940 & Year <= 1949, 1940,
                  ifelse(Year >= 1950 & Year <= 1959, 1950,
                  ifelse(Year >= 1960 & Year <= 1969, 1960,
                  ifelse(Year >= 1970 & Year <= 1979, 1970,
                  ifelse(Year >= 1980 & Year <= 1989, 1980,
                  ifelse(Year >= 1990 & Year <= 1999, 1990,
                  ifelse(Year >= 2000 & Year <= 2014, 2000, NA))))))))))

# all TDS observations
t1 <- dat %>% group_by(decade) %>% summarise(count = n())

dat %>% 
  ggplot() +
  geom_boxplot(aes(x = factor(decade), y = Result)) +
  scale_y_log10(breaks = c(10,100,1000,10000,100000), 
                labels = comma(c(10,100,1000,10000,100000)), 
                limits = c(1, 9000000)) +
  xlab('Decade') + 
  ylab('TDS (mg/L)') + 
  ggtitle('All TDS observations') +
  theme_light() +
  geom_label(data = t1, 
             aes(x = factor(decade), y = 900000, label = paste("n=", comma(count), sep = "\n")))

# function that returns a label of count per boxplot
give.n <- function(x){
   return(c(y = 6, label = length(x)))
}

# all observations by depthClass
dat %>% 
  filter(DepthClass != "Unknown") %>% 
  ggplot(aes(x = factor(Time_perio), y = Result)) +
  geom_boxplot() +
  scale_y_log10(breaks = c(10,100,1000,10000,100000), 
                labels = comma(c(10,100,1000,10000,100000)), 
                limits = c(1, 9000000)) +
  theme_light() +
  stat_summary(fun.data = give.n, geom = "label") +
  xlab('Time Period') +
  ylab('TDS (mg/L)') +
  ggtitle('Tulare Basin') +
  facet_wrap(~DepthClass)

### EAST Side
east_pts %>% 
  mutate(decade = ifelse(Year >= 1922 & Year <= 1929, 1920,
                  ifelse(Year >= 1930 & Year <= 1939, 1930,
                  ifelse(Year >= 1940 & Year <= 1949, 1940,
                  ifelse(Year >= 1950 & Year <= 1959, 1950,
                  ifelse(Year >= 1960 & Year <= 1969, 1960,
                  ifelse(Year >= 1970 & Year <= 1979, 1970,
                  ifelse(Year >= 1980 & Year <= 1989, 1980,
                  ifelse(Year >= 1990 & Year <= 1999, 1990,
                  ifelse(Year >= 2000 & Year <= 2014, 2000, NA)))))))))) %>% 
  filter(DepthClass != "Unknown") %>% 
  ggplot(aes(x = factor(Time_perio), y = Result)) +
  geom_boxplot() +
  scale_y_log10(breaks = c(10,100,1000,10000,100000), 
                labels = comma(c(10,100,1000,10000,100000)), 
                limits = c(1, 9000000)) +
  theme_light() +
  stat_summary(fun.data = give.n, geom = "label") +
  xlab('Time Period') +
  ylab('TDS (mg/L)') +
  ggtitle('East Side') +
  facet_wrap(~DepthClass)

### WEST Side
west_pts %>% 
  mutate(decade = ifelse(Year >= 1922 & Year <= 1929, 1920,
                  ifelse(Year >= 1930 & Year <= 1939, 1930,
                  ifelse(Year >= 1940 & Year <= 1949, 1940,
                  ifelse(Year >= 1950 & Year <= 1959, 1950,
                  ifelse(Year >= 1960 & Year <= 1969, 1960,
                  ifelse(Year >= 1970 & Year <= 1979, 1970,
                  ifelse(Year >= 1980 & Year <= 1989, 1980,
                  ifelse(Year >= 1990 & Year <= 1999, 1990,
                  ifelse(Year >= 2000 & Year <= 2014, 2000, NA)))))))))) %>% 
  filter(DepthClass != "Unknown") %>% 
  ggplot(aes(x = factor(Time_perio), y = Result)) +
  geom_boxplot() +
  scale_y_log10(breaks = c(10,100,1000,10000,100000), 
                labels = comma(c(10,100,1000,10000,100000)), 
                limits = c(1, 9000000)) +
  theme_light() +
  stat_summary(fun.data = give.n, geom = "label") +
  xlab('Time Period') +
  ylab('TDS (mg/L)') +
  ggtitle('West Side') +
  facet_wrap(~DepthClass)