Water Quality Analysis

Alex Sharp

11/17/2020

library(hydrogeo)
library(tidyverse)
library(rgdal)
library(sp)
library(sf)
library(ggplot2)
library(dplyr)
library(plotly)

Importing data

# Reading in Wimpy water quality data:
data_monthly <- read.csv("Monthlydata.csv")
summary(data_monthly)
##   Sample.ID             Depth            Temp       Specific.Cond.  
##  Length:8           Min.   :124.0   Min.   : 8.90   Min.   :  78.0  
##  Class :character   1st Qu.:124.2   1st Qu.:13.70   1st Qu.: 789.8  
##  Mode  :character   Median :125.0   Median :15.53   Median : 851.8  
##                     Mean   :125.0   Mean   :14.20   Mean   : 870.9  
##                     3rd Qu.:125.8   3rd Qu.:16.10   3rd Qu.:1160.5  
##                     Max.   :126.0   Max.   :16.30   Max.   :1313.0  
##                     NA's   :2                                       
##      Cond.              pH             TSS             TDS       
##  Min.   :  56.0   Min.   :6.600   Min.   : 9.00   Min.   : 91.0  
##  1st Qu.: 655.5   1st Qu.:6.867   1st Qu.:15.75   1st Qu.:518.5  
##  Median : 695.2   Median :6.955   Median :18.50   Median :741.5  
##  Mean   : 727.7   Mean   :7.014   Mean   :25.25   Mean   :615.2  
##  3rd Qu.: 864.8   3rd Qu.:7.122   3rd Qu.:28.25   3rd Qu.:799.8  
##  Max.   :1234.0   Max.   :7.720   Max.   :58.00   Max.   :923.0  
##                                                                  
##    Alkalinity          DO         Total.Coliform       E.coli     
##  Min.   : 38.0   Min.   : 0.020   Min.   :   4.0   Min.   : 6.00  
##  1st Qu.:400.0   1st Qu.: 0.205   1st Qu.:  22.5   1st Qu.: 8.75  
##  Median :574.0   Median : 0.710   Median : 156.0   Median :11.50  
##  Mean   :497.8   Mean   : 2.504   Mean   : 678.2   Mean   :11.50  
##  3rd Qu.:684.0   3rd Qu.: 1.685   3rd Qu.: 782.0   3rd Qu.:14.25  
##  Max.   :856.0   Max.   :13.650   Max.   :2420.0   Max.   :17.00  
##                                                    NA's   :6      
##       TOC              Cl.            SO4..            P...       
##  Min.   : 2.790   Min.   : 6.97   Min.   : 0.60   Min.   :0.0360  
##  1st Qu.: 3.413   1st Qu.:16.11   1st Qu.: 7.31   1st Qu.:0.1202  
##  Median : 3.740   Median :23.35   Median :31.70   Median :0.1460  
##  Mean   :17.117   Mean   :23.69   Mean   :29.75   Mean   :0.1549  
##  3rd Qu.:17.445   3rd Qu.:28.77   3rd Qu.:53.58   3rd Qu.:0.1945  
##  Max.   :58.200   Max.   :44.70   Max.   :56.10   Max.   :0.2640  
##  NA's   :4                                                        
##       Ba.              Cd.              Ca..            Fe...       
##  Min.   :0.0469   Min.   :0.0390   Min.   : 12.20   Min.   : 3.670  
##  1st Qu.:0.1419   1st Qu.:0.0455   1st Qu.: 42.45   1st Qu.: 4.022  
##  Median :0.6135   Median :0.0520   Median :138.00   Median : 4.455  
##  Mean   :0.4928   Mean   :0.0520   Mean   :126.62   Mean   : 6.206  
##  3rd Qu.:0.6388   3rd Qu.:0.0585   3rd Qu.:148.25   3rd Qu.: 7.495  
##  Max.   :1.1600   Max.   :0.0650   Max.   :363.00   Max.   :11.900  
##                   NA's   :6                                         
##       Mg..            Mn..              K.             Na.        
##  Min.   : 4.39   Min.   :0.1530   Min.   :2.530   Min.   :  7.04  
##  1st Qu.:12.81   1st Qu.:0.2527   1st Qu.:4.963   1st Qu.: 31.63  
##  Median :30.10   Median :1.1260   Median :6.380   Median :115.00  
##  Mean   :30.57   Mean   :1.6213   Mean   :5.844   Mean   : 95.55  
##  3rd Qu.:34.70   3rd Qu.:2.7350   3rd Qu.:7.000   3rd Qu.:149.50  
##  Max.   :90.30   Max.   :4.0400   Max.   :8.730   Max.   :180.00  
##                                                                   
##       Sr..              F.              Br.              Be..       
##  Min.   :0.0578   Min.   :0.0590   Min.   :0.1000   Min.   :0.1740  
##  1st Qu.:0.3596   1st Qu.:0.1422   1st Qu.:0.1060   1st Qu.:0.1797  
##  Median :0.7400   Median :0.1850   Median :0.1120   Median :0.1855  
##  Mean   :0.9158   Mean   :0.1680   Mean   :0.1197   Mean   :0.1855  
##  3rd Qu.:1.2150   3rd Qu.:0.2107   3rd Qu.:0.1258   3rd Qu.:0.1913  
##  Max.   :2.7900   Max.   :0.2430   Max.   :0.1550   Max.   :0.1970  
##                   NA's   :4        NA's   :4        NA's   :6       
##      Cr...           Al...             As...               Co..       
##  Min.   :1.670   Min.   :   67.8   Min.   :0.001030   Min.   :0.3200  
##  1st Qu.:4.315   1st Qu.:   80.1   1st Qu.:0.001395   1st Qu.:0.8905  
##  Median :6.960   Median :  203.0   Median :0.002055   Median :1.1100  
##  Mean   :5.250   Mean   : 3982.2   Mean   :0.002499   Mean   :1.9130  
##  3rd Qu.:7.040   3rd Qu.: 8760.0   3rd Qu.:0.003703   3rd Qu.:2.5750  
##  Max.   :7.120   Max.   :10800.0   Max.   :0.004580   Max.   :5.0300  
##  NA's   :5       NA's   :3                            NA's   :1       
##       Cu.             Pb..            Ag.             Ni..       
##  Min.   :0.667   Min.   :0.328   Min.   :0.024   Min.   :0.2600  
##  1st Qu.:1.254   1st Qu.:1.664   1st Qu.:0.024   1st Qu.:0.8475  
##  Median :2.125   Median :3.000   Median :0.024   Median :1.7000  
##  Mean   :1.949   Mean   :2.199   Mean   :0.024   Mean   :2.5650  
##  3rd Qu.:2.820   3rd Qu.:3.135   3rd Qu.:0.024   3rd Qu.:3.3650  
##  Max.   :2.880   Max.   :3.270   Max.   :0.024   Max.   :7.0700  
##  NA's   :4       NA's   :5       NA's   :7                       
##       Mo..             Zn..      
##  Min.   : 0.193   Min.   : 13.1  
##  1st Qu.: 0.442   1st Qu.: 15.1  
##  Median : 1.995   Median : 18.5  
##  Mean   : 3.598   Mean   :154.7  
##  3rd Qu.: 3.632   3rd Qu.: 39.0  
##  Max.   :15.300   Max.   :688.0  
##                   NA's   :3
str(data_monthly)
## 'data.frame':    8 obs. of  38 variables:
##  $ Sample.ID     : chr  "Reservoir" "Well 1" "Well 2" "Well 3" ...
##  $ Depth         : int  NA 125 126 124 NA 125 126 124
##  $ Temp          : num  10.4 16.1 15.9 16.3 8.9 ...
##  $ Specific.Cond.: num  78 807 862 738 1157 ...
##  $ Cond.         : num  56 669 711 615 801 ...
##  $ pH            : num  6.97 6.94 6.93 7.11 7.72 6.68 6.6 7.16
##  $ TSS           : int  58 15 23 20 44 9 17 16
##  $ TDS           : int  91 770 889 714 112 769 923 654
##  $ Alkalinity    : int  40 622 702 526 38 678 856 520
##  $ DO            : num  3.5 0.23 0.13 0.02 13.65 ...
##  $ Total.Coliform: int  116 4 2420 6 236 28 2420 196
##  $ E.coli        : int  17 NA NA NA 6 NA NA NA
##  $ TOC           : num  3.62 2.79 58.2 3.86 NA NA NA NA
##  $ Cl.           : num  8.66 24.1 44.7 25.6 6.97 22.6 38.3 18.6
##  $ SO4..         : num  10.3 56.1 0.6 53.5 9.33 53.8 1.25 53.1
##  $ P...          : num  0.147 0.036 0.247 0.177 0.145 0.094 0.264 0.129
##  $ Ba.           : num  0.0484 0.626 0.601 1.16 0.0469 0.653 0.634 0.173
##  $ Cd.           : num  NA 0.065 NA 0.039 NA NA NA NA
##  $ Ca..          : num  12.2 149 148 363 12.3 139 137 52.5
##  $ Fe...         : num  4.09 3.67 10.9 6.36 4.35 4.56 11.9 3.82
##  $ Mg..          : num  4.43 35 30.8 90.3 4.39 34.6 29.4 15.6
##  $ Mn..          : num  0.153 1.38 3.65 0.283 0.162 2.43 4.04 0.872
##  $ K.            : num  2.63 8.73 6.82 5.97 2.53 7.54 6.79 5.74
##  $ Na.           : num  7.72 115 149 39.6 7.04 115 151 180
##  $ Sr..          : num  0.0578 0.763 1.29 2.79 0.0584 0.717 1.19 0.46
##  $ F.            : num  0.243 NA NA NA 0.2 0.17 NA 0.059
##  $ Br.           : num  NA NA 0.155 0.116 NA NA 0.108 0.1
##  $ Be..          : num  0.197 NA NA NA 0.174 NA NA NA
##  $ Cr...         : num  6.96 NA NA NA 7.12 NA NA 1.67
##  $ Al...         : num  8760 NA NA NA 10800 67.8 80.1 203
##  $ As...         : num  0.00103 0.00165 0.00358 0.00148 0.00114 0.00246 0.00458 0.00407
##  $ Co..          : num  1.06 3.77 NA 0.32 1.11 5.03 1.38 0.721
##  $ Cu.           : num  2.88 NA 1.45 NA 2.8 NA NA 0.667
##  $ Pb..          : num  3 NA NA NA 3.27 NA 0.328 NA
##  $ Ag.           : num  0.024 NA NA NA NA NA NA NA
##  $ Ni..          : num  2.5 5.96 0.3 0.26 2.22 7.07 1.03 1.18
##  $ Mo..          : num  0.298 2.85 0.49 5.5 0.193 3.01 1.14 15.3
##  $ Zn..          : num  15.1 NA NA NA 13.1 688 18.5 39
# Reading in water sample cation-anion data for piper diagram:
ions <- read.csv("cations.anions.csv", header = TRUE)

Using two functions: transform_piper_data transforms the data to match the coordinates of the piper diagram, and ggplot_piper does all of the background.

# transform_piper_data for data transformation:

transform_piper_data <- function(Mg, Ca, Cl,SO4, name=NULL){
  if(is.null(name)){
    name = rep(1:length(Mg),3)
  } else {
    name = rep(name,3)
  }
  y1 <- Mg * 0.86603
  x1 <- 100*(1-(Ca/100) - (Mg/200))
  y2 <- SO4 * 0.86603
  x2 <-120+(100*Cl/100 + 0.5 * 100*SO4/100)
  new_point <- function(x1, x2, y1, y2, grad=1.73206){
    b1 <- y1-(grad*x1)
    b2 <- y2-(-grad*x2)
    M <- matrix(c(grad, -grad, -1,-1), ncol=2)
    intercepts <- as.matrix(c(b1,b2))
    t_mat <- -solve(M) %*% intercepts
    data.frame(x=t_mat[1,1], y=t_mat[2,1])
  }
  np_list <- lapply(1:length(x1), function(i) new_point(x1[i], x2[i], y1[i], y2[i])) 
  npoints <- do.call("rbind",np_list)
  data.frame(observation=name,x=c(x1, x2, npoints$x), y=c(y=y1, y2, npoints$y))
}


# ggplot_piper for piper diagram background:

ggplot_piper <- function() {
  library(ggplot2)
  grid1p1 <<- data.frame(x1 = c(20,40,60,80), x2= c(10,20,30,40),y1 = c(0,0,0,0), 
                         y2 = c(17.3206,34.6412,51.9618, 69.2824))
  grid1p2 <<- data.frame(x1 = c(20,40,60,80), x2= c(60,70,80,90),y1 = c(0,0,0,0), 
                         y2 = c(69.2824, 51.9618,34.6412,17.3206))
  grid1p3 <<- data.frame(x1 = c(10,20,30,40), x2= c(90,80,70,60),
                         y1 = c(17.3206,34.6412,51.9618, 69.2824), 
                         y2 = c(17.3206,34.6412,51.9618, 69.2824))
  grid2p1 <<- grid1p1
  grid2p1$x1 <- grid2p1$x1+120
  grid2p1$x2 <- grid2p1$x2+120
  grid2p2 <<- grid1p2
  grid2p2$x1 <- grid2p2$x1+120
  grid2p2$x2 <- grid2p2$x2+120
  grid2p3 <<- grid1p3
  grid2p3$x1 <- grid2p3$x1+120
  grid2p3$x2 <- grid2p3$x2+120
  grid3p1 <<- data.frame(x1=c(100,90, 80, 70), y1=c(34.6412, 51.9618, 69.2824, 
   86.603), x2=c(150, 140, 130, 120), y2=c(121.2442,138.5648,155.8854,173.2060))
  grid3p2 <<- data.frame(x1=c(70, 80, 90, 100),y1=c(121.2442,138.5648,155.8854,
   173.2060), x2=c(120, 130, 140, 150), y2=c(34.6412, 51.9618, 69.2824, 86.603))
  
  p <- ggplot() +
    
  # Left hand ternary plot:
    
    geom_polygon(data=polygons2, aes(x=x,y=y,fill=Major.Water.Type,
                                     group=Major.Water.Type)) +
    geom_segment(aes(x=0,y=0, xend=100, yend=0)) +
    geom_segment(aes(x=0,y=0, xend=50, yend=86.603)) +
    geom_segment(aes(x=50,y=86.603, xend=100, yend=0)) +
    
  # Right hand ternary plot:
    
    geom_polygon(data=polygons3, aes(x=x,y=y,fill=Major.Water.Type,
                                     group=Major.Water.Type)) +
    geom_segment(aes(x=120,y=0, xend=220, yend=0)) +
    geom_segment(aes(x=120,y=0, xend=170, yend=86.603)) +
    geom_segment(aes(x=170,y=86.603, xend=220, yend=0)) +
    
  # Upper diamond:
    
    geom_polygon(data=polygons, aes(x=x,y=y,fill=Major.Water.Type,
                                    group=Major.Water.Type)) +
    geom_segment(aes(x=110,y=190.5266, xend=60, yend=103.9236)) +
    geom_segment(aes(x=110,y=190.5266, xend=160, yend=103.9236)) +
    geom_segment(aes(x=110,y=17.3206, xend=160, yend=103.9236)) +
    geom_segment(aes(x=110,y=17.3206, xend=60, yend=103.9236)) +
    
  # Add grid lines to the plots:
    
    geom_segment(aes(x=x1, y=y1, yend=y2, xend=x2), data=grid1p1, 
                 linetype = "dashed", size = 0.5, colour = "grey50") +
    geom_segment(aes(x=x1, y=y1, yend=y2, xend=x2), data=grid1p2, 
                 linetype = "dashed", size = 0.5, colour = "grey50") +
    geom_segment(aes(x=x1, y=y1, yend=y2, xend=x2), data=grid1p3, 
                 linetype = "dashed", size = 0.5, colour = "grey50") +
    geom_segment(aes(x=x1, y=y1, yend=y2, xend=x2), data=grid2p1, 
                 linetype = "dashed", size = 0.5, colour = "grey50") +
    geom_segment(aes(x=x1, y=y1, yend=y2, xend=x2), data=grid2p2, 
                 linetype = "dashed", size = 0.5, colour = "grey50") +
    geom_segment(aes(x=x1, y=y1, yend=y2, xend=x2), data=grid2p3, 
                 linetype = "dashed", size = 0.5, colour = "grey50") +
    geom_segment(aes(x=x1, y=y1, yend=y2, xend=x2), data=grid3p1, 
                 linetype = "dashed", size = 0.5, colour = "grey50") +
    geom_segment(aes(x=x1, y=y1, yend=y2, xend=x2), data=grid3p2, 
                 linetype = "dashed", size = 0.5, colour = "grey50") +
    
    
  # Labels and grid values
    
    geom_text(aes(c(20,40,60,80),c(-5,-5,-5,-5), label=c(80, 60, 40, 20)), size=2) +
    geom_text(aes(c(35,25,15,5),grid1p2$y2, label=c(80, 60, 40, 20)), size=2) +
    geom_text(aes(c(95,85,75,65),grid1p3$y2, label=c(80, 60, 40, 20)), size=2) +
    
    coord_equal(ratio=1)+  
    geom_text(aes(17,50, label="Mg^2"), angle=60, size=4, parse=TRUE) +  
    geom_text(aes(82.5,50, label="Na + K"), angle=-60, size=4) +
    geom_text(aes(50,-10, label="Ca^2"), size=4, parse=TRUE) +
    
    geom_text(aes(170,-10, label="Cl^-phantom()"), size=4, parse=TRUE) +
    geom_text(aes(205,50, label="SO^4"), angle=-60, size=4, parse=TRUE) +
    geom_text(aes(137.5,50, label="Alkalinity~as~HCO^3"), angle=60, size=4, parse=TRUE) +
    geom_text(aes(72.5,150, label="SO^4~+~Cl^-phantom()"), angle=60, size=4, parse=TRUE) +
    geom_text(aes(147.5,150, label="Ca^2~+~Mg^2"), angle=-60, size=4, parse=TRUE) + 
    
    geom_text(aes(c(155,145,135,125),grid2p2$y2, label=c(20, 40, 60, 80)), size=2) +
    geom_text(aes(c(215,205,195,185),grid2p3$y2, label=c(20, 40, 60, 80)), size=2) +
    geom_text(aes(c(140,160,180,200),c(-5,-5,-5,-5), label=c(20, 40, 60, 80)), size=2) +
    geom_text(aes(grid3p1$x1-5,grid3p1$y1, label=c(80, 60, 40, 20)), size=2) +
    geom_text(aes(grid3p1$x2+5,grid3p1$y2, label=c(20, 40, 60, 80)), size=2) +
    geom_text(aes(grid3p2$x1-5,grid3p2$y1, label=c(20, 40, 60, 80)), size=2) +
    geom_text(aes(grid3p2$x2+5,grid3p2$y2, label=c(80, 60, 40, 20)), size=2) +
    theme_bw() +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.border = element_blank(), axis.ticks = element_blank(),
          axis.text.x = element_blank(), axis.text.y = element_blank(),
          axis.title.x = element_blank(), axis.title.y = element_blank())
  return(p)
}

# Transform the data into piper based coordinate:

piper_data <- transform_piper_data(Ca = ions$Ca, Mg = ions$Mg, Cl = ions$Cl, 
                                   SO4= ions$SO4, name = ions$Sample.ID)

Color the plot based on major water type/chemistry and run to create polygons:

# Upper Diamond

ids <- factor(c("Sodium Bicarbonate", "Sodium Chloride",
                  "Calcium Bicarbonate", "Calcium Sulfate"))
values <- data.frame(Major.Water.Type = ids, value = c(1,2,3,4))
positions <- data.frame(Major.Water.Type=rep(ids, each = 4),
  x=c(110,85,110,135,
      135,110,135,160,
      85,60,85,110,
      110,85,110,135),
  y=c(17.3206,60.6221, 103.9236,60.6221,
      60.6221, 103.9236, 147.2251, 103.9236,
      60.6221,103.9236,147.2251,103.9236,
      103.9236,147.2251,190.5266,147.2251))
polygons <- merge(values, positions)

# Left Ternary Plot

ids2 <- factor(c("5", "6", "7", "8"))
values2 <- data.frame(Major.Water.Type = ids, value = c(5,6,7,8))
positions2 <- data.frame(Major.Water.Type=rep(ids2, each = 3),
  x=c(50,0,25,
      50,25,75,
      100,50,75,
      75,25,50),
  y=c(0,0,43.3015,
      0,43.3015,43.3015,
      0,0,43.3015,
      43.3015,43.3015,86.603))
polygons2 <- merge(values2, positions2)

# Right Ternary Plot

ids3 <- factor(c("9", "10", "11", "12"))
values3 <- data.frame(Major.Water.Type = ids, value = c(9,10,11,12))
positions3 <- data.frame(Major.Water.Type=rep(ids2, each = 3),
  x=c(170,120,145,
      170,145,195,
      220,170,195,
      195,145,170),
  y=c(0,0,43.3015,
      0,43.3015,43.3015,
      0,0,43.3015,
      43.3015,43.3015,86.603))
polygons3 <- merge(values3, positions3)
# Then, add a line for each 'plot' (upper, left, right) within the ggplot_piper 
#function above

Piper Diagram

ggplot_piper() + geom_point(aes(x,y, colour=factor(observation), 
  shape=factor(observation), stroke=1), size=3,  data=piper_data) +  scale_radius(7) +
  scale_colour_manual(name="Sample ID", values=c("#000000", fill="blue", 
  "springgreen", "cyan1"), labels=c("Reservoir", "Well 1", "Well 2", "Well 3")) +
  scale_shape_manual(name="Sample ID", values=c(21:24), 
                     labels=c("Reservoir", "Well 1", "Well 2", "Well 3")) + 
   theme(legend.position = c(0.1, 0.7), 
        legend.title = element_text(face = "bold", size = 8),
        legend.text = element_text(size = 7))

Scatter Plots

library(car)
#create species objects
TSS <- data_monthly$TSS
TDS <- data_monthly$TDS
pH <- data_monthly$pH
DO <- data_monthly$DO
alkalinity <- data_monthly$Alkalinity
Mg2 <- data_monthly$Mg..
Ca2 <- data_monthly$Ca..
Mn <- data_monthly$Mn..
As <- data_monthly$As...                          
Cl <- data_monthly$Cl.
Fe <- data_monthly$Fe...
Al <- data_monthly$Al...
Samples <- data_monthly$Sample.ID

qplot(x = Mg2, y = Ca2, data = data_monthly, main="Ca vs Mg Concentrations (mg/L)") + geom_point(aes(Mg2,Ca2, size=10,
  colour=factor(Samples), shape=factor(Samples)), size=3, stroke=1, data=data_monthly) +
  scale_colour_manual(name="Sample ID", values=c("#000000", fill="blue", 
  "springgreen", "cyan1"), labels=c("Reservoir", "Well 1", "Well 2", "Well 3")) +
  scale_shape_manual(name="Sample ID", values=c(21,22,23,3), labels=c("Reservoir", 
  "Well 1", "Well 2", "Well 3")) +
  scale_fill_manual(values = c("#000000", "blue", "springgreen", "cyan1"), 
                    labels=c("Reservoir", "Well 1", "Well 2", "Well 3")) + theme(legend.position = c(0.12, 0.82), 
        legend.title = element_text(face = "bold", size = 7),
        legend.text = element_text(size = 6), axis.text = element_text(size=7), axis.title = element_text(size=9))

#create data.frame for new object group
myVariables <- data.frame(Samples, Mn, As, Fe, Mg2, Ca2)
library(lattice)

#assign colors to each sample
cols   <- character(nrow(myVariables))
cols[myVariables$Samples == "Reservoir"] <- "#000000"
cols[myVariables$Samples == "Well 1"] <- "blue"
cols[myVariables$Samples == "Well 2"] <- "springgreen"
cols[myVariables$Samples == "Well 3"] <- "cyan1"

Scatter plot matrices

pairs(myVariables[,2:6], col=cols, lower.panel=NULL, cex.axis=1, pch=1, cex.labels=1.7, cex=2, main="Chemical Species Concentrations (mg/L)")

#create data.frame for second group of water quality parameters
myVariables1 <- data.frame(Samples, TSS, TDS, DO, alkalinity)

#assign colors to each sample
cols[myVariables1$Samples == "Reservoir"] <- "#000000"
cols[myVariables1$Samples == "Well 1"] <- "blue"
cols[myVariables1$Samples == "Well 2"] <- "springgreen"
cols[myVariables1$Samples == "Well 3"] <- "cyan1"

# scatter plot matrix
pairs(myVariables1[,2:5], col= cols, lower.panel=NULL, cex.axis=1, pch=1, cex.labels=1.7, cex=2, main="Water Quality Parameters (mg/L)")