Table of Contents


Synopsis

Working Directory and Required Packages

Load Data


Lakewood Gulch R^2, Coefficients, and Correlations

Weir Gulch R^2, Coefficients, and Correlations

Sanderson Gulch R^2, Coefficients, and Correlations



Synopsis



This document develops a correlation model between the total traffic volume - accumulated from a storm event, through the entire dry period, to the monitoring event - and the different pollutant mass loads.

3 final data tables were formatted with the following data:

  1. 4 storm days at Lakewood Gulch, Weir Gulch, and Sanderson Gulch
  2. Storm regimes (first flush, peak, and recession) as factors, for improved correlation
  3. Numeric representation of Storm Regimes for improved correlation
  4. Traffic volumes during the 1 - 12 antecedent dry days before the storm days
  5. Total traffic volumes before the 4 storm days
  6. Average traffic volumes before the 4 storm days
  7. The mass flow rate, and concentration rate, of the 5 pollutants

The dataset variables “storm regime”, “total traffic volume”, and the 5 pollutants (mass flow and concentration) were fitted with linear models and correlated for the three locations.

The R-squared values of the linear models were calculated.

Graphs and spreadsheets were created of the correlations for Lakewood Gulch, Weir Gulch, and Sanderson Gulch.



Working Directory and Required Packages



# setwd("C:/Users/...")
library(data.table)
library(readxl)
library(corrplot)
library(tidyverse)
library(VIM)
library(knitr)



Load Data



# Read the final per-site data frames back in from their .rds files
# (paths are relative to the current working directory).
Lakewood_Gulch_All_Data  <- readRDS(file = "Lakewood_Gulch_All_Data.rds")
Weir_Gulch_All_Data      <- readRDS(file = "Weir_Gulch_All_Data.rds")
Sanderson_Gulch_All_Data <- readRDS(file = "Sanderson_Gulch_All_Data.rds")



Lakewood Gulch R^2, Coefficients, and Correlations



# Lakewood Gulch: linear model of the copper mass flow rate
#   response:   Copper_Cu_Mass_Flow_Rate_MG_S
#   predictors: Storm_Regime and Total_Ant_Traffic_Volume
set.seed(123)
model_lg <- lm(
  formula = Copper_Cu_Mass_Flow_Rate_MG_S ~
    Storm_Regime + Total_Ant_Traffic_Volume,
  data = Lakewood_Gulch_All_Data
)

# R-squared (coefficient of determination, R^2)
#
# R^2 is the proportion of the variance in the response variable that is
# explained by the explanatory variables of a regression model. It ranges
# from 0 to 1: a value of 1 means the explanatory variables account for all
# of the variation in the response, a value of 0 means they account for
# none of it.
#
# For the Lakewood Gulch model R^2 comes out at 0.5695, i.e. 56.95% of the
# variation in the copper mass flow rate can be explained by the storm
# regime and the cumulative traffic volume.

kable(summary(model_lg)[["r.squared"]],
      caption = "R^2 Value of Lakewood Gulch Linear Regression")
R^2 Value of Lakewood Gulch Linear Regression
x
0.5695737
# One-row table of the fitted Lakewood Gulch coefficients, taken by position
# from coef(model_lg).
# NOTE(review): the model formula keeps lm()'s default intercept, so
# coefficient 1 is the intercept rather than a separate first-flush effect,
# and coefficients 2-3 are regime terms measured against that baseline.
# Presumably "first flush" is the reference level of Storm_Regime - confirm
# with levels(Lakewood_Gulch_All_Data$Storm_Regime) before relying on the
# column labels below.
StormRegimeFirstFlush <- coef(model_lg)[1L]
StormRegimePeak       <- coef(model_lg)[2L]
StormRegimeRecession  <- coef(model_lg)[3L]
TrafficVolume         <- coef(model_lg)[4L]

# Column names are taken from the variable names passed in.
modelCoef <- data.frame(
  StormRegimeFirstFlush,
  StormRegimePeak,
  StormRegimeRecession,
  TrafficVolume
)
rownames(modelCoef) <- "Coefficients"
kable(modelCoef, caption = "Lakewood Gulch Data Coefficients")
Lakewood Gulch Data Coefficients
StormRegimeFirstFlush StormRegimePeak StormRegimeRecession TrafficVolume
Coefficients 3.576138 9.112522 11.05698 -2.5e-06
# Observed copper mass flow rate next to the model's predictions for the
# same rows (predict() is called without newdata, so these are in-sample).
LG_Predictive_Analytics <- data.frame(
  Copper_Flow_MGS = Lakewood_Gulch_All_Data[["Copper_Cu_Mass_Flow_Rate_MG_S"]],
  Predictions = predict(model_lg)
)
LG_Predictive_Analytics
##    Copper_Flow_MGS Predictions
## 1           0.9864   0.3602008
## 2          17.9145   9.4727229
## 3           6.2710   9.4727229
## 4          12.7885  11.4171834
## 5           0.7873   2.0450339
## 6           7.2038   2.0450339
## 7          15.1554  11.1575560
## 8          11.7307  13.1020166
## 9           0.1364   0.9294761
## 10          0.1364   0.9294761
## 11          0.8041  10.0419982
## 12          0.9441   2.0044896
## 13          0.1238   2.0044896
# Quantile-Quantile plot (diagnostic plot number 2 of the fitted model)
plot(model_lg, which = 2)

# Correlate Lakewood Gulch data (graph & spreadsheet)
#
# The columns are selected by name rather than by position (previously
# columns 3 and 16-26), so a change in the layout of the input table raises
# an "undefined columns" error instead of silently correlating the wrong
# variables.
lg_corr_columns <- c(
  "Storm_Regime",
  "Total_Ant_Traffic_Volume",
  "Copper_Cu_Mass_Flow_Rate_MG_S",
  "Iron_Fe_Mass_Flow_Rate_MG_S",
  "Nickel_Ni_Mass_Flow_Rate_MG_S",
  "Lead_Pb_Mass_Flow_Rate_MG_S",
  "Zinc_Zn_Mass_Flow_Rate_MG_S",
  "Copper_Cu_Concentration_MG_L",
  "Iron_Fe_Concentration_MG_L",
  "Nickel_Ni_Concentration_MG_L",
  "Lead_Pb_Concentration_MG_L",
  "Zinc_Zn_Concentration_MG_L"
)
lg_corr_variables <- Lakewood_Gulch_All_Data[lg_corr_columns]

# cor() needs numeric input, so the storm regime is recoded as a number.
lg_corr_variables$Storm_Regime <- as.numeric(lg_corr_variables$Storm_Regime)

# Keep only the numeric columns; vapply() guarantees a logical mask.
lg_is_numeric <- vapply(lg_corr_variables, is.numeric, logical(1))
lg_cor <- cor(lg_corr_variables[, lg_is_numeric])
corrplot(lg_cor, tl.cex = 0.6)

kable(lg_cor, caption = "Lakewood Gulch Data Correlations")
Lakewood Gulch Data Correlations
Storm_Regime Total_Ant_Traffic_Volume Copper_Cu_Mass_Flow_Rate_MG_S Iron_Fe_Mass_Flow_Rate_MG_S Nickel_Ni_Mass_Flow_Rate_MG_S Lead_Pb_Mass_Flow_Rate_MG_S Zinc_Zn_Mass_Flow_Rate_MG_S Copper_Cu_Concentration_MG_L Iron_Fe_Concentration_MG_L Nickel_Ni_Concentration_MG_L Lead_Pb_Concentration_MG_L Zinc_Zn_Concentration_MG_L
Storm_Regime 1.0000000 0.2248380 0.7138477 0.7335943 0.6301980 0.7328496 0.6371892 0.6667170 0.7581214 0.5504145 0.6425996 0.5672885
Total_Ant_Traffic_Volume 0.2248380 1.0000000 0.1083048 -0.0714368 0.4148306 0.2193400 0.2668855 0.4312068 0.2947854 0.5899354 0.4493593 0.4988031
Copper_Cu_Mass_Flow_Rate_MG_S 0.7138477 0.1083048 1.0000000 0.9512056 0.9175412 0.9702144 0.9647209 0.8791949 0.9416285 0.7708087 0.8460783 0.8026019
Iron_Fe_Mass_Flow_Rate_MG_S 0.7335943 -0.0714368 0.9512056 1.0000000 0.7796665 0.9115418 0.8481161 0.7224315 0.8589503 0.5757091 0.6930157 0.6149260
Nickel_Ni_Mass_Flow_Rate_MG_S 0.6301980 0.4148306 0.9175412 0.7796665 1.0000000 0.9535039 0.9769103 0.9729884 0.9542452 0.9557490 0.9649794 0.9463375
Lead_Pb_Mass_Flow_Rate_MG_S 0.7328496 0.2193400 0.9702144 0.9115418 0.9535039 1.0000000 0.9701545 0.9299959 0.9825406 0.8497929 0.9250540 0.8704953
Zinc_Zn_Mass_Flow_Rate_MG_S 0.6371892 0.2668855 0.9647209 0.8481161 0.9769103 0.9701545 1.0000000 0.9593636 0.9625910 0.8957189 0.9368054 0.9243891
Copper_Cu_Concentration_MG_L 0.6667170 0.4312068 0.8791949 0.7224315 0.9729884 0.9299959 0.9593636 1.0000000 0.9641117 0.9660339 0.9891852 0.9824587
Iron_Fe_Concentration_MG_L 0.7581214 0.2947854 0.9416285 0.8589503 0.9542452 0.9825406 0.9625910 0.9641117 1.0000000 0.8874468 0.9566519 0.9136763
Nickel_Ni_Concentration_MG_L 0.5504145 0.5899354 0.7708087 0.5757091 0.9557490 0.8497929 0.8957189 0.9660339 0.8874468 1.0000000 0.9718143 0.9786231
Lead_Pb_Concentration_MG_L 0.6425996 0.4493593 0.8460783 0.6930157 0.9649794 0.9250540 0.9368054 0.9891852 0.9566519 0.9718143 1.0000000 0.9779869
Zinc_Zn_Concentration_MG_L 0.5672885 0.4988031 0.8026019 0.6149260 0.9463375 0.8704953 0.9243891 0.9824587 0.9136763 0.9786231 0.9779869 1.0000000



Weir Gulch R^2, Coefficients, and Correlations



# Weir Gulch: linear model of the copper mass flow rate
#   response:   Copper_Cu_Mass_Flow_Rate_MG_S
#   predictors: Storm_Regime and Total_Ant_Traffic_Volume
set.seed(123)
model_wg <- lm(
  formula = Copper_Cu_Mass_Flow_Rate_MG_S ~
    Storm_Regime + Total_Ant_Traffic_Volume,
  data = Weir_Gulch_All_Data
)

# R-squared of the fitted model, rendered as a captioned table
kable(summary(model_wg)[["r.squared"]],
      caption = "R^2 Value of Weir Gulch Linear Regression")
R^2 Value of Weir Gulch Linear Regression
x
0.5857899
# One-row table of the fitted Weir Gulch coefficients, taken by position
# from coef(model_wg).
# NOTE(review): the model formula keeps lm()'s default intercept, so
# coefficient 1 is the intercept rather than a separate first-flush effect,
# and coefficients 2-3 are regime terms measured against that baseline.
# Presumably "first flush" is the reference level of Storm_Regime - confirm
# with levels(Weir_Gulch_All_Data$Storm_Regime) before relying on the
# column labels below.
StormRegimeFirstFlush <- coef(model_wg)[1L]
StormRegimePeak       <- coef(model_wg)[2L]
StormRegimeRecession  <- coef(model_wg)[3L]
TrafficVolume         <- coef(model_wg)[4L]

# Column names are taken from the variable names passed in.
modelCoef <- data.frame(
  StormRegimeFirstFlush,
  StormRegimePeak,
  StormRegimeRecession,
  TrafficVolume
)
rownames(modelCoef) <- "Coefficients"
kable(modelCoef, caption = "Weir Gulch Data Coefficients")
Weir Gulch Data Coefficients
StormRegimeFirstFlush StormRegimePeak StormRegimeRecession TrafficVolume
Coefficients 5.681272 9.367205 11.10583 -4e-06
# Observed copper mass flow rate next to the model's predictions for the
# same rows (predict() is called without newdata, so these are in-sample).
# NOTE(review): the rendered output of this table lists exactly the same
# observed copper values as the Lakewood Gulch table - confirm that
# Weir_Gulch_All_Data.rds really holds Weir Gulch measurements.
WG_Predictive_Analytics <- data.frame(
  Copper_Flow_MGS = Weir_Gulch_All_Data[["Copper_Cu_Mass_Flow_Rate_MG_S"]],
  Predictions = predict(model_wg)
)
WG_Predictive_Analytics
##    Copper_Flow_MGS Predictions
## 1           0.9864  0.42100530
## 2          17.9145  9.78821059
## 3           6.2710  9.78821059
## 4          12.7885 11.52683654
## 5           0.7873  1.88653223
## 6           7.2038  1.88653223
## 7          15.1554 11.25373752
## 8          11.7307 12.99236346
## 9           0.1364 -0.05236399
## 10          0.1364 -0.05236399
## 11          0.8041  9.31484130
## 12          0.9441  3.11442911
## 13          0.1238  3.11442911
# Quantile-Quantile plot (diagnostic plot number 2 of the fitted model)
plot(model_wg, which = 2)

# Correlate Weir Gulch data (graph & spreadsheet)
#
# The columns are selected by name rather than by position (previously
# columns 3 and 16-26), so a change in the layout of the input table raises
# an "undefined columns" error instead of silently correlating the wrong
# variables.
wg_corr_columns <- c(
  "Storm_Regime",
  "Total_Ant_Traffic_Volume",
  "Copper_Cu_Mass_Flow_Rate_MG_S",
  "Iron_Fe_Mass_Flow_Rate_MG_S",
  "Nickel_Ni_Mass_Flow_Rate_MG_S",
  "Lead_Pb_Mass_Flow_Rate_MG_S",
  "Zinc_Zn_Mass_Flow_Rate_MG_S",
  "Copper_Cu_Concentration_MG_L",
  "Iron_Fe_Concentration_MG_L",
  "Nickel_Ni_Concentration_MG_L",
  "Lead_Pb_Concentration_MG_L",
  "Zinc_Zn_Concentration_MG_L"
)
wg_corr_variables <- Weir_Gulch_All_Data[wg_corr_columns]

# cor() needs numeric input, so the storm regime is recoded as a number.
wg_corr_variables$Storm_Regime <- as.numeric(wg_corr_variables$Storm_Regime)

# Keep only the numeric columns; vapply() guarantees a logical mask.
wg_is_numeric <- vapply(wg_corr_variables, is.numeric, logical(1))
wg_cor <- cor(wg_corr_variables[, wg_is_numeric])
corrplot(wg_cor, tl.cex = 0.6)

kable(wg_cor, caption = "Weir Gulch Data Correlations")
Weir Gulch Data Correlations
Storm_Regime Total_Ant_Traffic_Volume Copper_Cu_Mass_Flow_Rate_MG_S Iron_Fe_Mass_Flow_Rate_MG_S Nickel_Ni_Mass_Flow_Rate_MG_S Lead_Pb_Mass_Flow_Rate_MG_S Zinc_Zn_Mass_Flow_Rate_MG_S Copper_Cu_Concentration_MG_L Iron_Fe_Concentration_MG_L Nickel_Ni_Concentration_MG_L Lead_Pb_Concentration_MG_L Zinc_Zn_Concentration_MG_L
Storm_Regime 1.0000000 0.1976328 0.7138477 0.7335943 0.6301980 0.7328496 0.6371892 0.6667170 0.7581214 0.5504145 0.6425996 0.5672885
Total_Ant_Traffic_Volume 0.1976328 1.0000000 0.0350532 -0.0641429 0.2186792 0.1010966 0.1346029 0.2305220 0.1579158 0.3357514 0.2344433 0.2939717
Copper_Cu_Mass_Flow_Rate_MG_S 0.7138477 0.0350532 1.0000000 0.9512056 0.9175412 0.9702144 0.9647209 0.8791949 0.9416285 0.7708087 0.8460783 0.8026019
Iron_Fe_Mass_Flow_Rate_MG_S 0.7335943 -0.0641429 0.9512056 1.0000000 0.7796665 0.9115418 0.8481161 0.7224315 0.8589503 0.5757091 0.6930157 0.6149260
Nickel_Ni_Mass_Flow_Rate_MG_S 0.6301980 0.2186792 0.9175412 0.7796665 1.0000000 0.9535039 0.9769103 0.9729884 0.9542452 0.9557490 0.9649794 0.9463375
Lead_Pb_Mass_Flow_Rate_MG_S 0.7328496 0.1010966 0.9702144 0.9115418 0.9535039 1.0000000 0.9701545 0.9299959 0.9825406 0.8497929 0.9250540 0.8704953
Zinc_Zn_Mass_Flow_Rate_MG_S 0.6371892 0.1346029 0.9647209 0.8481161 0.9769103 0.9701545 1.0000000 0.9593636 0.9625910 0.8957189 0.9368054 0.9243891
Copper_Cu_Concentration_MG_L 0.6667170 0.2305220 0.8791949 0.7224315 0.9729884 0.9299959 0.9593636 1.0000000 0.9641117 0.9660339 0.9891852 0.9824587
Iron_Fe_Concentration_MG_L 0.7581214 0.1579158 0.9416285 0.8589503 0.9542452 0.9825406 0.9625910 0.9641117 1.0000000 0.8874468 0.9566519 0.9136763
Nickel_Ni_Concentration_MG_L 0.5504145 0.3357514 0.7708087 0.5757091 0.9557490 0.8497929 0.8957189 0.9660339 0.8874468 1.0000000 0.9718143 0.9786231
Lead_Pb_Concentration_MG_L 0.6425996 0.2344433 0.8460783 0.6930157 0.9649794 0.9250540 0.9368054 0.9891852 0.9566519 0.9718143 1.0000000 0.9779869
Zinc_Zn_Concentration_MG_L 0.5672885 0.2939717 0.8026019 0.6149260 0.9463375 0.8704953 0.9243891 0.9824587 0.9136763 0.9786231 0.9779869 1.0000000



Sanderson Gulch R^2, Coefficients, and Correlations



# Sanderson Gulch: linear model of the copper mass flow rate
#   response:   Copper_Cu_Mass_Flow_Rate_MG_S
#   predictors: Storm_Regime and Total_Ant_Traffic_Volume
set.seed(123)
model_sg <- lm(
  formula = Copper_Cu_Mass_Flow_Rate_MG_S ~
    Storm_Regime + Total_Ant_Traffic_Volume,
  data = Sanderson_Gulch_All_Data
)

# R-squared of the fitted model, rendered as a captioned table.
# The caption names Sanderson Gulch; it previously read "Weir Gulch", a
# leftover from the section this code was copied from.
kable(summary(model_sg)[["r.squared"]],
      caption = "R^2 Value of Sanderson Gulch Linear Regression")
R^2 Value of Sanderson Gulch Linear Regression
x
0.200325
# One-row table of the fitted Sanderson Gulch coefficients, taken by
# position from coef(model_sg).
# NOTE(review): the model formula keeps lm()'s default intercept, so
# coefficient 1 is the intercept rather than a separate first-flush effect,
# and coefficients 2-3 are regime terms measured against that baseline.
# Presumably "first flush" is the reference level of Storm_Regime - confirm
# with levels(Sanderson_Gulch_All_Data$Storm_Regime) before relying on the
# column labels below.
StormRegimeFirstFlush <- coef(model_sg)[1L]
StormRegimePeak       <- coef(model_sg)[2L]
StormRegimeRecession  <- coef(model_sg)[3L]
TrafficVolume         <- coef(model_sg)[4L]

# Column names are taken from the variable names passed in.
modelCoef <- data.frame(
  StormRegimeFirstFlush,
  StormRegimePeak,
  StormRegimeRecession,
  TrafficVolume
)
rownames(modelCoef) <- "Coefficients"

# The caption names Sanderson Gulch; it previously read "Weir Gulch", a
# leftover from the section this code was copied from.
kable(modelCoef, caption = "Sanderson Gulch Data Coefficients")
Sanderson Gulch Data Coefficients
StormRegimeFirstFlush StormRegimePeak StormRegimeRecession TrafficVolume
Coefficients 123.2414 -0.5612102 -33.22621 -0.0001737
# Observed copper mass flow rate next to the model's predictions for the
# same rows (predict() is called without newdata, so these are in-sample).
SG_Predictive_Analytics <- data.frame(
  Copper_Flow_MGS = Sanderson_Gulch_All_Data[["Copper_Cu_Mass_Flow_Rate_MG_S"]],
  Predictions = predict(model_sg)
)
SG_Predictive_Analytics
##    Copper_Flow_MGS Predictions
## 1           0.2297   46.114367
## 2           0.2100   46.114367
## 3           0.2012   46.114367
## 4           0.3310   45.553157
## 5           0.3602   45.553157
## 6           0.1291   12.888161
## 7           0.0952   12.888161
## 8         192.4344   53.936672
## 9          82.8360   53.936672
## 10         41.2056   53.936672
## 11         45.8784   53.936672
## 12          8.7084   53.375462
## 13        106.6248   53.375462
## 14        142.3080   53.375462
## 15        134.6616   53.375462
## 16          0.2230    9.128509
## 17          0.4710    8.567298
## 18          0.3599  -24.097698
## 19          0.2347  -24.097698
## 20         19.7879   58.783769
## 21         17.4901   58.783769
## 22          2.8808   26.118773
# Quantile-Quantile plot (diagnostic plot number 2 of the fitted model)
plot(model_sg, which = 2)

# Correlate Sanderson Gulch data (graph & spreadsheet)
#
# The columns are selected by name rather than by position (previously
# columns 3 and 16-26), so a change in the layout of the input table raises
# an "undefined columns" error instead of silently correlating the wrong
# variables.
sg_corr_columns <- c(
  "Storm_Regime",
  "Total_Ant_Traffic_Volume",
  "Copper_Cu_Mass_Flow_Rate_MG_S",
  "Iron_Fe_Mass_Flow_Rate_MG_S",
  "Nickel_Ni_Mass_Flow_Rate_MG_S",
  "Lead_Pb_Mass_Flow_Rate_MG_S",
  "Zinc_Zn_Mass_Flow_Rate_MG_S",
  "Copper_Cu_Concentration_MG_L",
  "Iron_Fe_Concentration_MG_L",
  "Nickel_Ni_Concentration_MG_L",
  "Lead_Pb_Concentration_MG_L",
  "Zinc_Zn_Concentration_MG_L"
)
sg_corr_variables <- Sanderson_Gulch_All_Data[sg_corr_columns]

# cor() needs numeric input, so the storm regime is recoded as a number.
sg_corr_variables$Storm_Regime <- as.numeric(sg_corr_variables$Storm_Regime)

# Keep only the numeric columns; vapply() guarantees a logical mask.
sg_is_numeric <- vapply(sg_corr_variables, is.numeric, logical(1))
sg_cor <- cor(sg_corr_variables[, sg_is_numeric])
corrplot(sg_cor, tl.cex = 0.6)

kable(sg_cor, caption = "Sanderson Gulch Data Correlations")
Sanderson Gulch Data Correlations
Storm_Regime Total_Ant_Traffic_Volume Copper_Cu_Mass_Flow_Rate_MG_S Iron_Fe_Mass_Flow_Rate_MG_S Nickel_Ni_Mass_Flow_Rate_MG_S Lead_Pb_Mass_Flow_Rate_MG_S Zinc_Zn_Mass_Flow_Rate_MG_S Copper_Cu_Concentration_MG_L Iron_Fe_Concentration_MG_L Nickel_Ni_Concentration_MG_L Lead_Pb_Concentration_MG_L Zinc_Zn_Concentration_MG_L
Storm_Regime 1.0000000 0.2159889 -0.2684140 -0.3978584 -0.1504427 -0.3762634 -0.2943384 0.1017045 0.1248655 0.2793162 0.0660914 0.2766469
Total_Ant_Traffic_Volume 0.2159889 1.0000000 -0.3821202 -0.3268307 -0.3501923 -0.3282053 -0.3646734 0.0022290 -0.1157265 0.0173602 -0.0948337 0.0777648
Copper_Cu_Mass_Flow_Rate_MG_S -0.2684140 -0.3821202 1.0000000 0.7809311 0.9637244 0.8112705 0.9614060 0.0038870 -0.0567177 -0.2859930 -0.0700763 -0.1159839
Iron_Fe_Mass_Flow_Rate_MG_S -0.3978584 -0.3268307 0.7809311 1.0000000 0.6476954 0.9742852 0.9010885 0.0615363 0.1637237 -0.2382660 0.1052219 0.0339256
Nickel_Ni_Mass_Flow_Rate_MG_S -0.1504427 -0.3501923 0.9637244 0.6476954 1.0000000 0.7025833 0.8948126 0.0466638 -0.0624985 -0.2048496 -0.0683493 -0.0860492
Lead_Pb_Mass_Flow_Rate_MG_S -0.3762634 -0.3282053 0.8112705 0.9742852 0.7025833 1.0000000 0.9116081 0.0115043 0.0906768 -0.2655889 0.0728974 -0.0296397
Zinc_Zn_Mass_Flow_Rate_MG_S -0.2943384 -0.3646734 0.9614060 0.9010885 0.8948126 0.9116081 1.0000000 0.0711508 0.0592867 -0.2374345 0.0217660 -0.0135210
Copper_Cu_Concentration_MG_L 0.1017045 0.0022290 0.0038870 0.0615363 0.0466638 0.0115043 0.0711508 1.0000000 0.8176381 0.6345659 0.6849425 0.9260198
Iron_Fe_Concentration_MG_L 0.1248655 -0.1157265 -0.0567177 0.1637237 -0.0624985 0.0906768 0.0592867 0.8176381 1.0000000 0.4274688 0.5516646 0.8753224
Nickel_Ni_Concentration_MG_L 0.2793162 0.0173602 -0.2859930 -0.2382660 -0.2048496 -0.2655889 -0.2374345 0.6345659 0.4274688 1.0000000 0.6550923 0.6012223
Lead_Pb_Concentration_MG_L 0.0660914 -0.0948337 -0.0700763 0.1052219 -0.0683493 0.0728974 0.0217660 0.6849425 0.5516646 0.6550923 1.0000000 0.6992236
Zinc_Zn_Concentration_MG_L 0.2766469 0.0777648 -0.1159839 0.0339256 -0.0860492 -0.0296397 -0.0135210 0.9260198 0.8753224 0.6012223 0.6992236 1.0000000