This document develops a correlation model between the total traffic volume accumulated from a storm event, through the entire dry period, up to the monitoring event, and the mass loads of the different pollutants.
Three final data tables were formatted with the following data:
The dataset variables (“storm regime”, “total traffic volume”, and the mass flow and concentration of 5 pollutants) were modeled with linear regression and correlated for each of the three locations.
The R-squared value of each linear model was calculated.
Graphs and spreadsheets of the correlations were created for Lakewood Gulch, Weir Gulch, and Sanderson Gulch.
# setwd("C:/Users/...")
library(data.table)
library(readxl)
library(corrplot)
library(tidyverse)
library(VIM)
library(knitr)
# Load the final per-site data (one .rds file per gulch). The file names are
# relative paths, so they are resolved against the current working directory.
Lakewood_Gulch_All_Data <- readRDS(file = "Lakewood_Gulch_All_Data.rds")
Weir_Gulch_All_Data <- readRDS(file = "Weir_Gulch_All_Data.rds")
Sanderson_Gulch_All_Data <- readRDS(file = "Sanderson_Gulch_All_Data.rds")
# Lakewood Gulch Pollutants Linear Modeling
# Regress the copper mass flow rate on the storm regime and the total
# traffic volume.
# lm() does not draw random numbers; the seed is kept only so that the RNG
# state seen by any later code stays the same.
set.seed(123)
model_lg <- lm(
  formula = Copper_Cu_Mass_Flow_Rate_MG_S ~
    Storm_Regime + Total_Ant_Traffic_Volume,
  data = Lakewood_Gulch_All_Data
)
# Find R Squared Value
# The coefficient of determination (commonly denoted R^2) is the proportion
# of the variance in the response variable that is explained by the
# explanatory variables of a regression model.
# For the Lakewood Gulch model R^2 is 0.5695, i.e. 56.95% of the variation
# in the copper mass flow rate is explained by the storm regime and the
# cumulative traffic volume.
# R^2 ranges from 0 to 1: a value of 1 means the explanatory variables
# account for all of the variation in the response variable, and a value
# of 0 means they account for none of it.
kable(
  summary(model_lg)[["r.squared"]],
  caption = "R^2 Value of Lakewood Gulch Linear Regression"
)
| x |
|---|
| 0.5695737 |
# Dataframe of linear modeling coefficients
# The four coefficients are picked by position from coef(model_lg) and shown
# as a one-row table.
# NOTE(review): for an lm() formula with an intercept, position 1 is the
# "(Intercept)" term and factor terms are offsets from the baseline level.
# Confirm with names(coef(model_lg)) that the labels below are the intended ones.
StormRegimeFirstFlush <- coef(model_lg)[1L]
StormRegimePeak <- coef(model_lg)[2L]
StormRegimeRecession <- coef(model_lg)[3L]
TrafficVolume <- coef(model_lg)[4L]
modelCoef <- data.frame(
  StormRegimeFirstFlush = StormRegimeFirstFlush,
  StormRegimePeak = StormRegimePeak,
  StormRegimeRecession = StormRegimeRecession,
  TrafficVolume = TrafficVolume
)
rownames(modelCoef) <- "Coefficients"
kable(modelCoef, caption = "Lakewood Gulch Data Coefficients")
| StormRegimeFirstFlush | StormRegimePeak | StormRegimeRecession | TrafficVolume | |
|---|---|---|---|---|
| Coefficients | 3.576138 | 9.112522 | 11.05698 | -2.5e-06 |
# Dataframe of predictive analytics results
# Observed copper mass flow rate side by side with the values predict()
# returns for model_lg when no new data is supplied.
LG_Predictive_Analytics <- data.frame(
  Copper_Flow_MGS = Lakewood_Gulch_All_Data[["Copper_Cu_Mass_Flow_Rate_MG_S"]],
  Predictions = predict(model_lg)
)
LG_Predictive_Analytics
## Copper_Flow_MGS Predictions
## 1 0.9864 0.3602008
## 2 17.9145 9.4727229
## 3 6.2710 9.4727229
## 4 12.7885 11.4171834
## 5 0.7873 2.0450339
## 6 7.2038 2.0450339
## 7 15.1554 11.1575560
## 8 11.7307 13.1020166
## 9 0.1364 0.9294761
## 10 0.1364 0.9294761
## 11 0.8041 10.0419982
## 12 0.9441 2.0044896
## 13 0.1238 2.0044896
# Quantile-Quantile plot
plot(model_lg, which = 2)
# Correlate Lakewood Gulch data, (Graph & Spreadsheet)
# Keep column 3 and columns 16 through 26 (selected by position).
lg_corr_variables <- Lakewood_Gulch_All_Data[c(3, 16:26)]
# NOTE(review): presumably Storm_Regime is a factor, in which case
# as.numeric() yields its integer level codes - confirm the level order is
# meaningful for a correlation.
lg_corr_variables[["Storm_Regime"]] <-
  as.numeric(lg_corr_variables[["Storm_Regime"]])
# Correlation matrix over the numeric columns only.
lg_cor <- cor(
  lg_corr_variables[, vapply(lg_corr_variables, is.numeric, logical(1))]
)
corrplot(lg_cor, tl.cex = 0.6)
kable(lg_cor, caption = "Lakewood Gulch Data Correlations")
| Storm_Regime | Total_Ant_Traffic_Volume | Copper_Cu_Mass_Flow_Rate_MG_S | Iron_Fe_Mass_Flow_Rate_MG_S | Nickel_Ni_Mass_Flow_Rate_MG_S | Lead_Pb_Mass_Flow_Rate_MG_S | Zinc_Zn_Mass_Flow_Rate_MG_S | Copper_Cu_Concentration_MG_L | Iron_Fe_Concentration_MG_L | Nickel_Ni_Concentration_MG_L | Lead_Pb_Concentration_MG_L | Zinc_Zn_Concentration_MG_L | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Storm_Regime | 1.0000000 | 0.2248380 | 0.7138477 | 0.7335943 | 0.6301980 | 0.7328496 | 0.6371892 | 0.6667170 | 0.7581214 | 0.5504145 | 0.6425996 | 0.5672885 |
| Total_Ant_Traffic_Volume | 0.2248380 | 1.0000000 | 0.1083048 | -0.0714368 | 0.4148306 | 0.2193400 | 0.2668855 | 0.4312068 | 0.2947854 | 0.5899354 | 0.4493593 | 0.4988031 |
| Copper_Cu_Mass_Flow_Rate_MG_S | 0.7138477 | 0.1083048 | 1.0000000 | 0.9512056 | 0.9175412 | 0.9702144 | 0.9647209 | 0.8791949 | 0.9416285 | 0.7708087 | 0.8460783 | 0.8026019 |
| Iron_Fe_Mass_Flow_Rate_MG_S | 0.7335943 | -0.0714368 | 0.9512056 | 1.0000000 | 0.7796665 | 0.9115418 | 0.8481161 | 0.7224315 | 0.8589503 | 0.5757091 | 0.6930157 | 0.6149260 |
| Nickel_Ni_Mass_Flow_Rate_MG_S | 0.6301980 | 0.4148306 | 0.9175412 | 0.7796665 | 1.0000000 | 0.9535039 | 0.9769103 | 0.9729884 | 0.9542452 | 0.9557490 | 0.9649794 | 0.9463375 |
| Lead_Pb_Mass_Flow_Rate_MG_S | 0.7328496 | 0.2193400 | 0.9702144 | 0.9115418 | 0.9535039 | 1.0000000 | 0.9701545 | 0.9299959 | 0.9825406 | 0.8497929 | 0.9250540 | 0.8704953 |
| Zinc_Zn_Mass_Flow_Rate_MG_S | 0.6371892 | 0.2668855 | 0.9647209 | 0.8481161 | 0.9769103 | 0.9701545 | 1.0000000 | 0.9593636 | 0.9625910 | 0.8957189 | 0.9368054 | 0.9243891 |
| Copper_Cu_Concentration_MG_L | 0.6667170 | 0.4312068 | 0.8791949 | 0.7224315 | 0.9729884 | 0.9299959 | 0.9593636 | 1.0000000 | 0.9641117 | 0.9660339 | 0.9891852 | 0.9824587 |
| Iron_Fe_Concentration_MG_L | 0.7581214 | 0.2947854 | 0.9416285 | 0.8589503 | 0.9542452 | 0.9825406 | 0.9625910 | 0.9641117 | 1.0000000 | 0.8874468 | 0.9566519 | 0.9136763 |
| Nickel_Ni_Concentration_MG_L | 0.5504145 | 0.5899354 | 0.7708087 | 0.5757091 | 0.9557490 | 0.8497929 | 0.8957189 | 0.9660339 | 0.8874468 | 1.0000000 | 0.9718143 | 0.9786231 |
| Lead_Pb_Concentration_MG_L | 0.6425996 | 0.4493593 | 0.8460783 | 0.6930157 | 0.9649794 | 0.9250540 | 0.9368054 | 0.9891852 | 0.9566519 | 0.9718143 | 1.0000000 | 0.9779869 |
| Zinc_Zn_Concentration_MG_L | 0.5672885 | 0.4988031 | 0.8026019 | 0.6149260 | 0.9463375 | 0.8704953 | 0.9243891 | 0.9824587 | 0.9136763 | 0.9786231 | 0.9779869 | 1.0000000 |
# Weir Gulch Pollutants Linear Modeling
# Regress the copper mass flow rate on the storm regime and the total
# traffic volume.
# lm() does not draw random numbers; the seed is kept only so that the RNG
# state seen by any later code stays the same.
set.seed(123)
model_wg <- lm(
  formula = Copper_Cu_Mass_Flow_Rate_MG_S ~
    Storm_Regime + Total_Ant_Traffic_Volume,
  data = Weir_Gulch_All_Data
)
# Find R Squared Value
# R^2 of the Weir Gulch copper model (model_wg), rendered as a one-cell table.
kable(
  summary(model_wg)[["r.squared"]],
  caption = "R^2 Value of Weir Gulch Linear Regression"
)
| x |
|---|
| 0.5857899 |
# Dataframe of linear modeling coefficients
# The four coefficients are picked by position from coef(model_wg) and shown
# as a one-row table.
# NOTE(review): for an lm() formula with an intercept, position 1 is the
# "(Intercept)" term and factor terms are offsets from the baseline level.
# Confirm with names(coef(model_wg)) that the labels below are the intended ones.
StormRegimeFirstFlush <- coef(model_wg)[1L]
StormRegimePeak <- coef(model_wg)[2L]
StormRegimeRecession <- coef(model_wg)[3L]
TrafficVolume <- coef(model_wg)[4L]
modelCoef <- data.frame(
  StormRegimeFirstFlush = StormRegimeFirstFlush,
  StormRegimePeak = StormRegimePeak,
  StormRegimeRecession = StormRegimeRecession,
  TrafficVolume = TrafficVolume
)
rownames(modelCoef) <- "Coefficients"
kable(modelCoef, caption = "Weir Gulch Data Coefficients")
| StormRegimeFirstFlush | StormRegimePeak | StormRegimeRecession | TrafficVolume | |
|---|---|---|---|---|
| Coefficients | 5.681272 | 9.367205 | 11.10583 | -4e-06 |
# Dataframe of predictive analytics results
# Observed copper mass flow rate side by side with the values predict()
# returns for model_wg when no new data is supplied.
# NOTE(review): in the rendered output, the Copper_Flow_MGS column printed
# for Weir Gulch is value-for-value the same as the one printed for Lakewood
# Gulch - confirm Weir_Gulch_All_Data carries Weir Gulch measurements.
WG_Predictive_Analytics <- data.frame(
  Copper_Flow_MGS = Weir_Gulch_All_Data[["Copper_Cu_Mass_Flow_Rate_MG_S"]],
  Predictions = predict(model_wg)
)
WG_Predictive_Analytics
## Copper_Flow_MGS Predictions
## 1 0.9864 0.42100530
## 2 17.9145 9.78821059
## 3 6.2710 9.78821059
## 4 12.7885 11.52683654
## 5 0.7873 1.88653223
## 6 7.2038 1.88653223
## 7 15.1554 11.25373752
## 8 11.7307 12.99236346
## 9 0.1364 -0.05236399
## 10 0.1364 -0.05236399
## 11 0.8041 9.31484130
## 12 0.9441 3.11442911
## 13 0.1238 3.11442911
# Quantile-Quantile plot
plot(model_wg, which = 2)
# Correlate Weir Gulch data, (Graph & Spreadsheet)
# Keep column 3 and columns 16 through 26 (selected by position).
wg_corr_variables <- Weir_Gulch_All_Data[c(3, 16:26)]
# NOTE(review): presumably Storm_Regime is a factor, in which case
# as.numeric() yields its integer level codes - confirm the level order is
# meaningful for a correlation.
wg_corr_variables[["Storm_Regime"]] <-
  as.numeric(wg_corr_variables[["Storm_Regime"]])
# Correlation matrix over the numeric columns only.
wg_cor <- cor(
  wg_corr_variables[, vapply(wg_corr_variables, is.numeric, logical(1))]
)
corrplot(wg_cor, tl.cex = 0.6)
kable(wg_cor, caption = "Weir Gulch Data Correlations")
| Storm_Regime | Total_Ant_Traffic_Volume | Copper_Cu_Mass_Flow_Rate_MG_S | Iron_Fe_Mass_Flow_Rate_MG_S | Nickel_Ni_Mass_Flow_Rate_MG_S | Lead_Pb_Mass_Flow_Rate_MG_S | Zinc_Zn_Mass_Flow_Rate_MG_S | Copper_Cu_Concentration_MG_L | Iron_Fe_Concentration_MG_L | Nickel_Ni_Concentration_MG_L | Lead_Pb_Concentration_MG_L | Zinc_Zn_Concentration_MG_L | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Storm_Regime | 1.0000000 | 0.1976328 | 0.7138477 | 0.7335943 | 0.6301980 | 0.7328496 | 0.6371892 | 0.6667170 | 0.7581214 | 0.5504145 | 0.6425996 | 0.5672885 |
| Total_Ant_Traffic_Volume | 0.1976328 | 1.0000000 | 0.0350532 | -0.0641429 | 0.2186792 | 0.1010966 | 0.1346029 | 0.2305220 | 0.1579158 | 0.3357514 | 0.2344433 | 0.2939717 |
| Copper_Cu_Mass_Flow_Rate_MG_S | 0.7138477 | 0.0350532 | 1.0000000 | 0.9512056 | 0.9175412 | 0.9702144 | 0.9647209 | 0.8791949 | 0.9416285 | 0.7708087 | 0.8460783 | 0.8026019 |
| Iron_Fe_Mass_Flow_Rate_MG_S | 0.7335943 | -0.0641429 | 0.9512056 | 1.0000000 | 0.7796665 | 0.9115418 | 0.8481161 | 0.7224315 | 0.8589503 | 0.5757091 | 0.6930157 | 0.6149260 |
| Nickel_Ni_Mass_Flow_Rate_MG_S | 0.6301980 | 0.2186792 | 0.9175412 | 0.7796665 | 1.0000000 | 0.9535039 | 0.9769103 | 0.9729884 | 0.9542452 | 0.9557490 | 0.9649794 | 0.9463375 |
| Lead_Pb_Mass_Flow_Rate_MG_S | 0.7328496 | 0.1010966 | 0.9702144 | 0.9115418 | 0.9535039 | 1.0000000 | 0.9701545 | 0.9299959 | 0.9825406 | 0.8497929 | 0.9250540 | 0.8704953 |
| Zinc_Zn_Mass_Flow_Rate_MG_S | 0.6371892 | 0.1346029 | 0.9647209 | 0.8481161 | 0.9769103 | 0.9701545 | 1.0000000 | 0.9593636 | 0.9625910 | 0.8957189 | 0.9368054 | 0.9243891 |
| Copper_Cu_Concentration_MG_L | 0.6667170 | 0.2305220 | 0.8791949 | 0.7224315 | 0.9729884 | 0.9299959 | 0.9593636 | 1.0000000 | 0.9641117 | 0.9660339 | 0.9891852 | 0.9824587 |
| Iron_Fe_Concentration_MG_L | 0.7581214 | 0.1579158 | 0.9416285 | 0.8589503 | 0.9542452 | 0.9825406 | 0.9625910 | 0.9641117 | 1.0000000 | 0.8874468 | 0.9566519 | 0.9136763 |
| Nickel_Ni_Concentration_MG_L | 0.5504145 | 0.3357514 | 0.7708087 | 0.5757091 | 0.9557490 | 0.8497929 | 0.8957189 | 0.9660339 | 0.8874468 | 1.0000000 | 0.9718143 | 0.9786231 |
| Lead_Pb_Concentration_MG_L | 0.6425996 | 0.2344433 | 0.8460783 | 0.6930157 | 0.9649794 | 0.9250540 | 0.9368054 | 0.9891852 | 0.9566519 | 0.9718143 | 1.0000000 | 0.9779869 |
| Zinc_Zn_Concentration_MG_L | 0.5672885 | 0.2939717 | 0.8026019 | 0.6149260 | 0.9463375 | 0.8704953 | 0.9243891 | 0.9824587 | 0.9136763 | 0.9786231 | 0.9779869 | 1.0000000 |
# Sanderson Gulch Pollutants Linear Modeling
# Regress the copper mass flow rate on the storm regime and the total
# traffic volume.
# lm() does not draw random numbers; the seed is kept only so that the RNG
# state seen by any later code stays the same.
set.seed(123)
model_sg <- lm(
  formula = Copper_Cu_Mass_Flow_Rate_MG_S ~
    Storm_Regime + Total_Ant_Traffic_Volume,
  data = Sanderson_Gulch_All_Data
)
# Find R Squared Value
# R^2 of the Sanderson Gulch copper model (model_sg), rendered as a one-cell
# table. The caption previously said "Weir Gulch" (copy-paste slip); it now
# names the site whose model is actually summarised here.
kable(
  summary(model_sg)$r.squared,
  caption = "R^2 Value of Sanderson Gulch Linear Regression"
)
| x |
|---|
| 0.200325 |
# Dataframe of linear modeling coefficients
# The four coefficients are picked by position from coef(model_sg) and shown
# as a one-row table. The caption previously said "Weir Gulch" (copy-paste
# slip); it now names Sanderson Gulch, the site model_sg was fitted on.
# NOTE(review): for an lm() formula with an intercept, position 1 is the
# "(Intercept)" term and factor terms are offsets from the baseline level.
# Confirm with names(coef(model_sg)) that the labels below are the intended ones.
StormRegimeFirstFlush <- coef(model_sg)[1]
StormRegimePeak <- coef(model_sg)[2]
StormRegimeRecession <- coef(model_sg)[3]
TrafficVolume <- coef(model_sg)[4]
modelCoef <- data.frame(
  "StormRegimeFirstFlush" = StormRegimeFirstFlush,
  "StormRegimePeak" = StormRegimePeak,
  "StormRegimeRecession" = StormRegimeRecession,
  "TrafficVolume" = TrafficVolume
)
row.names(modelCoef) <- c("Coefficients")
kable(modelCoef, caption = "Sanderson Gulch Data Coefficients")
| StormRegimeFirstFlush | StormRegimePeak | StormRegimeRecession | TrafficVolume | |
|---|---|---|---|---|
| Coefficients | 123.2414 | -0.5612102 | -33.22621 | -0.0001737 |
# Dataframe of predictive analytics results
# Observed copper mass flow rate side by side with the values predict()
# returns for model_sg when no new data is supplied.
SG_Predictive_Analytics <- data.frame(
  Copper_Flow_MGS = Sanderson_Gulch_All_Data[["Copper_Cu_Mass_Flow_Rate_MG_S"]],
  Predictions = predict(model_sg)
)
SG_Predictive_Analytics
## Copper_Flow_MGS Predictions
## 1 0.2297 46.114367
## 2 0.2100 46.114367
## 3 0.2012 46.114367
## 4 0.3310 45.553157
## 5 0.3602 45.553157
## 6 0.1291 12.888161
## 7 0.0952 12.888161
## 8 192.4344 53.936672
## 9 82.8360 53.936672
## 10 41.2056 53.936672
## 11 45.8784 53.936672
## 12 8.7084 53.375462
## 13 106.6248 53.375462
## 14 142.3080 53.375462
## 15 134.6616 53.375462
## 16 0.2230 9.128509
## 17 0.4710 8.567298
## 18 0.3599 -24.097698
## 19 0.2347 -24.097698
## 20 19.7879 58.783769
## 21 17.4901 58.783769
## 22 2.8808 26.118773
# Quantile-Quantile plot
plot(model_sg, which = 2)
# Correlate Sanderson Gulch data, (Graph & Spreadsheet)
# Keep column 3 and columns 16 through 26 (selected by position).
sg_corr_variables <- Sanderson_Gulch_All_Data[c(3, 16:26)]
# NOTE(review): presumably Storm_Regime is a factor, in which case
# as.numeric() yields its integer level codes - confirm the level order is
# meaningful for a correlation.
sg_corr_variables[["Storm_Regime"]] <-
  as.numeric(sg_corr_variables[["Storm_Regime"]])
# Correlation matrix over the numeric columns only.
sg_cor <- cor(
  sg_corr_variables[, vapply(sg_corr_variables, is.numeric, logical(1))]
)
corrplot(sg_cor, tl.cex = 0.6)
kable(sg_cor, caption = "Sanderson Gulch Data Correlations")
| Storm_Regime | Total_Ant_Traffic_Volume | Copper_Cu_Mass_Flow_Rate_MG_S | Iron_Fe_Mass_Flow_Rate_MG_S | Nickel_Ni_Mass_Flow_Rate_MG_S | Lead_Pb_Mass_Flow_Rate_MG_S | Zinc_Zn_Mass_Flow_Rate_MG_S | Copper_Cu_Concentration_MG_L | Iron_Fe_Concentration_MG_L | Nickel_Ni_Concentration_MG_L | Lead_Pb_Concentration_MG_L | Zinc_Zn_Concentration_MG_L | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Storm_Regime | 1.0000000 | 0.2159889 | -0.2684140 | -0.3978584 | -0.1504427 | -0.3762634 | -0.2943384 | 0.1017045 | 0.1248655 | 0.2793162 | 0.0660914 | 0.2766469 |
| Total_Ant_Traffic_Volume | 0.2159889 | 1.0000000 | -0.3821202 | -0.3268307 | -0.3501923 | -0.3282053 | -0.3646734 | 0.0022290 | -0.1157265 | 0.0173602 | -0.0948337 | 0.0777648 |
| Copper_Cu_Mass_Flow_Rate_MG_S | -0.2684140 | -0.3821202 | 1.0000000 | 0.7809311 | 0.9637244 | 0.8112705 | 0.9614060 | 0.0038870 | -0.0567177 | -0.2859930 | -0.0700763 | -0.1159839 |
| Iron_Fe_Mass_Flow_Rate_MG_S | -0.3978584 | -0.3268307 | 0.7809311 | 1.0000000 | 0.6476954 | 0.9742852 | 0.9010885 | 0.0615363 | 0.1637237 | -0.2382660 | 0.1052219 | 0.0339256 |
| Nickel_Ni_Mass_Flow_Rate_MG_S | -0.1504427 | -0.3501923 | 0.9637244 | 0.6476954 | 1.0000000 | 0.7025833 | 0.8948126 | 0.0466638 | -0.0624985 | -0.2048496 | -0.0683493 | -0.0860492 |
| Lead_Pb_Mass_Flow_Rate_MG_S | -0.3762634 | -0.3282053 | 0.8112705 | 0.9742852 | 0.7025833 | 1.0000000 | 0.9116081 | 0.0115043 | 0.0906768 | -0.2655889 | 0.0728974 | -0.0296397 |
| Zinc_Zn_Mass_Flow_Rate_MG_S | -0.2943384 | -0.3646734 | 0.9614060 | 0.9010885 | 0.8948126 | 0.9116081 | 1.0000000 | 0.0711508 | 0.0592867 | -0.2374345 | 0.0217660 | -0.0135210 |
| Copper_Cu_Concentration_MG_L | 0.1017045 | 0.0022290 | 0.0038870 | 0.0615363 | 0.0466638 | 0.0115043 | 0.0711508 | 1.0000000 | 0.8176381 | 0.6345659 | 0.6849425 | 0.9260198 |
| Iron_Fe_Concentration_MG_L | 0.1248655 | -0.1157265 | -0.0567177 | 0.1637237 | -0.0624985 | 0.0906768 | 0.0592867 | 0.8176381 | 1.0000000 | 0.4274688 | 0.5516646 | 0.8753224 |
| Nickel_Ni_Concentration_MG_L | 0.2793162 | 0.0173602 | -0.2859930 | -0.2382660 | -0.2048496 | -0.2655889 | -0.2374345 | 0.6345659 | 0.4274688 | 1.0000000 | 0.6550923 | 0.6012223 |
| Lead_Pb_Concentration_MG_L | 0.0660914 | -0.0948337 | -0.0700763 | 0.1052219 | -0.0683493 | 0.0728974 | 0.0217660 | 0.6849425 | 0.5516646 | 0.6550923 | 1.0000000 | 0.6992236 |
| Zinc_Zn_Concentration_MG_L | 0.2766469 | 0.0777648 | -0.1159839 | 0.0339256 | -0.0860492 | -0.0296397 | -0.0135210 | 0.9260198 | 0.8753224 | 0.6012223 | 0.6992236 | 1.0000000 |