Linear Regression and Time Series Model_MW for 1,4 DiOxane

Author

Callistus Obunadike

setwd("C:/Users/obunadic8159/OneDrive - ARCADIS/Desktop/Data_Analysis_11DCE_14DOX/Recent_New_Data/MW")
#Load necessary libraries
#install.packages("car")
#install.packages("corrplot")
#install.packages("MASS")
#install.packages("lmtest")
library(MASS)
library(dplyr)
library(ggplot2)
library(caret)
library(car)
library(corrplot)
library(e1071)
library(forecast)
library(lubridate)
library(lmtest)

Importing the Data from our directory

# Load the dataset
data <- read.csv("C:/Users/obunadic8159/OneDrive - ARCADIS/Desktop/Data_Analysis_11DCE_14DOX/Recent_New_Data/MW/14DOX.csv")

Visualizing the Data Type and Structure

##################Checking the Data Type and Structure ###############
head(data)
  SiteName       Date Quarter X14DOX
1    DMW13 11/30/2001      Q4      0
2    DMW13  5/29/2002      Q2      0
3    DMW13  11/6/2002      Q4      0
4    DMW13  9/23/2003      Q3      0
5    DMW13  5/20/2004      Q2      0
6    DMW13  4/27/2005      Q2      0
str(data)
'data.frame':   1247 obs. of  4 variables:
 $ SiteName: chr  "DMW13" "DMW13" "DMW13" "DMW13" ...
 $ Date    : chr  "11/30/2001" "5/29/2002" "11/6/2002" "9/23/2003" ...
 $ Quarter : chr  "Q4" "Q2" "Q4" "Q3" ...
 $ X14DOX  : num  0 0 0 0 0 0 0 0 0 0 ...

Convert Date to Date format and factorizing Site Name

# Convert Date to Date format and factorize SiteName
data$Date <- as.Date(data$Date, format="%m/%d/%Y")
data$SiteName <- as.factor(data$SiteName)

Checking for Missing data across the data using For-Loop

#### Handling Missing Data
vnames <- colnames(data)
n <- nrow(data)
out <- NULL
for (j in 1:ncol(data)){
  vname <- colnames(data)[j]
  x <- as.vector(data[,j])
  n1 <- sum(is.na(x), na.rm=TRUE)  # NA
  n2 <- sum(x=="NA", na.rm=TRUE) # "NA"
  n3 <- sum(x==" ", na.rm=TRUE)  # missing
  nmiss <- n1 + n2 + n3
  nmiss <- sum(is.na(x))
  ncomplete <- n-nmiss
  out <- rbind(out, c(col.num=j, v.name=vname, mode=mode(x),
                      n.level=length(unique(x)),
                      ncom=ncomplete, nmiss= nmiss, miss.prop=nmiss/n))
}
out <- as.data.frame(out)
row.names(out) <- NULL
out
  col.num   v.name      mode n.level ncom nmiss miss.prop
1       1 SiteName character      67 1247     0         0
2       2     Date   numeric     173 1247     0         0
3       3  Quarter character       4 1247     0         0
4       4   X14DOX   numeric     432 1247     0         0
#for (j in 1:NCOL(data)){
#  print(head(colnames(data)[j]))
#  #print(head(table(data[,j], useNA="ifany")))
#}

Checking for Outliers using Box-plot

# Check for outliers using boxplots for 14DOX
ggplot(data, aes(x = SiteName, y = X14DOX)) + geom_boxplot() + ggtitle("Boxplot for 14DOX by SiteName")

Calculating and Filtering Outliers in 1,4-DiOxane

# Calculate IQR and detect outliers for 14DOX
Q1_14DOX <- quantile(data$X14DOX, 0.25)
Q3_14DOX <- quantile(data$X14DOX, 0.75)
IQR_14DOX <- Q3_14DOX - Q1_14DOX
outliers_14DOX <- data %>% filter(X14DOX < (Q1_14DOX - 1.5 * IQR_14DOX) | X14DOX > (Q3_14DOX + 1.5 * IQR_14DOX))

There are 201 observations of 1,4DiOxane (14DOX) with outliers. Therefore, it is important to remove them before building the linear regression model and time series forecast.

# Outliers in 14DOX 
# Print outliers
print(head(outliers_14DOX))
  SiteName       Date Quarter X14DOX
1    DMW31 2016-07-20      Q3 0.0108
2    DMW31 2016-10-13      Q4 0.0152
3    DMW31 2017-01-10      Q1 0.0195
4    DMW31 2017-04-23      Q2 0.0176
5    DMW31 2017-07-18      Q3 0.0217
6    DMW31 2018-07-04      Q3 0.0117

Checking for skewness in 1,4-DiOxane

[1] "Skewness for X14DOX: 5.18068968998706"

Interpretation of Skewness Values:

Skewness for 14DOX (5.18):This is also a high positive skewness value. The distribution of 14DOX is positively skewed, with most data points clustered towards the lower end and a long tail to the right. This indicates the presence of some high values. The high skewness values suggest that the data for 14DOX are not normally distributed and are influenced by a few very large values. When working with these variables in analyses, it might be necessary to consider transformations (such as log or square root transformations) to reduce skewness and achieve a more normal distribution, which can be beneficial for certain statistical analyses and models.~

# Visualize distributions
par(mfrow=c(1, 2))
hist(data$X14DOX, main="14DOX", xlab="14DOX", col = 'blue', breaks=05)

Transforming data due to skewness by using log transformation.

# Transform data if skewness is high (optional, example using log transformation)
if(abs(skewness_14DOX) > 1){data$X14DOX <- log1p(data$X14DOX)}

Removing the Outliers for 14DOX.

# Remove outliers (optional, depending on analysis)
data <- data %>% filter(X14DOX >= (Q1_14DOX - 1.5 * IQR_14DOX) & X14DOX <= (Q3_14DOX + 1.5 * IQR_14DOX))

##################Checking the Data Type and Structure ###############
str(data)
'data.frame':   1046 obs. of  4 variables:
 $ SiteName: Factor w/ 67 levels "DMW13","DMW28",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ Date    : Date, format: "2001-11-30" "2002-05-29" ...
 $ Quarter : chr  "Q4" "Q2" "Q4" "Q3" ...
 $ X14DOX  : num  0 0 0 0 0 0 0 0 0 0 ...
head(data)
  SiteName       Date Quarter X14DOX
1    DMW13 2001-11-30      Q4      0
2    DMW13 2002-05-29      Q2      0
3    DMW13 2002-11-06      Q4      0
4    DMW13 2003-09-23      Q3      0
5    DMW13 2004-05-20      Q2      0
6    DMW13 2005-04-27      Q2      0

Convert Date to numeric for VIF calculation

data$Date_numeric <- as.numeric(data$Date)

Check for multicollinearity using VIF

# Check for multicollinearity using VIF
# Check for multicollinearity using VIF
vif_data_14DOX <- lm(X14DOX ~ Date_numeric + SiteName, data = data)
vif(vif_data_14DOX)
                 GVIF Df GVIF^(1/(2*Df))
Date_numeric 3.192664  1        1.786803
SiteName     3.192664 65        1.008970

In this case, both Date_numeric and SiteName have GVIF values close to 1, indicating that there is no significant multicollinearity between these predictors in the model. This suggests that the model is not adversely affected by multicollinearity, and the estimates of the regression coefficients should be reliable.

Check correlation matrix for predictor variable (14DOX) and Date

  • 1: Perfect positive correlation (as one variable increases, the other variable also increases).

  • -1: Perfect negative correlation (as one variable increases, the other variable decreases).

  • 0: No linear correlation (the variables do not have a linear relationship).

# Check correlation matrix for predictor and dependent variables
cor_matrix <- cor(data %>% select(X14DOX, Date_numeric))
print(cor_matrix)
               X14DOX Date_numeric
X14DOX       1.000000     0.384752
Date_numeric 0.384752     1.000000

Implications of Correlation Matrix on the Model:

The correlation coefficient between X14DOX and Date_numeric is 0.385 ~ 0.4, which suggests a moderate positive linear relationship between these two variables.

# Visualize the correlation matrix
corrplot(cor_matrix, method = "circle")

Removing the 5th column (Date Numeric) for Linear Regression Model

# Convert Date back to as.Date data type and removing the 5th column
data$Date <- as.Date(data$Date, format="%m/%d/%Y")
head(data)
  SiteName       Date Quarter X14DOX Date_numeric
1    DMW13 2001-11-30      Q4      0        11656
2    DMW13 2002-05-29      Q2      0        11836
3    DMW13 2002-11-06      Q4      0        11997
4    DMW13 2003-09-23      Q3      0        12318
5    DMW13 2004-05-20      Q2      0        12558
6    DMW13 2005-04-27      Q2      0        12900
#colnames(data)
data= data[, -(5)]

Splitting the data into training and testing sets for the Linear Regression Modelling of 14DOX

# Split the data into training and testing sets for X14DOX
set.seed(123)
train_Index <- createDataPartition(data$X14DOX, p = .8, list = FALSE, times = 1)
train_Data <- data[train_Index,]
test_Data  <- data[-train_Index,]
  • This line uses the createDataPartition function from the caret package to create an index for splitting the data.

  • X11DCE: The target variable, which is 1,1 Dicholorethene, is used to ensure that the split maintains the same distribution of this variable in both the training and test sets.

  • p = .8: This specifies that 80% of the data should be used for training. The remaining 20% will be used for testing.

  • list = FALSE: By setting this to FALSE, the function returns the indices as a vector instead of a list.

  • times = 1: This specifies that only one partition should be created.

Using set.seed(123) ensures that every time you run this code, the training and test splits will be the same, allowing for consistent and reproducible results.

Factoring SiteName in testData to Match with the trainData (Categorical Variable)

# Ensure factor levels in test set match training set
test_Data$SiteName <- factor(test_Data$SiteName , levels = levels(train_Data$SiteName))

The above code ensures that the SiteName factor levels in the testData data-set match those in the trainData data-set. This step is crucial when you want to make predictions on the test data using a model trained on the training data, especially when dealing with categorical variables.

Linear Regression Model and Step-Wise Backward Elimination Model Selection

# Model for X14DOX using stepwise backward elimination
full_model_14DOX <- lm(X14DOX ~ Date + SiteName, data = train_Data)
step_model_14DOX <- step(full_model_14DOX, direction = "backward", trace = 0)
  • Purpose: The goal of step-wise model selection is to improve the model by removing predictors that do not contribute significantly to the prediction of the response variable. This can lead to a more parsimonious model that is easier to interpret and may perform better on new data.

  • Backward Elimination: In this specific procedure, predictors are removed one by one based on their statistical significance, starting with the least significant predictor. The process continues until only predictors that contribute meaningfully to the model remain.

Summary and Interpretation of the Linear Regression Model

# Summary of the final model
summary(step_model_14DOX)

Call:
lm(formula = X14DOX ~ Date + SiteName, data = train_Data)

Residuals:
       Min         1Q     Median         3Q        Max 
-0.0048533 -0.0004697 -0.0000301  0.0002664  0.0074529 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)   -3.050e-03  6.136e-04  -4.970 8.26e-07 ***
Date           1.973e-07  3.239e-08   6.093 1.75e-09 ***
SiteNameDMW28 -2.034e-04  5.530e-04  -0.368 0.713124    
SiteNameDMW29 -2.093e-04  5.396e-04  -0.388 0.698211    
SiteNameDMW30  2.757e-03  5.867e-04   4.699 3.10e-06 ***
SiteNameDMW31  3.311e-03  6.342e-04   5.220 2.30e-07 ***
SiteNameDMW32  1.560e-03  5.399e-04   2.890 0.003959 ** 
SiteNameDMW33 -2.100e-04  5.685e-04  -0.369 0.711918    
SiteNameDMW34 -1.674e-04  5.281e-04  -0.317 0.751338    
SiteNameDMW35  9.796e-05  6.077e-04   0.161 0.871971    
SiteNameDMW36  1.628e-03  5.395e-04   3.018 0.002631 ** 
SiteNameDMW37  1.536e-03  5.103e-04   3.009 0.002702 ** 
SiteNameDMW38  1.343e-03  6.334e-04   2.121 0.034256 *  
SiteNameDMW39 -1.255e-05  5.280e-04  -0.024 0.981047    
SiteNameDMW40 -3.440e-05  7.040e-04  -0.049 0.961045    
SiteNameDMW41 -2.178e-04  5.867e-04  -0.371 0.710494    
SiteNameDMW42 -7.897e-05  4.873e-04  -0.162 0.871301    
SiteNameDMW43  2.674e-03  6.080e-04   4.398 1.25e-05 ***
SiteNameDMW44  6.892e-04  5.862e-04   1.176 0.240059    
SiteNameMW01   7.107e-04  5.145e-04   1.381 0.167540    
SiteNameMW02   7.185e-04  4.753e-04   1.512 0.130999    
SiteNameMW03   5.935e-04  4.474e-04   1.327 0.185064    
SiteNameMW04   8.469e-04  4.368e-04   1.939 0.052869 .  
SiteNameMW05   8.729e-04  5.877e-04   1.485 0.137876    
SiteNameMW06   7.342e-04  4.720e-04   1.556 0.120221    
SiteNameMW07   7.474e-04  4.906e-04   1.523 0.128069    
SiteNameMW08   5.551e-04  4.468e-04   1.242 0.214513    
SiteNameMW09   1.240e-03  4.364e-04   2.843 0.004592 ** 
SiteNameMW10   8.018e-04  4.792e-04   1.673 0.094681 .  
SiteNameMW11   6.651e-04  4.551e-04   1.462 0.144257    
SiteNameMW12   8.039e-04  4.932e-04   1.630 0.103568    
SiteNameMW15   1.774e-04  6.075e-04   0.292 0.770376    
SiteNameMW15R  3.694e-03  8.280e-04   4.462 9.34e-06 ***
SiteNameMW16   1.214e-03  5.065e-04   2.397 0.016760 *  
SiteNameMW17   1.519e-04  4.907e-04   0.309 0.757039    
SiteNameMW18   1.473e-03  4.776e-04   3.084 0.002114 ** 
SiteNameMW19  -6.754e-05  5.067e-04  -0.133 0.893983    
SiteNameMW20   2.160e-03  5.079e-04   4.253 2.37e-05 ***
SiteNameMW21  -4.811e-05  5.265e-04  -0.091 0.927223    
SiteNameMW22   2.498e-03  5.682e-04   4.396 1.26e-05 ***
SiteNameMW23  -2.678e-04  6.640e-04  -0.403 0.686764    
SiteNameMW24   8.410e-05  5.095e-04   0.165 0.868937    
SiteNameMW25   1.706e-03  5.092e-04   3.350 0.000848 ***
SiteNameMW26  -2.548e-04  7.036e-04  -0.362 0.717326    
SiteNameMW27   2.478e-03  5.006e-04   4.950 9.14e-07 ***
SiteNameMW45  -2.182e-04  6.336e-04  -0.344 0.730675    
SiteNameMW46   1.141e-03  5.562e-04   2.051 0.040566 *  
SiteNameMW47   1.753e-03  6.657e-04   2.633 0.008641 ** 
SiteNameMW48   3.910e-03  6.111e-04   6.397 2.74e-10 ***
SiteNameMW49  -2.351e-04  7.570e-04  -0.311 0.756178    
SiteNameMW50  -2.101e-04  7.566e-04  -0.278 0.781362    
SiteNameMW51  -2.341e-04  6.346e-04  -0.369 0.712261    
SiteNameMW52   1.745e-03  1.121e-03   1.557 0.119949    
SiteNameMW53   3.411e-03  5.542e-04   6.155 1.21e-09 ***
SiteNameMW54  -2.845e-04  1.548e-03  -0.184 0.854226    
SiteNameMW55   4.081e-03  6.367e-04   6.409 2.55e-10 ***
SiteNameMW57   6.500e-04  1.125e-03   0.578 0.563487    
SiteNameMW58  -2.779e-04  6.676e-04  -0.416 0.677343    
SiteNameMW59   1.149e-03  7.064e-04   1.626 0.104293    
SiteNameMW60   5.067e-04  6.371e-04   0.795 0.426642    
SiteNameMW61   4.592e-03  7.066e-04   6.498 1.46e-10 ***
SiteNameMW62   6.332e-03  1.121e-03   5.648 2.29e-08 ***
SiteNameMW63   4.891e-03  6.679e-04   7.322 6.17e-13 ***
SiteNameSMW30  1.630e-03  5.530e-04   2.947 0.003309 ** 
SiteNameSMW43 -2.687e-04  9.355e-04  -0.287 0.774035    
SiteNameTC01   1.581e-03  6.647e-04   2.379 0.017591 *  
SiteNameTC10  -2.233e-04  9.353e-04  -0.239 0.811366    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.00151 on 770 degrees of freedom
Multiple R-squared:  0.4633,    Adjusted R-squared:  0.4173 
F-statistic: 10.07 on 66 and 770 DF,  p-value: < 2.2e-16
  • Residuals: Min ( -0.0048533), 1Q(-0.0004697), Median(-0.0000301), 3Q(0.0002664) and Max(0.0074529) shows the distribution of the residuals (difference between observed and predicted values). The small values of the residuals indicate that the model’s prediction are close to the observed values.

  • Date: The coefficient for Date is significant at 5% level (p<0.05), suggesting that there is a relationship between Date and X11DCE.

Significant SiteName Coefficients:

The site names that are statistically significant (p-value < 0.05) are:

  • SiteNameDMW30: p=3.10×10−6

  • SiteNameDMW31: p=2.30×10−7

  • SiteNameDMW32: p=0.003959

  • SiteNameDMW36: p=0.002631

  • SiteNameDMW37: p=0.002702

  • SiteNameDMW38: p=0.034256

  • SiteNameDMW43: p=1.25×10−5

  • SiteNameMW09: p=0.004592

  • SiteNameMW16: p=0.016760

  • SiteNameMW18: p=0.002114

  • SiteNameMW20: p=2.37×10−5

  • SiteNameMW22: p=1.26×10−5

  • SiteNameMW25: p=0.000848

  • SiteNameMW27: p=9.14×10−7

  • SiteNameMW46: p=0.040566

  • SiteNameMW47: p=0.008641

  • SiteNameMW48: p=2.74×10−10

  • SiteNameMW53: p=1.21×10−9

  • SiteNameMW55: p=2.55×10−10

  • SiteNameMW61: p=1.46×10−10

  • SiteNameMW62: p=2.29×10−8

  • SiteNameMW63: p=6.17×10−13

  • SiteNameSMW30: p=0.003309

  • SiteNameTC01: p=0.017591

These site names have a statistically significant relationship with the dependent variable (X14DOX). These significant coefficients indicate that the corresponding SiteName levels have a statistically significant effect on X14DOX.

Model Fit Statistics

  • Residual standard error: 0.00151 on 770 degrees of freedom

  • Multiple R-squared (0.4633): This indicates that approximately 46% of the variability in X14DOX is explained by the model.

Identifying High Leverage Points:

Leverage Values: These values indicate how much influence each data point has on the fitted values of the model. High leverage points are those that can potentially have a large impact on the model.

Data Points with High Leverage Points:

# Print high leverage points
#print(high_leverage_points)
print(head(high_leverage_points))
201 203 205 206 207 212 
162 163 164 165 166 167 

Removing the Data with High Leverage Points for Improved Model:

# Remove high leverage points and refit the model if necessary
train_Data_cleaned <- train_Data[-high_leverage_points, ]

Refitting the Model after removing high leverage points:

# Refit the model without high leverage points
full_model_14DOX_cleaned <- lm(X14DOX ~ Date + SiteName, data = train_Data_cleaned)
step_model_14DOX_cleaned <- step(full_model_14DOX_cleaned, direction = "backward", trace = 0)

Summary and Interpretation of the Re-fitted Linear Regression Model

# Summary of the final model without high leverage points
summary(step_model_14DOX_cleaned)

Call:
lm(formula = X14DOX ~ Date + SiteName, data = train_Data_cleaned)

Residuals:
       Min         1Q     Median         3Q        Max 
-0.0041944 -0.0005018 -0.0000373  0.0002673  0.0074534 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)   -3.044e-03  6.119e-04  -4.974 8.17e-07 ***
Date           1.970e-07  3.232e-08   6.095 1.77e-09 ***
SiteNameDMW28 -2.029e-04  5.506e-04  -0.369 0.712606    
SiteNameDMW29 -2.088e-04  5.373e-04  -0.389 0.697649    
SiteNameDMW30  2.757e-03  5.842e-04   4.719 2.83e-06 ***
SiteNameDMW31  3.311e-03  6.315e-04   5.243 2.07e-07 ***
SiteNameDMW32  1.561e-03  5.376e-04   2.903 0.003805 ** 
SiteNameDMW33 -2.095e-04  5.662e-04  -0.370 0.711414    
SiteNameDMW34 -1.669e-04  5.259e-04  -0.317 0.751060    
SiteNameDMW35  9.844e-05  6.051e-04   0.163 0.870810    
SiteNameDMW36  1.628e-03  5.372e-04   3.031 0.002521 ** 
SiteNameDMW37  1.536e-03  5.081e-04   3.023 0.002586 ** 
SiteNameDMW38  1.344e-03  6.307e-04   2.131 0.033459 *  
SiteNameDMW39 -1.206e-05  5.258e-04  -0.023 0.981713    
SiteNameDMW41 -2.173e-04  5.842e-04  -0.372 0.709996    
SiteNameDMW42 -7.831e-05  4.853e-04  -0.161 0.871841    
SiteNameDMW43  2.675e-03  6.055e-04   4.417 1.15e-05 ***
SiteNameDMW44  6.896e-04  5.837e-04   1.181 0.237809    
SiteNameMW01   7.093e-04  5.124e-04   1.384 0.166698    
SiteNameMW02   7.170e-04  4.734e-04   1.515 0.130247    
SiteNameMW03   5.924e-04  4.456e-04   1.329 0.184126    
SiteNameMW04   8.459e-04  4.350e-04   1.945 0.052191 .  
SiteNameMW05   8.712e-04  5.853e-04   1.488 0.137089    
SiteNameMW06   7.327e-04  4.701e-04   1.559 0.119500    
SiteNameMW07   7.459e-04  4.886e-04   1.526 0.127323    
SiteNameMW08   5.540e-04  4.450e-04   1.245 0.213565    
SiteNameMW09   1.240e-03  4.345e-04   2.853 0.004458 ** 
SiteNameMW10   8.002e-04  4.773e-04   1.677 0.094044 .  
SiteNameMW11   6.639e-04  4.532e-04   1.465 0.143373    
SiteNameMW12   8.022e-04  4.913e-04   1.633 0.102897    
SiteNameMW15   1.769e-04  6.050e-04   0.292 0.770026    
SiteNameMW16   1.214e-03  5.044e-04   2.407 0.016325 *  
SiteNameMW17   1.520e-04  4.886e-04   0.311 0.755903    
SiteNameMW18   1.473e-03  4.756e-04   3.097 0.002027 ** 
SiteNameMW19  -6.740e-05  5.045e-04  -0.134 0.893770    
SiteNameMW20   2.161e-03  5.058e-04   4.272 2.20e-05 ***
SiteNameMW21  -4.791e-05  5.243e-04  -0.091 0.927215    
SiteNameMW22   2.498e-03  5.658e-04   4.415 1.16e-05 ***
SiteNameMW23  -2.675e-04  6.612e-04  -0.405 0.685924    
SiteNameMW24   8.472e-05  5.074e-04   0.167 0.867429    
SiteNameMW25   1.706e-03  5.071e-04   3.365 0.000805 ***
SiteNameMW27   2.478e-03  4.985e-04   4.971 8.28e-07 ***
SiteNameMW45  -2.176e-04  6.310e-04  -0.345 0.730254    
SiteNameMW46   1.142e-03  5.538e-04   2.061 0.039613 *  
SiteNameMW47   1.753e-03  6.629e-04   2.645 0.008351 ** 
SiteNameMW48   3.911e-03  6.086e-04   6.426 2.37e-10 ***
SiteNameMW51  -2.335e-04  6.319e-04  -0.369 0.711911    
SiteNameMW53   3.412e-03  5.519e-04   6.182 1.05e-09 ***
SiteNameMW55   4.082e-03  6.340e-04   6.437 2.20e-10 ***
SiteNameMW58  -2.770e-04  6.648e-04  -0.417 0.677022    
SiteNameMW60   5.077e-04  6.344e-04   0.800 0.423875    
SiteNameMW63   4.891e-03  6.652e-04   7.354 5.17e-13 ***
SiteNameSMW30  1.630e-03  5.507e-04   2.960 0.003176 ** 
SiteNameTC01   1.582e-03  6.619e-04   2.390 0.017099 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.001504 on 732 degrees of freedom
Multiple R-squared:  0.4362,    Adjusted R-squared:  0.3954 
F-statistic: 10.69 on 53 and 732 DF,  p-value: < 2.2e-16
  • Residuals: Min (-0.0041944), 1Q(-0.0005018), Median(-0.0000373), 3Q(0.0002673) and Max(0.0074534) shows the distribution of the residuals (difference between observed and predicted values0.0074534). The small values of the residuals indicate that the model’s prediction are close to the observed values.

  • Intercept: The intercept is statistically significant at the 5% level, indicating a small but significant base level of X14DOX when all SiteName factors are zero (though in practice, SiteName factors wouldn’t be zero).

Significant SiteName Coefficients:

Here are the site names that are statistically significant (p-value < 0.05) in your cleaned data model:

  • SiteNameDMW30: p=2.83×10−6
  • SiteNameDMW31: p=2.07×10−7

  • SiteNameDMW32: p=0.003805

  • SiteNameDMW36: p=0.002521

  • SiteNameDMW37: p=0.002586

  • SiteNameDMW38: p=0.033459

  • SiteNameDMW43: p=1.15×10−5

  • SiteNameMW09: p=0.004458

  • SiteNameMW16: p=0.016325

  • SiteNameMW18: p=0.002027

  • SiteNameMW20: p=2.20×10−5

  • SiteNameMW22: p=1.16×10−5

  • SiteNameMW25: p=0.000805

  • SiteNameMW27: p=8.28×10−7

  • SiteNameMW46: p=0.039613

  • SiteNameMW47: p=0.008351

  • SiteNameMW48: p=2.37×10−10

  • SiteNameMW53: p=1.05×10−9

  • SiteNameMW55: p=2.20×10−10

  • SiteNameMW63: p=5.17×10−13

  • SiteNameSMW30: p=0.003176

  • SiteNameTC01: p=0.017099

These significant coefficients indicate that the corresponding SiteName levels have a statistically significant effect on X14DOX.

Model Fit Statistics

  • Residual standard error: 0.001504 on 732 degrees of freedom

  • Multiple R-squared (0.4362): This indicates that approximately 44% of the variability in X14DOXis explained by the model.

Conclusion:

The model explains a significant portion of the variance in X14DOX(R-squared = 44%). Several SiteName levels are significant, indicating that these levels are important for predicting X14DOX. The model has a good fit, but there might be room for improvement or further refinement to increase the explained variance.

Checking for Model Assumption:

# Check for model assumptions
par(mfrow=c(2, 2))
plot(step_model_14DOX_cleaned)

Durbin-Watson test:

The Durbin-Watson test is used to detect the presence of autocorrelation in the residuals of a regression analysis.

# Durbin-Watson test for autocorrelation
dwtest(step_model_14DOX_cleaned)

    Durbin-Watson test

data:  step_model_14DOX_cleaned
DW = 1.2273, p-value < 2.2e-16
alternative hypothesis: true autocorrelation is greater than 0
  • DW = 1.23: The Durbin-Watson statistic value is approximately. The DW statistic ranges from 0 to 4. A value around 2 suggests no autocorrelation. Values less than 2 indicate positive autocorrelation, while values greater than 2 indicate negative autocorrelation.

  • P-value = <2.2e-16: The p-value is <2.2e-16. This value is used to determine the significance of the test result. Typically, a p-value less than 0.05 indicates that the null hypothesis can be rejected with a very high level of confidence.

  • Alternative hypothesis: true autocorrelation is greater than 0: The alternative hypothesis in this test suggests that there is positive autocorrelation in the residuals (i.e., the residuals are positively correlated).

Conclusion

The Durbin-Watson test statistic of 1.23 and the very low p-value suggest that there is significant positive autocorrelation in the residuals of the model step_model_14DOX_cleaned. Positive autocorrelation means that consecutive residuals are correlated with each other, which can indicate that the model might be missing some important variables or that there are patterns in the data that are not captured by the model.

Variance Inflation Factor (VIF) for Multicollinearity

# Variance Inflation Factor (VIF) for multicollinearity
vif(step_model_14DOX_cleaned)
             GVIF Df GVIF^(1/(2*Df))
Date     3.142861  1        1.772811
SiteName 3.142861 52        1.011072

Residual Plots Vs Time or Fitted Values to Visualize Autocorrelation

####Create Plots of Residuals Vs Time or Fitted Values to Visually Inspect the AutoCorrelation

plot(residuals(step_model_14DOX_cleaned), type = "l", main = "Residuals over Time")

acf(residuals(step_model_14DOX_cleaned), main = "ACF of Residuals")

If most autocorrelation coefficients fall within the confidence bands, it indicates that the residuals are roughly uncorrelated. If many coefficients fall outside the confidence bands, especially at low lags, this suggests autocorrelation in the residuals. A random pattern with most points within the confidence bands indicates that the residuals are likely independent. A pattern where many points outside the bands or showing systematic trends (e.g., gradually decreasing) indicates that residuals are autocorrelated.

Factoring SiteName in testData to Match with the Non-High Leverage trainData (Categorical Variable)

# Ensure factor levels in test set match training set

test_Data$SiteName <- factor(test_Data$SiteName , levels = levels(train_Data_cleaned$SiteName))

The above code ensures that the SiteName factor levels in the testData matches those in the Non-High Leverage trainData dataset. This step is crucial when you want to make predictions on the test data using a model trained on the training data, especially when dealing with categorical variables.

Removing Data Points with High Levels for Prediction of Actual Vs Predicted Values

Removing data points with high leverage and influence is important in linear regression modeling to ensure accurate, reliable, and interpretable results. By addressing these points, we can improve the model’s robustness, reduce bias and variance, and achieve better generalization to new data

# Filter out rows in testData that have levels not present in trainData_cleaned
test_Data <- test_Data[!test_Data$SiteName %in% c('DMW40', 'MW15R', 'MW26', 'MW49', 'MW50', 'MW59', 'MW62', 'SMW43', 'TC10'), ]

Result of the Prediction

Having Trained our model to predict X14DOX based on the test-data. We shall now proceed to investigate how efficient our model is. The correlation coefficient of 0.63 indicates a strong positive relationship between the actual and predicted values, suggesting that the model performs well. However, further analysis and refinement can be conducted to improve the model’s accuracy and reliability.

# Predict on test data
predictions_14DOX_cleaned <- predict(step_model_14DOX_cleaned, newdata = test_Data)

Model Evaluation

# Evaluate the model
X14DOX_results_cleaned <- data.frame(WellName = test_Data$SiteName, Actual = test_Data$X14DOX, Predicted = predictions_14DOX_cleaned)

#print(X14DOX_results_cleaned)
print(cor(X14DOX_results_cleaned$Actual, X14DOX_results_cleaned$Predicted))
[1] 0.628662

A correlation of 0.63 suggests a strong positive linear relationship between the actual and predicted values. This means that as the actual values increase, the predicted values tend to also increase, and vice versa. While the correlation provides a measure of the strength of the linear relationship, it does not directly indicate how well the model fits the data in terms of variance explained. For this, metrics such as R-squared, RMSE (Root Mean Square Error), and MAE (Mean Absolute Error) are also important.

Visual Inspection of Actual Vs Predicted Values

## Visual Inspection ##
plot(X14DOX_results_cleaned$Actual, X14DOX_results_cleaned$Predicted, main = "Actual vs Predicted Values", xlab = "Actual", ylab = "Predicted")
abline(lm(X14DOX_results_cleaned$Predicted ~ X14DOX_results_cleaned$Actual), col = "blue")

Residual Analysis

Analyze the residuals (actual - predicted) to check for patterns that might indicate model deficiencies. The residuals plot suggests that while the model captures some patterns in the data, there are potential issues with heteroscedasticity and clustering of residuals. Addressing these issues can involve further model diagnostics, transformations, or considering alternative modeling approaches to improve the fit and reliability of the predictions.

### Residual Analysis###
residuals <- X14DOX_results_cleaned$Actual - X14DOX_results_cleaned$Predicted
plot(residuals, main = "Residuals", ylab = "Residuals")

Model Evaluation Metrics

### Model Evaluation Metrics  ##
rmse <- sqrt(mean((X14DOX_results_cleaned$Actual - X14DOX_results_cleaned$Predicted)^2))
print("Root Mean Square Error:")
[1] "Root Mean Square Error:"
print(rmse)
[1] 0.001520784
mae <- mean(abs(X14DOX_results_cleaned$Actual - X14DOX_results_cleaned$Predicted))
print("Mean Absolute Error:")
[1] "Mean Absolute Error:"
print(mae)
[1] 0.0008952809
r_squared <- summary(lm(X14DOX_results_cleaned$Predicted ~ X14DOX_results_cleaned$Actual))$r.squared
print("Root Square Error:")
[1] "Root Square Error:"
print(r_squared)
[1] 0.3952159

Summary of Model Performan0787

  • RMSE (0.0015): Indicates the average error magnitude in predicting the dependent variable. A very low value suggests that the model’s predictions are close to the actual value564

  • MAE (0.000895): Shows the average absolute error in predictions. The low value reinforces the indication from RMSE that the model performs w11ll.

  • R^2 (0.40): Indica1es that about 40% of the variance in the dependent variable is explained by the model. While this shows a decent fit, there’s still room for improvement to capture more variance.

Time series forecasting for 1,4-DiOxane

Aggregating data by month for a better time series analysis

For the Time Series Forecasting,

# Time series forecasting for X14DOX
# Aggregating data by month for a better time series analysis
data_ts_14DOX <- data %>%
  group_by(Date, SiteName) %>%
  summarize(X14DOX = mean(X14DOX), .groups = "drop")

Convert to time series object

# Convert to time series object
ts_14DOX <- ts(data_ts_14DOX$X14DOX, start = c(year(min(data_ts_14DOX$Date)), month(min(data_ts_14DOX$Date))), frequency = 12)
#Decompose time series
decomp_14DOX <- stl(ts_14DOX, s.window="periodic")
# Plot decompositions
plot(decomp_14DOX)

Fit ARIMA model for 14DOX

# Fit ARIMA model
fit_14DOX <- auto.arima(ts_14DOX)
summary(fit_14DOX)
Series: ts_14DOX 
ARIMA(1,1,2) with drift 

Coefficients:
         ar1      ma1     ma2  drift
      0.6656  -1.5043  0.5135      0
s.e.  0.0981   0.1095  0.1066      0

sigma^2 = 2.741e-06:  log likelihood = 4975.39
AIC=-9940.79   AICc=-9940.73   BIC=-9916.26

Training set error measures:
                        ME        RMSE          MAE  MPE MAPE      MASE
Training set -3.531053e-06 0.001651407 0.0009300744 -Inf  Inf 0.8089326
                   ACF1
Training set 0.00661027

Forecast for 14DOX in the next 12 months

# Forecast for the next 12 months
forecast_14DOX <- forecast(fit_14DOX, h = 12)
plot(forecast_14DOX)

# Combine forecasts into a data frame
future_forecasts_14DOX <- data.frame(
  Date = seq.Date(from = max(data$Date) + 1, by = "month", length.out = 48),
  X14DOX_Forecast = as.numeric(forecast_14DOX$mean)
)
# Print future forecasts
print(future_forecasts_14DOX)
         Date X14DOX_Forecast
1  2024-01-31     0.002528237
2  2024-03-02     0.002519341
3  2024-03-31     0.002514303
4  2024-05-01     0.002511834
5  2024-05-31     0.002511073
6  2024-07-01     0.002511451
7  2024-07-31     0.002512586
8  2024-08-31     0.002514225
9  2024-10-01     0.002516199
10 2024-10-31     0.002518397
11 2024-12-01     0.002520743
12 2024-12-31     0.002523188
13 2025-01-31     0.002528237
14 2025-03-03     0.002519341
15 2025-03-31     0.002514303
16 2025-05-01     0.002511834
17 2025-05-31     0.002511073
18 2025-07-01     0.002511451
19 2025-07-31     0.002512586
20 2025-08-31     0.002514225
21 2025-10-01     0.002516199
22 2025-10-31     0.002518397
23 2025-12-01     0.002520743
24 2025-12-31     0.002523188
25 2026-01-31     0.002528237
26 2026-03-03     0.002519341
27 2026-03-31     0.002514303
28 2026-05-01     0.002511834
29 2026-05-31     0.002511073
30 2026-07-01     0.002511451
31 2026-07-31     0.002512586
32 2026-08-31     0.002514225
33 2026-10-01     0.002516199
34 2026-10-31     0.002518397
35 2026-12-01     0.002520743
36 2026-12-31     0.002523188
37 2027-01-31     0.002528237
38 2027-03-03     0.002519341
39 2027-03-31     0.002514303
40 2027-05-01     0.002511834
41 2027-05-31     0.002511073
42 2027-07-01     0.002511451
43 2027-07-31     0.002512586
44 2027-08-31     0.002514225
45 2027-10-01     0.002516199
46 2027-10-31     0.002518397
47 2027-12-01     0.002520743
48 2027-12-31     0.002523188

Method2: Time Series Forecast for each Site w.r.t 14DOX

# Time series forecasting for X14DOX considering SiteName
# Aggregating data by month for a better time series analysis
data_ts <- data %>%
  group_by(Date, SiteName) %>%
  summarize(X14DOX = mean(X14DOX), .groups = "drop")

print(head(data_ts))
# A tibble: 6 × 3
  Date       SiteName X14DOX
  <date>     <fct>     <dbl>
1 1994-09-01 MW02          0
2 1994-09-01 MW03          0
3 1994-09-01 MW04          0
4 1994-09-01 MW05          0
5 1994-09-01 MW06          0
6 1994-09-01 MW08          0
# Create an empty list to store forecasts for each site
forecasts_list <- list()
# Iterate over each site
for(site in unique(data_ts$SiteName)) {site_data <- data_ts %>% filter(SiteName == site)

# Check if there is enough data for decomposition
if(nrow(site_data) < 24) {  # Ensure at least two full years of data for monthly series
  warning(paste("Not enough data for site", site, ". Skipping decomposition and ARIMA modeling."))
  next
}

# Convert to time series object
ts_X14DOX <- ts(site_data$X14DOX, start = c(year(min(site_data$Date)), month(min(site_data$Date))), frequency = 12)

# Check if time series has sufficient length for decomposition
if (length(ts_X14DOX) < 24) {  # Ensure at least two full periods of data for decomposition
  warning(paste("Series is too short for STL decomposition for site", site, ". Skipping decomposition and ARIMA modeling."))
  next
}

# Decompose time series
decomp_14DOX <- tryCatch({
  stl(ts_X14DOX, s.window = "periodic")
}, error = function(e) {
  warning(paste("Error in STL decomposition for site", site, ":", e$message))
  NULL
})

if (!is.null(decomp_14DOX)) {
  # Plot decompositions
  plot(decomp_14DOX, main = paste("Decomposition for Site", site))
}

# Fit ARIMA model
fit_X14DOX <- auto.arima(ts_X14DOX)
summary(fit_X14DOX)

# Forecast for the next 12 months
forecast_X14DOX <- forecast(fit_X14DOX, h = 12)
plot(forecast_X14DOX, main = paste("Forecast for Site", site))

# Store forecast in the list
forecasts_list[[site]] <- data.frame(
  Date = seq.Date(from = max(site_data$Date) + 1, by = "month", length.out = 12),
  SiteName = site,
  X14DOX_Forecast = as.numeric(forecast_X14DOX$mean)
)
}

Warning: Not enough data for site MW05 . Skipping decomposition and ARIMA
modeling.

Warning: Not enough data for site MW01 . Skipping decomposition and ARIMA
modeling.

Warning: Not enough data for site MW15 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW16 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW17 . Skipping decomposition and ARIMA
modeling.
Warning in value[[3L]](cond): Error in STL decomposition for site MW18 : series
is not periodic or has less than two periods

Warning: Not enough data for site MW19 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW20 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW21 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site TC01 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site TC10 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW22 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW23 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW24 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW25 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW26 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW27 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW28 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site SMW30 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW32 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW29 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW30 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW31 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW33 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW39 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW35 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW36 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW37 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW38 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW34 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW40 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW41 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW42 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW44 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site DMW43 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site SMW43 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW45 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW47 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW48 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW49 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW46 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW50 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW51 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW54 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW15R . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW52 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW53 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW55 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW58 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW59 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW60 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW61 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW62 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW63 . Skipping decomposition and ARIMA
modeling.
Warning: Not enough data for site MW57 . Skipping decomposition and ARIMA
modeling.

# Combine all forecasts into a single data frame
all_forecasts <- do.call(rbind, forecasts_list)

# Print future forecasts
print(all_forecasts)
               Date SiteName X14DOX_Forecast
MW02.1   2012-06-15     MW02    0.0000000000
MW02.2   2012-07-15     MW02    0.0000000000
MW02.3   2012-08-15     MW02    0.0000000000
MW02.4   2012-09-15     MW02    0.0000000000
MW02.5   2012-10-15     MW02    0.0000000000
MW02.6   2012-11-15     MW02    0.0000000000
MW02.7   2012-12-15     MW02    0.0000000000
MW02.8   2013-01-15     MW02    0.0000000000
MW02.9   2013-02-15     MW02    0.0000000000
MW02.10  2013-03-15     MW02    0.0000000000
MW02.11  2013-04-15     MW02    0.0000000000
MW02.12  2013-05-15     MW02    0.0000000000
MW03.1   2024-01-24     MW03    0.0005968219
MW03.2   2024-02-24     MW03    0.0005968219
MW03.3   2024-03-24     MW03    0.0005968219
MW03.4   2024-04-24     MW03    0.0005968219
MW03.5   2024-05-24     MW03    0.0005968219
MW03.6   2024-06-24     MW03    0.0005968219
MW03.7   2024-07-24     MW03    0.0005968219
MW03.8   2024-08-24     MW03    0.0005968219
MW03.9   2024-09-24     MW03    0.0005968219
MW03.10  2024-10-24     MW03    0.0005968219
MW03.11  2024-11-24     MW03    0.0005968219
MW03.12  2024-12-24     MW03    0.0005968219
MW04.1   2023-01-20     MW04    0.0065882497
MW04.2   2023-02-20     MW04    0.0065882497
MW04.3   2023-03-20     MW04    0.0065882497
MW04.4   2023-04-20     MW04    0.0065882497
MW04.5   2023-05-20     MW04    0.0065882497
MW04.6   2023-06-20     MW04    0.0065882497
MW04.7   2023-07-20     MW04    0.0065882497
MW04.8   2023-08-20     MW04    0.0065882497
MW04.9   2023-09-20     MW04    0.0065882497
MW04.10  2023-10-20     MW04    0.0065882497
MW04.11  2023-11-20     MW04    0.0065882497
MW04.12  2023-12-20     MW04    0.0065882497
MW06.1   2012-06-14     MW06    0.0000000000
MW06.2   2012-07-14     MW06    0.0000000000
MW06.3   2012-08-14     MW06    0.0000000000
MW06.4   2012-09-14     MW06    0.0000000000
MW06.5   2012-10-14     MW06    0.0000000000
MW06.6   2012-11-14     MW06    0.0000000000
MW06.7   2012-12-14     MW06    0.0000000000
MW06.8   2013-01-14     MW06    0.0000000000
MW06.9   2013-02-14     MW06    0.0000000000
MW06.10  2013-03-14     MW06    0.0000000000
MW06.11  2013-04-14     MW06    0.0000000000
MW06.12  2013-05-14     MW06    0.0000000000
MW08.1   2019-01-18     MW08    0.0001934801
MW08.2   2019-02-18     MW08    0.0001963875
MW08.3   2019-03-18     MW08    0.0002014019
MW08.4   2019-04-18     MW08    0.0002080622
MW08.5   2019-05-18     MW08    0.0002127030
MW08.6   2019-06-18     MW08    0.0002182355
MW08.7   2019-07-18     MW08    0.0002238589
MW08.8   2019-08-18     MW08    0.0002290801
MW08.9   2019-09-18     MW08    0.0002345677
MW08.10  2019-10-18     MW08    0.0002400071
MW08.11  2019-11-18     MW08    0.0002453826
MW08.12  2019-12-18     MW08    0.0002508239
MW09.1   2024-01-31     MW09    0.0027161093
MW09.2   2024-03-02     MW09    0.0021543140
MW09.3   2024-03-31     MW09    0.0021543140
MW09.4   2024-05-01     MW09    0.0021543140
MW09.5   2024-05-31     MW09    0.0021543140
MW09.6   2024-07-01     MW09    0.0021543140
MW09.7   2024-07-31     MW09    0.0021543140
MW09.8   2024-08-31     MW09    0.0021543140
MW09.9   2024-10-01     MW09    0.0021543140
MW09.10  2024-10-31     MW09    0.0021543140
MW09.11  2024-12-01     MW09    0.0021543140
MW09.12  2024-12-31     MW09    0.0021543140
MW10.1   2012-06-13     MW10    0.0000000000
MW10.2   2012-07-13     MW10    0.0000000000
MW10.3   2012-08-13     MW10    0.0000000000
MW10.4   2012-09-13     MW10    0.0000000000
MW10.5   2012-10-13     MW10    0.0000000000
MW10.6   2012-11-13     MW10    0.0000000000
MW10.7   2012-12-13     MW10    0.0000000000
MW10.8   2013-01-13     MW10    0.0000000000
MW10.9   2013-02-13     MW10    0.0000000000
MW10.10  2013-03-13     MW10    0.0000000000
MW10.11  2013-04-13     MW10    0.0000000000
MW10.12  2013-05-13     MW10    0.0000000000
MW11.1   2021-02-09     MW11    0.0000000000
MW11.2   2021-03-09     MW11    0.0000000000
MW11.3   2021-04-09     MW11    0.0000000000
MW11.4   2021-05-09     MW11    0.0000000000
MW11.5   2021-06-09     MW11    0.0000000000
MW11.6   2021-07-09     MW11    0.0000000000
MW11.7   2021-08-09     MW11    0.0000000000
MW11.8   2021-09-09     MW11    0.0000000000
MW11.9   2021-10-09     MW11    0.0000000000
MW11.10  2021-11-09     MW11    0.0000000000
MW11.11  2021-12-09     MW11    0.0000000000
MW11.12  2022-01-09     MW11    0.0000000000
MW12.1   2008-06-19     MW12    0.0000000000
MW12.2   2008-07-19     MW12    0.0000000000
MW12.3   2008-08-19     MW12    0.0000000000
MW12.4   2008-09-19     MW12    0.0000000000
MW12.5   2008-10-19     MW12    0.0000000000
MW12.6   2008-11-19     MW12    0.0000000000
MW12.7   2008-12-19     MW12    0.0000000000
MW12.8   2009-01-19     MW12    0.0000000000
MW12.9   2009-02-19     MW12    0.0000000000
MW12.10  2009-03-19     MW12    0.0000000000
MW12.11  2009-04-19     MW12    0.0000000000
MW12.12  2009-05-19     MW12    0.0000000000
MW07.1   2012-06-13     MW07    0.0000000000
MW07.2   2012-07-13     MW07    0.0000000000
MW07.3   2012-08-13     MW07    0.0000000000
MW07.4   2012-09-13     MW07    0.0000000000
MW07.5   2012-10-13     MW07    0.0000000000
MW07.6   2012-11-13     MW07    0.0000000000
MW07.7   2012-12-13     MW07    0.0000000000
MW07.8   2013-01-13     MW07    0.0000000000
MW07.9   2013-02-13     MW07    0.0000000000
MW07.10  2013-03-13     MW07    0.0000000000
MW07.11  2013-04-13     MW07    0.0000000000
MW07.12  2013-05-13     MW07    0.0000000000
DMW13.1  2024-01-26    DMW13    0.0004736931
DMW13.2  2024-02-26    DMW13    0.0004736931
DMW13.3  2024-03-26    DMW13    0.0004736931
DMW13.4  2024-04-26    DMW13    0.0004736931
DMW13.5  2024-05-26    DMW13    0.0004736931
DMW13.6  2024-06-26    DMW13    0.0004736931
DMW13.7  2024-07-26    DMW13    0.0004736931
DMW13.8  2024-08-26    DMW13    0.0004736931
DMW13.9  2024-09-26    DMW13    0.0004736931
DMW13.10 2024-10-26    DMW13    0.0004736931
DMW13.11 2024-11-26    DMW13    0.0004736931
DMW13.12 2024-12-26    DMW13    0.0004736931
MW18.1   2024-01-19     MW18    0.0005968219
MW18.2   2024-02-19     MW18    0.0005968219
MW18.3   2024-03-19     MW18    0.0005968219
MW18.4   2024-04-19     MW18    0.0005968219
MW18.5   2024-05-19     MW18    0.0005968219
MW18.6   2024-06-19     MW18    0.0005968219
MW18.7   2024-07-19     MW18    0.0005968219
MW18.8   2024-08-19     MW18    0.0005968219
MW18.9   2024-09-19     MW18    0.0005968219
MW18.10  2024-10-19     MW18    0.0005968219
MW18.11  2024-11-19     MW18    0.0005968219
MW18.12  2024-12-19     MW18    0.0005968219