title: “Project 2”

author: “Deepa Sharma/William Aiken”

date: “2024-11-13”

output: html_document


Data Acquisition

Here we can preview the data structure:

student_train = read.csv('https://raw.githubusercontent.com/deepasharma06/Data-624/refs/heads/main/StudentData_training.csv')

student_eval = read.csv('https://raw.githubusercontent.com/deepasharma06/Data-624/refs/heads/main/StudentEvaluation_test.csv')

head(student_train) %>% kable()
Brand.Code Carb.Volume Fill.Ounces PC.Volume Carb.Pressure Carb.Temp PSC PSC.Fill PSC.CO2 Mnf.Flow Carb.Pressure1 Fill.Pressure Hyd.Pressure1 Hyd.Pressure2 Hyd.Pressure3 Hyd.Pressure4 Filler.Level Filler.Speed Temperature Usage.cont Carb.Flow Density MFR Balling Pressure.Vacuum PH Oxygen.Filler Bowl.Setpoint Pressure.Setpoint Air.Pressurer Alch.Rel Carb.Rel Balling.Lvl
B 5.340000 23.96667 0.2633333 68.2 141.2 0.104 0.26 0.04 -100 118.8 46.0 0 NA NA 118 121.2 4002 66.0 16.18 2932 0.88 725.0 1.398 -4.0 8.36 0.022 120 46.4 142.6 6.58 5.32 1.48
A 5.426667 24.00667 0.2386667 68.4 139.6 0.124 0.22 0.04 -100 121.6 46.0 0 NA NA 106 118.6 3986 67.6 19.90 3144 0.92 726.8 1.498 -4.0 8.26 0.026 120 46.8 143.0 6.56 5.30 1.56
B 5.286667 24.06000 0.2633333 70.8 144.8 0.090 0.34 0.16 -100 120.2 46.0 0 NA NA 82 120.0 4020 67.0 17.76 2914 1.58 735.0 3.142 -3.8 8.94 0.024 120 46.6 142.0 7.66 5.84 3.28
A 5.440000 24.00667 0.2933333 63.0 132.6 NA 0.42 0.04 -100 115.2 46.4 0 0 0 92 117.8 4012 65.6 17.42 3062 1.54 730.6 3.042 -4.4 8.24 0.030 120 46.0 146.2 7.14 5.42 3.04
A 5.486667 24.31333 0.1113333 67.2 136.8 0.026 0.16 0.12 -100 118.4 45.8 0 0 0 92 118.6 4010 65.6 17.68 3054 1.54 722.8 3.042 -4.4 8.26 0.030 120 46.0 146.2 7.14 5.44 3.04
A 5.380000 23.92667 0.2693333 66.6 138.4 0.090 0.24 0.04 -100 119.6 45.6 0 0 0 116 120.2 4014 66.2 23.82 2948 1.52 738.8 2.992 -4.4 8.32 0.024 120 46.0 146.6 7.16 5.44 3.02

Missing values

colSums(is.na(student_train))
##        Brand.Code       Carb.Volume       Fill.Ounces         PC.Volume 
##                 0                10                38                39 
##     Carb.Pressure         Carb.Temp               PSC          PSC.Fill 
##                27                26                33                23 
##           PSC.CO2          Mnf.Flow    Carb.Pressure1     Fill.Pressure 
##                39                 2                32                22 
##     Hyd.Pressure1     Hyd.Pressure2     Hyd.Pressure3     Hyd.Pressure4 
##                11                15                15                30 
##      Filler.Level      Filler.Speed       Temperature        Usage.cont 
##                20                57                14                 5 
##         Carb.Flow           Density               MFR           Balling 
##                 2                 1               212                 1 
##   Pressure.Vacuum                PH     Oxygen.Filler     Bowl.Setpoint 
##                 0                 4                12                 2 
## Pressure.Setpoint     Air.Pressurer          Alch.Rel          Carb.Rel 
##                12                 0                 9                10 
##       Balling.Lvl 
##                 1

Correlation Plot

# Select only numeric columns

numeric_data <- student_train %>% select(where(is.numeric))

# Calculate the correlation matrix

correlation_matrix <- cor(numeric_data, use = "pairwise.complete.obs")

# Create the correlation plot

corrplot(correlation_matrix, tl.col = "black", tl.cex = 0.6, order = 'AOE')

Distribution Visualization

mlt.train <- student_train  # Use your actual dataframe name

mlt.train$ID <- rownames(mlt.train)  # Assign row names to ID

mlt.train <- melt(mlt.train, id.vars = "ID")  # Melt the data

# Convert the value column to numeric

mlt.train$value <- as.numeric(mlt.train$value)

# Create histograms of the predictors

ggplot(data = mlt.train, aes(x = value)) +

  geom_histogram(binwidth = 6, fill = "skyblue", color = "black", alpha = 0.8) +  # Adjust binwidth as needed

  facet_wrap(~ variable, scales = "free") +

  labs(title = "Distributions of Predictors", x = "Predictors", y = "Frequency") +

  theme_minimal(base_size = 9) +  # Use a minimal theme for better clarity

  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())  # Clean up grid lines

Build initial linear regression model and access the VIF
library(car)
library(caret)
# Setting up the model

model <- lm(PH ~ ., data = student_train)
summary(model)
## 
## Call:
## lm(formula = PH ~ ., data = student_train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54899 -0.07379  0.00727  0.08502  0.44635 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.190e+01  1.275e+00   9.328  < 2e-16 ***
## Brand.CodeA       -7.697e-02  3.371e-02  -2.284 0.022496 *  
## Brand.CodeB        5.798e-02  1.549e-02   3.744 0.000186 ***
## Brand.CodeC       -9.798e-02  1.672e-02  -5.862 5.31e-09 ***
## Brand.CodeD       -2.011e-02  3.827e-02  -0.525 0.599363    
## Carb.Volume       -2.249e-01  1.138e-01  -1.976 0.048259 *  
## Fill.Ounces       -1.101e-01  3.551e-02  -3.100 0.001959 ** 
## PC.Volume          1.751e-02  6.417e-02   0.273 0.784990    
## Carb.Pressure      7.195e-03  5.337e-03   1.348 0.177725    
## Carb.Temp         -5.106e-03  4.180e-03  -1.221 0.222073    
## PSC               -3.704e-02  6.208e-02  -0.597 0.550794    
## PSC.Fill          -1.856e-02  2.476e-02  -0.749 0.453717    
## PSC.CO2           -1.158e-01  6.923e-02  -1.672 0.094683 .  
## Mnf.Flow          -6.998e-04  5.075e-05 -13.789  < 2e-16 ***
## Carb.Pressure1     6.039e-03  7.571e-04   7.976 2.46e-15 ***
## Fill.Pressure     -3.697e-03  1.893e-03  -1.953 0.050927 .  
## Hyd.Pressure1      1.355e-04  3.922e-04   0.345 0.729793    
## Hyd.Pressure2     -1.720e-03  5.634e-04  -3.053 0.002298 ** 
## Hyd.Pressure3      3.569e-03  6.409e-04   5.569 2.89e-08 ***
## Hyd.Pressure4      1.962e-04  4.214e-04   0.466 0.641613    
## Filler.Level      -8.615e-04  8.986e-04  -0.959 0.337821    
## Filler.Speed       1.672e-05  2.706e-05   0.618 0.536787    
## Temperature       -1.680e-02  3.191e-03  -5.264 1.55e-07 ***
## Usage.cont        -7.156e-03  1.271e-03  -5.630 2.04e-08 ***
## Carb.Flow          1.166e-05  4.638e-06   2.514 0.011999 *  
## Density           -1.252e-01  3.010e-02  -4.161 3.30e-05 ***
## MFR               -9.439e-05  1.460e-04  -0.647 0.518019    
## Balling           -1.944e-01  3.909e-02  -4.973 7.11e-07 ***
## Pressure.Vacuum   -5.030e-02  1.004e-02  -5.010 5.91e-07 ***
## Oxygen.Filler     -4.289e-01  8.473e-02  -5.062 4.52e-07 ***
## Bowl.Setpoint      2.779e-03  9.281e-04   2.994 0.002788 ** 
## Pressure.Setpoint -2.866e-03  2.536e-03  -1.130 0.258485    
## Air.Pressurer     -1.245e-03  2.528e-03  -0.492 0.622521    
## Alch.Rel           6.719e-02  3.211e-02   2.092 0.036534 *  
## Carb.Rel           1.492e-01  6.102e-02   2.444 0.014589 *  
## Balling.Lvl        2.585e-01  4.525e-02   5.712 1.27e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1282 on 2093 degrees of freedom
##   (442 observations deleted due to missingness)
## Multiple R-squared:  0.4488, Adjusted R-squared:  0.4396 
## F-statistic:  48.7 on 35 and 2093 DF,  p-value: < 2.2e-16
We see that ‘Brand.Code’ is highly collinear
# Calculating VIF
vif_values <- vif(model)
vif_values
##                         GVIF Df GVIF^(1/(2*Df))
## Brand.Code        161.462006  4        1.888030
## Carb.Volume        17.895899  1        4.230354
## Fill.Ounces         1.181213  1        1.086836
## PC.Volume           1.712473  1        1.308615
## Carb.Pressure      43.629727  1        6.605280
## Carb.Temp          35.744235  1        5.978648
## PSC                 1.163257  1        1.078544
## PSC.Fill            1.112048  1        1.054537
## PSC.CO2             1.067962  1        1.033423
## Mnf.Flow            4.979607  1        2.231503
## Carb.Pressure1      1.467316  1        1.211328
## Fill.Pressure       3.741073  1        1.934185
## Hyd.Pressure1       3.000519  1        1.732201
## Hyd.Pressure2      10.635457  1        3.261205
## Hyd.Pressure3      12.964503  1        3.600625
## Hyd.Pressure4       2.686600  1        1.639085
## Filler.Level       25.385854  1        5.038438
## Filler.Speed       11.224377  1        3.350280
## Temperature         1.390864  1        1.179349
## Usage.cont          1.773788  1        1.331836
## Carb.Flow           2.256335  1        1.502110
## Density            16.438808  1        4.054480
## MFR                11.435430  1        3.381631
## Balling           168.721067  1       12.989267
## Pressure.Vacuum     4.289511  1        2.071113
## Oxygen.Filler       1.579977  1        1.256971
## Bowl.Setpoint      26.051364  1        5.104054
## Pressure.Setpoint   3.486682  1        1.867266
## Air.Pressurer       1.234829  1        1.111229
## Alch.Rel           33.466584  1        5.785031
## Carb.Rel            7.483881  1        2.735668
## Balling.Lvl       196.699873  1       14.024973
We see that Brand B appears to have a different relationship than the other brands, Brand B has a R-squared of ~0.7 while all the other brands have a R-squared of ~0.3. We can replace all the brands with a binary variable of whether it is Brand B or not.
I’m also replacing all the missing values with zeros
student_train_0 <- student_train |> mutate(BCB = as.numeric(Brand.Code =='B')) |> select(-Brand.Code)
student_eval_0 <- student_eval |> mutate(BCB = as.numeric(Brand.Code =='B')) |> select(-Brand.Code)

student_train_0[is.na(student_train_0)] <- 0
student_eval_0[is.na(student_eval_0)] <- 0
Next we check if any of the variables have a near zero variance and surprisingly only one variable is identified by default. I changed the parameters ‘freqCut’ and ‘uniqueCut’ but it required large changes to pick up other variables.
nearZeroVar(student_train_0, freqCut = 60/40, uniqueCut = 40, names = TRUE)
## [1] "PC.Volume"     "Fill.Pressure" "Hyd.Pressure1" "Hyd.Pressure2"
## [5] "Hyd.Pressure3" "Carb.Flow"     "MFR"           "Bowl.Setpoint"
Just by removing the ‘Brand.Code’ variable we reduce a lot of the multicollinearity. Here I create a new linear model without ‘Brand.Code’ and check the VIF again.
model <- lm(PH ~ ., data = student_train_0)
summary(model)
## 
## Call:
## lm(formula = PH ~ ., data = student_train_0)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.2484 -0.1000  0.0063  0.1082  4.3610 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        5.533e+00  8.238e-01   6.717 2.28e-11 ***
## Carb.Volume       -1.142e-02  1.900e-02  -0.601 0.547971    
## Fill.Ounces       -4.544e-04  2.133e-03  -0.213 0.831266    
## PC.Volume          2.736e-01  9.988e-02   2.740 0.006193 ** 
## Carb.Pressure     -3.057e-04  8.905e-04  -0.343 0.731420    
## Carb.Temp         -2.178e-04  4.450e-04  -0.489 0.624539    
## PSC               -1.324e-01  1.270e-01  -1.042 0.297427    
## PSC.Fill          -1.249e-01  5.286e-02  -2.363 0.018202 *  
## PSC.CO2           -7.328e-02  1.433e-01  -0.511 0.609141    
## Mnf.Flow          -8.741e-04  1.061e-04  -8.239 2.75e-16 ***
## Carb.Pressure1     2.181e-03  4.597e-04   4.744 2.21e-06 ***
## Fill.Pressure      1.035e-02  1.422e-03   7.283 4.35e-13 ***
## Hyd.Pressure1     -1.115e-03  8.292e-04  -1.345 0.178899    
## Hyd.Pressure2      3.253e-03  1.220e-03   2.666 0.007717 ** 
## Hyd.Pressure3     -1.113e-03  1.330e-03  -0.837 0.402719    
## Hyd.Pressure4      2.497e-03  4.144e-04   6.025 1.94e-09 ***
## Filler.Level       5.257e-03  6.719e-04   7.825 7.39e-15 ***
## Filler.Speed      -8.270e-06  2.691e-05  -0.307 0.758608    
## Temperature        8.146e-03  1.282e-03   6.354 2.48e-10 ***
## Usage.cont        -4.710e-03  2.412e-03  -1.953 0.050988 .  
## Carb.Flow          3.148e-05  8.443e-06   3.729 0.000197 ***
## Density           -9.107e-02  6.321e-02  -1.441 0.149777    
## MFR               -1.414e-04  1.217e-04  -1.161 0.245598    
## Balling            5.935e-01  4.942e-02  12.010  < 2e-16 ***
## Pressure.Vacuum    1.073e-01  1.706e-02   6.288 3.76e-10 ***
## Oxygen.Filler     -1.589e+00  1.576e-01 -10.085  < 2e-16 ***
## Bowl.Setpoint     -2.398e-03  8.268e-04  -2.901 0.003753 ** 
## Pressure.Setpoint -1.634e-03  1.752e-03  -0.933 0.351111    
## Air.Pressurer      2.809e-03  5.417e-03   0.519 0.604140    
## Alch.Rel           1.694e-01  1.811e-02   9.353  < 2e-16 ***
## Carb.Rel           9.053e-02  2.165e-02   4.181 3.01e-05 ***
## Balling.Lvl       -5.907e-01  4.896e-02 -12.065  < 2e-16 ***
## BCB                1.079e-01  1.798e-02   5.999 2.27e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.307 on 2538 degrees of freedom
## Multiple R-squared:  0.3502, Adjusted R-squared:  0.342 
## F-statistic: 42.75 on 32 and 2538 DF,  p-value: < 2.2e-16
# Calculating VIF
vif_values <- vif(model)
vif_values
##       Carb.Volume       Fill.Ounces         PC.Volume     Carb.Pressure 
##          1.212068          1.039624          1.299425          1.313630 
##         Carb.Temp               PSC          PSC.Fill           PSC.CO2 
##          1.163947          1.094245          1.073751          1.048554 
##          Mnf.Flow    Carb.Pressure1     Fill.Pressure     Hyd.Pressure1 
##          4.379200          1.193121          1.626473          2.899194 
##     Hyd.Pressure2     Hyd.Pressure3     Hyd.Pressure4      Filler.Level 
##         10.943147         12.358010          1.298333          4.145435 
##      Filler.Speed       Temperature        Usage.cont         Carb.Flow 
##         17.298895          1.142105          1.540381          2.249256 
##           Density               MFR           Balling   Pressure.Vacuum 
##         15.585262         17.190024         57.847514          2.578797 
##     Oxygen.Filler     Bowl.Setpoint Pressure.Setpoint     Air.Pressurer 
##          1.473160          4.536445          1.228467          1.175515 
##          Alch.Rel          Carb.Rel       Balling.Lvl               BCB 
##          3.761892          1.676540         49.610553          2.203483
There are still a couple variables to be addressed but first we are going to a step-wise feature reduction.
stats::step(model)
## Start:  AIC=-6040.25
## PH ~ Carb.Volume + Fill.Ounces + PC.Volume + Carb.Pressure + 
##     Carb.Temp + PSC + PSC.Fill + PSC.CO2 + Mnf.Flow + Carb.Pressure1 + 
##     Fill.Pressure + Hyd.Pressure1 + Hyd.Pressure2 + Hyd.Pressure3 + 
##     Hyd.Pressure4 + Filler.Level + Filler.Speed + Temperature + 
##     Usage.cont + Carb.Flow + Density + MFR + Balling + Pressure.Vacuum + 
##     Oxygen.Filler + Bowl.Setpoint + Pressure.Setpoint + Air.Pressurer + 
##     Alch.Rel + Carb.Rel + Balling.Lvl + BCB
## 
##                     Df Sum of Sq    RSS     AIC
## - Fill.Ounces        1    0.0043 239.13 -6042.2
## - Filler.Speed       1    0.0089 239.14 -6042.2
## - Carb.Pressure      1    0.0111 239.14 -6042.1
## - Carb.Temp          1    0.0226 239.15 -6042.0
## - PSC.CO2            1    0.0246 239.15 -6042.0
## - Air.Pressurer      1    0.0253 239.15 -6042.0
## - Carb.Volume        1    0.0340 239.16 -6041.9
## - Hyd.Pressure3      1    0.0660 239.19 -6041.5
## - Pressure.Setpoint  1    0.0819 239.21 -6041.4
## - PSC                1    0.1023 239.23 -6041.1
## - MFR                1    0.1271 239.26 -6040.9
## - Hyd.Pressure1      1    0.1703 239.30 -6040.4
## <none>                           239.13 -6040.2
## - Density            1    0.1956 239.32 -6040.1
## - Usage.cont         1    0.3592 239.49 -6038.4
## - PSC.Fill           1    0.5261 239.66 -6036.6
## - Hyd.Pressure2      1    0.6698 239.80 -6035.1
## - PC.Volume          1    0.7072 239.84 -6034.7
## - Bowl.Setpoint      1    0.7929 239.92 -6033.7
## - Carb.Flow          1    1.3100 240.44 -6028.2
## - Carb.Rel           1    1.6466 240.78 -6024.6
## - Carb.Pressure1     1    2.1207 241.25 -6019.5
## - BCB                1    3.3904 242.52 -6006.1
## - Hyd.Pressure4      1    3.4202 242.55 -6005.7
## - Pressure.Vacuum    1    3.7259 242.85 -6002.5
## - Temperature        1    3.8040 242.93 -6001.7
## - Fill.Pressure      1    4.9969 244.13 -5989.1
## - Filler.Level       1    5.7690 244.90 -5981.0
## - Mnf.Flow           1    6.3959 245.53 -5974.4
## - Alch.Rel           1    8.2425 247.37 -5955.1
## - Oxygen.Filler      1    9.5821 248.71 -5941.2
## - Balling            1   13.5903 252.72 -5900.1
## - Balling.Lvl        1   13.7153 252.84 -5898.9
## 
## Step:  AIC=-6042.2
## PH ~ Carb.Volume + PC.Volume + Carb.Pressure + Carb.Temp + PSC + 
##     PSC.Fill + PSC.CO2 + Mnf.Flow + Carb.Pressure1 + Fill.Pressure + 
##     Hyd.Pressure1 + Hyd.Pressure2 + Hyd.Pressure3 + Hyd.Pressure4 + 
##     Filler.Level + Filler.Speed + Temperature + Usage.cont + 
##     Carb.Flow + Density + MFR + Balling + Pressure.Vacuum + Oxygen.Filler + 
##     Bowl.Setpoint + Pressure.Setpoint + Air.Pressurer + Alch.Rel + 
##     Carb.Rel + Balling.Lvl + BCB
## 
##                     Df Sum of Sq    RSS     AIC
## - Filler.Speed       1    0.0090 239.14 -6044.1
## - Carb.Pressure      1    0.0112 239.14 -6044.1
## - Carb.Temp          1    0.0228 239.16 -6044.0
## - PSC.CO2            1    0.0244 239.16 -6043.9
## - Air.Pressurer      1    0.0251 239.16 -6043.9
## - Carb.Volume        1    0.0365 239.17 -6043.8
## - Hyd.Pressure3      1    0.0657 239.20 -6043.5
## - Pressure.Setpoint  1    0.0817 239.22 -6043.3
## - PSC                1    0.1024 239.24 -6043.1
## - MFR                1    0.1273 239.26 -6042.8
## - Hyd.Pressure1      1    0.1692 239.30 -6042.4
## <none>                           239.13 -6042.2
## - Density            1    0.1945 239.33 -6042.1
## - Usage.cont         1    0.3589 239.49 -6040.3
## - PSC.Fill           1    0.5321 239.66 -6038.5
## - Hyd.Pressure2      1    0.6679 239.80 -6037.0
## - PC.Volume          1    0.7029 239.84 -6036.7
## - Bowl.Setpoint      1    0.7892 239.92 -6035.7
## - Carb.Flow          1    1.3194 240.45 -6030.1
## - Carb.Rel           1    1.6486 240.78 -6026.5
## - Carb.Pressure1     1    2.1235 241.26 -6021.5
## - BCB                1    3.3884 242.52 -6008.0
## - Hyd.Pressure4      1    3.4576 242.59 -6007.3
## - Pressure.Vacuum    1    3.7305 242.86 -6004.4
## - Temperature        1    3.8034 242.94 -6003.6
## - Fill.Pressure      1    4.9942 244.13 -5991.1
## - Filler.Level       1    5.7700 244.90 -5982.9
## - Mnf.Flow           1    6.4031 245.54 -5976.3
## - Alch.Rel           1    8.2531 247.39 -5957.0
## - Oxygen.Filler      1    9.5835 248.72 -5943.2
## - Balling            1   13.6369 252.77 -5901.6
## - Balling.Lvl        1   13.7832 252.92 -5900.1
## 
## Step:  AIC=-6044.11
## PH ~ Carb.Volume + PC.Volume + Carb.Pressure + Carb.Temp + PSC + 
##     PSC.Fill + PSC.CO2 + Mnf.Flow + Carb.Pressure1 + Fill.Pressure + 
##     Hyd.Pressure1 + Hyd.Pressure2 + Hyd.Pressure3 + Hyd.Pressure4 + 
##     Filler.Level + Temperature + Usage.cont + Carb.Flow + Density + 
##     MFR + Balling + Pressure.Vacuum + Oxygen.Filler + Bowl.Setpoint + 
##     Pressure.Setpoint + Air.Pressurer + Alch.Rel + Carb.Rel + 
##     Balling.Lvl + BCB
## 
##                     Df Sum of Sq    RSS     AIC
## - Carb.Pressure      1    0.0105 239.15 -6046.0
## - Carb.Temp          1    0.0232 239.16 -6045.9
## - PSC.CO2            1    0.0251 239.17 -6045.8
## - Air.Pressurer      1    0.0260 239.17 -6045.8
## - Carb.Volume        1    0.0386 239.18 -6045.7
## - Hyd.Pressure3      1    0.0660 239.21 -6045.4
## - Pressure.Setpoint  1    0.0828 239.22 -6045.2
## - PSC                1    0.1017 239.24 -6045.0
## - Hyd.Pressure1      1    0.1654 239.31 -6044.3
## <none>                           239.14 -6044.1
## - Density            1    0.1920 239.33 -6044.0
## - Usage.cont         1    0.3628 239.50 -6042.2
## - PSC.Fill           1    0.5304 239.67 -6040.4
## - Hyd.Pressure2      1    0.6602 239.80 -6039.0
## - PC.Volume          1    0.7104 239.85 -6038.5
## - Bowl.Setpoint      1    0.7804 239.92 -6037.7
## - Carb.Flow          1    1.3130 240.46 -6032.0
## - MFR                1    1.4836 240.63 -6030.2
## - Carb.Rel           1    1.6492 240.79 -6028.4
## - Carb.Pressure1     1    2.1176 241.26 -6023.4
## - BCB                1    3.3888 242.53 -6009.9
## - Hyd.Pressure4      1    3.5250 242.67 -6008.5
## - Pressure.Vacuum    1    3.7340 242.88 -6006.3
## - Temperature        1    3.8012 242.94 -6005.6
## - Fill.Pressure      1    4.9861 244.13 -5993.1
## - Filler.Level       1    5.8536 245.00 -5983.9
## - Mnf.Flow           1    6.4037 245.55 -5978.2
## - Alch.Rel           1    8.2444 247.39 -5959.0
## - Oxygen.Filler      1    9.6358 248.78 -5944.5
## - Balling            1   13.6513 252.79 -5903.4
## - Balling.Lvl        1   13.7995 252.94 -5901.9
## 
## Step:  AIC=-6045.99
## PH ~ Carb.Volume + PC.Volume + Carb.Temp + PSC + PSC.Fill + PSC.CO2 + 
##     Mnf.Flow + Carb.Pressure1 + Fill.Pressure + Hyd.Pressure1 + 
##     Hyd.Pressure2 + Hyd.Pressure3 + Hyd.Pressure4 + Filler.Level + 
##     Temperature + Usage.cont + Carb.Flow + Density + MFR + Balling + 
##     Pressure.Vacuum + Oxygen.Filler + Bowl.Setpoint + Pressure.Setpoint + 
##     Air.Pressurer + Alch.Rel + Carb.Rel + Balling.Lvl + BCB
## 
##                     Df Sum of Sq    RSS     AIC
## - Air.Pressurer      1    0.0258 239.18 -6047.7
## - PSC.CO2            1    0.0259 239.18 -6047.7
## - Carb.Temp          1    0.0400 239.19 -6047.6
## - Carb.Volume        1    0.0566 239.21 -6047.4
## - Hyd.Pressure3      1    0.0673 239.22 -6047.3
## - Pressure.Setpoint  1    0.0827 239.24 -6047.1
## - PSC                1    0.0987 239.25 -6046.9
## - Hyd.Pressure1      1    0.1646 239.32 -6046.2
## <none>                           239.15 -6046.0
## - Density            1    0.1906 239.34 -6045.9
## - Usage.cont         1    0.3647 239.52 -6044.1
## - PSC.Fill           1    0.5276 239.68 -6042.3
## - Hyd.Pressure2      1    0.6642 239.82 -6040.9
## - PC.Volume          1    0.7069 239.86 -6040.4
## - Bowl.Setpoint      1    0.7797 239.93 -6039.6
## - Carb.Flow          1    1.3060 240.46 -6034.0
## - MFR                1    1.4807 240.63 -6032.1
## - Carb.Rel           1    1.6508 240.80 -6030.3
## - Carb.Pressure1     1    2.1170 241.27 -6025.3
## - BCB                1    3.3825 242.53 -6011.9
## - Hyd.Pressure4      1    3.5220 242.68 -6010.4
## - Pressure.Vacuum    1    3.7394 242.89 -6008.1
## - Temperature        1    3.7970 242.95 -6007.5
## - Fill.Pressure      1    4.9947 244.15 -5994.9
## - Filler.Level       1    5.8503 245.00 -5985.9
## - Mnf.Flow           1    6.4001 245.55 -5980.1
## - Alch.Rel           1    8.2436 247.40 -5960.9
## - Oxygen.Filler      1    9.6312 248.78 -5946.5
## - Balling            1   13.6427 252.79 -5905.4
## - Balling.Lvl        1   13.8074 252.96 -5903.7
## 
## Step:  AIC=-6047.72
## PH ~ Carb.Volume + PC.Volume + Carb.Temp + PSC + PSC.Fill + PSC.CO2 + 
##     Mnf.Flow + Carb.Pressure1 + Fill.Pressure + Hyd.Pressure1 + 
##     Hyd.Pressure2 + Hyd.Pressure3 + Hyd.Pressure4 + Filler.Level + 
##     Temperature + Usage.cont + Carb.Flow + Density + MFR + Balling + 
##     Pressure.Vacuum + Oxygen.Filler + Bowl.Setpoint + Pressure.Setpoint + 
##     Alch.Rel + Carb.Rel + Balling.Lvl + BCB
## 
##                     Df Sum of Sq    RSS     AIC
## - PSC.CO2            1    0.0262 239.21 -6049.4
## - Carb.Temp          1    0.0394 239.22 -6049.3
## - Hyd.Pressure3      1    0.0544 239.23 -6049.1
## - Carb.Volume        1    0.0557 239.23 -6049.1
## - Pressure.Setpoint  1    0.0826 239.26 -6048.8
## - PSC                1    0.0963 239.28 -6048.7
## - Hyd.Pressure1      1    0.1770 239.35 -6047.8
## <none>                           239.18 -6047.7
## - Density            1    0.1877 239.37 -6047.7
## - Usage.cont         1    0.3833 239.56 -6045.6
## - PSC.Fill           1    0.5341 239.71 -6044.0
## - Hyd.Pressure2      1    0.6393 239.82 -6042.9
## - PC.Volume          1    0.7118 239.89 -6042.1
## - Bowl.Setpoint      1    0.8129 239.99 -6041.0
## - Carb.Flow          1    1.3128 240.49 -6035.6
## - MFR                1    1.4860 240.66 -6033.8
## - Carb.Rel           1    1.6417 240.82 -6032.1
## - Carb.Pressure1     1    2.1588 241.34 -6026.6
## - BCB                1    3.4128 242.59 -6013.3
## - Hyd.Pressure4      1    3.5368 242.72 -6012.0
## - Temperature        1    3.8036 242.98 -6009.2
## - Pressure.Vacuum    1    3.9212 243.10 -6007.9
## - Fill.Pressure      1    4.9848 244.16 -5996.7
## - Filler.Level       1    5.8677 245.05 -5987.4
## - Mnf.Flow           1    6.4078 245.59 -5981.7
## - Alch.Rel           1    8.2751 247.45 -5962.3
## - Oxygen.Filler      1    9.6085 248.79 -5948.5
## - Balling            1   13.6446 252.82 -5907.1
## - Balling.Lvl        1   13.8371 253.01 -5905.1
## 
## Step:  AIC=-6049.43
## PH ~ Carb.Volume + PC.Volume + Carb.Temp + PSC + PSC.Fill + Mnf.Flow + 
##     Carb.Pressure1 + Fill.Pressure + Hyd.Pressure1 + Hyd.Pressure2 + 
##     Hyd.Pressure3 + Hyd.Pressure4 + Filler.Level + Temperature + 
##     Usage.cont + Carb.Flow + Density + MFR + Balling + Pressure.Vacuum + 
##     Oxygen.Filler + Bowl.Setpoint + Pressure.Setpoint + Alch.Rel + 
##     Carb.Rel + Balling.Lvl + BCB
## 
##                     Df Sum of Sq    RSS     AIC
## - Carb.Temp          1    0.0404 239.25 -6051.0
## - Hyd.Pressure3      1    0.0574 239.26 -6050.8
## - Carb.Volume        1    0.0586 239.26 -6050.8
## - Pressure.Setpoint  1    0.0836 239.29 -6050.5
## - PSC                1    0.0977 239.30 -6050.4
## - Hyd.Pressure1      1    0.1750 239.38 -6049.6
## <none>                           239.21 -6049.4
## - Density            1    0.1895 239.39 -6049.4
## - Usage.cont         1    0.3867 239.59 -6047.3
## - PSC.Fill           1    0.5879 239.79 -6045.1
## - Hyd.Pressure2      1    0.6476 239.85 -6044.5
## - PC.Volume          1    0.7178 239.92 -6043.7
## - Bowl.Setpoint      1    0.8168 240.02 -6042.7
## - Carb.Flow          1    1.3073 240.51 -6037.4
## - MFR                1    1.4803 240.69 -6035.6
## - Carb.Rel           1    1.6575 240.86 -6033.7
## - Carb.Pressure1     1    2.1759 241.38 -6028.2
## - BCB                1    3.4276 242.63 -6014.9
## - Hyd.Pressure4      1    3.5224 242.73 -6013.9
## - Temperature        1    3.7879 242.99 -6011.0
## - Pressure.Vacuum    1    3.9129 243.12 -6009.7
## - Fill.Pressure      1    4.9653 244.17 -5998.6
## - Filler.Level       1    5.8774 245.08 -5989.0
## - Mnf.Flow           1    6.4152 245.62 -5983.4
## - Alch.Rel           1    8.2742 247.48 -5964.0
## - Oxygen.Filler      1    9.6002 248.81 -5950.3
## - Balling            1   13.6483 252.85 -5908.8
## - Balling.Lvl        1   13.8254 253.03 -5907.0
## 
## Step:  AIC=-6051
## PH ~ Carb.Volume + PC.Volume + PSC + PSC.Fill + Mnf.Flow + Carb.Pressure1 + 
##     Fill.Pressure + Hyd.Pressure1 + Hyd.Pressure2 + Hyd.Pressure3 + 
##     Hyd.Pressure4 + Filler.Level + Temperature + Usage.cont + 
##     Carb.Flow + Density + MFR + Balling + Pressure.Vacuum + Oxygen.Filler + 
##     Bowl.Setpoint + Pressure.Setpoint + Alch.Rel + Carb.Rel + 
##     Balling.Lvl + BCB
## 
##                     Df Sum of Sq    RSS     AIC
## - Hyd.Pressure3      1    0.0616 239.31 -6052.3
## - Carb.Volume        1    0.0709 239.32 -6052.2
## - Pressure.Setpoint  1    0.0812 239.33 -6052.1
## - PSC                1    0.0933 239.34 -6052.0
## - Hyd.Pressure1      1    0.1754 239.42 -6051.1
## <none>                           239.25 -6051.0
## - Density            1    0.1933 239.44 -6050.9
## - Usage.cont         1    0.3859 239.63 -6048.9
## - PSC.Fill           1    0.5895 239.83 -6046.7
## - Hyd.Pressure2      1    0.6615 239.91 -6045.9
## - PC.Volume          1    0.7125 239.96 -6045.4
## - Bowl.Setpoint      1    0.8196 240.06 -6044.2
## - Carb.Flow          1    1.3190 240.56 -6038.9
## - MFR                1    1.4957 240.74 -6037.0
## - Carb.Rel           1    1.6667 240.91 -6035.2
## - Carb.Pressure1     1    2.1805 241.43 -6029.7
## - BCB                1    3.4263 242.67 -6016.4
## - Hyd.Pressure4      1    3.5188 242.76 -6015.5
## - Temperature        1    3.7572 243.00 -6012.9
## - Pressure.Vacuum    1    3.9054 243.15 -6011.4
## - Fill.Pressure      1    4.9599 244.21 -6000.2
## - Filler.Level       1    5.9140 245.16 -5990.2
## - Mnf.Flow           1    6.3855 245.63 -5985.3
## - Alch.Rel           1    8.2780 247.52 -5965.5
## - Oxygen.Filler      1    9.5887 248.83 -5952.0
## - Balling            1   13.6641 252.91 -5910.2
## - Balling.Lvl        1   13.8277 253.07 -5908.5
## 
## Step:  AIC=-6052.34
## PH ~ Carb.Volume + PC.Volume + PSC + PSC.Fill + Mnf.Flow + Carb.Pressure1 + 
##     Fill.Pressure + Hyd.Pressure1 + Hyd.Pressure2 + Hyd.Pressure4 + 
##     Filler.Level + Temperature + Usage.cont + Carb.Flow + Density + 
##     MFR + Balling + Pressure.Vacuum + Oxygen.Filler + Bowl.Setpoint + 
##     Pressure.Setpoint + Alch.Rel + Carb.Rel + Balling.Lvl + BCB
## 
##                     Df Sum of Sq    RSS     AIC
## - Carb.Volume        1    0.0662 239.37 -6053.6
## - PSC                1    0.0914 239.40 -6053.4
## - Pressure.Setpoint  1    0.1089 239.41 -6053.2
## - Density            1    0.1518 239.46 -6052.7
## - Hyd.Pressure1      1    0.1862 239.49 -6052.3
## <none>                           239.31 -6052.3
## - Usage.cont         1    0.3630 239.67 -6050.4
## - PSC.Fill           1    0.5873 239.89 -6048.0
## - PC.Volume          1    0.7005 240.01 -6046.8
## - Bowl.Setpoint      1    0.8013 240.11 -6045.7
## - Hyd.Pressure2      1    0.8523 240.16 -6045.2
## - Carb.Flow          1    1.2797 240.59 -6040.6
## - MFR                1    1.5349 240.84 -6037.9
## - Carb.Rel           1    1.6441 240.95 -6036.7
## - Carb.Pressure1     1    2.1750 241.48 -6031.1
## - BCB                1    3.4661 242.77 -6017.4
## - Hyd.Pressure4      1    3.5049 242.81 -6017.0
## - Temperature        1    3.7488 243.06 -6014.4
## - Pressure.Vacuum    1    4.0215 243.33 -6011.5
## - Fill.Pressure      1    4.8986 244.21 -6002.2
## - Filler.Level       1    6.0363 245.34 -5990.3
## - Mnf.Flow           1    7.8551 247.16 -5971.3
## - Alch.Rel           1    8.3168 247.62 -5966.5
## - Oxygen.Filler      1    9.6667 248.97 -5952.5
## - Balling            1   13.7614 253.07 -5910.6
## - Balling.Lvl        1   13.7782 253.09 -5910.4
## 
## Step:  AIC=-6053.63
## PH ~ PC.Volume + PSC + PSC.Fill + Mnf.Flow + Carb.Pressure1 + 
##     Fill.Pressure + Hyd.Pressure1 + Hyd.Pressure2 + Hyd.Pressure4 + 
##     Filler.Level + Temperature + Usage.cont + Carb.Flow + Density + 
##     MFR + Balling + Pressure.Vacuum + Oxygen.Filler + Bowl.Setpoint + 
##     Pressure.Setpoint + Alch.Rel + Carb.Rel + Balling.Lvl + BCB
## 
##                     Df Sum of Sq    RSS     AIC
## - PSC                1    0.0926 239.47 -6054.6
## - Pressure.Setpoint  1    0.1051 239.48 -6054.5
## - Density            1    0.1512 239.52 -6054.0
## - Hyd.Pressure1      1    0.1804 239.55 -6053.7
## <none>                           239.37 -6053.6
## - Usage.cont         1    0.3616 239.73 -6051.7
## - PSC.Fill           1    0.5910 239.96 -6049.3
## - PC.Volume          1    0.6764 240.05 -6048.4
## - Bowl.Setpoint      1    0.8071 240.18 -6047.0
## - Hyd.Pressure2      1    0.8532 240.23 -6046.5
## - Carb.Flow          1    1.2787 240.65 -6041.9
## - MFR                1    1.5238 240.90 -6039.3
## - Carb.Rel           1    1.6575 241.03 -6037.9
## - Carb.Pressure1     1    2.1786 241.55 -6032.3
## - BCB                1    3.4490 242.82 -6018.8
## - Hyd.Pressure4      1    3.5552 242.93 -6017.7
## - Temperature        1    3.6982 243.07 -6016.2
## - Pressure.Vacuum    1    4.0239 243.40 -6012.8
## - Fill.Pressure      1    4.8527 244.22 -6004.0
## - Filler.Level       1    6.0737 245.45 -5991.2
## - Mnf.Flow           1    7.8836 247.26 -5972.3
## - Alch.Rel           1    8.3403 247.71 -5967.6
## - Oxygen.Filler      1    9.6660 249.04 -5953.8
## - Balling            1   13.7253 253.10 -5912.3
## - Balling.Lvl        1   13.8191 253.19 -5911.3
## 
## Step:  AIC=-6054.63
## PH ~ PC.Volume + PSC.Fill + Mnf.Flow + Carb.Pressure1 + Fill.Pressure + 
##     Hyd.Pressure1 + Hyd.Pressure2 + Hyd.Pressure4 + Filler.Level + 
##     Temperature + Usage.cont + Carb.Flow + Density + MFR + Balling + 
##     Pressure.Vacuum + Oxygen.Filler + Bowl.Setpoint + Pressure.Setpoint + 
##     Alch.Rel + Carb.Rel + Balling.Lvl + BCB
## 
##                     Df Sum of Sq    RSS     AIC
## - Pressure.Setpoint  1    0.1046 239.57 -6055.5
## - Density            1    0.1514 239.62 -6055.0
## - Hyd.Pressure1      1    0.1769 239.64 -6054.7
## <none>                           239.47 -6054.6
## - Usage.cont         1    0.3757 239.84 -6052.6
## - PC.Volume          1    0.6101 240.07 -6050.1
## - PSC.Fill           1    0.6906 240.16 -6049.2
## - Bowl.Setpoint      1    0.8034 240.27 -6048.0
## - Hyd.Pressure2      1    0.8756 240.34 -6047.2
## - Carb.Flow          1    1.2928 240.76 -6042.8
## - MFR                1    1.5506 241.02 -6040.0
## - Carb.Rel           1    1.6425 241.11 -6039.1
## - Carb.Pressure1     1    2.1935 241.66 -6033.2
## - BCB                1    3.4631 242.93 -6019.7
## - Hyd.Pressure4      1    3.5462 243.01 -6018.8
## - Temperature        1    3.6758 243.14 -6017.5
## - Pressure.Vacuum    1    3.9588 243.42 -6014.5
## - Fill.Pressure      1    4.8292 244.29 -6005.3
## - Filler.Level       1    6.0535 245.52 -5992.4
## - Mnf.Flow           1    8.1194 247.59 -5970.9
## - Alch.Rel           1    8.3169 247.78 -5968.9
## - Oxygen.Filler      1    9.6845 249.15 -5954.7
## - Balling            1   13.7030 253.17 -5913.6
## - Balling.Lvl        1   13.7754 253.24 -5912.8
## 
## Step:  AIC=-6055.51
## PH ~ PC.Volume + PSC.Fill + Mnf.Flow + Carb.Pressure1 + Fill.Pressure + 
##     Hyd.Pressure1 + Hyd.Pressure2 + Hyd.Pressure4 + Filler.Level + 
##     Temperature + Usage.cont + Carb.Flow + Density + MFR + Balling + 
##     Pressure.Vacuum + Oxygen.Filler + Bowl.Setpoint + Alch.Rel + 
##     Carb.Rel + Balling.Lvl + BCB
## 
##                   Df Sum of Sq    RSS     AIC
## - Density          1    0.1384 239.71 -6056.0
## - Hyd.Pressure1    1    0.1767 239.75 -6055.6
## <none>                         239.57 -6055.5
## - Usage.cont       1    0.3808 239.95 -6053.4
## - PC.Volume        1    0.5934 240.16 -6051.1
## - PSC.Fill         1    0.6894 240.26 -6050.1
## - Bowl.Setpoint    1    0.7908 240.36 -6049.0
## - Hyd.Pressure2    1    0.8833 240.45 -6048.0
## - Carb.Flow        1    1.3275 240.90 -6043.3
## - MFR              1    1.5504 241.12 -6040.9
## - Carb.Rel         1    1.6409 241.21 -6040.0
## - Carb.Pressure1   1    2.2268 241.80 -6033.7
## - BCB              1    3.4063 242.98 -6021.2
## - Hyd.Pressure4    1    3.6050 243.18 -6019.1
## - Temperature      1    3.7213 243.29 -6017.9
## - Pressure.Vacuum  1    3.9945 243.56 -6015.0
## - Fill.Pressure    1    4.7757 244.35 -6006.8
## - Filler.Level     1    6.3022 245.87 -5990.8
## - Mnf.Flow         1    8.1603 247.73 -5971.4
## - Alch.Rel         1    8.3671 247.94 -5969.2
## - Oxygen.Filler    1    9.7054 249.28 -5955.4
## - Balling          1   13.7275 253.30 -5914.3
## - Balling.Lvl      1   13.8742 253.44 -5912.8
## 
## Step:  AIC=-6056.02
## PH ~ PC.Volume + PSC.Fill + Mnf.Flow + Carb.Pressure1 + Fill.Pressure + 
##     Hyd.Pressure1 + Hyd.Pressure2 + Hyd.Pressure4 + Filler.Level + 
##     Temperature + Usage.cont + Carb.Flow + MFR + Balling + Pressure.Vacuum + 
##     Oxygen.Filler + Bowl.Setpoint + Alch.Rel + Carb.Rel + Balling.Lvl + 
##     BCB
## 
##                   Df Sum of Sq    RSS     AIC
## - Hyd.Pressure1    1    0.1657 239.87 -6056.2
## <none>                         239.71 -6056.0
## - Usage.cont       1    0.3499 240.06 -6054.3
## - PC.Volume        1    0.5919 240.30 -6051.7
## - PSC.Fill         1    0.6804 240.39 -6050.7
## - Bowl.Setpoint    1    0.7360 240.44 -6050.1
## - Hyd.Pressure2    1    0.8224 240.53 -6049.2
## - Carb.Flow        1    1.2086 240.92 -6045.1
## - MFR              1    1.4817 241.19 -6042.2
## - Carb.Rel         1    1.5681 241.28 -6041.3
## - Carb.Pressure1   1    2.2355 241.94 -6034.2
## - BCB              1    3.3972 243.10 -6021.8
## - Hyd.Pressure4    1    3.5846 243.29 -6019.9
## - Temperature      1    3.6888 243.40 -6018.8
## - Pressure.Vacuum  1    3.8839 243.59 -6016.7
## - Fill.Pressure    1    4.8494 244.56 -6006.5
## - Filler.Level     1    6.3042 246.01 -5991.3
## - Mnf.Flow         1    8.0221 247.73 -5973.4
## - Alch.Rel         1    8.3983 248.11 -5969.5
## - Oxygen.Filler    1    9.7532 249.46 -5955.5
## - Balling.Lvl      1   14.1438 253.85 -5910.6
## - Balling          1   15.4278 255.14 -5897.7
## 
## Step:  AIC=-6056.25
## PH ~ PC.Volume + PSC.Fill + Mnf.Flow + Carb.Pressure1 + Fill.Pressure + 
##     Hyd.Pressure2 + Hyd.Pressure4 + Filler.Level + Temperature + 
##     Usage.cont + Carb.Flow + MFR + Balling + Pressure.Vacuum + 
##     Oxygen.Filler + Bowl.Setpoint + Alch.Rel + Carb.Rel + Balling.Lvl + 
##     BCB
## 
##                   Df Sum of Sq    RSS     AIC
## <none>                         239.87 -6056.2
## - Usage.cont       1    0.3247 240.20 -6054.8
## - PC.Volume        1    0.5030 240.38 -6052.9
## - PSC.Fill         1    0.6857 240.56 -6050.9
## - Hyd.Pressure2    1    0.7736 240.65 -6050.0
## - Bowl.Setpoint    1    0.8663 240.74 -6049.0
## - Carb.Flow        1    1.1455 241.02 -6046.0
## - MFR              1    1.3691 241.24 -6043.6
## - Carb.Rel         1    1.5586 241.43 -6041.6
## - Carb.Pressure1   1    2.4900 242.36 -6031.7
## - BCB              1    3.4378 243.31 -6021.7
## - Hyd.Pressure4    1    3.5512 243.43 -6020.5
## - Temperature      1    3.6501 243.52 -6019.4
## - Pressure.Vacuum  1    3.7374 243.61 -6018.5
## - Fill.Pressure    1    4.9162 244.79 -6006.1
## - Filler.Level     1    6.2090 246.08 -5992.5
## - Mnf.Flow         1    8.1206 248.00 -5972.7
## - Alch.Rel         1    8.4781 248.35 -5968.9
## - Oxygen.Filler    1    9.7274 249.60 -5956.0
## - Balling.Lvl      1   14.2512 254.12 -5909.9
## - Balling          1   15.5533 255.43 -5896.7
## 
## Call:
## lm(formula = PH ~ PC.Volume + PSC.Fill + Mnf.Flow + Carb.Pressure1 + 
##     Fill.Pressure + Hyd.Pressure2 + Hyd.Pressure4 + Filler.Level + 
##     Temperature + Usage.cont + Carb.Flow + MFR + Balling + Pressure.Vacuum + 
##     Oxygen.Filler + Bowl.Setpoint + Alch.Rel + Carb.Rel + Balling.Lvl + 
##     BCB, data = student_train_0)
## 
## Coefficients:
##     (Intercept)        PC.Volume         PSC.Fill         Mnf.Flow  
##       5.6966783        0.2219495       -0.1385031       -0.0009060  
##  Carb.Pressure1    Fill.Pressure    Hyd.Pressure2    Hyd.Pressure4  
##       0.0023250        0.0098435        0.0016407        0.0025172  
##    Filler.Level      Temperature       Usage.cont        Carb.Flow  
##       0.0053094        0.0079330       -0.0044215        0.0000285  
##             MFR          Balling  Pressure.Vacuum    Oxygen.Filler  
##      -0.0001667        0.5619052        0.1033532       -1.5930429  
##   Bowl.Setpoint         Alch.Rel         Carb.Rel      Balling.Lvl  
##      -0.0024400        0.1713421        0.0875079       -0.5968109  
##             BCB  
##       0.1081677
We reduce our predictors to just those remaining after the feature reduction.
student_train_1 <- student_train_0 |> select(PH, PC.Volume , PSC.Fill , Mnf.Flow , Carb.Pressure1 , 
    Fill.Pressure , Hyd.Pressure2 , Hyd.Pressure4 , Filler.Level , 
    Temperature , Usage.cont , Carb.Flow , MFR , Balling , 
    Pressure.Vacuum , Oxygen.Filler , Bowl.Setpoint , Alch.Rel , 
    Carb.Rel , Balling.Lvl , BCB)

student_eval_1 <- student_eval_0 |> select(PH, PC.Volume , PSC.Fill , Mnf.Flow , Carb.Pressure1 , 
    Fill.Pressure , Hyd.Pressure2 , Hyd.Pressure4 , Filler.Level , 
    Temperature , Usage.cont , Carb.Flow , MFR , Balling , 
    Pressure.Vacuum , Oxygen.Filler , Bowl.Setpoint , Alch.Rel , 
    Carb.Rel , Balling.Lvl , BCB)

model <- lm(PH ~ ., data = student_train_1)
summary(model)
## 
## Call:
## lm(formula = PH ~ ., data = student_train_1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.2646 -0.1012  0.0052  0.1088  4.3654 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.697e+00  1.843e-01  30.908  < 2e-16 ***
## PC.Volume        2.219e-01  9.598e-02   2.312 0.020836 *  
## PSC.Fill        -1.385e-01  5.130e-02  -2.700 0.006983 ** 
## Mnf.Flow        -9.060e-04  9.751e-05  -9.291  < 2e-16 ***
## Carb.Pressure1   2.325e-03  4.519e-04   5.145 2.88e-07 ***
## Fill.Pressure    9.843e-03  1.362e-03   7.229 6.39e-13 ***
## Hyd.Pressure2    1.641e-03  5.721e-04   2.868 0.004168 ** 
## Hyd.Pressure4    2.517e-03  4.097e-04   6.144 9.30e-10 ***
## Filler.Level     5.309e-03  6.535e-04   8.124 6.93e-16 ***
## Temperature      7.933e-03  1.274e-03   6.229 5.47e-10 ***
## Usage.cont      -4.422e-03  2.380e-03  -1.858 0.063293 .  
## Carb.Flow        2.850e-05  8.167e-06   3.490 0.000492 ***
## MFR             -1.667e-04  4.368e-05  -3.815 0.000139 ***
## Balling          5.619e-01  4.370e-02  12.859  < 2e-16 ***
## Pressure.Vacuum  1.034e-01  1.640e-02   6.303 3.42e-10 ***
## Oxygen.Filler   -1.593e+00  1.567e-01 -10.169  < 2e-16 ***
## Bowl.Setpoint   -2.440e-03  8.041e-04  -3.035 0.002432 ** 
## Alch.Rel         1.713e-01  1.805e-02   9.494  < 2e-16 ***
## Carb.Rel         8.751e-02  2.150e-02   4.071 4.83e-05 ***
## Balling.Lvl     -5.968e-01  4.849e-02 -12.308  < 2e-16 ***
## BCB              1.082e-01  1.789e-02   6.045 1.71e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3067 on 2550 degrees of freedom
## Multiple R-squared:  0.3482, Adjusted R-squared:  0.3431 
## F-statistic: 68.12 on 20 and 2550 DF,  p-value: < 2.2e-16
We evaluate the VIF again and see that Balling.Lvl and Balling are collinear, I created a new predictor by dividing Balling.Lvl by Balling.
# Calculating VIF
vif_values <- vif(model)
vif_values
##       PC.Volume        PSC.Fill        Mnf.Flow  Carb.Pressure1   Fill.Pressure 
##        1.202038        1.012851        3.705385        1.154856        1.494303 
##   Hyd.Pressure2   Hyd.Pressure4    Filler.Level     Temperature      Usage.cont 
##        2.410119        1.270967        3.928468        1.128926        1.501908 
##       Carb.Flow             MFR         Balling Pressure.Vacuum   Oxygen.Filler 
##        2.107870        2.217223       45.309741        2.386455        1.458751 
##   Bowl.Setpoint        Alch.Rel        Carb.Rel     Balling.Lvl             BCB 
##        4.297317        3.741577        1.654980       48.738174        2.184705
# Visualizing the model
#plot(model, which = 1, main = "Model Fit")
student_train_2 <- student_train_1 |> mutate(PT = Balling.Lvl/Balling) |> select(-c(Balling, Balling.Lvl))

student_eval_2 <- student_eval_1 |> mutate(PT = Balling.Lvl/Balling) |> select(-c(Balling, Balling.Lvl))

model <- lm(PH ~ ., data = student_train_2)
summary(model)
## 
## Call:
## lm(formula = PH ~ ., data = student_train_2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.8265 -0.0918  0.0077  0.1043  2.9896 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      6.910e+00  1.854e-01  37.274  < 2e-16 ***
## PC.Volume        1.582e-01  9.068e-02   1.745 0.081139 .  
## PSC.Fill        -1.100e-01  4.870e-02  -2.259 0.023993 *  
## Mnf.Flow        -8.162e-04  9.264e-05  -8.811  < 2e-16 ***
## Carb.Pressure1   2.032e-03  4.287e-04   4.741 2.25e-06 ***
## Fill.Pressure    8.599e-03  1.290e-03   6.666 3.20e-11 ***
## Hyd.Pressure2    1.613e-03  5.426e-04   2.974 0.002968 ** 
## Hyd.Pressure4    1.296e-03  3.801e-04   3.410 0.000661 ***
## Filler.Level     5.568e-03  6.195e-04   8.988  < 2e-16 ***
## Temperature      4.021e-03  1.223e-03   3.287 0.001028 ** 
## Usage.cont      -2.956e-03  2.258e-03  -1.309 0.190487    
## Carb.Flow        2.530e-05  7.722e-06   3.277 0.001064 ** 
## MFR             -1.068e-04  4.077e-05  -2.620 0.008836 ** 
## Pressure.Vacuum  4.958e-02  1.406e-02   3.527 0.000428 ***
## Oxygen.Filler   -1.211e+00  1.505e-01  -8.041 1.35e-15 ***
## Bowl.Setpoint   -2.980e-03  7.634e-04  -3.903 9.74e-05 ***
## Alch.Rel         1.400e-01  1.324e-02  10.573  < 2e-16 ***
## Carb.Rel         1.649e-02  2.059e-02   0.801 0.423272    
## BCB              9.657e-02  1.426e-02   6.772 1.57e-11 ***
## PT              -4.733e-01  4.340e-02 -10.907  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.291 on 2550 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2687, Adjusted R-squared:  0.2633 
## F-statistic: 49.32 on 19 and 2550 DF,  p-value: < 2.2e-16
This resolves our collinearity but our model still has terrible performance so we are going to build some MARS models to capture some of the nonlinear relationships
# Calculating VIF
vif_values <- vif(model)
vif_values
##       PC.Volume        PSC.Fill        Mnf.Flow  Carb.Pressure1   Fill.Pressure 
##        1.191026        1.013459        3.716245        1.154787        1.445729 
##   Hyd.Pressure2   Hyd.Pressure4    Filler.Level     Temperature      Usage.cont 
##        2.406789        1.200213        3.869511        1.081680        1.501337 
##       Carb.Flow             MFR Pressure.Vacuum   Oxygen.Filler   Bowl.Setpoint 
##        2.089432        2.137275        1.949095        1.463420        4.294010 
##        Alch.Rel        Carb.Rel             BCB              PT 
##        2.140017        1.540402        1.541232        1.463051
student_train_2[is.na(student_train_2)] <- 0
library(earth)
y = student_train_2$PH
x = student_train_2 |> select(-PH)
marsFit <- earth(x, y)
summary(marsFit)
## Call: earth(x=x, y=y)
## 
##                         coefficients
## (Intercept)                60.142179
## BCB                         0.109982
## h(155.4-Mnf.Flow)           0.000713
## h(115.2-Carb.Pressure1)    -0.001086
## h(Carb.Pressure1-115.2)     0.012040
## h(Carb.Pressure1-129.2)    -0.032280
## h(44-Fill.Pressure)        -0.010383
## h(22.8-Hyd.Pressure2)      -0.003566
## h(Hyd.Pressure4-112)        0.007462
## h(65.4-Filler.Level)       -0.007803
## h(Temperature-64.8)        -0.020892
## h(73.2-Temperature)        -0.007473
## h(Temperature-73.2)         0.357884
## h(Usage.cont-22.06)        -0.036833
## h(280.4-MFR)               -0.000513
## h(MFR-280.4)               -0.000281
## h(Oxygen.Filler-0.0026)  -199.580131
## h(Oxygen.Filler-0.224)     22.772443
## h(0.262-Oxygen.Filler)   -199.212897
## h(Oxygen.Filler-0.262)    142.671213
## h(Bowl.Setpoint-90)         0.003388
## h(6.52-Alch.Rel)           -0.105306
## h(Alch.Rel-6.52)            0.136097
## h(PT-0.953488)              0.544001
## h(PT-1.05341)              -0.872367
## 
## Selected 25 of 33 terms, and 14 of 19 predictors
## Termination condition: Reached nk 39
## Importance: Oxygen.Filler, Filler.Level, Mnf.Flow, Alch.Rel, BCB, ...
## Number of terms at each degree of interaction: 1 24 (additive model)
## GCV 0.06876423    RSS 170.1182    GRSq 0.5199934    RSq 0.5377561
plotmo(marsFit)
##  plotmo grid:    PC.Volume PSC.Fill Mnf.Flow Carb.Pressure1 Fill.Pressure
##                       0.27     0.18     64.8            123          46.4
##  Hyd.Pressure2 Hyd.Pressure4 Filler.Level Temperature Usage.cont Carb.Flow
##           28.6            96        118.2        65.6      21.78      3028
##    MFR Pressure.Vacuum Oxygen.Filler Bowl.Setpoint Alch.Rel Carb.Rel BCB
##  721.4            -5.4        0.0334           120     6.56      5.4   0
##         PT
##  0.9302326

We are going to add some 2nd order relationships and we get an R-squared of 0.89
student_train_2[is.na(student_train_2)] <- 0
library(earth)
y = student_train_2$PH
x = student_train_2 |> select(-PH)

y_eval = student_eval_2$PH
x_eval = student_eval_2 |> select(-PH)

marsFit2 <- earth(x, y, degree = 2)
summary(marsFit2)
## Call: earth(x=x, y=y, degree=2)
## 
##                                               coefficients
## (Intercept)                                       8.390041
## BCB                                               0.051556
## h(0.2-Mnf.Flow)                                   0.001352
## h(Mnf.Flow-0.2)                                  -0.000508
## h(115.2-Carb.Pressure1)                          -0.001204
## h(Carb.Pressure1-115.2)                           0.010962
## h(20.2-Hyd.Pressure2)                            -0.004303
## h(Hyd.Pressure2-20.2)                             0.002212
## h(Oxygen.Filler-0.262)                          -90.613327
## h(6.66-Alch.Rel)                                  0.338152
## Filler.Level * h(Oxygen.Filler-0.262)             0.518596
## h(Oxygen.Filler-0.262) * Carb.Rel                 5.429397
## h(0.024-Oxygen.Filler) * BCB                     -3.529366
## h(Oxygen.Filler-0.024) * BCB                     -0.604163
## h(90-Bowl.Setpoint) * BCB                         0.004278
## h(Bowl.Setpoint-90) * BCB                         0.004900
## h(0.2-Mnf.Flow) * h(Pressure.Vacuum- -5)         -0.000994
## h(0.2-Mnf.Flow) * h(-5-Pressure.Vacuum)          -0.001447
## h(0.2-Mnf.Flow) * h(Alch.Rel-6.5)                 0.000878
## h(Mnf.Flow-0.2) * h(Alch.Rel-6.48)                0.000530
## h(Mnf.Flow-0.2) * h(6.48-Alch.Rel)                0.003911
## h(Carb.Pressure1-115.2) * h(Usage.cont-22.06)    -0.006320
## h(Carb.Pressure1-115.2) * h(22.06-Usage.cont)    -0.000666
## h(Carb.Pressure1-129.2) * h(6.66-Alch.Rel)       -0.088171
## h(Hyd.Pressure2-20.2) * h(Bowl.Setpoint-110)     -0.000349
## h(Hyd.Pressure2-20.2) * h(110-Bowl.Setpoint)     -0.000169
## h(120-Hyd.Pressure4) * h(6.66-Alch.Rel)           0.005533
## h(Hyd.Pressure4-120) * h(6.66-Alch.Rel)           0.047192
## h(0.262-Oxygen.Filler) * h(Alch.Rel-6.52)         0.475863
## h(0.262-Oxygen.Filler) * h(6.52-Alch.Rel)        -3.839015
## 
## Selected 30 of 36 terms, and 12 of 19 predictors
## Termination condition: Reached nk 39
## Importance: Oxygen.Filler, Filler.Level, Carb.Pressure1, Alch.Rel, ...
## Number of terms at each degree of interaction: 1 9 20
## GCV 0.01620618    RSS 39.31784    GRSq 0.8868732    RSq 0.8931658
plotmo(marsFit2)
##  plotmo grid:    PC.Volume PSC.Fill Mnf.Flow Carb.Pressure1 Fill.Pressure
##                       0.27     0.18     64.8            123          46.4
##  Hyd.Pressure2 Hyd.Pressure4 Filler.Level Temperature Usage.cont Carb.Flow
##           28.6            96        118.2        65.6      21.78      3028
##    MFR Pressure.Vacuum Oxygen.Filler Bowl.Setpoint Alch.Rel Carb.Rel BCB
##  721.4            -5.4        0.0334           120     6.56      5.4   0
##         PT
##  0.9302326

Lastly we make some predictions with our evaluation dataset
#library(Metrics)
preds <- stats::predict(marsFit2, student_eval_2)