#install.packages(c('readxl', 'ggplot2', 'caret', 'randomForest', 'gbm', 'xgboost', 'psych', 'corrplot', 'openxlsx'))
#install.packages("readxl")
#install.packages("openxlsx")
# Load necessary libraries
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: lattice
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.3.3
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(gbm)
## Warning: package 'gbm' was built under R version 4.3.3
## Loaded gbm 2.1.9
## This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3
library(xgboost)
## Warning: package 'xgboost' was built under R version 4.3.3
library(psych)
## Warning: package 'psych' was built under R version 4.3.3
## 
## Attaching package: 'psych'
## The following object is masked from 'package:randomForest':
## 
##     outlier
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:xgboost':
## 
##     slice
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.3.3
library(tidyr)
# Load the dataset
train_data <- readxl::read_excel(path = "StudentData.xlsx")       # training set
test_data <- readxl::read_excel(path = "StudentEvaluation.xlsx")  # evaluation set
# Preview the first six rows of the training data (six is head()'s default)
head(train_data, n = 6L)
## # A tibble: 6 × 33
##   Brand.Code Carb.Volume Fill.Ounces PC.Volume Carb.Pressure Carb.Temp    PSC
##   <chr>            <dbl>       <dbl>     <dbl>         <dbl>     <dbl>  <dbl>
## 1 B                 5.34        24.0     0.263          68.2      141.  0.104
## 2 A                 5.43        24.0     0.239          68.4      140.  0.124
## 3 B                 5.29        24.1     0.263          70.8      145.  0.09 
## 4 A                 5.44        24.0     0.293          63        133. NA    
## 5 A                 5.49        24.3     0.111          67.2      137.  0.026
## 6 A                 5.38        23.9     0.269          66.6      138.  0.09 
## # ℹ 26 more variables: PSC.Fill <dbl>, PSC.CO2 <dbl>, Mnf.Flow <dbl>,
## #   Carb.Pressure1 <dbl>, Fill.Pressure <dbl>, Hyd.Pressure1 <dbl>,
## #   Hyd.Pressure2 <dbl>, Hyd.Pressure3 <dbl>, Hyd.Pressure4 <dbl>,
## #   Filler.Level <dbl>, Filler.Speed <dbl>, Temperature <dbl>,
## #   Usage.cont <dbl>, Carb.Flow <dbl>, Density <dbl>, MFR <dbl>, Balling <dbl>,
## #   Pressure.Vacuum <dbl>, PH <dbl>, Oxygen.Filler <dbl>, Bowl.Setpoint <dbl>,
## #   Pressure.Setpoint <dbl>, Air.Pressurer <dbl>, Alch.Rel <dbl>, …
# Preview the first six rows of the test data (six is head()'s default)
head(test_data, n = 6L)
## # A tibble: 6 × 33
##   Brand.Code Carb.Volume Fill.Ounces PC.Volume Carb.Pressure Carb.Temp   PSC
##   <chr>            <dbl>       <dbl>     <dbl>         <dbl>     <dbl> <dbl>
## 1 D                 5.48        24.0     0.27           65.4      135. 0.236
## 2 A                 5.39        24.0     0.227          63.2      135  0.042
## 3 B                 5.29        23.9     0.303          66.4      140. 0.068
## 4 B                 5.27        23.9     0.186          64.8      139  0.004
## 5 B                 5.41        24.2     0.16           69.4      142. 0.04 
## 6 B                 5.29        24.1     0.212          73.4      147. 0.078
## # ℹ 26 more variables: PSC.Fill <dbl>, PSC.CO2 <dbl>, Mnf.Flow <dbl>,
## #   Carb.Pressure1 <dbl>, Fill.Pressure <dbl>, Hyd.Pressure1 <dbl>,
## #   Hyd.Pressure2 <dbl>, Hyd.Pressure3 <dbl>, Hyd.Pressure4 <dbl>,
## #   Filler.Level <dbl>, Filler.Speed <dbl>, Temperature <dbl>,
## #   Usage.cont <dbl>, Carb.Flow <dbl>, Density <dbl>, MFR <dbl>, Balling <dbl>,
## #   Pressure.Vacuum <dbl>, PH <lgl>, Oxygen.Filler <dbl>, Bowl.Setpoint <dbl>,
## #   Pressure.Setpoint <dbl>, Air.Pressurer <dbl>, Alch.Rel <dbl>, …
# Show the structure of the training data: column names, types, leading values
str(object = train_data)
## tibble [2,571 × 33] (S3: tbl_df/tbl/data.frame)
##  $ Brand.Code       : chr [1:2571] "B" "A" "B" "A" ...
##  $ Carb.Volume      : num [1:2571] 5.34 5.43 5.29 5.44 5.49 ...
##  $ Fill.Ounces      : num [1:2571] 24 24 24.1 24 24.3 ...
##  $ PC.Volume        : num [1:2571] 0.263 0.239 0.263 0.293 0.111 ...
##  $ Carb.Pressure    : num [1:2571] 68.2 68.4 70.8 63 67.2 66.6 64.2 67.6 64.2 72 ...
##  $ Carb.Temp        : num [1:2571] 141 140 145 133 137 ...
##  $ PSC              : num [1:2571] 0.104 0.124 0.09 NA 0.026 0.09 0.128 0.154 0.132 0.014 ...
##  $ PSC.Fill         : num [1:2571] 0.26 0.22 0.34 0.42 0.16 ...
##  $ PSC.CO2          : num [1:2571] 0.04 0.04 0.16 0.04 0.12 ...
##  $ Mnf.Flow         : num [1:2571] -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 ...
##  $ Carb.Pressure1   : num [1:2571] 119 122 120 115 118 ...
##  $ Fill.Pressure    : num [1:2571] 46 46 46 46.4 45.8 45.6 51.8 46.8 46 45.2 ...
##  $ Hyd.Pressure1    : num [1:2571] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Hyd.Pressure2    : num [1:2571] NA NA NA 0 0 0 0 0 0 0 ...
##  $ Hyd.Pressure3    : num [1:2571] NA NA NA 0 0 0 0 0 0 0 ...
##  $ Hyd.Pressure4    : num [1:2571] 118 106 82 92 92 116 124 132 90 108 ...
##  $ Filler.Level     : num [1:2571] 121 119 120 118 119 ...
##  $ Filler.Speed     : num [1:2571] 4002 3986 4020 4012 4010 ...
##  $ Temperature      : num [1:2571] 66 67.6 67 65.6 65.6 66.2 65.8 65.2 65.4 66.6 ...
##  $ Usage.cont       : num [1:2571] 16.2 19.9 17.8 17.4 17.7 ...
##  $ Carb.Flow        : num [1:2571] 2932 3144 2914 3062 3054 ...
##  $ Density          : num [1:2571] 0.88 0.92 1.58 1.54 1.54 1.52 0.84 0.84 0.9 0.9 ...
##  $ MFR              : num [1:2571] 725 727 735 731 723 ...
##  $ Balling          : num [1:2571] 1.4 1.5 3.14 3.04 3.04 ...
##  $ Pressure.Vacuum  : num [1:2571] -4 -4 -3.8 -4.4 -4.4 -4.4 -4.4 -4.4 -4.4 -4.4 ...
##  $ PH               : num [1:2571] 8.36 8.26 8.94 8.24 8.26 8.32 8.4 8.38 8.38 8.5 ...
##  $ Oxygen.Filler    : num [1:2571] 0.022 0.026 0.024 0.03 0.03 0.024 0.066 0.046 0.064 0.022 ...
##  $ Bowl.Setpoint    : num [1:2571] 120 120 120 120 120 120 120 120 120 120 ...
##  $ Pressure.Setpoint: num [1:2571] 46.4 46.8 46.6 46 46 46 46 46 46 46 ...
##  $ Air.Pressurer    : num [1:2571] 143 143 142 146 146 ...
##  $ Alch.Rel         : num [1:2571] 6.58 6.56 7.66 7.14 7.14 7.16 6.54 6.52 6.52 6.54 ...
##  $ Carb.Rel         : num [1:2571] 5.32 5.3 5.84 5.42 5.44 5.44 5.38 5.34 5.34 5.34 ...
##  $ Balling.Lvl      : num [1:2571] 1.48 1.56 3.28 3.04 3.04 3.02 1.44 1.44 1.44 1.38 ...
# Per-column descriptive statistics for the training data (psych::describe)
psych::describe(x = train_data)
##                   vars    n    mean      sd  median trimmed    mad     min
## Brand.Code*          1 2451    2.51    1.00    2.00    2.51   0.00    1.00
## Carb.Volume          2 2561    5.37    0.11    5.35    5.37   0.11    5.04
## Fill.Ounces          3 2533   23.97    0.09   23.97   23.98   0.08   23.63
## PC.Volume            4 2532    0.28    0.06    0.27    0.27   0.05    0.08
## Carb.Pressure        5 2544   68.19    3.54   68.20   68.12   3.56   57.00
## Carb.Temp            6 2545  141.09    4.04  140.80  140.99   3.85  128.60
## PSC                  7 2538    0.08    0.05    0.08    0.08   0.05    0.00
## PSC.Fill             8 2548    0.20    0.12    0.18    0.18   0.12    0.00
## PSC.CO2              9 2532    0.06    0.04    0.04    0.05   0.03    0.00
## Mnf.Flow            10 2569   24.57  119.48   65.20   21.07 169.02 -100.20
## Carb.Pressure1      11 2539  122.59    4.74  123.20  122.54   4.45  105.60
## Fill.Pressure       12 2549   47.92    3.18   46.40   47.71   2.37   34.60
## Hyd.Pressure1       13 2560   12.44   12.43   11.40   10.84  16.90   -0.80
## Hyd.Pressure2       14 2556   20.96   16.39   28.60   21.05  13.34    0.00
## Hyd.Pressure3       15 2556   20.46   15.98   27.60   20.51  13.94   -1.20
## Hyd.Pressure4       16 2541   96.29   13.12   96.00   95.45  11.86   52.00
## Filler.Level        17 2551  109.25   15.70  118.40  111.04   9.19   55.80
## Filler.Speed        18 2514 3687.20  770.82 3982.00 3919.99  47.44  998.00
## Temperature         19 2557   65.97    1.38   65.60   65.80   0.89   63.60
## Usage.cont          20 2566   20.99    2.98   21.79   21.25   3.19   12.08
## Carb.Flow           21 2569 2468.35 1073.70 3028.00 2601.14 326.17   26.00
## Density             22 2570    1.17    0.38    0.98    1.15   0.15    0.24
## MFR                 23 2359  704.05   73.90  724.00  718.16  15.42   31.40
## Balling             24 2570    2.20    0.93    1.65    2.13   0.37   -0.17
## Pressure.Vacuum     25 2571   -5.22    0.57   -5.40   -5.25   0.59   -6.60
## PH                  26 2567    8.55    0.17    8.54    8.55   0.18    7.88
## Oxygen.Filler       27 2559    0.05    0.05    0.03    0.04   0.02    0.00
## Bowl.Setpoint       28 2569  109.33   15.30  120.00  111.35   0.00   70.00
## Pressure.Setpoint   29 2559   47.62    2.04   46.00   47.60   0.00   44.00
## Air.Pressurer       30 2571  142.83    1.21  142.60  142.58   0.59  140.80
## Alch.Rel            31 2562    6.90    0.51    6.56    6.84   0.06    5.28
## Carb.Rel            32 2561    5.44    0.13    5.40    5.43   0.12    4.96
## Balling.Lvl         33 2570    2.05    0.87    1.48    1.98   0.21    0.00
##                       max   range  skew kurtosis    se
## Brand.Code*          4.00    3.00  0.38    -1.06  0.02
## Carb.Volume          5.70    0.66  0.39    -0.47  0.00
## Fill.Ounces         24.32    0.69 -0.02     0.86  0.00
## PC.Volume            0.48    0.40  0.34     0.67  0.00
## Carb.Pressure       79.40   22.40  0.18    -0.01  0.07
## Carb.Temp          154.00   25.40  0.25     0.24  0.08
## PSC                  0.27    0.27  0.85     0.65  0.00
## PSC.Fill             0.62    0.62  0.93     0.77  0.00
## PSC.CO2              0.24    0.24  1.73     3.73  0.00
## Mnf.Flow           229.40  329.60  0.00    -1.87  2.36
## Carb.Pressure1     140.20   34.60  0.05     0.14  0.09
## Fill.Pressure       60.40   25.80  0.55     1.41  0.06
## Hyd.Pressure1       58.00   58.80  0.78    -0.14  0.25
## Hyd.Pressure2       59.40   59.40 -0.30    -1.56  0.32
## Hyd.Pressure3       50.00   51.20 -0.32    -1.57  0.32
## Hyd.Pressure4      142.00   90.00  0.55     0.63  0.26
## Filler.Level       161.20  105.40 -0.85     0.05  0.31
## Filler.Speed      4030.00 3032.00 -2.87     6.71 15.37
## Temperature         76.20   12.60  2.39    10.16  0.03
## Usage.cont          25.90   13.82 -0.54    -1.02  0.06
## Carb.Flow         5104.00 5078.00 -0.99    -0.58 21.18
## Density              1.92    1.68  0.53    -1.20  0.01
## MFR                868.60  837.20 -5.09    30.46  1.52
## Balling              4.01    4.18  0.59    -1.39  0.02
## Pressure.Vacuum     -3.60    3.00  0.53    -0.03  0.01
## PH                   9.36    1.48 -0.29     0.06  0.00
## Oxygen.Filler        0.40    0.40  2.66    11.09  0.00
## Bowl.Setpoint      140.00   70.00 -0.97    -0.06  0.30
## Pressure.Setpoint   52.00    8.00  0.20    -1.60  0.04
## Air.Pressurer      148.20    7.40  2.25     4.73  0.02
## Alch.Rel             8.62    3.34  0.88    -0.85  0.01
## Carb.Rel             6.06    1.10  0.50    -0.29  0.00
## Balling.Lvl          3.66    3.66  0.59    -1.49  0.02
# Count the missing values in each column of the training data
train_data %>%
  is.na() %>%
  colSums()
##        Brand.Code       Carb.Volume       Fill.Ounces         PC.Volume 
##               120                10                38                39 
##     Carb.Pressure         Carb.Temp               PSC          PSC.Fill 
##                27                26                33                23 
##           PSC.CO2          Mnf.Flow    Carb.Pressure1     Fill.Pressure 
##                39                 2                32                22 
##     Hyd.Pressure1     Hyd.Pressure2     Hyd.Pressure3     Hyd.Pressure4 
##                11                15                15                30 
##      Filler.Level      Filler.Speed       Temperature        Usage.cont 
##                20                57                14                 5 
##         Carb.Flow           Density               MFR           Balling 
##                 2                 1               212                 1 
##   Pressure.Vacuum                PH     Oxygen.Filler     Bowl.Setpoint 
##                 0                 4                12                 2 
## Pressure.Setpoint     Air.Pressurer          Alch.Rel          Carb.Rel 
##                12                 0                 9                10 
##       Balling.Lvl 
##                 1
# Count the missing values in each column of the test data
test_data %>%
  is.na() %>%
  colSums()
##        Brand.Code       Carb.Volume       Fill.Ounces         PC.Volume 
##                 8                 1                 6                 4 
##     Carb.Pressure         Carb.Temp               PSC          PSC.Fill 
##                 0                 1                 5                 3 
##           PSC.CO2          Mnf.Flow    Carb.Pressure1     Fill.Pressure 
##                 5                 0                 4                 2 
##     Hyd.Pressure1     Hyd.Pressure2     Hyd.Pressure3     Hyd.Pressure4 
##                 0                 1                 1                 4 
##      Filler.Level      Filler.Speed       Temperature        Usage.cont 
##                 2                10                 2                 2 
##         Carb.Flow           Density               MFR           Balling 
##                 0                 1                31                 1 
##   Pressure.Vacuum                PH     Oxygen.Filler     Bowl.Setpoint 
##                 1               267                 3                 1 
## Pressure.Setpoint     Air.Pressurer          Alch.Rel          Carb.Rel 
##                 2                 1                 3                 2 
##       Balling.Lvl 
##                 0
# Report the dimensions of the data as "rows columns"
cat("Training Shape:", nrow(train_data), ncol(train_data), "\n")
## Training Shape: 2571 33
cat("Testing Shape:", nrow(test_data), ncol(test_data), "\n")
## Testing Shape: 267 33
# Fill missing values, choosing the fill value by column type:
#   * numeric columns   -> the column median (computed with na.rm = TRUE)
#   * character columns -> the most frequent non-missing value
# Columns of any other type (e.g. a logical column that is entirely NA) are
# left unchanged, as is any column whose values are all missing.
#
# Why not median() everywhere: on a character vector median() only returns a
# usable value when the number of non-missing entries is odd; with an even
# count it tries to average two strings, warns, and returns NA, so the gaps
# would silently stay unfilled.
#
# NOTE(review): each data set is imputed from its own statistics. If the test
# set should instead reuse the training-set medians, that needs a separate
# change -- confirm the intended methodology.
impute_missing <- function(df) {
  # Most frequent non-missing value; ties resolve to the first value in
  # table()'s sorted order. Returns NA when there is nothing to count.
  most_frequent <- function(values) {
    counts <- table(values)  # table() ignores NA by default
    if (length(counts) == 0L) {
      return(NA_character_)
    }
    names(counts)[which.max(counts)]
  }

  df %>%
    mutate(
      across(
        where(is.numeric),
        function(col) replace(col, is.na(col), median(col, na.rm = TRUE))
      ),
      across(
        where(is.character),
        function(col) replace(col, is.na(col), most_frequent(col))
      )
    )
}

train_data <- impute_missing(train_data)
test_data <- impute_missing(test_data)
library(dplyr)
library(tidyr)
library(ggplot2)

# Draw one histogram per numeric column of train_data, laid out as a facet
# grid. Each facet gets its own x and y scale ("free") because the columns
# are on very different ranges.
train_data %>%
  dplyr::select(where(is.numeric)) %>%
  # Long format: one row per (column name, value) pair, as facet_wrap() needs
  tidyr::pivot_longer(
    cols = everything(),
    names_to = "key",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value)) +
  geom_histogram(bins = 15, fill = "skyblue", color = "black") +
  facet_wrap(~key, scales = "free") +
  labs(
    x = "Value",
    y = "Frequency",
    title = "Histograms of Predictors"
  ) +
  theme_minimal() +
  theme(
    strip.background = element_rect(fill = "lightblue"),
    strip.text = element_text(size = 10, color = "darkblue"),
    plot.title = element_text(hjust = 0.5)  # centre the title
  )

# Keep only the numeric columns of train_data. vapply() guarantees a logical
# index vector even for a zero-column input, where sapply() would return an
# empty list and break the subsetting.
numeric_data <- train_data[vapply(train_data, is.numeric, logical(1))]

# Correlation matrix of the numeric columns; use = "complete.obs" restricts
# the computation to rows that have no missing value in any column.
correlation_matrix <- cor(numeric_data, use = "complete.obs")

# Print the full correlation matrix
print(correlation_matrix)
##                     Carb.Volume   Fill.Ounces    PC.Volume Carb.Pressure
## Carb.Volume        1.000000e+00  0.0212585043 -0.203017885   0.408811810
## Fill.Ounces        2.125850e-02  1.0000000000 -0.152103103   0.019440695
## PC.Volume         -2.030179e-01 -0.1521031026  1.000000000  -0.105272048
## Carb.Pressure      4.088118e-01  0.0194406946 -0.105272048   1.000000000
## Carb.Temp         -1.135742e-01  0.0060820029  0.001632062   0.784892224
## PSC               -3.330324e-02  0.0048197955  0.195269015  -0.042179331
## PSC.Fill          -1.311497e-02  0.0654515774 -0.043167796  -0.014965649
## PSC.CO2           -5.227773e-02  0.0176324870 -0.035754982  -0.005179737
## Mnf.Flow           8.818228e-02 -0.0069380349 -0.146197727   0.036711881
## Carb.Pressure1     6.506191e-02  0.0003739317 -0.218952619   0.011711845
## Fill.Pressure     -7.158830e-02  0.0583990269 -0.067742511  -0.051827142
## Hyd.Pressure1     -3.367920e-02 -0.1254435401  0.269003299  -0.043379935
## Hyd.Pressure2      4.229248e-02 -0.1065025764  0.049310957   0.010959930
## Hyd.Pressure3      5.485009e-02 -0.0830204931  0.019038611   0.028023539
## Hyd.Pressure4     -3.971862e-01  0.0224789228  0.066180007  -0.231357262
## Filler.Level      -3.465170e-02 -0.0195402489  0.207456711  -0.017347200
## Filler.Speed      -5.187535e-06  0.0215099079  0.041006375   0.040897285
## Temperature       -1.762623e-01  0.0030048371  0.075877049  -0.075230514
## Usage.cont         8.627883e-02  0.0984587275 -0.275367096   0.034490206
## Carb.Flow         -9.568562e-02 -0.0662478612  0.220199392  -0.009205311
## Density            7.597001e-01 -0.0831553027 -0.140703760   0.417712318
## MFR                2.596895e-02  0.0152775401 -0.056863729   0.022942419
## Balling            7.808131e-01 -0.0670957879 -0.171793417   0.420032034
## Pressure.Vacuum   -7.517856e-02  0.0358553350 -0.053689497  -0.022395753
## PH                 6.326444e-02 -0.0944601384  0.046138770   0.059408689
## Oxygen.Filler     -9.369217e-02 -0.0504971496  0.157412764  -0.048141822
## Bowl.Setpoint     -6.143418e-03 -0.0170378548  0.222874311  -0.008463085
## Pressure.Setpoint -1.453952e-01  0.0424012384  0.001706602  -0.089975821
## Air.Pressurer     -1.888702e-02  0.0686730099 -0.037307204   0.011318729
## Alch.Rel           7.767039e-01 -0.1178028521 -0.149882728   0.415779013
## Carb.Rel           7.877878e-01 -0.1203438602 -0.125787054   0.423861238
## Balling.Lvl        7.775300e-01 -0.0648004526 -0.178985293   0.419862138
##                      Carb.Temp          PSC     PSC.Fill      PSC.CO2
## Carb.Volume       -0.113574246 -0.033303237 -0.013114969 -0.052277728
## Fill.Ounces        0.006082003  0.004819796  0.065451577  0.017632487
## PC.Volume          0.001632062  0.195269015 -0.043167796 -0.035754982
## Carb.Pressure      0.784892224 -0.042179331 -0.014965649 -0.005179737
## Carb.Temp          1.000000000 -0.040047388 -0.007396084  0.029919967
## PSC               -0.040047388  1.000000000  0.173713246  0.054966224
## PSC.Fill          -0.007396084  0.173713246  1.000000000  0.192785462
## PSC.CO2            0.029919967  0.054966224  0.192785462  1.000000000
## Mnf.Flow          -0.007096046  0.063565881 -0.031966166  0.038963247
## Carb.Pressure1    -0.014292173 -0.045212973 -0.032224974  0.013569781
## Fill.Pressure     -0.012458179  0.026364186 -0.005232389  0.068088861
## Hyd.Pressure1     -0.030089401  0.008662144 -0.053122047 -0.020808360
## Hyd.Pressure2     -0.010109649 -0.009517101 -0.080164829 -0.007465919
## Hyd.Pressure3     -0.003090288  0.007357622 -0.069150888  0.018610596
## Hyd.Pressure4     -0.028362405  0.015485160  0.006811622  0.047918652
## Filler.Level      -0.006292672 -0.009343092  0.053225072 -0.039998561
## Filler.Speed       0.037106486  0.035686402 -0.003388272 -0.021362599
## Temperature        0.033292343 -0.006479381  0.027083206  0.049009936
## Usage.cont        -0.015683278  0.027699953 -0.023448029  0.022523411
## Carb.Flow          0.045087924  0.006439586  0.026151791  0.013040337
## Density            0.022041872 -0.065698299 -0.012918574 -0.048509384
## MFR                0.010827462  0.001657600 -0.012696695 -0.011978553
## Balling            0.009456554 -0.062168486 -0.009221786 -0.046855616
## Pressure.Vacuum    0.010437375  0.044917604  0.044840914  0.001105246
## PH                 0.028645798 -0.089204555 -0.036359228 -0.075490305
## Oxygen.Filler      0.010104475 -0.036485304 -0.022535145 -0.029249458
## Bowl.Setpoint     -0.010126866 -0.005530397  0.048616431 -0.036280336
## Pressure.Setpoint -0.022899480  0.035213899 -0.009713493  0.067938729
## Air.Pressurer      0.028303551  0.038250791 -0.012767962  0.009334976
## Alch.Rel           0.004009627 -0.057554901 -0.013298114 -0.061293422
## Carb.Rel           0.007826422 -0.069234155 -0.017395932 -0.061499927
## Balling.Lvl        0.010701517 -0.061921959 -0.004162511 -0.046977982
##                        Mnf.Flow Carb.Pressure1 Fill.Pressure Hyd.Pressure1
## Carb.Volume        0.0881822809   0.0650619093  -0.071588296  -0.033679196
## Fill.Ounces       -0.0069380349   0.0003739317   0.058399027  -0.125443540
## PC.Volume         -0.1461977271  -0.2189526185  -0.067742511   0.269003299
## Carb.Pressure      0.0367118813   0.0117118450  -0.051827142  -0.043379935
## Carb.Temp         -0.0070960458  -0.0142921733  -0.012458179  -0.030089401
## PSC                0.0635658805  -0.0452129729   0.026364186   0.008662144
## PSC.Fill          -0.0319661657  -0.0322249743  -0.005232389  -0.053122047
## PSC.CO2            0.0389632469   0.0135697815   0.068088861  -0.020808360
## Mnf.Flow           1.0000000000   0.3397780047   0.421612497   0.354647389
## Carb.Pressure1     0.3397780047   1.0000000000   0.084296700  -0.094922044
## Fill.Pressure      0.4216124969   0.0842966996   1.000000000   0.159953429
## Hyd.Pressure1      0.3546473886  -0.0949220437   0.159953429   1.000000000
## Hyd.Pressure2      0.6500363218   0.1133650745   0.332379939   0.721751331
## Hyd.Pressure3      0.7531156157   0.1827789521   0.427704701   0.632786285
## Hyd.Pressure4     -0.0116258950   0.1653392627   0.063820968  -0.079060137
## Filler.Level      -0.5772614063  -0.3323973970  -0.388796283  -0.055328525
## Filler.Speed       0.1348086547  -0.2450561521   0.062959153   0.168389135
## Temperature       -0.0862374625   0.0872338949  -0.083544484  -0.118319485
## Usage.cont         0.5195345181   0.2858221749   0.244249217   0.101829297
## Carb.Flow         -0.2754149263  -0.4058065533   0.010166611   0.032205974
## Density            0.0347737610   0.0212553126  -0.127991195   0.019981175
## MFR                0.0007046815   0.0165560951  -0.186363505  -0.037919752
## Balling            0.1168155486   0.0496738930  -0.094322493   0.041619876
## Pressure.Vacuum   -0.5275657528  -0.2424018649  -0.257391725  -0.289380451
## PH                -0.4468491012  -0.0802363140  -0.211710485  -0.073710476
## Oxygen.Filler     -0.5134063220  -0.1544553702  -0.208941385  -0.157199967
## Bowl.Setpoint     -0.5794484121  -0.3845654603  -0.346311255  -0.024582375
## Pressure.Setpoint  0.4636035923   0.2021631554   0.663017294   0.183350843
## Air.Pressurer     -0.0494007094   0.0647807324   0.025306043  -0.189376094
## Alch.Rel           0.0277588137   0.0239311722  -0.145511785   0.009272838
## Carb.Rel          -0.0300347813   0.0331118890  -0.167391236   0.024600617
## Balling.Lvl        0.0367470534   0.0321404950  -0.130743297  -0.005043180
##                   Hyd.Pressure2 Hyd.Pressure3 Hyd.Pressure4 Filler.Level
## Carb.Volume         0.042292485   0.054850093  -0.397186151 -0.034651701
## Fill.Ounces        -0.106502576  -0.083020493   0.022478923 -0.019540249
## PC.Volume           0.049310957   0.019038611   0.066180007  0.207456711
## Carb.Pressure       0.010959930   0.028023539  -0.231357262 -0.017347200
## Carb.Temp          -0.010109649  -0.003090288  -0.028362405 -0.006292672
## PSC                -0.009517101   0.007357622   0.015485160 -0.009343092
## PSC.Fill           -0.080164829  -0.069150888   0.006811622  0.053225072
## PSC.CO2            -0.007465919   0.018610596   0.047918652 -0.039998561
## Mnf.Flow            0.650036322   0.753115616  -0.011625895 -0.577261406
## Carb.Pressure1      0.113365074   0.182778952   0.165339263 -0.332397397
## Fill.Pressure       0.332379939   0.427704701   0.063820968 -0.388796283
## Hyd.Pressure1       0.721751331   0.632786285  -0.079060137 -0.055328525
## Hyd.Pressure2       1.000000000   0.924972596  -0.147981692 -0.403262899
## Hyd.Pressure3       0.924972596   1.000000000  -0.132469425 -0.496305114
## Hyd.Pressure4      -0.147981692  -0.132469425   1.000000000 -0.028225283
## Filler.Level       -0.403262899  -0.496305114  -0.028225283  1.000000000
## Filler.Speed        0.286461819   0.267409786  -0.413747808 -0.057003877
## Temperature        -0.214849991  -0.197208402   0.304622583  0.072381524
## Usage.cont          0.345801225   0.376257167   0.005455157 -0.344398170
## Carb.Flow          -0.036084644  -0.057023339  -0.257635685 -0.042991039
## Density             0.083343636   0.067942858  -0.466658299 -0.009377047
## MFR                 0.044974369   0.009765715  -0.189244427  0.052682060
## Balling             0.122964927   0.139358149  -0.473387802 -0.005204258
## Pressure.Vacuum    -0.561918494  -0.599776975  -0.029856857  0.327474979
## PH                 -0.200654459  -0.240165668  -0.139820117  0.321590911
## Oxygen.Filler      -0.286392720  -0.336583668   0.030543597  0.225601956
## Bowl.Setpoint      -0.376503487  -0.467832147  -0.082420592  0.934980656
## Pressure.Setpoint   0.307573236   0.413654063   0.207661599 -0.413709676
## Air.Pressurer      -0.153688896  -0.067530636   0.045250411 -0.129050678
## Alch.Rel            0.040340220   0.048006408  -0.497145681  0.035364991
## Carb.Rel            0.021099626   0.004035867  -0.403977016  0.105003490
## Balling.Lvl         0.031388745   0.039780107  -0.426624243  0.042005182
##                    Filler.Speed  Temperature   Usage.cont    Carb.Flow
## Carb.Volume       -5.187535e-06 -0.176262272  0.086278827 -0.095685618
## Fill.Ounces        2.150991e-02  0.003004837  0.098458727 -0.066247861
## PC.Volume          4.100638e-02  0.075877049 -0.275367096  0.220199392
## Carb.Pressure      4.089729e-02 -0.075230514  0.034490206 -0.009205311
## Carb.Temp          3.710649e-02  0.033292343 -0.015683278  0.045087924
## PSC                3.568640e-02 -0.006479381  0.027699953  0.006439586
## PSC.Fill          -3.388272e-03  0.027083206 -0.023448029  0.026151791
## PSC.CO2           -2.136260e-02  0.049009936  0.022523411  0.013040337
## Mnf.Flow           1.348087e-01 -0.086237463  0.519534518 -0.275414926
## Carb.Pressure1    -2.450562e-01  0.087233895  0.285822175 -0.405806553
## Fill.Pressure      6.295915e-02 -0.083544484  0.244249217  0.010166611
## Hyd.Pressure1      1.683891e-01 -0.118319485  0.101829297  0.032205974
## Hyd.Pressure2      2.864618e-01 -0.214849991  0.345801225 -0.036084644
## Hyd.Pressure3      2.674098e-01 -0.197208402  0.376257167 -0.057023339
## Hyd.Pressure4     -4.137478e-01  0.304622583  0.005455157 -0.257635685
## Filler.Level      -5.700388e-02  0.072381524 -0.344398170 -0.042991039
## Filler.Speed       1.000000e+00 -0.331925688  0.072860552  0.371706402
## Temperature       -3.319257e-01  1.000000000 -0.111202852 -0.126139136
## Usage.cont         7.286055e-02 -0.111202852  1.000000000 -0.327813493
## Carb.Flow          3.717064e-01 -0.126139136 -0.327813493  1.000000000
## Density            4.398180e-02 -0.183430617  0.019851201  0.036875954
## MFR                4.038970e-01 -0.082754103  0.021902165 -0.075202520
## Balling            5.159532e-02 -0.222935172  0.091263340 -0.059520322
## Pressure.Vacuum   -1.276654e-03  0.041422648 -0.302056601  0.206874349
## PH                -2.502336e-02 -0.157572482 -0.317971919  0.155921411
## Oxygen.Filler     -1.186126e-01  0.130050953 -0.290048579  0.166532635
## Bowl.Setpoint     -1.436573e-02  0.046336967 -0.367018708  0.005890908
## Pressure.Setpoint -2.923757e-02  0.033391252  0.239091965 -0.119713769
## Air.Pressurer     -5.871091e-03  0.056158411 -0.094573568  0.085071689
## Alch.Rel          -2.363671e-02 -0.174773338 -0.003334949 -0.021794425
## Carb.Rel          -4.163474e-02 -0.095495104 -0.029986329 -0.056676141
## Balling.Lvl       -1.364646e-02 -0.174817255  0.042979594 -0.060387420
##                        Density           MFR      Balling Pressure.Vacuum
## Carb.Volume        0.759700065  0.0259689485  0.780813102    -0.075178558
## Fill.Ounces       -0.083155303  0.0152775401 -0.067095788     0.035855335
## PC.Volume         -0.140703760 -0.0568637287 -0.171793417    -0.053689497
## Carb.Pressure      0.417712318  0.0229424195  0.420032034    -0.022395753
## Carb.Temp          0.022041872  0.0108274616  0.009456554     0.010437375
## PSC               -0.065698299  0.0016576001 -0.062168486     0.044917604
## PSC.Fill          -0.012918574 -0.0126966950 -0.009221786     0.044840914
## PSC.CO2           -0.048509384 -0.0119785529 -0.046855616     0.001105246
## Mnf.Flow           0.034773761  0.0007046815  0.116815549    -0.527565753
## Carb.Pressure1     0.021255313  0.0165560951  0.049673893    -0.242401865
## Fill.Pressure     -0.127991195 -0.1863635048 -0.094322493    -0.257391725
## Hyd.Pressure1      0.019981175 -0.0379197516  0.041619876    -0.289380451
## Hyd.Pressure2      0.083343636  0.0449743689  0.122964927    -0.561918494
## Hyd.Pressure3      0.067942858  0.0097657148  0.139358149    -0.599776975
## Hyd.Pressure4     -0.466658299 -0.1892444270 -0.473387802    -0.029856857
## Filler.Level      -0.009377047  0.0526820600 -0.005204258     0.327474979
## Filler.Speed       0.043981798  0.4038969514  0.051595323    -0.001276654
## Temperature       -0.183430617 -0.0827541030 -0.222935172     0.041422648
## Usage.cont         0.019851201  0.0219021648  0.091263340    -0.302056601
## Carb.Flow          0.036875954 -0.0752025201 -0.059520322     0.206874349
## Density            1.000000000  0.0274524052  0.955159423    -0.088177447
## MFR                0.027452405  1.0000000000  0.042084034     0.058987594
## Balling            0.955159423  0.0420840335  1.000000000    -0.164776248
## Pressure.Vacuum   -0.088177447  0.0589875941 -0.164776248     1.000000000
## PH                 0.078350007 -0.0072300187  0.065202156     0.220503718
## Oxygen.Filler     -0.047967697 -0.0196315379 -0.107357546     0.218429076
## Bowl.Setpoint      0.012268145  0.0354911162  0.018146800     0.340022205
## Pressure.Setpoint -0.232309434 -0.0635742534 -0.194874208    -0.286278602
## Air.Pressurer     -0.085953514  0.0103030502 -0.104541030     0.168334723
## Alch.Rel           0.900729636 -0.0077329151  0.923301609    -0.050870166
## Carb.Rel           0.821367472  0.0098159150  0.820305019    -0.003905683
## Balling.Lvl        0.947310974  0.0151664916  0.977623234    -0.044418488
##                             PH Oxygen.Filler Bowl.Setpoint Pressure.Setpoint
## Carb.Volume        0.063264438   -0.09369217  -0.006143418      -0.145395154
## Fill.Ounces       -0.094460138   -0.05049715  -0.017037855       0.042401238
## PC.Volume          0.046138770    0.15741276   0.222874311       0.001706602
## Carb.Pressure      0.059408689   -0.04814182  -0.008463085      -0.089975821
## Carb.Temp          0.028645798    0.01010447  -0.010126866      -0.022899480
## PSC               -0.089204555   -0.03648530  -0.005530397       0.035213899
## PSC.Fill          -0.036359228   -0.02253515   0.048616431      -0.009713493
## PSC.CO2           -0.075490305   -0.02924946  -0.036280336       0.067938729
## Mnf.Flow          -0.446849101   -0.51340632  -0.579448412       0.463603592
## Carb.Pressure1    -0.080236314   -0.15445537  -0.384565460       0.202163155
## Fill.Pressure     -0.211710485   -0.20894138  -0.346311255       0.663017294
## Hyd.Pressure1     -0.073710476   -0.15719997  -0.024582375       0.183350843
## Hyd.Pressure2     -0.200654459   -0.28639272  -0.376503487       0.307573236
## Hyd.Pressure3     -0.240165668   -0.33658367  -0.467832147       0.413654063
## Hyd.Pressure4     -0.139820117    0.03054360  -0.082420592       0.207661599
## Filler.Level       0.321590911    0.22560196   0.934980656      -0.413709676
## Filler.Speed      -0.025023364   -0.11861261  -0.014365727      -0.029237574
## Temperature       -0.157572482    0.13005095   0.046336967       0.033391252
## Usage.cont        -0.317971919   -0.29004858  -0.367018708       0.239091965
## Carb.Flow          0.155921411    0.16653263   0.005890908      -0.119713769
## Density            0.078350007   -0.04796770   0.012268145      -0.232309434
## MFR               -0.007230019   -0.01963154   0.035491116      -0.063574253
## Balling            0.065202156   -0.10735755   0.018146800      -0.194874208
## Pressure.Vacuum    0.220503718    0.21842908   0.340022205      -0.286278602
## PH                 1.000000000    0.16031508   0.348602509      -0.305412094
## Oxygen.Filler      0.160315075    1.00000000   0.211227788      -0.214025858
## Bowl.Setpoint      0.348602509    0.21122779   1.000000000      -0.414899059
## Pressure.Setpoint -0.305412094   -0.21402586  -0.414899059       1.000000000
## Air.Pressurer     -0.013657744    0.09372746  -0.135591107       0.080480506
## Alch.Rel           0.147170660   -0.04127405   0.048047535      -0.241679242
## Carb.Rel           0.162393549   -0.01193740   0.126835783      -0.236748177
## Balling.Lvl        0.099791970   -0.06023303   0.064468576      -0.231849552
##                   Air.Pressurer     Alch.Rel     Carb.Rel  Balling.Lvl
## Carb.Volume        -0.018887017  0.776703895  0.787787847  0.777529976
## Fill.Ounces         0.068673010 -0.117802852 -0.120343860 -0.064800453
## PC.Volume          -0.037307204 -0.149882728 -0.125787054 -0.178985293
## Carb.Pressure       0.011318729  0.415779013  0.423861238  0.419862138
## Carb.Temp           0.028303551  0.004009627  0.007826422  0.010701517
## PSC                 0.038250791 -0.057554901 -0.069234155 -0.061921959
## PSC.Fill           -0.012767962 -0.013298114 -0.017395932 -0.004162511
## PSC.CO2             0.009334976 -0.061293422 -0.061499927 -0.046977982
## Mnf.Flow           -0.049400709  0.027758814 -0.030034781  0.036747053
## Carb.Pressure1      0.064780732  0.023931172  0.033111889  0.032140495
## Fill.Pressure       0.025306043 -0.145511785 -0.167391236 -0.130743297
## Hyd.Pressure1      -0.189376094  0.009272838  0.024600617 -0.005043180
## Hyd.Pressure2      -0.153688896  0.040340220  0.021099626  0.031388745
## Hyd.Pressure3      -0.067530636  0.048006408  0.004035867  0.039780107
## Hyd.Pressure4       0.045250411 -0.497145681 -0.403977016 -0.426624243
## Filler.Level       -0.129050678  0.035364991  0.105003490  0.042005182
## Filler.Speed       -0.005871091 -0.023636707 -0.041634738 -0.013646459
## Temperature         0.056158411 -0.174773338 -0.095495104 -0.174817255
## Usage.cont         -0.094573568 -0.003334949 -0.029986329  0.042979594
## Carb.Flow           0.085071689 -0.021794425 -0.056676141 -0.060387420
## Density            -0.085953514  0.900729636  0.821367472  0.947310974
## MFR                 0.010303050 -0.007732915  0.009815915  0.015166492
## Balling            -0.104541030  0.923301609  0.820305019  0.977623234
## Pressure.Vacuum     0.168334723 -0.050870166 -0.003905683 -0.044418488
## PH                 -0.013657744  0.147170660  0.162393549  0.099791970
## Oxygen.Filler       0.093727461 -0.041274050 -0.011937401 -0.060233026
## Bowl.Setpoint      -0.135591107  0.048047535  0.126835783  0.064468576
## Pressure.Setpoint   0.080480506 -0.241679242 -0.236748177 -0.231849552
## Air.Pressurer       1.000000000 -0.084140642 -0.101247073 -0.086810997
## Alch.Rel           -0.084140642  1.000000000  0.842043763  0.921318584
## Carb.Rel           -0.101247073  0.842043763  1.000000000  0.842353791
## Balling.Lvl        -0.086810997  0.921318584  0.842353791  1.000000000
# Visualize the correlation matrix using a heatmap
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.3
## corrplot 0.92 loaded
# Render the correlation matrix as a colour-coded heatmap.
# tl.cex sets the size of the variable labels; number.cex is the text size
# corrplot would use for printed coefficients.
corrplot(
  corr = correlation_matrix,
  method = "color",
  tl.cex = 0.8,
  number.cex = 0.7
)

# Select features and target.
# The same predictor columns are needed from the training data and from the
# evaluation data, so the names are listed once and reused for both.
feature_cols <- c(
  "Carb.Volume", "Fill.Ounces", "Carb.Pressure", "Temperature",
  "Density", "Balling", "Oxygen.Filler", "Pressure.Setpoint"
)
training_features <- train_data[, feature_cols]
training_target <- train_data$PH
testing_features <- test_data[, feature_cols]

# Split the labelled data: 80% of the rows for fitting, the rest held out
# for evaluation. The seed makes the partition reproducible.
set.seed(42)
trainIndex <- createDataPartition(training_target, p = 0.8, list = FALSE)

# Standardize the features.
# The centering/scaling parameters are estimated from the training partition
# only, so the held-out rows do not influence the transformation. The fitted
# transformation is then applied to every labelled row and to the evaluation
# data.
preProc <- preProcess(
  training_features[trainIndex, ],
  method = c("center", "scale")
)
features_scaled_train <- predict(preProc, training_features)
features_scaled_test <- predict(preProc, testing_features)

# Training and holdout partitions of the scaled features and the target
X_train <- features_scaled_train[trainIndex, ]
X_test <- features_scaled_train[-trainIndex, ]
y_train <- training_target[trainIndex]
y_test <- training_target[-trainIndex]

# Convert the training partition to plain data frames for the model functions
X_train_df <- as.data.frame(X_train)
y_train_df <- as.data.frame(y_train)
# Load necessary libraries
library(caret)
library(randomForest)
library(gbm)
library(xgboost)
# Fit the candidate regression models on the training partition.
# The elements of list() are evaluated in order, so a single set.seed() call
# ahead of the list only controls the first fit. Each fit therefore resets the
# seed itself, which keeps every model reproducible independently of the
# others.
models <- list(
  "Linear Regression" = {
    set.seed(42)
    train(X_train_df, y_train, method = "lm")
  },
  "Random Forest" = {
    set.seed(42)
    randomForest(X_train_df, y_train, ntree = 100)
  },
  "Gradient Boosting" = {
    # Seed immediately before gbm() so the boosted fit is reproducible.
    set.seed(42)
    gbm(
      y_train ~ .,
      data = data.frame(X_train_df, y_train),
      # Stated explicitly; gbm otherwise reports that it is assuming gaussian.
      distribution = "gaussian",
      n.trees = 100,
      interaction.depth = 3,
      cv.folds = 5,
      shrinkage = 0.1,
      n.minobsinnode = 10
    )
  },
  "XGBoost" = {
    set.seed(42)
    xgboost(
      data = as.matrix(X_train_df),
      label = y_train,
      nrounds = 100,
      objective = "reg:squarederror"
    )
  }
)
## Distribution not specified, assuming gaussian ...
## [1]  train-rmse:5.635856 
## [2]  train-rmse:3.947828 
## [3]  train-rmse:2.766779 
## [4]  train-rmse:1.940941 
## [5]  train-rmse:1.364058 
## [6]  train-rmse:0.961981 
## [7]  train-rmse:0.682834 
## [8]  train-rmse:0.489984 
## [9]  train-rmse:0.358142 
## [10] train-rmse:0.268990 
## [11] train-rmse:0.210401 
## [12] train-rmse:0.173775 
## [13] train-rmse:0.151856 
## [14] train-rmse:0.137849 
## [15] train-rmse:0.130452 
## [16] train-rmse:0.125457 
## [17] train-rmse:0.122719 
## [18] train-rmse:0.119346 
## [19] train-rmse:0.117290 
## [20] train-rmse:0.114618 
## [21] train-rmse:0.113214 
## [22] train-rmse:0.112762 
## [23] train-rmse:0.111771 
## [24] train-rmse:0.109134 
## [25] train-rmse:0.108406 
## [26] train-rmse:0.107827 
## [27] train-rmse:0.106447 
## [28] train-rmse:0.104558 
## [29] train-rmse:0.102788 
## [30] train-rmse:0.101472 
## [31] train-rmse:0.100104 
## [32] train-rmse:0.098503 
## [33] train-rmse:0.097843 
## [34] train-rmse:0.095967 
## [35] train-rmse:0.094817 
## [36] train-rmse:0.093710 
## [37] train-rmse:0.093375 
## [38] train-rmse:0.091349 
## [39] train-rmse:0.089666 
## [40] train-rmse:0.088599 
## [41] train-rmse:0.087761 
## [42] train-rmse:0.087094 
## [43] train-rmse:0.085264 
## [44] train-rmse:0.083317 
## [45] train-rmse:0.082874 
## [46] train-rmse:0.082064 
## [47] train-rmse:0.080838 
## [48] train-rmse:0.080224 
## [49] train-rmse:0.080074 
## [50] train-rmse:0.078661 
## [51] train-rmse:0.078477 
## [52] train-rmse:0.076837 
## [53] train-rmse:0.076408 
## [54] train-rmse:0.075093 
## [55] train-rmse:0.073905 
## [56] train-rmse:0.073280 
## [57] train-rmse:0.072634 
## [58] train-rmse:0.072470 
## [59] train-rmse:0.072014 
## [60] train-rmse:0.071378 
## [61] train-rmse:0.070814 
## [62] train-rmse:0.070602 
## [63] train-rmse:0.070507 
## [64] train-rmse:0.070421 
## [65] train-rmse:0.070243 
## [66] train-rmse:0.070077 
## [67] train-rmse:0.069182 
## [68] train-rmse:0.068134 
## [69] train-rmse:0.067133 
## [70] train-rmse:0.065714 
## [71] train-rmse:0.065634 
## [72] train-rmse:0.064589 
## [73] train-rmse:0.063728 
## [74] train-rmse:0.062988 
## [75] train-rmse:0.061585 
## [76] train-rmse:0.061076 
## [77] train-rmse:0.060624 
## [78] train-rmse:0.059925 
## [79] train-rmse:0.059798 
## [80] train-rmse:0.058570 
## [81] train-rmse:0.058366 
## [82] train-rmse:0.057229 
## [83] train-rmse:0.056250 
## [84] train-rmse:0.055799 
## [85] train-rmse:0.055449 
## [86] train-rmse:0.055063 
## [87] train-rmse:0.054126 
## [88] train-rmse:0.053473 
## [89] train-rmse:0.053267 
## [90] train-rmse:0.053020 
## [91] train-rmse:0.052728 
## [92] train-rmse:0.052330 
## [93] train-rmse:0.051606 
## [94] train-rmse:0.051562 
## [95] train-rmse:0.050919 
## [96] train-rmse:0.050829 
## [97] train-rmse:0.050262 
## [98] train-rmse:0.049130 
## [99] train-rmse:0.048848 
## [100]    train-rmse:0.048448
# Evaluate every fitted model on the holdout partition (X_test / y_test) and
# print its root mean squared error.
# NOTE: the `##` output recorded after this loop in the file comes from an
# earlier run that compared training-set predictions with y_test (hence its
# "longer object length" warnings); re-run the script to refresh it.
holdout_df <- as.data.frame(X_test)
for (name in names(models)) {
  model <- models[[name]]
  # xgboost predicts from a numeric matrix; the other models take a data frame
  if (name == "XGBoost") {
    predictions <- predict(model, newdata = as.matrix(holdout_df))
  } else {
    predictions <- predict(model, newdata = holdout_df)
  }
  # Fail loudly instead of letting R recycle vectors of different lengths
  stopifnot(length(predictions) == length(y_test))
  rmse <- sqrt(mean((y_test - predictions)^2))
  cat(sprintf("%s - Root Mean Squared Error: %.4f\n", name, rmse))
}
## Warning in y_test - predictions: longer object length is not a multiple of
## shorter object length
## Linear Regression - Mean Squared Error: 0.1899
## Warning in y_test - predictions: longer object length is not a multiple of
## shorter object length
## Random Forest - Mean Squared Error: 0.2168
## Using 94 trees...
## Warning in y_test - predictions: longer object length is not a multiple of
## shorter object length
## Gradient Boosting - Mean Squared Error: 0.1972
## Warning in y_test - predictions: longer object length is not a multiple of
## shorter object length
## XGBoost - Mean Squared Error: 0.2329
# Pick the model that is applied to the student evaluation data.
# NOTE(review): the model is chosen by a hard-coded name rather than from the
# computed error metrics - confirm this choice against the holdout scores.
best_model <- models$`Random Forest`

# Score the scaled evaluation features with the chosen model
best_predictions <- predict(
  best_model,
  newdata = as.matrix(features_scaled_test)
)

# Export the predictions as a single "Predicted" column in an Excel workbook
predicted_df <- data.frame(Predicted = best_predictions)
write.xlsx(predicted_df, file = 'pH_predictions.xlsx', rowNames = FALSE)