#install.packages(c('readxl', 'ggplot2', 'caret', 'randomForest', 'gbm', 'xgboost', 'psych', 'corrplot', 'openxlsx'))
#install.packages("readxl")
#install.packages("openxlsx")
# Load necessary libraries
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: lattice
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.3.3
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
library(gbm)
## Warning: package 'gbm' was built under R version 4.3.3
## Loaded gbm 2.1.9
## This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3
library(xgboost)
## Warning: package 'xgboost' was built under R version 4.3.3
library(psych)
## Warning: package 'psych' was built under R version 4.3.3
##
## Attaching package: 'psych'
## The following object is masked from 'package:randomForest':
##
## outlier
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:xgboost':
##
## slice
## The following object is masked from 'package:randomForest':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.3.3
library(tidyr)
# Read the training workbook and the evaluation workbook
train_data <- read_excel(path = "StudentData.xlsx")
test_data <- read_excel(path = "StudentEvaluation.xlsx")
# Preview the first six rows of the training data
head(train_data, n = 6L)
## # A tibble: 6 × 33
## Brand.Code Carb.Volume Fill.Ounces PC.Volume Carb.Pressure Carb.Temp PSC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 B 5.34 24.0 0.263 68.2 141. 0.104
## 2 A 5.43 24.0 0.239 68.4 140. 0.124
## 3 B 5.29 24.1 0.263 70.8 145. 0.09
## 4 A 5.44 24.0 0.293 63 133. NA
## 5 A 5.49 24.3 0.111 67.2 137. 0.026
## 6 A 5.38 23.9 0.269 66.6 138. 0.09
## # ℹ 26 more variables: PSC.Fill <dbl>, PSC.CO2 <dbl>, Mnf.Flow <dbl>,
## # Carb.Pressure1 <dbl>, Fill.Pressure <dbl>, Hyd.Pressure1 <dbl>,
## # Hyd.Pressure2 <dbl>, Hyd.Pressure3 <dbl>, Hyd.Pressure4 <dbl>,
## # Filler.Level <dbl>, Filler.Speed <dbl>, Temperature <dbl>,
## # Usage.cont <dbl>, Carb.Flow <dbl>, Density <dbl>, MFR <dbl>, Balling <dbl>,
## # Pressure.Vacuum <dbl>, PH <dbl>, Oxygen.Filler <dbl>, Bowl.Setpoint <dbl>,
## # Pressure.Setpoint <dbl>, Air.Pressurer <dbl>, Alch.Rel <dbl>, …
# Preview the first six rows of the evaluation data
head(test_data, n = 6L)
## # A tibble: 6 × 33
## Brand.Code Carb.Volume Fill.Ounces PC.Volume Carb.Pressure Carb.Temp PSC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 D 5.48 24.0 0.27 65.4 135. 0.236
## 2 A 5.39 24.0 0.227 63.2 135 0.042
## 3 B 5.29 23.9 0.303 66.4 140. 0.068
## 4 B 5.27 23.9 0.186 64.8 139 0.004
## 5 B 5.41 24.2 0.16 69.4 142. 0.04
## 6 B 5.29 24.1 0.212 73.4 147. 0.078
## # ℹ 26 more variables: PSC.Fill <dbl>, PSC.CO2 <dbl>, Mnf.Flow <dbl>,
## # Carb.Pressure1 <dbl>, Fill.Pressure <dbl>, Hyd.Pressure1 <dbl>,
## # Hyd.Pressure2 <dbl>, Hyd.Pressure3 <dbl>, Hyd.Pressure4 <dbl>,
## # Filler.Level <dbl>, Filler.Speed <dbl>, Temperature <dbl>,
## # Usage.cont <dbl>, Carb.Flow <dbl>, Density <dbl>, MFR <dbl>, Balling <dbl>,
## # Pressure.Vacuum <dbl>, PH <lgl>, Oxygen.Filler <dbl>, Bowl.Setpoint <dbl>,
## # Pressure.Setpoint <dbl>, Air.Pressurer <dbl>, Alch.Rel <dbl>, …
# Column types and a few leading values of every training column
utils::str(train_data)
## tibble [2,571 × 33] (S3: tbl_df/tbl/data.frame)
## $ Brand.Code : chr [1:2571] "B" "A" "B" "A" ...
## $ Carb.Volume : num [1:2571] 5.34 5.43 5.29 5.44 5.49 ...
## $ Fill.Ounces : num [1:2571] 24 24 24.1 24 24.3 ...
## $ PC.Volume : num [1:2571] 0.263 0.239 0.263 0.293 0.111 ...
## $ Carb.Pressure : num [1:2571] 68.2 68.4 70.8 63 67.2 66.6 64.2 67.6 64.2 72 ...
## $ Carb.Temp : num [1:2571] 141 140 145 133 137 ...
## $ PSC : num [1:2571] 0.104 0.124 0.09 NA 0.026 0.09 0.128 0.154 0.132 0.014 ...
## $ PSC.Fill : num [1:2571] 0.26 0.22 0.34 0.42 0.16 ...
## $ PSC.CO2 : num [1:2571] 0.04 0.04 0.16 0.04 0.12 ...
## $ Mnf.Flow : num [1:2571] -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 ...
## $ Carb.Pressure1 : num [1:2571] 119 122 120 115 118 ...
## $ Fill.Pressure : num [1:2571] 46 46 46 46.4 45.8 45.6 51.8 46.8 46 45.2 ...
## $ Hyd.Pressure1 : num [1:2571] 0 0 0 0 0 0 0 0 0 0 ...
## $ Hyd.Pressure2 : num [1:2571] NA NA NA 0 0 0 0 0 0 0 ...
## $ Hyd.Pressure3 : num [1:2571] NA NA NA 0 0 0 0 0 0 0 ...
## $ Hyd.Pressure4 : num [1:2571] 118 106 82 92 92 116 124 132 90 108 ...
## $ Filler.Level : num [1:2571] 121 119 120 118 119 ...
## $ Filler.Speed : num [1:2571] 4002 3986 4020 4012 4010 ...
## $ Temperature : num [1:2571] 66 67.6 67 65.6 65.6 66.2 65.8 65.2 65.4 66.6 ...
## $ Usage.cont : num [1:2571] 16.2 19.9 17.8 17.4 17.7 ...
## $ Carb.Flow : num [1:2571] 2932 3144 2914 3062 3054 ...
## $ Density : num [1:2571] 0.88 0.92 1.58 1.54 1.54 1.52 0.84 0.84 0.9 0.9 ...
## $ MFR : num [1:2571] 725 727 735 731 723 ...
## $ Balling : num [1:2571] 1.4 1.5 3.14 3.04 3.04 ...
## $ Pressure.Vacuum : num [1:2571] -4 -4 -3.8 -4.4 -4.4 -4.4 -4.4 -4.4 -4.4 -4.4 ...
## $ PH : num [1:2571] 8.36 8.26 8.94 8.24 8.26 8.32 8.4 8.38 8.38 8.5 ...
## $ Oxygen.Filler : num [1:2571] 0.022 0.026 0.024 0.03 0.03 0.024 0.066 0.046 0.064 0.022 ...
## $ Bowl.Setpoint : num [1:2571] 120 120 120 120 120 120 120 120 120 120 ...
## $ Pressure.Setpoint: num [1:2571] 46.4 46.8 46.6 46 46 46 46 46 46 46 ...
## $ Air.Pressurer : num [1:2571] 143 143 142 146 146 ...
## $ Alch.Rel : num [1:2571] 6.58 6.56 7.66 7.14 7.14 7.16 6.54 6.52 6.52 6.54 ...
## $ Carb.Rel : num [1:2571] 5.32 5.3 5.84 5.42 5.44 5.44 5.38 5.34 5.34 5.34 ...
## $ Balling.Lvl : num [1:2571] 1.48 1.56 3.28 3.04 3.04 3.02 1.44 1.44 1.44 1.38 ...
# Summary statistics per column (psych::describe)
psych::describe(train_data)
## vars n mean sd median trimmed mad min
## Brand.Code* 1 2451 2.51 1.00 2.00 2.51 0.00 1.00
## Carb.Volume 2 2561 5.37 0.11 5.35 5.37 0.11 5.04
## Fill.Ounces 3 2533 23.97 0.09 23.97 23.98 0.08 23.63
## PC.Volume 4 2532 0.28 0.06 0.27 0.27 0.05 0.08
## Carb.Pressure 5 2544 68.19 3.54 68.20 68.12 3.56 57.00
## Carb.Temp 6 2545 141.09 4.04 140.80 140.99 3.85 128.60
## PSC 7 2538 0.08 0.05 0.08 0.08 0.05 0.00
## PSC.Fill 8 2548 0.20 0.12 0.18 0.18 0.12 0.00
## PSC.CO2 9 2532 0.06 0.04 0.04 0.05 0.03 0.00
## Mnf.Flow 10 2569 24.57 119.48 65.20 21.07 169.02 -100.20
## Carb.Pressure1 11 2539 122.59 4.74 123.20 122.54 4.45 105.60
## Fill.Pressure 12 2549 47.92 3.18 46.40 47.71 2.37 34.60
## Hyd.Pressure1 13 2560 12.44 12.43 11.40 10.84 16.90 -0.80
## Hyd.Pressure2 14 2556 20.96 16.39 28.60 21.05 13.34 0.00
## Hyd.Pressure3 15 2556 20.46 15.98 27.60 20.51 13.94 -1.20
## Hyd.Pressure4 16 2541 96.29 13.12 96.00 95.45 11.86 52.00
## Filler.Level 17 2551 109.25 15.70 118.40 111.04 9.19 55.80
## Filler.Speed 18 2514 3687.20 770.82 3982.00 3919.99 47.44 998.00
## Temperature 19 2557 65.97 1.38 65.60 65.80 0.89 63.60
## Usage.cont 20 2566 20.99 2.98 21.79 21.25 3.19 12.08
## Carb.Flow 21 2569 2468.35 1073.70 3028.00 2601.14 326.17 26.00
## Density 22 2570 1.17 0.38 0.98 1.15 0.15 0.24
## MFR 23 2359 704.05 73.90 724.00 718.16 15.42 31.40
## Balling 24 2570 2.20 0.93 1.65 2.13 0.37 -0.17
## Pressure.Vacuum 25 2571 -5.22 0.57 -5.40 -5.25 0.59 -6.60
## PH 26 2567 8.55 0.17 8.54 8.55 0.18 7.88
## Oxygen.Filler 27 2559 0.05 0.05 0.03 0.04 0.02 0.00
## Bowl.Setpoint 28 2569 109.33 15.30 120.00 111.35 0.00 70.00
## Pressure.Setpoint 29 2559 47.62 2.04 46.00 47.60 0.00 44.00
## Air.Pressurer 30 2571 142.83 1.21 142.60 142.58 0.59 140.80
## Alch.Rel 31 2562 6.90 0.51 6.56 6.84 0.06 5.28
## Carb.Rel 32 2561 5.44 0.13 5.40 5.43 0.12 4.96
## Balling.Lvl 33 2570 2.05 0.87 1.48 1.98 0.21 0.00
## max range skew kurtosis se
## Brand.Code* 4.00 3.00 0.38 -1.06 0.02
## Carb.Volume 5.70 0.66 0.39 -0.47 0.00
## Fill.Ounces 24.32 0.69 -0.02 0.86 0.00
## PC.Volume 0.48 0.40 0.34 0.67 0.00
## Carb.Pressure 79.40 22.40 0.18 -0.01 0.07
## Carb.Temp 154.00 25.40 0.25 0.24 0.08
## PSC 0.27 0.27 0.85 0.65 0.00
## PSC.Fill 0.62 0.62 0.93 0.77 0.00
## PSC.CO2 0.24 0.24 1.73 3.73 0.00
## Mnf.Flow 229.40 329.60 0.00 -1.87 2.36
## Carb.Pressure1 140.20 34.60 0.05 0.14 0.09
## Fill.Pressure 60.40 25.80 0.55 1.41 0.06
## Hyd.Pressure1 58.00 58.80 0.78 -0.14 0.25
## Hyd.Pressure2 59.40 59.40 -0.30 -1.56 0.32
## Hyd.Pressure3 50.00 51.20 -0.32 -1.57 0.32
## Hyd.Pressure4 142.00 90.00 0.55 0.63 0.26
## Filler.Level 161.20 105.40 -0.85 0.05 0.31
## Filler.Speed 4030.00 3032.00 -2.87 6.71 15.37
## Temperature 76.20 12.60 2.39 10.16 0.03
## Usage.cont 25.90 13.82 -0.54 -1.02 0.06
## Carb.Flow 5104.00 5078.00 -0.99 -0.58 21.18
## Density 1.92 1.68 0.53 -1.20 0.01
## MFR 868.60 837.20 -5.09 30.46 1.52
## Balling 4.01 4.18 0.59 -1.39 0.02
## Pressure.Vacuum -3.60 3.00 0.53 -0.03 0.01
## PH 9.36 1.48 -0.29 0.06 0.00
## Oxygen.Filler 0.40 0.40 2.66 11.09 0.00
## Bowl.Setpoint 140.00 70.00 -0.97 -0.06 0.30
## Pressure.Setpoint 52.00 8.00 0.20 -1.60 0.04
## Air.Pressurer 148.20 7.40 2.25 4.73 0.02
## Alch.Rel 8.62 3.34 0.88 -0.85 0.01
## Carb.Rel 6.06 1.10 0.50 -0.29 0.00
## Balling.Lvl 3.66 3.66 0.59 -1.49 0.02
# Number of missing values in each training column
train_data %>%
  is.na() %>%
  colSums()
## Brand.Code Carb.Volume Fill.Ounces PC.Volume
## 120 10 38 39
## Carb.Pressure Carb.Temp PSC PSC.Fill
## 27 26 33 23
## PSC.CO2 Mnf.Flow Carb.Pressure1 Fill.Pressure
## 39 2 32 22
## Hyd.Pressure1 Hyd.Pressure2 Hyd.Pressure3 Hyd.Pressure4
## 11 15 15 30
## Filler.Level Filler.Speed Temperature Usage.cont
## 20 57 14 5
## Carb.Flow Density MFR Balling
## 2 1 212 1
## Pressure.Vacuum PH Oxygen.Filler Bowl.Setpoint
## 0 4 12 2
## Pressure.Setpoint Air.Pressurer Alch.Rel Carb.Rel
## 12 0 9 10
## Balling.Lvl
## 1
# Number of missing values in each evaluation column
test_data %>%
  is.na() %>%
  colSums()
## Brand.Code Carb.Volume Fill.Ounces PC.Volume
## 8 1 6 4
## Carb.Pressure Carb.Temp PSC PSC.Fill
## 0 1 5 3
## PSC.CO2 Mnf.Flow Carb.Pressure1 Fill.Pressure
## 5 0 4 2
## Hyd.Pressure1 Hyd.Pressure2 Hyd.Pressure3 Hyd.Pressure4
## 0 1 1 4
## Filler.Level Filler.Speed Temperature Usage.cont
## 2 10 2 2
## Carb.Flow Density MFR Balling
## 0 1 31 1
## Pressure.Vacuum PH Oxygen.Filler Bowl.Setpoint
## 1 267 3 1
## Pressure.Setpoint Air.Pressurer Alch.Rel Carb.Rel
## 2 1 3 2
## Balling.Lvl
## 0
# Dimensions (rows, then columns) of each data set
cat("Training Shape:", nrow(train_data), ncol(train_data), "\n")
## Training Shape: 2571 33
cat("Testing Shape:", nrow(test_data), ncol(test_data), "\n")
## Testing Shape: 267 33
# Impute missing values.
#
# The replacement for each column is learned from the training data only and
# then applied to both data sets, so the evaluation data is preprocessed with
# exactly the same statistics the models are trained on.

# Replacement value for one column.
#   x: a column (atomic vector) that may contain NAs.
# Returns the median for numeric columns and the most frequent value for any
# other type. median() is not used for non-numeric columns because on a
# character vector it only works when the number of non-missing values is odd;
# with an even count it returns NA with a warning.
impute_value <- function(x) {
  if (is.numeric(x)) {
    return(median(x, na.rm = TRUE))
  }
  present <- x[!is.na(x)]
  if (length(present) == 0L) {
    # Nothing to learn from an all-missing column
    return(NA)
  }
  distinct <- unique(present)
  distinct[which.max(tabulate(match(present, distinct)))]
}

# Replace the NAs of every column of `data` that has an entry in `values`.
#   data:   a data frame / tibble.
#   values: a named list of replacement values, one per column name.
# Returns `data` with the NAs of the matching columns filled in.
fill_missing <- function(data, values) {
  for (col in intersect(names(data), names(values))) {
    is_missing <- is.na(data[[col]])
    data[[col]][is_missing] <- values[[col]]
  }
  data
}

imputation_values <- lapply(train_data, impute_value)
train_data <- fill_missing(train_data, imputation_values)
# PH is the value to be predicted for the evaluation data (it is entirely
# missing there), so it is left untouched instead of being filled in.
test_data <- fill_missing(
  test_data,
  imputation_values[setdiff(names(imputation_values), "PH")]
)
library(dplyr)
library(tidyr)
library(ggplot2)
# Histograms of every numeric column, one facet per variable.
# The data is reshaped to long form (one row per column/value pair) so that a
# single ggplot call can facet on the column name.
train_data %>%
  select(where(is.numeric)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "key",
    values_to = "value"
  ) %>%
  ggplot(aes(value)) +
  geom_histogram(bins = 15, fill = "skyblue", color = "black") +
  # Free scales: the variables live on very different ranges
  facet_wrap(~key, scales = "free") +
  labs(
    x = "Value",
    y = "Frequency",
    title = "Histograms of Predictors"
  ) +
  theme_minimal() +
  theme(
    strip.background = element_rect(fill = "lightblue"),
    strip.text = element_text(size = 10, color = "darkblue"),
    plot.title = element_text(hjust = 0.5)
  )

# Keep the numeric columns of the training data
numeric_data <- train_data[vapply(train_data, is.numeric, logical(1L))]
# Pairwise correlations, computed on the rows without any missing value
correlation_matrix <- cor(x = numeric_data, use = "complete.obs")
# Show the full matrix
print(correlation_matrix)
## Carb.Volume Fill.Ounces PC.Volume Carb.Pressure
## Carb.Volume 1.000000e+00 0.0212585043 -0.203017885 0.408811810
## Fill.Ounces 2.125850e-02 1.0000000000 -0.152103103 0.019440695
## PC.Volume -2.030179e-01 -0.1521031026 1.000000000 -0.105272048
## Carb.Pressure 4.088118e-01 0.0194406946 -0.105272048 1.000000000
## Carb.Temp -1.135742e-01 0.0060820029 0.001632062 0.784892224
## PSC -3.330324e-02 0.0048197955 0.195269015 -0.042179331
## PSC.Fill -1.311497e-02 0.0654515774 -0.043167796 -0.014965649
## PSC.CO2 -5.227773e-02 0.0176324870 -0.035754982 -0.005179737
## Mnf.Flow 8.818228e-02 -0.0069380349 -0.146197727 0.036711881
## Carb.Pressure1 6.506191e-02 0.0003739317 -0.218952619 0.011711845
## Fill.Pressure -7.158830e-02 0.0583990269 -0.067742511 -0.051827142
## Hyd.Pressure1 -3.367920e-02 -0.1254435401 0.269003299 -0.043379935
## Hyd.Pressure2 4.229248e-02 -0.1065025764 0.049310957 0.010959930
## Hyd.Pressure3 5.485009e-02 -0.0830204931 0.019038611 0.028023539
## Hyd.Pressure4 -3.971862e-01 0.0224789228 0.066180007 -0.231357262
## Filler.Level -3.465170e-02 -0.0195402489 0.207456711 -0.017347200
## Filler.Speed -5.187535e-06 0.0215099079 0.041006375 0.040897285
## Temperature -1.762623e-01 0.0030048371 0.075877049 -0.075230514
## Usage.cont 8.627883e-02 0.0984587275 -0.275367096 0.034490206
## Carb.Flow -9.568562e-02 -0.0662478612 0.220199392 -0.009205311
## Density 7.597001e-01 -0.0831553027 -0.140703760 0.417712318
## MFR 2.596895e-02 0.0152775401 -0.056863729 0.022942419
## Balling 7.808131e-01 -0.0670957879 -0.171793417 0.420032034
## Pressure.Vacuum -7.517856e-02 0.0358553350 -0.053689497 -0.022395753
## PH 6.326444e-02 -0.0944601384 0.046138770 0.059408689
## Oxygen.Filler -9.369217e-02 -0.0504971496 0.157412764 -0.048141822
## Bowl.Setpoint -6.143418e-03 -0.0170378548 0.222874311 -0.008463085
## Pressure.Setpoint -1.453952e-01 0.0424012384 0.001706602 -0.089975821
## Air.Pressurer -1.888702e-02 0.0686730099 -0.037307204 0.011318729
## Alch.Rel 7.767039e-01 -0.1178028521 -0.149882728 0.415779013
## Carb.Rel 7.877878e-01 -0.1203438602 -0.125787054 0.423861238
## Balling.Lvl 7.775300e-01 -0.0648004526 -0.178985293 0.419862138
## Carb.Temp PSC PSC.Fill PSC.CO2
## Carb.Volume -0.113574246 -0.033303237 -0.013114969 -0.052277728
## Fill.Ounces 0.006082003 0.004819796 0.065451577 0.017632487
## PC.Volume 0.001632062 0.195269015 -0.043167796 -0.035754982
## Carb.Pressure 0.784892224 -0.042179331 -0.014965649 -0.005179737
## Carb.Temp 1.000000000 -0.040047388 -0.007396084 0.029919967
## PSC -0.040047388 1.000000000 0.173713246 0.054966224
## PSC.Fill -0.007396084 0.173713246 1.000000000 0.192785462
## PSC.CO2 0.029919967 0.054966224 0.192785462 1.000000000
## Mnf.Flow -0.007096046 0.063565881 -0.031966166 0.038963247
## Carb.Pressure1 -0.014292173 -0.045212973 -0.032224974 0.013569781
## Fill.Pressure -0.012458179 0.026364186 -0.005232389 0.068088861
## Hyd.Pressure1 -0.030089401 0.008662144 -0.053122047 -0.020808360
## Hyd.Pressure2 -0.010109649 -0.009517101 -0.080164829 -0.007465919
## Hyd.Pressure3 -0.003090288 0.007357622 -0.069150888 0.018610596
## Hyd.Pressure4 -0.028362405 0.015485160 0.006811622 0.047918652
## Filler.Level -0.006292672 -0.009343092 0.053225072 -0.039998561
## Filler.Speed 0.037106486 0.035686402 -0.003388272 -0.021362599
## Temperature 0.033292343 -0.006479381 0.027083206 0.049009936
## Usage.cont -0.015683278 0.027699953 -0.023448029 0.022523411
## Carb.Flow 0.045087924 0.006439586 0.026151791 0.013040337
## Density 0.022041872 -0.065698299 -0.012918574 -0.048509384
## MFR 0.010827462 0.001657600 -0.012696695 -0.011978553
## Balling 0.009456554 -0.062168486 -0.009221786 -0.046855616
## Pressure.Vacuum 0.010437375 0.044917604 0.044840914 0.001105246
## PH 0.028645798 -0.089204555 -0.036359228 -0.075490305
## Oxygen.Filler 0.010104475 -0.036485304 -0.022535145 -0.029249458
## Bowl.Setpoint -0.010126866 -0.005530397 0.048616431 -0.036280336
## Pressure.Setpoint -0.022899480 0.035213899 -0.009713493 0.067938729
## Air.Pressurer 0.028303551 0.038250791 -0.012767962 0.009334976
## Alch.Rel 0.004009627 -0.057554901 -0.013298114 -0.061293422
## Carb.Rel 0.007826422 -0.069234155 -0.017395932 -0.061499927
## Balling.Lvl 0.010701517 -0.061921959 -0.004162511 -0.046977982
## Mnf.Flow Carb.Pressure1 Fill.Pressure Hyd.Pressure1
## Carb.Volume 0.0881822809 0.0650619093 -0.071588296 -0.033679196
## Fill.Ounces -0.0069380349 0.0003739317 0.058399027 -0.125443540
## PC.Volume -0.1461977271 -0.2189526185 -0.067742511 0.269003299
## Carb.Pressure 0.0367118813 0.0117118450 -0.051827142 -0.043379935
## Carb.Temp -0.0070960458 -0.0142921733 -0.012458179 -0.030089401
## PSC 0.0635658805 -0.0452129729 0.026364186 0.008662144
## PSC.Fill -0.0319661657 -0.0322249743 -0.005232389 -0.053122047
## PSC.CO2 0.0389632469 0.0135697815 0.068088861 -0.020808360
## Mnf.Flow 1.0000000000 0.3397780047 0.421612497 0.354647389
## Carb.Pressure1 0.3397780047 1.0000000000 0.084296700 -0.094922044
## Fill.Pressure 0.4216124969 0.0842966996 1.000000000 0.159953429
## Hyd.Pressure1 0.3546473886 -0.0949220437 0.159953429 1.000000000
## Hyd.Pressure2 0.6500363218 0.1133650745 0.332379939 0.721751331
## Hyd.Pressure3 0.7531156157 0.1827789521 0.427704701 0.632786285
## Hyd.Pressure4 -0.0116258950 0.1653392627 0.063820968 -0.079060137
## Filler.Level -0.5772614063 -0.3323973970 -0.388796283 -0.055328525
## Filler.Speed 0.1348086547 -0.2450561521 0.062959153 0.168389135
## Temperature -0.0862374625 0.0872338949 -0.083544484 -0.118319485
## Usage.cont 0.5195345181 0.2858221749 0.244249217 0.101829297
## Carb.Flow -0.2754149263 -0.4058065533 0.010166611 0.032205974
## Density 0.0347737610 0.0212553126 -0.127991195 0.019981175
## MFR 0.0007046815 0.0165560951 -0.186363505 -0.037919752
## Balling 0.1168155486 0.0496738930 -0.094322493 0.041619876
## Pressure.Vacuum -0.5275657528 -0.2424018649 -0.257391725 -0.289380451
## PH -0.4468491012 -0.0802363140 -0.211710485 -0.073710476
## Oxygen.Filler -0.5134063220 -0.1544553702 -0.208941385 -0.157199967
## Bowl.Setpoint -0.5794484121 -0.3845654603 -0.346311255 -0.024582375
## Pressure.Setpoint 0.4636035923 0.2021631554 0.663017294 0.183350843
## Air.Pressurer -0.0494007094 0.0647807324 0.025306043 -0.189376094
## Alch.Rel 0.0277588137 0.0239311722 -0.145511785 0.009272838
## Carb.Rel -0.0300347813 0.0331118890 -0.167391236 0.024600617
## Balling.Lvl 0.0367470534 0.0321404950 -0.130743297 -0.005043180
## Hyd.Pressure2 Hyd.Pressure3 Hyd.Pressure4 Filler.Level
## Carb.Volume 0.042292485 0.054850093 -0.397186151 -0.034651701
## Fill.Ounces -0.106502576 -0.083020493 0.022478923 -0.019540249
## PC.Volume 0.049310957 0.019038611 0.066180007 0.207456711
## Carb.Pressure 0.010959930 0.028023539 -0.231357262 -0.017347200
## Carb.Temp -0.010109649 -0.003090288 -0.028362405 -0.006292672
## PSC -0.009517101 0.007357622 0.015485160 -0.009343092
## PSC.Fill -0.080164829 -0.069150888 0.006811622 0.053225072
## PSC.CO2 -0.007465919 0.018610596 0.047918652 -0.039998561
## Mnf.Flow 0.650036322 0.753115616 -0.011625895 -0.577261406
## Carb.Pressure1 0.113365074 0.182778952 0.165339263 -0.332397397
## Fill.Pressure 0.332379939 0.427704701 0.063820968 -0.388796283
## Hyd.Pressure1 0.721751331 0.632786285 -0.079060137 -0.055328525
## Hyd.Pressure2 1.000000000 0.924972596 -0.147981692 -0.403262899
## Hyd.Pressure3 0.924972596 1.000000000 -0.132469425 -0.496305114
## Hyd.Pressure4 -0.147981692 -0.132469425 1.000000000 -0.028225283
## Filler.Level -0.403262899 -0.496305114 -0.028225283 1.000000000
## Filler.Speed 0.286461819 0.267409786 -0.413747808 -0.057003877
## Temperature -0.214849991 -0.197208402 0.304622583 0.072381524
## Usage.cont 0.345801225 0.376257167 0.005455157 -0.344398170
## Carb.Flow -0.036084644 -0.057023339 -0.257635685 -0.042991039
## Density 0.083343636 0.067942858 -0.466658299 -0.009377047
## MFR 0.044974369 0.009765715 -0.189244427 0.052682060
## Balling 0.122964927 0.139358149 -0.473387802 -0.005204258
## Pressure.Vacuum -0.561918494 -0.599776975 -0.029856857 0.327474979
## PH -0.200654459 -0.240165668 -0.139820117 0.321590911
## Oxygen.Filler -0.286392720 -0.336583668 0.030543597 0.225601956
## Bowl.Setpoint -0.376503487 -0.467832147 -0.082420592 0.934980656
## Pressure.Setpoint 0.307573236 0.413654063 0.207661599 -0.413709676
## Air.Pressurer -0.153688896 -0.067530636 0.045250411 -0.129050678
## Alch.Rel 0.040340220 0.048006408 -0.497145681 0.035364991
## Carb.Rel 0.021099626 0.004035867 -0.403977016 0.105003490
## Balling.Lvl 0.031388745 0.039780107 -0.426624243 0.042005182
## Filler.Speed Temperature Usage.cont Carb.Flow
## Carb.Volume -5.187535e-06 -0.176262272 0.086278827 -0.095685618
## Fill.Ounces 2.150991e-02 0.003004837 0.098458727 -0.066247861
## PC.Volume 4.100638e-02 0.075877049 -0.275367096 0.220199392
## Carb.Pressure 4.089729e-02 -0.075230514 0.034490206 -0.009205311
## Carb.Temp 3.710649e-02 0.033292343 -0.015683278 0.045087924
## PSC 3.568640e-02 -0.006479381 0.027699953 0.006439586
## PSC.Fill -3.388272e-03 0.027083206 -0.023448029 0.026151791
## PSC.CO2 -2.136260e-02 0.049009936 0.022523411 0.013040337
## Mnf.Flow 1.348087e-01 -0.086237463 0.519534518 -0.275414926
## Carb.Pressure1 -2.450562e-01 0.087233895 0.285822175 -0.405806553
## Fill.Pressure 6.295915e-02 -0.083544484 0.244249217 0.010166611
## Hyd.Pressure1 1.683891e-01 -0.118319485 0.101829297 0.032205974
## Hyd.Pressure2 2.864618e-01 -0.214849991 0.345801225 -0.036084644
## Hyd.Pressure3 2.674098e-01 -0.197208402 0.376257167 -0.057023339
## Hyd.Pressure4 -4.137478e-01 0.304622583 0.005455157 -0.257635685
## Filler.Level -5.700388e-02 0.072381524 -0.344398170 -0.042991039
## Filler.Speed 1.000000e+00 -0.331925688 0.072860552 0.371706402
## Temperature -3.319257e-01 1.000000000 -0.111202852 -0.126139136
## Usage.cont 7.286055e-02 -0.111202852 1.000000000 -0.327813493
## Carb.Flow 3.717064e-01 -0.126139136 -0.327813493 1.000000000
## Density 4.398180e-02 -0.183430617 0.019851201 0.036875954
## MFR 4.038970e-01 -0.082754103 0.021902165 -0.075202520
## Balling 5.159532e-02 -0.222935172 0.091263340 -0.059520322
## Pressure.Vacuum -1.276654e-03 0.041422648 -0.302056601 0.206874349
## PH -2.502336e-02 -0.157572482 -0.317971919 0.155921411
## Oxygen.Filler -1.186126e-01 0.130050953 -0.290048579 0.166532635
## Bowl.Setpoint -1.436573e-02 0.046336967 -0.367018708 0.005890908
## Pressure.Setpoint -2.923757e-02 0.033391252 0.239091965 -0.119713769
## Air.Pressurer -5.871091e-03 0.056158411 -0.094573568 0.085071689
## Alch.Rel -2.363671e-02 -0.174773338 -0.003334949 -0.021794425
## Carb.Rel -4.163474e-02 -0.095495104 -0.029986329 -0.056676141
## Balling.Lvl -1.364646e-02 -0.174817255 0.042979594 -0.060387420
## Density MFR Balling Pressure.Vacuum
## Carb.Volume 0.759700065 0.0259689485 0.780813102 -0.075178558
## Fill.Ounces -0.083155303 0.0152775401 -0.067095788 0.035855335
## PC.Volume -0.140703760 -0.0568637287 -0.171793417 -0.053689497
## Carb.Pressure 0.417712318 0.0229424195 0.420032034 -0.022395753
## Carb.Temp 0.022041872 0.0108274616 0.009456554 0.010437375
## PSC -0.065698299 0.0016576001 -0.062168486 0.044917604
## PSC.Fill -0.012918574 -0.0126966950 -0.009221786 0.044840914
## PSC.CO2 -0.048509384 -0.0119785529 -0.046855616 0.001105246
## Mnf.Flow 0.034773761 0.0007046815 0.116815549 -0.527565753
## Carb.Pressure1 0.021255313 0.0165560951 0.049673893 -0.242401865
## Fill.Pressure -0.127991195 -0.1863635048 -0.094322493 -0.257391725
## Hyd.Pressure1 0.019981175 -0.0379197516 0.041619876 -0.289380451
## Hyd.Pressure2 0.083343636 0.0449743689 0.122964927 -0.561918494
## Hyd.Pressure3 0.067942858 0.0097657148 0.139358149 -0.599776975
## Hyd.Pressure4 -0.466658299 -0.1892444270 -0.473387802 -0.029856857
## Filler.Level -0.009377047 0.0526820600 -0.005204258 0.327474979
## Filler.Speed 0.043981798 0.4038969514 0.051595323 -0.001276654
## Temperature -0.183430617 -0.0827541030 -0.222935172 0.041422648
## Usage.cont 0.019851201 0.0219021648 0.091263340 -0.302056601
## Carb.Flow 0.036875954 -0.0752025201 -0.059520322 0.206874349
## Density 1.000000000 0.0274524052 0.955159423 -0.088177447
## MFR 0.027452405 1.0000000000 0.042084034 0.058987594
## Balling 0.955159423 0.0420840335 1.000000000 -0.164776248
## Pressure.Vacuum -0.088177447 0.0589875941 -0.164776248 1.000000000
## PH 0.078350007 -0.0072300187 0.065202156 0.220503718
## Oxygen.Filler -0.047967697 -0.0196315379 -0.107357546 0.218429076
## Bowl.Setpoint 0.012268145 0.0354911162 0.018146800 0.340022205
## Pressure.Setpoint -0.232309434 -0.0635742534 -0.194874208 -0.286278602
## Air.Pressurer -0.085953514 0.0103030502 -0.104541030 0.168334723
## Alch.Rel 0.900729636 -0.0077329151 0.923301609 -0.050870166
## Carb.Rel 0.821367472 0.0098159150 0.820305019 -0.003905683
## Balling.Lvl 0.947310974 0.0151664916 0.977623234 -0.044418488
## PH Oxygen.Filler Bowl.Setpoint Pressure.Setpoint
## Carb.Volume 0.063264438 -0.09369217 -0.006143418 -0.145395154
## Fill.Ounces -0.094460138 -0.05049715 -0.017037855 0.042401238
## PC.Volume 0.046138770 0.15741276 0.222874311 0.001706602
## Carb.Pressure 0.059408689 -0.04814182 -0.008463085 -0.089975821
## Carb.Temp 0.028645798 0.01010447 -0.010126866 -0.022899480
## PSC -0.089204555 -0.03648530 -0.005530397 0.035213899
## PSC.Fill -0.036359228 -0.02253515 0.048616431 -0.009713493
## PSC.CO2 -0.075490305 -0.02924946 -0.036280336 0.067938729
## Mnf.Flow -0.446849101 -0.51340632 -0.579448412 0.463603592
## Carb.Pressure1 -0.080236314 -0.15445537 -0.384565460 0.202163155
## Fill.Pressure -0.211710485 -0.20894138 -0.346311255 0.663017294
## Hyd.Pressure1 -0.073710476 -0.15719997 -0.024582375 0.183350843
## Hyd.Pressure2 -0.200654459 -0.28639272 -0.376503487 0.307573236
## Hyd.Pressure3 -0.240165668 -0.33658367 -0.467832147 0.413654063
## Hyd.Pressure4 -0.139820117 0.03054360 -0.082420592 0.207661599
## Filler.Level 0.321590911 0.22560196 0.934980656 -0.413709676
## Filler.Speed -0.025023364 -0.11861261 -0.014365727 -0.029237574
## Temperature -0.157572482 0.13005095 0.046336967 0.033391252
## Usage.cont -0.317971919 -0.29004858 -0.367018708 0.239091965
## Carb.Flow 0.155921411 0.16653263 0.005890908 -0.119713769
## Density 0.078350007 -0.04796770 0.012268145 -0.232309434
## MFR -0.007230019 -0.01963154 0.035491116 -0.063574253
## Balling 0.065202156 -0.10735755 0.018146800 -0.194874208
## Pressure.Vacuum 0.220503718 0.21842908 0.340022205 -0.286278602
## PH 1.000000000 0.16031508 0.348602509 -0.305412094
## Oxygen.Filler 0.160315075 1.00000000 0.211227788 -0.214025858
## Bowl.Setpoint 0.348602509 0.21122779 1.000000000 -0.414899059
## Pressure.Setpoint -0.305412094 -0.21402586 -0.414899059 1.000000000
## Air.Pressurer -0.013657744 0.09372746 -0.135591107 0.080480506
## Alch.Rel 0.147170660 -0.04127405 0.048047535 -0.241679242
## Carb.Rel 0.162393549 -0.01193740 0.126835783 -0.236748177
## Balling.Lvl 0.099791970 -0.06023303 0.064468576 -0.231849552
## Air.Pressurer Alch.Rel Carb.Rel Balling.Lvl
## Carb.Volume -0.018887017 0.776703895 0.787787847 0.777529976
## Fill.Ounces 0.068673010 -0.117802852 -0.120343860 -0.064800453
## PC.Volume -0.037307204 -0.149882728 -0.125787054 -0.178985293
## Carb.Pressure 0.011318729 0.415779013 0.423861238 0.419862138
## Carb.Temp 0.028303551 0.004009627 0.007826422 0.010701517
## PSC 0.038250791 -0.057554901 -0.069234155 -0.061921959
## PSC.Fill -0.012767962 -0.013298114 -0.017395932 -0.004162511
## PSC.CO2 0.009334976 -0.061293422 -0.061499927 -0.046977982
## Mnf.Flow -0.049400709 0.027758814 -0.030034781 0.036747053
## Carb.Pressure1 0.064780732 0.023931172 0.033111889 0.032140495
## Fill.Pressure 0.025306043 -0.145511785 -0.167391236 -0.130743297
## Hyd.Pressure1 -0.189376094 0.009272838 0.024600617 -0.005043180
## Hyd.Pressure2 -0.153688896 0.040340220 0.021099626 0.031388745
## Hyd.Pressure3 -0.067530636 0.048006408 0.004035867 0.039780107
## Hyd.Pressure4 0.045250411 -0.497145681 -0.403977016 -0.426624243
## Filler.Level -0.129050678 0.035364991 0.105003490 0.042005182
## Filler.Speed -0.005871091 -0.023636707 -0.041634738 -0.013646459
## Temperature 0.056158411 -0.174773338 -0.095495104 -0.174817255
## Usage.cont -0.094573568 -0.003334949 -0.029986329 0.042979594
## Carb.Flow 0.085071689 -0.021794425 -0.056676141 -0.060387420
## Density -0.085953514 0.900729636 0.821367472 0.947310974
## MFR 0.010303050 -0.007732915 0.009815915 0.015166492
## Balling -0.104541030 0.923301609 0.820305019 0.977623234
## Pressure.Vacuum 0.168334723 -0.050870166 -0.003905683 -0.044418488
## PH -0.013657744 0.147170660 0.162393549 0.099791970
## Oxygen.Filler 0.093727461 -0.041274050 -0.011937401 -0.060233026
## Bowl.Setpoint -0.135591107 0.048047535 0.126835783 0.064468576
## Pressure.Setpoint 0.080480506 -0.241679242 -0.236748177 -0.231849552
## Air.Pressurer 1.000000000 -0.084140642 -0.101247073 -0.086810997
## Alch.Rel -0.084140642 1.000000000 0.842043763 0.921318584
## Carb.Rel -0.101247073 0.842043763 1.000000000 0.842353791
## Balling.Lvl -0.086810997 0.921318584 0.842353791 1.000000000
# Visualize the correlation matrix using a heatmap
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.3
## corrplot 0.92 loaded
corrplot(
  corr = correlation_matrix,
  method = "color",
  tl.cex = 0.8,
  number.cex = 0.7
)

# Predictor columns shared by the training and the evaluation data;
# PH is the response
feature_cols <- c(
  "Carb.Volume", "Fill.Ounces", "Carb.Pressure", "Temperature",
  "Density", "Balling", "Oxygen.Filler", "Pressure.Setpoint"
)
training_features <- train_data[, feature_cols]
training_target <- train_data$PH
testing_features <- test_data[, feature_cols]
# Centre and scale the predictors; the transformation is estimated on the
# training features and then applied unchanged to the evaluation features
preProc <- preProcess(training_features, method = c("center", "scale"))
features_scaled_train <- predict(preProc, newdata = training_features)
features_scaled_test <- predict(preProc, newdata = testing_features)
# Hold out 20% of the labelled rows for comparing the models
set.seed(42)
trainIndex <- createDataPartition(y = training_target, p = 0.8, list = FALSE)
y_train <- training_target[trainIndex]
y_test <- training_target[-trainIndex]
X_train <- features_scaled_train[trainIndex, ]
X_test <- features_scaled_train[-trainIndex, ]
# Plain data-frame copies of the training split (no scaling happens here)
X_train_df <- as.data.frame(X_train)
y_train_df <- as.data.frame(y_train)
# Load necessary libraries
library(caret)
library(randomForest)
library(gbm)
library(xgboost)
# Seed the RNG once before the models are fitted. The Random Forest and
# XGBoost entries below re-seed themselves; the Linear Regression and Gradient
# Boosting entries do not, so Gradient Boosting runs with whatever RNG state
# the preceding Random Forest fit leaves behind.
set.seed(42)
# Fit the four candidate regressors on the training split (X_train_df,
# y_train) and keep them in a list keyed by display name.
models <- list(
# Linear model fitted through train() with method 'lm'
'Linear Regression' = train(X_train_df, y_train, method = 'lm'),
# Random forest with 100 trees
'Random Forest' = {
set.seed(42)
randomForest(X_train_df, y_train, ntree = 100)
},
# gbm with 100 trees, depth-3 interactions and 5-fold cross-validation; no
# distribution is given, so gbm picks one itself (the console output reports
# "assuming gaussian")
'Gradient Boosting' = gbm(y_train ~ ., data = data.frame(X_train_df, y_train), n.trees = 100, interaction.depth = 3, cv.folds = 5, shrinkage = 0.1, n.minobsinnode = 10),
# xgboost needs a numeric matrix; 100 boosting rounds with squared-error loss
'XGBoost' = {
set.seed(42)
xgboost(data = as.matrix(X_train_df), label = y_train, nrounds = 100, objective = "reg:squarederror")
}
)
## Distribution not specified, assuming gaussian ...
## [1] train-rmse:5.635856
## [2] train-rmse:3.947828
## [3] train-rmse:2.766779
## [4] train-rmse:1.940941
## [5] train-rmse:1.364058
## [6] train-rmse:0.961981
## [7] train-rmse:0.682834
## [8] train-rmse:0.489984
## [9] train-rmse:0.358142
## [10] train-rmse:0.268990
## [11] train-rmse:0.210401
## [12] train-rmse:0.173775
## [13] train-rmse:0.151856
## [14] train-rmse:0.137849
## [15] train-rmse:0.130452
## [16] train-rmse:0.125457
## [17] train-rmse:0.122719
## [18] train-rmse:0.119346
## [19] train-rmse:0.117290
## [20] train-rmse:0.114618
## [21] train-rmse:0.113214
## [22] train-rmse:0.112762
## [23] train-rmse:0.111771
## [24] train-rmse:0.109134
## [25] train-rmse:0.108406
## [26] train-rmse:0.107827
## [27] train-rmse:0.106447
## [28] train-rmse:0.104558
## [29] train-rmse:0.102788
## [30] train-rmse:0.101472
## [31] train-rmse:0.100104
## [32] train-rmse:0.098503
## [33] train-rmse:0.097843
## [34] train-rmse:0.095967
## [35] train-rmse:0.094817
## [36] train-rmse:0.093710
## [37] train-rmse:0.093375
## [38] train-rmse:0.091349
## [39] train-rmse:0.089666
## [40] train-rmse:0.088599
## [41] train-rmse:0.087761
## [42] train-rmse:0.087094
## [43] train-rmse:0.085264
## [44] train-rmse:0.083317
## [45] train-rmse:0.082874
## [46] train-rmse:0.082064
## [47] train-rmse:0.080838
## [48] train-rmse:0.080224
## [49] train-rmse:0.080074
## [50] train-rmse:0.078661
## [51] train-rmse:0.078477
## [52] train-rmse:0.076837
## [53] train-rmse:0.076408
## [54] train-rmse:0.075093
## [55] train-rmse:0.073905
## [56] train-rmse:0.073280
## [57] train-rmse:0.072634
## [58] train-rmse:0.072470
## [59] train-rmse:0.072014
## [60] train-rmse:0.071378
## [61] train-rmse:0.070814
## [62] train-rmse:0.070602
## [63] train-rmse:0.070507
## [64] train-rmse:0.070421
## [65] train-rmse:0.070243
## [66] train-rmse:0.070077
## [67] train-rmse:0.069182
## [68] train-rmse:0.068134
## [69] train-rmse:0.067133
## [70] train-rmse:0.065714
## [71] train-rmse:0.065634
## [72] train-rmse:0.064589
## [73] train-rmse:0.063728
## [74] train-rmse:0.062988
## [75] train-rmse:0.061585
## [76] train-rmse:0.061076
## [77] train-rmse:0.060624
## [78] train-rmse:0.059925
## [79] train-rmse:0.059798
## [80] train-rmse:0.058570
## [81] train-rmse:0.058366
## [82] train-rmse:0.057229
## [83] train-rmse:0.056250
## [84] train-rmse:0.055799
## [85] train-rmse:0.055449
## [86] train-rmse:0.055063
## [87] train-rmse:0.054126
## [88] train-rmse:0.053473
## [89] train-rmse:0.053267
## [90] train-rmse:0.053020
## [91] train-rmse:0.052728
## [92] train-rmse:0.052330
## [93] train-rmse:0.051606
## [94] train-rmse:0.051562
## [95] train-rmse:0.050919
## [96] train-rmse:0.050829
## [97] train-rmse:0.050262
## [98] train-rmse:0.049130
## [99] train-rmse:0.048848
## [100] train-rmse:0.048448
# Score every fitted model on the held-out split.
#
# Predictions are made on the held-out features (X_test) so that they line up
# row for row with y_test. Predicting on the training features instead yields
# a vector of a different length, which R silently recycles in the subtraction
# and which makes the reported error meaningless.
X_test_df <- as.data.frame(X_test)
for (name in names(models)) {
  model <- models[[name]]
  # xgboost predicts from a numeric matrix; the other models take a data frame
  if (name == "XGBoost") {
    predictions <- predict(model, newdata = as.matrix(X_test_df))
  } else {
    predictions <- predict(model, newdata = X_test_df)
  }
  # Fail loudly rather than recycle if the lengths ever disagree
  stopifnot(length(predictions) == length(y_test))
  # Square root of the mean squared residual, i.e. the RMSE
  rmse <- sqrt(mean((y_test - predictions)^2))
  cat(sprintf("%s - Root Mean Squared Error: %.4f\n", name, rmse))
}
## Warning in y_test - predictions: longer object length is not a multiple of
## shorter object length
## Linear Regression - Mean Squared Error: 0.1899
## Warning in y_test - predictions: longer object length is not a multiple of
## shorter object length
## Random Forest - Mean Squared Error: 0.2168
## Using 94 trees...
## Warning in y_test - predictions: longer object length is not a multiple of
## shorter object length
## Gradient Boosting - Mean Squared Error: 0.1972
## Warning in y_test - predictions: longer object length is not a multiple of
## shorter object length
## XGBoost - Mean Squared Error: 0.2329
# Model used for the evaluation workbook.
# NOTE(review): the choice is hard-coded to the Random Forest rather than
# derived from the scores printed by the comparison loop - confirm it is still
# the intended model.
best_model <- models[["Random Forest"]]
# Predict PH for the scaled evaluation features
evaluation_matrix <- as.matrix(features_scaled_test)
best_predictions <- predict(best_model, newdata = evaluation_matrix)
# Write the predictions to an Excel workbook with a single column
predicted_df <- data.frame(Predicted = best_predictions)
write.xlsx(x = predicted_df, file = "pH_predictions.xlsx", rowNames = FALSE)