Machine Learning Model Training and Evaluation

# Load the HR dataset from disk, then print per-column descriptive statistics.
data <- read.csv(file = "/Users/sunny/Downloads/HRDATA.csv")
summary(object = data)
##  satisfaction_level last_evaluation  number_project  average_montly_hours
##  Min.   :0.0900     Min.   :0.3600   Min.   :2.000   Min.   : 96.0       
##  1st Qu.:0.4400     1st Qu.:0.5600   1st Qu.:3.000   1st Qu.:156.0       
##  Median :0.6400     Median :0.7200   Median :4.000   Median :200.0       
##  Mean   :0.6128     Mean   :0.7161   Mean   :3.803   Mean   :201.1       
##  3rd Qu.:0.8200     3rd Qu.:0.8700   3rd Qu.:5.000   3rd Qu.:245.0       
##  Max.   :1.0000     Max.   :1.0000   Max.   :7.000   Max.   :310.0       
##  time_spend_company Work_accident         left        promotion_last_5years
##  Min.   : 2.000     Min.   :0.0000   Min.   :0.0000   Min.   :0.00000      
##  1st Qu.: 3.000     1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000      
##  Median : 3.000     Median :0.0000   Median :0.0000   Median :0.00000      
##  Mean   : 3.498     Mean   :0.1446   Mean   :0.2381   Mean   :0.02127      
##  3rd Qu.: 4.000     3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000      
##  Max.   :10.000     Max.   :1.0000   Max.   :1.0000   Max.   :1.00000      
##   Department           salary         
##  Length:14999       Length:14999      
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 

Data Loading and Summary Statistics

# Second, independent copy of the HR dataset; this is the frame the models below are fitted on.
data1 <- read.csv(file = "/Users/sunny/Downloads/HRDATA.csv")
summary(object = data1)
##  satisfaction_level last_evaluation  number_project  average_montly_hours
##  Min.   :0.0900     Min.   :0.3600   Min.   :2.000   Min.   : 96.0       
##  1st Qu.:0.4400     1st Qu.:0.5600   1st Qu.:3.000   1st Qu.:156.0       
##  Median :0.6400     Median :0.7200   Median :4.000   Median :200.0       
##  Mean   :0.6128     Mean   :0.7161   Mean   :3.803   Mean   :201.1       
##  3rd Qu.:0.8200     3rd Qu.:0.8700   3rd Qu.:5.000   3rd Qu.:245.0       
##  Max.   :1.0000     Max.   :1.0000   Max.   :7.000   Max.   :310.0       
##  time_spend_company Work_accident         left        promotion_last_5years
##  Min.   : 2.000     Min.   :0.0000   Min.   :0.0000   Min.   :0.00000      
##  1st Qu.: 3.000     1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000      
##  Median : 3.000     Median :0.0000   Median :0.0000   Median :0.00000      
##  Mean   : 3.498     Mean   :0.1446   Mean   :0.2381   Mean   :0.02127      
##  3rd Qu.: 4.000     3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000      
##  Max.   :10.000     Max.   :1.0000   Max.   :1.0000   Max.   :1.00000      
##   Department           salary         
##  Length:14999       Length:14999      
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
head(data1, n = 6L)  # Preview the first six rows (head()'s default) of 'data1'.
##   satisfaction_level last_evaluation number_project average_montly_hours
## 1               0.38            0.53              2                  157
## 2               0.80            0.86              5                  262
## 3               0.11            0.88              7                  272
## 4               0.72            0.87              5                  223
## 5               0.37            0.52              2                  159
## 6               0.41            0.50              2                  153
##   time_spend_company Work_accident left promotion_last_5years Department salary
## 1                  3             0    1                     0      sales    low
## 2                  6             0    1                     0      sales medium
## 3                  4             0    1                     0      sales medium
## 4                  5             0    1                     0      sales    low
## 5                  3             0    1                     0      sales    low
## 6                  3             0    1                     0      sales    low
pairs(data[seq_len(8)])  # Scatterplot matrix of the first eight columns of 'data' (the numeric ones; Department and salary are excluded).

library(MASS)  # Provides boxcox().

# Linear model for satisfaction_level (raised to the power 1.1) on the
# evaluation, workload, tenure and attrition variables.
lm1 <- lm(
  (satisfaction_level^1.1) ~ last_evaluation + number_project +
    average_montly_hours + time_spend_company + left,
  data = data1
)

# boxcox() draws the profile log-likelihood and returns the lambda grid (`x`)
# together with the log-likelihood at each grid point (`y`). Call it once
# (the original ran it twice, producing the same plot twice) and keep the
# grid value that maximises the log-likelihood, so that `lambda` really is
# the estimated Box-Cox parameter rather than the whole grid list.
bc_profile <- boxcox(lm1)
lambda <- bc_profile$x[which.max(bc_profile$y)]

Regression Diagnostics (Residual Plots)

plot(lm1, which = 1:2)  # First two diagnostic plots of 'lm1' (residuals vs fitted, normal Q-Q).

Linear Regression Modeling

# Extended linear model: adds promotion history and salary band to the
# predictors. 'salary' is still a character column here, so lm() expands it
# into dummy variables.
lm10 <- lm(
  formula = satisfaction_level ~ last_evaluation + number_project +
    average_montly_hours + time_spend_company + left +
    promotion_last_5years + salary,
  data = data1
)
summary(object = lm10)  # Coefficient table and fit statistics for 'lm10'.
## 
## Call:
## lm(formula = satisfaction_level ~ last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + left + promotion_last_5years + 
##     salary, data = data1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64739 -0.13676 -0.01193  0.17003  0.52774 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.148e-01  1.156e-02  53.189  < 2e-16 ***
## last_evaluation        2.460e-01  1.167e-02  21.073  < 2e-16 ***
## number_project        -4.090e-02  1.691e-03 -24.184  < 2e-16 ***
## average_montly_hours   1.913e-04  4.126e-05   4.636 3.58e-06 ***
## time_spend_company    -5.525e-03  1.294e-03  -4.268 1.98e-05 ***
## left                  -2.241e-01  4.395e-03 -50.980  < 2e-16 ***
## promotion_last_5years  9.282e-03  1.272e-02   0.730   0.4654    
## salarylow              1.200e-02  6.959e-03   1.724   0.0847 .  
## salarymedium           1.306e-02  6.955e-03   1.878   0.0604 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2227 on 14990 degrees of freedom
## Multiple R-squared:  0.1982, Adjusted R-squared:  0.1977 
## F-statistic:   463 on 8 and 14990 DF,  p-value: < 2.2e-16
table(data1[["left"]])  # Counts of employees who stayed (0) and left (1).
## 
##     0     1 
## 11428  3571
library(car)  # Loads the car package, which contains functions for linear regression diagnostics.
## Loading required package: carData
# Estimate the Box-Cox power for the response of 'lm1' and test it against
# lambda = 0 (log transformation) and lambda = 1 (no transformation needed).
# Nothing is transformed by this call and the predictors are not involved: it
# only reports the estimate and the likelihood-ratio tests. Note that lm1's
# response is already satisfaction_level^1.1, so the estimated power refers to
# that already-powered variable.
summary(powerTransform(lm1))  # Estimates (does not apply) a Box-Cox power for the response of 'lm1'.
## bcPower Transformation to Normality 
##    Est Power Rounded Pwr Wald Lwr Bnd Wald Upr Bnd
## Y1     1.012           1       0.9833       1.0406
## 
## Likelihood ratio test that transformation parameter is equal to 0
##  (log transformation)
##                            LRT df       pval
## LR test, lambda = (0) 6018.397  1 < 2.22e-16
## 
## Likelihood ratio test that no transformation is needed
##                             LRT df    pval
## LR test, lambda = (1) 0.6756013  1 0.41111
library(car)  # Loads the car package.
car::vif(lm1)  # Variance inflation factor per predictor of 'lm1'; values near 1 indicate little multicollinearity.
##      last_evaluation       number_project average_montly_hours 
##             1.206278             1.313408             1.284216 
##   time_spend_company                 left 
##             1.067913             1.025957
library(pROC)  # Loads the pROC package, which contains functions for receiver operating characteristic (ROC) curve analysis.
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
# NOTE(review): roc() expects a two-class response, but 'satisfaction_level'
# is continuous. As the warning and the "Setting levels" message below show,
# pROC builds the curve from only two of its values (control = 0.09,
# case = 0.1), so the resulting curve and AUC do not describe how well lm1
# fits. Regression metrics (RMSE, R-squared) are the usual choice here; if a
# ROC analysis is really wanted, the response needs to be dichotomised
# explicitly first — confirm the intent.
roc.curve <- roc(satisfaction_level ~ predict(lm1, type = "response"), data = data1)  # Builds a pROC ROC object from satisfaction_level and the fitted values of 'lm1'.
## Warning in roc.default(response, predictors[, 1], ...): 'response' has more
## than two levels. Consider setting 'levels' explicitly or using 'multiclass.roc'
## instead
## Setting levels: control = 0.09, case = 0.1
## Setting direction: controls < cases
# Draw the ROC curve, annotating the "best" threshold, then report its AUC.
plot(roc.curve, print.thres = "best", main = "ROC curve for Satisfaction Levels")

pROC::auc(roc.curve)
## Area under the curve: 0.4934
# Replace the two character columns with integer codes so every column of
# 'data1' is numeric.
# NOTE(review): as.factor() orders levels alphabetically, so these codes carry
# no meaningful order. The head() output below shows salary "low" -> 2 and
# "medium" -> 3 (so "high" is presumably 1), and Department is a nominal
# variable. Models that treat the codes as magnitudes (lm, knn) should be
# checked against factor / dummy encodings or an explicit level order.
data1$Department<-as.numeric(as.factor(data1$Department))  # Encodes 'Department' as the integer code of its alphabetically ordered factor level.
data1$salary<-as.numeric(as.factor(data1$salary))  # Encodes 'salary' the same way.
head(data1)  # Shows the first rows of 'data1' after the conversion.
##   satisfaction_level last_evaluation number_project average_montly_hours
## 1               0.38            0.53              2                  157
## 2               0.80            0.86              5                  262
## 3               0.11            0.88              7                  272
## 4               0.72            0.87              5                  223
## 5               0.37            0.52              2                  159
## 6               0.41            0.50              2                  153
##   time_spend_company Work_accident left promotion_last_5years Department salary
## 1                  3             0    1                     0          8      2
## 2                  6             0    1                     0          8      3
## 3                  4             0    1                     0          8      3
## 4                  5             0    1                     0          8      2
## 5                  3             0    1                     0          8      2
## 6                  3             0    1                     0          8      2

Multiple Linear Regression Modeling

# Regress satisfaction_level on every other column of 'data1' (now all numeric).
mlr_model <- lm(formula = satisfaction_level ~ ., data = data1)
summary(object = mlr_model)  # Coefficient table and fit statistics.
## 
## Call:
## lm(formula = satisfaction_level ~ ., data = data1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65320 -0.13779 -0.01234  0.17071  0.52592 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.058e-01  1.275e-02  47.526  < 2e-16 ***
## last_evaluation        2.462e-01  1.167e-02  21.101  < 2e-16 ***
## number_project        -4.097e-02  1.691e-03 -24.223  < 2e-16 ***
## average_montly_hours   1.908e-04  4.126e-05   4.623 3.80e-06 ***
## time_spend_company    -5.519e-03  1.292e-03  -4.270 1.96e-05 ***
## Work_accident         -9.185e-05  5.237e-03  -0.018   0.9860    
## left                  -2.233e-01  4.388e-03 -50.886  < 2e-16 ***
## promotion_last_5years  8.945e-03  1.268e-02   0.705   0.4806    
## Department             1.573e-03  6.631e-04   2.373   0.0177 *  
## salary                 4.055e-03  2.906e-03   1.395   0.1630    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2227 on 14989 degrees of freedom
## Multiple R-squared:  0.1984, Adjusted R-squared:  0.1979 
## F-statistic: 412.1 on 9 and 14989 DF,  p-value: < 2.2e-16
# Turn the two categorical columns of 'data' into factors in one pass.
data[c("Department", "salary")] <- lapply(
  data[c("Department", "salary")],
  as.factor
)
library(caret)  # Loads the caret package, which contains functions for machine learning and predictive modeling.
## Loading required package: ggplot2
## Loading required package: lattice
# Split data into training (75%) and test (25%) sets.
# The RNG is seeded so that the split, and therefore the RMSE reported below,
# is the same on every run; the figures printed in this transcript came from
# an unseeded run and will differ.
set.seed(123)
n_train <- floor(0.75 * nrow(data1))  # sample() needs a whole number of rows
train_index <- sample(nrow(data1), n_train)
train <- data1[train_index, ]
test <- data1[-train_index, ]

# Fit the linear model on the training rows only and score it on the held-out rows.
mlr_model <- lm(satisfaction_level ~ ., data = train)
predictions <- predict(mlr_model, newdata = test)
rmse <- RMSE(predictions, test$satisfaction_level)  # caret::RMSE(pred, obs)
rmse  # Displays the test-set RMSE.
## [1] 0.2240144
summary(object = mlr_model)  # Coefficient table and fit statistics for the model fitted on the training rows.
## 
## Call:
## lm(formula = satisfaction_level ~ ., data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64708 -0.13795 -0.01193  0.16964  0.52490 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.166e-01  1.472e-02  41.893  < 2e-16 ***
## last_evaluation        2.432e-01  1.346e-02  18.070  < 2e-16 ***
## number_project        -3.981e-02  1.953e-03 -20.381  < 2e-16 ***
## average_montly_hours   1.388e-04  4.743e-05   2.927  0.00343 ** 
## time_spend_company    -5.175e-03  1.489e-03  -3.476  0.00051 ***
## Work_accident         -4.362e-03  5.981e-03  -0.729  0.46576    
## left                  -2.276e-01  5.084e-03 -44.777  < 2e-16 ***
## promotion_last_5years  1.826e-02  1.465e-02   1.247  0.21249    
## Department             1.739e-03  7.660e-04   2.271  0.02319 *  
## salary                 2.741e-03  3.351e-03   0.818  0.41343    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2223 on 11239 degrees of freedom
## Multiple R-squared:    0.2,  Adjusted R-squared:  0.1993 
## F-statistic: 312.2 on 9 and 11239 DF,  p-value: < 2.2e-16
library(caret)  # Loads the caret package.
# Split data into training (75%) and test (25%) sets.
# Seeding fixes both the split and the cross-validation folds drawn by
# train(), so the tuning table and the RMSE are reproducible; the figures
# printed in this transcript came from an unseeded run and will differ.
set.seed(123)
n_train <- floor(0.75 * nrow(data1))  # sample() needs a whole number of rows
train_index <- sample(nrow(data1), n_train)
train <- data1[train_index, ]
test <- data1[-train_index, ]

# Tune k by 5-fold cross-validation over 10 candidate values.
# NOTE(review): the predictors are on very different scales (per the summary
# at the top of the file, average_montly_hours spans 96-310 while
# Work_accident is 0/1), and knn is distance-based. Consider adding
# preProcess = c("center", "scale") to train() and re-running.
cv_control <- trainControl(method = "cv", number = 5)
knn_model <- train(
  satisfaction_level ~ .,
  data = train,
  method = "knn",
  trControl = cv_control,
  tuneLength = 10
)

predictions <- predict(knn_model, newdata = test)
rmse <- RMSE(predictions, test$satisfaction_level)  # caret::RMSE(pred, obs)
rmse  # Displays the test-set RMSE.
## [1] 0.1964364
print(knn_model)  # Shows the resampling results per k and the selected value.
## k-Nearest Neighbors 
## 
## 11249 samples
##     9 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 8998, 9000, 9000, 8998, 9000 
## Resampling results across tuning parameters:
## 
##   k   RMSE       Rsquared   MAE      
##    5  0.2060301  0.3381178  0.1524995
##    7  0.2034742  0.3435083  0.1519791
##    9  0.2031038  0.3409996  0.1529153
##   11  0.2024010  0.3421331  0.1533562
##   13  0.2022033  0.3416447  0.1536898
##   15  0.2023878  0.3391117  0.1544352
##   17  0.2025088  0.3372876  0.1549230
##   19  0.2029253  0.3340218  0.1556012
##   21  0.2032374  0.3315789  0.1560191
##   23  0.2038273  0.3275502  0.1567571
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 13.
library(xgboost)
# Split data into training (75%) and test (25%) sets and wrap each as an
# xgb.DMatrix (feature matrix plus satisfaction_level as the label).
set.seed(123)  # reproducible split
n_train <- floor(0.75 * nrow(data1))  # sample() needs a whole number of rows
train_index <- sample(nrow(data1), n_train)
train <- data1[train_index, ]
test <- data1[-train_index, ]

# Select the predictors by name instead of dropping column 1 by position, so
# a reordered data frame cannot leak the response into the feature matrix.
feature_cols <- setdiff(names(data1), "satisfaction_level")
dtrain <- xgb.DMatrix(
  data = as.matrix(train[, feature_cols]),
  label = train$satisfaction_level
)
dtest <- xgb.DMatrix(
  data = as.matrix(test[, feature_cols]),
  label = test$satisfaction_level
)
# NOTE(review): dtrain/dtest are not referenced again in the code visible
# here (the xgboost fit further down builds its own matrices from 'data').

Random Forest Model Training and Testing

library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
# Split data into training (80%) and test (20%) sets.
# Seeding fixes both the split and the bootstrap/feature sampling inside
# randomForest(), so the forest and its RMSE are reproducible; the figures
# printed in this transcript came from an unseeded run and will differ.
set.seed(123)
n_train <- floor(0.80 * nrow(data1))  # sample() needs a whole number of rows
train_index <- sample(nrow(data1), n_train)
train <- data1[train_index, ]
test <- data1[-train_index, ]

# Regression forest with 20 trees, fitted on the training rows only.
rf_model <- randomForest(
  satisfaction_level ~ .,
  data = train,
  ntree = 20
)

# Score on the held-out rows: root mean squared error of the predictions.
predicted_values <- predict(rf_model, newdata = test)
rmse <- sqrt(mean((test$satisfaction_level - predicted_values)^2))
rmse  # Displays the test-set RMSE.
## [1] 0.1743369
print(rf_model)  # Shows the forest's call, size and out-of-bag fit statistics.
## 
## Call:
##  randomForest(formula = satisfaction_level ~ ., data = train,      ntree = 20) 
##                Type of random forest: regression
##                      Number of trees: 20
## No. of variables tried at each split: 3
## 
##           Mean of squared residuals: 0.03098704
##                     % Var explained: 49.86
# Replace the two categorical columns of 'data' with the integer codes of
# their factor levels, so the whole frame can be passed to as.matrix().
data[c("Department", "salary")] <- lapply(
  data[c("Department", "salary")],
  function(column) as.numeric(as.factor(column))
)

XGBoost Model Training and Testing

library(xgboost)
# Split data into training (75%) and test (25%) sets.
# Seeded so the split and the boosting log are reproducible; the figures
# printed in this transcript came from an unseeded run and will differ.
set.seed(123)
n_train <- floor(0.75 * nrow(data))  # sample() needs a whole number of rows
train_index <- sample(nrow(data), n_train)
train <- data[train_index, ]
test <- data[-train_index, ]

# Pick the predictors by name. Unlike -which(names == ...), setdiff() cannot
# silently select zero columns when the response name is not matched.
feature_cols <- setdiff(names(train), "satisfaction_level")

# Boost for 500 rounds on the training rows.
# NOTE(review): the log below ends at a train RMSE of about 0.059 while the
# test RMSE reported at the end of the file is about 0.189, which suggests
# overfitting. Consider choosing nrounds with xgb.cv() or early stopping on
# a validation set.
xgb_model <- xgboost(
  data = as.matrix(train[, feature_cols]),
  label = train$satisfaction_level,
  nrounds = 500
)
## [1]  train-rmse:0.231641 
## [2]  train-rmse:0.204860 
## [3]  train-rmse:0.189708 
## [4]  train-rmse:0.181412 
## [5]  train-rmse:0.177042 
## [6]  train-rmse:0.174257 
## [7]  train-rmse:0.172619 
## [8]  train-rmse:0.171128 
## [9]  train-rmse:0.169610 
## [10] train-rmse:0.168743 
## [11] train-rmse:0.167790 
## [12] train-rmse:0.167326 
## [13] train-rmse:0.166649 
## [14] train-rmse:0.165453 
## [15] train-rmse:0.164473 
## [16] train-rmse:0.164070 
## [17] train-rmse:0.163139 
## [18] train-rmse:0.162357 
## [19] train-rmse:0.161712 
## [20] train-rmse:0.160616 
## [21] train-rmse:0.159759 
## [22] train-rmse:0.159237 
## [23] train-rmse:0.158750 
## [24] train-rmse:0.158170 
## [25] train-rmse:0.157047 
## [26] train-rmse:0.156589 
## [27] train-rmse:0.156265 
## [28] train-rmse:0.155343 
## [29] train-rmse:0.155052 
## [30] train-rmse:0.154081 
## [31] train-rmse:0.153315 
## [32] train-rmse:0.152651 
## [33] train-rmse:0.151912 
## [34] train-rmse:0.151346 
## [35] train-rmse:0.150700 
## [36] train-rmse:0.150268 
## [37] train-rmse:0.149633 
## [38] train-rmse:0.149181 
## [39] train-rmse:0.148430 
## [40] train-rmse:0.147548 
## [41] train-rmse:0.146982 
## [42] train-rmse:0.146251 
## [43] train-rmse:0.145442 
## [44] train-rmse:0.144816 
## [45] train-rmse:0.144195 
## [46] train-rmse:0.143412 
## [47] train-rmse:0.143138 
## [48] train-rmse:0.142835 
## [49] train-rmse:0.142522 
## [50] train-rmse:0.142431 
## [51] train-rmse:0.142122 
## [52] train-rmse:0.141806 
## [53] train-rmse:0.141371 
## [54] train-rmse:0.140837 
## [55] train-rmse:0.140633 
## [56] train-rmse:0.140162 
## [57] train-rmse:0.139928 
## [58] train-rmse:0.139520 
## [59] train-rmse:0.139376 
## [60] train-rmse:0.138874 
## [61] train-rmse:0.138794 
## [62] train-rmse:0.138428 
## [63] train-rmse:0.137565 
## [64] train-rmse:0.137092 
## [65] train-rmse:0.136370 
## [66] train-rmse:0.136034 
## [67] train-rmse:0.135632 
## [68] train-rmse:0.135265 
## [69] train-rmse:0.134907 
## [70] train-rmse:0.134500 
## [71] train-rmse:0.134071 
## [72] train-rmse:0.133435 
## [73] train-rmse:0.132785 
## [74] train-rmse:0.132607 
## [75] train-rmse:0.132063 
## [76] train-rmse:0.131854 
## [77] train-rmse:0.131637 
## [78] train-rmse:0.131281 
## [79] train-rmse:0.131077 
## [80] train-rmse:0.130602 
## [81] train-rmse:0.130424 
## [82] train-rmse:0.130090 
## [83] train-rmse:0.129542 
## [84] train-rmse:0.129033 
## [85] train-rmse:0.128574 
## [86] train-rmse:0.128234 
## [87] train-rmse:0.128026 
## [88] train-rmse:0.127811 
## [89] train-rmse:0.127326 
## [90] train-rmse:0.127007 
## [91] train-rmse:0.126502 
## [92] train-rmse:0.126146 
## [93] train-rmse:0.125653 
## [94] train-rmse:0.125188 
## [95] train-rmse:0.124979 
## [96] train-rmse:0.124906 
## [97] train-rmse:0.124619 
## [98] train-rmse:0.124369 
## [99] train-rmse:0.124101 
## [100]    train-rmse:0.124000 
## [101]    train-rmse:0.123935 
## [102]    train-rmse:0.123758 
## [103]    train-rmse:0.123437 
## [104]    train-rmse:0.123305 
## [105]    train-rmse:0.122981 
## [106]    train-rmse:0.122537 
## [107]    train-rmse:0.122326 
## [108]    train-rmse:0.121806 
## [109]    train-rmse:0.121535 
## [110]    train-rmse:0.120983 
## [111]    train-rmse:0.120639 
## [112]    train-rmse:0.120258 
## [113]    train-rmse:0.119867 
## [114]    train-rmse:0.119552 
## [115]    train-rmse:0.119352 
## [116]    train-rmse:0.119278 
## [117]    train-rmse:0.119056 
## [118]    train-rmse:0.118537 
## [119]    train-rmse:0.118191 
## [120]    train-rmse:0.118105 
## [121]    train-rmse:0.118071 
## [122]    train-rmse:0.118039 
## [123]    train-rmse:0.117631 
## [124]    train-rmse:0.117254 
## [125]    train-rmse:0.116927 
## [126]    train-rmse:0.116497 
## [127]    train-rmse:0.116354 
## [128]    train-rmse:0.115963 
## [129]    train-rmse:0.115566 
## [130]    train-rmse:0.115092 
## [131]    train-rmse:0.114776 
## [132]    train-rmse:0.114530 
## [133]    train-rmse:0.114455 
## [134]    train-rmse:0.114024 
## [135]    train-rmse:0.113626 
## [136]    train-rmse:0.113566 
## [137]    train-rmse:0.113500 
## [138]    train-rmse:0.113083 
## [139]    train-rmse:0.112696 
## [140]    train-rmse:0.112347 
## [141]    train-rmse:0.112002 
## [142]    train-rmse:0.111871 
## [143]    train-rmse:0.111452 
## [144]    train-rmse:0.111084 
## [145]    train-rmse:0.110766 
## [146]    train-rmse:0.110543 
## [147]    train-rmse:0.110307 
## [148]    train-rmse:0.110020 
## [149]    train-rmse:0.109644 
## [150]    train-rmse:0.109234 
## [151]    train-rmse:0.108886 
## [152]    train-rmse:0.108642 
## [153]    train-rmse:0.108292 
## [154]    train-rmse:0.108149 
## [155]    train-rmse:0.107819 
## [156]    train-rmse:0.107578 
## [157]    train-rmse:0.107128 
## [158]    train-rmse:0.106951 
## [159]    train-rmse:0.106832 
## [160]    train-rmse:0.106478 
## [161]    train-rmse:0.106225 
## [162]    train-rmse:0.106005 
## [163]    train-rmse:0.105721 
## [164]    train-rmse:0.105461 
## [165]    train-rmse:0.105347 
## [166]    train-rmse:0.105122 
## [167]    train-rmse:0.105026 
## [168]    train-rmse:0.104627 
## [169]    train-rmse:0.104531 
## [170]    train-rmse:0.104358 
## [171]    train-rmse:0.104167 
## [172]    train-rmse:0.103865 
## [173]    train-rmse:0.103735 
## [174]    train-rmse:0.103596 
## [175]    train-rmse:0.103296 
## [176]    train-rmse:0.103063 
## [177]    train-rmse:0.102693 
## [178]    train-rmse:0.102351 
## [179]    train-rmse:0.102217 
## [180]    train-rmse:0.102199 
## [181]    train-rmse:0.101878 
## [182]    train-rmse:0.101808 
## [183]    train-rmse:0.101681 
## [184]    train-rmse:0.101549 
## [185]    train-rmse:0.101408 
## [186]    train-rmse:0.101203 
## [187]    train-rmse:0.101020 
## [188]    train-rmse:0.100651 
## [189]    train-rmse:0.100438 
## [190]    train-rmse:0.100330 
## [191]    train-rmse:0.100188 
## [192]    train-rmse:0.100026 
## [193]    train-rmse:0.099771 
## [194]    train-rmse:0.099675 
## [195]    train-rmse:0.099361 
## [196]    train-rmse:0.099063 
## [197]    train-rmse:0.099030 
## [198]    train-rmse:0.098982 
## [199]    train-rmse:0.098960 
## [200]    train-rmse:0.098765 
## [201]    train-rmse:0.098697 
## [202]    train-rmse:0.098383 
## [203]    train-rmse:0.098213 
## [204]    train-rmse:0.097906 
## [205]    train-rmse:0.097588 
## [206]    train-rmse:0.097307 
## [207]    train-rmse:0.097224 
## [208]    train-rmse:0.096908 
## [209]    train-rmse:0.096867 
## [210]    train-rmse:0.096692 
## [211]    train-rmse:0.096496 
## [212]    train-rmse:0.096390 
## [213]    train-rmse:0.096165 
## [214]    train-rmse:0.095971 
## [215]    train-rmse:0.095757 
## [216]    train-rmse:0.095712 
## [217]    train-rmse:0.095643 
## [218]    train-rmse:0.095443 
## [219]    train-rmse:0.095140 
## [220]    train-rmse:0.094992 
## [221]    train-rmse:0.094949 
## [222]    train-rmse:0.094587 
## [223]    train-rmse:0.094452 
## [224]    train-rmse:0.094333 
## [225]    train-rmse:0.094313 
## [226]    train-rmse:0.094122 
## [227]    train-rmse:0.093906 
## [228]    train-rmse:0.093833 
## [229]    train-rmse:0.093517 
## [230]    train-rmse:0.093238 
## [231]    train-rmse:0.092919 
## [232]    train-rmse:0.092694 
## [233]    train-rmse:0.092610 
## [234]    train-rmse:0.092592 
## [235]    train-rmse:0.092241 
## [236]    train-rmse:0.091961 
## [237]    train-rmse:0.091541 
## [238]    train-rmse:0.091493 
## [239]    train-rmse:0.091217 
## [240]    train-rmse:0.091182 
## [241]    train-rmse:0.090913 
## [242]    train-rmse:0.090784 
## [243]    train-rmse:0.090726 
## [244]    train-rmse:0.090382 
## [245]    train-rmse:0.090269 
## [246]    train-rmse:0.089989 
## [247]    train-rmse:0.089880 
## [248]    train-rmse:0.089698 
## [249]    train-rmse:0.089519 
## [250]    train-rmse:0.089420 
## [251]    train-rmse:0.089392 
## [252]    train-rmse:0.089148 
## [253]    train-rmse:0.088843 
## [254]    train-rmse:0.088721 
## [255]    train-rmse:0.088542 
## [256]    train-rmse:0.088309 
## [257]    train-rmse:0.088261 
## [258]    train-rmse:0.088242 
## [259]    train-rmse:0.088122 
## [260]    train-rmse:0.088018 
## [261]    train-rmse:0.087997 
## [262]    train-rmse:0.087936 
## [263]    train-rmse:0.087628 
## [264]    train-rmse:0.087585 
## [265]    train-rmse:0.087546 
## [266]    train-rmse:0.087484 
## [267]    train-rmse:0.087405 
## [268]    train-rmse:0.087284 
## [269]    train-rmse:0.086981 
## [270]    train-rmse:0.086860 
## [271]    train-rmse:0.086792 
## [272]    train-rmse:0.086575 
## [273]    train-rmse:0.086431 
## [274]    train-rmse:0.086420 
## [275]    train-rmse:0.086274 
## [276]    train-rmse:0.086171 
## [277]    train-rmse:0.086134 
## [278]    train-rmse:0.085765 
## [279]    train-rmse:0.085686 
## [280]    train-rmse:0.085622 
## [281]    train-rmse:0.085395 
## [282]    train-rmse:0.085258 
## [283]    train-rmse:0.085098 
## [284]    train-rmse:0.084904 
## [285]    train-rmse:0.084802 
## [286]    train-rmse:0.084600 
## [287]    train-rmse:0.084393 
## [288]    train-rmse:0.084093 
## [289]    train-rmse:0.083880 
## [290]    train-rmse:0.083719 
## [291]    train-rmse:0.083572 
## [292]    train-rmse:0.083394 
## [293]    train-rmse:0.083138 
## [294]    train-rmse:0.082966 
## [295]    train-rmse:0.082681 
## [296]    train-rmse:0.082585 
## [297]    train-rmse:0.082462 
## [298]    train-rmse:0.082233 
## [299]    train-rmse:0.082139 
## [300]    train-rmse:0.082104 
## [301]    train-rmse:0.081945 
## [302]    train-rmse:0.081875 
## [303]    train-rmse:0.081793 
## [304]    train-rmse:0.081682 
## [305]    train-rmse:0.081501 
## [306]    train-rmse:0.081230 
## [307]    train-rmse:0.081174 
## [308]    train-rmse:0.080986 
## [309]    train-rmse:0.080924 
## [310]    train-rmse:0.080722 
## [311]    train-rmse:0.080645 
## [312]    train-rmse:0.080450 
## [313]    train-rmse:0.080285 
## [314]    train-rmse:0.080253 
## [315]    train-rmse:0.080170 
## [316]    train-rmse:0.079924 
## [317]    train-rmse:0.079816 
## [318]    train-rmse:0.079584 
## [319]    train-rmse:0.079304 
## [320]    train-rmse:0.079117 
## [321]    train-rmse:0.078950 
## [322]    train-rmse:0.078781 
## [323]    train-rmse:0.078656 
## [324]    train-rmse:0.078651 
## [325]    train-rmse:0.078597 
## [326]    train-rmse:0.078425 
## [327]    train-rmse:0.078337 
## [328]    train-rmse:0.078163 
## [329]    train-rmse:0.077911 
## [330]    train-rmse:0.077766 
## [331]    train-rmse:0.077617 
## [332]    train-rmse:0.077509 
## [333]    train-rmse:0.077489 
## [334]    train-rmse:0.077309 
## [335]    train-rmse:0.077193 
## [336]    train-rmse:0.076913 
## [337]    train-rmse:0.076783 
## [338]    train-rmse:0.076580 
## [339]    train-rmse:0.076364 
## [340]    train-rmse:0.076329 
## [341]    train-rmse:0.076162 
## [342]    train-rmse:0.076149 
## [343]    train-rmse:0.076128 
## [344]    train-rmse:0.076054 
## [345]    train-rmse:0.075994 
## [346]    train-rmse:0.075966 
## [347]    train-rmse:0.075797 
## [348]    train-rmse:0.075591 
## [349]    train-rmse:0.075354 
## [350]    train-rmse:0.075149 
## [351]    train-rmse:0.075083 
## [352]    train-rmse:0.074989 
## [353]    train-rmse:0.074789 
## [354]    train-rmse:0.074688 
## [355]    train-rmse:0.074459 
## [356]    train-rmse:0.074346 
## [357]    train-rmse:0.074202 
## [358]    train-rmse:0.074069 
## [359]    train-rmse:0.073948 
## [360]    train-rmse:0.073759 
## [361]    train-rmse:0.073608 
## [362]    train-rmse:0.073478 
## [363]    train-rmse:0.073195 
## [364]    train-rmse:0.073018 
## [365]    train-rmse:0.072808 
## [366]    train-rmse:0.072567 
## [367]    train-rmse:0.072435 
## [368]    train-rmse:0.072321 
## [369]    train-rmse:0.072215 
## [370]    train-rmse:0.072068 
## [371]    train-rmse:0.072029 
## [372]    train-rmse:0.071981 
## [373]    train-rmse:0.071941 
## [374]    train-rmse:0.071843 
## [375]    train-rmse:0.071746 
## [376]    train-rmse:0.071590 
## [377]    train-rmse:0.071510 
## [378]    train-rmse:0.071357 
## [379]    train-rmse:0.071166 
## [380]    train-rmse:0.071039 
## [381]    train-rmse:0.070887 
## [382]    train-rmse:0.070791 
## [383]    train-rmse:0.070696 
## [384]    train-rmse:0.070547 
## [385]    train-rmse:0.070384 
## [386]    train-rmse:0.070339 
## [387]    train-rmse:0.070255 
## [388]    train-rmse:0.070232 
## [389]    train-rmse:0.070114 
## [390]    train-rmse:0.069932 
## [391]    train-rmse:0.069749 
## [392]    train-rmse:0.069632 
## [393]    train-rmse:0.069462 
## [394]    train-rmse:0.069377 
## [395]    train-rmse:0.069291 
## [396]    train-rmse:0.069191 
## [397]    train-rmse:0.069143 
## [398]    train-rmse:0.069076 
## [399]    train-rmse:0.068984 
## [400]    train-rmse:0.068891 
## [401]    train-rmse:0.068812 
## [402]    train-rmse:0.068799 
## [403]    train-rmse:0.068712 
## [404]    train-rmse:0.068530 
## [405]    train-rmse:0.068418 
## [406]    train-rmse:0.068253 
## [407]    train-rmse:0.068078 
## [408]    train-rmse:0.067915 
## [409]    train-rmse:0.067747 
## [410]    train-rmse:0.067697 
## [411]    train-rmse:0.067594 
## [412]    train-rmse:0.067452 
## [413]    train-rmse:0.067410 
## [414]    train-rmse:0.067320 
## [415]    train-rmse:0.067316 
## [416]    train-rmse:0.067266 
## [417]    train-rmse:0.067217 
## [418]    train-rmse:0.067075 
## [419]    train-rmse:0.066936 
## [420]    train-rmse:0.066761 
## [421]    train-rmse:0.066602 
## [422]    train-rmse:0.066413 
## [423]    train-rmse:0.066378 
## [424]    train-rmse:0.066273 
## [425]    train-rmse:0.066241 
## [426]    train-rmse:0.066181 
## [427]    train-rmse:0.066056 
## [428]    train-rmse:0.065920 
## [429]    train-rmse:0.065827 
## [430]    train-rmse:0.065621 
## [431]    train-rmse:0.065564 
## [432]    train-rmse:0.065547 
## [433]    train-rmse:0.065393 
## [434]    train-rmse:0.065329 
## [435]    train-rmse:0.065314 
## [436]    train-rmse:0.065150 
## [437]    train-rmse:0.065008 
## [438]    train-rmse:0.064854 
## [439]    train-rmse:0.064688 
## [440]    train-rmse:0.064561 
## [441]    train-rmse:0.064397 
## [442]    train-rmse:0.064338 
## [443]    train-rmse:0.064316 
## [444]    train-rmse:0.064102 
## [445]    train-rmse:0.063933 
## [446]    train-rmse:0.063796 
## [447]    train-rmse:0.063639 
## [448]    train-rmse:0.063521 
## [449]    train-rmse:0.063482 
## [450]    train-rmse:0.063428 
## [451]    train-rmse:0.063313 
## [452]    train-rmse:0.063293 
## [453]    train-rmse:0.063225 
## [454]    train-rmse:0.063144 
## [455]    train-rmse:0.063032 
## [456]    train-rmse:0.062950 
## [457]    train-rmse:0.062937 
## [458]    train-rmse:0.062912 
## [459]    train-rmse:0.062827 
## [460]    train-rmse:0.062789 
## [461]    train-rmse:0.062738 
## [462]    train-rmse:0.062647 
## [463]    train-rmse:0.062557 
## [464]    train-rmse:0.062456 
## [465]    train-rmse:0.062413 
## [466]    train-rmse:0.062358 
## [467]    train-rmse:0.062278 
## [468]    train-rmse:0.062116 
## [469]    train-rmse:0.062037 
## [470]    train-rmse:0.062021 
## [471]    train-rmse:0.061983 
## [472]    train-rmse:0.061808 
## [473]    train-rmse:0.061724 
## [474]    train-rmse:0.061543 
## [475]    train-rmse:0.061420 
## [476]    train-rmse:0.061406 
## [477]    train-rmse:0.061366 
## [478]    train-rmse:0.061267 
## [479]    train-rmse:0.061226 
## [480]    train-rmse:0.061055 
## [481]    train-rmse:0.061036 
## [482]    train-rmse:0.060881 
## [483]    train-rmse:0.060781 
## [484]    train-rmse:0.060596 
## [485]    train-rmse:0.060491 
## [486]    train-rmse:0.060406 
## [487]    train-rmse:0.060332 
## [488]    train-rmse:0.060270 
## [489]    train-rmse:0.060174 
## [490]    train-rmse:0.060075 
## [491]    train-rmse:0.059991 
## [492]    train-rmse:0.059809 
## [493]    train-rmse:0.059692 
## [494]    train-rmse:0.059516 
## [495]    train-rmse:0.059320 
## [496]    train-rmse:0.059189 
## [497]    train-rmse:0.059104 
## [498]    train-rmse:0.058993 
## [499]    train-rmse:0.058861 
## [500]    train-rmse:0.058710
# Score the boosted model on the held-out rows: drop the response column,
# predict, and report the root mean squared error.
predicted_values <- predict(
  xgb_model,
  newdata = as.matrix(test[, names(test) != "satisfaction_level"])
)
rmse <- sqrt(mean((predicted_values - test$satisfaction_level)^2))
rmse  # Displays the test-set RMSE.
## [1] 0.1886214