Machine Learning Model Training and Evaluation
# Load the HR dataset from the local CSV export, then print per-column summary statistics.
data <- read.csv(file = "/Users/sunny/Downloads/HRDATA.csv")
summary(object = data)
## satisfaction_level last_evaluation number_project average_montly_hours
## Min. :0.0900 Min. :0.3600 Min. :2.000 Min. : 96.0
## 1st Qu.:0.4400 1st Qu.:0.5600 1st Qu.:3.000 1st Qu.:156.0
## Median :0.6400 Median :0.7200 Median :4.000 Median :200.0
## Mean :0.6128 Mean :0.7161 Mean :3.803 Mean :201.1
## 3rd Qu.:0.8200 3rd Qu.:0.8700 3rd Qu.:5.000 3rd Qu.:245.0
## Max. :1.0000 Max. :1.0000 Max. :7.000 Max. :310.0
## time_spend_company Work_accident left promotion_last_5years
## Min. : 2.000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.: 3.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median : 3.000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean : 3.498 Mean :0.1446 Mean :0.2381 Mean :0.02127
## 3rd Qu.: 4.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :10.000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## Department salary
## Length:14999 Length:14999
## Class :character Class :character
## Mode :character Mode :character
##
##
##
Data Loading and Summary Statistics
# Read the same CSV a second time into 'data1' and print its per-column summary statistics.
data1 <- read.csv(file = "/Users/sunny/Downloads/HRDATA.csv")
summary(object = data1)
## satisfaction_level last_evaluation number_project average_montly_hours
## Min. :0.0900 Min. :0.3600 Min. :2.000 Min. : 96.0
## 1st Qu.:0.4400 1st Qu.:0.5600 1st Qu.:3.000 1st Qu.:156.0
## Median :0.6400 Median :0.7200 Median :4.000 Median :200.0
## Mean :0.6128 Mean :0.7161 Mean :3.803 Mean :201.1
## 3rd Qu.:0.8200 3rd Qu.:0.8700 3rd Qu.:5.000 3rd Qu.:245.0
## Max. :1.0000 Max. :1.0000 Max. :7.000 Max. :310.0
## time_spend_company Work_accident left promotion_last_5years
## Min. : 2.000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.: 3.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median : 3.000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean : 3.498 Mean :0.1446 Mean :0.2381 Mean :0.02127
## 3rd Qu.: 4.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :10.000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## Department salary
## Length:14999 Length:14999
## Class :character Class :character
## Mode :character Mode :character
##
##
##
head(x = data1, n = 6L) # Preview the first six rows of 'data1'.
## satisfaction_level last_evaluation number_project average_montly_hours
## 1 0.38 0.53 2 157
## 2 0.80 0.86 5 262
## 3 0.11 0.88 7 272
## 4 0.72 0.87 5 223
## 5 0.37 0.52 2 159
## 6 0.41 0.50 2 153
## time_spend_company Work_accident left promotion_last_5years Department salary
## 1 3 0 1 0 sales low
## 2 6 0 1 0 sales medium
## 3 4 0 1 0 sales medium
## 4 5 0 1 0 sales low
## 5 3 0 1 0 sales low
## 6 3 0 1 0 sales low
pairs(data[1:8]) # Scatterplot matrix of the first eight columns of 'data'.

library(MASS) # Provides boxcox() for profiling the Box-Cox transformation parameter.
# Fit a linear model for satisfaction_level; the response is raised to the power 1.1
# before fitting.
lm1 <- lm(
  (satisfaction_level^1.1) ~ last_evaluation + number_project + average_montly_hours +
    time_spend_company + left,
  data = data1
)
# boxcox() draws the profile log-likelihood plot and returns the lambda grid (x) together
# with the log-likelihood at each grid point (y). Call it once and keep the result, rather
# than plotting twice and storing the whole list under the name 'lambda'.
bc_profile <- boxcox(lm1)
lambda <- bc_profile$x[which.max(bc_profile$y)] # Lambda with the highest profile log-likelihood.

Exploratory Data Analysis (EDA)
plot(lm1, which = 1:2) # Draw the first two diagnostic plots of plot.lm() for 'lm1'.


Linear Regression Modeling
# Linear model for the untransformed satisfaction_level: the predictors used in 'lm1'
# plus promotion_last_5years and salary.
lm10 <- lm(
  formula = satisfaction_level ~ last_evaluation + number_project +
    average_montly_hours + time_spend_company + left +
    promotion_last_5years + salary,
  data = data1
)
summary(lm10) # Coefficient table, residual quantiles and overall fit statistics.
##
## Call:
## lm(formula = satisfaction_level ~ last_evaluation + number_project +
## average_montly_hours + time_spend_company + left + promotion_last_5years +
## salary, data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64739 -0.13676 -0.01193 0.17003 0.52774
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.148e-01 1.156e-02 53.189 < 2e-16 ***
## last_evaluation 2.460e-01 1.167e-02 21.073 < 2e-16 ***
## number_project -4.090e-02 1.691e-03 -24.184 < 2e-16 ***
## average_montly_hours 1.913e-04 4.126e-05 4.636 3.58e-06 ***
## time_spend_company -5.525e-03 1.294e-03 -4.268 1.98e-05 ***
## left -2.241e-01 4.395e-03 -50.980 < 2e-16 ***
## promotion_last_5years 9.282e-03 1.272e-02 0.730 0.4654
## salarylow 1.200e-02 6.959e-03 1.724 0.0847 .
## salarymedium 1.306e-02 6.955e-03 1.878 0.0604 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2227 on 14990 degrees of freedom
## Multiple R-squared: 0.1982, Adjusted R-squared: 0.1977
## F-statistic: 463 on 8 and 14990 DF, p-value: < 2.2e-16
table(data1[["left"]]) # Count how many rows carry each value of 'left'.
##
## 0 1
## 11428 3571
library(car) # Loads the car package, which contains functions for linear regression diagnostics.
## Loading required package: carData
# Estimate the Box-Cox power for the response of 'lm1' and print the likelihood-ratio tests
# for lambda = 0 (log transformation) and lambda = 1 (no transformation needed).
summary(car::powerTransform(lm1))
## bcPower Transformation to Normality
## Est Power Rounded Pwr Wald Lwr Bnd Wald Upr Bnd
## Y1 1.012 1 0.9833 1.0406
##
## Likelihood ratio test that transformation parameter is equal to 0
## (log transformation)
## LRT df pval
## LR test, lambda = (0) 6018.397 1 < 2.22e-16
##
## Likelihood ratio test that no transformation is needed
## LRT df pval
## LR test, lambda = (1) 0.6756013 1 0.41111
library(car) # Loads the car package.
car::vif(mod = lm1) # Variance inflation factor for each predictor of 'lm1' (multicollinearity check).
## last_evaluation number_project average_montly_hours
## 1.206278 1.313408 1.284216
## time_spend_company left
## 1.067913 1.025957
library(pROC) # Loads the pROC package, which contains functions for receiver operating characteristic (ROC) curve analysis.
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
# NOTE(review): roc() is given the continuous 'satisfaction_level' as its response. The warning
# recorded below shows pROC fell back to comparing only two of its values (control = 0.09,
# case = 0.1), so the curve and AUC describe that pair of values only - confirm whether a
# regression metric, or a binary outcome, was intended here.
roc.curve <- roc(satisfaction_level ~ predict(lm1, type = "response"), data = data1) # Builds a pROC 'roc' object from the fitted values of 'lm1'.
## Warning in roc.default(response, predictors[, 1], ...): 'response' has more
## than two levels. Consider setting 'levels' explicitly or using 'multiclass.roc'
## instead
## Setting levels: control = 0.09, case = 0.1
## Setting direction: controls < cases
plot(roc.curve, print.thres = "best", main = "ROC curve for Satisfaction Levels") # Draw the curve and mark the "best" threshold on it.

pROC::auc(roc.curve) # Print the area under 'roc.curve'.
## Area under the curve: 0.4934
# Department is nominal, so its integer codes (alphabetical factor order) are labels rather
# than magnitudes. It is kept numeric because later steps pass these columns through as.matrix().
data1$Department <- as.numeric(as.factor(data1$Department))
# salary is ordinal. as.factor() alone orders the levels alphabetically (high = 1, low = 2,
# medium = 3), which is not monotone in pay. Spell the order out so that low = 1,
# medium = 2, high = 3, and fail loudly if an unexpected level appears instead of
# silently producing NA.
salary_levels <- c("low", "medium", "high")
stopifnot(all(data1$salary %in% salary_levels))
data1$salary <- as.numeric(factor(data1$salary, levels = salary_levels))
# Model summaries recorded later in this document were produced with the alphabetical
# salary coding and need to be regenerated after this change.
head(data1) # First rows of 'data1' after the numeric conversion.
## satisfaction_level last_evaluation number_project average_montly_hours
## 1 0.38 0.53 2 157
## 2 0.80 0.86 5 262
## 3 0.11 0.88 7 272
## 4 0.72 0.87 5 223
## 5 0.37 0.52 2 159
## 6 0.41 0.50 2 153
## time_spend_company Work_accident left promotion_last_5years Department salary
## 1 3 0 1 0 8 1
## 2 6 0 1 0 8 2
## 3 4 0 1 0 8 2
## 4 5 0 1 0 8 1
## 5 3 0 1 0 8 1
## 6 3 0 1 0 8 1
Multiple Linear Regression Modeling
# Regress satisfaction_level on every other column of 'data1', using all rows.
mlr_model <- lm(formula = satisfaction_level ~ ., data = data1)
summary(mlr_model) # Coefficient table and fit statistics for the all-predictor model.
##
## Call:
## lm(formula = satisfaction_level ~ ., data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65320 -0.13779 -0.01234 0.17071 0.52592
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.058e-01 1.275e-02 47.526 < 2e-16 ***
## last_evaluation 2.462e-01 1.167e-02 21.101 < 2e-16 ***
## number_project -4.097e-02 1.691e-03 -24.223 < 2e-16 ***
## average_montly_hours 1.908e-04 4.126e-05 4.623 3.80e-06 ***
## time_spend_company -5.519e-03 1.292e-03 -4.270 1.96e-05 ***
## Work_accident -9.185e-05 5.237e-03 -0.018 0.9860
## left -2.233e-01 4.388e-03 -50.886 < 2e-16 ***
## promotion_last_5years 8.945e-03 1.268e-02 0.705 0.4806
## Department 1.573e-03 6.631e-04 2.373 0.0177 *
## salary 4.055e-03 2.906e-03 1.395 0.1630
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2227 on 14989 degrees of freedom
## Multiple R-squared: 0.1984, Adjusted R-squared: 0.1979
## F-statistic: 412.1 on 9 and 14989 DF, p-value: < 2.2e-16
# Turn the 'Department' and 'salary' columns of 'data' into factors in one pass.
data[c("Department", "salary")] <- lapply(data[c("Department", "salary")], as.factor)
library(caret) # Loads the caret package, which contains functions for machine learning and predictive modeling.
## Loading required package: ggplot2
## Loading required package: lattice
# Split 'data1' 75/25 into training and test sets.
set.seed(123) # Fix the RNG state so the random split, and the RMSE it leads to, can be reproduced.
n_train <- floor(nrow(data1) * 0.75) # The product is fractional; round down explicitly instead of relying on sample() to truncate.
train_index <- sample(nrow(data1), n_train) # Row indices drawn without replacement for training.
train <- data1[train_index, ]
test <- data1[-train_index, ] # All rows not drawn for training.
mlr_model <- lm(satisfaction_level ~ ., data = train) # Refit the all-predictor linear model on the training rows only.
predictions <- predict(mlr_model, newdata = test)
rmse <- RMSE(predictions, test$satisfaction_level) # Root mean squared error on the held-out rows.
rmse # Display the RMSE.
## [1] 0.2240144
summary(object = mlr_model) # Coefficient table and fit statistics for the model fitted on the training rows.
##
## Call:
## lm(formula = satisfaction_level ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64708 -0.13795 -0.01193 0.16964 0.52490
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.166e-01 1.472e-02 41.893 < 2e-16 ***
## last_evaluation 2.432e-01 1.346e-02 18.070 < 2e-16 ***
## number_project -3.981e-02 1.953e-03 -20.381 < 2e-16 ***
## average_montly_hours 1.388e-04 4.743e-05 2.927 0.00343 **
## time_spend_company -5.175e-03 1.489e-03 -3.476 0.00051 ***
## Work_accident -4.362e-03 5.981e-03 -0.729 0.46576
## left -2.276e-01 5.084e-03 -44.777 < 2e-16 ***
## promotion_last_5years 1.826e-02 1.465e-02 1.247 0.21249
## Department 1.739e-03 7.660e-04 2.271 0.02319 *
## salary 2.741e-03 3.351e-03 0.818 0.41343
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2223 on 11239 degrees of freedom
## Multiple R-squared: 0.2, Adjusted R-squared: 0.1993
## F-statistic: 312.2 on 9 and 11239 DF, p-value: < 2.2e-16
library(caret) # Loads the caret package.
# Split 'data1' 75/25 into training and test sets.
set.seed(123) # Fix the RNG state so the split and the cross-validation folds can be reproduced.
n_train <- floor(nrow(data1) * 0.75) # The product is fractional; round down explicitly.
train_index <- sample(nrow(data1), n_train)
train <- data1[train_index, ]
test <- data1[-train_index, ]
# k-NN is distance-based, so predictors on a large scale (average_montly_hours spans 96-310)
# would otherwise dominate those on a small one (last_evaluation spans 0.36-1). Centre and
# scale through train() so the same transformation is re-applied by predict().
knn_model <- train(
  satisfaction_level ~ .,
  data = train,
  method = "knn",
  preProcess = c("center", "scale"),
  trControl = trainControl(method = "cv", number = 5), # 5-fold cross-validation.
  tuneLength = 10 # Evaluate 10 candidate values of k.
)
predictions <- predict(knn_model, newdata = test)
rmse <- RMSE(predictions, test$satisfaction_level) # Root mean squared error on the held-out rows.
rmse # Display the RMSE.
## [1] 0.1964364
print(knn_model) # Show the resampling results and the selected k.
## k-Nearest Neighbors
##
## 11249 samples
## 9 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 8998, 9000, 9000, 8998, 9000
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 0.2060301 0.3381178 0.1524995
## 7 0.2034742 0.3435083 0.1519791
## 9 0.2031038 0.3409996 0.1529153
## 11 0.2024010 0.3421331 0.1533562
## 13 0.2022033 0.3416447 0.1536898
## 15 0.2023878 0.3391117 0.1544352
## 17 0.2025088 0.3372876 0.1549230
## 19 0.2029253 0.3340218 0.1556012
## 21 0.2032374 0.3315789 0.1560191
## 23 0.2038273 0.3275502 0.1567571
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 13.
library(xgboost)
# Split 'data1' 75/25 into training and test sets.
set.seed(123) # Fix the RNG state so the random split can be reproduced.
n_train <- floor(nrow(data1) * 0.75) # The product is fractional; round down explicitly.
train_index <- sample(nrow(data1), n_train)
train <- data1[train_index, ]
test <- data1[-train_index, ]
# Wrap the predictors and the response in xgb.DMatrix objects. Predictors are selected by
# name (everything except the response) rather than by assuming the response is column 1.
feature_cols <- setdiff(names(train), "satisfaction_level")
dtrain <- xgb.DMatrix(data = as.matrix(train[, feature_cols]), label = train$satisfaction_level)
dtest <- xgb.DMatrix(data = as.matrix(test[, feature_cols]), label = test$satisfaction_level)
Random Forest Model Training and Testing
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
# Split 'data1' 80/20 into training and test sets.
set.seed(123) # Fix the RNG state so the split and the forest's random draws can be reproduced.
n_train <- floor(nrow(data1) * 0.80) # The product is fractional; round down explicitly.
train_index <- sample(nrow(data1), n_train)
train <- data1[train_index, ]
test <- data1[-train_index, ]
# Regression forest with 20 trees on all predictors.
rf_model <- randomForest(satisfaction_level ~ ., data = train, ntree = 20)
predicted_values <- predict(rf_model, newdata = test)
rmse <- sqrt(mean((test$satisfaction_level - predicted_values)^2)) # Root mean squared error on the held-out rows.
rmse # Display the RMSE.
## [1] 0.1743369
print(rf_model) # Show the fitted forest's call, tree count and out-of-bag fit statistics.
##
## Call:
## randomForest(formula = satisfaction_level ~ ., data = train, ntree = 20)
## Type of random forest: regression
## Number of trees: 20
## No. of variables tried at each split: 3
##
## Mean of squared residuals: 0.03098704
## % Var explained: 49.86
# Department is nominal; its integer codes follow alphabetical factor order and act as labels only.
data$Department <- as.numeric(as.factor(data$Department))
# salary is ordinal. The default alphabetical factor order would code it high = 1, low = 2,
# medium = 3; give the levels explicitly so the codes rise with pay (low = 1, medium = 2,
# high = 3), and stop if an unexpected level is present rather than producing NA.
salary_levels <- c("low", "medium", "high")
stopifnot(all(data$salary %in% salary_levels))
data$salary <- as.numeric(factor(data$salary, levels = salary_levels))
XGBoost Model Training and Testing
library(xgboost)
# Split 'data' 75/25 into training and test sets.
set.seed(123) # Fix the RNG state so the random split can be reproduced.
n_train <- floor(nrow(data) * 0.75) # The product is fractional; round down explicitly.
train_index <- sample(nrow(data), n_train)
train <- data[train_index, ]
test <- data[-train_index, ]
# Fit a boosted model for satisfaction_level on every other column, for 500 rounds.
# NOTE(review): the recorded log ends at train-rmse 0.0587 while the recorded held-out RMSE
# is 0.1886, which suggests overfitting - consider choosing nrounds by cross-validation
# (e.g. xgb.cv) instead of fixing it at 500.
xgb_model <- xgboost(
  data = as.matrix(train[, -which(names(train) == "satisfaction_level")]),
  label = train$satisfaction_level,
  nrounds = 500
)
## [1] train-rmse:0.231641
## [2] train-rmse:0.204860
## [3] train-rmse:0.189708
## [4] train-rmse:0.181412
## [5] train-rmse:0.177042
## [6] train-rmse:0.174257
## [7] train-rmse:0.172619
## [8] train-rmse:0.171128
## [9] train-rmse:0.169610
## [10] train-rmse:0.168743
## [11] train-rmse:0.167790
## [12] train-rmse:0.167326
## [13] train-rmse:0.166649
## [14] train-rmse:0.165453
## [15] train-rmse:0.164473
## [16] train-rmse:0.164070
## [17] train-rmse:0.163139
## [18] train-rmse:0.162357
## [19] train-rmse:0.161712
## [20] train-rmse:0.160616
## [21] train-rmse:0.159759
## [22] train-rmse:0.159237
## [23] train-rmse:0.158750
## [24] train-rmse:0.158170
## [25] train-rmse:0.157047
## [26] train-rmse:0.156589
## [27] train-rmse:0.156265
## [28] train-rmse:0.155343
## [29] train-rmse:0.155052
## [30] train-rmse:0.154081
## [31] train-rmse:0.153315
## [32] train-rmse:0.152651
## [33] train-rmse:0.151912
## [34] train-rmse:0.151346
## [35] train-rmse:0.150700
## [36] train-rmse:0.150268
## [37] train-rmse:0.149633
## [38] train-rmse:0.149181
## [39] train-rmse:0.148430
## [40] train-rmse:0.147548
## [41] train-rmse:0.146982
## [42] train-rmse:0.146251
## [43] train-rmse:0.145442
## [44] train-rmse:0.144816
## [45] train-rmse:0.144195
## [46] train-rmse:0.143412
## [47] train-rmse:0.143138
## [48] train-rmse:0.142835
## [49] train-rmse:0.142522
## [50] train-rmse:0.142431
## [51] train-rmse:0.142122
## [52] train-rmse:0.141806
## [53] train-rmse:0.141371
## [54] train-rmse:0.140837
## [55] train-rmse:0.140633
## [56] train-rmse:0.140162
## [57] train-rmse:0.139928
## [58] train-rmse:0.139520
## [59] train-rmse:0.139376
## [60] train-rmse:0.138874
## [61] train-rmse:0.138794
## [62] train-rmse:0.138428
## [63] train-rmse:0.137565
## [64] train-rmse:0.137092
## [65] train-rmse:0.136370
## [66] train-rmse:0.136034
## [67] train-rmse:0.135632
## [68] train-rmse:0.135265
## [69] train-rmse:0.134907
## [70] train-rmse:0.134500
## [71] train-rmse:0.134071
## [72] train-rmse:0.133435
## [73] train-rmse:0.132785
## [74] train-rmse:0.132607
## [75] train-rmse:0.132063
## [76] train-rmse:0.131854
## [77] train-rmse:0.131637
## [78] train-rmse:0.131281
## [79] train-rmse:0.131077
## [80] train-rmse:0.130602
## [81] train-rmse:0.130424
## [82] train-rmse:0.130090
## [83] train-rmse:0.129542
## [84] train-rmse:0.129033
## [85] train-rmse:0.128574
## [86] train-rmse:0.128234
## [87] train-rmse:0.128026
## [88] train-rmse:0.127811
## [89] train-rmse:0.127326
## [90] train-rmse:0.127007
## [91] train-rmse:0.126502
## [92] train-rmse:0.126146
## [93] train-rmse:0.125653
## [94] train-rmse:0.125188
## [95] train-rmse:0.124979
## [96] train-rmse:0.124906
## [97] train-rmse:0.124619
## [98] train-rmse:0.124369
## [99] train-rmse:0.124101
## [100] train-rmse:0.124000
## [101] train-rmse:0.123935
## [102] train-rmse:0.123758
## [103] train-rmse:0.123437
## [104] train-rmse:0.123305
## [105] train-rmse:0.122981
## [106] train-rmse:0.122537
## [107] train-rmse:0.122326
## [108] train-rmse:0.121806
## [109] train-rmse:0.121535
## [110] train-rmse:0.120983
## [111] train-rmse:0.120639
## [112] train-rmse:0.120258
## [113] train-rmse:0.119867
## [114] train-rmse:0.119552
## [115] train-rmse:0.119352
## [116] train-rmse:0.119278
## [117] train-rmse:0.119056
## [118] train-rmse:0.118537
## [119] train-rmse:0.118191
## [120] train-rmse:0.118105
## [121] train-rmse:0.118071
## [122] train-rmse:0.118039
## [123] train-rmse:0.117631
## [124] train-rmse:0.117254
## [125] train-rmse:0.116927
## [126] train-rmse:0.116497
## [127] train-rmse:0.116354
## [128] train-rmse:0.115963
## [129] train-rmse:0.115566
## [130] train-rmse:0.115092
## [131] train-rmse:0.114776
## [132] train-rmse:0.114530
## [133] train-rmse:0.114455
## [134] train-rmse:0.114024
## [135] train-rmse:0.113626
## [136] train-rmse:0.113566
## [137] train-rmse:0.113500
## [138] train-rmse:0.113083
## [139] train-rmse:0.112696
## [140] train-rmse:0.112347
## [141] train-rmse:0.112002
## [142] train-rmse:0.111871
## [143] train-rmse:0.111452
## [144] train-rmse:0.111084
## [145] train-rmse:0.110766
## [146] train-rmse:0.110543
## [147] train-rmse:0.110307
## [148] train-rmse:0.110020
## [149] train-rmse:0.109644
## [150] train-rmse:0.109234
## [151] train-rmse:0.108886
## [152] train-rmse:0.108642
## [153] train-rmse:0.108292
## [154] train-rmse:0.108149
## [155] train-rmse:0.107819
## [156] train-rmse:0.107578
## [157] train-rmse:0.107128
## [158] train-rmse:0.106951
## [159] train-rmse:0.106832
## [160] train-rmse:0.106478
## [161] train-rmse:0.106225
## [162] train-rmse:0.106005
## [163] train-rmse:0.105721
## [164] train-rmse:0.105461
## [165] train-rmse:0.105347
## [166] train-rmse:0.105122
## [167] train-rmse:0.105026
## [168] train-rmse:0.104627
## [169] train-rmse:0.104531
## [170] train-rmse:0.104358
## [171] train-rmse:0.104167
## [172] train-rmse:0.103865
## [173] train-rmse:0.103735
## [174] train-rmse:0.103596
## [175] train-rmse:0.103296
## [176] train-rmse:0.103063
## [177] train-rmse:0.102693
## [178] train-rmse:0.102351
## [179] train-rmse:0.102217
## [180] train-rmse:0.102199
## [181] train-rmse:0.101878
## [182] train-rmse:0.101808
## [183] train-rmse:0.101681
## [184] train-rmse:0.101549
## [185] train-rmse:0.101408
## [186] train-rmse:0.101203
## [187] train-rmse:0.101020
## [188] train-rmse:0.100651
## [189] train-rmse:0.100438
## [190] train-rmse:0.100330
## [191] train-rmse:0.100188
## [192] train-rmse:0.100026
## [193] train-rmse:0.099771
## [194] train-rmse:0.099675
## [195] train-rmse:0.099361
## [196] train-rmse:0.099063
## [197] train-rmse:0.099030
## [198] train-rmse:0.098982
## [199] train-rmse:0.098960
## [200] train-rmse:0.098765
## [201] train-rmse:0.098697
## [202] train-rmse:0.098383
## [203] train-rmse:0.098213
## [204] train-rmse:0.097906
## [205] train-rmse:0.097588
## [206] train-rmse:0.097307
## [207] train-rmse:0.097224
## [208] train-rmse:0.096908
## [209] train-rmse:0.096867
## [210] train-rmse:0.096692
## [211] train-rmse:0.096496
## [212] train-rmse:0.096390
## [213] train-rmse:0.096165
## [214] train-rmse:0.095971
## [215] train-rmse:0.095757
## [216] train-rmse:0.095712
## [217] train-rmse:0.095643
## [218] train-rmse:0.095443
## [219] train-rmse:0.095140
## [220] train-rmse:0.094992
## [221] train-rmse:0.094949
## [222] train-rmse:0.094587
## [223] train-rmse:0.094452
## [224] train-rmse:0.094333
## [225] train-rmse:0.094313
## [226] train-rmse:0.094122
## [227] train-rmse:0.093906
## [228] train-rmse:0.093833
## [229] train-rmse:0.093517
## [230] train-rmse:0.093238
## [231] train-rmse:0.092919
## [232] train-rmse:0.092694
## [233] train-rmse:0.092610
## [234] train-rmse:0.092592
## [235] train-rmse:0.092241
## [236] train-rmse:0.091961
## [237] train-rmse:0.091541
## [238] train-rmse:0.091493
## [239] train-rmse:0.091217
## [240] train-rmse:0.091182
## [241] train-rmse:0.090913
## [242] train-rmse:0.090784
## [243] train-rmse:0.090726
## [244] train-rmse:0.090382
## [245] train-rmse:0.090269
## [246] train-rmse:0.089989
## [247] train-rmse:0.089880
## [248] train-rmse:0.089698
## [249] train-rmse:0.089519
## [250] train-rmse:0.089420
## [251] train-rmse:0.089392
## [252] train-rmse:0.089148
## [253] train-rmse:0.088843
## [254] train-rmse:0.088721
## [255] train-rmse:0.088542
## [256] train-rmse:0.088309
## [257] train-rmse:0.088261
## [258] train-rmse:0.088242
## [259] train-rmse:0.088122
## [260] train-rmse:0.088018
## [261] train-rmse:0.087997
## [262] train-rmse:0.087936
## [263] train-rmse:0.087628
## [264] train-rmse:0.087585
## [265] train-rmse:0.087546
## [266] train-rmse:0.087484
## [267] train-rmse:0.087405
## [268] train-rmse:0.087284
## [269] train-rmse:0.086981
## [270] train-rmse:0.086860
## [271] train-rmse:0.086792
## [272] train-rmse:0.086575
## [273] train-rmse:0.086431
## [274] train-rmse:0.086420
## [275] train-rmse:0.086274
## [276] train-rmse:0.086171
## [277] train-rmse:0.086134
## [278] train-rmse:0.085765
## [279] train-rmse:0.085686
## [280] train-rmse:0.085622
## [281] train-rmse:0.085395
## [282] train-rmse:0.085258
## [283] train-rmse:0.085098
## [284] train-rmse:0.084904
## [285] train-rmse:0.084802
## [286] train-rmse:0.084600
## [287] train-rmse:0.084393
## [288] train-rmse:0.084093
## [289] train-rmse:0.083880
## [290] train-rmse:0.083719
## [291] train-rmse:0.083572
## [292] train-rmse:0.083394
## [293] train-rmse:0.083138
## [294] train-rmse:0.082966
## [295] train-rmse:0.082681
## [296] train-rmse:0.082585
## [297] train-rmse:0.082462
## [298] train-rmse:0.082233
## [299] train-rmse:0.082139
## [300] train-rmse:0.082104
## [301] train-rmse:0.081945
## [302] train-rmse:0.081875
## [303] train-rmse:0.081793
## [304] train-rmse:0.081682
## [305] train-rmse:0.081501
## [306] train-rmse:0.081230
## [307] train-rmse:0.081174
## [308] train-rmse:0.080986
## [309] train-rmse:0.080924
## [310] train-rmse:0.080722
## [311] train-rmse:0.080645
## [312] train-rmse:0.080450
## [313] train-rmse:0.080285
## [314] train-rmse:0.080253
## [315] train-rmse:0.080170
## [316] train-rmse:0.079924
## [317] train-rmse:0.079816
## [318] train-rmse:0.079584
## [319] train-rmse:0.079304
## [320] train-rmse:0.079117
## [321] train-rmse:0.078950
## [322] train-rmse:0.078781
## [323] train-rmse:0.078656
## [324] train-rmse:0.078651
## [325] train-rmse:0.078597
## [326] train-rmse:0.078425
## [327] train-rmse:0.078337
## [328] train-rmse:0.078163
## [329] train-rmse:0.077911
## [330] train-rmse:0.077766
## [331] train-rmse:0.077617
## [332] train-rmse:0.077509
## [333] train-rmse:0.077489
## [334] train-rmse:0.077309
## [335] train-rmse:0.077193
## [336] train-rmse:0.076913
## [337] train-rmse:0.076783
## [338] train-rmse:0.076580
## [339] train-rmse:0.076364
## [340] train-rmse:0.076329
## [341] train-rmse:0.076162
## [342] train-rmse:0.076149
## [343] train-rmse:0.076128
## [344] train-rmse:0.076054
## [345] train-rmse:0.075994
## [346] train-rmse:0.075966
## [347] train-rmse:0.075797
## [348] train-rmse:0.075591
## [349] train-rmse:0.075354
## [350] train-rmse:0.075149
## [351] train-rmse:0.075083
## [352] train-rmse:0.074989
## [353] train-rmse:0.074789
## [354] train-rmse:0.074688
## [355] train-rmse:0.074459
## [356] train-rmse:0.074346
## [357] train-rmse:0.074202
## [358] train-rmse:0.074069
## [359] train-rmse:0.073948
## [360] train-rmse:0.073759
## [361] train-rmse:0.073608
## [362] train-rmse:0.073478
## [363] train-rmse:0.073195
## [364] train-rmse:0.073018
## [365] train-rmse:0.072808
## [366] train-rmse:0.072567
## [367] train-rmse:0.072435
## [368] train-rmse:0.072321
## [369] train-rmse:0.072215
## [370] train-rmse:0.072068
## [371] train-rmse:0.072029
## [372] train-rmse:0.071981
## [373] train-rmse:0.071941
## [374] train-rmse:0.071843
## [375] train-rmse:0.071746
## [376] train-rmse:0.071590
## [377] train-rmse:0.071510
## [378] train-rmse:0.071357
## [379] train-rmse:0.071166
## [380] train-rmse:0.071039
## [381] train-rmse:0.070887
## [382] train-rmse:0.070791
## [383] train-rmse:0.070696
## [384] train-rmse:0.070547
## [385] train-rmse:0.070384
## [386] train-rmse:0.070339
## [387] train-rmse:0.070255
## [388] train-rmse:0.070232
## [389] train-rmse:0.070114
## [390] train-rmse:0.069932
## [391] train-rmse:0.069749
## [392] train-rmse:0.069632
## [393] train-rmse:0.069462
## [394] train-rmse:0.069377
## [395] train-rmse:0.069291
## [396] train-rmse:0.069191
## [397] train-rmse:0.069143
## [398] train-rmse:0.069076
## [399] train-rmse:0.068984
## [400] train-rmse:0.068891
## [401] train-rmse:0.068812
## [402] train-rmse:0.068799
## [403] train-rmse:0.068712
## [404] train-rmse:0.068530
## [405] train-rmse:0.068418
## [406] train-rmse:0.068253
## [407] train-rmse:0.068078
## [408] train-rmse:0.067915
## [409] train-rmse:0.067747
## [410] train-rmse:0.067697
## [411] train-rmse:0.067594
## [412] train-rmse:0.067452
## [413] train-rmse:0.067410
## [414] train-rmse:0.067320
## [415] train-rmse:0.067316
## [416] train-rmse:0.067266
## [417] train-rmse:0.067217
## [418] train-rmse:0.067075
## [419] train-rmse:0.066936
## [420] train-rmse:0.066761
## [421] train-rmse:0.066602
## [422] train-rmse:0.066413
## [423] train-rmse:0.066378
## [424] train-rmse:0.066273
## [425] train-rmse:0.066241
## [426] train-rmse:0.066181
## [427] train-rmse:0.066056
## [428] train-rmse:0.065920
## [429] train-rmse:0.065827
## [430] train-rmse:0.065621
## [431] train-rmse:0.065564
## [432] train-rmse:0.065547
## [433] train-rmse:0.065393
## [434] train-rmse:0.065329
## [435] train-rmse:0.065314
## [436] train-rmse:0.065150
## [437] train-rmse:0.065008
## [438] train-rmse:0.064854
## [439] train-rmse:0.064688
## [440] train-rmse:0.064561
## [441] train-rmse:0.064397
## [442] train-rmse:0.064338
## [443] train-rmse:0.064316
## [444] train-rmse:0.064102
## [445] train-rmse:0.063933
## [446] train-rmse:0.063796
## [447] train-rmse:0.063639
## [448] train-rmse:0.063521
## [449] train-rmse:0.063482
## [450] train-rmse:0.063428
## [451] train-rmse:0.063313
## [452] train-rmse:0.063293
## [453] train-rmse:0.063225
## [454] train-rmse:0.063144
## [455] train-rmse:0.063032
## [456] train-rmse:0.062950
## [457] train-rmse:0.062937
## [458] train-rmse:0.062912
## [459] train-rmse:0.062827
## [460] train-rmse:0.062789
## [461] train-rmse:0.062738
## [462] train-rmse:0.062647
## [463] train-rmse:0.062557
## [464] train-rmse:0.062456
## [465] train-rmse:0.062413
## [466] train-rmse:0.062358
## [467] train-rmse:0.062278
## [468] train-rmse:0.062116
## [469] train-rmse:0.062037
## [470] train-rmse:0.062021
## [471] train-rmse:0.061983
## [472] train-rmse:0.061808
## [473] train-rmse:0.061724
## [474] train-rmse:0.061543
## [475] train-rmse:0.061420
## [476] train-rmse:0.061406
## [477] train-rmse:0.061366
## [478] train-rmse:0.061267
## [479] train-rmse:0.061226
## [480] train-rmse:0.061055
## [481] train-rmse:0.061036
## [482] train-rmse:0.060881
## [483] train-rmse:0.060781
## [484] train-rmse:0.060596
## [485] train-rmse:0.060491
## [486] train-rmse:0.060406
## [487] train-rmse:0.060332
## [488] train-rmse:0.060270
## [489] train-rmse:0.060174
## [490] train-rmse:0.060075
## [491] train-rmse:0.059991
## [492] train-rmse:0.059809
## [493] train-rmse:0.059692
## [494] train-rmse:0.059516
## [495] train-rmse:0.059320
## [496] train-rmse:0.059189
## [497] train-rmse:0.059104
## [498] train-rmse:0.058993
## [499] train-rmse:0.058861
## [500] train-rmse:0.058710
# Score the held-out rows with the fitted booster, using every column except the response.
predicted_values <- predict(
  xgb_model,
  newdata = as.matrix(test[, names(test) != "satisfaction_level"])
)
# Root mean squared error between the observed and predicted satisfaction levels.
rmse <- sqrt(mean((test$satisfaction_level - predicted_values)^2))
print(rmse)
## [1] 0.1886214