# Section 2: Regression Tree ----

# Packages for this section: readxl imports the workbook, rpart fits the tree.
# Lines beginning with `##` are console output recorded from the original run.
library(readxl)
library(rpart)
## Warning: package 'rpart' was built under R version 4.5.3

# Import the employee performance data from the Excel workbook.
EmployeePerformance <- read_excel(path = "EmployeePerformance.xlsx")

# Fit a regression tree (method = "anova") for PerformanceScore using the
# four employee attributes as predictors.
TreeModel <- rpart(
  formula = PerformanceScore ~ YearsofExperience + AverageMonthlyWorkHours +
    TrainingHours + CommunicationSkills,
  data = EmployeePerformance,
  method = "anova"
)

# Profile of the hypothetical employee to score; the column names must match
# the predictor names used in the model formula.
NewEmployee <- data.frame(
  YearsofExperience = 5,
  AverageMonthlyWorkHours = 160,
  TrainingHours = 20,
  CommunicationSkills = 8
)

# Predicted performance score for that profile.
PredictedPerformance <- predict(TreeModel, newdata = NewEmployee)

# Display the prediction rounded to four decimal places.
round(PredictedPerformance, digits = 4)
##       1 
## 81.2632

# Section 3: Logistic Regression ----

# readxl supplies read_excel() for importing the attrition workbook.
# Lines beginning with `##` are console output recorded from the original run.
library(readxl)

# Import the attrition data and make the response a factor so that glm()
# treats it as a two-class outcome.
EmployeeAttrition <- read_excel(path = "EmployeeAttrition.xlsx")
EmployeeAttrition$Attrition <- as.factor(EmployeeAttrition$Attrition)

# Logistic regression of Attrition on demographic and job-related predictors.
# NOTE(review): with a factor response, binomial glm() treats the first factor
# level as "failure" and the rest as "success" -- confirm the level order of
# Attrition matches the intended "leaves the company" outcome.
LogisticModel <- glm(
  formula = Attrition ~ Age + Gender + EducationField +
    JobSatisfaction + YearsAtCompany,
  family = "binomial",
  data = EmployeeAttrition
)

# Coefficient table and deviance statistics for the fitted model.
summary(LogisticModel)
## 
## Call:
## glm(formula = Attrition ~ Age + Gender + EducationField + JobSatisfaction + 
##     YearsAtCompany, family = "binomial", data = EmployeeAttrition)
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)  
## (Intercept)             -0.04530    2.97676  -0.015   0.9879  
## Age                     -0.07519    0.06990  -1.076   0.2821  
## GenderMale               0.38150    1.33620   0.286   0.7753  
## EducationFieldMarketing  3.83646    1.72786   2.220   0.0264 *
## EducationFieldMedical    1.63655    1.96075   0.835   0.4039  
## JobSatisfaction         -0.25292    0.48325  -0.523   0.6007  
## YearsAtCompany           0.17981    0.25348   0.709   0.4781  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26.920  on 19  degrees of freedom
## Residual deviance: 17.384  on 13  degrees of freedom
## AIC: 31.384
## 
## Number of Fisher Scoring iterations: 5

# Profile of the hypothetical employee to score; the column names must match
# the predictor names used in the model formula.
NewEmployee <- data.frame(
  Age = 30,
  Gender = "Female",
  EducationField = "Marketing",
  JobSatisfaction = 4,
  YearsAtCompany = 1
)

# type = "response" returns the fitted probability rather than the log-odds.
ProbabilityLeave <- predict(
  LogisticModel,
  newdata = NewEmployee,
  type = "response"
)

# Display the predicted probability rounded to four decimal places.
round(ProbabilityLeave, digits = 4)
##     1 
## 0.669