# Section 2: Regression Tree ----
library(readxl)
library(rpart)
## Warning: package 'rpart' was built under R version 4.5.3

# Load the employee performance workbook (path is relative to the working
# directory).
EmployeePerformance <- read_excel("EmployeePerformance.xlsx")

# Fit a regression tree (method = "anova") that models PerformanceScore from
# the four employee attributes listed in the formula.
TreeModel <- rpart(
  PerformanceScore ~ YearsofExperience + AverageMonthlyWorkHours +
    TrainingHours + CommunicationSkills,
  data = EmployeePerformance,
  method = "anova"
)

# A single hypothetical employee; the column names mirror the predictors used
# in the formula above so predict() can look them up.
NewEmployee <- data.frame(
  YearsofExperience = 5,
  AverageMonthlyWorkHours = 160,
  TrainingHours = 20,
  CommunicationSkills = 8
)

# Predicted performance score for that employee, shown to four decimals.
PredictedPerformance <- predict(TreeModel, newdata = NewEmployee)
round(PredictedPerformance, digits = 4)
## 1
## 81.2632
# Section 3: Logistic Regression ----
library(readxl)

# Load the attrition workbook (path is relative to the working directory).
EmployeeAttrition <- read_excel("EmployeeAttrition.xlsx")

# Convert the outcome to a factor for the binomial fit. With a factor response,
# glm() treats the first level as "failure" and models the remaining level.
EmployeeAttrition$Attrition <- as.factor(EmployeeAttrition$Attrition)

# Logistic regression of Attrition on demographic and job-related predictors.
LogisticModel <- glm(
  formula = Attrition ~ Age + Gender + EducationField + JobSatisfaction +
    YearsAtCompany,
  family = "binomial",
  data = EmployeeAttrition
)
summary(LogisticModel)
##
## Call:
## glm(formula = Attrition ~ Age + Gender + EducationField + JobSatisfaction +
## YearsAtCompany, family = "binomial", data = EmployeeAttrition)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.04530 2.97676 -0.015 0.9879
## Age -0.07519 0.06990 -1.076 0.2821
## GenderMale 0.38150 1.33620 0.286 0.7753
## EducationFieldMarketing 3.83646 1.72786 2.220 0.0264 *
## EducationFieldMedical 1.63655 1.96075 0.835 0.4039
## JobSatisfaction -0.25292 0.48325 -0.523 0.6007
## YearsAtCompany 0.17981 0.25348 0.709 0.4781
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 26.920 on 19 degrees of freedom
## Residual deviance: 17.384 on 13 degrees of freedom
## AIC: 31.384
##
## Number of Fisher Scoring iterations: 5

# A single hypothetical employee to score. This reassigns NewEmployee,
# replacing the data frame of the same name built for the regression tree.
NewEmployee <- data.frame(
  Age = 30,
  Gender = "Female",
  EducationField = "Marketing",
  JobSatisfaction = 4,
  YearsAtCompany = 1
)

# type = "response" returns the fitted probability rather than the log-odds.
# NOTE(review): this is the probability of the non-reference Attrition level;
# confirm that level corresponds to the employee leaving.
ProbabilityLeave <- predict(
  LogisticModel,
  newdata = NewEmployee,
  type = "response"
)
round(ProbabilityLeave, digits = 4)
## 1
## 0.669