# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.3.3
## Loading required package: ggplot2
library(ggplot2)
# Read the HR dataset; the path is relative, so the CSV is looked up in the
# current working directory.
data <- read.csv(file = "HR_comma_sep-1.csv")
# 1.(a) Box plot for employee satisfaction
# Recode the 0/1 `left` indicator as a labelled factor so the plot axis reads
# "Stayed"/"Left". The guard keeps the recode idempotent: calling factor()
# with levels c(0, 1) on the already-labelled factor would match nothing and
# turn every value into NA when this section is re-run.
if (!identical(levels(data$left), c("Stayed", "Left"))) {
  data$left <- factor(
    data$left,
    levels = c(0, 1),
    labels = c("Stayed", "Left")
  )
}
# One box per level of `left`, showing the spread of satisfaction_level.
satisfaction_plot <- ggplot(data, aes(x = left, y = satisfaction_level)) +
  geom_boxplot() +
  labs(
    title = "Employee Satisfaction by Left Status",
    x = "Employee Status",
    y = "Satisfaction Level"
  ) +
  theme_minimal()
# Auto-print the plot object so it is rendered.
satisfaction_plot
# 1.(a) Box plot for last evaluation
# One box per level of `left`, showing the spread of last_evaluation.
evaluation_plot <- ggplot(
  data = data,
  mapping = aes(x = left, y = last_evaluation)
) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    title = "Last Evaluation by Left Status",
    x = "Employee Status",
    y = "Last Evaluation"
  )
# Auto-print the plot object so it is rendered.
evaluation_plot
# 2. Correlogram of the continuous variables in the HR_comma_sep dataset
# Keep only the numeric measurement columns. `Work_accident` and
# `promotion_last_5years` are intentionally excluded: in the public
# HR_comma_sep data they are 0/1 indicator flags, not continuous measures.
# NOTE(review): confirm against this copy of the CSV. If a numeric tenure
# column (`time_spend_company` in the public dataset) is present, consider
# adding it here.
continuous_vars <- data %>%
  select(
    satisfaction_level,
    last_evaluation,
    number_project,
    average_montly_hours # spelling matches the column name used in the data
  )
# Pairwise Pearson correlations. Rows containing missing values are dropped
# so that an NA in the data cannot propagate into the correlation matrix.
correlation_matrix <- cor(continuous_vars, use = "complete.obs")
# Draw the upper triangle as circles, with the coefficient printed in each
# cell; colours run from dark blue through white to orange.
ggcorrplot(
  correlation_matrix,
  method = "circle",
  type = "upper",
  title = "Correlogram of Continuous Variables",
  lab = TRUE,
  colors = c("darkblue", "white", "orange")
)
# Print summary statistics for `cars`.
# NOTE(review): `cars` is not created anywhere in this script and is unrelated
# to the HR data loaded above; this looks like leftover R Markdown template
# code. Confirm whether it should be removed.
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00