library(readr)
# Load the HR dataset straight from its public GitHub location into a tibble.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the dataset's structure: str() prints one line per variable,
# giving its name, its data type and the first few values.
str(object = hr)
## spc_tbl_ [14,999 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ satisfaction_level : num [1:14999] 0.38 0.8 0.11 0.72 0.37 0.41 0.1 0.92 0.89 0.42 ...
## $ last_evaluation : num [1:14999] 0.53 0.86 0.88 0.87 0.52 0.5 0.77 0.85 1 0.53 ...
## $ number_project : num [1:14999] 2 5 7 5 2 2 6 5 5 2 ...
## $ average_montly_hours : num [1:14999] 157 262 272 223 159 153 247 259 224 142 ...
## $ time_spend_company : num [1:14999] 3 6 4 5 3 3 4 5 5 3 ...
## $ Work_accident : num [1:14999] 0 0 0 0 0 0 0 0 0 0 ...
## $ left : num [1:14999] 1 1 1 1 1 1 1 1 1 1 ...
## $ promotion_last_5years: num [1:14999] 0 0 0 0 0 0 0 0 0 0 ...
## $ Department : chr [1:14999] "sales" "sales" "sales" "sales" ...
## $ salary : chr [1:14999] "low" "medium" "medium" "low" ...
## - attr(*, "spec")=
## .. cols(
## .. satisfaction_level = col_double(),
## .. last_evaluation = col_double(),
## .. number_project = col_double(),
## .. average_montly_hours = col_double(),
## .. time_spend_company = col_double(),
## .. Work_accident = col_double(),
## .. left = col_double(),
## .. promotion_last_5years = col_double(),
## .. Department = col_character(),
## .. salary = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Variable classification (as comments):
# satisfaction_level: numerical
# last_evaluation: numerical
# number_project: numerical
# average_montly_hours: numerical
# time_spend_company: numerical
# Work_accident: categorical (binary)
# left: categorical (binary)
# promotion_last_5years: categorical (binary)
# Department: categorical (department names, e.g. "sales")
# salary: categorical (low/medium/high)
# Preview the first six rows as a quick sanity check of the imported data.
head(hr, n = 6L)
## # A tibble: 6 × 10
## satisfaction_level last_evaluation number_project average_montly_hours
## <dbl> <dbl> <dbl> <dbl>
## 1 0.38 0.53 2 157
## 2 0.8 0.86 5 262
## 3 0.11 0.88 7 272
## 4 0.72 0.87 5 223
## 5 0.37 0.52 2 159
## 6 0.41 0.5 2 153
## # ℹ 6 more variables: time_spend_company <dbl>, Work_accident <dbl>,
## # left <dbl>, promotion_last_5years <dbl>, Department <chr>, salary <chr>