A2

library(readr)

# Load the HR dataset (comma-separated) from its GitHub-hosted copy.
# Column types are left for readr to guess from the file contents.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)

## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Show the dataset's structure: one entry per column, giving the column
# name, its data type, and the first few values.
str(object = hr)

## spc_tbl_ [14,999 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ satisfaction_level   : num [1:14999] 0.38 0.8 0.11 0.72 0.37 0.41 0.1 0.92 0.89 0.42 ...
##  $ last_evaluation      : num [1:14999] 0.53 0.86 0.88 0.87 0.52 0.5 0.77 0.85 1 0.53 ...
##  $ number_project       : num [1:14999] 2 5 7 5 2 2 6 5 5 2 ...
##  $ average_montly_hours : num [1:14999] 157 262 272 223 159 153 247 259 224 142 ...
##  $ time_spend_company   : num [1:14999] 3 6 4 5 3 3 4 5 5 3 ...
##  $ Work_accident        : num [1:14999] 0 0 0 0 0 0 0 0 0 0 ...
##  $ left                 : num [1:14999] 1 1 1 1 1 1 1 1 1 1 ...
##  $ promotion_last_5years: num [1:14999] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Department           : chr [1:14999] "sales" "sales" "sales" "sales" ...
##  $ salary               : chr [1:14999] "low" "medium" "medium" "low" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   satisfaction_level = col_double(),
##   ..   last_evaluation = col_double(),
##   ..   number_project = col_double(),
##   ..   average_montly_hours = col_double(),
##   ..   time_spend_company = col_double(),
##   ..   Work_accident = col_double(),
##   ..   left = col_double(),
##   ..   promotion_last_5years = col_double(),
##   ..   Department = col_character(),
##   ..   salary = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

# Variable classification (as comments):
# satisfaction_level: numerical
# last_evaluation: numerical
# number_project: numerical
# average_montly_hours: numerical
# time_spend_company: numerical
# Work_accident: categorical (binary)
# left: categorical (binary)
# promotion_last_5years: categorical (binary)
# Department: categorical (department names)
# salary: categorical (low/medium/high)

# Print the first six rows as a quick sanity check that the data loaded.
head(hr, n = 6L)

## # A tibble: 6 × 10
##   satisfaction_level last_evaluation number_project average_montly_hours
##                <dbl>           <dbl>          <dbl>                <dbl>
## 1               0.38            0.53              2                  157
## 2               0.8             0.86              5                  262
## 3               0.11            0.88              7                  272
## 4               0.72            0.87              5                  223
## 5               0.37            0.52              2                  159
## 6               0.41            0.5               2                  153
## # ℹ 6 more variables: time_spend_company <dbl>, Work_accident <dbl>,
## #   left <dbl>, promotion_last_5years <dbl>, Department <chr>, salary <chr>

A2

Nick

2024-12-05