QC checks:
Variable: Employee ID
:
# Work on a copy so the raw extract stays untouched.
employ <- employ_test_raw
# Fail loudly if the ID column is absent: `$` on a missing column returns NULL,
# which would let the uniqueness check below pass vacuously.
stopifnot("employee_ID" %in% names(employ))
# IDs are unique when no value occurs more than once in the column.
paste("is Employee ID unique?", anyDuplicated(employ$employee_ID) == 0L)
## [1] "is Employee ID unique? TRUE"
Variable: Employment Status
: good to include as is
table(employ$employment_status)
##
## Active Paid Leave Retired Terminated Unpaid Leave
## 14069 17 552 2684 141
Variable: Employment Class
: remove Intern and Casual; maybe also remove Expatriate and Seasonal Worker
table(employ$employee_class)
##
## Contract Employee Employee Expatriate
## 187 17077 13
## Intern On Call / Casual Employee Seasonal Worker
## 16 17 153
Variable: Job Code
: too many unique values => remove
length(unique(employ$job_code))
## [1] 5292
hist(table(employ$job_code))

Variable: Job Name
: too many unique values => remove
length(unique(employ$job_name))
## [1] 4239
hist(table(employ$job_name))

Variable: Global Division
: good; combine "Digital" with "Data and Analytics"
table(employ$global_division)
##
## Branch Sales Operation Data and Analytics
## 1871 12
## Digital Finance
## 24 461
## GBS General and Administrative
## 653 568
## Human Resources Information Technology
## 390 163
## Innovation and Science Legal
## 1071 313
## Marketing Physical Presence
## 1175 2367
## Sales Supply Chain
## 2123 4902
## Technology
## 1370
Variable: Global Subdivision
: too many unique values => remove; maybe use it to better balance the large Global Division category ("Supply Chain")
length(unique(employ$g_subdivision))
## [1] 129
hist(table(employ$g_subdivision))

Variable: Department Name
: too many unique values => remove
length(unique(employ$department_name))
## [1] 2451
hist(table(employ$department_name))

Variable: Company
: *many unique values => consolidate?*
length(unique(employ$Company))
## [1] 79
hist(table(employ$Company))

Variable: Job Family Name
:
good
length(unique(employ$job_family_name))
## [1] 27
table(employ$job_family_name)
##
## Agribusiness
## 69
## Aviation
## 67
## Business Systems & Analytics
## 303
## Corporate Affairs
## 259
## Creative Specialties
## 336
## Engineering & Sciences
## 939
## Facilities
## 14
## Finance
## 1087
## General Prof & Leadership
## 340
## General Support
## 924
## Health & Safety
## 39
## Human Resources
## 411
## Information Technology
## 1189
## Legal
## 387
## Maintenance
## 393
## Marketing
## 891
## Production
## 1651
## Quality
## 286
## Regulatory
## 199
## Sales & ABO/IBO Support
## 3267
## Security
## 48
## Shop/Retail
## 2434
## Special Events
## 174
## Strategy
## 52
## Supply Chain
## 646
## Unknown
## 81
## Warehousing, Distribution & Transportation
## 977
Variable: Job Sub Family Name
: *possibly too many unique values?*
length(unique(employ$job_sub_family_name))
## [1] 176
hist(table(employ$job_sub_family_name))

Variable: Age Group
: good
table(employ$age_group)
##
## 0-19 20-24 25-34 35-44 45-54 55+
## 33 675 5031 6515 3391 1818
Variable: tenure_bin
: good
table(employ$tenure_bin)
##
## 0-1 years 1-3 years 10-15 years 15-20 years 20-25 years 25-30 years
## 2081 2480 3524 2078 907 904
## 3-5 years 30+ years 5-10 years
## 1729 720 3040
Variable: Gender
: good
table(employ$gender)
##
## Female Male
## 9211 8252
Variable: Manager Status
: *?? What is this?*
table(employ$`Manager Status`)
##
## 0 1
## 14738 2725
Variable: Direct Span of Control
: this can be calculated from the manager ID field; maybe remove?
table(employ$span_of_control)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 14738 346 355 364 337 298 252 166 136 118 75 49 40
## 13 14 15 16 17 18 19 20 21 22 23 24 25
## 33 31 18 16 10 11 10 6 5 6 1 1 8
## 26 27 28 29 30 31 33 35 36 37 38 39 41
## 8 3 2 1 1 1 3 1 1 1 1 1 1
## 42 43 45 48 51 56 59 103
## 1 1 1 1 1 1 1 1
Variable: Work Location
:
length(table(employ$work_location))
## [1] 533
max(table(employ$work_location))
## [1] 3119
hist(table(employ$work_location))

Variable: Pay Type
: good; maybe remove the 3 "Not Applicable" records
table(employ$pay_type)
##
## Hourly Not Applicable Salaried
## 2308 3 15152
Variable: Global Grade Level
:
good
table(employ$gloabl_grade_level)
##
## 0-13 14-15 16-17 18-19 20+
## 9632 4087 2614 850 280
Variable: Compa Ratio Range
:
good ???? what is this ??
table(employ$`Compa Ratio Range`)
##
## 0-30% 100-105% 105-110% 110-125% 125-150% 150-200% 200-300% 30-50%
## 346 3485 1882 2132 251 25 5 31
## 300-400% 50-75% 75-85% 85-90% 85-95% 90-95% 95-100%
## 2 884 2035 1654 1 2008 2537
Variable: Competency Category
:
good ???? what is this ??
table(employ$`Competency Category`)
##
## Executive First-Level Leader Individual Contributor
## 280 2614 9632
## Mid-Level Leader Professional
## 850 4087
Variable: Top Talent Status
:
good
table(employ$`Top Talent Status`)
##
## All Other Employees High Performer High Potential
## 16628 303 240
## Local GCR High Performer Local GCR High Potential
## 169 123
Variable: Exit Reason
: *consolidate reasons*
table(employ$exit_reason)
##
## (Term) Contract Not Renewed (Term) Dissatisfied with Job
## 62 9
## (Term) Failure to Return from Lv (Term) Sever Involuntarily
## 5 45
## (Term) Unsatisfactory Perform (Term) Vol Retiree Separation
## 54 443
## (Term)Vol Non-Retiree Separation (Termination) Attendance
## 204 25
## (Termination) Death (Termination) Disability Retiree
## 14 1
## (Termination) Insubordination (Termination) Limited Retiree
## 4 5
## (Termination) Misconduct (Termination) Mutual Separation
## 40 328
## (Termination) Other Involuntary (Termination) Regular Retiree
## 118 97
## (Termination) Resignation (Termination) Severed Retiree
## 1779 6
## System Termination
## 276
CULTURE
# Missing-value summary for the culture survey: one row per column of
# `culture_test_raw` (row names are the column names), holding the NA count and
# the share of rows that are NA, sorted from most to least complete.
#
# The counts come from a typed vapply() rather than a list -> data.frame ->
# t() round trip, so the original column names are kept verbatim and the
# result does not rely on the auto-generated column name `V1`.
culture_na_counts <- vapply(
  culture_test_raw,
  function(column) sum(is.na(column)),
  integer(1)
)
culture_Q_NAs <- data.frame(
  NAs_per_question = culture_na_counts,
  row.names = names(culture_na_counts)
) %>%
  # The denominator is the number of survey rows, independent of any one column.
  mutate(fraction_NAs = round(NAs_per_question / nrow(culture_test_raw), 2)) %>%
  arrange(fraction_NAs)
culture_Q_NAs
## NAs_per_question fraction_NAs
## survey_date 0 0.00
## employee_id 0 0.00
## survey 0 0.00
## Q_believe_goals 440 0.01
## Q_proud_to_work 440 0.01
## Q_recomment 440 0.01
## Q_job_statisfied 440 0.01
## Q_comm_two_way 558 0.01
## Q_indiv_belong 584 0.01
## Q_TW_support 605 0.01
## Q_decision_after 562 0.01
## Q_needs_ABO 1263 0.03
## Q_needs_customers 1284 0.03
## Q_indiv_serve_customers 1266 0.03
## Q_entrepreneurship 1255 0.03
## Q_indiv_challenge 10484 0.23
## Q_emplowered_risk 10474 0.23
## Q_learning_environ 10504 0.23
## Q_experiment 10456 0.23
## Q_indiv_understane_A70vision 22734 0.50
## Q_indiv_change_at_company 22744 0.50
## Q_indiv_changeA70 22771 0.50
## Q_SL_leads_change 22722 0.50
## Q_understand_strategy 23096 0.51
## Q_company_future 23096 0.51
## Q_indiv_work_connect 23140 0.51
## Q_company_strategy 23173 0.51
## Q_SL_direction 23107 0.51
## Q_company_makingchange 22780 0.51
## Q_indiv_feeling 28139 0.62
## free_text_length 31765 0.70
## Q_manager_quarterly_conv 33691 0.75
## Q_manager_growth 33694 0.75
## Q_recognition_accomplish 33693 0.75
## Q_diverse_perspectives 34176 0.76
## Q_different_ideas 34168 0.76
## Q_indiv_respect 34171 0.76
## Q_SL_diverese 34166 0.76
## Q_learning_goals 34965 0.78
## Q_learning_development 34967 0.78
## Q_company_equal 34978 0.78
## Q_community 34971 0.78
## Q_wellbeing_life 34979 0.78
## Q_bene_meet_needs 34974 0.78
## Q_indiv_stress 41245 0.91
## Q_SL_trust 41244 0.91
## Q_SL_commu_change 43298 0.96
## Q_manager_support_duringchange 43296 0.96
## Q_bene_compet 44553 0.99
## Q_bene_pay 44553 0.99
## Q_respect 44553 0.99
## Q_mangr 44553 0.99
## Q_my_growth 44553 0.99
## Q_decision_involved 44553 0.99
## Q_wellbeing_schedule 44553 0.99
## Q_my_contribution 44553 0.99
## Q_recognition_goodjob 44553 0.99
## Q_workload 44554 0.99
## Q_safety 44554 0.99
On Boarding
# Missing-value summary for the onboarding survey: one row per column of
# `on_board_test_raw` (row names are the column names), holding the NA count
# and the share of rows that are NA, sorted from most to least complete.
#
# The counts come from a typed vapply() rather than a list -> data.frame ->
# t() round trip, so the original column names are kept verbatim and the
# result does not rely on the auto-generated column name `V1`.
on_board_na_counts <- vapply(
  on_board_test_raw,
  function(column) sum(is.na(column)),
  integer(1)
)
on_board_Q_NAs <- data.frame(
  NAs_per_question = on_board_na_counts,
  row.names = names(on_board_na_counts)
) %>%
  # The denominator is the number of survey rows, independent of any one column.
  mutate(fraction_NAs = round(NAs_per_question / nrow(on_board_test_raw), 2)) %>%
  arrange(fraction_NAs)
on_board_Q_NAs
## NAs_per_question fraction_NAs
## survey_date 0 0.00
## employee_id 0 0.00
## Q_beleive_ingoals 5 0.00
## Q_proud_company 5 0.00
## Q_recomment_company 5 0.00
## Q_satisfied_company 5 0.00
## survey_days 0 0.00
## Q_belonging 7 0.01
## Q_received_training 175 0.17
## Q_expectedofme 176 0.17
## Q_expected_job 178 0.17
## Q_manager_investment 176 0.17
## Q_regular_feedback 176 0.17
## Q_expected_culture 174 0.17
## Q_department_environment 175 0.17
## Q_cooperate 175 0.17
## Q_link_mywork_to_goals 441 0.42
## feedback_length 541 0.51
## Q_link_mywork_to_vision 785 0.75
## Q_learning_development 880 0.84
## Q_received_recognition 880 0.84
## Q_company_values_contribution 882 0.84
## Q_company_meet_goals 880 0.84
## Q_in_team_diverse_perspectives 880 0.84