QC checks:

Variable: Employee ID:

# Work on a copy so the raw import stays untouched.
employ <- employ_test_raw

# Uniqueness check for the employee identifier.
# The distinct-ID count is compared with the row count of the SAME data frame.
# Comparing against a differently named column of the raw frame is fragile:
# `$` returns NULL for a missing column, so two missing columns would give
# 0 == 0 and report TRUE without checking anything.
# Fail loudly if the ID column is absent rather than passing vacuously.
stopifnot("employee_ID" %in% names(employ))
paste("is Employee ID unique?", length(unique(employ$employee_ID)) == nrow(employ))
## [1] "is Employee ID unique? TRUE"

Variable: Employment Status: good to include as is

table(employ$employment_status)
## 
##       Active   Paid Leave      Retired   Terminated Unpaid Leave 
##        14069           17          552         2684          141

Variable: Employment Class: remove Intern and On Call / Casual Employee; maybe also Expatriate and Seasonal Worker

table(employ$employee_class)
## 
##         Contract Employee                  Employee                Expatriate 
##                       187                     17077                        13 
##                    Intern On Call / Casual Employee           Seasonal Worker 
##                        16                        17                       153

Variable: Job Code: too many unique values => remove

# Count distinct job codes. unique() keeps NA as its own value, whereas
# table() below drops NA by default, so the two counts can differ by one.
length(unique(employ$job_code))
## [1] 5292
# table() tallies rows per job code; hist() shows how those tallies are
# distributed (how many codes have few rows vs. many rows).
hist(table(employ$job_code))

Variable: Job Name: too many unique values => remove

# Count distinct job names (unique() counts NA, if present, as a value).
length(unique(employ$job_name))
## [1] 4239
# Histogram of rows-per-job-name; table() omits NA unless useNA is set.
hist(table(employ$job_name))

Variable: Global Division: good; combine the small "Digital" and "Data and Analytics" categories

table(employ$global_division)
## 
##     Branch Sales Operation         Data and Analytics 
##                       1871                         12 
##                    Digital                    Finance 
##                         24                        461 
##                        GBS General and Administrative 
##                        653                        568 
##            Human Resources     Information Technology 
##                        390                        163 
##     Innovation and Science                      Legal 
##                       1071                        313 
##                  Marketing          Physical Presence 
##                       1175                       2367 
##                      Sales               Supply Chain 
##                       2123                       4902 
##                 Technology 
##                       1370

Variable: Global Subdivision: too many unique values => remove; maybe use it to better balance the large `Global Division` category (‘Supply Chain’)

# Count distinct global subdivisions (unique() counts NA, if present, as a value).
length(unique(employ$g_subdivision))
## [1] 129
# Histogram of rows-per-subdivision; table() omits NA unless useNA is set.
hist(table(employ$g_subdivision))

Variable: Department Name: too many unique values => remove

# Count distinct department names (unique() counts NA, if present, as a value).
length(unique(employ$department_name))
## [1] 2451
# Histogram of rows-per-department; table() omits NA unless useNA is set.
hist(table(employ$department_name))

Variable: Company: * many unique values => consolidate? *

# Count distinct companies (unique() counts NA, if present, as a value).
length(unique(employ$Company))
## [1] 79
# Histogram of rows-per-company; table() omits NA unless useNA is set.
hist(table(employ$Company))

Variable: Job Family Name: good

# Number of distinct job families (NA, if any, would count as a value here).
length(unique(employ$job_family_name))
## [1] 27
# Row count per job family; table() omits NA unless useNA is set.
table(employ$job_family_name)
## 
##                               Agribusiness 
##                                         69 
##                                   Aviation 
##                                         67 
##               Business Systems & Analytics 
##                                        303 
##                          Corporate Affairs 
##                                        259 
##                       Creative Specialties 
##                                        336 
##                     Engineering & Sciences 
##                                        939 
##                                 Facilities 
##                                         14 
##                                    Finance 
##                                       1087 
##                  General Prof & Leadership 
##                                        340 
##                            General Support 
##                                        924 
##                            Health & Safety 
##                                         39 
##                            Human Resources 
##                                        411 
##                     Information Technology 
##                                       1189 
##                                      Legal 
##                                        387 
##                                Maintenance 
##                                        393 
##                                  Marketing 
##                                        891 
##                                 Production 
##                                       1651 
##                                    Quality 
##                                        286 
##                                 Regulatory 
##                                        199 
##                    Sales & ABO/IBO Support 
##                                       3267 
##                                   Security 
##                                         48 
##                                Shop/Retail 
##                                       2434 
##                             Special Events 
##                                        174 
##                                   Strategy 
##                                         52 
##                               Supply Chain 
##                                        646 
##                                    Unknown 
##                                         81 
## Warehousing, Distribution & Transportation 
##                                        977

Variable: Job Sub Family Name: * too many unique values, maybe? *

# Count distinct job sub-families (unique() counts NA, if present, as a value).
length(unique(employ$job_sub_family_name))
## [1] 176
# Histogram of rows-per-sub-family; table() omits NA unless useNA is set.
hist(table(employ$job_sub_family_name))

Variable: Age Group: good

table(employ$age_group)
## 
##  0-19 20-24 25-34 35-44 45-54   55+ 
##    33   675  5031  6515  3391  1818

Variable: tenure_bin: good

# Row count per tenure bin. The labels in the output appear in alphabetical
# rather than chronological order (e.g. "10-15 years" precedes "3-5 years").
# NOTE(review): consider an ordered factor if bin order matters downstream.
table(employ$tenure_bin)
## 
##   0-1 years   1-3 years 10-15 years 15-20 years 20-25 years 25-30 years 
##        2081        2480        3524        2078         907         904 
##   3-5 years   30+ years  5-10 years 
##        1729         720        3040

Variable: Gender: good

table(employ$gender)
## 
## Female   Male 
##   9211   8252

Variable: Manager Status: * ?? What is this? *

# Row count per Manager Status value (0/1 in the output).
# NOTE(review): the 0-count (14738) equals the number of rows with
# span_of_control == 0 further down, so this looks like an indicator for
# "has at least one direct report" -- confirm with the data owner.
table(employ$`Manager Status`)
## 
##     0     1 
## 14738  2725

Variable: Direct Span of Control: this can be calculated from the manager ID field; maybe remove?

table(employ$span_of_control)
## 
##     0     1     2     3     4     5     6     7     8     9    10    11    12 
## 14738   346   355   364   337   298   252   166   136   118    75    49    40 
##    13    14    15    16    17    18    19    20    21    22    23    24    25 
##    33    31    18    16    10    11    10     6     5     6     1     1     8 
##    26    27    28    29    30    31    33    35    36    37    38    39    41 
##     8     3     2     1     1     1     3     1     1     1     1     1     1 
##    42    43    45    48    51    56    59   103 
##     1     1     1     1     1     1     1     1

Variable: Work Location:

# Number of distinct work locations (length of the per-location tally;
# table() omits NA unless useNA is set).
length(table(employ$work_location))
## [1] 533
# Row count at the single most common location.
max(table(employ$work_location))
## [1] 3119
# Distribution of rows-per-location across all locations.
hist(table(employ$work_location))

Variable: Pay Type: good; maybe remove the 3 "Not Applicable" rows

table(employ$pay_type)
## 
##         Hourly Not Applicable       Salaried 
##           2308              3          15152

Variable: Global Grade Level: good

# Row count per global grade-level band.
# NOTE(review): `gloabl_grade_level` looks like a misspelling of
# `global_grade_level`. The call evidently resolves against the data as-is
# (see output), so any fix belongs where the column is named, not only here.
table(employ$gloabl_grade_level)
## 
##  0-13 14-15 16-17 18-19   20+ 
##  9632  4087  2614   850   280

Variable: Compa Ratio Range: good ???? what is this ??

# Row count per Compa Ratio Range label. Labels are ordered as text in the
# output, not numerically (e.g. "100-105%" precedes "30-50%").
# NOTE(review): the output contains an "85-95%" label (1 row) that overlaps
# "85-90%" and "90-95%" -- presumably a data inconsistency; confirm and recode.
table(employ$`Compa Ratio Range`)
## 
##    0-30% 100-105% 105-110% 110-125% 125-150% 150-200% 200-300%   30-50% 
##      346     3485     1882     2132      251       25        5       31 
## 300-400%   50-75%   75-85%   85-90%   85-95%   90-95%  95-100% 
##        2      884     2035     1654        1     2008     2537

Variable: Competency Category: good ???? what is this ??

# Row count per Competency Category.
# NOTE(review): the five counts (9632, 4087, 2614, 850, 280) are identical to
# the Global Grade Level band counts above, so this column looks like a
# relabelling of the grade bands and may be redundant -- confirm with a
# cross-tabulation before keeping both.
table(employ$`Competency Category`)
## 
##              Executive     First-Level Leader Individual Contributor 
##                    280                   2614                   9632 
##       Mid-Level Leader           Professional 
##                    850                   4087

Variable: Top Talent Status: good

table(employ$`Top Talent Status`)
## 
##      All Other Employees           High Performer           High Potential 
##                    16628                      303                      240 
## Local GCR High Performer Local GCR High Potential 
##                      169                      123

Variable: Performance Rating: good

table(employ$`Performance Rating`)
## 
## Exceptional Performance       Needs Improvement               Not Rated 
##                    2738                     344                    3011 
##      Strong Performance 
##                   11370

Variable: Exit Reason: * consolidate reasons *

table(employ$exit_reason)
## 
##      (Term) Contract Not Renewed     (Term) Dissatisfied with Job 
##                               62                                9 
## (Term) Failure to Return from Lv       (Term) Sever Involuntarily 
##                                5                               45 
##    (Term) Unsatisfactory Perform    (Term) Vol Retiree Separation 
##                               54                              443 
## (Term)Vol Non-Retiree Separation         (Termination) Attendance 
##                              204                               25 
##              (Termination) Death (Termination) Disability Retiree 
##                               14                                1 
##    (Termination) Insubordination    (Termination) Limited Retiree 
##                                4                                5 
##         (Termination) Misconduct  (Termination) Mutual Separation 
##                               40                              328 
##  (Termination) Other Involuntary    (Termination) Regular Retiree 
##                              118                               97 
##        (Termination) Resignation    (Termination) Severed Retiree 
##                             1779                                6 
##               System Termination 
##                              276

CULTURE

# Missing-answer summary for the culture survey: one row per survey column,
# holding the NA count and the NA share of all rows (rounded to 2 decimals).
# Rows are sorted by the rounded share, not by the raw count.
culture_Q_NAs <- culture_test_raw %>%
  map(function(answers) sum(is.na(answers))) %>%
  as.data.frame() %>%
  t() %>%
  as.data.frame() %>%
  rename(NAs_per_question = V1) %>%
  mutate(
    fraction_NAs = round(
      NAs_per_question / length(culture_test_raw$employee_id),
      2
    )
  ) %>%
  arrange(fraction_NAs)

culture_Q_NAs
##                                NAs_per_question fraction_NAs
## survey_date                                   0         0.00
## employee_id                                   0         0.00
## survey                                        0         0.00
## Q_believe_goals                             440         0.01
## Q_proud_to_work                             440         0.01
## Q_recomment                                 440         0.01
## Q_job_statisfied                            440         0.01
## Q_comm_two_way                              558         0.01
## Q_indiv_belong                              584         0.01
## Q_TW_support                                605         0.01
## Q_decision_after                            562         0.01
## Q_needs_ABO                                1263         0.03
## Q_needs_customers                          1284         0.03
## Q_indiv_serve_customers                    1266         0.03
## Q_entrepreneurship                         1255         0.03
## Q_indiv_challenge                         10484         0.23
## Q_emplowered_risk                         10474         0.23
## Q_learning_environ                        10504         0.23
## Q_experiment                              10456         0.23
## Q_indiv_understane_A70vision              22734         0.50
## Q_indiv_change_at_company                 22744         0.50
## Q_indiv_changeA70                         22771         0.50
## Q_SL_leads_change                         22722         0.50
## Q_understand_strategy                     23096         0.51
## Q_company_future                          23096         0.51
## Q_indiv_work_connect                      23140         0.51
## Q_company_strategy                        23173         0.51
## Q_SL_direction                            23107         0.51
## Q_company_makingchange                    22780         0.51
## Q_indiv_feeling                           28139         0.62
## free_text_length                          31765         0.70
## Q_manager_quarterly_conv                  33691         0.75
## Q_manager_growth                          33694         0.75
## Q_recognition_accomplish                  33693         0.75
## Q_diverse_perspectives                    34176         0.76
## Q_different_ideas                         34168         0.76
## Q_indiv_respect                           34171         0.76
## Q_SL_diverese                             34166         0.76
## Q_learning_goals                          34965         0.78
## Q_learning_development                    34967         0.78
## Q_company_equal                           34978         0.78
## Q_community                               34971         0.78
## Q_wellbeing_life                          34979         0.78
## Q_bene_meet_needs                         34974         0.78
## Q_indiv_stress                            41245         0.91
## Q_SL_trust                                41244         0.91
## Q_SL_commu_change                         43298         0.96
## Q_manager_support_duringchange            43296         0.96
## Q_bene_compet                             44553         0.99
## Q_bene_pay                                44553         0.99
## Q_respect                                 44553         0.99
## Q_mangr                                   44553         0.99
## Q_my_growth                               44553         0.99
## Q_decision_involved                       44553         0.99
## Q_wellbeing_schedule                      44553         0.99
## Q_my_contribution                         44553         0.99
## Q_recognition_goodjob                     44553         0.99
## Q_workload                                44554         0.99
## Q_safety                                  44554         0.99

On Boarding

# Missing-answer summary for the onboarding survey: one row per survey column,
# holding the NA count and the NA share of all rows (rounded to 2 decimals).
# Rows are sorted by the rounded share, not by the raw count.
on_board_Q_NAs <- on_board_test_raw %>%
  map(function(answers) sum(is.na(answers))) %>%
  as.data.frame() %>%
  t() %>%
  as.data.frame() %>%
  rename(NAs_per_question = V1) %>%
  mutate(
    fraction_NAs = round(
      NAs_per_question / length(on_board_test_raw$employee_id),
      2
    )
  ) %>%
  arrange(fraction_NAs)

on_board_Q_NAs 
##                                NAs_per_question fraction_NAs
## survey_date                                   0         0.00
## employee_id                                   0         0.00
## Q_beleive_ingoals                             5         0.00
## Q_proud_company                               5         0.00
## Q_recomment_company                           5         0.00
## Q_satisfied_company                           5         0.00
## survey_days                                   0         0.00
## Q_belonging                                   7         0.01
## Q_received_training                         175         0.17
## Q_expectedofme                              176         0.17
## Q_expected_job                              178         0.17
## Q_manager_investment                        176         0.17
## Q_regular_feedback                          176         0.17
## Q_expected_culture                          174         0.17
## Q_department_environment                    175         0.17
## Q_cooperate                                 175         0.17
## Q_link_mywork_to_goals                      441         0.42
## feedback_length                             541         0.51
## Q_link_mywork_to_vision                     785         0.75
## Q_learning_development                      880         0.84
## Q_received_recognition                      880         0.84
## Q_company_values_contribution               882         0.84
## Q_company_meet_goals                        880         0.84
## Q_in_team_diverse_perspectives              880         0.84