Dealing with Missing Data in R

DataCamp: Statistics with R

Bonnie Cooper

library( dplyr )
library( ggplot2 )
library( gridExtra )
library( tidyverse )
library( naniar )
library( simputation )

Why care about missing data?

Introduction to missing data

“The best thing to do with missing data is to not have any.” -Gertrude M. Cox

  • Working with real-world data == working with missing data
  • Missing Data can have unexpected effects on your analysis
  • Bad imputation can lead to poor estimates and decisions

Checking for missing values with any_na()

x <- c(1, NA, 3, NA, NA, 5)

# Compare three ways of asking "does x contain any NA?".
# Each one is wall-clock timed around a single auto-printed call, so the
# reported differences are indicative only, not a benchmark.

# 1. base R: build the full logical mask, then reduce it
baset0 <- Sys.time()
any(is.na(x))
## [1] TRUE
timeint_base <- Sys.time() - baset0

# 2. naniar helper
naniart0 <- Sys.time()
any_na(x)
## [1] TRUE
timeint_naniar <- Sys.time() - naniart0

# 3. base R single-call equivalent
base2t0 <- Sys.time()
anyNA(x)
## [1] TRUE
timeint_base2 <- Sys.time() - base2t0

# Assemble one multi-line report string and print it
res <- paste(
  "Comparing runtime for methods to detect NA:\nbase R runtime:", timeint_base,
  "\nanyNA() base r runtime", timeint_base2,
  "\nany_na() naniar runtime:", timeint_naniar
)
cat(res, sep = "\n")
## Comparing runtime for methods to detect NA:
## base R runtime: 0.00362491607666016 
## anyNA() base r runtime 0.00226187705993652 
## any_na() naniar runtime: 0.00248432159423828
#return a boolean vector that tests for NAs
are_na( x )
## [1] FALSE  TRUE FALSE  TRUE  TRUE FALSE
#return the number of NAs
n_miss( x )
## [1] 3
#return the proportion of NAs
prop_miss( x )
## [1] 0.5

Generally, operations involving NA values return NA:

heights <- data.frame( 'Sophie' = 165, 'Dan' = 177, 'Fred' = NA )
sum( heights )
## [1] NA

Important Distinctions:

  • NaN == Not a Number. It is treated as missing, the same as NA
  • NULL == empty. is Not the same as NA
  • Inf == Infinity. is Not the same as NA
# Ask any_na() about each special value in turn
r1 <- any_na(NaN)
r2 <- any_na(NULL)
r3 <- any_na(Inf)
r4 <- any_na(0)

# Collect the four answers into one multi-line report and print it
res <- paste(
  "When tested any_na()::\nNaN evaluates:", r1,
  "\nNULL evaluates:", r2,
  "\nInf evaluates:", r3,
  "\n0 evaluates:", r4
)
cat(res, sep = "\n")
## When tested any_na()::
## NaN evaluates: TRUE 
## NULL evaluates: FALSE 
## Inf evaluates: FALSE 
## 0 evaluates: FALSE

Conditional Statement Behaviors to look out for:

# Elementwise OR with a missing operand: the result is only known when the
# other operand alone decides it (TRUE); otherwise it stays NA.
r1 <- NA | TRUE
r2 <- NA | FALSE
r3 <- NA | NaN
r4 <- NaN | NA

# Collect the four results into one multi-line report and print it
res <- paste(
  "Conditional Statement Behaviors to be aware of::\nNA | TRUE evaluates:", r1,
  "\nNA | FALSE evaluates:", r2,
  "\nNA | NaN evaluates:", r3,
  "\nNaN | NA evaluates:", r4
)
cat(res, sep = "\n")
## Conditional Statement Behaviors to be aware of::
## NA | TRUE evaluates: TRUE 
## NA | FALSE evaluates: NA 
## NA | NaN evaluates: NA 
## NaN | NA evaluates: NA
# Create x, a vector, with values NA, NaN, Inf, ".", and "missing"
x <- c(NA, NaN, Inf, ".", "missing")

# Use any_na() and are_na() on x to explore the missings
any_na(x)
## [1] TRUE
are_na(x)
## [1]  TRUE FALSE FALSE FALSE FALSE
# Download the height/weight example data and preview the first rows
dat_hw_url <- "https://raw.githubusercontent.com/SmilodonCub/ReadingLearningTinkering/master/DataCamp/Statistics_with_R/dat_hw.csv"
dat_hw_raw <- read.csv(dat_hw_url)
# Drop column X (presumably a row index saved with the CSV; verify)
dat_hw <- select(dat_hw_raw, -X)
head(dat_hw)
##      weight     height
## 1        NA  2.3881462
## 2  91.20470  1.0014508
## 3  81.57915         NA
## 4  76.84886         NA
## 5 111.01731 -0.2412422
## 6  90.15135  2.5207375
# Use n_miss() to count the total number of missing values in dat_hw
n_miss(dat_hw)
## [1] 30
# Use n_miss() on dat_hw$weight to count the total number of missing values
n_miss(dat_hw$weight)
## [1] 15
# Use n_complete() on dat_hw to count the total number of complete values
n_complete(dat_hw)
## [1] 170
# Use n_complete() on dat_hw$weight to count the total number of complete values
n_complete(dat_hw$weight)
## [1] 85
# Use prop_miss() and prop_complete() on dat_hw to get the overall proportion of missing and complete values across the whole data frame
prop_miss(dat_hw)
## [1] 0.15
prop_complete(dat_hw)
## [1] 0.85

Why care about missing values?

Introduction to missingness summaries

Basic summary missingness:

n_miss( x )
## [1] 1
n_complete( x )
## [1] 4

Dataframe summaries of missingness:

miss_var_summary(): summarize the number of missing in each variable/feature/column

miss_x_cols <- miss_var_summary( dat_hw )
glimpse( miss_x_cols )
## Rows: 2
## Columns: 3
## $ variable <chr> "weight", "height"
## $ n_miss   <int> 15, 15
## $ pct_miss <dbl> 15, 15
miss_x_cols <- miss_var_summary( airquality )
glimpse( miss_x_cols )
## Rows: 6
## Columns: 3
## $ variable <chr> "Ozone", "Solar.R", "Wind", "Temp", "Month", "Day"
## $ n_miss   <int> 37, 7, 0, 0, 0, 0
## $ pct_miss <dbl> 24.183007, 4.575163, 0.000000, 0.000000, 0.000000, 0.000000

miss_case_summary: each case is a row in the dataframe. info on missing values by row.

dim( miss_case_summary( dat_hw ) )
## [1] 100   3
head( miss_case_summary( airquality ) )
## # A tibble: 6 x 3
##    case n_miss pct_miss
##   <int>  <int>    <dbl>
## 1     5      2     33.3
## 2    27      2     33.3
## 3     6      1     16.7
## 4    10      1     16.7
## 5    11      1     16.7
## 6    25      1     16.7

Missing Data Tabulations:

miss_var_table() returns a dataframe with info on the variables missing data as well as the percentage of variables affected by missing data

miss_var_table( dat_hw )
## # A tibble: 1 x 3
##   n_miss_in_var n_vars pct_vars
##           <int>  <int>    <dbl>
## 1            15      2      100

This can be interpreted as: 2 variables are missing 15 observations each, so 100% of the variables in the dataframe are affected this way.

miss_var_table( airquality )
## # A tibble: 3 x 3
##   n_miss_in_var n_vars pct_vars
##           <int>  <int>    <dbl>
## 1             0      4     66.7
## 2             7      1     16.7
## 3            37      1     16.7

This can be interpreted as: 66.7% of the features in this dataframe (4 features) are missing 0 observations. One variable (16.7% of features) is missing 7 observations, while another variable (16.7% of features) is missing 37 observations.

miss_case_table(): returns the same information but by cases (rows)

miss_case_table( dat_hw )
## # A tibble: 2 x 3
##   n_miss_in_case n_cases pct_cases
##            <int>   <int>     <dbl>
## 1              0      70        70
## 2              1      30        30

This can be interpreted as: 70% of rows (70 rows) are missing 0 observations. 30% of rows (30 rows) are missing 1 observation.

miss_case_table( airquality )
## # A tibble: 3 x 3
##   n_miss_in_case n_cases pct_cases
##            <int>   <int>     <dbl>
## 1              0     111     72.5 
## 2              1      40     26.1 
## 3              2       2      1.31

This can be interpreted as: 72.5% of rows (111 rows) are missing 0 observations. 26.1% of rows (40 rows) are missing 1 observation. 1.3% of rows (2 rows) are missing 2 observations.

Other useful functions:

  • miss_var_span() summarizes missing data by span of data (good for time series analysis e.g. weekly spans of 7)
  • miss_var_run() summarizes runs of missing data. It returns the runs of complete and missing data, which is good for finding unusual patterns of missingness and great for finding systematic sampling errors.

Using summaries with group_by():

airquality %>%
  group_by( Month ) %>%
  miss_var_summary()
## # A tibble: 25 x 4
## # Groups:   Month [5]
##    Month variable n_miss pct_miss
##    <int> <chr>     <int>    <dbl>
##  1     5 Ozone         5     16.1
##  2     5 Solar.R       4     12.9
##  3     5 Wind          0      0  
##  4     5 Temp          0      0  
##  5     5 Day           0      0  
##  6     6 Ozone        21     70  
##  7     6 Solar.R       0      0  
##  8     6 Wind          0      0  
##  9     6 Temp          0      0  
## 10     6 Day           0      0  
## # … with 15 more rows
glimpse( pedestrian )
## Rows: 37,700
## Columns: 9
## $ hourly_counts <int> 883, 597, 294, 183, 118, 68, 47, 52, 120, 333, 761, 135…
## $ date_time     <dttm> 2016-01-01 00:00:00, 2016-01-01 01:00:00, 2016-01-01 0…
## $ year          <int> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2…
## $ month         <ord> January, January, January, January, January, January, J…
## $ month_day     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ week_day      <ord> Friday, Friday, Friday, Friday, Friday, Friday, Friday,…
## $ hour          <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
## $ sensor_id     <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ sensor_name   <chr> "Bourke Street Mall (South)", "Bourke Street Mall (Sout…
miss_var_table( pedestrian )
## # A tibble: 2 x 3
##   n_miss_in_var n_vars pct_vars
##           <int>  <int>    <dbl>
## 1             0      8     88.9
## 2          2548      1     11.1
# Calculate the summaries for each run of missingness for the variable, hourly_counts
miss_var_run(pedestrian, var = hourly_counts)
## # A tibble: 35 x 2
##    run_length is_na   
##         <int> <chr>   
##  1       6628 complete
##  2          1 missing 
##  3       5250 complete
##  4        624 missing 
##  5       3652 complete
##  6          1 missing 
##  7       1290 complete
##  8        744 missing 
##  9       7420 complete
## 10          1 missing 
## # … with 25 more rows
# Calculate the summaries for each span of missingness, 
# for a span of 4000, for the variable hourly_counts
miss_var_span(pedestrian, var = hourly_counts, span_every = 4000)
## # A tibble: 10 x 5
##    span_counter n_miss n_complete prop_miss prop_complete
##           <int>  <int>      <dbl>     <dbl>         <dbl>
##  1            1      0       4000   0               1    
##  2            2      1       3999   0.00025         1.00 
##  3            3    121       3879   0.0302          0.970
##  4            4    503       3497   0.126           0.874
##  5            5    745       3255   0.186           0.814
##  6            6      0       4000   0               1    
##  7            7      1       3999   0.00025         1.00 
##  8            8      0       4000   0               1    
##  9            9    745       3255   0.186           0.814
## 10           10    432       3568   0.108           0.892
# Group by `month`, then calculate the runs of missingness in hourly_counts
# within each month
pedestrian %>%
  group_by(month) %>%
  miss_var_run(var = hourly_counts)
## # A tibble: 51 x 3
## # Groups:   month [12]
##    month    run_length is_na   
##    <ord>         <int> <chr>   
##  1 January        2976 complete
##  2 February       2784 complete
##  3 March          2976 complete
##  4 April           888 complete
##  5 April           552 missing 
##  6 April          1440 complete
##  7 May             744 complete
##  8 May              72 missing 
##  9 May            2160 complete
## 10 June           2880 complete
## # … with 41 more rows
# Group by `month`, then summarise the missingness of hourly_counts within
# each month over consecutive spans of 2000 rows
pedestrian %>%
  group_by(month) %>%
  miss_var_span(var = hourly_counts, span_every = 2000)
## # A tibble: 25 x 6
## # Groups:   month [12]
##    month    span_counter n_miss n_complete prop_miss prop_complete
##    <ord>           <int>  <int>      <dbl>     <dbl>         <dbl>
##  1 January             1      0       2000     0             1    
##  2 January             2      0       2000     0             1    
##  3 February            1      0       2000     0             1    
##  4 February            2      0       2000     0             1    
##  5 March               1      0       2000     0             1    
##  6 March               2      0       2000     0             1    
##  7 April               1    552       1448     0.276         0.724
##  8 April               2      0       2000     0             1    
##  9 May                 1     72       1928     0.036         0.964
## 10 May                 2      0       2000     0             1    
## # … with 15 more rows

How do we visualize missing values?

naniar missing data visualization methods.

Overview of missingness: a type of heatmap for missing data. black == missing. also provides basic stats of proportions of missingness.

vis_miss( airquality )

vis_miss( dat_hw )

vis_miss( airquality )

vis_miss( airquality, cluster = TRUE)

vis_miss( dat_hw, cluster = TRUE)

Looking at missing observations in both variables and cases

varp <- gg_miss_var( airquality )
casep <- gg_miss_case( airquality )
grid.arrange( varp, casep, ncol = 2 )

varp <- gg_miss_var( dat_hw )
casep <- gg_miss_case( dat_hw )
grid.arrange( varp, casep, ncol = 2 )

faceting a gg_miss_var() plot acts like visualizing a group_by

gg_miss_var( airquality, facet = Month )

Visualizing missingness patterns

gg_miss_upset() shows co-occurring missing observations across variables

gg_miss_upset( airquality )

gg_miss_fct(): visualizing missingness across the levels of a factor. Again, this is much like visualizing the result of a group_by on missingness. It gives a heat map view with each feature on the y-axis and each month on the x-axis, where color intensity is the number of missing observations.

gg_miss_fct( x = airquality, fct = Month )

gg_miss_span() visualizes the number of missing observations for a given span.

gg_miss_span( pedestrian, hourly_counts, span_every = 3000)

# Four views of the missingness in the `riskfactors` dataset, arranged in a
# 2 x 2 grid. Every panel hides the x-axis title, labels and ticks, so that
# styling is defined once and reused.
hide_x_axis_theme <- theme(
  axis.title.x = element_blank(),
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank()
)

# All of the missingness, as-is
vm <- vis_miss(riskfactors) + hide_x_axis_theme

# Missingness with clustering switched on
vmc <- vis_miss(riskfactors, cluster = TRUE) + hide_x_axis_theme

# Columns sorted by missingness
vms <- vis_miss(riskfactors, sort_miss = TRUE) + hide_x_axis_theme

# Clustering and column sorting combined
vmcs <- vis_miss(riskfactors, sort_miss = TRUE, cluster = TRUE) +
  hide_x_axis_theme

grid.arrange(vm, vmc, vms, vmcs, ncol = 2)

# Visualize the number of missings in cases using `gg_miss_case()`
caserf <- gg_miss_case(riskfactors)

# Explore the number of missings in cases using `gg_miss_case()` 
# and facet by the variable `education`
fct_caserf <- gg_miss_case(riskfactors, facet = education)

grid.arrange( caserf, fct_caserf, ncol = 2 )

# Visualize the number of missings in variables using `gg_miss_var()`
varrf <- gg_miss_var(riskfactors)

# Explore the number of missings in variables using `gg_miss_var()` 
# and facet by the variable `education`
fct_varrf <- gg_miss_var(riskfactors, facet = education)

grid.arrange( varrf, fct_varrf, ncol = 2 )

# With the riskfactors dataset, explore how the missingness changes across the marital variable using gg_miss_fct()
gg_miss_fct(x = riskfactors, fct = marital)

# Using the pedestrian dataset, explore how the missingness of hourly_counts changes over a span of 3000 
gg_miss_span(pedestrian, var = hourly_counts, span_every = 3000)

# Using the pedestrian dataset, explore the impact of month by faceting by month
# and explore how missingness changes for a span of 1000
gg_miss_span(pedestrian, var = hourly_counts , span_every = 1000, facet = month)

Wrangling and tidying up missing values.

Searching for and replacing missing values

Assumptions with missing data: finding missing values and labelling with NA

In a perfect world, missing data is labelled NA. However, it may be the case that it is coded as ‘missing’, ‘Not Available’, ‘N/A’ or some other permutation.

miss_scan_count(): Searching for missing values

employees_url <- 'https://raw.githubusercontent.com/ChaitanyaBaweja/Programming-Tutorials/master/Missing-Data-Pandas/employees.csv'
employees <- read.csv( employees_url )
glimpse( employees )
## Rows: 1,000
## Columns: 6
## $ First.Name        <chr> "Douglas", "Thomas", "Maria", "Jerry", "Larry", "De…
## $ Gender            <chr> "Male", "Male", "Female", "Male", "Male", "n.a.", "…
## $ Salary            <chr> "97308", "61933", "130590", NA, "101004", "115163",…
## $ Bonus..           <chr> "6.945", "NaN", "11.858", "9.34", "1.389", "10.125"…
## $ Senior.Management <chr> "TRUE", "TRUE", "FALSE", "TRUE", "TRUE", "FALSE", "…
## $ Team              <chr> "Marketing", "", "Finance", "Finance", "Client Serv…
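#explicitly search for strange NA mislabellings
#NOTE: the search strings appear to be matched as regular expressions anywhere
#inside a value, so 'na' also counts ordinary values such as "Finance" or
#"Donna" -- the counts below are therefore upper bounds, not exact tallies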
employees %>%
  miss_scan_count( search = list('n.a', 'na') )
## # A tibble: 6 x 2
##   Variable              n
##   <chr>             <int>
## 1 First.Name           58
## 2 Gender                1
## 3 Salary                1
## 4 Bonus..               0
## 5 Senior.Management     2
## 6 Team                106
employees %>%
  miss_scan_count( search = common_na_strings )
## # A tibble: 6 x 2
##   Variable              n
##   <chr>             <int>
## 1 First.Name          998
## 2 Gender              999
## 3 Salary              999
## 4 Bonus..            1000
## 5 Senior.Management  1000
## 6 Team               1000
print( common_na_strings )
##  [1] "NA"     "N A"    "N/A"    "NA "    " NA"    "N /A"   "N / A"  " N / A"
##  [9] "N / A " "na"     "n a"    "n/a"    "na "    " na"    "n /a"   "n / a" 
## [17] " a / a" "n / a " "NULL"   "null"   ""       "\\?"    "\\*"    "\\."

replace_with_na(): replace specified values with NA

  • replace_with_na_all(): all variables
  • replace_with_na_at(): a subset of selected variables
  • replace_with_na_if(): a subset of variables that fulfill some condition
#replace the values 'n.a' and 'na' in the feature `Team` with `NA`
employees %>%
  replace_with_na( replace = list( Team = c('n.a', 'na') ) ) %>%
  miss_scan_count( search = common_na_strings )
## # A tibble: 6 x 2
##   Variable              n
##   <chr>             <int>
## 1 First.Name          998
## 2 Gender              999
## 3 Salary              999
## 4 Bonus..            1000
## 5 Senior.Management  1000
## 6 Team                997
#replace any empty string ("") with `NA`, across all variables
employees %>%
  replace_with_na_all( condition = ~.x == "" ) %>%
  miss_scan_count( search = common_na_strings )
## # A tibble: 6 x 2
##   Variable              n
##   <chr>             <int>
## 1 First.Name          931
## 2 Gender              852
## 3 Salary              998
## 4 Bonus..            1000
## 5 Senior.Management   933
## 6 Team                957
#replace any value found in `common_na_strings` with `NA`, across all variables
employees %>%
  replace_with_na_all( condition = ~.x %in% common_na_strings ) %>%
  miss_scan_count( search = common_na_strings )
## # A tibble: 6 x 2
##   Variable              n
##   <chr>             <int>
## 1 First.Name          931
## 2 Gender              852
## 3 Salary              998
## 4 Bonus..            1000
## 5 Senior.Management   933
## 6 Team                954
# Explore the strange missing values "N/A"
miss_scan_count(data = pedestrian, search = list("N/A") )
## # A tibble: 9 x 2
##   Variable          n
##   <chr>         <int>
## 1 hourly_counts     0
## 2 date_time         0
## 3 year              0
## 4 month             0
## 5 month_day         0
## 6 week_day          0
## 7 hour              0
## 8 sensor_id         0
## 9 sensor_name       0
# Explore the strange missing values "missing"
miss_scan_count(data = pedestrian, search = list("missing") )
## # A tibble: 9 x 2
##   Variable          n
##   <chr>         <int>
## 1 hourly_counts     0
## 2 date_time         0
## 3 year              0
## 4 month             0
## 5 month_day         0
## 6 week_day          0
## 7 hour              0
## 8 sensor_id         0
## 9 sensor_name       0
# Explore the strange missing values "na"
miss_scan_count(data = pedestrian, search = list('na' ) )
## # A tibble: 9 x 2
##   Variable          n
##   <chr>         <int>
## 1 hourly_counts     0
## 2 date_time         0
## 3 year              0
## 4 month             0
## 5 month_day         0
## 6 week_day          0
## 7 hour              0
## 8 sensor_id         0
## 9 sensor_name       0
# Explore the strange missing values " " (a single space)
miss_scan_count(data = pedestrian, search = list(' ') )
## # A tibble: 9 x 2
##   Variable          n
##   <chr>         <int>
## 1 hourly_counts     0
## 2 date_time     37700
## 3 year              0
## 4 month             0
## 5 month_day         0
## 6 week_day          0
## 7 hour              0
## 8 sensor_id         0
## 9 sensor_name   37700
# Explore all of the strange missing values, "N/A", "missing", "na", " "
miss_scan_count(data = pedestrian, search = list("N/A", "missing", "na", " "))
## # A tibble: 9 x 2
##   Variable          n
##   <chr>         <int>
## 1 hourly_counts     0
## 2 date_time     37700
## 3 year              0
## 4 month             0
## 5 month_day         0
## 6 week_day          0
## 7 hour              0
## 8 sensor_id         0
## 9 sensor_name   37700
# Take a look at the employees data using `glimpse()`
glimpse(employees)
## Rows: 1,000
## Columns: 6
## $ First.Name        <chr> "Douglas", "Thomas", "Maria", "Jerry", "Larry", "De…
## $ Gender            <chr> "Male", "Male", "Female", "Male", "Male", "n.a.", "…
## $ Salary            <chr> "97308", "61933", "130590", NA, "101004", "115163",…
## $ Bonus..           <chr> "6.945", "NaN", "11.858", "9.34", "1.389", "10.125"…
## $ Senior.Management <chr> "TRUE", "TRUE", "FALSE", "TRUE", "TRUE", "FALSE", "…
## $ Team              <chr> "Marketing", "", "Finance", "Finance", "Client Serv…
# Replace the strange missing values "", "na", "n.a", and "NaN"
# with `NA` for the variables Team and Bonus..
emp_clean <- replace_with_na(data = employees, replace = list(Team = c("", "na", "n.a","NaN"),
                                Bonus.. = c("", "na", "n.a","NaN")))
                                        
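# Test if `emp_clean` still has these values in it.
# NOTE: the "" search string appears to match every non-missing value, so the
# counts below track the number of non-NA entries rather than leftover strange values.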
miss_scan_count(emp_clean, search = list("", "na", "n.a", "NaN"))
## # A tibble: 6 x 2
##   Variable              n
##   <chr>             <int>
## 1 First.Name          998
## 2 Gender              999
## 3 Salary              999
## 4 Bonus..             997
## 5 Senior.Management  1000
## 6 Team                954
# Use `replace_with_na_at()` to replace the listed placeholder strings with NA,
# but only in the First.Name, Gender and Team columns
employees %>%
  replace_with_na_at(
    .vars = c("First.Name", "Gender", "Team"),
    condition = ~ .x %in% c("", " ", "na", "NaN", "?")
  )
##       First.Name Gender Salary Bonus.. Senior.Management                 Team
## 1        Douglas   Male  97308   6.945              TRUE            Marketing
## 2         Thomas   Male  61933     NaN              TRUE                 <NA>
## 3          Maria Female 130590  11.858             FALSE              Finance
## 4          Jerry   Male   <NA>    9.34              TRUE              Finance
## 5          Larry   Male 101004   1.389              TRUE      Client Services
## 6         Dennis   n.a. 115163  10.125             FALSE                Legal
## 7           Ruby Female  65476  10.012              TRUE              Product
## 8           <NA> Female  45906  11.598                                Finance
## 9         Angela   <NA>         18.523              TRUE          Engineering
## 10       Frances Female 139852   7.524              TRUE Business Development
## 11        Louise Female  63241  15.132              TRUE                 <NA>
## 12         Julie Female 102508  12.637              TRUE                Legal
## 13       Brandon   Male 112807  17.492              TRUE      Human Resources
## 14          Gary   Male 109831   5.831             FALSE                Sales
## 15      Kimberly Female  41426     NaN              TRUE              Finance
## 16       Lillian   <NA>  59414   1.256             FALSE              Product
## 17        Jeremy   Male  90370   7.369             FALSE      Human Resources
## 18         Shawn   Male 111737   6.414             FALSE                 <NA>
## 19         Diana Female 132940  19.082             FALSE      Client Services
## 20         Donna Female  81014   1.894             FALSE              Product
## 21          Lois   <NA>  64714   4.934              TRUE                Legal
## 22       Matthew   Male 100612  13.645             FALSE            Marketing
## 23        Joshua   <NA>  90816  18.816              TRUE      Client Services
## 24          <NA>   Male 125792   5.042                                   <NA>
## 25          John   Male  97950  13.873             FALSE      Client Services
## 26          <NA>   Male  37076  18.576                        Client Services
## 27         Craig   Male  37598   7.757              TRUE            Marketing
## 28         Scott   <NA> 122367   5.218             FALSE                Legal
## 29         Terry   Male 124008  13.464              TRUE      Client Services
## 30      Benjamin   Male  79529   7.008              TRUE                Legal
## 31     Christina Female 118780   9.096              TRUE          Engineering
## 32         Joyce   <NA>  88657  12.752             FALSE              Product
## 33          <NA>   Male 122340   6.417                                   <NA>
## 34          Jean Female 119082   16.18             FALSE Business Development
## 35         Jerry   Male  95734  19.096             FALSE      Client Services
## 36       Theresa Female  85182  16.675             FALSE                Sales
## 37        Rachel Female 142032  12.599             FALSE Business Development
## 38         Linda Female  57427   9.557              TRUE      Client Services
## 39     Stephanie Female  36844   5.574              TRUE Business Development
## 40          <NA>   Male 122173   7.797                        Client Services
## 41       Michael   Male  99283   2.665              TRUE         Distribution
## 42     Christine   <NA>  66582  11.308              TRUE Business Development
## 43       Beverly Female 121918  15.835             FALSE                Legal
## 44       Marilyn Female  73524   5.207              TRUE            Marketing
## 45       Cynthia Female 145146   7.482              TRUE              Product
## 46         Roger   Male  88010  13.886              TRUE                Sales
## 47         Bruce   Male 114796   6.796             FALSE              Finance
## 48         Kathy Female  66820     NaN              TRUE      Client Services
## 49      Clarence   Male  93581   6.083              TRUE Business Development
## 50         Chris   <NA> 113590   3.055             FALSE                Sales
## 51         Nancy Female  94976   13.83              TRUE          Engineering
## 52          <NA>   <NA>  41126  14.009                                  Sales
## 53          Todd   Male  49339   1.695              TRUE                 <NA>
## 54          Alan   <NA>  40341  17.578              TRUE              Finance
## 55          Sara Female  83677   8.999             FALSE          Engineering
## 56         Karen Female 102488  17.653              TRUE              Product
## 57          Carl   Male 130276  16.084              TRUE              Finance
## 58          <NA>   Male  64715  15.107              TRUE      Human Resources
## 59       Theresa Female  72670   1.481              TRUE          Engineering
## 60         Irene Female  66851  11.279             FALSE          Engineering
## 61         Paula   <NA>  48866   4.271             FALSE         Distribution
## 62        Denise Female 106862   3.699             FALSE Business Development
## 63          <NA> Female  58112  19.414                              Marketing
## 64       Matthew   Male  35203   18.04             FALSE      Human Resources
## 65      Kathleen   <NA>  77834  18.771             FALSE Business Development
## 66         Steve   Male  61310  12.428              TRUE         Distribution
## 67         Nancy Female 125250   2.672              TRUE Business Development
## 68        Rachel Female  51178   9.735              TRUE              Finance
## 69          Jose   Male  84834   14.33              TRUE              Finance
## 70         Irene   <NA> 100863   4.382              TRUE              Finance
## 71          Todd   <NA>  84692   6.617             FALSE      Client Services
## 72        Johnny   Male 118172  16.194              TRUE                Sales
## 73         Bobby   Male  54043   3.833             FALSE              Product
## 74       Frances Female  90582   4.709              TRUE                Sales
## 75        Thomas   Male  62096  17.029             FALSE            Marketing
## 76        Bonnie Female 104897       ?              TRUE      Human Resources
## 77      Margaret Female 131604   7.353              TRUE         Distribution
## 78       Charles   Male 107391    1.26              TRUE            Marketing
## 79         Robin Female 114797   5.965              TRUE                Sales
## 80          <NA> Female 115814    4.99             FALSE              Product
## 81        Gerald   <NA> 137126  15.602              TRUE                Sales
## 82   Christopher   Male  47369  14.822             FALSE                Legal
## 83        Steven   Male  35095   8.379              TRUE      Client Services
## 84         Shawn   Male 148115   6.539              TRUE              Finance
## 85         Doris Female  83072   7.511             FALSE              Finance
## 86        Jeremy   Male 100238   3.887              TRUE      Client Services
## 87         Annie   <NA> 103495   17.29              TRUE Business Development
## 88         Annie Female 144887   8.276              TRUE                Sales
## 89         Donna Female  64088   6.155              TRUE                Legal
## 90        Janice Female  51082  11.955             FALSE                Legal
## 91        Janice   <NA>  91719  11.583              TRUE                Legal
## 92         James   <NA> 128771   8.309             FALSE                 <NA>
## 93         Linda Female 119009  12.506              TRUE Business Development
## 94      Virginia   <NA> 111858   1.601              TRUE                Legal
## 95         Harry   Male 130620    7.03             FALSE                Legal
## 96       Heather Female  43026  14.166             FALSE      Client Services
## 97       Cynthia Female 142321   1.737               NaN              Finance
## 98         Laura   <NA> 140371   10.62              TRUE            Marketing
## 99          Tina Female 100705  16.961              TRUE            Marketing
## 100       Harold   Male  77544  12.447             FALSE Business Development
## 101      Melissa Female  48109  14.995             FALSE              Finance
## 102        Aaron   Male  61602  11.849              TRUE            Marketing
## 103         Jack   Male 103902  12.159             FALSE      Client Services
## 104      Phyllis Female 136984   8.932              TRUE              Finance
## 105         John   Male  80740  19.305             FALSE            Marketing
## 106        Kathy Female  91712   8.567             FALSE              Finance
## 107         Paul   Male  42146   3.046             FALSE                Legal
## 108       Steven   Male  68680  16.565             FALSE                Legal
## 109      Russell   <NA> 133980  12.396              TRUE                Legal
## 110  Christopher   Male  37919  11.449             FALSE                 <NA>
## 111      Shirley Female 147113  16.135             FALSE                Legal
## 112       Bonnie Female  42153   8.454              TRUE Business Development
## 113       Willie   Male  64363   4.023             FALSE            Marketing
## 114         Tina Female 114767   3.711              TRUE          Engineering
## 115       Ashley Female  58698   6.811              TRUE Business Development
## 116       Pamela Female  54585   4.166             FALSE              Product
## 117         <NA>   Male  76189  18.988                                  Legal
## 118       Steven   Male 109095   9.494             FALSE              Finance
## 119       Andrea Female 120204   9.557             FALSE Business Development
## 120         Paul   Male  41054  12.299             FALSE            Marketing
## 121        Peter   Male  84885  15.402             FALSE Business Development
## 122     Kathleen   <NA> 119735   18.74             FALSE              Product
## 123    Christina Female 110169  13.892              TRUE          Engineering
## 124        Helen Female  73789  14.841              TRUE              Product
## 125      Marilyn Female  76078   2.401              TRUE              Product
## 126     Patricia Female  49368  14.226             FALSE      Human Resources
## 127     Michelle Female  57325  17.179              TRUE      Human Resources
## 128      William   Male  66521    5.83             FALSE      Human Resources
## 129      Kenneth   Male 127654  16.439              TRUE      Client Services
## 130      Antonio   Male  60866  13.101              TRUE Business Development
## 131      Shirley Female  41334   6.219              TRUE      Human Resources
## 132      Rebecca Female  94231  17.517             FALSE              Product
## 133       Carlos   Male 146670  10.763             FALSE      Human Resources
## 134         Lois Female 106317   2.235              TRUE      Client Services
## 135      Carolyn Female 109260    2.65              TRUE Business Development
## 136       Gloria Female 134148   8.833              TRUE                Legal
## 137        Henry   Male  43542  19.687             FALSE                Legal
## 138         Adam   Male  95327   15.12             FALSE         Distribution
## 139       Ashley Female 112238    6.03              TRUE          Engineering
## 140         <NA> Female 132373  10.527                                   <NA>
## 141      Shirley Female 113850   1.854             FALSE            Marketing
## 142         Adam   Male 110194  14.727              TRUE              Product
## 143    Elizabeth Female 146129   5.687             FALSE              Finance
## 144       Teresa   <NA> 140013   8.689              TRUE          Engineering
## 145       Nicole   <NA> 122717  12.452             FALSE                Sales
## 146     Jennifer Female  71715  13.079              TRUE      Client Services
## 147     Patricia Female  95322    4.15              n.a.              Product
## 148       Philip   Male  89227   3.996             FALSE                Legal
## 149      Patrick   <NA> 124488  14.837              TRUE                Sales
## 150         <NA> Female  86230   8.578                           Distribution
## 151         Sean   Male 135490  19.934             FALSE            Marketing
## 152      Brandon   <NA> 121333  15.295             FALSE Business Development
## 153         Ruth Female 129297   8.067              TRUE      Client Services
## 154       Victor   <NA>  84546  10.489              TRUE              Finance
## 155      Rebecca Female  85730   5.359              TRUE              Product
## 156        Marie Female 104058  17.695              TRUE Business Development
## 157       Howard   Male 105062   1.563             FALSE      Human Resources
## 158         <NA> Female  79536  14.443                                Product
## 159        Norma Female 114412   8.756              TRUE            Marketing
## 160        James   <NA>  68501  14.316             FALSE            Marketing
## 161        Kathy Female 149563  16.991              TRUE              Finance
## 162      Marilyn   <NA> 103386  11.451             FALSE         Distribution
## 163       Arthur   Male  89786  14.422              TRUE                Legal
## 164        Terry   Male  52226  19.135             FALSE      Client Services
## 165         Mary Female 134645  18.197             FALSE Business Development
## 166         <NA> Female  59148   9.061                                  Legal
## 167         <NA> Female  42341   7.014                                  Sales
## 168  Christopher   Male 142178  17.984              TRUE                Sales
## 169        Peter   <NA>  38989   7.017              TRUE            Marketing
## 170     Michelle Female  53754   5.455              TRUE              Product
## 171        Irene Female 133772   19.02              TRUE                Sales
## 172      Patrick   Male 143499  17.495              TRUE          Engineering
## 173         Sara Female  97058   9.402             FALSE              Finance
## 174      Lillian Female  85446   3.959              TRUE          Engineering
## 175         <NA>   <NA>  40297   6.185                        Client Services
## 176       Willie   Male 146651   1.451              TRUE          Engineering
## 177       Victor   Male 124486  10.166             FALSE              Product
## 178        Wayne   Male 102652  14.085              TRUE         Distribution
## 179         Jane Female 144474  17.648             FALSE              Product
## 180      Jessica Female  68759  19.343              TRUE              Finance
## 181         Mark   Male  57286   5.025              TRUE Business Development
## 182        Randy   Male  58129   1.952              TRUE         Distribution
## 183      Lillian Female 123940  12.184              TRUE Business Development
## 184         Ruth   <NA>  98233   2.518              TRUE         Distribution
## 185        Jerry   Male 140810   9.177              TRUE      Client Services
## 186       Sandra Female  42090   8.842              TRUE                Legal
## 187         <NA> Female 149654   1.825                                  Sales
## 188        Roger   Male  51430    6.46             FALSE            Marketing
## 189      Charles   Male  71749  15.931             FALSE                Legal
## 190     Clarence   Male  85700   1.215             FALSE                Sales
## 191        Carol Female  57783   9.129             FALSE              Finance
## 192         Lois Female  36946   6.652             FALSE          Engineering
## 193      Barbara   <NA>  99326  16.475              TRUE                Legal
## 194      Rebecca Female 109259   4.443              TRUE                Legal
## 195        Irene Female 131038   8.996             FALSE         Distribution
## 196       Ronald   Male 121068  12.757              TRUE              Product
## 197       Steven   Male  62719  19.127             FALSE      Client Services
## 198      Carolyn Female  69268   3.031             FALSE      Client Services
## 199        Maria Female  36067    9.64              TRUE              Product
## 200     Jonathan   Male 130581  16.736              TRUE                 <NA>
## 201         Gary   Male  89661   8.525             FALSE              Finance
## 202     Kimberly Female  36643   7.953             FALSE            Marketing
## 203        Roger   Male 140558   5.084              TRUE                Sales
## 204        Diana Female 103521   2.784              TRUE Business Development
## 205       Willie   Male  55281   4.935              TRUE            Marketing
## 206      Brandon   Male 115711   8.012              TRUE              Finance
## 207     Patricia Female 114079   8.399              TRUE      Client Services
## 208    Katherine Female  57531   1.767             FALSE      Client Services
## 209     Jonathan   Male 141069   4.903             FALSE      Human Resources
## 210        Emily Female  89434  11.295             FALSE          Engineering
## 211         Ruth Female  44639   9.148              TRUE                Legal
## 212        Linda Female 110967  19.612              TRUE      Human Resources
## 213         Lisa Female 115387   1.821             FALSE      Client Services
## 214       Evelyn Female  81673  15.364              TRUE          Engineering
## 215        Julie Female 109588    3.55             FALSE          Engineering
## 216         Mary Female  92544     3.8             FALSE      Client Services
## 217      Matthew   Male 142373   2.462             FALSE            Marketing
## 218      Douglas   Male  83341   1.015              TRUE      Client Services
## 219      Gregory   <NA>  98865  10.628              TRUE              Finance
## 220        Billy   Male 120444   7.768              TRUE              Finance
## 221         <NA> Female  71945    5.56                              Marketing
## 222      Gregory   Male 109564   3.845             FALSE                Legal
## 223        Jason   Male  78417   3.067             FALSE              Finance
## 224       Daniel   Male 106947  15.866              TRUE                Legal
## 225        Sarah Female  87298   2.311             FALSE         Distribution
## 226        Harry   Male  64579  15.266              TRUE                Sales
## 227        Kathy Female  50905   9.404              TRUE              Finance
## 228         Todd   Male  59728  11.226              TRUE      Client Services
## 229        Bobby   Male  51685  17.445              TRUE          Engineering
## 230       Jeremy   Male  49542   1.679              TRUE         Distribution
## 231       George   <NA>  38375   8.999             FALSE                Legal
## 232         <NA> Female  64084  17.018             FALSE              Product
## 233      Marilyn Female 147663  10.263             FALSE      Human Resources
## 234      Kathryn Female  73935  14.752             FALSE                Sales
## 235        Irene Female  40837  12.182              TRUE      Client Services
## 236        Norma Female  94393   3.643              TRUE          Engineering
## 237        Laura Female  42087   2.624             FALSE              Product
## 238       Cheryl Female  52080   9.375             FALSE                Legal
## 239        Kevin   Male  35061   5.128             FALSE                Legal
## 240      Lillian   <NA>  64164  17.612             FALSE      Human Resources
## 241      Phyllis   <NA>  94088  19.107             FALSE         Distribution
## 242       Walter   Male 127813   5.961             FALSE              Finance
## 243       Robert   Male  38041  18.428              TRUE          Engineering
## 244        Jimmy   Male 126310   5.413              TRUE              Product
## 245     Clarence   Male 142561   8.866             FALSE      Client Services
## 246       Victor   Male  70817  17.138             FALSE          Engineering
## 247         Fred   <NA>  59937  12.045              TRUE      Human Resources
## 248       Brenda   <NA> 106115   3.742              TRUE              Product
## 249       Justin   Male  82782   4.366                 ?         Distribution
## 250        Betty Female 104896   19.55              TRUE Business Development
## 251        Randy   <NA> 133943    8.94              TRUE                Sales
## 252       Sharon   <NA>  83658   6.513             FALSE Business Development
## 253        Ralph   Male  71896   4.232              TRUE            Marketing
## 254         <NA>   <NA> 113732  10.391                                  Sales
## 255       Edward   Male 110485   7.995             FALSE      Client Services
## 256       Denise Female 115118   5.108             FALSE      Human Resources
## 257        Debra Female  48696    4.75             FALSE                Legal
## 258         <NA>   Male  42676  15.517                                  Sales
## 259      Michael   Male  43586  12.659             FALSE                 <NA>
## 260        Henry   Male  89258  15.585              TRUE      Human Resources
## 261       Gloria Female  90730   2.491             FALSE         Distribution
## 262        Marie Female 100308  13.677             FALSE              Product
## 263         Anne Female  69134   3.723              TRUE          Engineering
## 264        Sarah   <NA>  58295  14.603              TRUE          Engineering
## 265    Stephanie Female  50141  13.218              TRUE      Human Resources
## 266          Roy   Male 101941    3.45             FALSE      Client Services
## 267         <NA> Female 115145  14.063                           Distribution
## 268         <NA>   Male  40451  16.044                           Distribution
## 269        Irene Female  56526    5.81              TRUE              Finance
## 270         <NA> Female 145316  18.517                        Human Resources
## 271       Thomas   Male 103235   9.554              TRUE      Human Resources
## 272        Frank   <NA>  58563  12.062              TRUE            Marketing
## 273         Fred   Male  74129  18.225             FALSE              Product
## 274     Nicholas   Male  74669   1.113              TRUE              Product
## 275       Howard   Male  97490    5.46              TRUE            Marketing
## 276        Debra Female 104250  18.456              TRUE Business Development
## 277    Christine Female  94345  11.996             FALSE              Finance
## 278       Brenda   <NA>  82439  19.062             FALSE                Sales
## 279        Betty Female  51613  12.984             FALSE         Distribution
## 280         Ruby Female 105946   1.139             FALSE Business Development
## 281        Kevin   Male  46080   9.635             FALSE Business Development
## 282       Arthur   Male 134610   6.147              TRUE                 n.a.
## 283         Carl   <NA> 125104  12.345             FALSE      Client Services
## 284         Todd   Male 107281   1.612              TRUE          Engineering
## 285       Brenda Female 141521   4.337             FALSE      Client Services
## 286         Judy Female  65931   2.304             FALSE      Human Resources
## 287         Todd   Male  69989  10.985              TRUE              Finance
## 288         Lois Female 147183   9.999              TRUE      Client Services
## 289       Walter   <NA>  66757  18.099             FALSE         Distribution
## 290      Jessica Female  75145   6.388              TRUE                Legal
## 291       Jeremy   Male 129460  13.657              TRUE                 <NA>
## 292        Tammy Female 132839  17.463              TRUE      Client Services
## 293         Anne Female  44537  18.284              TRUE      Client Services
## 294        Jesse   Male 118733   9.653             FALSE            Marketing
## 295     Virginia Female  46905  19.154             FALSE         Distribution
## 296        Jesse   Male  79582   3.873             FALSE                Legal
## 297       Jeremy   <NA>  55394    3.18              TRUE                Sales
## 298       Daniel   Male 123811   7.664              TRUE      Human Resources
## 299      Patrick   <NA> 137314   4.542              TRUE            Marketing
## 300        Emily Female  36711  19.028              TRUE      Human Resources
## 301         Alan   Male 111786   3.592              TRUE          Engineering
## 302        James   Male  72257  13.023             FALSE          Engineering
## 303         Adam   Male  71276   5.027              TRUE      Human Resources
## 304         Joan   <NA>  38712   3.657             FALSE      Client Services
## 305       Jeremy   Male  46930  18.702              TRUE              Finance
## 306     Margaret Female 125220   3.733             FALSE            Marketing
## 307         Mark   Male 121477   17.44              TRUE Business Development
## 308      Marilyn Female  86386   2.937             FALSE         Distribution
## 309       Cheryl Female  81308   2.196              TRUE                Legal
## 310     Benjamin   Male  84810  15.794             FALSE            Marketing
## 311       Harold   Male  66775   2.158              TRUE                Legal
## 312       Ernest   <NA>  72145  13.448              TRUE              Finance
## 313       Gerald   Male 121604   1.923              TRUE                Sales
## 314         Judy Female 109510  13.457              TRUE                Legal
## 315        Bobby   Male 112117   6.338             FALSE                 <NA>
## 316        Bobby   <NA> 108127  15.858             FALSE      Client Services
## 317        Marie Female 123711  10.966             FALSE              Product
## 318        Kathy Female 132381    8.34             FALSE Business Development
## 319          Roy   Male 148225   1.841             FALSE              Finance
## 320   Jacqueline Female   n.a.  18.243             FALSE            Marketing
## 321         <NA> Female  62960  14.356                                  Sales
## 322        Julie Female  56926   7.507             FALSE                Sales
## 323      Douglas   Male  41428  14.372             FALSE              Product
## 324        Linda Female 115658   3.041              TRUE                Sales
## 325         Ruby Female  76707   6.031             FALSE Business Development
## 326      Russell   Male  60388  17.885             FALSE      Client Services
## 327      Jeffrey   Male  45150  12.075              TRUE              Product
## 328        Aaron   Male  58755   5.097              TRUE            Marketing
## 329       Samuel   Male  76076   5.319              TRUE              Finance
## 330         <NA>   Male  87760  14.987                            Engineering
## 331        Randy   Male 135119   7.887             FALSE      Client Services
## 332       Evelyn Female  36759  17.269              TRUE            Marketing
## 333         Fred   Male 121723  18.645              TRUE              Product
## 334      Shirley Female 110061   7.494             FALSE      Client Services
## 335         Todd   Male  85074   1.844             FALSE      Client Services
## 336       Robert   Male  85799   19.93             FALSE              Finance
## 337         Mark   Male  75150  12.182              TRUE                Legal
## 338         Earl   Male  91344   4.035              TRUE                Sales
## 339      Richard   Male  86326  10.717             FALSE              Product
## 340      Michael   Male  98753  16.443              TRUE      Human Resources
## 341       Steven   Male 113060   2.846              TRUE                Sales
## 342       Carlos   Male  77327   11.58              TRUE              Finance
## 343        Marie Female  62666  10.247             FALSE              Product
## 344       Ronald   Male  96633    4.99              TRUE          Engineering
## 345        Scott   Male  58248   3.914             FALSE Business Development
## 346       Steven   Male  83706    6.96              TRUE      Human Resources
## 347        James   Male  74086   5.778              TRUE      Human Resources
## 348         Lori Female  95389  14.223             FALSE                Sales
## 349       Philip   Male 129968  19.897             FALSE              Finance
## 350      Phyllis Female      ?   8.723             FALSE                Sales
## 351       Thomas   <NA>  41549    3.95             FALSE                Sales
## 352      Cynthia Female  74287  10.751             FALSE                Sales
## 353         <NA>   Male  69906   4.844                            Engineering
## 354         <NA>   Male  65078   3.095                              Marketing
## 355    Elizabeth Female 106406   1.782              TRUE                Legal
## 356       Andrea Female 115913  12.121             FALSE                Legal
## 357         Judy Female  38092   5.668             FALSE         Distribution
## 358      Russell   Male 121160   7.843             FALSE          Engineering
## 359        Scott   Male  90429    4.45             FALSE              Product
## 360        Sarah Female  64207   7.824              TRUE      Client Services
## 361        Susan Female  80688  18.892              TRUE                Sales
## 362     Margaret   <NA>  55044   4.078             FALSE                Sales
## 363       Joshua   Male  72893   9.555             FALSE         Distribution
## 364       Justin   <NA>  96978  13.865             FALSE              Finance
## 365         Juan   Male  97364   3.595             FALSE              Product
## 366       Gloria   <NA> 140885   1.113             FALSE      Human Resources
## 367     Jennifer   <NA>  58520  16.231              TRUE      Client Services
## 368       Edward   Male  66067  10.957              TRUE                 <NA>
## 369      Marilyn Female 147183   8.748             FALSE Business Development
## 370         Mary Female  87721  12.484             FALSE              Product
## 371        Linda Female 144001   2.194             FALSE Business Development
## 372        Larry   Male  91133    5.14             FALSE                Sales
## 373       Albert   Male  67827  19.717              TRUE          Engineering
## 374      Kenneth   Male  81839  12.072             FALSE                Sales
## 375         <NA> Female  81444   3.171                           Distribution
## 376      Gregory   <NA> 137661   4.805              TRUE                Sales
## 377      Brandon   Male 112548   4.664             FALSE          Engineering
## 378      Stephen   Male 111249  10.574              TRUE      Human Resources
## 379      Russell   Male 114334   9.669             FALSE              Product
## 380         <NA> Female 118906   4.537                              Marketing
## 381       Pamela   <NA>  72979    5.66             FALSE                Sales
## 382        Scott   Male  64172  10.894              TRUE          Engineering
## 383         <NA> Female 107024  12.182                                   <NA>
## 384       Carlos   Male  50167   2.362             FALSE              Product
## 385         Jack   Male 106995  15.723             FALSE              Product
## 386        Debra Female  70492   8.895             FALSE      Client Services
## 387      Phyllis Female 125881  16.697             FALSE                Sales
## 388       Robert   Male 123294  19.894             FALSE                 <NA>
## 389       Gloria Female 131045  11.312              TRUE              Product
## 390       Sharon   <NA>  97635  10.413              TRUE      Client Services
## 391      Barbara   <NA>  94493  13.443              TRUE                Sales
## 392      Marilyn Female 140502   9.989              TRUE                Sales
## 393        Ralph   Male 106310    4.03              TRUE      Client Services
## 394        Harry   <NA>  46240   6.976              TRUE          Engineering
## 395        Robin Female 111163   5.025              TRUE              Product
## 396        Kathy Female  93753   7.094              TRUE                Sales
## 397         Rose Female  63494  19.385              TRUE      Human Resources
## 398     Clarence   Male 116693  13.835              TRUE         Distribution
## 399        Doris Female  85215   7.425              TRUE      Human Resources
## 400      Kathryn Female  86439   7.799             FALSE              Finance
## 401        Billy   <NA>  62913  18.241              TRUE            Marketing
## 402        Norma Female  38872   9.302              TRUE Business Development
## 403      Richard   <NA> 124655  14.272              TRUE          Engineering
## 404        Craig   Male 113506  19.642             FALSE            Marketing
## 405        Sarah   <NA> 109980    8.86             FALSE                Sales
## 406       Sharon Female  91522   7.564             FALSE              Finance
## 407         Mary Female  42214  17.538              TRUE Business Development
## 408        Steve   <NA>  83159    9.55              TRUE         Distribution
## 409          Joe   Male 144082    1.62              TRUE      Client Services
## 410       Jeremy   Male  47885  19.276              TRUE      Human Resources
## 411       Ernest   Male 126232   6.191              TRUE                Sales
## 412        Kevin   Male 134598  11.699             FALSE Business Development
## 413   Jacqueline Female  66604  14.609             FALSE                Legal
## 414       Johnny   Male 115194   5.861              TRUE            Marketing
## 415         Gary   Male  49101    11.9              TRUE Business Development
## 416      Shirley   <NA>  67811  12.699             FALSE              Finance
## 417         <NA>   Male 111043   5.966                           Distribution
## 418        Sarah   <NA>  37748   9.047             FALSE      Human Resources
## 419        Julia Female  36403   2.664              TRUE              Finance
## 420      Dorothy Female 140136    3.12              TRUE Business Development
## 421       Justin   Male 121508  19.334              TRUE              Finance
## 422        Kevin   Male  79906  18.021              TRUE                Sales
## 423       Victor   Male 123144  16.261              TRUE              Product
## 424      Deborah Female  46953  17.436             FALSE      Client Services
## 425      Matthew   <NA>  79443  14.637             FALSE      Human Resources
## 426        Alice Female  51395   2.378              TRUE              Finance
## 427         Todd   Male 134408    3.56              TRUE      Human Resources
## 428        Frank   Male      ?   8.037              TRUE                Sales
## 429  Christopher   Male  68028   7.869              TRUE              Finance
## 430         Rose Female 149903    5.63             FALSE      Human Resources
## 431       Andrea Female  79123  19.422             FALSE         Distribution
## 432      Charles   Male 104014   7.077             FALSE                Legal
## 433      Jessica   <NA> 121160  12.993             FALSE      Client Services
## 434        Wanda Female  65362   7.132              TRUE                Legal
## 435        Joyce Female  50701  14.227              TRUE                 <NA>
## 436        Billy   Male 144709  10.069              TRUE         Distribution
## 437      Kathryn Female  53061  11.864              TRUE              Finance
## 438       Denise Female  36697  11.196              TRUE                Sales
## 439        Jason   Male  69244    6.22              TRUE                 <NA>
## 440     Nicholas   Male 101036   2.826              TRUE      Human Resources
## 441        Aaron   Male  52119  11.343              TRUE      Client Services
## 442       Louise Female  46666   1.886              TRUE Business Development
## 443        Julie Female  73437   2.518              TRUE              Finance
## 444        Kathy Female  86318  18.492              TRUE                Sales
## 445         <NA>   Male  76409   7.008                           Distribution
## 446        Chris   Male  71642   1.496             FALSE                 <NA>
## 447       Cheryl Female  67150   15.85              TRUE            Marketing
## 448      Gregory   Male 142208  11.204              TRUE          Engineering
## 449        Wayne   Male  81183  17.066             FALSE      Client Services
## 450      Beverly Female 107163   3.665              TRUE      Human Resources
## 451       Willie   Male  55038  19.691              n.a.                Legal
## 452        Terry   <NA> 140002   19.49              TRUE            Marketing
## 453        Scott   Male 146812   1.965              TRUE            Marketing
## 454        Annie Female  40119   4.338              TRUE      Human Resources
## 455         <NA>   Male 136602  10.429                              Marketing
## 456         Ruth Female  69579  18.029              TRUE      Human Resources
## 457      Deborah   <NA> 101457   6.662             FALSE          Engineering
## 458     Patricia Female 121232  16.624             FALSE                Legal
## 459       Albert   Male 102626  15.843             FALSE              Finance
## 460      Charles   <NA> 137171   5.574              TRUE            Marketing
## 461         Tina Female  88276  14.248             FALSE                Legal
## 462       Jeremy   Male  43354  13.946             FALSE            Marketing
## 463        Craig   Male 125556   7.996             FALSE          Engineering
## 464         Jose   Male  59862   3.269             FALSE              Product
## 465     Lawrence   Male  74640  18.641             FALSE            Marketing
## 466        Helen Female  52875   4.188             FALSE                Legal
## 467       Walter   Male  58789   5.461             FALSE                Sales
## 468          Amy Female 122897   8.222              TRUE          Engineering
## 469       Janice Female 136032  10.696              TRUE            Marketing
## 470      Stephen   Male 141958   9.078              TRUE                Legal
## 471         Ryan   Male 139917  11.466             FALSE         Distribution
## 472       Sharon Female 147635  14.693             FALSE      Human Resources
## 473       Joshua   Male  68230   8.047             FALSE      Client Services
## 474     Clarence   Male 124365   8.457             FALSE Business Development
## 475     Jonathan   Male 104749  11.364             FALSE          Engineering
## 476    Stephanie Female 122121   7.937              TRUE          Engineering
## 477        Kathy Female 143541   8.461             FALSE      Human Resources
## 478       Albert   Male 137840   9.705             FALSE Business Development
## 479      Michael   Male  73354  18.357             FALSE Business Development
## 480      Richard   Male  47647  18.787              TRUE                 <NA>
## 481          Amy Female 106249  15.967             FALSE                Sales
## 482         <NA> Female  93847   1.085                   Business Development
## 483      Beverly   <NA> 104815    3.38             FALSE              Product
## 484         <NA> Female 115436   3.099                        Human Resources
## 485          Joe   Male  50645  11.119             FALSE            Marketing
## 486       Ashley Female 142415   1.985              TRUE              Finance
## 487       Howard   Male  37984   2.021             FALSE         Distribution
## 488       Cheryl Female  71751  15.918             FALSE Business Development
## 489       Robert   Male 135882  19.944             FALSE                Legal
## 490       Sharon Female  46007  19.731              TRUE      Client Services
## 491       Judith Female 117055   7.461             FALSE            Marketing
## 492     Nicholas   <NA>  58478   6.525              TRUE                 <NA>
## 493        Jerry   Male 121357  18.845             FALSE Business Development
## 494        Craig   Male  44857  13.266             FALSE              Finance
## 495      Barbara Female  47322    7.25              TRUE                Sales
## 496       Eugene   Male  81077   2.117             FALSE                Sales
## 497       Johnny   Male  76394   5.437              TRUE            Marketing
## 498     Benjamin   Male 114356   7.123             FALSE      Client Services
## 499        Diana Female  41831   4.548             FALSE Business Development
## 500      Barbara   <NA>  90187  14.764              TRUE         Distribution
## 501    Christine Female  72613  11.126             FALSE                Legal
## 502         Sean   Male  42748   9.765             FALSE         Distribution
## 503        Sarah Female  37259   1.763             FALSE      Client Services
## 504        Roger   Male 125033   4.887              TRUE            Marketing
## 505         <NA> Female  38275  10.494                           Distribution
## 506         <NA>   Male  71520  13.248                            Engineering
## 507       Donald   Male 106472   6.499              TRUE      Client Services
## 508     Clarence   Male 103684  18.654              TRUE          Engineering
## 509        Scott   Male  96111  15.592             FALSE      Human Resources
## 510          Ann Female  90719    6.22             FALSE Business Development
## 511        Frank   Male 140303   17.07             FALSE                Legal
## 512          Joe   Male  62161    8.13              TRUE Business Development
## 513        Wanda Female  78883  19.695             FALSE                 <NA>
## 514        Jimmy   Male  63549  19.624             FALSE                 <NA>
## 515          Amy   <NA>  63888  18.115              TRUE              Product
## 516        Larry   Male  97370  10.458             FALSE                Sales
## 517       Gloria Female  66224  15.979              TRUE                Legal
## 518         Ruth Female  97915   3.997              TRUE      Human Resources
## 519        Diana Female  86883  18.003              TRUE              Product
## 520      Raymond   Male  37812   3.178             FALSE      Human Resources
## 521        Peter   Male  56580   8.411              TRUE                 <NA>
## 522         <NA>   <NA>  83895  17.971                            Engineering
## 523    Catherine Female  58047  14.858              TRUE         Distribution
## 524        Diane Female 124889  15.026              TRUE                Sales
## 525        Peter   <NA> 118840  14.509              TRUE      Client Services
## 526        Steve   Male  67780    9.54              TRUE      Human Resources
## 527      Barbara Female 144677   8.696             FALSE              Finance
## 528        Helen   <NA>  45724   1.022             FALSE              Product
## 529        Jimmy   <NA>  86676   7.175              TRUE              Product
## 530  Christopher   Male  82401   7.252             FALSE      Client Services
## 531     Kathleen Female  35575  14.595             FALSE         Distribution
## 532     Virginia Female 123649  10.154              TRUE            Marketing
## 533         Lisa Female  38078   10.28              TRUE              Product
## 534         Earl   Male  52620  13.773             FALSE              Product
## 535       Gerald   <NA> 133366  12.292             FALSE                Legal
## 536       Louise Female  91462   8.205             FALSE            Marketing
## 537     Clarence   Male 146589   4.905              TRUE Business Development
## 538      Cynthia Female  51633  13.472              TRUE Business Development
## 539         Adam   Male  45181   3.491             FALSE      Human Resources
## 540       Justin   Male  62454   3.459              TRUE                Sales
## 541         <NA> Female  84746    4.82                                Finance
## 542         Ruby Female 147362   7.851              TRUE          Engineering
## 543       Amanda Female  80803  14.077              TRUE         Distribution
## 544         Anna Female 117293   2.366             FALSE      Client Services
## 545        Roger   Male 105689   13.32              TRUE            Marketing
## 546        Julie   <NA>  93302   9.048              TRUE              Product
## 547       Joseph   Male 102555   3.672              TRUE              Product
## 548       Evelyn Female  51525  10.366             FALSE              Finance
## 549       Janice Female  41190   3.311              TRUE                Sales
## 550         Sara Female  75484   3.186             FALSE              Finance
## 551      Gregory   Male  82726  16.304              TRUE            Marketing
## 552       Philip   Male 122319  19.122             FALSE          Engineering
## 553      Barbara Female 127297  11.905              TRUE              Product
## 554       Amanda Female 109290  13.853             FALSE          Engineering
## 555      Phyllis   <NA>  99150   6.007             FALSE      Client Services
## 556         Anne Female  71930  18.451              TRUE              Product
## 557       Arthur   Male  66819   6.639              TRUE            Marketing
## 558         Jane Female  42424  18.115             FALSE         Distribution
## 559        Linda Female  51431  13.295             FALSE          Engineering
## 560       Ashley Female 120675    6.51             FALSE      Human Resources
## 561        Shawn   Male  96610   2.097              TRUE      Client Services
## 562     Kathleen Female  71430   8.572             FALSE          Engineering
## 563         Sara   <NA>  87713  18.863              TRUE                Legal
## 564        Randy   Male  86723  14.842             FALSE          Engineering
## 565       Andrew   Male  43414   7.563              TRUE      Client Services
## 566      Marilyn Female  87145   17.33             FALSE      Human Resources
## 567       Johnny   Male  91124  12.986              TRUE         Distribution
## 568         <NA> Female  48141  12.605                                   <NA>
## 569        Susan Female  90829  19.142             FALSE            Marketing
## 570         Ruby Female 101262   6.773             FALSE      Client Services
## 571        Kelly Female  41427   1.431             FALSE              Product
## 572     Jonathan   Male  56993  18.623             FALSE            Marketing
## 573      Raymond   Male 114244   16.69             FALSE      Human Resources
## 574     Kimberly Female  81800   5.435              TRUE                 <NA>
## 575         <NA> Female 118736   7.421                        Client Services
## 576         John   Male  66077   5.809              TRUE              Product
## 577      Michael   Male  35013  14.879             FALSE              Product
## 578        Jerry   Male  98393  11.393             FALSE      Client Services
## 579       Amanda Female 107111   1.438              TRUE              Product
## 580       Harold   Male  65673   1.187              TRUE                Legal
## 581        Harry   Male  65482  18.089             FALSE                 <NA>
## 582       Ernest   Male  81919  15.118             FALSE            Marketing
## 583        Harry   Male  59277    10.2             FALSE              Finance
## 584        Diane Female  49501  13.506             FALSE Business Development
## 585         Jack   Male  70367  18.266              TRUE                Legal
## 586      Shirley Female 132156   2.754             FALSE            Marketing
## 587         Rose Female  56961   7.585             FALSE            Marketing
## 588       Cheryl Female  98841   8.945              TRUE      Client Services
## 589      Cynthia Female  78226   2.419             FALSE      Human Resources
## 590       Sandra Female 116931   9.657              TRUE      Human Resources
## 591       Andrea Female  87575  13.346              TRUE Business Development
## 592       Rachel Female 110924   7.808             FALSE         Distribution
## 593       Justin   Male  78351  15.221             FALSE                Sales
## 594        Marie Female 125574   4.644             FALSE                Sales
## 595        Louis   Male  95198   2.075             FALSE Business Development
## 596        Nancy Female 121006   3.512              TRUE              Finance
## 597         Mark   Male  95728   6.752              TRUE Business Development
## 598       Teresa Female  69740   8.294             FALSE         Distribution
## 599      Frances Female  91996  11.506             FALSE              Product
## 600         <NA> Female  98385  10.925                        Human Resources
## 601      Barbara   <NA>  90556  15.749              TRUE         Distribution
## 602    Christine   <NA>  50366   9.862              TRUE            Marketing
## 603        Bobby   <NA>  84232  15.704              TRUE          Engineering
## 604         Carl   <NA> 100888   12.49              TRUE Business Development
## 605        Bruce   Male 141335  15.427              TRUE          Engineering
## 606         Rose Female  97691   2.142             FALSE      Client Services
## 607      Mildred Female  47266  10.256             FALSE      Client Services
## 608         <NA>   Male 139754   12.74                                  Sales
## 609         <NA> Female 116236  17.274                            Engineering
## 610         Todd   Male 103405   15.91             FALSE                Sales
## 611         Alan   Male  41453  10.084             FALSE              Product
## 612    Elizabeth   <NA>  52730  12.525             FALSE            Marketing
## 613        Julie   <NA>  60361   7.099              TRUE         Distribution
## 614       Teresa   <NA>  63103  11.378             FALSE              Finance
## 615         Eric   Male  65168  11.513             FALSE         Distribution
## 616       Edward   Male  73105   6.083              TRUE          Engineering
## 617     Kimberly Female  37916  12.929              TRUE         Distribution
## 618        Kathy Female  45682   1.451              TRUE            Marketing
## 619        Peter   Male  69297   1.268             FALSE              Finance
## 620      Beverly Female  59070  19.064              TRUE          Engineering
## 621        Ralph   Male  81215     8.4             FALSE              Finance
## 622       Justin   Male 128036  18.824             FALSE      Client Services
## 623        Irene Female  89780   8.999              TRUE                Sales
## 624        Irene Female 125018   9.601              TRUE                Legal
## 625       Jeremy   Male 133033    12.2             FALSE                Sales
## 626      Cynthia Female  82408   8.701              TRUE            Marketing
## 627         <NA> Female 131755    2.93                                   <NA>
## 628         Anne Female 128305  16.636             FALSE            Marketing
## 629         <NA>   <NA> 147309   1.702                           Distribution
## 630        Debra Female  74911  19.513             FALSE            Marketing
## 631      Michael   Male  47079   2.617             FALSE                Legal
## 632        Jason   <NA>  82873   1.869             FALSE      Client Services
## 633      Rebecca Female 134673   6.878             FALSE          Engineering
## 634       Andrea Female 123591     6.5              TRUE          Engineering
## 635         Carl   Male  75598  19.289             FALSE                 <NA>
## 636        Randy   Male  89831  13.047              TRUE                 <NA>
## 637      Marilyn Female  92430   2.924             FALSE          Engineering
## 638        Wayne   Male 126956  18.396             FALSE      Human Resources
## 639      Frances Female 112467   1.433             FALSE Business Development
## 640       Amanda   <NA>  46665  19.391              TRUE      Client Services
## 641     Kathleen Female  42553   3.756              TRUE         Distribution
## 642       Amanda Female 102081   5.203             FALSE                Sales
## 643       Amanda   <NA> 135118  15.108             FALSE              Finance
## 644        Shawn   Male  71975    9.77             FALSE      Human Resources
## 645    Katherine Female 149908  18.912             FALSE              Finance
## 646         Anna   <NA>  45418  10.162             FALSE            Marketing
## 647    Elizabeth Female  79145   19.78             FALSE              Finance
## 648       Donald   Male 122920    5.32             FALSE                 <NA>
## 649      Gregory   Male 128031  15.857              TRUE Business Development
## 650         Ruth Female  59969  14.064              TRUE              Finance
## 651      Cynthia Female  35381  11.749             FALSE              Finance
## 652      Deborah Female 113129  17.371             FALSE      Client Services
## 653       Willie   Male 141932   1.017              TRUE          Engineering
## 654        Debra Female  42296  16.922             FALSE              Product
## 655         Carl   Male  63395  11.411             FALSE                Legal
## 656       Walter   Male 125382  13.613              TRUE      Client Services
## 657         Lisa Female 113592  17.108              TRUE            Marketing
## 658      Rebecca Female  46750  11.367              TRUE                Sales
## 659      Jessica Female  90285  13.591              TRUE      Client Services
## 660        Steve   <NA>  53692   4.785              TRUE            Marketing
## 661         Mary   <NA> 100341   6.662             FALSE         Distribution
## 662        Craig   Male 123876   4.225             FALSE          Engineering
## 663    Katherine Female  41643   4.659              TRUE         Distribution
## 664       Andrea Female 113760  12.866              TRUE         Distribution
## 665      Timothy   Male  49473  12.463             FALSE                Legal
## 666      Anthony   Male 146141   3.645              TRUE         Distribution
## 667       Harold   <NA>  79459   7.776              TRUE Business Development
## 668      Douglas   <NA> 104496  14.771              TRUE            Marketing
## 669        James   Male  67789  17.105              TRUE                Legal
## 670       Joseph   <NA>  86564  11.879              TRUE                 <NA>
## 671       Martha Female 135758  14.782              TRUE      Client Services
## 672        Laura Female  84672    3.96             FALSE         Distribution
## 673       Robert   Male 111580  10.982             FALSE      Human Resources
## 674        Ralph   Male  50455  16.248             FALSE Business Development
## 675         <NA>   Male  88733   1.932                        Human Resources
## 676        Diane Female 130577  12.791             FALSE            Marketing
## 677        Annie Female 138925   9.801              TRUE            Marketing
## 678   Jacqueline Female  62371   15.77              TRUE      Client Services
## 679       Brenda Female  87715   2.675             FALSE              Product
## 680         Lori Female  66029   3.345              TRUE      Client Services
## 681       Albert   Male  86818  14.301              TRUE          Engineering
## 682        Frank   Male  75147  17.398             FALSE              Product
## 683      Deborah Female 105573   5.268              TRUE              Product
## 684        Robin   <NA>  41230   6.611              TRUE         Distribution
## 685        Alice Female 117787  10.485             FALSE                 <NA>
## 686         <NA>   Male  88086  19.387                                Finance
## 687         Paul   <NA>  91462  18.704             FALSE              Finance
## 688         <NA>   Male 139959   8.992                           Distribution
## 689        Brian   Male  93901  17.821              TRUE                Legal
## 690        Julie   <NA>  50529  17.263             FALSE              Finance
## 691        Bruce   Male 134988  13.926              TRUE                Sales
## 692         Joan Female 120941   3.694              TRUE              Finance
## 693       Harold   Male 118753   8.162             FALSE                Sales
## 694        Alice Female  92799   2.782             FALSE                Sales
## 695      Barbara Female  85718  13.326             FALSE      Client Services
## 696        Sarah Female 109517   5.146             FALSE      Human Resources
## 697       Samuel   Male  85550  11.593              TRUE            Marketing
## 698         <NA>   Male 136655   9.801                           Distribution
## 699      Barbara   <NA>  43312   8.218              TRUE                Legal
## 700          Amy   <NA> 102839  10.385              TRUE         Distribution
## 701        Frank   Male  78891   7.927              TRUE         Distribution
## 702        Robin   <NA>  93201  11.712              TRUE                Legal
## 703        Debra Female  84693   6.976              TRUE              Finance
## 704      Brandon   <NA>  73587   9.769              TRUE      Human Resources
## 705       Thomas   Male  65251  11.211             FALSE         Distribution
## 706        Harry   Male 129148  15.193              TRUE          Engineering
## 707         Todd   Male 128175  18.473              TRUE                 <NA>
## 708     Patricia Female  75825   7.839             FALSE          Engineering
## 709        Steve   Male  51821   1.197              TRUE                Legal
## 710         <NA>   Male  60411  12.048                        Human Resources
## 711       Edward   Male  58327  17.095              TRUE              Finance
## 712        Karen Female  46478  16.552             FALSE          Engineering
## 713       Martin   <NA> 123963  15.745              TRUE          Engineering
## 714          Ann Female  79796   9.851             FALSE         Distribution
## 715     Jonathan   Male  83809  12.922             FALSE         Distribution
## 716        Peter   Male  77933  13.132              TRUE          Engineering
## 717         Eric   Male  51070  13.806              TRUE Business Development
## 718        Jason   <NA>  97480  11.518             FALSE      Human Resources
## 719        Terry   Male  58357   7.969             FALSE          Engineering
## 720         John   Male  67165  13.001             FALSE          Engineering
## 721        Marie Female 145988  18.685              TRUE      Human Resources
## 722       Harold   Male 147417  11.626              TRUE Business Development
## 723       Joshua   Male  95003   5.197              TRUE            Marketing
## 724      Brandon   <NA> 144187  11.416              TRUE                Legal
## 725       Andrea Female  37888   13.47             FALSE          Engineering
## 726       Jeremy   Male 131513   1.876              TRUE              Finance
## 727       Daniel   Male  77287      13              TRUE                 <NA>
## 728          Roy   Male  46875  12.942              TRUE                Sales
## 729      Dorothy Female  82744  19.111              TRUE      Client Services
## 730       Steven   Male  43252  18.892             FALSE      Client Services
## 731       Nicole Female  66047  18.674              TRUE            Marketing
## 732          Ann Female 118431  12.772              TRUE              Finance
## 733        Henry   Male  59943   1.432             FALSE              Finance
## 734         <NA>   Male 136681   3.655                   Business Development
## 735         <NA> Female 138807     1.4                        Human Resources
## 736      Barbara Female  82884   6.837              TRUE Business Development
## 737         Judy Female  48668  11.716              TRUE              Product
## 738     Lawrence   Male 122971  14.618             FALSE              Product
## 739       Joseph   Male 107050  12.737             FALSE Business Development
## 740       Carlos   Male 138598  14.737             FALSE                Sales
## 741      Russell   <NA> 149456   3.533             FALSE            Marketing
## 742         Jane Female 128540  15.941             FALSE                Legal
## 743       Martin   Male  61117   2.844             FALSE      Client Services
## 744       Steven   Male 100949  13.813              TRUE      Human Resources
## 745         Judy   <NA>  46829    5.19              TRUE              Finance
## 746        James   Male  69111  14.625              TRUE Business Development
## 747       Gloria Female  46602   1.027              TRUE Business Development
## 748       Ernest   Male  61181  16.559             FALSE Business Development
## 749       Martha Female  94963  19.626              TRUE Business Development
## 750        Janet   <NA>  85789   9.712             FALSE                Legal
## 751        Louis   <NA> 145274  16.379             FALSE              Product
## 752         Tina Female 102841   3.369             FALSE      Client Services
## 753        Henry   Male  49665  18.338             FALSE Business Development
## 754      Antonio   Male  41928   5.478              TRUE                 <NA>
## 755         Lisa Female 128042    2.03              TRUE                Legal
## 756       Denise Female  86150   3.997             FALSE      Client Services
## 757      Stephen   Male 121816  10.615              TRUE         Distribution
## 758     Benjamin   Male 123409   7.783             FALSE              Product
## 759         Carl   <NA>  98295   7.617              TRUE                Legal
## 760         Ruth Female  59678  10.895             FALSE         Distribution
## 761        Shawn   Male  57871    4.02              TRUE      Human Resources
## 762     Jennifer Female 132084  10.006              TRUE          Engineering
## 763        Terry   Male  35633   3.947              TRUE         Distribution
## 764   Jacqueline Female 125298   3.019              TRUE                Legal
## 765        Roger   Male 115582  15.343              TRUE                Sales
## 766        Alice   <NA> 148339  11.479              TRUE              Finance
## 767       Justin   Male 112975   9.699             FALSE      Human Resources
## 768        Janet Female  36927  18.769             FALSE      Client Services
## 769        Robin Female  70248    9.66              TRUE                Sales
## 770       Samuel   <NA> 141305   9.849              TRUE            Marketing
## 771    Katherine Female  72002  13.178              TRUE              Product
## 772        Peter   Male 102577  12.026              TRUE              Product
## 773      Lillian Female 113554  18.018              TRUE Business Development
## 774         <NA>   Male  47176  10.736                                Finance
## 775         <NA> Female 106428  10.867                                   <NA>
## 776         Rose Female  75181    6.06              TRUE              Finance
## 777        Bobby   Male  79047  18.784             FALSE      Human Resources
## 778     Kimberly Female  52970   4.513             FALSE                Sales
## 779      Antonio   Male 137979   5.266             FALSE         Distribution
## 780        Doris Female 114360  17.799              TRUE Business Development
## 781       Steven   Male 110306  16.843              TRUE      Human Resources
## 782     Lawrence   Male  46378   9.127             FALSE                 <NA>
## 783         <NA>   Male  74104   17.68                        Client Services
## 784         <NA>   <NA> 132505  13.592                                Product
## 785    Stephanie Female  96649   3.453             FALSE                Sales
## 786         <NA> Female  57811   8.941                              Marketing
## 787       Sandra Female 111468    2.58              TRUE              Product
## 788        Kevin   Male 141498   4.135              TRUE                Sales
## 789     Michelle   <NA> 124441  16.353             FALSE Business Development
## 790         Lisa Female  40121   6.293             FALSE              Product
## 791         Fred   Male 129712  11.058             FALSE              Finance
## 792       Joseph   Male 126010  19.601             FALSE                Legal
## 793         Anne   <NA> 122762   9.564             FALSE         Distribution
## 794       Andrea Female 149105  13.707              TRUE         Distribution
## 795       Nicole Female  44021  10.286             FALSE                 <NA>
## 796      Theresa   <NA>  42025   3.319              TRUE      Human Resources
## 797    Catherine Female  59970  12.801             FALSE      Client Services
## 798         Rose Female 145001   19.85             FALSE          Engineering
## 799     Lawrence   Male 102589  17.952              TRUE            Marketing
## 800      Raymond   Male  47529   2.712              TRUE              Product
## 801     Clarence   Male 148941  11.517             FALSE              Product
## 802         Ryan   Male  91109  17.499              TRUE      Human Resources
## 803         Jane Female  51923  13.623             FALSE Business Development
## 804         Lois Female  53954  19.075             FALSE              Product
## 805        Shawn   Male  39335  10.664             FALSE         Distribution
## 806       Thomas   Male 111371  15.081              TRUE          Engineering
## 807      Kathryn Female  86676   6.081             FALSE                Sales
## 808         Mary Female 115057   2.089             FALSE              Finance
## 809        Julie Female 145357   3.459             FALSE          Engineering
## 810       George   Male  36749  19.754             FALSE              Finance
## 811        Ralph   Male  89854   7.227             FALSE Business Development
## 812       Judith Female 134048   6.818              TRUE            Marketing
## 813         <NA>   Male 141311   5.478                                Product
## 814       Evelyn Female 123621  19.767              TRUE            Marketing
## 815       Rachel Female  54941   3.221              TRUE         Distribution
## 816        Maria   <NA> 106562       4             FALSE      Human Resources
## 817        Kelly Female  39371   4.068             FALSE          Engineering
## 818         Ruby Female  83112   4.083             FALSE            Marketing
## 819          Ann Female  96941  10.048              TRUE         Distribution
## 820      Kenneth   Male  47232  17.862              TRUE                Legal
## 821      William   Male  54058   5.182              TRUE      Human Resources
## 822         <NA> Female  80399  12.254                           Distribution
## 823      Deborah Female 118043   7.266              TRUE Business Development
## 824         <NA>   Male 109411   9.494                            Engineering
## 825        Julia Female  97566   2.147             FALSE            Marketing
## 826       Robert   <NA>  69267    5.89              TRUE                Sales
## 827         <NA>   <NA>  87103   5.665                                   <NA>
## 828        Jesse   <NA>  98811   7.487             FALSE                Legal
## 829      Cynthia Female 149684   7.864             FALSE              Product
## 830       Johnny   Male  71383   2.097              TRUE      Human Resources
## 831      Michael   Male  81206  19.908              TRUE         Distribution
## 832      Kenneth   Male  69112   7.588              TRUE              Finance
## 833        Keith   Male 120672  19.467             FALSE                Legal
## 834       Gerald   Male  96511   9.331             FALSE      Human Resources
## 835         Carl   <NA>  49325   2.071              TRUE Business Development
## 836      Douglas   Male 132175    2.28             FALSE          Engineering
## 837      Carolyn Female 118037  13.492             FALSE              Finance
## 838       Joseph   Male 139570  15.804              TRUE              Finance
## 839        Billy   Male 115280   9.153             FALSE         Distribution
## 840        Joyce Female  51065  16.807             FALSE      Human Resources
## 841      Lillian Female 103854   4.924              TRUE         Distribution
## 842         Ruby Female  48354  19.501             FALSE Business Development
## 843       Sandra Female 132327  19.264             FALSE              Product
## 844       Louise Female 106362   8.965             FALSE                Sales
## 845        Maria   <NA> 148857   8.738             FALSE                Legal
## 846       Brenda Female 131131  11.682             FALSE      Client Services
## 847      Stephen   Male 129663  15.574             FALSE      Human Resources
## 848       Nicole   <NA>  41449   4.707             FALSE              Finance
## 849        Bobby   Male  93368    2.88              TRUE              Product
## 850       Ernest   Male  53335   9.192             FALSE              Product
## 851      Charles   Male 148291   6.002             FALSE                 <NA>
## 852        Bobby   Male 147842  16.158              TRUE                 <NA>
## 853        Diana Female 105066  17.343              TRUE Business Development
## 854      Mildred Female 139284   11.39              TRUE                 <NA>
## 855        Harry   Male  63046  10.411             FALSE              Finance
## 856      Phillip   <NA>  89700   2.277              TRUE                 <NA>
## 857       Bonnie   <NA> 108946  12.211             FALSE              Finance
## 858        Marie Female  98406   7.943              TRUE              Product
## 859         Sean   Male 108581   9.634             FALSE      Human Resources
## 860        Robin Female  41808  19.239             FALSE Business Development
## 861      Phillip   Male  36837   14.66             FALSE            Marketing
## 862        Jerry   Male 140850  18.855             FALSE              Finance
## 863       Ronald   Male  50426  18.536              TRUE                Sales
## 864      Phillip   Male 134120   6.842             FALSE      Human Resources
## 865         Ryan   Male  57292    6.01             FALSE                 <NA>
## 866        Karen Female  80633  16.306             FALSE                Legal
## 867       Bonnie Female 131943  14.249             FALSE      Client Services
## 868         Earl   Male  48046   7.737             FALSE            Marketing
## 869    Katherine Female  97443  13.657             FALSE              Product
## 870      Matthew   Male 135352   7.986              TRUE Business Development
## 871      Cynthia   <NA> 107816  18.751             FALSE            Marketing
## 872       Gerald   Male  96329   2.469              TRUE          Engineering
## 873       Brenda Female  73749  19.332             FALSE Business Development
## 874        Jason   Male  75607   4.299              TRUE                Sales
## 875      Melissa Female  98858   3.525              TRUE      Human Resources
## 876      Beverly Female  76485   9.212              TRUE          Engineering
## 877        Terry   <NA>  41238   8.219             FALSE            Marketing
## 878         <NA>   Male 114896  13.823                        Client Services
## 879   Jacqueline Female 125418   8.064             FALSE         Distribution
## 880          Amy Female  75415  19.132             FALSE      Client Services
## 881       Robert   <NA>  90998   8.382             FALSE              Finance
## 882         Ruby Female 142868   6.318             FALSE            Marketing
## 883         Sara   <NA> 135990  14.344              TRUE         Distribution
## 884      Kathryn Female  57300  18.015             FALSE            Marketing
## 885         Lisa Female  73706   18.53             FALSE              Finance
## 886         Juan   Male  85871    9.16             FALSE              Product
## 887      Anthony   Male  96795  14.837             FALSE                Sales
## 888        David   Male  92242  15.407             FALSE                Legal
## 889      Marilyn Female 115149  11.934              TRUE                Legal
## 890     Margaret Female 126924   1.552              TRUE              Finance
## 891         <NA>   Male 145329     7.1                                Finance
## 892      Timothy   Male  92587   8.475             FALSE              Finance
## 893      Brandon   Male  60263   2.709             FALSE                Sales
## 894        Frank   Male  91406   5.681              TRUE Business Development
## 895        Betty Female  37005   7.645              TRUE            Marketing
## 896       Janice   <NA> 139791  16.968             FALSE Business Development
## 897       Victor   Male  45267   3.942              TRUE                Sales
## 898      Kenneth   <NA>  95296  10.146             FALSE              Finance
## 899         Jane Female  59680  15.211              TRUE Business Development
## 900       Walter   Male 144701  16.323              TRUE            Marketing
## 901    Christina Female  35477  18.178             FALSE      Human Resources
## 902     Patricia Female 119266   6.911             FALSE         Distribution
## 903         <NA>   Male 103877   6.322                           Distribution
## 904      Heather Female  47605  14.955              TRUE      Human Resources
## 905    Stephanie Female 136604    6.16              TRUE      Human Resources
## 906        Alice Female 121250   4.363              TRUE              Finance
## 907      Frances Female  35884  17.667             FALSE                Sales
## 908    Elizabeth Female 137144  10.081             FALSE              Finance
## 909       Janice Female 102697   3.283             FALSE          Engineering
## 910       Donald   Male  61999   6.466             FALSE              Product
## 911      Melissa Female  45223   8.879              TRUE                Legal
## 912         Carl   Male  54033  15.528              TRUE Business Development
## 913          Joe   Male 126120    1.02             FALSE                 <NA>
## 914         Lois Female  99747   6.168             FALSE            Marketing
## 915          Ann Female  71958   5.272              TRUE         Distribution
## 916         Todd   Male 115566   6.716              TRUE      Client Services
## 917      Marilyn Female 118369   7.696              TRUE Business Development
## 918        Shawn   Male  51667   6.339             FALSE              Product
## 919         Ryan   Male  85858  19.475             FALSE      Client Services
## 920         Sean   Male 131423   8.957             FALSE         Distribution
## 921         Rose Female  49538   9.828             FALSE      Client Services
## 922       George   Male  50369    18.9              TRUE                Sales
## 923       Arthur   Male  86615   3.412              TRUE Business Development
## 924        Irene   <NA> 135369    4.38             FALSE Business Development
## 925      Deborah Female  60003   9.624             FALSE      Client Services
## 926         <NA> Female  95866  19.388                                  Sales
## 927       Judith Female 109324  19.488             FALSE         Distribution
## 928       Philip   Male 103557  16.014              TRUE Business Development
## 929      Jeffrey   Male 111376   2.673              TRUE Business Development
## 930      Theresa Female  75661   1.079              TRUE                Legal
## 931        Nancy Female  85213   2.386              TRUE            Marketing
## 932       Harold   Male 140444   3.771             FALSE         Distribution
## 933       Bonnie Female  90427    2.01              TRUE      Client Services
## 934        Doris Female 141439   3.799             FALSE Business Development
## 935       Samuel   Male  43694   3.787              TRUE          Engineering
## 936        Alice Female 131952   12.09             FALSE          Engineering
## 937        Maria Female  96250  10.056             FALSE Business Development
## 938        Aaron   <NA>  63126  18.424             FALSE      Client Services
## 939         Mark   <NA>  44836   2.657             FALSE      Client Services
## 940        Ralph   <NA>  70635   2.147             FALSE      Client Services
## 941       Andrew   Male 137386   8.611              TRUE         Distribution
## 942      William   Male 104840  15.653              TRUE          Engineering
## 943         Lori Female  75498   6.537              TRUE            Marketing
## 944        Wayne   Male  67471   2.728             FALSE          Engineering
## 945      Kenneth   Male 101914   1.905              TRUE         Distribution
## 946       Gerald   <NA>  93712  17.426              TRUE         Distribution
## 947         <NA> Female 133472  16.941                           Distribution
## 948         <NA>   Male 107351   5.329                              Marketing
## 949       Ashley Female 142410  11.048              TRUE          Engineering
## 950        Scott   Male  37385   8.226              TRUE Business Development
## 951        Paula Female  58423  10.833             FALSE Business Development
## 952         <NA> Female 143638   9.662                                   <NA>
## 953       Teresa Female 113425  11.907              TRUE      Human Resources
## 954        Randy   Male  57266  14.077             FALSE              Product
## 955          Joe   Male 119667   1.148              TRUE              Finance
## 956        Sarah Female 127118  11.176             FALSE                Legal
## 957      Beverly Female  80838   8.115             FALSE          Engineering
## 958      Jeffrey   Male  70990  15.901              TRUE                Sales
## 959       Gloria Female  39833   9.631             FALSE          Engineering
## 960       Albert   Male  45094    5.85              TRUE Business Development
## 961      Stephen   Male  93997  18.093              TRUE Business Development
## 962      Antonio   <NA> 103050    3.05             FALSE                Legal
## 963     Jonathan   Male 121797  16.923             FALSE              Product
## 964          Ann Female  89443   17.94              TRUE                Sales
## 965        Bruce   Male  35802  12.391              TRUE                Sales
## 966    Catherine Female  68164  18.393             FALSE      Client Services
## 967        Louis   Male  93022   9.146              TRUE      Human Resources
## 968       Thomas   Male 105681  19.572             FALSE          Engineering
## 969       Louise Female  43050  11.671             FALSE         Distribution
## 970        Linda Female  44486  17.308              TRUE          Engineering
## 971        Alice Female  63571  15.397              TRUE              Product
## 972      Patrick   Male  75423   5.368              TRUE Business Development
## 973       Victor   <NA>  76381  11.159              TRUE                Sales
## 974      Russell   Male 137359  11.105             FALSE Business Development
## 975        Harry   Male  67656  16.455              TRUE      Client Services
## 976        Susan Female  92436  12.467             FALSE                Sales
## 977       Denise Female 137954   4.195              TRUE                Legal
## 978        Sarah Female 124566   5.949             FALSE              Product
## 979         Sean   Male  66146  11.178             FALSE      Human Resources
## 980       Ernest   Male 142935  13.198              TRUE              Product
## 981     Kimberly Female  46233   8.862              TRUE          Engineering
## 982        James   Male 148985   19.28             FALSE                Legal
## 983         Rose Female  91411   8.639              TRUE      Human Resources
## 984         John   Male 146907  11.738             FALSE          Engineering
## 985        Maria Female  43455   13.04             FALSE          Engineering
## 986      Stephen   <NA>  85668   1.909             FALSE                Legal
## 987        Donna Female  82871  17.999             FALSE            Marketing
## 988       Gloria Female 136709  10.331              TRUE              Finance
## 989        Alice Female  47638  11.209             FALSE      Human Resources
## 990       Justin   <NA>  38344   3.794             FALSE                Legal
## 991        Robin Female 100765  10.982              TRUE      Client Services
## 992         Rose Female 134505  11.051              TRUE            Marketing
## 993      Anthony   Male 112769  11.625              TRUE              Finance
## 994         Tina Female  56450   19.04              TRUE          Engineering
## 995       George   Male  98874   4.479              TRUE            Marketing
## 996        Henry   <NA> 132483  16.655             FALSE         Distribution
## 997      Phillip   Male  42392  19.675             FALSE              Finance
## 998      Russell   Male  96914   1.421             FALSE              Product
## 999        Larry   Male  60500  11.985             FALSE Business Development
## 1000      Albert   Male 129949  10.169              TRUE                Sales
# Scoped replacement: `replace_with_na_if()` applies the condition only to the
# columns for which the predicate (`is.character`) is TRUE.
# Only exact matches of the listed strings become NA; other spellings
# (e.g. "n.a.") are not in the set and are left unchanged.
employees %>%
  replace_with_na_if(
    .predicate = is.character,
    condition = ~ .x %in% c("", " ", "na", "NaN", "?")
  )
##       First.Name Gender Salary Bonus.. Senior.Management                 Team
## 1        Douglas   Male  97308   6.945              TRUE            Marketing
## 2         Thomas   Male  61933    <NA>              TRUE                 <NA>
## 3          Maria Female 130590  11.858             FALSE              Finance
## 4          Jerry   Male   <NA>    9.34              TRUE              Finance
## 5          Larry   Male 101004   1.389              TRUE      Client Services
## 6         Dennis   n.a. 115163  10.125             FALSE                Legal
## 7           Ruby Female  65476  10.012              TRUE              Product
## 8           <NA> Female  45906  11.598              <NA>              Finance
## 9         Angela   <NA>   <NA>  18.523              TRUE          Engineering
## 10       Frances Female 139852   7.524              TRUE Business Development
## 11        Louise Female  63241  15.132              TRUE                 <NA>
## 12         Julie Female 102508  12.637              TRUE                Legal
## 13       Brandon   Male 112807  17.492              TRUE      Human Resources
## 14          Gary   Male 109831   5.831             FALSE                Sales
## 15      Kimberly Female  41426    <NA>              TRUE              Finance
## 16       Lillian   <NA>  59414   1.256             FALSE              Product
## 17        Jeremy   Male  90370   7.369             FALSE      Human Resources
## 18         Shawn   Male 111737   6.414             FALSE                 <NA>
## 19         Diana Female 132940  19.082             FALSE      Client Services
## 20         Donna Female  81014   1.894             FALSE              Product
## 21          Lois   <NA>  64714   4.934              TRUE                Legal
## 22       Matthew   Male 100612  13.645             FALSE            Marketing
## 23        Joshua   <NA>  90816  18.816              TRUE      Client Services
## 24          <NA>   Male 125792   5.042              <NA>                 <NA>
## 25          John   Male  97950  13.873             FALSE      Client Services
## 26          <NA>   Male  37076  18.576              <NA>      Client Services
## 27         Craig   Male  37598   7.757              TRUE            Marketing
## 28         Scott   <NA> 122367   5.218             FALSE                Legal
## 29         Terry   Male 124008  13.464              TRUE      Client Services
## 30      Benjamin   Male  79529   7.008              TRUE                Legal
## 31     Christina Female 118780   9.096              TRUE          Engineering
## 32         Joyce   <NA>  88657  12.752             FALSE              Product
## 33          <NA>   Male 122340   6.417              <NA>                 <NA>
## 34          Jean Female 119082   16.18             FALSE Business Development
## 35         Jerry   Male  95734  19.096             FALSE      Client Services
## 36       Theresa Female  85182  16.675             FALSE                Sales
## 37        Rachel Female 142032  12.599             FALSE Business Development
## 38         Linda Female  57427   9.557              TRUE      Client Services
## 39     Stephanie Female  36844   5.574              TRUE Business Development
## 40          <NA>   Male 122173   7.797              <NA>      Client Services
## 41       Michael   Male  99283   2.665              TRUE         Distribution
## 42     Christine   <NA>  66582  11.308              TRUE Business Development
## 43       Beverly Female 121918  15.835             FALSE                Legal
## 44       Marilyn Female  73524   5.207              TRUE            Marketing
## 45       Cynthia Female 145146   7.482              TRUE              Product
## 46         Roger   Male  88010  13.886              TRUE                Sales
## 47         Bruce   Male 114796   6.796             FALSE              Finance
## 48         Kathy Female  66820    <NA>              TRUE      Client Services
## 49      Clarence   Male  93581   6.083              TRUE Business Development
## 50         Chris   <NA> 113590   3.055             FALSE                Sales
## 51         Nancy Female  94976   13.83              TRUE          Engineering
## 52          <NA>   <NA>  41126  14.009              <NA>                Sales
## 53          Todd   Male  49339   1.695              TRUE                 <NA>
## 54          Alan   <NA>  40341  17.578              TRUE              Finance
## 55          Sara Female  83677   8.999             FALSE          Engineering
## 56         Karen Female 102488  17.653              TRUE              Product
## 57          Carl   Male 130276  16.084              TRUE              Finance
## 58          <NA>   Male  64715  15.107              TRUE      Human Resources
## 59       Theresa Female  72670   1.481              TRUE          Engineering
## 60         Irene Female  66851  11.279             FALSE          Engineering
## 61         Paula   <NA>  48866   4.271             FALSE         Distribution
## 62        Denise Female 106862   3.699             FALSE Business Development
## 63          <NA> Female  58112  19.414              <NA>            Marketing
## 64       Matthew   Male  35203   18.04             FALSE      Human Resources
## 65      Kathleen   <NA>  77834  18.771             FALSE Business Development
## 66         Steve   Male  61310  12.428              TRUE         Distribution
## 67         Nancy Female 125250   2.672              TRUE Business Development
## 68        Rachel Female  51178   9.735              TRUE              Finance
## 69          Jose   Male  84834   14.33              TRUE              Finance
## 70         Irene   <NA> 100863   4.382              TRUE              Finance
## 71          Todd   <NA>  84692   6.617             FALSE      Client Services
## 72        Johnny   Male 118172  16.194              TRUE                Sales
## 73         Bobby   Male  54043   3.833             FALSE              Product
## 74       Frances Female  90582   4.709              TRUE                Sales
## 75        Thomas   Male  62096  17.029             FALSE            Marketing
## 76        Bonnie Female 104897    <NA>              TRUE      Human Resources
## 77      Margaret Female 131604   7.353              TRUE         Distribution
## 78       Charles   Male 107391    1.26              TRUE            Marketing
## 79         Robin Female 114797   5.965              TRUE                Sales
## 80          <NA> Female 115814    4.99             FALSE              Product
## 81        Gerald   <NA> 137126  15.602              TRUE                Sales
## 82   Christopher   Male  47369  14.822             FALSE                Legal
## 83        Steven   Male  35095   8.379              TRUE      Client Services
## 84         Shawn   Male 148115   6.539              TRUE              Finance
## 85         Doris Female  83072   7.511             FALSE              Finance
## 86        Jeremy   Male 100238   3.887              TRUE      Client Services
## 87         Annie   <NA> 103495   17.29              TRUE Business Development
## 88         Annie Female 144887   8.276              TRUE                Sales
## 89         Donna Female  64088   6.155              TRUE                Legal
## 90        Janice Female  51082  11.955             FALSE                Legal
## 91        Janice   <NA>  91719  11.583              TRUE                Legal
## 92         James   <NA> 128771   8.309             FALSE                 <NA>
## 93         Linda Female 119009  12.506              TRUE Business Development
## 94      Virginia   <NA> 111858   1.601              TRUE                Legal
## 95         Harry   Male 130620    7.03             FALSE                Legal
## 96       Heather Female  43026  14.166             FALSE      Client Services
## 97       Cynthia Female 142321   1.737              <NA>              Finance
## 98         Laura   <NA> 140371   10.62              TRUE            Marketing
## 99          Tina Female 100705  16.961              TRUE            Marketing
## 100       Harold   Male  77544  12.447             FALSE Business Development
## 101      Melissa Female  48109  14.995             FALSE              Finance
## 102        Aaron   Male  61602  11.849              TRUE            Marketing
## 103         Jack   Male 103902  12.159             FALSE      Client Services
## 104      Phyllis Female 136984   8.932              TRUE              Finance
## 105         John   Male  80740  19.305             FALSE            Marketing
## 106        Kathy Female  91712   8.567             FALSE              Finance
## 107         Paul   Male  42146   3.046             FALSE                Legal
## 108       Steven   Male  68680  16.565             FALSE                Legal
## 109      Russell   <NA> 133980  12.396              TRUE                Legal
## 110  Christopher   Male  37919  11.449             FALSE                 <NA>
## 111      Shirley Female 147113  16.135             FALSE                Legal
## 112       Bonnie Female  42153   8.454              TRUE Business Development
## 113       Willie   Male  64363   4.023             FALSE            Marketing
## 114         Tina Female 114767   3.711              TRUE          Engineering
## 115       Ashley Female  58698   6.811              TRUE Business Development
## 116       Pamela Female  54585   4.166             FALSE              Product
## 117         <NA>   Male  76189  18.988              <NA>                Legal
## 118       Steven   Male 109095   9.494             FALSE              Finance
## 119       Andrea Female 120204   9.557             FALSE Business Development
## 120         Paul   Male  41054  12.299             FALSE            Marketing
## 121        Peter   Male  84885  15.402             FALSE Business Development
## 122     Kathleen   <NA> 119735   18.74             FALSE              Product
## 123    Christina Female 110169  13.892              TRUE          Engineering
## 124        Helen Female  73789  14.841              TRUE              Product
## 125      Marilyn Female  76078   2.401              TRUE              Product
## 126     Patricia Female  49368  14.226             FALSE      Human Resources
## 127     Michelle Female  57325  17.179              TRUE      Human Resources
## 128      William   Male  66521    5.83             FALSE      Human Resources
## 129      Kenneth   Male 127654  16.439              TRUE      Client Services
## 130      Antonio   Male  60866  13.101              TRUE Business Development
## 131      Shirley Female  41334   6.219              TRUE      Human Resources
## 132      Rebecca Female  94231  17.517             FALSE              Product
## 133       Carlos   Male 146670  10.763             FALSE      Human Resources
## 134         Lois Female 106317   2.235              TRUE      Client Services
## 135      Carolyn Female 109260    2.65              TRUE Business Development
## 136       Gloria Female 134148   8.833              TRUE                Legal
## 137        Henry   Male  43542  19.687             FALSE                Legal
## 138         Adam   Male  95327   15.12             FALSE         Distribution
## 139       Ashley Female 112238    6.03              TRUE          Engineering
## 140         <NA> Female 132373  10.527              <NA>                 <NA>
## 141      Shirley Female 113850   1.854             FALSE            Marketing
## 142         Adam   Male 110194  14.727              TRUE              Product
## 143    Elizabeth Female 146129   5.687             FALSE              Finance
## 144       Teresa   <NA> 140013   8.689              TRUE          Engineering
## 145       Nicole   <NA> 122717  12.452             FALSE                Sales
## 146     Jennifer Female  71715  13.079              TRUE      Client Services
## 147     Patricia Female  95322    4.15              n.a.              Product
## 148       Philip   Male  89227   3.996             FALSE                Legal
## 149      Patrick   <NA> 124488  14.837              TRUE                Sales
## 150         <NA> Female  86230   8.578              <NA>         Distribution
## 151         Sean   Male 135490  19.934             FALSE            Marketing
## 152      Brandon   <NA> 121333  15.295             FALSE Business Development
## 153         Ruth Female 129297   8.067              TRUE      Client Services
## 154       Victor   <NA>  84546  10.489              TRUE              Finance
## 155      Rebecca Female  85730   5.359              TRUE              Product
## 156        Marie Female 104058  17.695              TRUE Business Development
## 157       Howard   Male 105062   1.563             FALSE      Human Resources
## 158         <NA> Female  79536  14.443              <NA>              Product
## 159        Norma Female 114412   8.756              TRUE            Marketing
## 160        James   <NA>  68501  14.316             FALSE            Marketing
## 161        Kathy Female 149563  16.991              TRUE              Finance
## 162      Marilyn   <NA> 103386  11.451             FALSE         Distribution
## 163       Arthur   Male  89786  14.422              TRUE                Legal
## 164        Terry   Male  52226  19.135             FALSE      Client Services
## 165         Mary Female 134645  18.197             FALSE Business Development
## 166         <NA> Female  59148   9.061              <NA>                Legal
## 167         <NA> Female  42341   7.014              <NA>                Sales
## 168  Christopher   Male 142178  17.984              TRUE                Sales
## 169        Peter   <NA>  38989   7.017              TRUE            Marketing
## 170     Michelle Female  53754   5.455              TRUE              Product
## 171        Irene Female 133772   19.02              TRUE                Sales
## 172      Patrick   Male 143499  17.495              TRUE          Engineering
## 173         Sara Female  97058   9.402             FALSE              Finance
## 174      Lillian Female  85446   3.959              TRUE          Engineering
## 175         <NA>   <NA>  40297   6.185              <NA>      Client Services
## 176       Willie   Male 146651   1.451              TRUE          Engineering
## 177       Victor   Male 124486  10.166             FALSE              Product
## 178        Wayne   Male 102652  14.085              TRUE         Distribution
## 179         Jane Female 144474  17.648             FALSE              Product
## 180      Jessica Female  68759  19.343              TRUE              Finance
## 181         Mark   Male  57286   5.025              TRUE Business Development
## 182        Randy   Male  58129   1.952              TRUE         Distribution
## 183      Lillian Female 123940  12.184              TRUE Business Development
## 184         Ruth   <NA>  98233   2.518              TRUE         Distribution
## 185        Jerry   Male 140810   9.177              TRUE      Client Services
## 186       Sandra Female  42090   8.842              TRUE                Legal
## 187         <NA> Female 149654   1.825              <NA>                Sales
## 188        Roger   Male  51430    6.46             FALSE            Marketing
## 189      Charles   Male  71749  15.931             FALSE                Legal
## 190     Clarence   Male  85700   1.215             FALSE                Sales
## 191        Carol Female  57783   9.129             FALSE              Finance
## 192         Lois Female  36946   6.652             FALSE          Engineering
## 193      Barbara   <NA>  99326  16.475              TRUE                Legal
## 194      Rebecca Female 109259   4.443              TRUE                Legal
## 195        Irene Female 131038   8.996             FALSE         Distribution
## 196       Ronald   Male 121068  12.757              TRUE              Product
## 197       Steven   Male  62719  19.127             FALSE      Client Services
## 198      Carolyn Female  69268   3.031             FALSE      Client Services
## 199        Maria Female  36067    9.64              TRUE              Product
## 200     Jonathan   Male 130581  16.736              TRUE                 <NA>
## 201         Gary   Male  89661   8.525             FALSE              Finance
## 202     Kimberly Female  36643   7.953             FALSE            Marketing
## 203        Roger   Male 140558   5.084              TRUE                Sales
## 204        Diana Female 103521   2.784              TRUE Business Development
## 205       Willie   Male  55281   4.935              TRUE            Marketing
## 206      Brandon   Male 115711   8.012              TRUE              Finance
## 207     Patricia Female 114079   8.399              TRUE      Client Services
## 208    Katherine Female  57531   1.767             FALSE      Client Services
## 209     Jonathan   Male 141069   4.903             FALSE      Human Resources
## 210        Emily Female  89434  11.295             FALSE          Engineering
## 211         Ruth Female  44639   9.148              TRUE                Legal
## 212        Linda Female 110967  19.612              TRUE      Human Resources
## 213         Lisa Female 115387   1.821             FALSE      Client Services
## 214       Evelyn Female  81673  15.364              TRUE          Engineering
## 215        Julie Female 109588    3.55             FALSE          Engineering
## 216         Mary Female  92544     3.8             FALSE      Client Services
## 217      Matthew   Male 142373   2.462             FALSE            Marketing
## 218      Douglas   Male  83341   1.015              TRUE      Client Services
## 219      Gregory   <NA>  98865  10.628              TRUE              Finance
## 220        Billy   Male 120444   7.768              TRUE              Finance
## 221         <NA> Female  71945    5.56              <NA>            Marketing
## 222      Gregory   Male 109564   3.845             FALSE                Legal
## 223        Jason   Male  78417   3.067             FALSE              Finance
## 224       Daniel   Male 106947  15.866              TRUE                Legal
## 225        Sarah Female  87298   2.311             FALSE         Distribution
## 226        Harry   Male  64579  15.266              TRUE                Sales
## 227        Kathy Female  50905   9.404              TRUE              Finance
## 228         Todd   Male  59728  11.226              TRUE      Client Services
## 229        Bobby   Male  51685  17.445              TRUE          Engineering
## 230       Jeremy   Male  49542   1.679              TRUE         Distribution
## 231       George   <NA>  38375   8.999             FALSE                Legal
## 232         <NA> Female  64084  17.018             FALSE              Product
## 233      Marilyn Female 147663  10.263             FALSE      Human Resources
## 234      Kathryn Female  73935  14.752             FALSE                Sales
## 235        Irene Female  40837  12.182              TRUE      Client Services
## 236        Norma Female  94393   3.643              TRUE          Engineering
## 237        Laura Female  42087   2.624             FALSE              Product
## 238       Cheryl Female  52080   9.375             FALSE                Legal
## 239        Kevin   Male  35061   5.128             FALSE                Legal
## 240      Lillian   <NA>  64164  17.612             FALSE      Human Resources
## 241      Phyllis   <NA>  94088  19.107             FALSE         Distribution
## 242       Walter   Male 127813   5.961             FALSE              Finance
## 243       Robert   Male  38041  18.428              TRUE          Engineering
## 244        Jimmy   Male 126310   5.413              TRUE              Product
## 245     Clarence   Male 142561   8.866             FALSE      Client Services
## 246       Victor   Male  70817  17.138             FALSE          Engineering
## 247         Fred   <NA>  59937  12.045              TRUE      Human Resources
## 248       Brenda   <NA> 106115   3.742              TRUE              Product
## 249       Justin   Male  82782   4.366              <NA>         Distribution
## 250        Betty Female 104896   19.55              TRUE Business Development
## 251        Randy   <NA> 133943    8.94              TRUE                Sales
## 252       Sharon   <NA>  83658   6.513             FALSE Business Development
## 253        Ralph   Male  71896   4.232              TRUE            Marketing
## 254         <NA>   <NA> 113732  10.391              <NA>                Sales
## 255       Edward   Male 110485   7.995             FALSE      Client Services
## 256       Denise Female 115118   5.108             FALSE      Human Resources
## 257        Debra Female  48696    4.75             FALSE                Legal
## 258         <NA>   Male  42676  15.517              <NA>                Sales
## 259      Michael   Male  43586  12.659             FALSE                 <NA>
## 260        Henry   Male  89258  15.585              TRUE      Human Resources
## 261       Gloria Female  90730   2.491             FALSE         Distribution
## 262        Marie Female 100308  13.677             FALSE              Product
## 263         Anne Female  69134   3.723              TRUE          Engineering
## 264        Sarah   <NA>  58295  14.603              TRUE          Engineering
## 265    Stephanie Female  50141  13.218              TRUE      Human Resources
## 266          Roy   Male 101941    3.45             FALSE      Client Services
## 267         <NA> Female 115145  14.063              <NA>         Distribution
## 268         <NA>   Male  40451  16.044              <NA>         Distribution
## 269        Irene Female  56526    5.81              TRUE              Finance
## 270         <NA> Female 145316  18.517              <NA>      Human Resources
## 271       Thomas   Male 103235   9.554              TRUE      Human Resources
## 272        Frank   <NA>  58563  12.062              TRUE            Marketing
## 273         Fred   Male  74129  18.225             FALSE              Product
## 274     Nicholas   Male  74669   1.113              TRUE              Product
## 275       Howard   Male  97490    5.46              TRUE            Marketing
## 276        Debra Female 104250  18.456              TRUE Business Development
## 277    Christine Female  94345  11.996             FALSE              Finance
## 278       Brenda   <NA>  82439  19.062             FALSE                Sales
## 279        Betty Female  51613  12.984             FALSE         Distribution
## 280         Ruby Female 105946   1.139             FALSE Business Development
## 281        Kevin   Male  46080   9.635             FALSE Business Development
## 282       Arthur   Male 134610   6.147              TRUE                 n.a.
## 283         Carl   <NA> 125104  12.345             FALSE      Client Services
## 284         Todd   Male 107281   1.612              TRUE          Engineering
## 285       Brenda Female 141521   4.337             FALSE      Client Services
## 286         Judy Female  65931   2.304             FALSE      Human Resources
## 287         Todd   Male  69989  10.985              TRUE              Finance
## 288         Lois Female 147183   9.999              TRUE      Client Services
## 289       Walter   <NA>  66757  18.099             FALSE         Distribution
## 290      Jessica Female  75145   6.388              TRUE                Legal
## 291       Jeremy   Male 129460  13.657              TRUE                 <NA>
## 292        Tammy Female 132839  17.463              TRUE      Client Services
## 293         Anne Female  44537  18.284              TRUE      Client Services
## 294        Jesse   Male 118733   9.653             FALSE            Marketing
## 295     Virginia Female  46905  19.154             FALSE         Distribution
## 296        Jesse   Male  79582   3.873             FALSE                Legal
## 297       Jeremy   <NA>  55394    3.18              TRUE                Sales
## 298       Daniel   Male 123811   7.664              TRUE      Human Resources
## 299      Patrick   <NA> 137314   4.542              TRUE            Marketing
## 300        Emily Female  36711  19.028              TRUE      Human Resources
## 301         Alan   Male 111786   3.592              TRUE          Engineering
## 302        James   Male  72257  13.023             FALSE          Engineering
## 303         Adam   Male  71276   5.027              TRUE      Human Resources
## 304         Joan   <NA>  38712   3.657             FALSE      Client Services
## 305       Jeremy   Male  46930  18.702              TRUE              Finance
## 306     Margaret Female 125220   3.733             FALSE            Marketing
## 307         Mark   Male 121477   17.44              TRUE Business Development
## 308      Marilyn Female  86386   2.937             FALSE         Distribution
## 309       Cheryl Female  81308   2.196              TRUE                Legal
## 310     Benjamin   Male  84810  15.794             FALSE            Marketing
## 311       Harold   Male  66775   2.158              TRUE                Legal
## 312       Ernest   <NA>  72145  13.448              TRUE              Finance
## 313       Gerald   Male 121604   1.923              TRUE                Sales
## 314         Judy Female 109510  13.457              TRUE                Legal
## 315        Bobby   Male 112117   6.338             FALSE                 <NA>
## 316        Bobby   <NA> 108127  15.858             FALSE      Client Services
## 317        Marie Female 123711  10.966             FALSE              Product
## 318        Kathy Female 132381    8.34             FALSE Business Development
## 319          Roy   Male 148225   1.841             FALSE              Finance
## 320   Jacqueline Female   n.a.  18.243             FALSE            Marketing
## 321         <NA> Female  62960  14.356              <NA>                Sales
## 322        Julie Female  56926   7.507             FALSE                Sales
## 323      Douglas   Male  41428  14.372             FALSE              Product
## 324        Linda Female 115658   3.041              TRUE                Sales
## 325         Ruby Female  76707   6.031             FALSE Business Development
## 326      Russell   Male  60388  17.885             FALSE      Client Services
## 327      Jeffrey   Male  45150  12.075              TRUE              Product
## 328        Aaron   Male  58755   5.097              TRUE            Marketing
## 329       Samuel   Male  76076   5.319              TRUE              Finance
## 330         <NA>   Male  87760  14.987              <NA>          Engineering
## 331        Randy   Male 135119   7.887             FALSE      Client Services
## 332       Evelyn Female  36759  17.269              TRUE            Marketing
## 333         Fred   Male 121723  18.645              TRUE              Product
## 334      Shirley Female 110061   7.494             FALSE      Client Services
## 335         Todd   Male  85074   1.844             FALSE      Client Services
## 336       Robert   Male  85799   19.93             FALSE              Finance
## 337         Mark   Male  75150  12.182              TRUE                Legal
## 338         Earl   Male  91344   4.035              TRUE                Sales
## 339      Richard   Male  86326  10.717             FALSE              Product
## 340      Michael   Male  98753  16.443              TRUE      Human Resources
## 341       Steven   Male 113060   2.846              TRUE                Sales
## 342       Carlos   Male  77327   11.58              TRUE              Finance
## 343        Marie Female  62666  10.247             FALSE              Product
## 344       Ronald   Male  96633    4.99              TRUE          Engineering
## 345        Scott   Male  58248   3.914             FALSE Business Development
## 346       Steven   Male  83706    6.96              TRUE      Human Resources
## 347        James   Male  74086   5.778              TRUE      Human Resources
## 348         Lori Female  95389  14.223             FALSE                Sales
## 349       Philip   Male 129968  19.897             FALSE              Finance
## 350      Phyllis Female   <NA>   8.723             FALSE                Sales
## 351       Thomas   <NA>  41549    3.95             FALSE                Sales
## 352      Cynthia Female  74287  10.751             FALSE                Sales
## 353         <NA>   Male  69906   4.844              <NA>          Engineering
## 354         <NA>   Male  65078   3.095              <NA>            Marketing
## 355    Elizabeth Female 106406   1.782              TRUE                Legal
## 356       Andrea Female 115913  12.121             FALSE                Legal
## 357         Judy Female  38092   5.668             FALSE         Distribution
## 358      Russell   Male 121160   7.843             FALSE          Engineering
## 359        Scott   Male  90429    4.45             FALSE              Product
## 360        Sarah Female  64207   7.824              TRUE      Client Services
## 361        Susan Female  80688  18.892              TRUE                Sales
## 362     Margaret   <NA>  55044   4.078             FALSE                Sales
## 363       Joshua   Male  72893   9.555             FALSE         Distribution
## 364       Justin   <NA>  96978  13.865             FALSE              Finance
## 365         Juan   Male  97364   3.595             FALSE              Product
## 366       Gloria   <NA> 140885   1.113             FALSE      Human Resources
## 367     Jennifer   <NA>  58520  16.231              TRUE      Client Services
## 368       Edward   Male  66067  10.957              TRUE                 <NA>
## 369      Marilyn Female 147183   8.748             FALSE Business Development
## 370         Mary Female  87721  12.484             FALSE              Product
## 371        Linda Female 144001   2.194             FALSE Business Development
## 372        Larry   Male  91133    5.14             FALSE                Sales
## 373       Albert   Male  67827  19.717              TRUE          Engineering
## 374      Kenneth   Male  81839  12.072             FALSE                Sales
## 375         <NA> Female  81444   3.171              <NA>         Distribution
## 376      Gregory   <NA> 137661   4.805              TRUE                Sales
## 377      Brandon   Male 112548   4.664             FALSE          Engineering
## 378      Stephen   Male 111249  10.574              TRUE      Human Resources
## 379      Russell   Male 114334   9.669             FALSE              Product
## 380         <NA> Female 118906   4.537              <NA>            Marketing
## 381       Pamela   <NA>  72979    5.66             FALSE                Sales
## 382        Scott   Male  64172  10.894              TRUE          Engineering
## 383         <NA> Female 107024  12.182              <NA>                 <NA>
## 384       Carlos   Male  50167   2.362             FALSE              Product
## 385         Jack   Male 106995  15.723             FALSE              Product
## 386        Debra Female  70492   8.895             FALSE      Client Services
## 387      Phyllis Female 125881  16.697             FALSE                Sales
## 388       Robert   Male 123294  19.894             FALSE                 <NA>
## 389       Gloria Female 131045  11.312              TRUE              Product
## 390       Sharon   <NA>  97635  10.413              TRUE      Client Services
## 391      Barbara   <NA>  94493  13.443              TRUE                Sales
## 392      Marilyn Female 140502   9.989              TRUE                Sales
## 393        Ralph   Male 106310    4.03              TRUE      Client Services
## 394        Harry   <NA>  46240   6.976              TRUE          Engineering
## 395        Robin Female 111163   5.025              TRUE              Product
## 396        Kathy Female  93753   7.094              TRUE                Sales
## 397         Rose Female  63494  19.385              TRUE      Human Resources
## 398     Clarence   Male 116693  13.835              TRUE         Distribution
## 399        Doris Female  85215   7.425              TRUE      Human Resources
## 400      Kathryn Female  86439   7.799             FALSE              Finance
## 401        Billy   <NA>  62913  18.241              TRUE            Marketing
## 402        Norma Female  38872   9.302              TRUE Business Development
## 403      Richard   <NA> 124655  14.272              TRUE          Engineering
## 404        Craig   Male 113506  19.642             FALSE            Marketing
## 405        Sarah   <NA> 109980    8.86             FALSE                Sales
## 406       Sharon Female  91522   7.564             FALSE              Finance
## 407         Mary Female  42214  17.538              TRUE Business Development
## 408        Steve   <NA>  83159    9.55              TRUE         Distribution
## 409          Joe   Male 144082    1.62              TRUE      Client Services
## 410       Jeremy   Male  47885  19.276              TRUE      Human Resources
## 411       Ernest   Male 126232   6.191              TRUE                Sales
## 412        Kevin   Male 134598  11.699             FALSE Business Development
## 413   Jacqueline Female  66604  14.609             FALSE                Legal
## 414       Johnny   Male 115194   5.861              TRUE            Marketing
## 415         Gary   Male  49101    11.9              TRUE Business Development
## 416      Shirley   <NA>  67811  12.699             FALSE              Finance
## 417         <NA>   Male 111043   5.966              <NA>         Distribution
## 418        Sarah   <NA>  37748   9.047             FALSE      Human Resources
## 419        Julia Female  36403   2.664              TRUE              Finance
## 420      Dorothy Female 140136    3.12              TRUE Business Development
## 421       Justin   Male 121508  19.334              TRUE              Finance
## 422        Kevin   Male  79906  18.021              TRUE                Sales
## 423       Victor   Male 123144  16.261              TRUE              Product
## 424      Deborah Female  46953  17.436             FALSE      Client Services
## 425      Matthew   <NA>  79443  14.637             FALSE      Human Resources
## 426        Alice Female  51395   2.378              TRUE              Finance
## 427         Todd   Male 134408    3.56              TRUE      Human Resources
## 428        Frank   Male   <NA>   8.037              TRUE                Sales
## 429  Christopher   Male  68028   7.869              TRUE              Finance
## 430         Rose Female 149903    5.63             FALSE      Human Resources
## 431       Andrea Female  79123  19.422             FALSE         Distribution
## 432      Charles   Male 104014   7.077             FALSE                Legal
## 433      Jessica   <NA> 121160  12.993             FALSE      Client Services
## 434        Wanda Female  65362   7.132              TRUE                Legal
## 435        Joyce Female  50701  14.227              TRUE                 <NA>
## 436        Billy   Male 144709  10.069              TRUE         Distribution
## 437      Kathryn Female  53061  11.864              TRUE              Finance
## 438       Denise Female  36697  11.196              TRUE                Sales
## 439        Jason   Male  69244    6.22              TRUE                 <NA>
## 440     Nicholas   Male 101036   2.826              TRUE      Human Resources
## 441        Aaron   Male  52119  11.343              TRUE      Client Services
## 442       Louise Female  46666   1.886              TRUE Business Development
## 443        Julie Female  73437   2.518              TRUE              Finance
## 444        Kathy Female  86318  18.492              TRUE                Sales
## 445         <NA>   Male  76409   7.008              <NA>         Distribution
## 446        Chris   Male  71642   1.496             FALSE                 <NA>
## 447       Cheryl Female  67150   15.85              TRUE            Marketing
## 448      Gregory   Male 142208  11.204              TRUE          Engineering
## 449        Wayne   Male  81183  17.066             FALSE      Client Services
## 450      Beverly Female 107163   3.665              TRUE      Human Resources
## 451       Willie   Male  55038  19.691              n.a.                Legal
## 452        Terry   <NA> 140002   19.49              TRUE            Marketing
## 453        Scott   Male 146812   1.965              TRUE            Marketing
## 454        Annie Female  40119   4.338              TRUE      Human Resources
## 455         <NA>   Male 136602  10.429              <NA>            Marketing
## 456         Ruth Female  69579  18.029              TRUE      Human Resources
## 457      Deborah   <NA> 101457   6.662             FALSE          Engineering
## 458     Patricia Female 121232  16.624             FALSE                Legal
## 459       Albert   Male 102626  15.843             FALSE              Finance
## 460      Charles   <NA> 137171   5.574              TRUE            Marketing
## 461         Tina Female  88276  14.248             FALSE                Legal
## 462       Jeremy   Male  43354  13.946             FALSE            Marketing
## 463        Craig   Male 125556   7.996             FALSE          Engineering
## 464         Jose   Male  59862   3.269             FALSE              Product
## 465     Lawrence   Male  74640  18.641             FALSE            Marketing
## 466        Helen Female  52875   4.188             FALSE                Legal
## 467       Walter   Male  58789   5.461             FALSE                Sales
## 468          Amy Female 122897   8.222              TRUE          Engineering
## 469       Janice Female 136032  10.696              TRUE            Marketing
## 470      Stephen   Male 141958   9.078              TRUE                Legal
## 471         Ryan   Male 139917  11.466             FALSE         Distribution
## 472       Sharon Female 147635  14.693             FALSE      Human Resources
## 473       Joshua   Male  68230   8.047             FALSE      Client Services
## 474     Clarence   Male 124365   8.457             FALSE Business Development
## 475     Jonathan   Male 104749  11.364             FALSE          Engineering
## 476    Stephanie Female 122121   7.937              TRUE          Engineering
## 477        Kathy Female 143541   8.461             FALSE      Human Resources
## 478       Albert   Male 137840   9.705             FALSE Business Development
## 479      Michael   Male  73354  18.357             FALSE Business Development
## 480      Richard   Male  47647  18.787              TRUE                 <NA>
## 481          Amy Female 106249  15.967             FALSE                Sales
## 482         <NA> Female  93847   1.085              <NA> Business Development
## 483      Beverly   <NA> 104815    3.38             FALSE              Product
## 484         <NA> Female 115436   3.099              <NA>      Human Resources
## 485          Joe   Male  50645  11.119             FALSE            Marketing
## 486       Ashley Female 142415   1.985              TRUE              Finance
## 487       Howard   Male  37984   2.021             FALSE         Distribution
## 488       Cheryl Female  71751  15.918             FALSE Business Development
## 489       Robert   Male 135882  19.944             FALSE                Legal
## 490       Sharon Female  46007  19.731              TRUE      Client Services
## 491       Judith Female 117055   7.461             FALSE            Marketing
## 492     Nicholas   <NA>  58478   6.525              TRUE                 <NA>
## 493        Jerry   Male 121357  18.845             FALSE Business Development
## 494        Craig   Male  44857  13.266             FALSE              Finance
## 495      Barbara Female  47322    7.25              TRUE                Sales
## 496       Eugene   Male  81077   2.117             FALSE                Sales
## 497       Johnny   Male  76394   5.437              TRUE            Marketing
## 498     Benjamin   Male 114356   7.123             FALSE      Client Services
## 499        Diana Female  41831   4.548             FALSE Business Development
## 500      Barbara   <NA>  90187  14.764              TRUE         Distribution
## 501    Christine Female  72613  11.126             FALSE                Legal
## 502         Sean   Male  42748   9.765             FALSE         Distribution
## 503        Sarah Female  37259   1.763             FALSE      Client Services
## 504        Roger   Male 125033   4.887              TRUE            Marketing
## 505         <NA> Female  38275  10.494              <NA>         Distribution
## 506         <NA>   Male  71520  13.248              <NA>          Engineering
## 507       Donald   Male 106472   6.499              TRUE      Client Services
## 508     Clarence   Male 103684  18.654              TRUE          Engineering
## 509        Scott   Male  96111  15.592             FALSE      Human Resources
## 510          Ann Female  90719    6.22             FALSE Business Development
## 511        Frank   Male 140303   17.07             FALSE                Legal
## 512          Joe   Male  62161    8.13              TRUE Business Development
## 513        Wanda Female  78883  19.695             FALSE                 <NA>
## 514        Jimmy   Male  63549  19.624             FALSE                 <NA>
## 515          Amy   <NA>  63888  18.115              TRUE              Product
## 516        Larry   Male  97370  10.458             FALSE                Sales
## 517       Gloria Female  66224  15.979              TRUE                Legal
## 518         Ruth Female  97915   3.997              TRUE      Human Resources
## 519        Diana Female  86883  18.003              TRUE              Product
## 520      Raymond   Male  37812   3.178             FALSE      Human Resources
## 521        Peter   Male  56580   8.411              TRUE                 <NA>
## 522         <NA>   <NA>  83895  17.971              <NA>          Engineering
## 523    Catherine Female  58047  14.858              TRUE         Distribution
## 524        Diane Female 124889  15.026              TRUE                Sales
## 525        Peter   <NA> 118840  14.509              TRUE      Client Services
## 526        Steve   Male  67780    9.54              TRUE      Human Resources
## 527      Barbara Female 144677   8.696             FALSE              Finance
## 528        Helen   <NA>  45724   1.022             FALSE              Product
## 529        Jimmy   <NA>  86676   7.175              TRUE              Product
## 530  Christopher   Male  82401   7.252             FALSE      Client Services
## 531     Kathleen Female  35575  14.595             FALSE         Distribution
## 532     Virginia Female 123649  10.154              TRUE            Marketing
## 533         Lisa Female  38078   10.28              TRUE              Product
## 534         Earl   Male  52620  13.773             FALSE              Product
## 535       Gerald   <NA> 133366  12.292             FALSE                Legal
## 536       Louise Female  91462   8.205             FALSE            Marketing
## 537     Clarence   Male 146589   4.905              TRUE Business Development
## 538      Cynthia Female  51633  13.472              TRUE Business Development
## 539         Adam   Male  45181   3.491             FALSE      Human Resources
## 540       Justin   Male  62454   3.459              TRUE                Sales
## 541         <NA> Female  84746    4.82              <NA>              Finance
## 542         Ruby Female 147362   7.851              TRUE          Engineering
## 543       Amanda Female  80803  14.077              TRUE         Distribution
## 544         Anna Female 117293   2.366             FALSE      Client Services
## 545        Roger   Male 105689   13.32              TRUE            Marketing
## 546        Julie   <NA>  93302   9.048              TRUE              Product
## 547       Joseph   Male 102555   3.672              TRUE              Product
## 548       Evelyn Female  51525  10.366             FALSE              Finance
## 549       Janice Female  41190   3.311              TRUE                Sales
## 550         Sara Female  75484   3.186             FALSE              Finance
## 551      Gregory   Male  82726  16.304              TRUE            Marketing
## 552       Philip   Male 122319  19.122             FALSE          Engineering
## 553      Barbara Female 127297  11.905              TRUE              Product
## 554       Amanda Female 109290  13.853             FALSE          Engineering
## 555      Phyllis   <NA>  99150   6.007             FALSE      Client Services
## 556         Anne Female  71930  18.451              TRUE              Product
## 557       Arthur   Male  66819   6.639              TRUE            Marketing
## 558         Jane Female  42424  18.115             FALSE         Distribution
## 559        Linda Female  51431  13.295             FALSE          Engineering
## 560       Ashley Female 120675    6.51             FALSE      Human Resources
## 561        Shawn   Male  96610   2.097              TRUE      Client Services
## 562     Kathleen Female  71430   8.572             FALSE          Engineering
## 563         Sara   <NA>  87713  18.863              TRUE                Legal
## 564        Randy   Male  86723  14.842             FALSE          Engineering
## 565       Andrew   Male  43414   7.563              TRUE      Client Services
## 566      Marilyn Female  87145   17.33             FALSE      Human Resources
## 567       Johnny   Male  91124  12.986              TRUE         Distribution
## 568         <NA> Female  48141  12.605              <NA>                 <NA>
## 569        Susan Female  90829  19.142             FALSE            Marketing
## 570         Ruby Female 101262   6.773             FALSE      Client Services
## 571        Kelly Female  41427   1.431             FALSE              Product
## 572     Jonathan   Male  56993  18.623             FALSE            Marketing
## 573      Raymond   Male 114244   16.69             FALSE      Human Resources
## 574     Kimberly Female  81800   5.435              TRUE                 <NA>
## 575         <NA> Female 118736   7.421              <NA>      Client Services
## 576         John   Male  66077   5.809              TRUE              Product
## 577      Michael   Male  35013  14.879             FALSE              Product
## 578        Jerry   Male  98393  11.393             FALSE      Client Services
## 579       Amanda Female 107111   1.438              TRUE              Product
## 580       Harold   Male  65673   1.187              TRUE                Legal
## 581        Harry   Male  65482  18.089             FALSE                 <NA>
## 582       Ernest   Male  81919  15.118             FALSE            Marketing
## 583        Harry   Male  59277    10.2             FALSE              Finance
## 584        Diane Female  49501  13.506             FALSE Business Development
## 585         Jack   Male  70367  18.266              TRUE                Legal
## 586      Shirley Female 132156   2.754             FALSE            Marketing
## 587         Rose Female  56961   7.585             FALSE            Marketing
## 588       Cheryl Female  98841   8.945              TRUE      Client Services
## 589      Cynthia Female  78226   2.419             FALSE      Human Resources
## 590       Sandra Female 116931   9.657              TRUE      Human Resources
## 591       Andrea Female  87575  13.346              TRUE Business Development
## 592       Rachel Female 110924   7.808             FALSE         Distribution
## 593       Justin   Male  78351  15.221             FALSE                Sales
## 594        Marie Female 125574   4.644             FALSE                Sales
## 595        Louis   Male  95198   2.075             FALSE Business Development
## 596        Nancy Female 121006   3.512              TRUE              Finance
## 597         Mark   Male  95728   6.752              TRUE Business Development
## 598       Teresa Female  69740   8.294             FALSE         Distribution
## 599      Frances Female  91996  11.506             FALSE              Product
## 600         <NA> Female  98385  10.925              <NA>      Human Resources
## 601      Barbara   <NA>  90556  15.749              TRUE         Distribution
## 602    Christine   <NA>  50366   9.862              TRUE            Marketing
## 603        Bobby   <NA>  84232  15.704              TRUE          Engineering
## 604         Carl   <NA> 100888   12.49              TRUE Business Development
## 605        Bruce   Male 141335  15.427              TRUE          Engineering
## 606         Rose Female  97691   2.142             FALSE      Client Services
## 607      Mildred Female  47266  10.256             FALSE      Client Services
## 608         <NA>   Male 139754   12.74              <NA>                Sales
## 609         <NA> Female 116236  17.274              <NA>          Engineering
## 610         Todd   Male 103405   15.91             FALSE                Sales
## 611         Alan   Male  41453  10.084             FALSE              Product
## 612    Elizabeth   <NA>  52730  12.525             FALSE            Marketing
## 613        Julie   <NA>  60361   7.099              TRUE         Distribution
## 614       Teresa   <NA>  63103  11.378             FALSE              Finance
## 615         Eric   Male  65168  11.513             FALSE         Distribution
## 616       Edward   Male  73105   6.083              TRUE          Engineering
## 617     Kimberly Female  37916  12.929              TRUE         Distribution
## 618        Kathy Female  45682   1.451              TRUE            Marketing
## 619        Peter   Male  69297   1.268             FALSE              Finance
## 620      Beverly Female  59070  19.064              TRUE          Engineering
## 621        Ralph   Male  81215     8.4             FALSE              Finance
## 622       Justin   Male 128036  18.824             FALSE      Client Services
## 623        Irene Female  89780   8.999              TRUE                Sales
## 624        Irene Female 125018   9.601              TRUE                Legal
## 625       Jeremy   Male 133033    12.2             FALSE                Sales
## 626      Cynthia Female  82408   8.701              TRUE            Marketing
## 627         <NA> Female 131755    2.93              <NA>                 <NA>
## 628         Anne Female 128305  16.636             FALSE            Marketing
## 629         <NA>   <NA> 147309   1.702              <NA>         Distribution
## 630        Debra Female  74911  19.513             FALSE            Marketing
## 631      Michael   Male  47079   2.617             FALSE                Legal
## 632        Jason   <NA>  82873   1.869             FALSE      Client Services
## 633      Rebecca Female 134673   6.878             FALSE          Engineering
## 634       Andrea Female 123591     6.5              TRUE          Engineering
## 635         Carl   Male  75598  19.289             FALSE                 <NA>
## 636        Randy   Male  89831  13.047              TRUE                 <NA>
## 637      Marilyn Female  92430   2.924             FALSE          Engineering
## 638        Wayne   Male 126956  18.396             FALSE      Human Resources
## 639      Frances Female 112467   1.433             FALSE Business Development
## 640       Amanda   <NA>  46665  19.391              TRUE      Client Services
## 641     Kathleen Female  42553   3.756              TRUE         Distribution
## 642       Amanda Female 102081   5.203             FALSE                Sales
## 643       Amanda   <NA> 135118  15.108             FALSE              Finance
## 644        Shawn   Male  71975    9.77             FALSE      Human Resources
## 645    Katherine Female 149908  18.912             FALSE              Finance
## 646         Anna   <NA>  45418  10.162             FALSE            Marketing
## 647    Elizabeth Female  79145   19.78             FALSE              Finance
## 648       Donald   Male 122920    5.32             FALSE                 <NA>
## 649      Gregory   Male 128031  15.857              TRUE Business Development
## 650         Ruth Female  59969  14.064              TRUE              Finance
## 651      Cynthia Female  35381  11.749             FALSE              Finance
## 652      Deborah Female 113129  17.371             FALSE      Client Services
## 653       Willie   Male 141932   1.017              TRUE          Engineering
## 654        Debra Female  42296  16.922             FALSE              Product
## 655         Carl   Male  63395  11.411             FALSE                Legal
## 656       Walter   Male 125382  13.613              TRUE      Client Services
## 657         Lisa Female 113592  17.108              TRUE            Marketing
## 658      Rebecca Female  46750  11.367              TRUE                Sales
## 659      Jessica Female  90285  13.591              TRUE      Client Services
## 660        Steve   <NA>  53692   4.785              TRUE            Marketing
## 661         Mary   <NA> 100341   6.662             FALSE         Distribution
## 662        Craig   Male 123876   4.225             FALSE          Engineering
## 663    Katherine Female  41643   4.659              TRUE         Distribution
## 664       Andrea Female 113760  12.866              TRUE         Distribution
## 665      Timothy   Male  49473  12.463             FALSE                Legal
## 666      Anthony   Male 146141   3.645              TRUE         Distribution
## 667       Harold   <NA>  79459   7.776              TRUE Business Development
## 668      Douglas   <NA> 104496  14.771              TRUE            Marketing
## 669        James   Male  67789  17.105              TRUE                Legal
## 670       Joseph   <NA>  86564  11.879              TRUE                 <NA>
## 671       Martha Female 135758  14.782              TRUE      Client Services
## 672        Laura Female  84672    3.96             FALSE         Distribution
## 673       Robert   Male 111580  10.982             FALSE      Human Resources
## 674        Ralph   Male  50455  16.248             FALSE Business Development
## 675         <NA>   Male  88733   1.932              <NA>      Human Resources
## 676        Diane Female 130577  12.791             FALSE            Marketing
## 677        Annie Female 138925   9.801              TRUE            Marketing
## 678   Jacqueline Female  62371   15.77              TRUE      Client Services
## 679       Brenda Female  87715   2.675             FALSE              Product
## 680         Lori Female  66029   3.345              TRUE      Client Services
## 681       Albert   Male  86818  14.301              TRUE          Engineering
## 682        Frank   Male  75147  17.398             FALSE              Product
## 683      Deborah Female 105573   5.268              TRUE              Product
## 684        Robin   <NA>  41230   6.611              TRUE         Distribution
## 685        Alice Female 117787  10.485             FALSE                 <NA>
## 686         <NA>   Male  88086  19.387              <NA>              Finance
## 687         Paul   <NA>  91462  18.704             FALSE              Finance
## 688         <NA>   Male 139959   8.992              <NA>         Distribution
## 689        Brian   Male  93901  17.821              TRUE                Legal
## 690        Julie   <NA>  50529  17.263             FALSE              Finance
## 691        Bruce   Male 134988  13.926              TRUE                Sales
## 692         Joan Female 120941   3.694              TRUE              Finance
## 693       Harold   Male 118753   8.162             FALSE                Sales
## 694        Alice Female  92799   2.782             FALSE                Sales
## 695      Barbara Female  85718  13.326             FALSE      Client Services
## 696        Sarah Female 109517   5.146             FALSE      Human Resources
## 697       Samuel   Male  85550  11.593              TRUE            Marketing
## 698         <NA>   Male 136655   9.801              <NA>         Distribution
## 699      Barbara   <NA>  43312   8.218              TRUE                Legal
## 700          Amy   <NA> 102839  10.385              TRUE         Distribution
## 701        Frank   Male  78891   7.927              TRUE         Distribution
## 702        Robin   <NA>  93201  11.712              TRUE                Legal
## 703        Debra Female  84693   6.976              TRUE              Finance
## 704      Brandon   <NA>  73587   9.769              TRUE      Human Resources
## 705       Thomas   Male  65251  11.211             FALSE         Distribution
## 706        Harry   Male 129148  15.193              TRUE          Engineering
## 707         Todd   Male 128175  18.473              TRUE                 <NA>
## 708     Patricia Female  75825   7.839             FALSE          Engineering
## 709        Steve   Male  51821   1.197              TRUE                Legal
## 710         <NA>   Male  60411  12.048              <NA>      Human Resources
## 711       Edward   Male  58327  17.095              TRUE              Finance
## 712        Karen Female  46478  16.552             FALSE          Engineering
## 713       Martin   <NA> 123963  15.745              TRUE          Engineering
## 714          Ann Female  79796   9.851             FALSE         Distribution
## 715     Jonathan   Male  83809  12.922             FALSE         Distribution
## 716        Peter   Male  77933  13.132              TRUE          Engineering
## 717         Eric   Male  51070  13.806              TRUE Business Development
## 718        Jason   <NA>  97480  11.518             FALSE      Human Resources
## 719        Terry   Male  58357   7.969             FALSE          Engineering
## 720         John   Male  67165  13.001             FALSE          Engineering
## 721        Marie Female 145988  18.685              TRUE      Human Resources
## 722       Harold   Male 147417  11.626              TRUE Business Development
## 723       Joshua   Male  95003   5.197              TRUE            Marketing
## 724      Brandon   <NA> 144187  11.416              TRUE                Legal
## 725       Andrea Female  37888   13.47             FALSE          Engineering
## 726       Jeremy   Male 131513   1.876              TRUE              Finance
## 727       Daniel   Male  77287      13              TRUE                 <NA>
## 728          Roy   Male  46875  12.942              TRUE                Sales
## 729      Dorothy Female  82744  19.111              TRUE      Client Services
## 730       Steven   Male  43252  18.892             FALSE      Client Services
## 731       Nicole Female  66047  18.674              TRUE            Marketing
## 732          Ann Female 118431  12.772              TRUE              Finance
## 733        Henry   Male  59943   1.432             FALSE              Finance
## 734         <NA>   Male 136681   3.655              <NA> Business Development
## 735         <NA> Female 138807     1.4              <NA>      Human Resources
## 736      Barbara Female  82884   6.837              TRUE Business Development
## 737         Judy Female  48668  11.716              TRUE              Product
## 738     Lawrence   Male 122971  14.618             FALSE              Product
## 739       Joseph   Male 107050  12.737             FALSE Business Development
## 740       Carlos   Male 138598  14.737             FALSE                Sales
## 741      Russell   <NA> 149456   3.533             FALSE            Marketing
## 742         Jane Female 128540  15.941             FALSE                Legal
## 743       Martin   Male  61117   2.844             FALSE      Client Services
## 744       Steven   Male 100949  13.813              TRUE      Human Resources
## 745         Judy   <NA>  46829    5.19              TRUE              Finance
## 746        James   Male  69111  14.625              TRUE Business Development
## 747       Gloria Female  46602   1.027              TRUE Business Development
## 748       Ernest   Male  61181  16.559             FALSE Business Development
## 749       Martha Female  94963  19.626              TRUE Business Development
## 750        Janet   <NA>  85789   9.712             FALSE                Legal
## 751        Louis   <NA> 145274  16.379             FALSE              Product
## 752         Tina Female 102841   3.369             FALSE      Client Services
## 753        Henry   Male  49665  18.338             FALSE Business Development
## 754      Antonio   Male  41928   5.478              TRUE                 <NA>
## 755         Lisa Female 128042    2.03              TRUE                Legal
## 756       Denise Female  86150   3.997             FALSE      Client Services
## 757      Stephen   Male 121816  10.615              TRUE         Distribution
## 758     Benjamin   Male 123409   7.783             FALSE              Product
## 759         Carl   <NA>  98295   7.617              TRUE                Legal
## 760         Ruth Female  59678  10.895             FALSE         Distribution
## 761        Shawn   Male  57871    4.02              TRUE      Human Resources
## 762     Jennifer Female 132084  10.006              TRUE          Engineering
## 763        Terry   Male  35633   3.947              TRUE         Distribution
## 764   Jacqueline Female 125298   3.019              TRUE                Legal
## 765        Roger   Male 115582  15.343              TRUE                Sales
## 766        Alice   <NA> 148339  11.479              TRUE              Finance
## 767       Justin   Male 112975   9.699             FALSE      Human Resources
## 768        Janet Female  36927  18.769             FALSE      Client Services
## 769        Robin Female  70248    9.66              TRUE                Sales
## 770       Samuel   <NA> 141305   9.849              TRUE            Marketing
## 771    Katherine Female  72002  13.178              TRUE              Product
## 772        Peter   Male 102577  12.026              TRUE              Product
## 773      Lillian Female 113554  18.018              TRUE Business Development
## 774         <NA>   Male  47176  10.736              <NA>              Finance
## 775         <NA> Female 106428  10.867              <NA>                 <NA>
## 776         Rose Female  75181    6.06              TRUE              Finance
## 777        Bobby   Male  79047  18.784             FALSE      Human Resources
## 778     Kimberly Female  52970   4.513             FALSE                Sales
## 779      Antonio   Male 137979   5.266             FALSE         Distribution
## 780        Doris Female 114360  17.799              TRUE Business Development
## 781       Steven   Male 110306  16.843              TRUE      Human Resources
## 782     Lawrence   Male  46378   9.127             FALSE                 <NA>
## 783         <NA>   Male  74104   17.68              <NA>      Client Services
## 784         <NA>   <NA> 132505  13.592              <NA>              Product
## 785    Stephanie Female  96649   3.453             FALSE                Sales
## 786         <NA> Female  57811   8.941              <NA>            Marketing
## 787       Sandra Female 111468    2.58              TRUE              Product
## 788        Kevin   Male 141498   4.135              TRUE                Sales
## 789     Michelle   <NA> 124441  16.353             FALSE Business Development
## 790         Lisa Female  40121   6.293             FALSE              Product
## 791         Fred   Male 129712  11.058             FALSE              Finance
## 792       Joseph   Male 126010  19.601             FALSE                Legal
## 793         Anne   <NA> 122762   9.564             FALSE         Distribution
## 794       Andrea Female 149105  13.707              TRUE         Distribution
## 795       Nicole Female  44021  10.286             FALSE                 <NA>
## 796      Theresa   <NA>  42025   3.319              TRUE      Human Resources
## 797    Catherine Female  59970  12.801             FALSE      Client Services
## 798         Rose Female 145001   19.85             FALSE          Engineering
## 799     Lawrence   Male 102589  17.952              TRUE            Marketing
## 800      Raymond   Male  47529   2.712              TRUE              Product
## 801     Clarence   Male 148941  11.517             FALSE              Product
## 802         Ryan   Male  91109  17.499              TRUE      Human Resources
## 803         Jane Female  51923  13.623             FALSE Business Development
## 804         Lois Female  53954  19.075             FALSE              Product
## 805        Shawn   Male  39335  10.664             FALSE         Distribution
## 806       Thomas   Male 111371  15.081              TRUE          Engineering
## 807      Kathryn Female  86676   6.081             FALSE                Sales
## 808         Mary Female 115057   2.089             FALSE              Finance
## 809        Julie Female 145357   3.459             FALSE          Engineering
## 810       George   Male  36749  19.754             FALSE              Finance
## 811        Ralph   Male  89854   7.227             FALSE Business Development
## 812       Judith Female 134048   6.818              TRUE            Marketing
## 813         <NA>   Male 141311   5.478              <NA>              Product
## 814       Evelyn Female 123621  19.767              TRUE            Marketing
## 815       Rachel Female  54941   3.221              TRUE         Distribution
## 816        Maria   <NA> 106562       4             FALSE      Human Resources
## 817        Kelly Female  39371   4.068             FALSE          Engineering
## 818         Ruby Female  83112   4.083             FALSE            Marketing
## 819          Ann Female  96941  10.048              TRUE         Distribution
## 820      Kenneth   Male  47232  17.862              TRUE                Legal
## 821      William   Male  54058   5.182              TRUE      Human Resources
## 822         <NA> Female  80399  12.254              <NA>         Distribution
## 823      Deborah Female 118043   7.266              TRUE Business Development
## 824         <NA>   Male 109411   9.494              <NA>          Engineering
## 825        Julia Female  97566   2.147             FALSE            Marketing
## 826       Robert   <NA>  69267    5.89              TRUE                Sales
## 827         <NA>   <NA>  87103   5.665              <NA>                 <NA>
## 828        Jesse   <NA>  98811   7.487             FALSE                Legal
## 829      Cynthia Female 149684   7.864             FALSE              Product
## 830       Johnny   Male  71383   2.097              TRUE      Human Resources
## 831      Michael   Male  81206  19.908              TRUE         Distribution
## 832      Kenneth   Male  69112   7.588              TRUE              Finance
## 833        Keith   Male 120672  19.467             FALSE                Legal
## 834       Gerald   Male  96511   9.331             FALSE      Human Resources
## 835         Carl   <NA>  49325   2.071              TRUE Business Development
## 836      Douglas   Male 132175    2.28             FALSE          Engineering
## 837      Carolyn Female 118037  13.492             FALSE              Finance
## 838       Joseph   Male 139570  15.804              TRUE              Finance
## 839        Billy   Male 115280   9.153             FALSE         Distribution
## 840        Joyce Female  51065  16.807             FALSE      Human Resources
## 841      Lillian Female 103854   4.924              TRUE         Distribution
## 842         Ruby Female  48354  19.501             FALSE Business Development
## 843       Sandra Female 132327  19.264             FALSE              Product
## 844       Louise Female 106362   8.965             FALSE                Sales
## 845        Maria   <NA> 148857   8.738             FALSE                Legal
## 846       Brenda Female 131131  11.682             FALSE      Client Services
## 847      Stephen   Male 129663  15.574             FALSE      Human Resources
## 848       Nicole   <NA>  41449   4.707             FALSE              Finance
## 849        Bobby   Male  93368    2.88              TRUE              Product
## 850       Ernest   Male  53335   9.192             FALSE              Product
## 851      Charles   Male 148291   6.002             FALSE                 <NA>
## 852        Bobby   Male 147842  16.158              TRUE                 <NA>
## 853        Diana Female 105066  17.343              TRUE Business Development
## 854      Mildred Female 139284   11.39              TRUE                 <NA>
## 855        Harry   Male  63046  10.411             FALSE              Finance
## 856      Phillip   <NA>  89700   2.277              TRUE                 <NA>
## 857       Bonnie   <NA> 108946  12.211             FALSE              Finance
## 858        Marie Female  98406   7.943              TRUE              Product
## 859         Sean   Male 108581   9.634             FALSE      Human Resources
## 860        Robin Female  41808  19.239             FALSE Business Development
## 861      Phillip   Male  36837   14.66             FALSE            Marketing
## 862        Jerry   Male 140850  18.855             FALSE              Finance
## 863       Ronald   Male  50426  18.536              TRUE                Sales
## 864      Phillip   Male 134120   6.842             FALSE      Human Resources
## 865         Ryan   Male  57292    6.01             FALSE                 <NA>
## 866        Karen Female  80633  16.306             FALSE                Legal
## 867       Bonnie Female 131943  14.249             FALSE      Client Services
## 868         Earl   Male  48046   7.737             FALSE            Marketing
## 869    Katherine Female  97443  13.657             FALSE              Product
## 870      Matthew   Male 135352   7.986              TRUE Business Development
## 871      Cynthia   <NA> 107816  18.751             FALSE            Marketing
## 872       Gerald   Male  96329   2.469              TRUE          Engineering
## 873       Brenda Female  73749  19.332             FALSE Business Development
## 874        Jason   Male  75607   4.299              TRUE                Sales
## 875      Melissa Female  98858   3.525              TRUE      Human Resources
## 876      Beverly Female  76485   9.212              TRUE          Engineering
## 877        Terry   <NA>  41238   8.219             FALSE            Marketing
## 878         <NA>   Male 114896  13.823              <NA>      Client Services
## 879   Jacqueline Female 125418   8.064             FALSE         Distribution
## 880          Amy Female  75415  19.132             FALSE      Client Services
## 881       Robert   <NA>  90998   8.382             FALSE              Finance
## 882         Ruby Female 142868   6.318             FALSE            Marketing
## 883         Sara   <NA> 135990  14.344              TRUE         Distribution
## 884      Kathryn Female  57300  18.015             FALSE            Marketing
## 885         Lisa Female  73706   18.53             FALSE              Finance
## 886         Juan   Male  85871    9.16             FALSE              Product
## 887      Anthony   Male  96795  14.837             FALSE                Sales
## 888        David   Male  92242  15.407             FALSE                Legal
## 889      Marilyn Female 115149  11.934              TRUE                Legal
## 890     Margaret Female 126924   1.552              TRUE              Finance
## 891         <NA>   Male 145329     7.1              <NA>              Finance
## 892      Timothy   Male  92587   8.475             FALSE              Finance
## 893      Brandon   Male  60263   2.709             FALSE                Sales
## 894        Frank   Male  91406   5.681              TRUE Business Development
## 895        Betty Female  37005   7.645              TRUE            Marketing
## 896       Janice   <NA> 139791  16.968             FALSE Business Development
## 897       Victor   Male  45267   3.942              TRUE                Sales
## 898      Kenneth   <NA>  95296  10.146             FALSE              Finance
## 899         Jane Female  59680  15.211              TRUE Business Development
## 900       Walter   Male 144701  16.323              TRUE            Marketing
## 901    Christina Female  35477  18.178             FALSE      Human Resources
## 902     Patricia Female 119266   6.911             FALSE         Distribution
## 903         <NA>   Male 103877   6.322              <NA>         Distribution
## 904      Heather Female  47605  14.955              TRUE      Human Resources
## 905    Stephanie Female 136604    6.16              TRUE      Human Resources
## 906        Alice Female 121250   4.363              TRUE              Finance
## 907      Frances Female  35884  17.667             FALSE                Sales
## 908    Elizabeth Female 137144  10.081             FALSE              Finance
## 909       Janice Female 102697   3.283             FALSE          Engineering
## 910       Donald   Male  61999   6.466             FALSE              Product
## 911      Melissa Female  45223   8.879              TRUE                Legal
## 912         Carl   Male  54033  15.528              TRUE Business Development
## 913          Joe   Male 126120    1.02             FALSE                 <NA>
## 914         Lois Female  99747   6.168             FALSE            Marketing
## 915          Ann Female  71958   5.272              TRUE         Distribution
## 916         Todd   Male 115566   6.716              TRUE      Client Services
## 917      Marilyn Female 118369   7.696              TRUE Business Development
## 918        Shawn   Male  51667   6.339             FALSE              Product
## 919         Ryan   Male  85858  19.475             FALSE      Client Services
## 920         Sean   Male 131423   8.957             FALSE         Distribution
## 921         Rose Female  49538   9.828             FALSE      Client Services
## 922       George   Male  50369    18.9              TRUE                Sales
## 923       Arthur   Male  86615   3.412              TRUE Business Development
## 924        Irene   <NA> 135369    4.38             FALSE Business Development
## 925      Deborah Female  60003   9.624             FALSE      Client Services
## 926         <NA> Female  95866  19.388              <NA>                Sales
## 927       Judith Female 109324  19.488             FALSE         Distribution
## 928       Philip   Male 103557  16.014              TRUE Business Development
## 929      Jeffrey   Male 111376   2.673              TRUE Business Development
## 930      Theresa Female  75661   1.079              TRUE                Legal
## 931        Nancy Female  85213   2.386              TRUE            Marketing
## 932       Harold   Male 140444   3.771             FALSE         Distribution
## 933       Bonnie Female  90427    2.01              TRUE      Client Services
## 934        Doris Female 141439   3.799             FALSE Business Development
## 935       Samuel   Male  43694   3.787              TRUE          Engineering
## 936        Alice Female 131952   12.09             FALSE          Engineering
## 937        Maria Female  96250  10.056             FALSE Business Development
## 938        Aaron   <NA>  63126  18.424             FALSE      Client Services
## 939         Mark   <NA>  44836   2.657             FALSE      Client Services
## 940        Ralph   <NA>  70635   2.147             FALSE      Client Services
## 941       Andrew   Male 137386   8.611              TRUE         Distribution
## 942      William   Male 104840  15.653              TRUE          Engineering
## 943         Lori Female  75498   6.537              TRUE            Marketing
## 944        Wayne   Male  67471   2.728             FALSE          Engineering
## 945      Kenneth   Male 101914   1.905              TRUE         Distribution
## 946       Gerald   <NA>  93712  17.426              TRUE         Distribution
## 947         <NA> Female 133472  16.941              <NA>         Distribution
## 948         <NA>   Male 107351   5.329              <NA>            Marketing
## 949       Ashley Female 142410  11.048              TRUE          Engineering
## 950        Scott   Male  37385   8.226              TRUE Business Development
## 951        Paula Female  58423  10.833             FALSE Business Development
## 952         <NA> Female 143638   9.662              <NA>                 <NA>
## 953       Teresa Female 113425  11.907              TRUE      Human Resources
## 954        Randy   Male  57266  14.077             FALSE              Product
## 955          Joe   Male 119667   1.148              TRUE              Finance
## 956        Sarah Female 127118  11.176             FALSE                Legal
## 957      Beverly Female  80838   8.115             FALSE          Engineering
## 958      Jeffrey   Male  70990  15.901              TRUE                Sales
## 959       Gloria Female  39833   9.631             FALSE          Engineering
## 960       Albert   Male  45094    5.85              TRUE Business Development
## 961      Stephen   Male  93997  18.093              TRUE Business Development
## 962      Antonio   <NA> 103050    3.05             FALSE                Legal
## 963     Jonathan   Male 121797  16.923             FALSE              Product
## 964          Ann Female  89443   17.94              TRUE                Sales
## 965        Bruce   Male  35802  12.391              TRUE                Sales
## 966    Catherine Female  68164  18.393             FALSE      Client Services
## 967        Louis   Male  93022   9.146              TRUE      Human Resources
## 968       Thomas   Male 105681  19.572             FALSE          Engineering
## 969       Louise Female  43050  11.671             FALSE         Distribution
## 970        Linda Female  44486  17.308              TRUE          Engineering
## 971        Alice Female  63571  15.397              TRUE              Product
## 972      Patrick   Male  75423   5.368              TRUE Business Development
## 973       Victor   <NA>  76381  11.159              TRUE                Sales
## 974      Russell   Male 137359  11.105             FALSE Business Development
## 975        Harry   Male  67656  16.455              TRUE      Client Services
## 976        Susan Female  92436  12.467             FALSE                Sales
## 977       Denise Female 137954   4.195              TRUE                Legal
## 978        Sarah Female 124566   5.949             FALSE              Product
## 979         Sean   Male  66146  11.178             FALSE      Human Resources
## 980       Ernest   Male 142935  13.198              TRUE              Product
## 981     Kimberly Female  46233   8.862              TRUE          Engineering
## 982        James   Male 148985   19.28             FALSE                Legal
## 983         Rose Female  91411   8.639              TRUE      Human Resources
## 984         John   Male 146907  11.738             FALSE          Engineering
## 985        Maria Female  43455   13.04             FALSE          Engineering
## 986      Stephen   <NA>  85668   1.909             FALSE                Legal
## 987        Donna Female  82871  17.999             FALSE            Marketing
## 988       Gloria Female 136709  10.331              TRUE              Finance
## 989        Alice Female  47638  11.209             FALSE      Human Resources
## 990       Justin   <NA>  38344   3.794             FALSE                Legal
## 991        Robin Female 100765  10.982              TRUE      Client Services
## 992         Rose Female 134505  11.051              TRUE            Marketing
## 993      Anthony   Male 112769  11.625              TRUE              Finance
## 994         Tina Female  56450   19.04              TRUE          Engineering
## 995       George   Male  98874   4.479              TRUE            Marketing
## 996        Henry   <NA> 132483  16.655             FALSE         Distribution
## 997      Phillip   Male  42392  19.675             FALSE              Finance
## 998      Russell   Male  96914   1.421             FALSE              Product
## 999        Larry   Male  60500  11.985             FALSE Business Development
## 1000      Albert   Male 129949  10.169              TRUE                Sales
# Turn every cell that holds one of these placeholder strings into a real NA,
# checking all columns of `employees`.
# NOTE(review): the printed result still shows "n.a." (row 6, Gender) --
# confirm whether that spelling should be listed here as well.
replace_with_na_all(
  data = employees,
  condition = ~ .x %in% c("", " ", "na", "NaN", "?")
)
## # A tibble: 1,000 x 6
##    First.Name Gender Salary Bonus.. Senior.Management Team                
##    <chr>      <chr>  <chr>  <chr>   <chr>             <chr>               
##  1 Douglas    Male   97308  6.945   TRUE              Marketing           
##  2 Thomas     Male   61933  <NA>    TRUE              <NA>                
##  3 Maria      Female 130590 11.858  FALSE             Finance             
##  4 Jerry      Male   <NA>   9.34    TRUE              Finance             
##  5 Larry      Male   101004 1.389   TRUE              Client Services     
##  6 Dennis     n.a.   115163 10.125  FALSE             Legal               
##  7 Ruby       Female 65476  10.012  TRUE              Product             
##  8 <NA>       Female 45906  11.598  <NA>              Finance             
##  9 Angela     <NA>   <NA>   18.523  TRUE              Engineering         
## 10 Frances    Female 139852 7.524   TRUE              Business Development
## # … with 990 more rows

Filling down missing values

Explicitly Missing: they are missing in the data and indicated with NA or something else.
Implicitly Missing: Not shown in the data, but implied (e.g. a missing level)

# Build the `frogger` example: one row per recorded (player, time of day)
# score. Not every player has a row for every time of day.
name <- rep(
  c("jesse", "andy", "nic", "dan", "alex"),
  times = c(4, 3, 2, 2, 4)
)
time <- c(
  "morning", "afternoon", "evening", "late_night", # jesse
  "morning", "afternoon", "late_night",            # andy
  "afternoon", "late_night",                       # nic
  "morning", "evening",                            # dan
  "morning", "afternoon", "evening", "late_night"  # alex
)
value <- c(
  6678, 800060, 475528, 143533,
  425115, 587468, 111000,
  588532, 915533,
  388148, 180912,
  552670, 98355, 266055, 121056
)

frogger <- data.frame(name = name, time = time, value = value)
glimpse(frogger)
## Rows: 15
## Columns: 3
## $ name  <chr> "jesse", "jesse", "jesse", "jesse", "andy", "andy", "andy", "ni…
## $ time  <chr> "morning", "afternoon", "evening", "late_night", "morning", "af…
## $ value <dbl> 6678, 800060, 475528, 143533, 425115, 587468, 111000, 588532, 9…
# Expand `frogger` to every time x name combination with `complete()`;
# combinations that were absent show up as rows whose `value` is NA.
frogger_tidy <- complete(frogger, time, name)
frogger_tidy
## # A tibble: 20 x 3
##    time       name   value
##    <chr>      <chr>  <dbl>
##  1 afternoon  alex   98355
##  2 afternoon  andy  587468
##  3 afternoon  dan       NA
##  4 afternoon  jesse 800060
##  5 afternoon  nic   588532
##  6 evening    alex  266055
##  7 evening    andy      NA
##  8 evening    dan   180912
##  9 evening    jesse 475528
## 10 evening    nic       NA
## 11 late_night alex  121056
## 12 late_night andy  111000
## 13 late_night dan       NA
## 14 late_night jesse 143533
## 15 late_night nic   915533
## 16 morning    alex  552670
## 17 morning    andy  425115
## 18 morning    dan   388148
## 19 morning    jesse   6678
## 20 morning    nic       NA
# Fill the `name` column of `frogger` down with `tidyr::fill()`.
# NOTE(review): `name` was built without any NA entries, so the printed
# result is identical to the input -- confirm whether NAs were intended.
tidyr::fill(frogger, name)
##     name       time  value
## 1  jesse    morning   6678
## 2  jesse  afternoon 800060
## 3  jesse    evening 475528
## 4  jesse late_night 143533
## 5   andy    morning 425115
## 6   andy  afternoon 587468
## 7   andy late_night 111000
## 8    nic  afternoon 588532
## 9    nic late_night 915533
## 10   dan    morning 388148
## 11   dan    evening 180912
## 12  alex    morning 552670
## 13  alex  afternoon  98355
## 14  alex    evening 266055
## 15  alex late_night 121056
# Chain both steps: fill `name`, then expand to all name x time pairs so the
# absent combinations appear with `value` set to NA.
frogger %>%
  fill(name) %>%
  complete(name, time)
## # A tibble: 20 x 3
##    name  time        value
##    <chr> <chr>       <dbl>
##  1 alex  afternoon   98355
##  2 alex  evening    266055
##  3 alex  late_night 121056
##  4 alex  morning    552670
##  5 andy  afternoon  587468
##  6 andy  evening        NA
##  7 andy  late_night 111000
##  8 andy  morning    425115
##  9 dan   afternoon      NA
## 10 dan   evening    180912
## 11 dan   late_night     NA
## 12 dan   morning    388148
## 13 jesse afternoon  800060
## 14 jesse evening    475528
## 15 jesse late_night 143533
## 16 jesse morning      6678
## 17 nic   afternoon  588532
## 18 nic   evening        NA
## 19 nic   late_night 915533
## 20 nic   morning        NA

Missing data dependence

  • MCAR: Missing Completely at Random.
    • missingness has no association with any data you have observed or not observed
    • Imputation is advisable
    • deleting observations may reduce the sample size, limiting inference, but will not bias.
  • MAR: Missing at Random
    • missingness depends on data observed, but not data unobserved
    • Should be imputing data
    • deletion is not advisable and may lead to bias
  • MNAR: Missing Not at Random
    • missingness of the response is related to an unobserved value relevant to the assessment of interest.
    • data will be biased by deletion and imputation
    • inference can be limited, proceed with caution
# Restore the objects stored in `oceanbuoys.rda` (path is relative to the
# working directory), then preview the `oceanbuoys` columns.
load("oceanbuoys.rda")
glimpse(oceanbuoys)
## Rows: 736
## Columns: 8
## $ year       <dbl> 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997…
## $ latitude   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ longitude  <dbl> -110, -110, -110, -110, -110, -110, -110, -110, -110, -110…
## $ sea_temp_c <dbl> 27.59, 27.55, 27.57, 27.62, 27.65, 27.83, 28.01, 28.04, 28…
## $ air_temp_c <dbl> 27.15, 27.02, 27.00, 26.93, 26.84, 26.94, 27.04, 27.11, 27…
## $ humidity   <dbl> 79.6, 75.8, 76.5, 76.2, 76.4, 76.7, 76.5, 78.3, 78.6, 76.9…
## $ wind_ew    <dbl> -6.4, -5.3, -5.1, -4.9, -3.5, -4.4, -2.0, -3.7, -4.2, -3.6…
## $ wind_ns    <dbl> 5.4, 5.3, 4.5, 2.5, 4.1, 1.6, 3.5, 4.5, 5.0, 3.5, 2.9, 1.8…
# Missingness map with rows sorted by year
oceanbuoys %>%
  arrange(year) %>%
  vis_miss()

# Missingness map with rows sorted by latitude
oceanbuoys %>%
  arrange(latitude) %>%
  vis_miss()

# Missingness map with rows sorted by wind_ew (east-west wind)
oceanbuoys %>%
  arrange(wind_ew) %>%
  vis_miss()

# Missingness per variable, faceted by year
gg_miss_var(oceanbuoys, facet = year)

Testing missing relationships

Tools to explore missing data dependence

as_shadow() to explore missingness

  • Coordinated names: each shadow matrix column inherits its feature's label with an '_NA' suffix
  • Clear Values: binary missing or !missing

Use bind_shadow() or nabular() to bind the shadow matrix to the data. The result is nabular data (a mix of NA and tabular data). This format is useful for things like calculating summary statistics based on the missingness of a feature.

# Mean of `Wind`, computed separately for rows where Ozone is missing (NA)
# and rows where it is present (!NA).
airquality %>%
  bind_shadow() %>%
  group_by(Ozone_NA) %>%
  summarise(mean = mean(Wind))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
##   Ozone_NA  mean
##   <fct>    <dbl>
## 1 !NA       9.86
## 2 NA       10.3

Create Nabular Data

# Derive the shadow matrix with `as_shadow()`: one `<variable>_NA` factor
# column per column of `oceanbuoys`.
obs <- oceanbuoys %>% as_shadow()
head(obs)
## # A tibble: 6 x 8
##   year_NA latitude_NA longitude_NA sea_temp_c_NA air_temp_c_NA humidity_NA
##   <fct>   <fct>       <fct>        <fct>         <fct>         <fct>      
## 1 !NA     !NA         !NA          !NA           !NA           !NA        
## 2 !NA     !NA         !NA          !NA           !NA           !NA        
## 3 !NA     !NA         !NA          !NA           !NA           !NA        
## 4 !NA     !NA         !NA          !NA           !NA           !NA        
## 5 !NA     !NA         !NA          !NA           !NA           !NA        
## 6 !NA     !NA         !NA          !NA           !NA           !NA        
## # … with 2 more variables: wind_ew_NA <fct>, wind_ns_NA <fct>
# Nabular data: bind the shadow columns to the data with `bind_shadow()`
bob <- oceanbuoys %>% bind_shadow()
bob %>% dim()
## [1] 736  16
# Same, but bind shadow columns only for variables that have missing values
bob_om <- oceanbuoys %>% bind_shadow( only_miss = TRUE )
bob_om %>% dim()
## [1] 736  11

Use nabular data to calculate some summary statistics about other features:

# Mean and standard deviation of wind_ew, grouped by humidity missingness
# (`humidity_NA`)
summarize(
  group_by( bind_shadow( oceanbuoys ), humidity_NA ),
  wind_ew_mean = mean(wind_ew),
  wind_ew_sd = sd(wind_ew)
)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 3
##   humidity_NA wind_ew_mean wind_ew_sd
##   <fct>              <dbl>      <dbl>
## 1 !NA                -3.78       1.90
## 2 NA                 -3.30       2.31
# Same summary for wind north south (`wind_ns`), grouped by `humidity_NA`
summarize(
  group_by( bind_shadow( oceanbuoys ), humidity_NA ),
  wind_ns_mean = mean(wind_ns),
  wind_ns_sd = sd(wind_ns)
)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 3
##   humidity_NA wind_ns_mean wind_ns_sd
##   <fct>              <dbl>      <dbl>
## 1 !NA                 2.78       2.06
## 2 NA                  1.66       2.23

Add information to summarize the missingness of a dataset

# How many NAs are in a feature? `n()` counts the rows in each group.

# wind_ew mean, standard deviation and row count, grouped by `air_temp_c_NA`
summarize(
  group_by( bind_shadow( oceanbuoys ), air_temp_c_NA ),
  wind_ew_mean = mean(wind_ew),
  wind_ew_sd = sd(wind_ew),
  n_obs = n()
)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 4
##   air_temp_c_NA wind_ew_mean wind_ew_sd n_obs
##   <fct>                <dbl>      <dbl> <int>
## 1 !NA                  -3.91       1.85   655
## 2 NA                   -2.17       2.14    81
# wind_ew mean, standard deviation and row count for every combination of
# `air_temp_c_NA` and `humidity_NA`
summarize(
  group_by( bind_shadow( oceanbuoys ), air_temp_c_NA, humidity_NA ),
  wind_ew_mean = mean(wind_ew),
  wind_ew_sd = sd(wind_ew),
  n_obs = n()
)
## `summarise()` regrouping output by 'air_temp_c_NA' (override with `.groups` argument)
## # A tibble: 4 x 5
## # Groups:   air_temp_c_NA [2]
##   air_temp_c_NA humidity_NA wind_ew_mean wind_ew_sd n_obs
##   <fct>         <fct>              <dbl>      <dbl> <int>
## 1 !NA           !NA                -4.01       1.74   565
## 2 !NA           NA                 -3.24       2.31    90
## 3 NA            !NA                -2.06       2.08    78
## 4 NA            NA                 -4.97       1.74     3

Visualizing missingness across one variable

Exploring conditional missings w/ggplot

  • How to use nabular data to explore how values change according to other values going missing
  • ggplot2 visualizations:
    • density plots
    • box plots
    • etc.
# Density of Temp over all airquality rows
airquality %>%
  ggplot( aes( x = Temp ) ) +
  geom_density()

Create nabular data:

# Density of Temp, one colored curve per level of Ozone_NA
ggplot( bind_shadow( airquality ),
        aes( x = Temp, color = Ozone_NA ) ) +
  geom_density()

The values of Temperature do not change much when data for Ozone are present or NA

Here is a faceted version:

# Density of Temp, one panel per level of Ozone_NA
ggplot( bind_shadow( airquality ), aes( x = Temp ) ) +
  geom_density() +
  facet_wrap( ~Ozone_NA )

Another look with faceted scatter plots. This gives an idea of how sparse the records with a missing feature are compared to the records where the feature is present.

# Wind against Temp, one panel per level of Ozone_NA
ggplot( bind_shadow( airquality ),
        aes( x = Temp, y = Wind ) ) +
  geom_point() +
  facet_wrap( ~Ozone_NA )

Can make the same point, perhaps more obvious, with a box plot.

# Box plot of Temp for each level of Ozone_NA
ggplot( bind_shadow( airquality ),
        aes( x = Ozone_NA, y = Temp ) ) +
  geom_boxplot()

This shows how close the medians of the two distributions are.

Visualizing missingness with color:

# Wind against Temp, points colored by Ozone_NA
ggplot( bind_shadow( airquality ),
        aes( x = Temp, y = Wind, color = Ozone_NA ) ) +
  geom_point()

Visualize the missingness of two features

# Density of Temp colored by Ozone_NA, one panel per level of Solar.R_NA
ggplot( bind_shadow( airquality ),
        aes( x = Temp, color = Ozone_NA ) ) +
  geom_density() +
  facet_wrap( ~ Solar.R_NA )

There doesn’t appear to be much difference in the distributions of Temperature when Solar.R info is present in a given record. However, when Solar.R is missing, the temperatures are low.

Now to take a look at oceanbuoys

# Missingness map of `oceanbuoys` from `vis_miss()`, with the x-axis title,
# text and ticks blanked out
vmob <- oceanbuoys %>%
  vis_miss() +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

# Density of `wind_ew`, colored by the missingness of air temperature
# (`air_temp_c_NA`)
bsob <- oceanbuoys %>%
  bind_shadow() %>%
  ggplot(aes(x = wind_ew, color = air_temp_c_NA)) +
  geom_density()

# Density of sea temperature, colored by the missingness of humidity
# (`humidity_NA`)
bsob2 <- oceanbuoys %>%
  bind_shadow() %>%
  ggplot(aes(x = sea_temp_c, color = humidity_NA)) +
  geom_density()

# Stack the three plots in a single column
grid.arrange( vmob, bsob, bsob2, ncol = 1 )
## Warning: Removed 3 rows containing non-finite values (stat_density).

# Density of wind east west (wind_ew), one panel per level of air temperature
# missingness (air_temp_c_NA)
ob1 <- ggplot(bind_shadow(oceanbuoys), aes(x = wind_ew)) +
  geom_density() +
  facet_wrap(~air_temp_c_NA)

# Same panels, with the curves additionally colored by humidity missingness
# (humidity_NA)
ob2 <- ggplot(bind_shadow(oceanbuoys),
              aes(x = wind_ew, color = humidity_NA)) +
  geom_density() +
  facet_wrap(~air_temp_c_NA)

# Stack both plots in a single column
grid.arrange( ob1, ob2, ncol = 1 )

# Box plot of wind east west (`wind_ew`) for each level of air temperature
# missingness
ob1 <- ggplot(bind_shadow(oceanbuoys),
              aes(x = air_temp_c_NA, y = wind_ew)) +
  geom_boxplot()

# Same box plot, one panel per level of humidity missingness (`humidity_NA`)
ob2 <- ggplot(bind_shadow(oceanbuoys),
              aes(x = air_temp_c_NA, y = wind_ew)) +
  geom_boxplot() +
  facet_wrap(~humidity_NA)

# Stack both plots in a single column
grid.arrange( ob1, ob2, ncol = 1 )

Visualizing missingness across two variables

The problem with visualizing missing data in 2D (e.g. scatterplot) is that rows with missing values are removed. ggplot2 is very kind to give a warning when rows are dropped:

# Plain scatter plot of Solar.R against Ozone
airquality %>%
  ggplot( aes( x = Ozone, y = Solar.R ) ) +
  geom_point()
## Warning: Removed 42 rows containing missing values (geom_point).

geom_miss_point() visualizes missing data by placing them in the margins of a figure

# Solar.R against Ozone drawn with geom_miss_point()
airquality %>%
  ggplot( aes( x = Ozone, y = Solar.R ) ) +
  geom_miss_point()

# Ozone against Wind drawn with geom_miss_point(), one panel per Month
airquality %>%
  ggplot( aes( x = Wind, y = Ozone ) ) +
  geom_miss_point() +
  facet_wrap( ~ Month )

Visualize missingness with another missing variable

# Ozone against Wind with geom_miss_point(), one panel per level of Solar.R_NA
ggplot( bind_shadow( airquality ),
        aes( x = Wind, y = Ozone ) ) +
  geom_miss_point() +
  facet_wrap( ~ Solar.R_NA )

# Air temperature against wind (wind_ew), drawn with `geom_miss_point()`
ob1 <- oceanbuoys %>%
  ggplot(aes(x = wind_ew, y = air_temp_c)) +
  geom_miss_point()

# Air temperature against humidity, drawn with `geom_miss_point()`
ob2 <- oceanbuoys %>%
  ggplot(aes(x = humidity, y = air_temp_c)) +
  geom_miss_point()

# Place both plots side by side
grid.arrange( ob1, ob2, ncol = 2 )

# Air temperature against wind (wind_ew) with `geom_miss_point()`,
# one panel per year
ob1 <- oceanbuoys %>%
  ggplot(aes(x = wind_ew, y = air_temp_c)) +
  geom_miss_point() +
  facet_wrap(~year)

# Air temperature against humidity with `geom_miss_point()`,
# one panel per year
ob2 <- oceanbuoys %>%
  ggplot(aes(x = humidity, y = air_temp_c)) +
  geom_miss_point() +
  facet_wrap(~year)

# Stack both plots in a single column
grid.arrange( ob1, ob2, ncol = 1 )

# wind_ew against air_temp_c with geom_miss_point(), one panel per level of
# humidity missingness (humidity_NA)
oceanbuoys %>%
  bind_shadow() %>%
  ggplot(aes(x = wind_ew, y = air_temp_c)) +
  geom_miss_point() +
  facet_wrap(~humidity_NA)

# Same plot on a grid: humidity missingness in rows, year in columns,
# via `facet_grid(humidity_NA ~ year)`
oceanbuoys %>%
  bind_shadow() %>%
  ggplot(aes(x = wind_ew, y = air_temp_c)) +
  geom_miss_point() +
  facet_grid(humidity_NA ~ year)

Connecting the dots (Imputation)

Filling in the blanks

Performing and tracking imputation

Using imputations to understand data structure. Visualizing and exploring imputed values

  • Imputing data to explore missingness
  • tracking missing values
  • visualize imputed values against data

impute_below() imputes missing values below the minimum value of the variable

c( 5, 6, 7, NA, 9, 10 ) %>% impute_below()
## [1]  5.00000  6.00000  7.00000  4.40271  9.00000 10.00000

Impute below only in the columns that satisfy a condition

pedestrian %>% summary()
##  hourly_counts       date_time                        year          month      
##  Min.   :    0.0   Min.   :2016-01-01 00:00:00   Min.   :2016   October: 5540  
##  1st Qu.:   72.0   1st Qu.:2016-04-08 04:00:00   1st Qu.:2016   January: 2976  
##  Median :  277.0   Median :2016-07-15 08:00:00   Median :2016   March  : 2976  
##  Mean   :  701.8   Mean   :2016-07-09 04:46:33   Mean   :2016   May    : 2976  
##  3rd Qu.:  878.0   3rd Qu.:2016-10-11 21:00:00   3rd Qu.:2016   July   : 2976  
##  Max.   :11273.0   Max.   :2016-12-31 23:00:00   Max.   :2016   August : 2976  
##  NA's   :2548                                                   (Other):17280  
##    month_day          week_day         hour        sensor_id    
##  Min.   : 1.00   Sunday   :5396   Min.   : 0.0   Min.   : 2.00  
##  1st Qu.: 8.00   Monday   :5376   1st Qu.: 6.0   1st Qu.: 2.00  
##  Median :16.00   Tuesday  :5328   Median :12.0   Median : 7.00  
##  Mean   :15.75   Wednesday:5328   Mean   :11.5   Mean   :11.15  
##  3rd Qu.:23.00   Thursday :5352   3rd Qu.:18.0   3rd Qu.:13.00  
##  Max.   :31.00   Friday   :5424   Max.   :23.0   Max.   :23.00  
##                  Saturday :5496                                 
##  sensor_name       
##  Length:37700      
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
# Apply impute_below to every column for which is.numeric() is TRUE
pedestrian_imp <- pedestrian %>% impute_below_if( is.numeric )
pedestrian_imp %>% summary()
##  hourly_counts       date_time                        year          month      
##  Min.   :-1409.0   Min.   :2016-01-01 00:00:00   Min.   :2016   October: 5540  
##  1st Qu.:   44.0   1st Qu.:2016-04-08 04:00:00   1st Qu.:2016   January: 2976  
##  Median :  243.0   Median :2016-07-15 08:00:00   Median :2016   March  : 2976  
##  Mean   :  578.1   Mean   :2016-07-09 04:46:33   Mean   :2016   May    : 2976  
##  3rd Qu.:  804.0   3rd Qu.:2016-10-11 21:00:00   3rd Qu.:2016   July   : 2976  
##  Max.   :11273.0   Max.   :2016-12-31 23:00:00   Max.   :2016   August : 2976  
##                                                                 (Other):17280  
##    month_day          week_day         hour        sensor_id    
##  Min.   : 1.00   Sunday   :5396   Min.   : 0.0   Min.   : 2.00  
##  1st Qu.: 8.00   Monday   :5376   1st Qu.: 6.0   1st Qu.: 2.00  
##  Median :16.00   Tuesday  :5328   Median :12.0   Median : 7.00  
##  Mean   :15.75   Wednesday:5328   Mean   :11.5   Mean   :11.15  
##  3rd Qu.:23.00   Thursday :5352   3rd Qu.:18.0   3rd Qu.:13.00  
##  Max.   :31.00   Friday   :5424   Max.   :23.0   Max.   :23.00  
##                  Saturday :5496                                 
##  sensor_name       
##  Length:37700      
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 

Specify features to be imputed

oceanbuoys %>% glimpse()
## Rows: 736
## Columns: 8
## $ year       <dbl> 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997…
## $ latitude   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ longitude  <dbl> -110, -110, -110, -110, -110, -110, -110, -110, -110, -110…
## $ sea_temp_c <dbl> 27.59, 27.55, 27.57, 27.62, 27.65, 27.83, 28.01, 28.04, 28…
## $ air_temp_c <dbl> 27.15, 27.02, 27.00, 26.93, 26.84, 26.94, 27.04, 27.11, 27…
## $ humidity   <dbl> 79.6, 75.8, 76.5, 76.2, 76.4, 76.7, 76.5, 78.3, 78.6, 76.9…
## $ wind_ew    <dbl> -6.4, -5.3, -5.1, -4.9, -3.5, -4.4, -2.0, -3.7, -4.2, -3.6…
## $ wind_ns    <dbl> 5.4, 5.3, 4.5, 2.5, 4.1, 1.6, 3.5, 4.5, 5.0, 3.5, 2.9, 1.8…
oceanbuoys %>% miss_var_summary()
## # A tibble: 8 x 3
##   variable   n_miss pct_miss
##   <chr>       <int>    <dbl>
## 1 humidity       93   12.6  
## 2 air_temp_c     81   11.0  
## 3 sea_temp_c      3    0.408
## 4 year            0    0    
## 5 latitude        0    0    
## 6 longitude       0    0    
## 7 wind_ew         0    0    
## 8 wind_ns         0    0
# Apply impute_below only to the three listed columns, then recount missings
oceanbuoys_imp <- oceanbuoys %>%
  impute_below_at( vars( humidity, air_temp_c, sea_temp_c ) )
oceanbuoys_imp %>% miss_var_summary()
## # A tibble: 8 x 3
##   variable   n_miss pct_miss
##   <chr>       <int>    <dbl>
## 1 year            0        0
## 2 latitude        0        0
## 3 longitude       0        0
## 4 sea_temp_c      0        0
## 5 air_temp_c      0        0
## 6 humidity        0        0
## 7 wind_ew         0        0
## 8 wind_ns         0        0
dat_hw %>% glimpse()
## Rows: 100
## Columns: 2
## $ weight <dbl> NA, 91.20470, 81.57915, 76.84886, 111.01731, 90.15135, 63.1424…
## $ height <dbl> 2.3881462, 1.0014508, NA, NA, -0.2412422, 2.5207375, 1.4016896…
# Apply impute_below to every column of dat_hw
dat_hw_imp <- dat_hw %>% impute_below_all()

# Scatter plot of height against weight on the imputed data
ob1 <- dat_hw_imp %>%
  ggplot(aes(x = weight, y = height)) +
  geom_point()

ob1

Tracking missing values can be handled by using bind_shadow():

# Bind the shadow columns first so they record which values were missing,
# then impute below
dat_hw_imp <- impute_below_all( bind_shadow( dat_hw ) )
dat_hw_imp %>% head()
## # A tibble: 6 x 4
##   weight height weight_NA height_NA
##    <dbl>  <dbl> <fct>     <fct>    
## 1   40.3  2.39  NA        !NA      
## 2   91.2  1.00  !NA       !NA      
## 3   81.6 -1.51  !NA       NA       
## 4   76.8 -1.65  !NA       NA       
## 5  111.  -0.241 !NA       !NA      
## 6   90.2  2.52  !NA       !NA
# Histogram of Ozone after impute_below_all(), filled by Ozone_NA
aq_imp <- ggplot( impute_below_all( bind_shadow( airquality ) ),
                  aes( x = Ozone, fill = Ozone_NA ) ) +
  geom_histogram()
aq_imp
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Split the plot by missingness:

# Same histogram, one panel per level of Solar.R_NA
aq_imp <- ggplot( impute_below_all( bind_shadow( airquality ) ),
                  aes( x = Ozone, fill = Ozone_NA ) ) +
  geom_histogram() +
  facet_wrap( ~ Solar.R_NA )
aq_imp
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Visualize imputed value against data values using scatter plots:

# Bind shadows, add the any_missing label, impute below, then plot
# Solar.R against Ozone colored by any_missing
aq_tracked <- impute_below_all( add_label_shadow( bind_shadow( airquality ) ) )
aq_imp <- ggplot( aq_tracked,
                  aes( x = Ozone, y = Solar.R, color = any_missing ) ) +
  geom_point()
rm( aq_tracked ) # drop the helper so no extra object is left behind
aq_imp

This successfully recreates the figure rendered by a call to geom_miss_point()

# Apply `impute_below_all` to the oceanbuoys data
ocean_imp <- oceanbuoys %>% impute_below_all()

# Scatter plot of the imputed data: air_temp_c against wind_ew
ocean_imp %>%
  ggplot(aes(x = wind_ew, y = air_temp_c)) +
  geom_point()

# Track the imputations: `bind_shadow`, then `impute_below_all`, then
# `add_label_shadow`
ocean_imp_track <- add_label_shadow(impute_below_all(bind_shadow(oceanbuoys)))

# Same scatter plot, colored by the any_missing label
ocean_imp_track %>%
  ggplot(aes(x = wind_ew, y = air_temp_c, color = any_missing)) +
  geom_point()

ocean_imp_track
## # A tibble: 736 x 17
##     year latitude longitude sea_temp_c air_temp_c humidity wind_ew wind_ns
##    <dbl>    <dbl>     <dbl>      <dbl>      <dbl>    <dbl>   <dbl>   <dbl>
##  1  1997        0      -110       27.6       27.1     79.6   -6.40    5.40
##  2  1997        0      -110       27.5       27.0     75.8   -5.30    5.30
##  3  1997        0      -110       27.6       27       76.5   -5.10    4.5 
##  4  1997        0      -110       27.6       26.9     76.2   -4.90    2.5 
##  5  1997        0      -110       27.6       26.8     76.4   -3.5     4.10
##  6  1997        0      -110       27.8       26.9     76.7   -4.40    1.60
##  7  1997        0      -110       28.0       27.0     76.5   -2       3.5 
##  8  1997        0      -110       28.0       27.1     78.3   -3.70    4.5 
##  9  1997        0      -110       28.0       27.2     78.6   -4.20    5   
## 10  1997        0      -110       28.0       27.2     76.9   -3.60    3.5 
## # … with 726 more rows, and 9 more variables: year_NA <fct>, latitude_NA <fct>,
## #   longitude_NA <fct>, sea_temp_c_NA <fct>, air_temp_c_NA <fct>,
## #   humidity_NA <fct>, wind_ew_NA <fct>, wind_ns_NA <fct>, any_missing <chr>
# Bind shadows, impute below, and add the any_missing label
ocean_imp_track <- add_label_shadow(impute_below_all(bind_shadow(oceanbuoys)))

# Air temperature against wind, colored by air_temp_c_NA
ocean_imp_track %>%
  ggplot(aes(x = wind_ew, y = air_temp_c, color = air_temp_c_NA)) +
  geom_point()

# Air temperature against humidity, colored by any_missing
ocean_imp_track %>%
  ggplot(aes(x = humidity, y = air_temp_c, color = any_missing)) +
  geom_point()

# Histogram of air_temp_c, filled by `air_temp_c_NA`
p <- ocean_imp_track %>%
  ggplot(aes(x = air_temp_c, fill = air_temp_c_NA)) +
  geom_histogram()

# Histogram of humidity, filled by humidity_NA
p2 <- ocean_imp_track %>%
  ggplot(aes(x = humidity, fill = humidity_NA)) +
  geom_histogram()

# Air temperature histogram, one panel per year via `facet_wrap(~year)`
p + facet_wrap(~year)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Humidity histogram, one panel per year via `facet_wrap(~year)`
p2 + facet_wrap(~year)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

What makes good imputation?

To understand good imputations, let’s spend some time taking a look at bad imputations:
Imputation by the mean value of the data is particularly bad.

Imputing by the mean is bad because it artificially increases the mean while decreasing the variance of the dataset, as the following exercises demonstrate:

# Box plot of Ozone after mean imputation, split by whether Ozone was missing.
# The mean imputation runs BEFORE add_label_shadow(): the any_missing label is
# a character column, and imputing the mean over it raises
# "argument is not numeric or logical" warnings. The label is derived from the
# shadow columns, so adding it afterwards still works.
aq_imp <- airquality %>%
  bind_shadow(only_miss = TRUE) %>% #bind only features with missing values
  impute_mean_all() %>%
  add_label_shadow() %>%
  ggplot( aes( x = Ozone_NA,
               y = Ozone ) ) +
  geom_boxplot()
aq_imp

The median is lower for the ‘not missing’ group than the NA group.

# Scatter plot of Solar.R against Ozone after mean imputation, colored by the
# any_missing label.
# The mean imputation runs BEFORE add_label_shadow(): the any_missing label is
# a character column, and imputing the mean over it raises
# "argument is not numeric or logical" warnings. The label is derived from the
# shadow columns, so adding it afterwards still works.
aq_imp <- airquality %>%
  bind_shadow(only_miss = TRUE) %>% #bind only features with missing values
  impute_mean_all() %>%
  add_label_shadow() %>%
  ggplot( aes( x = Ozone,
               y = Solar.R,
               color = any_missing ) ) +
  geom_point()
aq_imp

Exploring imputations for many variables:

# Bind a shadow column for every feature (no `only_miss`), then impute the mean
aq_imp <- impute_mean_all( bind_shadow( airquality ) )

# Reshape to long form with shadow_long(), keeping Ozone and Solar.R
aq_imp_long <- aq_imp %>% shadow_long( Ozone, Solar.R )
aq_imp_long %>% head()
## # A tibble: 6 x 4
##   variable value variable_NA value_NA
##   <chr>    <dbl> <chr>       <chr>   
## 1 Ozone     41   Ozone_NA    !NA     
## 2 Ozone     36   Ozone_NA    !NA     
## 3 Ozone     12   Ozone_NA    !NA     
## 4 Ozone     18   Ozone_NA    !NA     
## 5 Ozone     42.1 Ozone_NA    NA      
## 6 Ozone     28   Ozone_NA    !NA

…and now to visualize:

# Histogram of the values filled by value_NA, one panel per variable
aq_imp_long %>%
  ggplot( aes( x = value, fill = value_NA ) ) +
  geom_histogram() +
  facet_wrap( ~ variable )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Bind shadows, impute the mean, then add the any_missing label
ocean_imp_mean <- add_label_shadow(impute_mean_all(bind_shadow(oceanbuoys)))

# Box plot of humidity for each level of humidity_NA
ocean_imp_mean %>%
  ggplot(aes(x = humidity_NA, y = humidity)) +
  geom_boxplot()

# Box plot of air temperature for each level of air_temp_c_NA
ocean_imp_mean %>%
  ggplot(aes(x = air_temp_c_NA, y = air_temp_c)) +
  geom_boxplot()

# Humidity against air temperature, colored by any_missing
ocean_imp_mean %>%
  ggplot(aes(x = air_temp_c, y = humidity, color = any_missing)) +
  geom_point()

# Same scatter plot, one panel per year
ocean_imp_mean %>%
  ggplot(aes(x = air_temp_c, y = humidity, color = any_missing)) +
  geom_point() +
  facet_wrap(~year)

# Reshape to long form with shadow_long(), keeping humidity and air_temp_c
ocean_imp_mean_gather <- ocean_imp_mean %>%
  shadow_long(humidity, air_temp_c)
# First rows of the long data
ocean_imp_mean_gather %>% head()
## # A tibble: 6 x 4
##   variable   value       variable_NA   value_NA
##   <chr>      <chr>       <chr>         <chr>   
## 1 air_temp_c 27.14999962 air_temp_c_NA !NA     
## 2 air_temp_c 27.02000046 air_temp_c_NA !NA     
## 3 air_temp_c 27          air_temp_c_NA !NA     
## 4 air_temp_c 26.93000031 air_temp_c_NA !NA     
## 5 air_temp_c 26.84000015 air_temp_c_NA !NA     
## 6 air_temp_c 26.94000053 air_temp_c_NA !NA
# Histogram of the imputed data: `value` is converted with as.numeric(),
# filled by value_NA, one panel per variable

ocean_imp_mean_gather %>%
  ggplot( aes( x = as.numeric(value), fill = value_NA ) ) +
  geom_histogram() +
  facet_wrap( ~variable )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Performing imputations

Imputing using a linear regression model.
Try simputation
Using impute_lm from simputation is a powerful way to impute values for a dataset. However, the model used for imputation needs the same rigorous evaluation that statistical modelling approaches demand.

# Bind shadows for the variables with missing values, add the any_missing
# label, then impute humidity from a linear model on air_temp_c and wind_ew
oceanbuoys_lmimp <- oceanbuoys %>%
  bind_shadow( only_miss = TRUE ) %>%
  add_label_shadow() %>%
  impute_lm( humidity ~ air_temp_c + wind_ew )
oceanbuoys_lmimp %>% head()
## # A tibble: 6 x 12
##    year latitude longitude sea_temp_c air_temp_c humidity wind_ew wind_ns
##   <dbl>    <dbl>     <dbl>      <dbl>      <dbl>    <dbl>   <dbl>   <dbl>
## 1  1997        0      -110       27.6       27.1     79.6   -6.40    5.40
## 2  1997        0      -110       27.5       27.0     75.8   -5.30    5.30
## 3  1997        0      -110       27.6       27       76.5   -5.10    4.5 
## 4  1997        0      -110       27.6       26.9     76.2   -4.90    2.5 
## 5  1997        0      -110       27.6       26.8     76.4   -3.5     4.10
## 6  1997        0      -110       27.8       26.9     76.7   -4.40    1.60
## # … with 4 more variables: sea_temp_c_NA <fct>, air_temp_c_NA <fct>,
## #   humidity_NA <fct>, any_missing <chr>
# Convert Solar.R and Ozone to double before imputing
airquality_type <- mutate( airquality,
                           Solar.R = as.double( Solar.R ),
                           Ozone = as.double( Ozone ) )
# Track missingness, then impute Solar.R and Ozone from linear models on
# Wind, Temp and Month
aq_imp_lm <- bind_shadow( airquality_type ) %>%
  add_label_shadow() %>%
  impute_lm( Solar.R ~ Wind + Temp + Month ) %>%
  impute_lm( Ozone ~ Wind + Temp + Month )
aq_imp_lm %>% head()
## # A tibble: 6 x 13
##   Ozone Solar.R  Wind  Temp Month   Day Ozone_NA Solar.R_NA Wind_NA Temp_NA
##   <dbl>   <dbl> <dbl> <int> <int> <int> <fct>    <fct>      <fct>   <fct>  
## 1 41       190    7.4    67     5     1 !NA      !NA        !NA     !NA    
## 2 36       118    8      72     5     2 !NA      !NA        !NA     !NA    
## 3 12       149   12.6    74     5     3 !NA      !NA        !NA     !NA    
## 4 18       313   11.5    62     5     4 !NA      !NA        !NA     !NA    
## 5 -9.04    138.  14.3    56     5     5 NA       NA         !NA     !NA    
## 6 28       178.  14.9    66     5     6 !NA      NA         !NA     !NA    
## # … with 3 more variables: Month_NA <fct>, Day_NA <fct>, any_missing <chr>

The bind_shadow() & add_label_shadow() methods are important for tracking which values were imputed. The imputed values for the last example are visualized below:

# Ozone against Solar.R after lm imputation, colored by any_missing
aq_imp_lm %>%
  ggplot( aes( x = Solar.R, y = Ozone, color = any_missing ) ) +
  geom_point()

Build other linear model imputations and compare the results:

# Smaller model: impute Solar.R and Ozone from Wind and Temp only
aq_imp_lm_small <- bind_shadow( airquality_type ) %>%
  add_label_shadow() %>%
  impute_lm( Solar.R ~ Wind + Temp ) %>%
  impute_lm( Ozone ~ Wind + Temp )

# Larger model: impute Solar.R and Ozone from Wind, Temp, Month and Day
aq_imp_lm_large <- bind_shadow( airquality_type ) %>%
  add_label_shadow() %>%
  impute_lm( Solar.R ~ Wind + Temp + Month + Day ) %>%
  impute_lm( Ozone ~ Wind + Temp + Month + Day )

Bind the models, so that we can build visualizations:

# Stack the three imputed datasets; the `imp_model` column records which
# model each row came from
bound_models <- bind_rows(
  small = aq_imp_lm_small,
  medium = aq_imp_lm,
  large = aq_imp_lm_large,
  .id = 'imp_model'
)
bound_models %>% head()
## # A tibble: 6 x 14
##   imp_model Ozone Solar.R  Wind  Temp Month   Day Ozone_NA Solar.R_NA Wind_NA
##   <chr>     <dbl>   <dbl> <dbl> <int> <int> <int> <fct>    <fct>      <fct>  
## 1 small      41      190    7.4    67     5     1 !NA      !NA        !NA    
## 2 small      36      118    8      72     5     2 !NA      !NA        !NA    
## 3 small      12      149   12.6    74     5     3 !NA      !NA        !NA    
## 4 small      18      313   11.5    62     5     4 !NA      !NA        !NA    
## 5 small     -11.7    127.  14.3    56     5     5 NA       NA         !NA    
## 6 small      28      160.  14.9    66     5     6 !NA      NA         !NA    
## # … with 4 more variables: Temp_NA <fct>, Month_NA <fct>, Day_NA <fct>,
## #   any_missing <chr>
# Keep the two imputed variables plus the tracking columns, then gather
# Ozone and Solar.R into `variable` / `value` pairs
bound_models_gather <- gather(
  select( bound_models, Ozone, Solar.R, any_missing, imp_model ),
  key = 'variable', value = 'value', -any_missing, -imp_model
)
bound_models_gather %>% head()
## # A tibble: 6 x 4
##   any_missing imp_model variable value
##   <chr>       <chr>     <chr>    <dbl>
## 1 Not Missing small     Ozone     41  
## 2 Not Missing small     Ozone     36  
## 3 Not Missing small     Ozone     12  
## 4 Not Missing small     Ozone     18  
## 5 Missing     small     Ozone    -11.7
## 6 Missing     small     Ozone     28
# Box plot of the values for each imputation model, one panel per variable
bound_models_gather %>%
  ggplot( aes( x = imp_model, y = value, color = imp_model ) ) +
  geom_boxplot() +
  facet_wrap( ~variable )

# Track missingness, then impute air temperature and humidity from linear
# models on wind_ew and wind_ns
ocean_imp_lm_wind <- bind_shadow(oceanbuoys) %>%
  impute_lm(air_temp_c ~ wind_ew + wind_ns) %>%
  impute_lm(humidity ~ wind_ew + wind_ns) %>%
  add_label_shadow()

# Humidity against air temperature after imputation, colored by any_missing
ocean_imp_lm_wind %>%
  ggplot(aes(x = air_temp_c, y = humidity, color = any_missing)) +
  geom_point()

# Stack the mean-imputed and lm-imputed datasets, labelled in `imp_model`
bound_models <- bind_rows(
  mean = ocean_imp_mean,
  lm_wind = ocean_imp_lm_wind,
  .id = "imp_model"
)

# Humidity against air temperature, one panel per imputation model
bound_models %>%
  ggplot(aes(x = air_temp_c, y = humidity, color = any_missing)) +
  geom_point() +
  facet_wrap(~imp_model)

# Third model: same predictors plus year
ocean_imp_lm_wind_year <- oceanbuoys %>%
  bind_shadow() %>%
  impute_lm(air_temp_c ~ wind_ew + wind_ns + year) %>%
  impute_lm(humidity ~ wind_ew + wind_ns + year) %>%
  add_label_shadow()

# Stack the mean, lm_wind and lm_wind_year datasets
bound_models <- bind_rows(
  mean = ocean_imp_mean,
  lm_wind = ocean_imp_lm_wind,
  lm_wind_year = ocean_imp_lm_wind_year,
  .id = "imp_model"
)
# Factor copy of imp_model with the levels in a fixed order
bound_models$imp_model_f <- factor(
  bound_models$imp_model,
  levels = c('mean','lm_wind','lm_wind_year')
)

# Humidity against air temperature, colored by any_missing, one panel per
# imputation model
bound_models %>%
  ggplot(aes(x = air_temp_c, y = humidity, color = any_missing)) +
  geom_point() +
  facet_wrap(~imp_model_f)

Evaluating imputations and models

Assessing inference from imputed data in a modelling context.

Compare the imputed data with a complete case analysis (which only uses rows with no missing values)

# Complete case analysis: drop every row that has a missing value, then add
# the shadow columns and the any_missing label
aq_cc <- add_label_shadow( bind_shadow( na.omit( airquality ) ) )
#dim( aq_cc )

# Linear-model imputation of Ozone and Solar.R from Temp, Wind, Month and Day
aq_imp_lm <- airquality_type %>%
  bind_shadow() %>%
  add_label_shadow() %>%
  impute_lm( Ozone ~ Temp + Wind + Month + Day ) %>%
  impute_lm( Solar.R ~ Temp + Wind + Month + Day )
#dim( aq_imp_lm )

# Stack both datasets; `imp_model` records which one each row came from
bound_models <- bind_rows(
  cc = aq_cc,
  imp_lm = aq_imp_lm,
  .id = 'imp_model'
)
bound_models %>% head()
## # A tibble: 6 x 14
##   imp_model Ozone Solar.R  Wind  Temp Month   Day Ozone_NA Solar.R_NA Wind_NA
##   <chr>     <dbl>   <dbl> <dbl> <int> <int> <int> <fct>    <fct>      <fct>  
## 1 cc           41     190   7.4    67     5     1 !NA      !NA        !NA    
## 2 cc           36     118   8      72     5     2 !NA      !NA        !NA    
## 3 cc           12     149  12.6    74     5     3 !NA      !NA        !NA    
## 4 cc           18     313  11.5    62     5     4 !NA      !NA        !NA    
## 5 cc           23     299   8.6    65     5     7 !NA      !NA        !NA    
## 6 cc           19      99  13.8    59     5     8 !NA      !NA        !NA    
## # … with 4 more variables: Temp_NA <fct>, Month_NA <fct>, Day_NA <fct>,
## #   any_missing <chr>

Now that the data is formatted, fit a linear model to each of the datasets

# Fit the same linear model of Temp to each dataset in bound_models and keep
# the residuals, predictions and coefficient table of every fit.
# Temp is the response, so it is not listed among the predictors: the original
# formula repeated it on the right-hand side, where R drops it again (the
# coefficient table has no Temp term).
model_summary <- bound_models %>%
  group_by( imp_model ) %>%
  nest() %>% # collapse the data so that each row holds one dataset
  mutate( mod = map( data,
                     ~lm( Temp ~ Ozone + Solar.R + Wind + Day + Month,
                          data = . ) ), # fit a linear model to each dataset
          res = map( mod, residuals ), # residuals of each fit
          pred = map( mod, predict ), # model predictions of each fit
          tidy = map( mod, broom::tidy ) ) # coefficient table of each fit
model_summary
## # A tibble: 2 x 6
## # Groups:   imp_model [2]
##   imp_model data                mod    res         pred        tidy            
##   <chr>     <list>              <list> <list>      <list>      <list>          
## 1 cc        <tibble [111 × 13]> <lm>   <dbl [111]> <dbl [111]> <tibble [6 × 5]>
## 2 imp_lm    <tibble [153 × 13]> <lm>   <dbl [153]> <dbl [153]> <tibble [6 × 5]>

Explore the results from both approaches to fit a linear model to the data (with & w/out imputation)

# Expand the coefficient tables so both models can be compared row by row
unnest( select( model_summary, imp_model, tidy ),
        cols = c( tidy ) )
## # A tibble: 12 x 6
## # Groups:   imp_model [2]
##    imp_model term        estimate std.error statistic  p.value
##    <chr>     <chr>          <dbl>     <dbl>     <dbl>    <dbl>
##  1 cc        (Intercept) 57.3       4.50      12.7    5.52e-23
##  2 cc        Ozone        0.165     0.0239     6.92   3.66e-10
##  3 cc        Solar.R      0.0108    0.00699    1.55   1.24e- 1
##  4 cc        Wind        -0.174     0.212     -0.821  4.13e- 1
##  5 cc        Day         -0.0892    0.0677    -1.32   1.91e- 1
##  6 cc        Month        2.04      0.409      4.99   2.42e- 6
##  7 imp_lm    (Intercept) 54.7       3.59      15.2    5.21e-32
##  8 imp_lm    Ozone        0.196     0.0205     9.53   4.52e-17
##  9 imp_lm    Solar.R      0.0102    0.00577    1.76   7.97e- 2
## 10 imp_lm    Wind        -0.00642   0.172     -0.0374 9.70e- 1
## 11 imp_lm    Day         -0.112     0.0538    -2.08   3.92e- 2
## 12 imp_lm    Month        2.11      0.340      6.21   5.09e- 9
# Side-by-side histogram of the residuals, filled by model
unnest( select( model_summary, imp_model, res ),
        cols = c( res ) ) %>%
  ggplot( aes( x = res, fill = imp_model ) ) +
  geom_histogram( position = 'dodge' )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Explore the predictions in the data

# Side-by-side histogram of the predictions, filled by model
unnest( select( model_summary, imp_model, pred ),
        cols = c( pred ) ) %>%
  ggplot( aes( x = pred, fill = imp_model ) ) +
  geom_histogram( position = 'dodge' )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

and for the oceanbuoys dataset

# Complete case analysis: drop every row that has a missing value, then add
# the shadow (*_NA) columns and the any_missing label so the columns line up
# with the imputed datasets when they are bound together.
ocean_cc <- add_label_shadow(bind_shadow(na.omit(oceanbuoys)))

# Create an imputed dataset using linear models: add the shadow columns and
# the any_missing label, then impute sea_temp_c, air_temp_c and humidity in
# turn, each from a linear model on the wind, year and location variables.
ocean_imp_lm_all <- oceanbuoys %>%
  bind_shadow() %>%
  add_label_shadow() %>%
  impute_lm(sea_temp_c ~ wind_ew + wind_ns + year + latitude + longitude) %>%
  impute_lm(air_temp_c ~ wind_ew + wind_ns + year + latitude + longitude) %>%
  impute_lm(humidity ~ wind_ew + wind_ns + year + latitude + longitude)

# Stack the complete-case and the two imputed datasets into one data frame;
# the "imp_model" id column records which dataset each row came from.
bound_models <- bind_rows(
  cc = ocean_cc,
  imp_lm_wind = ocean_imp_lm_wind,
  imp_lm_all = ocean_imp_lm_all,
  .id = "imp_model"
)

# Inspect the structure of the combined data
glimpse(bound_models)
## Rows: 2,037
## Columns: 18
## $ imp_model     <chr> "cc", "cc", "cc", "cc", "cc", "cc", "cc", "cc", "cc", "…
## $ year          <dbl> 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1…
## $ latitude      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ longitude     <dbl> -110, -110, -110, -110, -110, -110, -110, -110, -110, -…
## $ sea_temp_c    <dbl> 27.59, 27.55, 27.57, 27.62, 27.65, 27.83, 28.01, 28.04,…
## $ air_temp_c    <dbl> 27.15, 27.02, 27.00, 26.93, 26.84, 26.94, 27.04, 27.11,…
## $ humidity      <dbl> 79.6, 75.8, 76.5, 76.2, 76.4, 76.7, 76.5, 78.3, 78.6, 7…
## $ wind_ew       <dbl> -6.4, -5.3, -5.1, -4.9, -3.5, -4.4, -2.0, -3.7, -4.2, -…
## $ wind_ns       <dbl> 5.4, 5.3, 4.5, 2.5, 4.1, 1.6, 3.5, 4.5, 5.0, 3.5, 2.9, …
## $ year_NA       <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ latitude_NA   <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ longitude_NA  <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ sea_temp_c_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ air_temp_c_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ humidity_NA   <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ wind_ew_NA    <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ wind_ns_NA    <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ any_missing   <chr> "Not Missing", "Not Missing", "Not Missing", "Not Missi…
# Fit the same linear model to each dataset: nest the rows by imp_model, fit
# lm() on every nested data frame, and keep the residuals, fitted values and
# tidied coefficient table alongside each model as list-columns.
model_summary <- bound_models %>%
  group_by(imp_model) %>%
  nest() %>%
  mutate(
    mod = map(data, ~ lm(sea_temp_c ~ air_temp_c + humidity + year, data = .)),
    res = map(mod, residuals),
    pred = map(mod, predict),
    tidy = map(mod, broom::tidy)
  )

# Show the coefficient estimates of every model, one row per term
model_summary %>%
  select(imp_model, tidy) %>%
  unnest(cols = tidy)
## # A tibble: 12 x 6
## # Groups:   imp_model [3]
##    imp_model   term          estimate std.error statistic   p.value
##    <chr>       <chr>            <dbl>     <dbl>     <dbl>     <dbl>
##  1 cc          (Intercept)  -735.      45.9        -16.0  8.19e- 48
##  2 cc          air_temp_c      0.864    0.0231      37.4  2.64e-154
##  3 cc          humidity        0.0341   0.00390      8.74 2.69e- 17
##  4 cc          year            0.369    0.0232      15.9  3.46e- 47
##  5 imp_lm_wind (Intercept) -1742.      56.1        -31.0  1.83e-135
##  6 imp_lm_wind air_temp_c      0.365    0.0279      13.1  2.73e- 35
##  7 imp_lm_wind humidity        0.0225   0.00690      3.26 1.17e-  3
##  8 imp_lm_wind year            0.880    0.0283      31.1  6.79e-136
##  9 imp_lm_all  (Intercept)  -697.      51.8        -13.5  5.04e- 37
## 10 imp_lm_all  air_temp_c      0.890    0.0255      35.0  2.90e-158
## 11 imp_lm_all  humidity        0.0127   0.00463      2.75 6.03e-  3
## 12 imp_lm_all  year            0.351    0.0262      13.4  1.12e- 36

The imp_lm_all model gives the highest estimate for air_temp_c (0.890, compared with 0.864 for the complete-case model and 0.365 for imp_lm_wind)

Final Lesson

Some Other Datasets to play with

# Raw CSV locations of two further datasets, both hosted in the
# njtierney/user2018-missing-data-tutorial GitHub repository.
ozoneNA_url <- "https://raw.githubusercontent.com/njtierney/user2018-missing-data-tutorial/master/ozoneNA.csv"
ecological_url <- "https://raw.githubusercontent.com/njtierney/user2018-missing-data-tutorial/master/ecological.csv"