library( dplyr )
library( ggplot2 )
library( gridExtra )
library( tidyverse )
library( naniar )
library( simputation )
Why care about missing data?
Introduction to missing data
“The best thing to do with missing data is to not have any.” -Gertrude M. Cox
- Working with real-world data == working with missing data
- Missing Data can have unexpected effects on your analysis
- Bad imputation can lead to poor estimates and decisions
Checking for missing values with any_na()
# Example vector: three of its six elements are missing.
x <- c(1, NA, 3, NA, NA, 5)

# Time three equivalent "does x contain any NA?" checks. Each figure is a
# single wall-clock measurement, so treat it as a rough indication only.
baset0 <- Sys.time()
any(is.na(x))
## [1] TRUE
timeint_base <- Sys.time() - baset0

naniart0 <- Sys.time()
any_na(x)
## [1] TRUE
timeint_naniar <- Sys.time() - naniart0

base2t0 <- Sys.time()
anyNA(x)
## [1] TRUE
timeint_base2 <- Sys.time() - base2t0

# A difftime picks its own units and paste() prints only the bare number,
# so convert to seconds explicitly and state the unit in the report.
res <- paste(
  "Comparing runtime for methods to detect NA (in seconds):\nbase R runtime:",
  as.numeric(timeint_base, units = "secs"),
  "\nanyNA() base r runtime", as.numeric(timeint_base2, units = "secs"),
  "\nany_na() naniar runtime:", as.numeric(timeint_naniar, units = "secs")
)
cat(res, sep = "\n")
## Comparing runtime for methods to detect NA (in seconds):
## base R runtime: 0.00362491607666016
## anyNA() base r runtime 0.00226187705993652
## any_na() naniar runtime: 0.00248432159423828
# Logical vector marking which elements of x are NA
x %>% are_na()
## [1] FALSE TRUE FALSE TRUE TRUE FALSE
# How many elements are NA
x %>% n_miss()
## [1] 3
# What fraction of the elements are NA
x %>% prop_miss()
## [1] 0.5
Generally, operations on NA values return an NA value:
# One-row data frame of heights; Fred's value is missing.
heights <- data.frame(Sophie = 165, Dan = 177, Fred = NA)
# A single NA is enough to make the total NA.
sum(heights)
## [1] NA
Important Distinctions:
- NaN == Not a Number. Is evaluated the same as NA
- NULL == empty. Is not the same as NA
- Inf == Infinity. Is not the same as NA
# Ask any_na() about several special values.
r1 <- any_na(NaN)
r2 <- any_na(NULL)
r3 <- any_na(Inf)
r4 <- any_na(0)
# Interleave labels and results, then join every piece with a single space.
res <- paste(
  c(
    "When tested any_na()::\nNaN evaluates:", r1,
    "\nNULL evaluates:", r2,
    "\nInf evaluates:", r3,
    "\n0 evaluates:", r4
  ),
  collapse = " "
)
cat(res, sep = "\n")
## When tested any_na()::
## NaN evaluates: TRUE
## NULL evaluates: FALSE
## Inf evaluates: FALSE
## 0 evaluates: FALSE
Conditional Statement Behaviors to look out for:
# Element-wise OR with a missing operand.
r1 <- NA | TRUE
r2 <- NA | FALSE
r3 <- NA | NaN
r4 <- NaN | NA
# Interleave labels and results, then join every piece with a single space.
res <- paste(
  c(
    "Conditional Statement Behaviors to be aware of::\nNA | TRUE evaluates:", r1,
    "\nNA | FALSE evaluates:", r2,
    "\nNA | NaN evaluates:", r3,
    "\nNaN | NA evaluates:", r4
  ),
  collapse = " "
)
cat(res, sep = "\n")
## Conditional Statement Behaviors to be aware of::
## NA | TRUE evaluates: TRUE
## NA | FALSE evaluates: NA
## NA | NaN evaluates: NA
## NaN | NA evaluates: NA
# Build x from NA, NaN, Inf and the strings "." and "missing".
# Because strings are present, c() produces a character vector.
x <- c(NA, NaN, Inf, ".", "missing")
# Is anything missing at all?
x %>% any_na()
## [1] TRUE
# Which elements are missing? Only the first one is flagged.
x %>% are_na()
## [1] TRUE FALSE FALSE FALSE FALSE
# Remote CSV holding the height/weight example data.
dat_hw_url <- "https://raw.githubusercontent.com/SmilodonCub/ReadingLearningTinkering/master/DataCamp/Statistics_with_R/dat_hw.csv"
# Read the file and drop the column named X
# (presumably a saved row index -- TODO confirm).
dat_hw <- select(read.csv(dat_hw_url), -X)
head(dat_hw)
## weight height
## 1 NA 2.3881462
## 2 91.20470 1.0014508
## 3 81.57915 NA
## 4 76.84886 NA
## 5 111.01731 -0.2412422
## 6 90.15135 2.5207375
# Total number of missing values in the whole data frame
dat_hw %>% n_miss()
## [1] 30
# Missing values in the weight column only
dat_hw$weight %>% n_miss()
## [1] 15
# Total number of complete (non-missing) values in the whole data frame
dat_hw %>% n_complete()
## [1] 170
# Complete values in the weight column only
dat_hw$weight %>% n_complete()
## [1] 85
# Overall proportion of missing and of complete values in dat_hw
dat_hw %>% prop_miss()
## [1] 0.15
dat_hw %>% prop_complete()
## [1] 0.85
Why care about missing values?
Introduction to missingness summaries
Basic summary missingness:
# Missing and complete counts for the current vector x
x %>% n_miss()
## [1] 1
x %>% n_complete()
## [1] 4
Dataframe summaries of missingness:
miss_var_summary(): summarize the number of missing in each variable/feature/column
# Per-variable missingness for the height/weight data
miss_x_cols <- dat_hw %>% miss_var_summary()
miss_x_cols %>% glimpse()
## Rows: 2
## Columns: 3
## $ variable <chr> "weight", "height"
## $ n_miss <int> 15, 15
## $ pct_miss <dbl> 15, 15
# Same summary for airquality (overwrites miss_x_cols)
miss_x_cols <- airquality %>% miss_var_summary()
miss_x_cols %>% glimpse()
## Rows: 6
## Columns: 3
## $ variable <chr> "Ozone", "Solar.R", "Wind", "Temp", "Month", "Day"
## $ n_miss <int> 37, 7, 0, 0, 0, 0
## $ pct_miss <dbl> 24.183007, 4.575163, 0.000000, 0.000000, 0.000000, 0.000000
miss_case_summary(): each case is a row in the dataframe. Gives info on missing values by row.
# Size of the per-row summary for dat_hw
dat_hw %>% miss_case_summary() %>% dim()
## [1] 100 3
# First rows of the per-row summary for airquality
airquality %>% miss_case_summary() %>% head()
## # A tibble: 6 x 3
## case n_miss pct_miss
## <int> <int> <dbl>
## 1 5 2 33.3
## 2 27 2 33.3
## 3 6 1 16.7
## 4 10 1 16.7
## 5 11 1 16.7
## 6 25 1 16.7
Missing Data Tabulations:
miss_var_table() returns a dataframe with info on the variables missing data as well as the percentage of variables affected by missing data
# Tabulate variables by how many values each is missing
dat_hw %>% miss_var_table()
## # A tibble: 1 x 3
## n_miss_in_var n_vars pct_vars
## <int> <int> <dbl>
## 1 15 2 100
can be interpreted as: 2 variables are missing 15 observations each. 100% of the variables in the dataframe are affected this way
# Same tabulation for airquality
airquality %>% miss_var_table()
## # A tibble: 3 x 3
## n_miss_in_var n_vars pct_vars
## <int> <int> <dbl>
## 1 0 4 66.7
## 2 7 1 16.7
## 3 37 1 16.7
can be interpreted as: 66.7% of the features in this dataframe (4 features in total) are missing 0 observations. One variable (16.7% of features) is missing 7 observations, while another variable (16.7% of features) is missing 37 observations.
miss_case_table(): returns the same information but by cases (rows)
# Tabulate rows by how many values each is missing
dat_hw %>% miss_case_table()
## # A tibble: 2 x 3
## n_miss_in_case n_cases pct_cases
## <int> <int> <dbl>
## 1 0 70 70
## 2 1 30 30
can be interpreted as: 70% of rows (70 rows) are missing 0 observations. 30% of rows (30 rows) are missing 1 observation.
# Same row-wise tabulation for airquality
airquality %>% miss_case_table()
## # A tibble: 3 x 3
## n_miss_in_case n_cases pct_cases
## <int> <int> <dbl>
## 1 0 111 72.5
## 2 1 40 26.1
## 3 2 2 1.31
can be interpreted as: 72.5% of rows (111 rows) are missing 0 observations. 26.1% of rows (40 rows) are missing 1 observation. 1.3% of rows (2 rows) are missing 2 observations.
Other useful functions:
- miss_var_span() summarizes missing data by span of data (good for time series analysis, e.g. weekly spans of 7)
- miss_var_run() summarizes runs of missing data. Good for finding unusual patterns of missing data. Returns runs of complete and missing data. Great for finding systemic sampling error.
Using summaries with group_by():
# Per-variable missingness computed separately for each Month
miss_var_summary(group_by(airquality, Month))
## # A tibble: 25 x 4
## # Groups: Month [5]
## Month variable n_miss pct_miss
## <int> <chr> <int> <dbl>
## 1 5 Ozone 5 16.1
## 2 5 Solar.R 4 12.9
## 3 5 Wind 0 0
## 4 5 Temp 0 0
## 5 5 Day 0 0
## 6 6 Ozone 21 70
## 7 6 Solar.R 0 0
## 8 6 Wind 0 0
## 9 6 Temp 0 0
## 10 6 Day 0 0
## # … with 15 more rows
# Structure of the pedestrian data
pedestrian %>% glimpse()
## Rows: 37,700
## Columns: 9
## $ hourly_counts <int> 883, 597, 294, 183, 118, 68, 47, 52, 120, 333, 761, 135…
## $ date_time <dttm> 2016-01-01 00:00:00, 2016-01-01 01:00:00, 2016-01-01 0…
## $ year <int> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2…
## $ month <ord> January, January, January, January, January, January, J…
## $ month_day <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ week_day <ord> Friday, Friday, Friday, Friday, Friday, Friday, Friday,…
## $ hour <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
## $ sensor_id <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ sensor_name <chr> "Bourke Street Mall (South)", "Bourke Street Mall (Sout…
# Tabulate pedestrian variables by number of missing values
pedestrian %>% miss_var_table()
## # A tibble: 2 x 3
## n_miss_in_var n_vars pct_vars
## <int> <int> <dbl>
## 1 0 8 88.9
## 2 2548 1 11.1
# Runs of complete / missing values in hourly_counts
pedestrian %>%
  miss_var_run(var = hourly_counts)
## # A tibble: 35 x 2
## run_length is_na
## <int> <chr>
## 1 6628 complete
## 2 1 missing
## 3 5250 complete
## 4 624 missing
## 5 3652 complete
## 6 1 missing
## 7 1290 complete
## 8 744 missing
## 9 7420 complete
## 10 1 missing
## # … with 25 more rows
# Missingness of hourly_counts summarised over consecutive
# spans of 4000 rows
pedestrian %>%
  miss_var_span(var = hourly_counts, span_every = 4000)
## # A tibble: 10 x 5
## span_counter n_miss n_complete prop_miss prop_complete
## <int> <int> <dbl> <dbl> <dbl>
## 1 1 0 4000 0 1
## 2 2 1 3999 0.00025 1.00
## 3 3 121 3879 0.0302 0.970
## 4 4 503 3497 0.126 0.874
## 5 5 745 3255 0.186 0.814
## 6 6 0 4000 0 1
## 7 7 1 3999 0.00025 1.00
## 8 8 0 4000 0 1
## 9 9 745 3255 0.186 0.814
## 10 10 432 3568 0.108 0.892
# Runs of complete / missing hourly_counts within each month
pedestrian %>%
  group_by(month) %>%
  miss_var_run(var = hourly_counts)
## # A tibble: 51 x 3
## # Groups: month [12]
## month run_length is_na
## <ord> <int> <chr>
## 1 January 2976 complete
## 2 February 2784 complete
## 3 March 2976 complete
## 4 April 888 complete
## 5 April 552 missing
## 6 April 1440 complete
## 7 May 744 complete
## 8 May 72 missing
## 9 May 2160 complete
## 10 June 2880 complete
## # … with 41 more rows
# Within each month, summarise the missingness of hourly_counts
# over spans of 2000 rows
pedestrian %>%
  group_by(month) %>%
  miss_var_span(var = hourly_counts, span_every = 2000)
## # A tibble: 25 x 6
## # Groups: month [12]
## month span_counter n_miss n_complete prop_miss prop_complete
## <ord> <int> <int> <dbl> <dbl> <dbl>
## 1 January 1 0 2000 0 1
## 2 January 2 0 2000 0 1
## 3 February 1 0 2000 0 1
## 4 February 2 0 2000 0 1
## 5 March 1 0 2000 0 1
## 6 March 2 0 2000 0 1
## 7 April 1 552 1448 0.276 0.724
## 8 April 2 0 2000 0 1
## 9 May 1 72 1928 0.036 0.964
## 10 May 2 0 2000 0 1
## # … with 15 more rows
How do we visualize missing values?
naniar missing data visualization methods.
Overview of missingness: a type of heatmap for missing data. black == missing. also provides basic stats of proportions of missingness.
# Missingness maps with default settings
# (airquality is drawn twice, as in the original)
vis_miss(airquality)
vis_miss(dat_hw)
vis_miss(airquality)
# The same maps drawn with cluster = TRUE
vis_miss(airquality, cluster = TRUE)
vis_miss(dat_hw, cluster = TRUE)
Looking at missing observations in both variables and cases
# airquality: missings per variable next to missings per case
varp <- airquality %>% gg_miss_var()
casep <- airquality %>% gg_miss_case()
grid.arrange(varp, casep, ncol = 2)
# dat_hw: same pair of plots (reuses varp and casep)
varp <- dat_hw %>% gg_miss_var()
casep <- dat_hw %>% gg_miss_case()
grid.arrange(varp, casep, ncol = 2)
faceting a gg_miss_var() plot acts like visualizing a group_by
# One panel of per-variable missingness for each Month
airquality %>% gg_miss_var(facet = Month)
Visualizing missingness patterns
gg_miss_upset() shows co-occurring missing observations across variables
# Upset plot of missingness combinations in airquality
airquality %>% gg_miss_upset()
gg_miss_fct(): visualizing factors for missingness. Again, similar to visualizing a missing-data group_by result. Gives a heat map view for each feature (y-axis) and each month (x-axis) where color intensity is the number of missing observations.
# Heat map of missingness in each variable, split by Month
gg_miss_fct(airquality, fct = Month)
gg_miss_span() visualizes the number of missing observations for a given span.
# Missingness of hourly_counts over spans of 3000 rows
gg_miss_span(pedestrian, var = hourly_counts, span_every = 3000)
# Theme tweak shared by the four plots below: blank the x-axis title, text
# and ticks (presumably to keep the 2 x 2 grid readable -- confirm).
# Defined once instead of repeating the same theme() call for every plot.
hide_x_axis <- theme(
  axis.title.x = element_blank(),
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank()
)

# Visualize all of the missingness in the `riskfactors` dataset
vm <- vis_miss(riskfactors) + hide_x_axis
# Visualize and cluster all of the missingness in the `riskfactors` dataset
vmc <- vis_miss(riskfactors, cluster = TRUE) + hide_x_axis
# Visualize and sort the columns by missingness in the `riskfactors` dataset
vms <- vis_miss(riskfactors, sort_miss = TRUE) + hide_x_axis
# Visualize, cluster and sort the columns by missingness in `riskfactors`
vmcs <- vis_miss(riskfactors, sort_miss = TRUE, cluster = TRUE) + hide_x_axis

# Show the four variants side by side in a two-column grid
grid.arrange(vm, vmc, vms, vmcs, ncol = 2)
# Number of missings per case in `riskfactors`
caserf <- riskfactors %>% gg_miss_case()
# Same plot, faceted by the variable `education`
fct_caserf <- riskfactors %>% gg_miss_case(facet = education)
grid.arrange(caserf, fct_caserf, ncol = 2)
# Number of missings per variable in `riskfactors`
varrf <- riskfactors %>% gg_miss_var()
# Same plot, faceted by the variable `education`
fct_varrf <- riskfactors %>% gg_miss_var(facet = education)
grid.arrange(varrf, fct_varrf, ncol = 2)
# How missingness in `riskfactors` changes across the marital variable
gg_miss_fct(riskfactors, fct = marital)
# Missingness of hourly_counts in `pedestrian` over spans of 3000 rows
pedestrian %>%
  gg_miss_span(var = hourly_counts, span_every = 3000)
# Same idea with spans of 1000 rows, one facet per month
pedestrian %>%
  gg_miss_span(var = hourly_counts, span_every = 1000, facet = month)
Wrangling and tidying up missing values.
Searching for and replacing missing values
Assumptions with missing data: finding missing values and labelling with NA
In a perfect world, missing data is labelled NA. However, it may be the case that it is coded as ‘missing’, ‘Not Available’, ‘N/A’ or some other permutation.
miss_scan_count(): Searching for missing values
# Remote CSV with deliberately messy missing-value codes
employees_url <- "https://raw.githubusercontent.com/ChaitanyaBaweja/Programming-Tutorials/master/Missing-Data-Pandas/employees.csv"
employees <- read.csv(employees_url)
# Inspect column types and the first few values
employees %>% glimpse()
## Rows: 1,000
## Columns: 6
## $ First.Name <chr> "Douglas", "Thomas", "Maria", "Jerry", "Larry", "De…
## $ Gender <chr> "Male", "Male", "Female", "Male", "Male", "n.a.", "…
## $ Salary <chr> "97308", "61933", "130590", NA, "101004", "115163",…
## $ Bonus.. <chr> "6.945", "NaN", "11.858", "9.34", "1.389", "10.125"…
## $ Senior.Management <chr> "TRUE", "TRUE", "FALSE", "TRUE", "TRUE", "FALSE", "…
## $ Team <chr> "Marketing", "", "Finance", "Finance", "Client Serv…
# Explicitly search every column for the NA mislabellings 'n.a' and 'na'.
# NOTE(review): the counts reported for this call (e.g. 58 hits in First.Name)
# look like pattern/substring matches rather than exact-value matches --
# confirm against the naniar documentation before relying on them.
employees %>%
miss_scan_count( search = list('n.a', 'na') )
## # A tibble: 6 x 2
## Variable n
## <chr> <int>
## 1 First.Name 58
## 2 Gender 1
## 3 Salary 1
## 4 Bonus.. 0
## 5 Senior.Management 2
## 6 Team 106
# Scan every column for naniar's built-in list of common NA spellings
miss_scan_count(employees, search = common_na_strings)
## # A tibble: 6 x 2
## Variable n
## <chr> <int>
## 1 First.Name 998
## 2 Gender 999
## 3 Salary 999
## 4 Bonus.. 1000
## 5 Senior.Management 1000
## 6 Team 1000
# Show which strings common_na_strings contains
print(common_na_strings)
## [1] "NA" "N A" "N/A" "NA " " NA" "N /A" "N / A" " N / A"
## [9] "N / A " "na" "n a" "n/a" "na " " na" "n /a" "n / a"
## [17] " a / a" "n / a " "NULL" "null" "" "\\?" "\\*" "\\."
replace_with_na(): replace specified values with NA
- replace_with_na_all(): all variables
- replace_with_na_at(): a subset of selected variables
- replace_with_na_if(): a subset of variables that fulfill some condition
# Replace the values 'n.a' and 'na' in the `Team` column with NA,
# then re-scan every column for the common NA strings
employees %>%
replace_with_na( replace = list( Team = c('n.a', 'na') ) ) %>%
miss_scan_count( search = common_na_strings )
## # A tibble: 6 x 2
## Variable n
## <chr> <int>
## 1 First.Name 998
## 2 Gender 999
## 3 Salary 999
## 4 Bonus.. 1000
## 5 Senior.Management 1000
## 6 Team 997
# Replace every empty string ("") with `NA` in all columns,
# then re-scan every column for the common NA strings
employees %>%
replace_with_na_all( condition = ~.x == "" ) %>%
miss_scan_count( search = common_na_strings )
## # A tibble: 6 x 2
## Variable n
## <chr> <int>
## 1 First.Name 931
## 2 Gender 852
## 3 Salary 998
## 4 Bonus.. 1000
## 5 Senior.Management 933
## 6 Team 957
# Replace any value that appears in common_na_strings with `NA`
# (all columns), then re-scan for the same strings
employees %>%
replace_with_na_all( condition = ~.x %in% common_na_strings ) %>%
miss_scan_count( search = common_na_strings )
## # A tibble: 6 x 2
## Variable n
## <chr> <int>
## 1 First.Name 931
## 2 Gender 852
## 3 Salary 998
## 4 Bonus.. 1000
## 5 Senior.Management 933
## 6 Team 954
# Count occurrences of "N/A" in each pedestrian column
pedestrian %>%
  miss_scan_count(search = list("N/A"))
## # A tibble: 9 x 2
## Variable n
## <chr> <int>
## 1 hourly_counts 0
## 2 date_time 0
## 3 year 0
## 4 month 0
## 5 month_day 0
## 6 week_day 0
## 7 hour 0
## 8 sensor_id 0
## 9 sensor_name 0
# Count occurrences of "missing" in each pedestrian column
pedestrian %>%
  miss_scan_count(search = list("missing"))
## # A tibble: 9 x 2
## Variable n
## <chr> <int>
## 1 hourly_counts 0
## 2 date_time 0
## 3 year 0
## 4 month 0
## 5 month_day 0
## 6 week_day 0
## 7 hour 0
## 8 sensor_id 0
## 9 sensor_name 0
# Count occurrences of "na" in each pedestrian column
pedestrian %>%
  miss_scan_count(search = list("na"))
## # A tibble: 9 x 2
## Variable n
## <chr> <int>
## 1 hourly_counts 0
## 2 date_time 0
## 3 year 0
## 4 month 0
## 5 month_day 0
## 6 week_day 0
## 7 hour 0
## 8 sensor_id 0
## 9 sensor_name 0
# Count occurrences of " " (a single space) in each pedestrian column
pedestrian %>%
  miss_scan_count(search = list(" "))
## # A tibble: 9 x 2
## Variable n
## <chr> <int>
## 1 hourly_counts 0
## 2 date_time 37700
## 3 year 0
## 4 month 0
## 5 month_day 0
## 6 week_day 0
## 7 hour 0
## 8 sensor_id 0
## 9 sensor_name 37700
# Count all four candidates at once: "N/A", "missing", "na" and " "
pedestrian %>%
  miss_scan_count(search = list("N/A", "missing", "na", " "))
## # A tibble: 9 x 2
## Variable n
## <chr> <int>
## 1 hourly_counts 0
## 2 date_time 37700
## 3 year 0
## 4 month 0
## 5 month_day 0
## 6 week_day 0
## 7 hour 0
## 8 sensor_id 0
## 9 sensor_name 37700
# Inspect the structure of the employees data using `glimpse()`
glimpse(employees)
## Rows: 1,000
## Columns: 6
## $ First.Name <chr> "Douglas", "Thomas", "Maria", "Jerry", "Larry", "De…
## $ Gender <chr> "Male", "Male", "Female", "Male", "Male", "n.a.", "…
## $ Salary <chr> "97308", "61933", "130590", NA, "101004", "115163",…
## $ Bonus.. <chr> "6.945", "NaN", "11.858", "9.34", "1.389", "10.125"…
## $ Senior.Management <chr> "TRUE", "TRUE", "FALSE", "TRUE", "TRUE", "FALSE", "…
## $ Team <chr> "Marketing", "", "Finance", "Finance", "Client Serv…
# Replace the strange missing values "", "na", "n.a" and "NaN"
# with `NA` for the variables Team and Bonus..
emp_clean <- replace_with_na(data = employees, replace = list(Team = c("", "na", "n.a","NaN"),
Bonus.. = c("", "na", "n.a","NaN")))
# Test whether `emp_clean` still has these values in it
miss_scan_count(emp_clean, search = list("", "na", "n.a", "NaN"))
## # A tibble: 6 x 2
## Variable n
## <chr> <int>
## 1 First.Name 998
## 2 Gender 999
## 3 Salary 999
## 4 Bonus.. 997
## 5 Senior.Management 1000
## 6 Team 954
# Use `replace_with_na_at()` to replace with NA, but only in the columns
# First.Name, Gender and Team: values equal to "", " ", "na", "NaN" or "?"
# become NA there, and the other columns are left as they are.
# The result is not assigned, so the whole data frame is printed.
replace_with_na_at(employees,
.vars = c('First.Name', 'Gender', 'Team'),
~.x %in% c("", " ", "na", 'NaN', '?'))
## First.Name Gender Salary Bonus.. Senior.Management Team
## 1 Douglas Male 97308 6.945 TRUE Marketing
## 2 Thomas Male 61933 NaN TRUE <NA>
## 3 Maria Female 130590 11.858 FALSE Finance
## 4 Jerry Male <NA> 9.34 TRUE Finance
## 5 Larry Male 101004 1.389 TRUE Client Services
## 6 Dennis n.a. 115163 10.125 FALSE Legal
## 7 Ruby Female 65476 10.012 TRUE Product
## 8 <NA> Female 45906 11.598 Finance
## 9 Angela <NA> 18.523 TRUE Engineering
## 10 Frances Female 139852 7.524 TRUE Business Development
## 11 Louise Female 63241 15.132 TRUE <NA>
## 12 Julie Female 102508 12.637 TRUE Legal
## 13 Brandon Male 112807 17.492 TRUE Human Resources
## 14 Gary Male 109831 5.831 FALSE Sales
## 15 Kimberly Female 41426 NaN TRUE Finance
## 16 Lillian <NA> 59414 1.256 FALSE Product
## 17 Jeremy Male 90370 7.369 FALSE Human Resources
## 18 Shawn Male 111737 6.414 FALSE <NA>
## 19 Diana Female 132940 19.082 FALSE Client Services
## 20 Donna Female 81014 1.894 FALSE Product
## 21 Lois <NA> 64714 4.934 TRUE Legal
## 22 Matthew Male 100612 13.645 FALSE Marketing
## 23 Joshua <NA> 90816 18.816 TRUE Client Services
## 24 <NA> Male 125792 5.042 <NA>
## 25 John Male 97950 13.873 FALSE Client Services
## 26 <NA> Male 37076 18.576 Client Services
## 27 Craig Male 37598 7.757 TRUE Marketing
## 28 Scott <NA> 122367 5.218 FALSE Legal
## 29 Terry Male 124008 13.464 TRUE Client Services
## 30 Benjamin Male 79529 7.008 TRUE Legal
## 31 Christina Female 118780 9.096 TRUE Engineering
## 32 Joyce <NA> 88657 12.752 FALSE Product
## 33 <NA> Male 122340 6.417 <NA>
## 34 Jean Female 119082 16.18 FALSE Business Development
## 35 Jerry Male 95734 19.096 FALSE Client Services
## 36 Theresa Female 85182 16.675 FALSE Sales
## 37 Rachel Female 142032 12.599 FALSE Business Development
## 38 Linda Female 57427 9.557 TRUE Client Services
## 39 Stephanie Female 36844 5.574 TRUE Business Development
## 40 <NA> Male 122173 7.797 Client Services
## 41 Michael Male 99283 2.665 TRUE Distribution
## 42 Christine <NA> 66582 11.308 TRUE Business Development
## 43 Beverly Female 121918 15.835 FALSE Legal
## 44 Marilyn Female 73524 5.207 TRUE Marketing
## 45 Cynthia Female 145146 7.482 TRUE Product
## 46 Roger Male 88010 13.886 TRUE Sales
## 47 Bruce Male 114796 6.796 FALSE Finance
## 48 Kathy Female 66820 NaN TRUE Client Services
## 49 Clarence Male 93581 6.083 TRUE Business Development
## 50 Chris <NA> 113590 3.055 FALSE Sales
## 51 Nancy Female 94976 13.83 TRUE Engineering
## 52 <NA> <NA> 41126 14.009 Sales
## 53 Todd Male 49339 1.695 TRUE <NA>
## 54 Alan <NA> 40341 17.578 TRUE Finance
## 55 Sara Female 83677 8.999 FALSE Engineering
## 56 Karen Female 102488 17.653 TRUE Product
## 57 Carl Male 130276 16.084 TRUE Finance
## 58 <NA> Male 64715 15.107 TRUE Human Resources
## 59 Theresa Female 72670 1.481 TRUE Engineering
## 60 Irene Female 66851 11.279 FALSE Engineering
## 61 Paula <NA> 48866 4.271 FALSE Distribution
## 62 Denise Female 106862 3.699 FALSE Business Development
## 63 <NA> Female 58112 19.414 Marketing
## 64 Matthew Male 35203 18.04 FALSE Human Resources
## 65 Kathleen <NA> 77834 18.771 FALSE Business Development
## 66 Steve Male 61310 12.428 TRUE Distribution
## 67 Nancy Female 125250 2.672 TRUE Business Development
## 68 Rachel Female 51178 9.735 TRUE Finance
## 69 Jose Male 84834 14.33 TRUE Finance
## 70 Irene <NA> 100863 4.382 TRUE Finance
## 71 Todd <NA> 84692 6.617 FALSE Client Services
## 72 Johnny Male 118172 16.194 TRUE Sales
## 73 Bobby Male 54043 3.833 FALSE Product
## 74 Frances Female 90582 4.709 TRUE Sales
## 75 Thomas Male 62096 17.029 FALSE Marketing
## 76 Bonnie Female 104897 ? TRUE Human Resources
## 77 Margaret Female 131604 7.353 TRUE Distribution
## 78 Charles Male 107391 1.26 TRUE Marketing
## 79 Robin Female 114797 5.965 TRUE Sales
## 80 <NA> Female 115814 4.99 FALSE Product
## 81 Gerald <NA> 137126 15.602 TRUE Sales
## 82 Christopher Male 47369 14.822 FALSE Legal
## 83 Steven Male 35095 8.379 TRUE Client Services
## 84 Shawn Male 148115 6.539 TRUE Finance
## 85 Doris Female 83072 7.511 FALSE Finance
## 86 Jeremy Male 100238 3.887 TRUE Client Services
## 87 Annie <NA> 103495 17.29 TRUE Business Development
## 88 Annie Female 144887 8.276 TRUE Sales
## 89 Donna Female 64088 6.155 TRUE Legal
## 90 Janice Female 51082 11.955 FALSE Legal
## 91 Janice <NA> 91719 11.583 TRUE Legal
## 92 James <NA> 128771 8.309 FALSE <NA>
## 93 Linda Female 119009 12.506 TRUE Business Development
## 94 Virginia <NA> 111858 1.601 TRUE Legal
## 95 Harry Male 130620 7.03 FALSE Legal
## 96 Heather Female 43026 14.166 FALSE Client Services
## 97 Cynthia Female 142321 1.737 NaN Finance
## 98 Laura <NA> 140371 10.62 TRUE Marketing
## 99 Tina Female 100705 16.961 TRUE Marketing
## 100 Harold Male 77544 12.447 FALSE Business Development
## 101 Melissa Female 48109 14.995 FALSE Finance
## 102 Aaron Male 61602 11.849 TRUE Marketing
## 103 Jack Male 103902 12.159 FALSE Client Services
## 104 Phyllis Female 136984 8.932 TRUE Finance
## 105 John Male 80740 19.305 FALSE Marketing
## 106 Kathy Female 91712 8.567 FALSE Finance
## 107 Paul Male 42146 3.046 FALSE Legal
## 108 Steven Male 68680 16.565 FALSE Legal
## 109 Russell <NA> 133980 12.396 TRUE Legal
## 110 Christopher Male 37919 11.449 FALSE <NA>
## 111 Shirley Female 147113 16.135 FALSE Legal
## 112 Bonnie Female 42153 8.454 TRUE Business Development
## 113 Willie Male 64363 4.023 FALSE Marketing
## 114 Tina Female 114767 3.711 TRUE Engineering
## 115 Ashley Female 58698 6.811 TRUE Business Development
## 116 Pamela Female 54585 4.166 FALSE Product
## 117 <NA> Male 76189 18.988 Legal
## 118 Steven Male 109095 9.494 FALSE Finance
## 119 Andrea Female 120204 9.557 FALSE Business Development
## 120 Paul Male 41054 12.299 FALSE Marketing
## 121 Peter Male 84885 15.402 FALSE Business Development
## 122 Kathleen <NA> 119735 18.74 FALSE Product
## 123 Christina Female 110169 13.892 TRUE Engineering
## 124 Helen Female 73789 14.841 TRUE Product
## 125 Marilyn Female 76078 2.401 TRUE Product
## 126 Patricia Female 49368 14.226 FALSE Human Resources
## 127 Michelle Female 57325 17.179 TRUE Human Resources
## 128 William Male 66521 5.83 FALSE Human Resources
## 129 Kenneth Male 127654 16.439 TRUE Client Services
## 130 Antonio Male 60866 13.101 TRUE Business Development
## 131 Shirley Female 41334 6.219 TRUE Human Resources
## 132 Rebecca Female 94231 17.517 FALSE Product
## 133 Carlos Male 146670 10.763 FALSE Human Resources
## 134 Lois Female 106317 2.235 TRUE Client Services
## 135 Carolyn Female 109260 2.65 TRUE Business Development
## 136 Gloria Female 134148 8.833 TRUE Legal
## 137 Henry Male 43542 19.687 FALSE Legal
## 138 Adam Male 95327 15.12 FALSE Distribution
## 139 Ashley Female 112238 6.03 TRUE Engineering
## 140 <NA> Female 132373 10.527 <NA>
## 141 Shirley Female 113850 1.854 FALSE Marketing
## 142 Adam Male 110194 14.727 TRUE Product
## 143 Elizabeth Female 146129 5.687 FALSE Finance
## 144 Teresa <NA> 140013 8.689 TRUE Engineering
## 145 Nicole <NA> 122717 12.452 FALSE Sales
## 146 Jennifer Female 71715 13.079 TRUE Client Services
## 147 Patricia Female 95322 4.15 n.a. Product
## 148 Philip Male 89227 3.996 FALSE Legal
## 149 Patrick <NA> 124488 14.837 TRUE Sales
## 150 <NA> Female 86230 8.578 Distribution
## 151 Sean Male 135490 19.934 FALSE Marketing
## 152 Brandon <NA> 121333 15.295 FALSE Business Development
## 153 Ruth Female 129297 8.067 TRUE Client Services
## 154 Victor <NA> 84546 10.489 TRUE Finance
## 155 Rebecca Female 85730 5.359 TRUE Product
## 156 Marie Female 104058 17.695 TRUE Business Development
## 157 Howard Male 105062 1.563 FALSE Human Resources
## 158 <NA> Female 79536 14.443 Product
## 159 Norma Female 114412 8.756 TRUE Marketing
## 160 James <NA> 68501 14.316 FALSE Marketing
## 161 Kathy Female 149563 16.991 TRUE Finance
## 162 Marilyn <NA> 103386 11.451 FALSE Distribution
## 163 Arthur Male 89786 14.422 TRUE Legal
## 164 Terry Male 52226 19.135 FALSE Client Services
## 165 Mary Female 134645 18.197 FALSE Business Development
## 166 <NA> Female 59148 9.061 Legal
## 167 <NA> Female 42341 7.014 Sales
## 168 Christopher Male 142178 17.984 TRUE Sales
## 169 Peter <NA> 38989 7.017 TRUE Marketing
## 170 Michelle Female 53754 5.455 TRUE Product
## 171 Irene Female 133772 19.02 TRUE Sales
## 172 Patrick Male 143499 17.495 TRUE Engineering
## 173 Sara Female 97058 9.402 FALSE Finance
## 174 Lillian Female 85446 3.959 TRUE Engineering
## 175 <NA> <NA> 40297 6.185 Client Services
## 176 Willie Male 146651 1.451 TRUE Engineering
## 177 Victor Male 124486 10.166 FALSE Product
## 178 Wayne Male 102652 14.085 TRUE Distribution
## 179 Jane Female 144474 17.648 FALSE Product
## 180 Jessica Female 68759 19.343 TRUE Finance
## 181 Mark Male 57286 5.025 TRUE Business Development
## 182 Randy Male 58129 1.952 TRUE Distribution
## 183 Lillian Female 123940 12.184 TRUE Business Development
## 184 Ruth <NA> 98233 2.518 TRUE Distribution
## 185 Jerry Male 140810 9.177 TRUE Client Services
## 186 Sandra Female 42090 8.842 TRUE Legal
## 187 <NA> Female 149654 1.825 Sales
## 188 Roger Male 51430 6.46 FALSE Marketing
## 189 Charles Male 71749 15.931 FALSE Legal
## 190 Clarence Male 85700 1.215 FALSE Sales
## 191 Carol Female 57783 9.129 FALSE Finance
## 192 Lois Female 36946 6.652 FALSE Engineering
## 193 Barbara <NA> 99326 16.475 TRUE Legal
## 194 Rebecca Female 109259 4.443 TRUE Legal
## 195 Irene Female 131038 8.996 FALSE Distribution
## 196 Ronald Male 121068 12.757 TRUE Product
## 197 Steven Male 62719 19.127 FALSE Client Services
## 198 Carolyn Female 69268 3.031 FALSE Client Services
## 199 Maria Female 36067 9.64 TRUE Product
## 200 Jonathan Male 130581 16.736 TRUE <NA>
## 201 Gary Male 89661 8.525 FALSE Finance
## 202 Kimberly Female 36643 7.953 FALSE Marketing
## 203 Roger Male 140558 5.084 TRUE Sales
## 204 Diana Female 103521 2.784 TRUE Business Development
## 205 Willie Male 55281 4.935 TRUE Marketing
## 206 Brandon Male 115711 8.012 TRUE Finance
## 207 Patricia Female 114079 8.399 TRUE Client Services
## 208 Katherine Female 57531 1.767 FALSE Client Services
## 209 Jonathan Male 141069 4.903 FALSE Human Resources
## 210 Emily Female 89434 11.295 FALSE Engineering
## 211 Ruth Female 44639 9.148 TRUE Legal
## 212 Linda Female 110967 19.612 TRUE Human Resources
## 213 Lisa Female 115387 1.821 FALSE Client Services
## 214 Evelyn Female 81673 15.364 TRUE Engineering
## 215 Julie Female 109588 3.55 FALSE Engineering
## 216 Mary Female 92544 3.8 FALSE Client Services
## 217 Matthew Male 142373 2.462 FALSE Marketing
## 218 Douglas Male 83341 1.015 TRUE Client Services
## 219 Gregory <NA> 98865 10.628 TRUE Finance
## 220 Billy Male 120444 7.768 TRUE Finance
## 221 <NA> Female 71945 5.56 Marketing
## 222 Gregory Male 109564 3.845 FALSE Legal
## 223 Jason Male 78417 3.067 FALSE Finance
## 224 Daniel Male 106947 15.866 TRUE Legal
## 225 Sarah Female 87298 2.311 FALSE Distribution
## 226 Harry Male 64579 15.266 TRUE Sales
## 227 Kathy Female 50905 9.404 TRUE Finance
## 228 Todd Male 59728 11.226 TRUE Client Services
## 229 Bobby Male 51685 17.445 TRUE Engineering
## 230 Jeremy Male 49542 1.679 TRUE Distribution
## 231 George <NA> 38375 8.999 FALSE Legal
## 232 <NA> Female 64084 17.018 FALSE Product
## 233 Marilyn Female 147663 10.263 FALSE Human Resources
## 234 Kathryn Female 73935 14.752 FALSE Sales
## 235 Irene Female 40837 12.182 TRUE Client Services
## 236 Norma Female 94393 3.643 TRUE Engineering
## 237 Laura Female 42087 2.624 FALSE Product
## 238 Cheryl Female 52080 9.375 FALSE Legal
## 239 Kevin Male 35061 5.128 FALSE Legal
## 240 Lillian <NA> 64164 17.612 FALSE Human Resources
## 241 Phyllis <NA> 94088 19.107 FALSE Distribution
## 242 Walter Male 127813 5.961 FALSE Finance
## 243 Robert Male 38041 18.428 TRUE Engineering
## 244 Jimmy Male 126310 5.413 TRUE Product
## 245 Clarence Male 142561 8.866 FALSE Client Services
## 246 Victor Male 70817 17.138 FALSE Engineering
## 247 Fred <NA> 59937 12.045 TRUE Human Resources
## 248 Brenda <NA> 106115 3.742 TRUE Product
## 249 Justin Male 82782 4.366 ? Distribution
## 250 Betty Female 104896 19.55 TRUE Business Development
## 251 Randy <NA> 133943 8.94 TRUE Sales
## 252 Sharon <NA> 83658 6.513 FALSE Business Development
## 253 Ralph Male 71896 4.232 TRUE Marketing
## 254 <NA> <NA> 113732 10.391 Sales
## 255 Edward Male 110485 7.995 FALSE Client Services
## 256 Denise Female 115118 5.108 FALSE Human Resources
## 257 Debra Female 48696 4.75 FALSE Legal
## 258 <NA> Male 42676 15.517 Sales
## 259 Michael Male 43586 12.659 FALSE <NA>
## 260 Henry Male 89258 15.585 TRUE Human Resources
## 261 Gloria Female 90730 2.491 FALSE Distribution
## 262 Marie Female 100308 13.677 FALSE Product
## 263 Anne Female 69134 3.723 TRUE Engineering
## 264 Sarah <NA> 58295 14.603 TRUE Engineering
## 265 Stephanie Female 50141 13.218 TRUE Human Resources
## 266 Roy Male 101941 3.45 FALSE Client Services
## 267 <NA> Female 115145 14.063 Distribution
## 268 <NA> Male 40451 16.044 Distribution
## 269 Irene Female 56526 5.81 TRUE Finance
## 270 <NA> Female 145316 18.517 Human Resources
## 271 Thomas Male 103235 9.554 TRUE Human Resources
## 272 Frank <NA> 58563 12.062 TRUE Marketing
## 273 Fred Male 74129 18.225 FALSE Product
## 274 Nicholas Male 74669 1.113 TRUE Product
## 275 Howard Male 97490 5.46 TRUE Marketing
## 276 Debra Female 104250 18.456 TRUE Business Development
## 277 Christine Female 94345 11.996 FALSE Finance
## 278 Brenda <NA> 82439 19.062 FALSE Sales
## 279 Betty Female 51613 12.984 FALSE Distribution
## 280 Ruby Female 105946 1.139 FALSE Business Development
## 281 Kevin Male 46080 9.635 FALSE Business Development
## 282 Arthur Male 134610 6.147 TRUE n.a.
## 283 Carl <NA> 125104 12.345 FALSE Client Services
## 284 Todd Male 107281 1.612 TRUE Engineering
## 285 Brenda Female 141521 4.337 FALSE Client Services
## 286 Judy Female 65931 2.304 FALSE Human Resources
## 287 Todd Male 69989 10.985 TRUE Finance
## 288 Lois Female 147183 9.999 TRUE Client Services
## 289 Walter <NA> 66757 18.099 FALSE Distribution
## 290 Jessica Female 75145 6.388 TRUE Legal
## 291 Jeremy Male 129460 13.657 TRUE <NA>
## 292 Tammy Female 132839 17.463 TRUE Client Services
## 293 Anne Female 44537 18.284 TRUE Client Services
## 294 Jesse Male 118733 9.653 FALSE Marketing
## 295 Virginia Female 46905 19.154 FALSE Distribution
## 296 Jesse Male 79582 3.873 FALSE Legal
## 297 Jeremy <NA> 55394 3.18 TRUE Sales
## 298 Daniel Male 123811 7.664 TRUE Human Resources
## 299 Patrick <NA> 137314 4.542 TRUE Marketing
## 300 Emily Female 36711 19.028 TRUE Human Resources
## 301 Alan Male 111786 3.592 TRUE Engineering
## 302 James Male 72257 13.023 FALSE Engineering
## 303 Adam Male 71276 5.027 TRUE Human Resources
## 304 Joan <NA> 38712 3.657 FALSE Client Services
## 305 Jeremy Male 46930 18.702 TRUE Finance
## 306 Margaret Female 125220 3.733 FALSE Marketing
## 307 Mark Male 121477 17.44 TRUE Business Development
## 308 Marilyn Female 86386 2.937 FALSE Distribution
## 309 Cheryl Female 81308 2.196 TRUE Legal
## 310 Benjamin Male 84810 15.794 FALSE Marketing
## 311 Harold Male 66775 2.158 TRUE Legal
## 312 Ernest <NA> 72145 13.448 TRUE Finance
## 313 Gerald Male 121604 1.923 TRUE Sales
## 314 Judy Female 109510 13.457 TRUE Legal
## 315 Bobby Male 112117 6.338 FALSE <NA>
## 316 Bobby <NA> 108127 15.858 FALSE Client Services
## 317 Marie Female 123711 10.966 FALSE Product
## 318 Kathy Female 132381 8.34 FALSE Business Development
## 319 Roy Male 148225 1.841 FALSE Finance
## 320 Jacqueline Female n.a. 18.243 FALSE Marketing
## 321 <NA> Female 62960 14.356 Sales
## 322 Julie Female 56926 7.507 FALSE Sales
## 323 Douglas Male 41428 14.372 FALSE Product
## 324 Linda Female 115658 3.041 TRUE Sales
## 325 Ruby Female 76707 6.031 FALSE Business Development
## 326 Russell Male 60388 17.885 FALSE Client Services
## 327 Jeffrey Male 45150 12.075 TRUE Product
## 328 Aaron Male 58755 5.097 TRUE Marketing
## 329 Samuel Male 76076 5.319 TRUE Finance
## 330 <NA> Male 87760 14.987 Engineering
## 331 Randy Male 135119 7.887 FALSE Client Services
## 332 Evelyn Female 36759 17.269 TRUE Marketing
## 333 Fred Male 121723 18.645 TRUE Product
## 334 Shirley Female 110061 7.494 FALSE Client Services
## 335 Todd Male 85074 1.844 FALSE Client Services
## 336 Robert Male 85799 19.93 FALSE Finance
## 337 Mark Male 75150 12.182 TRUE Legal
## 338 Earl Male 91344 4.035 TRUE Sales
## 339 Richard Male 86326 10.717 FALSE Product
## 340 Michael Male 98753 16.443 TRUE Human Resources
## 341 Steven Male 113060 2.846 TRUE Sales
## 342 Carlos Male 77327 11.58 TRUE Finance
## 343 Marie Female 62666 10.247 FALSE Product
## 344 Ronald Male 96633 4.99 TRUE Engineering
## 345 Scott Male 58248 3.914 FALSE Business Development
## 346 Steven Male 83706 6.96 TRUE Human Resources
## 347 James Male 74086 5.778 TRUE Human Resources
## 348 Lori Female 95389 14.223 FALSE Sales
## 349 Philip Male 129968 19.897 FALSE Finance
## 350 Phyllis Female ? 8.723 FALSE Sales
## 351 Thomas <NA> 41549 3.95 FALSE Sales
## 352 Cynthia Female 74287 10.751 FALSE Sales
## 353 <NA> Male 69906 4.844 Engineering
## 354 <NA> Male 65078 3.095 Marketing
## 355 Elizabeth Female 106406 1.782 TRUE Legal
## 356 Andrea Female 115913 12.121 FALSE Legal
## 357 Judy Female 38092 5.668 FALSE Distribution
## 358 Russell Male 121160 7.843 FALSE Engineering
## 359 Scott Male 90429 4.45 FALSE Product
## 360 Sarah Female 64207 7.824 TRUE Client Services
## 361 Susan Female 80688 18.892 TRUE Sales
## 362 Margaret <NA> 55044 4.078 FALSE Sales
## 363 Joshua Male 72893 9.555 FALSE Distribution
## 364 Justin <NA> 96978 13.865 FALSE Finance
## 365 Juan Male 97364 3.595 FALSE Product
## 366 Gloria <NA> 140885 1.113 FALSE Human Resources
## 367 Jennifer <NA> 58520 16.231 TRUE Client Services
## 368 Edward Male 66067 10.957 TRUE <NA>
## 369 Marilyn Female 147183 8.748 FALSE Business Development
## 370 Mary Female 87721 12.484 FALSE Product
## 371 Linda Female 144001 2.194 FALSE Business Development
## 372 Larry Male 91133 5.14 FALSE Sales
## 373 Albert Male 67827 19.717 TRUE Engineering
## 374 Kenneth Male 81839 12.072 FALSE Sales
## 375 <NA> Female 81444 3.171 Distribution
## 376 Gregory <NA> 137661 4.805 TRUE Sales
## 377 Brandon Male 112548 4.664 FALSE Engineering
## 378 Stephen Male 111249 10.574 TRUE Human Resources
## 379 Russell Male 114334 9.669 FALSE Product
## 380 <NA> Female 118906 4.537 Marketing
## 381 Pamela <NA> 72979 5.66 FALSE Sales
## 382 Scott Male 64172 10.894 TRUE Engineering
## 383 <NA> Female 107024 12.182 <NA>
## 384 Carlos Male 50167 2.362 FALSE Product
## 385 Jack Male 106995 15.723 FALSE Product
## 386 Debra Female 70492 8.895 FALSE Client Services
## 387 Phyllis Female 125881 16.697 FALSE Sales
## 388 Robert Male 123294 19.894 FALSE <NA>
## 389 Gloria Female 131045 11.312 TRUE Product
## 390 Sharon <NA> 97635 10.413 TRUE Client Services
## 391 Barbara <NA> 94493 13.443 TRUE Sales
## 392 Marilyn Female 140502 9.989 TRUE Sales
## 393 Ralph Male 106310 4.03 TRUE Client Services
## 394 Harry <NA> 46240 6.976 TRUE Engineering
## 395 Robin Female 111163 5.025 TRUE Product
## 396 Kathy Female 93753 7.094 TRUE Sales
## 397 Rose Female 63494 19.385 TRUE Human Resources
## 398 Clarence Male 116693 13.835 TRUE Distribution
## 399 Doris Female 85215 7.425 TRUE Human Resources
## 400 Kathryn Female 86439 7.799 FALSE Finance
## 401 Billy <NA> 62913 18.241 TRUE Marketing
## 402 Norma Female 38872 9.302 TRUE Business Development
## 403 Richard <NA> 124655 14.272 TRUE Engineering
## 404 Craig Male 113506 19.642 FALSE Marketing
## 405 Sarah <NA> 109980 8.86 FALSE Sales
## 406 Sharon Female 91522 7.564 FALSE Finance
## 407 Mary Female 42214 17.538 TRUE Business Development
## 408 Steve <NA> 83159 9.55 TRUE Distribution
## 409 Joe Male 144082 1.62 TRUE Client Services
## 410 Jeremy Male 47885 19.276 TRUE Human Resources
## 411 Ernest Male 126232 6.191 TRUE Sales
## 412 Kevin Male 134598 11.699 FALSE Business Development
## 413 Jacqueline Female 66604 14.609 FALSE Legal
## 414 Johnny Male 115194 5.861 TRUE Marketing
## 415 Gary Male 49101 11.9 TRUE Business Development
## 416 Shirley <NA> 67811 12.699 FALSE Finance
## 417 <NA> Male 111043 5.966 Distribution
## 418 Sarah <NA> 37748 9.047 FALSE Human Resources
## 419 Julia Female 36403 2.664 TRUE Finance
## 420 Dorothy Female 140136 3.12 TRUE Business Development
## 421 Justin Male 121508 19.334 TRUE Finance
## 422 Kevin Male 79906 18.021 TRUE Sales
## 423 Victor Male 123144 16.261 TRUE Product
## 424 Deborah Female 46953 17.436 FALSE Client Services
## 425 Matthew <NA> 79443 14.637 FALSE Human Resources
## 426 Alice Female 51395 2.378 TRUE Finance
## 427 Todd Male 134408 3.56 TRUE Human Resources
## 428 Frank Male ? 8.037 TRUE Sales
## 429 Christopher Male 68028 7.869 TRUE Finance
## 430 Rose Female 149903 5.63 FALSE Human Resources
## 431 Andrea Female 79123 19.422 FALSE Distribution
## 432 Charles Male 104014 7.077 FALSE Legal
## 433 Jessica <NA> 121160 12.993 FALSE Client Services
## 434 Wanda Female 65362 7.132 TRUE Legal
## 435 Joyce Female 50701 14.227 TRUE <NA>
## 436 Billy Male 144709 10.069 TRUE Distribution
## 437 Kathryn Female 53061 11.864 TRUE Finance
## 438 Denise Female 36697 11.196 TRUE Sales
## 439 Jason Male 69244 6.22 TRUE <NA>
## 440 Nicholas Male 101036 2.826 TRUE Human Resources
## 441 Aaron Male 52119 11.343 TRUE Client Services
## 442 Louise Female 46666 1.886 TRUE Business Development
## 443 Julie Female 73437 2.518 TRUE Finance
## 444 Kathy Female 86318 18.492 TRUE Sales
## 445 <NA> Male 76409 7.008 Distribution
## 446 Chris Male 71642 1.496 FALSE <NA>
## 447 Cheryl Female 67150 15.85 TRUE Marketing
## 448 Gregory Male 142208 11.204 TRUE Engineering
## 449 Wayne Male 81183 17.066 FALSE Client Services
## 450 Beverly Female 107163 3.665 TRUE Human Resources
## 451 Willie Male 55038 19.691 n.a. Legal
## 452 Terry <NA> 140002 19.49 TRUE Marketing
## 453 Scott Male 146812 1.965 TRUE Marketing
## 454 Annie Female 40119 4.338 TRUE Human Resources
## 455 <NA> Male 136602 10.429 Marketing
## 456 Ruth Female 69579 18.029 TRUE Human Resources
## 457 Deborah <NA> 101457 6.662 FALSE Engineering
## 458 Patricia Female 121232 16.624 FALSE Legal
## 459 Albert Male 102626 15.843 FALSE Finance
## 460 Charles <NA> 137171 5.574 TRUE Marketing
## 461 Tina Female 88276 14.248 FALSE Legal
## 462 Jeremy Male 43354 13.946 FALSE Marketing
## 463 Craig Male 125556 7.996 FALSE Engineering
## 464 Jose Male 59862 3.269 FALSE Product
## 465 Lawrence Male 74640 18.641 FALSE Marketing
## 466 Helen Female 52875 4.188 FALSE Legal
## 467 Walter Male 58789 5.461 FALSE Sales
## 468 Amy Female 122897 8.222 TRUE Engineering
## 469 Janice Female 136032 10.696 TRUE Marketing
## 470 Stephen Male 141958 9.078 TRUE Legal
## 471 Ryan Male 139917 11.466 FALSE Distribution
## 472 Sharon Female 147635 14.693 FALSE Human Resources
## 473 Joshua Male 68230 8.047 FALSE Client Services
## 474 Clarence Male 124365 8.457 FALSE Business Development
## 475 Jonathan Male 104749 11.364 FALSE Engineering
## 476 Stephanie Female 122121 7.937 TRUE Engineering
## 477 Kathy Female 143541 8.461 FALSE Human Resources
## 478 Albert Male 137840 9.705 FALSE Business Development
## 479 Michael Male 73354 18.357 FALSE Business Development
## 480 Richard Male 47647 18.787 TRUE <NA>
## 481 Amy Female 106249 15.967 FALSE Sales
## 482 <NA> Female 93847 1.085 Business Development
## 483 Beverly <NA> 104815 3.38 FALSE Product
## 484 <NA> Female 115436 3.099 Human Resources
## 485 Joe Male 50645 11.119 FALSE Marketing
## 486 Ashley Female 142415 1.985 TRUE Finance
## 487 Howard Male 37984 2.021 FALSE Distribution
## 488 Cheryl Female 71751 15.918 FALSE Business Development
## 489 Robert Male 135882 19.944 FALSE Legal
## 490 Sharon Female 46007 19.731 TRUE Client Services
## 491 Judith Female 117055 7.461 FALSE Marketing
## 492 Nicholas <NA> 58478 6.525 TRUE <NA>
## 493 Jerry Male 121357 18.845 FALSE Business Development
## 494 Craig Male 44857 13.266 FALSE Finance
## 495 Barbara Female 47322 7.25 TRUE Sales
## 496 Eugene Male 81077 2.117 FALSE Sales
## 497 Johnny Male 76394 5.437 TRUE Marketing
## 498 Benjamin Male 114356 7.123 FALSE Client Services
## 499 Diana Female 41831 4.548 FALSE Business Development
## 500 Barbara <NA> 90187 14.764 TRUE Distribution
## 501 Christine Female 72613 11.126 FALSE Legal
## 502 Sean Male 42748 9.765 FALSE Distribution
## 503 Sarah Female 37259 1.763 FALSE Client Services
## 504 Roger Male 125033 4.887 TRUE Marketing
## 505 <NA> Female 38275 10.494 Distribution
## 506 <NA> Male 71520 13.248 Engineering
## 507 Donald Male 106472 6.499 TRUE Client Services
## 508 Clarence Male 103684 18.654 TRUE Engineering
## 509 Scott Male 96111 15.592 FALSE Human Resources
## 510 Ann Female 90719 6.22 FALSE Business Development
## 511 Frank Male 140303 17.07 FALSE Legal
## 512 Joe Male 62161 8.13 TRUE Business Development
## 513 Wanda Female 78883 19.695 FALSE <NA>
## 514 Jimmy Male 63549 19.624 FALSE <NA>
## 515 Amy <NA> 63888 18.115 TRUE Product
## 516 Larry Male 97370 10.458 FALSE Sales
## 517 Gloria Female 66224 15.979 TRUE Legal
## 518 Ruth Female 97915 3.997 TRUE Human Resources
## 519 Diana Female 86883 18.003 TRUE Product
## 520 Raymond Male 37812 3.178 FALSE Human Resources
## 521 Peter Male 56580 8.411 TRUE <NA>
## 522 <NA> <NA> 83895 17.971 Engineering
## 523 Catherine Female 58047 14.858 TRUE Distribution
## 524 Diane Female 124889 15.026 TRUE Sales
## 525 Peter <NA> 118840 14.509 TRUE Client Services
## 526 Steve Male 67780 9.54 TRUE Human Resources
## 527 Barbara Female 144677 8.696 FALSE Finance
## 528 Helen <NA> 45724 1.022 FALSE Product
## 529 Jimmy <NA> 86676 7.175 TRUE Product
## 530 Christopher Male 82401 7.252 FALSE Client Services
## 531 Kathleen Female 35575 14.595 FALSE Distribution
## 532 Virginia Female 123649 10.154 TRUE Marketing
## 533 Lisa Female 38078 10.28 TRUE Product
## 534 Earl Male 52620 13.773 FALSE Product
## 535 Gerald <NA> 133366 12.292 FALSE Legal
## 536 Louise Female 91462 8.205 FALSE Marketing
## 537 Clarence Male 146589 4.905 TRUE Business Development
## 538 Cynthia Female 51633 13.472 TRUE Business Development
## 539 Adam Male 45181 3.491 FALSE Human Resources
## 540 Justin Male 62454 3.459 TRUE Sales
## 541 <NA> Female 84746 4.82 Finance
## 542 Ruby Female 147362 7.851 TRUE Engineering
## 543 Amanda Female 80803 14.077 TRUE Distribution
## 544 Anna Female 117293 2.366 FALSE Client Services
## 545 Roger Male 105689 13.32 TRUE Marketing
## 546 Julie <NA> 93302 9.048 TRUE Product
## 547 Joseph Male 102555 3.672 TRUE Product
## 548 Evelyn Female 51525 10.366 FALSE Finance
## 549 Janice Female 41190 3.311 TRUE Sales
## 550 Sara Female 75484 3.186 FALSE Finance
## 551 Gregory Male 82726 16.304 TRUE Marketing
## 552 Philip Male 122319 19.122 FALSE Engineering
## 553 Barbara Female 127297 11.905 TRUE Product
## 554 Amanda Female 109290 13.853 FALSE Engineering
## 555 Phyllis <NA> 99150 6.007 FALSE Client Services
## 556 Anne Female 71930 18.451 TRUE Product
## 557 Arthur Male 66819 6.639 TRUE Marketing
## 558 Jane Female 42424 18.115 FALSE Distribution
## 559 Linda Female 51431 13.295 FALSE Engineering
## 560 Ashley Female 120675 6.51 FALSE Human Resources
## 561 Shawn Male 96610 2.097 TRUE Client Services
## 562 Kathleen Female 71430 8.572 FALSE Engineering
## 563 Sara <NA> 87713 18.863 TRUE Legal
## 564 Randy Male 86723 14.842 FALSE Engineering
## 565 Andrew Male 43414 7.563 TRUE Client Services
## 566 Marilyn Female 87145 17.33 FALSE Human Resources
## 567 Johnny Male 91124 12.986 TRUE Distribution
## 568 <NA> Female 48141 12.605 <NA>
## 569 Susan Female 90829 19.142 FALSE Marketing
## 570 Ruby Female 101262 6.773 FALSE Client Services
## 571 Kelly Female 41427 1.431 FALSE Product
## 572 Jonathan Male 56993 18.623 FALSE Marketing
## 573 Raymond Male 114244 16.69 FALSE Human Resources
## 574 Kimberly Female 81800 5.435 TRUE <NA>
## 575 <NA> Female 118736 7.421 Client Services
## 576 John Male 66077 5.809 TRUE Product
## 577 Michael Male 35013 14.879 FALSE Product
## 578 Jerry Male 98393 11.393 FALSE Client Services
## 579 Amanda Female 107111 1.438 TRUE Product
## 580 Harold Male 65673 1.187 TRUE Legal
## 581 Harry Male 65482 18.089 FALSE <NA>
## 582 Ernest Male 81919 15.118 FALSE Marketing
## 583 Harry Male 59277 10.2 FALSE Finance
## 584 Diane Female 49501 13.506 FALSE Business Development
## 585 Jack Male 70367 18.266 TRUE Legal
## 586 Shirley Female 132156 2.754 FALSE Marketing
## 587 Rose Female 56961 7.585 FALSE Marketing
## 588 Cheryl Female 98841 8.945 TRUE Client Services
## 589 Cynthia Female 78226 2.419 FALSE Human Resources
## 590 Sandra Female 116931 9.657 TRUE Human Resources
## 591 Andrea Female 87575 13.346 TRUE Business Development
## 592 Rachel Female 110924 7.808 FALSE Distribution
## 593 Justin Male 78351 15.221 FALSE Sales
## 594 Marie Female 125574 4.644 FALSE Sales
## 595 Louis Male 95198 2.075 FALSE Business Development
## 596 Nancy Female 121006 3.512 TRUE Finance
## 597 Mark Male 95728 6.752 TRUE Business Development
## 598 Teresa Female 69740 8.294 FALSE Distribution
## 599 Frances Female 91996 11.506 FALSE Product
## 600 <NA> Female 98385 10.925 Human Resources
## 601 Barbara <NA> 90556 15.749 TRUE Distribution
## 602 Christine <NA> 50366 9.862 TRUE Marketing
## 603 Bobby <NA> 84232 15.704 TRUE Engineering
## 604 Carl <NA> 100888 12.49 TRUE Business Development
## 605 Bruce Male 141335 15.427 TRUE Engineering
## 606 Rose Female 97691 2.142 FALSE Client Services
## 607 Mildred Female 47266 10.256 FALSE Client Services
## 608 <NA> Male 139754 12.74 Sales
## 609 <NA> Female 116236 17.274 Engineering
## 610 Todd Male 103405 15.91 FALSE Sales
## 611 Alan Male 41453 10.084 FALSE Product
## 612 Elizabeth <NA> 52730 12.525 FALSE Marketing
## 613 Julie <NA> 60361 7.099 TRUE Distribution
## 614 Teresa <NA> 63103 11.378 FALSE Finance
## 615 Eric Male 65168 11.513 FALSE Distribution
## 616 Edward Male 73105 6.083 TRUE Engineering
## 617 Kimberly Female 37916 12.929 TRUE Distribution
## 618 Kathy Female 45682 1.451 TRUE Marketing
## 619 Peter Male 69297 1.268 FALSE Finance
## 620 Beverly Female 59070 19.064 TRUE Engineering
## 621 Ralph Male 81215 8.4 FALSE Finance
## 622 Justin Male 128036 18.824 FALSE Client Services
## 623 Irene Female 89780 8.999 TRUE Sales
## 624 Irene Female 125018 9.601 TRUE Legal
## 625 Jeremy Male 133033 12.2 FALSE Sales
## 626 Cynthia Female 82408 8.701 TRUE Marketing
## 627 <NA> Female 131755 2.93 <NA>
## 628 Anne Female 128305 16.636 FALSE Marketing
## 629 <NA> <NA> 147309 1.702 Distribution
## 630 Debra Female 74911 19.513 FALSE Marketing
## 631 Michael Male 47079 2.617 FALSE Legal
## 632 Jason <NA> 82873 1.869 FALSE Client Services
## 633 Rebecca Female 134673 6.878 FALSE Engineering
## 634 Andrea Female 123591 6.5 TRUE Engineering
## 635 Carl Male 75598 19.289 FALSE <NA>
## 636 Randy Male 89831 13.047 TRUE <NA>
## 637 Marilyn Female 92430 2.924 FALSE Engineering
## 638 Wayne Male 126956 18.396 FALSE Human Resources
## 639 Frances Female 112467 1.433 FALSE Business Development
## 640 Amanda <NA> 46665 19.391 TRUE Client Services
## 641 Kathleen Female 42553 3.756 TRUE Distribution
## 642 Amanda Female 102081 5.203 FALSE Sales
## 643 Amanda <NA> 135118 15.108 FALSE Finance
## 644 Shawn Male 71975 9.77 FALSE Human Resources
## 645 Katherine Female 149908 18.912 FALSE Finance
## 646 Anna <NA> 45418 10.162 FALSE Marketing
## 647 Elizabeth Female 79145 19.78 FALSE Finance
## 648 Donald Male 122920 5.32 FALSE <NA>
## 649 Gregory Male 128031 15.857 TRUE Business Development
## 650 Ruth Female 59969 14.064 TRUE Finance
## 651 Cynthia Female 35381 11.749 FALSE Finance
## 652 Deborah Female 113129 17.371 FALSE Client Services
## 653 Willie Male 141932 1.017 TRUE Engineering
## 654 Debra Female 42296 16.922 FALSE Product
## 655 Carl Male 63395 11.411 FALSE Legal
## 656 Walter Male 125382 13.613 TRUE Client Services
## 657 Lisa Female 113592 17.108 TRUE Marketing
## 658 Rebecca Female 46750 11.367 TRUE Sales
## 659 Jessica Female 90285 13.591 TRUE Client Services
## 660 Steve <NA> 53692 4.785 TRUE Marketing
## 661 Mary <NA> 100341 6.662 FALSE Distribution
## 662 Craig Male 123876 4.225 FALSE Engineering
## 663 Katherine Female 41643 4.659 TRUE Distribution
## 664 Andrea Female 113760 12.866 TRUE Distribution
## 665 Timothy Male 49473 12.463 FALSE Legal
## 666 Anthony Male 146141 3.645 TRUE Distribution
## 667 Harold <NA> 79459 7.776 TRUE Business Development
## 668 Douglas <NA> 104496 14.771 TRUE Marketing
## 669 James Male 67789 17.105 TRUE Legal
## 670 Joseph <NA> 86564 11.879 TRUE <NA>
## 671 Martha Female 135758 14.782 TRUE Client Services
## 672 Laura Female 84672 3.96 FALSE Distribution
## 673 Robert Male 111580 10.982 FALSE Human Resources
## 674 Ralph Male 50455 16.248 FALSE Business Development
## 675 <NA> Male 88733 1.932 Human Resources
## 676 Diane Female 130577 12.791 FALSE Marketing
## 677 Annie Female 138925 9.801 TRUE Marketing
## 678 Jacqueline Female 62371 15.77 TRUE Client Services
## 679 Brenda Female 87715 2.675 FALSE Product
## 680 Lori Female 66029 3.345 TRUE Client Services
## 681 Albert Male 86818 14.301 TRUE Engineering
## 682 Frank Male 75147 17.398 FALSE Product
## 683 Deborah Female 105573 5.268 TRUE Product
## 684 Robin <NA> 41230 6.611 TRUE Distribution
## 685 Alice Female 117787 10.485 FALSE <NA>
## 686 <NA> Male 88086 19.387 Finance
## 687 Paul <NA> 91462 18.704 FALSE Finance
## 688 <NA> Male 139959 8.992 Distribution
## 689 Brian Male 93901 17.821 TRUE Legal
## 690 Julie <NA> 50529 17.263 FALSE Finance
## 691 Bruce Male 134988 13.926 TRUE Sales
## 692 Joan Female 120941 3.694 TRUE Finance
## 693 Harold Male 118753 8.162 FALSE Sales
## 694 Alice Female 92799 2.782 FALSE Sales
## 695 Barbara Female 85718 13.326 FALSE Client Services
## 696 Sarah Female 109517 5.146 FALSE Human Resources
## 697 Samuel Male 85550 11.593 TRUE Marketing
## 698 <NA> Male 136655 9.801 Distribution
## 699 Barbara <NA> 43312 8.218 TRUE Legal
## 700 Amy <NA> 102839 10.385 TRUE Distribution
## 701 Frank Male 78891 7.927 TRUE Distribution
## 702 Robin <NA> 93201 11.712 TRUE Legal
## 703 Debra Female 84693 6.976 TRUE Finance
## 704 Brandon <NA> 73587 9.769 TRUE Human Resources
## 705 Thomas Male 65251 11.211 FALSE Distribution
## 706 Harry Male 129148 15.193 TRUE Engineering
## 707 Todd Male 128175 18.473 TRUE <NA>
## 708 Patricia Female 75825 7.839 FALSE Engineering
## 709 Steve Male 51821 1.197 TRUE Legal
## 710 <NA> Male 60411 12.048 Human Resources
## 711 Edward Male 58327 17.095 TRUE Finance
## 712 Karen Female 46478 16.552 FALSE Engineering
## 713 Martin <NA> 123963 15.745 TRUE Engineering
## 714 Ann Female 79796 9.851 FALSE Distribution
## 715 Jonathan Male 83809 12.922 FALSE Distribution
## 716 Peter Male 77933 13.132 TRUE Engineering
## 717 Eric Male 51070 13.806 TRUE Business Development
## 718 Jason <NA> 97480 11.518 FALSE Human Resources
## 719 Terry Male 58357 7.969 FALSE Engineering
## 720 John Male 67165 13.001 FALSE Engineering
## 721 Marie Female 145988 18.685 TRUE Human Resources
## 722 Harold Male 147417 11.626 TRUE Business Development
## 723 Joshua Male 95003 5.197 TRUE Marketing
## 724 Brandon <NA> 144187 11.416 TRUE Legal
## 725 Andrea Female 37888 13.47 FALSE Engineering
## 726 Jeremy Male 131513 1.876 TRUE Finance
## 727 Daniel Male 77287 13 TRUE <NA>
## 728 Roy Male 46875 12.942 TRUE Sales
## 729 Dorothy Female 82744 19.111 TRUE Client Services
## 730 Steven Male 43252 18.892 FALSE Client Services
## 731 Nicole Female 66047 18.674 TRUE Marketing
## 732 Ann Female 118431 12.772 TRUE Finance
## 733 Henry Male 59943 1.432 FALSE Finance
## 734 <NA> Male 136681 3.655 Business Development
## 735 <NA> Female 138807 1.4 Human Resources
## 736 Barbara Female 82884 6.837 TRUE Business Development
## 737 Judy Female 48668 11.716 TRUE Product
## 738 Lawrence Male 122971 14.618 FALSE Product
## 739 Joseph Male 107050 12.737 FALSE Business Development
## 740 Carlos Male 138598 14.737 FALSE Sales
## 741 Russell <NA> 149456 3.533 FALSE Marketing
## 742 Jane Female 128540 15.941 FALSE Legal
## 743 Martin Male 61117 2.844 FALSE Client Services
## 744 Steven Male 100949 13.813 TRUE Human Resources
## 745 Judy <NA> 46829 5.19 TRUE Finance
## 746 James Male 69111 14.625 TRUE Business Development
## 747 Gloria Female 46602 1.027 TRUE Business Development
## 748 Ernest Male 61181 16.559 FALSE Business Development
## 749 Martha Female 94963 19.626 TRUE Business Development
## 750 Janet <NA> 85789 9.712 FALSE Legal
## 751 Louis <NA> 145274 16.379 FALSE Product
## 752 Tina Female 102841 3.369 FALSE Client Services
## 753 Henry Male 49665 18.338 FALSE Business Development
## 754 Antonio Male 41928 5.478 TRUE <NA>
## 755 Lisa Female 128042 2.03 TRUE Legal
## 756 Denise Female 86150 3.997 FALSE Client Services
## 757 Stephen Male 121816 10.615 TRUE Distribution
## 758 Benjamin Male 123409 7.783 FALSE Product
## 759 Carl <NA> 98295 7.617 TRUE Legal
## 760 Ruth Female 59678 10.895 FALSE Distribution
## 761 Shawn Male 57871 4.02 TRUE Human Resources
## 762 Jennifer Female 132084 10.006 TRUE Engineering
## 763 Terry Male 35633 3.947 TRUE Distribution
## 764 Jacqueline Female 125298 3.019 TRUE Legal
## 765 Roger Male 115582 15.343 TRUE Sales
## 766 Alice <NA> 148339 11.479 TRUE Finance
## 767 Justin Male 112975 9.699 FALSE Human Resources
## 768 Janet Female 36927 18.769 FALSE Client Services
## 769 Robin Female 70248 9.66 TRUE Sales
## 770 Samuel <NA> 141305 9.849 TRUE Marketing
## 771 Katherine Female 72002 13.178 TRUE Product
## 772 Peter Male 102577 12.026 TRUE Product
## 773 Lillian Female 113554 18.018 TRUE Business Development
## 774 <NA> Male 47176 10.736 Finance
## 775 <NA> Female 106428 10.867 <NA>
## 776 Rose Female 75181 6.06 TRUE Finance
## 777 Bobby Male 79047 18.784 FALSE Human Resources
## 778 Kimberly Female 52970 4.513 FALSE Sales
## 779 Antonio Male 137979 5.266 FALSE Distribution
## 780 Doris Female 114360 17.799 TRUE Business Development
## 781 Steven Male 110306 16.843 TRUE Human Resources
## 782 Lawrence Male 46378 9.127 FALSE <NA>
## 783 <NA> Male 74104 17.68 Client Services
## 784 <NA> <NA> 132505 13.592 Product
## 785 Stephanie Female 96649 3.453 FALSE Sales
## 786 <NA> Female 57811 8.941 Marketing
## 787 Sandra Female 111468 2.58 TRUE Product
## 788 Kevin Male 141498 4.135 TRUE Sales
## 789 Michelle <NA> 124441 16.353 FALSE Business Development
## 790 Lisa Female 40121 6.293 FALSE Product
## 791 Fred Male 129712 11.058 FALSE Finance
## 792 Joseph Male 126010 19.601 FALSE Legal
## 793 Anne <NA> 122762 9.564 FALSE Distribution
## 794 Andrea Female 149105 13.707 TRUE Distribution
## 795 Nicole Female 44021 10.286 FALSE <NA>
## 796 Theresa <NA> 42025 3.319 TRUE Human Resources
## 797 Catherine Female 59970 12.801 FALSE Client Services
## 798 Rose Female 145001 19.85 FALSE Engineering
## 799 Lawrence Male 102589 17.952 TRUE Marketing
## 800 Raymond Male 47529 2.712 TRUE Product
## 801 Clarence Male 148941 11.517 FALSE Product
## 802 Ryan Male 91109 17.499 TRUE Human Resources
## 803 Jane Female 51923 13.623 FALSE Business Development
## 804 Lois Female 53954 19.075 FALSE Product
## 805 Shawn Male 39335 10.664 FALSE Distribution
## 806 Thomas Male 111371 15.081 TRUE Engineering
## 807 Kathryn Female 86676 6.081 FALSE Sales
## 808 Mary Female 115057 2.089 FALSE Finance
## 809 Julie Female 145357 3.459 FALSE Engineering
## 810 George Male 36749 19.754 FALSE Finance
## 811 Ralph Male 89854 7.227 FALSE Business Development
## 812 Judith Female 134048 6.818 TRUE Marketing
## 813 <NA> Male 141311 5.478 Product
## 814 Evelyn Female 123621 19.767 TRUE Marketing
## 815 Rachel Female 54941 3.221 TRUE Distribution
## 816 Maria <NA> 106562 4 FALSE Human Resources
## 817 Kelly Female 39371 4.068 FALSE Engineering
## 818 Ruby Female 83112 4.083 FALSE Marketing
## 819 Ann Female 96941 10.048 TRUE Distribution
## 820 Kenneth Male 47232 17.862 TRUE Legal
## 821 William Male 54058 5.182 TRUE Human Resources
## 822 <NA> Female 80399 12.254 Distribution
## 823 Deborah Female 118043 7.266 TRUE Business Development
## 824 <NA> Male 109411 9.494 Engineering
## 825 Julia Female 97566 2.147 FALSE Marketing
## 826 Robert <NA> 69267 5.89 TRUE Sales
## 827 <NA> <NA> 87103 5.665 <NA>
## 828 Jesse <NA> 98811 7.487 FALSE Legal
## 829 Cynthia Female 149684 7.864 FALSE Product
## 830 Johnny Male 71383 2.097 TRUE Human Resources
## 831 Michael Male 81206 19.908 TRUE Distribution
## 832 Kenneth Male 69112 7.588 TRUE Finance
## 833 Keith Male 120672 19.467 FALSE Legal
## 834 Gerald Male 96511 9.331 FALSE Human Resources
## 835 Carl <NA> 49325 2.071 TRUE Business Development
## 836 Douglas Male 132175 2.28 FALSE Engineering
## 837 Carolyn Female 118037 13.492 FALSE Finance
## 838 Joseph Male 139570 15.804 TRUE Finance
## 839 Billy Male 115280 9.153 FALSE Distribution
## 840 Joyce Female 51065 16.807 FALSE Human Resources
## 841 Lillian Female 103854 4.924 TRUE Distribution
## 842 Ruby Female 48354 19.501 FALSE Business Development
## 843 Sandra Female 132327 19.264 FALSE Product
## 844 Louise Female 106362 8.965 FALSE Sales
## 845 Maria <NA> 148857 8.738 FALSE Legal
## 846 Brenda Female 131131 11.682 FALSE Client Services
## 847 Stephen Male 129663 15.574 FALSE Human Resources
## 848 Nicole <NA> 41449 4.707 FALSE Finance
## 849 Bobby Male 93368 2.88 TRUE Product
## 850 Ernest Male 53335 9.192 FALSE Product
## 851 Charles Male 148291 6.002 FALSE <NA>
## 852 Bobby Male 147842 16.158 TRUE <NA>
## 853 Diana Female 105066 17.343 TRUE Business Development
## 854 Mildred Female 139284 11.39 TRUE <NA>
## 855 Harry Male 63046 10.411 FALSE Finance
## 856 Phillip <NA> 89700 2.277 TRUE <NA>
## 857 Bonnie <NA> 108946 12.211 FALSE Finance
## 858 Marie Female 98406 7.943 TRUE Product
## 859 Sean Male 108581 9.634 FALSE Human Resources
## 860 Robin Female 41808 19.239 FALSE Business Development
## 861 Phillip Male 36837 14.66 FALSE Marketing
## 862 Jerry Male 140850 18.855 FALSE Finance
## 863 Ronald Male 50426 18.536 TRUE Sales
## 864 Phillip Male 134120 6.842 FALSE Human Resources
## 865 Ryan Male 57292 6.01 FALSE <NA>
## 866 Karen Female 80633 16.306 FALSE Legal
## 867 Bonnie Female 131943 14.249 FALSE Client Services
## 868 Earl Male 48046 7.737 FALSE Marketing
## 869 Katherine Female 97443 13.657 FALSE Product
## 870 Matthew Male 135352 7.986 TRUE Business Development
## 871 Cynthia <NA> 107816 18.751 FALSE Marketing
## 872 Gerald Male 96329 2.469 TRUE Engineering
## 873 Brenda Female 73749 19.332 FALSE Business Development
## 874 Jason Male 75607 4.299 TRUE Sales
## 875 Melissa Female 98858 3.525 TRUE Human Resources
## 876 Beverly Female 76485 9.212 TRUE Engineering
## 877 Terry <NA> 41238 8.219 FALSE Marketing
## 878 <NA> Male 114896 13.823 Client Services
## 879 Jacqueline Female 125418 8.064 FALSE Distribution
## 880 Amy Female 75415 19.132 FALSE Client Services
## 881 Robert <NA> 90998 8.382 FALSE Finance
## 882 Ruby Female 142868 6.318 FALSE Marketing
## 883 Sara <NA> 135990 14.344 TRUE Distribution
## 884 Kathryn Female 57300 18.015 FALSE Marketing
## 885 Lisa Female 73706 18.53 FALSE Finance
## 886 Juan Male 85871 9.16 FALSE Product
## 887 Anthony Male 96795 14.837 FALSE Sales
## 888 David Male 92242 15.407 FALSE Legal
## 889 Marilyn Female 115149 11.934 TRUE Legal
## 890 Margaret Female 126924 1.552 TRUE Finance
## 891 <NA> Male 145329 7.1 Finance
## 892 Timothy Male 92587 8.475 FALSE Finance
## 893 Brandon Male 60263 2.709 FALSE Sales
## 894 Frank Male 91406 5.681 TRUE Business Development
## 895 Betty Female 37005 7.645 TRUE Marketing
## 896 Janice <NA> 139791 16.968 FALSE Business Development
## 897 Victor Male 45267 3.942 TRUE Sales
## 898 Kenneth <NA> 95296 10.146 FALSE Finance
## 899 Jane Female 59680 15.211 TRUE Business Development
## 900 Walter Male 144701 16.323 TRUE Marketing
## 901 Christina Female 35477 18.178 FALSE Human Resources
## 902 Patricia Female 119266 6.911 FALSE Distribution
## 903 <NA> Male 103877 6.322 Distribution
## 904 Heather Female 47605 14.955 TRUE Human Resources
## 905 Stephanie Female 136604 6.16 TRUE Human Resources
## 906 Alice Female 121250 4.363 TRUE Finance
## 907 Frances Female 35884 17.667 FALSE Sales
## 908 Elizabeth Female 137144 10.081 FALSE Finance
## 909 Janice Female 102697 3.283 FALSE Engineering
## 910 Donald Male 61999 6.466 FALSE Product
## 911 Melissa Female 45223 8.879 TRUE Legal
## 912 Carl Male 54033 15.528 TRUE Business Development
## 913 Joe Male 126120 1.02 FALSE <NA>
## 914 Lois Female 99747 6.168 FALSE Marketing
## 915 Ann Female 71958 5.272 TRUE Distribution
## 916 Todd Male 115566 6.716 TRUE Client Services
## 917 Marilyn Female 118369 7.696 TRUE Business Development
## 918 Shawn Male 51667 6.339 FALSE Product
## 919 Ryan Male 85858 19.475 FALSE Client Services
## 920 Sean Male 131423 8.957 FALSE Distribution
## 921 Rose Female 49538 9.828 FALSE Client Services
## 922 George Male 50369 18.9 TRUE Sales
## 923 Arthur Male 86615 3.412 TRUE Business Development
## 924 Irene <NA> 135369 4.38 FALSE Business Development
## 925 Deborah Female 60003 9.624 FALSE Client Services
## 926 <NA> Female 95866 19.388 Sales
## 927 Judith Female 109324 19.488 FALSE Distribution
## 928 Philip Male 103557 16.014 TRUE Business Development
## 929 Jeffrey Male 111376 2.673 TRUE Business Development
## 930 Theresa Female 75661 1.079 TRUE Legal
## 931 Nancy Female 85213 2.386 TRUE Marketing
## 932 Harold Male 140444 3.771 FALSE Distribution
## 933 Bonnie Female 90427 2.01 TRUE Client Services
## 934 Doris Female 141439 3.799 FALSE Business Development
## 935 Samuel Male 43694 3.787 TRUE Engineering
## 936 Alice Female 131952 12.09 FALSE Engineering
## 937 Maria Female 96250 10.056 FALSE Business Development
## 938 Aaron <NA> 63126 18.424 FALSE Client Services
## 939 Mark <NA> 44836 2.657 FALSE Client Services
## 940 Ralph <NA> 70635 2.147 FALSE Client Services
## 941 Andrew Male 137386 8.611 TRUE Distribution
## 942 William Male 104840 15.653 TRUE Engineering
## 943 Lori Female 75498 6.537 TRUE Marketing
## 944 Wayne Male 67471 2.728 FALSE Engineering
## 945 Kenneth Male 101914 1.905 TRUE Distribution
## 946 Gerald <NA> 93712 17.426 TRUE Distribution
## 947 <NA> Female 133472 16.941 Distribution
## 948 <NA> Male 107351 5.329 Marketing
## 949 Ashley Female 142410 11.048 TRUE Engineering
## 950 Scott Male 37385 8.226 TRUE Business Development
## 951 Paula Female 58423 10.833 FALSE Business Development
## 952 <NA> Female 143638 9.662 <NA>
## 953 Teresa Female 113425 11.907 TRUE Human Resources
## 954 Randy Male 57266 14.077 FALSE Product
## 955 Joe Male 119667 1.148 TRUE Finance
## 956 Sarah Female 127118 11.176 FALSE Legal
## 957 Beverly Female 80838 8.115 FALSE Engineering
## 958 Jeffrey Male 70990 15.901 TRUE Sales
## 959 Gloria Female 39833 9.631 FALSE Engineering
## 960 Albert Male 45094 5.85 TRUE Business Development
## 961 Stephen Male 93997 18.093 TRUE Business Development
## 962 Antonio <NA> 103050 3.05 FALSE Legal
## 963 Jonathan Male 121797 16.923 FALSE Product
## 964 Ann Female 89443 17.94 TRUE Sales
## 965 Bruce Male 35802 12.391 TRUE Sales
## 966 Catherine Female 68164 18.393 FALSE Client Services
## 967 Louis Male 93022 9.146 TRUE Human Resources
## 968 Thomas Male 105681 19.572 FALSE Engineering
## 969 Louise Female 43050 11.671 FALSE Distribution
## 970 Linda Female 44486 17.308 TRUE Engineering
## 971 Alice Female 63571 15.397 TRUE Product
## 972 Patrick Male 75423 5.368 TRUE Business Development
## 973 Victor <NA> 76381 11.159 TRUE Sales
## 974 Russell Male 137359 11.105 FALSE Business Development
## 975 Harry Male 67656 16.455 TRUE Client Services
## 976 Susan Female 92436 12.467 FALSE Sales
## 977 Denise Female 137954 4.195 TRUE Legal
## 978 Sarah Female 124566 5.949 FALSE Product
## 979 Sean Male 66146 11.178 FALSE Human Resources
## 980 Ernest Male 142935 13.198 TRUE Product
## 981 Kimberly Female 46233 8.862 TRUE Engineering
## 982 James Male 148985 19.28 FALSE Legal
## 983 Rose Female 91411 8.639 TRUE Human Resources
## 984 John Male 146907 11.738 FALSE Engineering
## 985 Maria Female 43455 13.04 FALSE Engineering
## 986 Stephen <NA> 85668 1.909 FALSE Legal
## 987 Donna Female 82871 17.999 FALSE Marketing
## 988 Gloria Female 136709 10.331 TRUE Finance
## 989 Alice Female 47638 11.209 FALSE Human Resources
## 990 Justin <NA> 38344 3.794 FALSE Legal
## 991 Robin Female 100765 10.982 TRUE Client Services
## 992 Rose Female 134505 11.051 TRUE Marketing
## 993 Anthony Male 112769 11.625 TRUE Finance
## 994 Tina Female 56450 19.04 TRUE Engineering
## 995 George Male 98874 4.479 TRUE Marketing
## 996 Henry <NA> 132483 16.655 FALSE Distribution
## 997 Phillip Male 42392 19.675 FALSE Finance
## 998 Russell Male 96914 1.421 FALSE Product
## 999 Larry Male 60500 11.985 FALSE Business Development
## 1000 Albert Male 129949 10.169 TRUE Sales
# Convert placeholder strings to NA with `replace_with_na_if()`, restricted to
# the columns for which `is.character` is TRUE. Only exact matches of the
# strings listed below are replaced; other markers (e.g. "n.a.") are kept as-is.
employees %>%
  replace_with_na_if( .predicate = is.character,
                      condition = ~ .x %in% c( "", " ", "na", "NaN", "?" ) )
## First.Name Gender Salary Bonus.. Senior.Management Team
## 1 Douglas Male 97308 6.945 TRUE Marketing
## 2 Thomas Male 61933 <NA> TRUE <NA>
## 3 Maria Female 130590 11.858 FALSE Finance
## 4 Jerry Male <NA> 9.34 TRUE Finance
## 5 Larry Male 101004 1.389 TRUE Client Services
## 6 Dennis n.a. 115163 10.125 FALSE Legal
## 7 Ruby Female 65476 10.012 TRUE Product
## 8 <NA> Female 45906 11.598 <NA> Finance
## 9 Angela <NA> <NA> 18.523 TRUE Engineering
## 10 Frances Female 139852 7.524 TRUE Business Development
## 11 Louise Female 63241 15.132 TRUE <NA>
## 12 Julie Female 102508 12.637 TRUE Legal
## 13 Brandon Male 112807 17.492 TRUE Human Resources
## 14 Gary Male 109831 5.831 FALSE Sales
## 15 Kimberly Female 41426 <NA> TRUE Finance
## 16 Lillian <NA> 59414 1.256 FALSE Product
## 17 Jeremy Male 90370 7.369 FALSE Human Resources
## 18 Shawn Male 111737 6.414 FALSE <NA>
## 19 Diana Female 132940 19.082 FALSE Client Services
## 20 Donna Female 81014 1.894 FALSE Product
## 21 Lois <NA> 64714 4.934 TRUE Legal
## 22 Matthew Male 100612 13.645 FALSE Marketing
## 23 Joshua <NA> 90816 18.816 TRUE Client Services
## 24 <NA> Male 125792 5.042 <NA> <NA>
## 25 John Male 97950 13.873 FALSE Client Services
## 26 <NA> Male 37076 18.576 <NA> Client Services
## 27 Craig Male 37598 7.757 TRUE Marketing
## 28 Scott <NA> 122367 5.218 FALSE Legal
## 29 Terry Male 124008 13.464 TRUE Client Services
## 30 Benjamin Male 79529 7.008 TRUE Legal
## 31 Christina Female 118780 9.096 TRUE Engineering
## 32 Joyce <NA> 88657 12.752 FALSE Product
## 33 <NA> Male 122340 6.417 <NA> <NA>
## 34 Jean Female 119082 16.18 FALSE Business Development
## 35 Jerry Male 95734 19.096 FALSE Client Services
## 36 Theresa Female 85182 16.675 FALSE Sales
## 37 Rachel Female 142032 12.599 FALSE Business Development
## 38 Linda Female 57427 9.557 TRUE Client Services
## 39 Stephanie Female 36844 5.574 TRUE Business Development
## 40 <NA> Male 122173 7.797 <NA> Client Services
## 41 Michael Male 99283 2.665 TRUE Distribution
## 42 Christine <NA> 66582 11.308 TRUE Business Development
## 43 Beverly Female 121918 15.835 FALSE Legal
## 44 Marilyn Female 73524 5.207 TRUE Marketing
## 45 Cynthia Female 145146 7.482 TRUE Product
## 46 Roger Male 88010 13.886 TRUE Sales
## 47 Bruce Male 114796 6.796 FALSE Finance
## 48 Kathy Female 66820 <NA> TRUE Client Services
## 49 Clarence Male 93581 6.083 TRUE Business Development
## 50 Chris <NA> 113590 3.055 FALSE Sales
## 51 Nancy Female 94976 13.83 TRUE Engineering
## 52 <NA> <NA> 41126 14.009 <NA> Sales
## 53 Todd Male 49339 1.695 TRUE <NA>
## 54 Alan <NA> 40341 17.578 TRUE Finance
## 55 Sara Female 83677 8.999 FALSE Engineering
## 56 Karen Female 102488 17.653 TRUE Product
## 57 Carl Male 130276 16.084 TRUE Finance
## 58 <NA> Male 64715 15.107 TRUE Human Resources
## 59 Theresa Female 72670 1.481 TRUE Engineering
## 60 Irene Female 66851 11.279 FALSE Engineering
## 61 Paula <NA> 48866 4.271 FALSE Distribution
## 62 Denise Female 106862 3.699 FALSE Business Development
## 63 <NA> Female 58112 19.414 <NA> Marketing
## 64 Matthew Male 35203 18.04 FALSE Human Resources
## 65 Kathleen <NA> 77834 18.771 FALSE Business Development
## 66 Steve Male 61310 12.428 TRUE Distribution
## 67 Nancy Female 125250 2.672 TRUE Business Development
## 68 Rachel Female 51178 9.735 TRUE Finance
## 69 Jose Male 84834 14.33 TRUE Finance
## 70 Irene <NA> 100863 4.382 TRUE Finance
## 71 Todd <NA> 84692 6.617 FALSE Client Services
## 72 Johnny Male 118172 16.194 TRUE Sales
## 73 Bobby Male 54043 3.833 FALSE Product
## 74 Frances Female 90582 4.709 TRUE Sales
## 75 Thomas Male 62096 17.029 FALSE Marketing
## 76 Bonnie Female 104897 <NA> TRUE Human Resources
## 77 Margaret Female 131604 7.353 TRUE Distribution
## 78 Charles Male 107391 1.26 TRUE Marketing
## 79 Robin Female 114797 5.965 TRUE Sales
## 80 <NA> Female 115814 4.99 FALSE Product
## 81 Gerald <NA> 137126 15.602 TRUE Sales
## 82 Christopher Male 47369 14.822 FALSE Legal
## 83 Steven Male 35095 8.379 TRUE Client Services
## 84 Shawn Male 148115 6.539 TRUE Finance
## 85 Doris Female 83072 7.511 FALSE Finance
## 86 Jeremy Male 100238 3.887 TRUE Client Services
## 87 Annie <NA> 103495 17.29 TRUE Business Development
## 88 Annie Female 144887 8.276 TRUE Sales
## 89 Donna Female 64088 6.155 TRUE Legal
## 90 Janice Female 51082 11.955 FALSE Legal
## 91 Janice <NA> 91719 11.583 TRUE Legal
## 92 James <NA> 128771 8.309 FALSE <NA>
## 93 Linda Female 119009 12.506 TRUE Business Development
## 94 Virginia <NA> 111858 1.601 TRUE Legal
## 95 Harry Male 130620 7.03 FALSE Legal
## 96 Heather Female 43026 14.166 FALSE Client Services
## 97 Cynthia Female 142321 1.737 <NA> Finance
## 98 Laura <NA> 140371 10.62 TRUE Marketing
## 99 Tina Female 100705 16.961 TRUE Marketing
## 100 Harold Male 77544 12.447 FALSE Business Development
## 101 Melissa Female 48109 14.995 FALSE Finance
## 102 Aaron Male 61602 11.849 TRUE Marketing
## 103 Jack Male 103902 12.159 FALSE Client Services
## 104 Phyllis Female 136984 8.932 TRUE Finance
## 105 John Male 80740 19.305 FALSE Marketing
## 106 Kathy Female 91712 8.567 FALSE Finance
## 107 Paul Male 42146 3.046 FALSE Legal
## 108 Steven Male 68680 16.565 FALSE Legal
## 109 Russell <NA> 133980 12.396 TRUE Legal
## 110 Christopher Male 37919 11.449 FALSE <NA>
## 111 Shirley Female 147113 16.135 FALSE Legal
## 112 Bonnie Female 42153 8.454 TRUE Business Development
## 113 Willie Male 64363 4.023 FALSE Marketing
## 114 Tina Female 114767 3.711 TRUE Engineering
## 115 Ashley Female 58698 6.811 TRUE Business Development
## 116 Pamela Female 54585 4.166 FALSE Product
## 117 <NA> Male 76189 18.988 <NA> Legal
## 118 Steven Male 109095 9.494 FALSE Finance
## 119 Andrea Female 120204 9.557 FALSE Business Development
## 120 Paul Male 41054 12.299 FALSE Marketing
## 121 Peter Male 84885 15.402 FALSE Business Development
## 122 Kathleen <NA> 119735 18.74 FALSE Product
## 123 Christina Female 110169 13.892 TRUE Engineering
## 124 Helen Female 73789 14.841 TRUE Product
## 125 Marilyn Female 76078 2.401 TRUE Product
## 126 Patricia Female 49368 14.226 FALSE Human Resources
## 127 Michelle Female 57325 17.179 TRUE Human Resources
## 128 William Male 66521 5.83 FALSE Human Resources
## 129 Kenneth Male 127654 16.439 TRUE Client Services
## 130 Antonio Male 60866 13.101 TRUE Business Development
## 131 Shirley Female 41334 6.219 TRUE Human Resources
## 132 Rebecca Female 94231 17.517 FALSE Product
## 133 Carlos Male 146670 10.763 FALSE Human Resources
## 134 Lois Female 106317 2.235 TRUE Client Services
## 135 Carolyn Female 109260 2.65 TRUE Business Development
## 136 Gloria Female 134148 8.833 TRUE Legal
## 137 Henry Male 43542 19.687 FALSE Legal
## 138 Adam Male 95327 15.12 FALSE Distribution
## 139 Ashley Female 112238 6.03 TRUE Engineering
## 140 <NA> Female 132373 10.527 <NA> <NA>
## 141 Shirley Female 113850 1.854 FALSE Marketing
## 142 Adam Male 110194 14.727 TRUE Product
## 143 Elizabeth Female 146129 5.687 FALSE Finance
## 144 Teresa <NA> 140013 8.689 TRUE Engineering
## 145 Nicole <NA> 122717 12.452 FALSE Sales
## 146 Jennifer Female 71715 13.079 TRUE Client Services
## 147 Patricia Female 95322 4.15 n.a. Product
## 148 Philip Male 89227 3.996 FALSE Legal
## 149 Patrick <NA> 124488 14.837 TRUE Sales
## 150 <NA> Female 86230 8.578 <NA> Distribution
## 151 Sean Male 135490 19.934 FALSE Marketing
## 152 Brandon <NA> 121333 15.295 FALSE Business Development
## 153 Ruth Female 129297 8.067 TRUE Client Services
## 154 Victor <NA> 84546 10.489 TRUE Finance
## 155 Rebecca Female 85730 5.359 TRUE Product
## 156 Marie Female 104058 17.695 TRUE Business Development
## 157 Howard Male 105062 1.563 FALSE Human Resources
## 158 <NA> Female 79536 14.443 <NA> Product
## 159 Norma Female 114412 8.756 TRUE Marketing
## 160 James <NA> 68501 14.316 FALSE Marketing
## 161 Kathy Female 149563 16.991 TRUE Finance
## 162 Marilyn <NA> 103386 11.451 FALSE Distribution
## 163 Arthur Male 89786 14.422 TRUE Legal
## 164 Terry Male 52226 19.135 FALSE Client Services
## 165 Mary Female 134645 18.197 FALSE Business Development
## 166 <NA> Female 59148 9.061 <NA> Legal
## 167 <NA> Female 42341 7.014 <NA> Sales
## 168 Christopher Male 142178 17.984 TRUE Sales
## 169 Peter <NA> 38989 7.017 TRUE Marketing
## 170 Michelle Female 53754 5.455 TRUE Product
## 171 Irene Female 133772 19.02 TRUE Sales
## 172 Patrick Male 143499 17.495 TRUE Engineering
## 173 Sara Female 97058 9.402 FALSE Finance
## 174 Lillian Female 85446 3.959 TRUE Engineering
## 175 <NA> <NA> 40297 6.185 <NA> Client Services
## 176 Willie Male 146651 1.451 TRUE Engineering
## 177 Victor Male 124486 10.166 FALSE Product
## 178 Wayne Male 102652 14.085 TRUE Distribution
## 179 Jane Female 144474 17.648 FALSE Product
## 180 Jessica Female 68759 19.343 TRUE Finance
## 181 Mark Male 57286 5.025 TRUE Business Development
## 182 Randy Male 58129 1.952 TRUE Distribution
## 183 Lillian Female 123940 12.184 TRUE Business Development
## 184 Ruth <NA> 98233 2.518 TRUE Distribution
## 185 Jerry Male 140810 9.177 TRUE Client Services
## 186 Sandra Female 42090 8.842 TRUE Legal
## 187 <NA> Female 149654 1.825 <NA> Sales
## 188 Roger Male 51430 6.46 FALSE Marketing
## 189 Charles Male 71749 15.931 FALSE Legal
## 190 Clarence Male 85700 1.215 FALSE Sales
## 191 Carol Female 57783 9.129 FALSE Finance
## 192 Lois Female 36946 6.652 FALSE Engineering
## 193 Barbara <NA> 99326 16.475 TRUE Legal
## 194 Rebecca Female 109259 4.443 TRUE Legal
## 195 Irene Female 131038 8.996 FALSE Distribution
## 196 Ronald Male 121068 12.757 TRUE Product
## 197 Steven Male 62719 19.127 FALSE Client Services
## 198 Carolyn Female 69268 3.031 FALSE Client Services
## 199 Maria Female 36067 9.64 TRUE Product
## 200 Jonathan Male 130581 16.736 TRUE <NA>
## 201 Gary Male 89661 8.525 FALSE Finance
## 202 Kimberly Female 36643 7.953 FALSE Marketing
## 203 Roger Male 140558 5.084 TRUE Sales
## 204 Diana Female 103521 2.784 TRUE Business Development
## 205 Willie Male 55281 4.935 TRUE Marketing
## 206 Brandon Male 115711 8.012 TRUE Finance
## 207 Patricia Female 114079 8.399 TRUE Client Services
## 208 Katherine Female 57531 1.767 FALSE Client Services
## 209 Jonathan Male 141069 4.903 FALSE Human Resources
## 210 Emily Female 89434 11.295 FALSE Engineering
## 211 Ruth Female 44639 9.148 TRUE Legal
## 212 Linda Female 110967 19.612 TRUE Human Resources
## 213 Lisa Female 115387 1.821 FALSE Client Services
## 214 Evelyn Female 81673 15.364 TRUE Engineering
## 215 Julie Female 109588 3.55 FALSE Engineering
## 216 Mary Female 92544 3.8 FALSE Client Services
## 217 Matthew Male 142373 2.462 FALSE Marketing
## 218 Douglas Male 83341 1.015 TRUE Client Services
## 219 Gregory <NA> 98865 10.628 TRUE Finance
## 220 Billy Male 120444 7.768 TRUE Finance
## 221 <NA> Female 71945 5.56 <NA> Marketing
## 222 Gregory Male 109564 3.845 FALSE Legal
## 223 Jason Male 78417 3.067 FALSE Finance
## 224 Daniel Male 106947 15.866 TRUE Legal
## 225 Sarah Female 87298 2.311 FALSE Distribution
## 226 Harry Male 64579 15.266 TRUE Sales
## 227 Kathy Female 50905 9.404 TRUE Finance
## 228 Todd Male 59728 11.226 TRUE Client Services
## 229 Bobby Male 51685 17.445 TRUE Engineering
## 230 Jeremy Male 49542 1.679 TRUE Distribution
## 231 George <NA> 38375 8.999 FALSE Legal
## 232 <NA> Female 64084 17.018 FALSE Product
## 233 Marilyn Female 147663 10.263 FALSE Human Resources
## 234 Kathryn Female 73935 14.752 FALSE Sales
## 235 Irene Female 40837 12.182 TRUE Client Services
## 236 Norma Female 94393 3.643 TRUE Engineering
## 237 Laura Female 42087 2.624 FALSE Product
## 238 Cheryl Female 52080 9.375 FALSE Legal
## 239 Kevin Male 35061 5.128 FALSE Legal
## 240 Lillian <NA> 64164 17.612 FALSE Human Resources
## 241 Phyllis <NA> 94088 19.107 FALSE Distribution
## 242 Walter Male 127813 5.961 FALSE Finance
## 243 Robert Male 38041 18.428 TRUE Engineering
## 244 Jimmy Male 126310 5.413 TRUE Product
## 245 Clarence Male 142561 8.866 FALSE Client Services
## 246 Victor Male 70817 17.138 FALSE Engineering
## 247 Fred <NA> 59937 12.045 TRUE Human Resources
## 248 Brenda <NA> 106115 3.742 TRUE Product
## 249 Justin Male 82782 4.366 <NA> Distribution
## 250 Betty Female 104896 19.55 TRUE Business Development
## 251 Randy <NA> 133943 8.94 TRUE Sales
## 252 Sharon <NA> 83658 6.513 FALSE Business Development
## 253 Ralph Male 71896 4.232 TRUE Marketing
## 254 <NA> <NA> 113732 10.391 <NA> Sales
## 255 Edward Male 110485 7.995 FALSE Client Services
## 256 Denise Female 115118 5.108 FALSE Human Resources
## 257 Debra Female 48696 4.75 FALSE Legal
## 258 <NA> Male 42676 15.517 <NA> Sales
## 259 Michael Male 43586 12.659 FALSE <NA>
## 260 Henry Male 89258 15.585 TRUE Human Resources
## 261 Gloria Female 90730 2.491 FALSE Distribution
## 262 Marie Female 100308 13.677 FALSE Product
## 263 Anne Female 69134 3.723 TRUE Engineering
## 264 Sarah <NA> 58295 14.603 TRUE Engineering
## 265 Stephanie Female 50141 13.218 TRUE Human Resources
## 266 Roy Male 101941 3.45 FALSE Client Services
## 267 <NA> Female 115145 14.063 <NA> Distribution
## 268 <NA> Male 40451 16.044 <NA> Distribution
## 269 Irene Female 56526 5.81 TRUE Finance
## 270 <NA> Female 145316 18.517 <NA> Human Resources
## 271 Thomas Male 103235 9.554 TRUE Human Resources
## 272 Frank <NA> 58563 12.062 TRUE Marketing
## 273 Fred Male 74129 18.225 FALSE Product
## 274 Nicholas Male 74669 1.113 TRUE Product
## 275 Howard Male 97490 5.46 TRUE Marketing
## 276 Debra Female 104250 18.456 TRUE Business Development
## 277 Christine Female 94345 11.996 FALSE Finance
## 278 Brenda <NA> 82439 19.062 FALSE Sales
## 279 Betty Female 51613 12.984 FALSE Distribution
## 280 Ruby Female 105946 1.139 FALSE Business Development
## 281 Kevin Male 46080 9.635 FALSE Business Development
## 282 Arthur Male 134610 6.147 TRUE n.a.
## 283 Carl <NA> 125104 12.345 FALSE Client Services
## 284 Todd Male 107281 1.612 TRUE Engineering
## 285 Brenda Female 141521 4.337 FALSE Client Services
## 286 Judy Female 65931 2.304 FALSE Human Resources
## 287 Todd Male 69989 10.985 TRUE Finance
## 288 Lois Female 147183 9.999 TRUE Client Services
## 289 Walter <NA> 66757 18.099 FALSE Distribution
## 290 Jessica Female 75145 6.388 TRUE Legal
## 291 Jeremy Male 129460 13.657 TRUE <NA>
## 292 Tammy Female 132839 17.463 TRUE Client Services
## 293 Anne Female 44537 18.284 TRUE Client Services
## 294 Jesse Male 118733 9.653 FALSE Marketing
## 295 Virginia Female 46905 19.154 FALSE Distribution
## 296 Jesse Male 79582 3.873 FALSE Legal
## 297 Jeremy <NA> 55394 3.18 TRUE Sales
## 298 Daniel Male 123811 7.664 TRUE Human Resources
## 299 Patrick <NA> 137314 4.542 TRUE Marketing
## 300 Emily Female 36711 19.028 TRUE Human Resources
## 301 Alan Male 111786 3.592 TRUE Engineering
## 302 James Male 72257 13.023 FALSE Engineering
## 303 Adam Male 71276 5.027 TRUE Human Resources
## 304 Joan <NA> 38712 3.657 FALSE Client Services
## 305 Jeremy Male 46930 18.702 TRUE Finance
## 306 Margaret Female 125220 3.733 FALSE Marketing
## 307 Mark Male 121477 17.44 TRUE Business Development
## 308 Marilyn Female 86386 2.937 FALSE Distribution
## 309 Cheryl Female 81308 2.196 TRUE Legal
## 310 Benjamin Male 84810 15.794 FALSE Marketing
## 311 Harold Male 66775 2.158 TRUE Legal
## 312 Ernest <NA> 72145 13.448 TRUE Finance
## 313 Gerald Male 121604 1.923 TRUE Sales
## 314 Judy Female 109510 13.457 TRUE Legal
## 315 Bobby Male 112117 6.338 FALSE <NA>
## 316 Bobby <NA> 108127 15.858 FALSE Client Services
## 317 Marie Female 123711 10.966 FALSE Product
## 318 Kathy Female 132381 8.34 FALSE Business Development
## 319 Roy Male 148225 1.841 FALSE Finance
## 320 Jacqueline Female n.a. 18.243 FALSE Marketing
## 321 <NA> Female 62960 14.356 <NA> Sales
## 322 Julie Female 56926 7.507 FALSE Sales
## 323 Douglas Male 41428 14.372 FALSE Product
## 324 Linda Female 115658 3.041 TRUE Sales
## 325 Ruby Female 76707 6.031 FALSE Business Development
## 326 Russell Male 60388 17.885 FALSE Client Services
## 327 Jeffrey Male 45150 12.075 TRUE Product
## 328 Aaron Male 58755 5.097 TRUE Marketing
## 329 Samuel Male 76076 5.319 TRUE Finance
## 330 <NA> Male 87760 14.987 <NA> Engineering
## 331 Randy Male 135119 7.887 FALSE Client Services
## 332 Evelyn Female 36759 17.269 TRUE Marketing
## 333 Fred Male 121723 18.645 TRUE Product
## 334 Shirley Female 110061 7.494 FALSE Client Services
## 335 Todd Male 85074 1.844 FALSE Client Services
## 336 Robert Male 85799 19.93 FALSE Finance
## 337 Mark Male 75150 12.182 TRUE Legal
## 338 Earl Male 91344 4.035 TRUE Sales
## 339 Richard Male 86326 10.717 FALSE Product
## 340 Michael Male 98753 16.443 TRUE Human Resources
## 341 Steven Male 113060 2.846 TRUE Sales
## 342 Carlos Male 77327 11.58 TRUE Finance
## 343 Marie Female 62666 10.247 FALSE Product
## 344 Ronald Male 96633 4.99 TRUE Engineering
## 345 Scott Male 58248 3.914 FALSE Business Development
## 346 Steven Male 83706 6.96 TRUE Human Resources
## 347 James Male 74086 5.778 TRUE Human Resources
## 348 Lori Female 95389 14.223 FALSE Sales
## 349 Philip Male 129968 19.897 FALSE Finance
## 350 Phyllis Female <NA> 8.723 FALSE Sales
## 351 Thomas <NA> 41549 3.95 FALSE Sales
## 352 Cynthia Female 74287 10.751 FALSE Sales
## 353 <NA> Male 69906 4.844 <NA> Engineering
## 354 <NA> Male 65078 3.095 <NA> Marketing
## 355 Elizabeth Female 106406 1.782 TRUE Legal
## 356 Andrea Female 115913 12.121 FALSE Legal
## 357 Judy Female 38092 5.668 FALSE Distribution
## 358 Russell Male 121160 7.843 FALSE Engineering
## 359 Scott Male 90429 4.45 FALSE Product
## 360 Sarah Female 64207 7.824 TRUE Client Services
## 361 Susan Female 80688 18.892 TRUE Sales
## 362 Margaret <NA> 55044 4.078 FALSE Sales
## 363 Joshua Male 72893 9.555 FALSE Distribution
## 364 Justin <NA> 96978 13.865 FALSE Finance
## 365 Juan Male 97364 3.595 FALSE Product
## 366 Gloria <NA> 140885 1.113 FALSE Human Resources
## 367 Jennifer <NA> 58520 16.231 TRUE Client Services
## 368 Edward Male 66067 10.957 TRUE <NA>
## 369 Marilyn Female 147183 8.748 FALSE Business Development
## 370 Mary Female 87721 12.484 FALSE Product
## 371 Linda Female 144001 2.194 FALSE Business Development
## 372 Larry Male 91133 5.14 FALSE Sales
## 373 Albert Male 67827 19.717 TRUE Engineering
## 374 Kenneth Male 81839 12.072 FALSE Sales
## 375 <NA> Female 81444 3.171 <NA> Distribution
## 376 Gregory <NA> 137661 4.805 TRUE Sales
## 377 Brandon Male 112548 4.664 FALSE Engineering
## 378 Stephen Male 111249 10.574 TRUE Human Resources
## 379 Russell Male 114334 9.669 FALSE Product
## 380 <NA> Female 118906 4.537 <NA> Marketing
## 381 Pamela <NA> 72979 5.66 FALSE Sales
## 382 Scott Male 64172 10.894 TRUE Engineering
## 383 <NA> Female 107024 12.182 <NA> <NA>
## 384 Carlos Male 50167 2.362 FALSE Product
## 385 Jack Male 106995 15.723 FALSE Product
## 386 Debra Female 70492 8.895 FALSE Client Services
## 387 Phyllis Female 125881 16.697 FALSE Sales
## 388 Robert Male 123294 19.894 FALSE <NA>
## 389 Gloria Female 131045 11.312 TRUE Product
## 390 Sharon <NA> 97635 10.413 TRUE Client Services
## 391 Barbara <NA> 94493 13.443 TRUE Sales
## 392 Marilyn Female 140502 9.989 TRUE Sales
## 393 Ralph Male 106310 4.03 TRUE Client Services
## 394 Harry <NA> 46240 6.976 TRUE Engineering
## 395 Robin Female 111163 5.025 TRUE Product
## 396 Kathy Female 93753 7.094 TRUE Sales
## 397 Rose Female 63494 19.385 TRUE Human Resources
## 398 Clarence Male 116693 13.835 TRUE Distribution
## 399 Doris Female 85215 7.425 TRUE Human Resources
## 400 Kathryn Female 86439 7.799 FALSE Finance
## 401 Billy <NA> 62913 18.241 TRUE Marketing
## 402 Norma Female 38872 9.302 TRUE Business Development
## 403 Richard <NA> 124655 14.272 TRUE Engineering
## 404 Craig Male 113506 19.642 FALSE Marketing
## 405 Sarah <NA> 109980 8.86 FALSE Sales
## 406 Sharon Female 91522 7.564 FALSE Finance
## 407 Mary Female 42214 17.538 TRUE Business Development
## 408 Steve <NA> 83159 9.55 TRUE Distribution
## 409 Joe Male 144082 1.62 TRUE Client Services
## 410 Jeremy Male 47885 19.276 TRUE Human Resources
## 411 Ernest Male 126232 6.191 TRUE Sales
## 412 Kevin Male 134598 11.699 FALSE Business Development
## 413 Jacqueline Female 66604 14.609 FALSE Legal
## 414 Johnny Male 115194 5.861 TRUE Marketing
## 415 Gary Male 49101 11.9 TRUE Business Development
## 416 Shirley <NA> 67811 12.699 FALSE Finance
## 417 <NA> Male 111043 5.966 <NA> Distribution
## 418 Sarah <NA> 37748 9.047 FALSE Human Resources
## 419 Julia Female 36403 2.664 TRUE Finance
## 420 Dorothy Female 140136 3.12 TRUE Business Development
## 421 Justin Male 121508 19.334 TRUE Finance
## 422 Kevin Male 79906 18.021 TRUE Sales
## 423 Victor Male 123144 16.261 TRUE Product
## 424 Deborah Female 46953 17.436 FALSE Client Services
## 425 Matthew <NA> 79443 14.637 FALSE Human Resources
## 426 Alice Female 51395 2.378 TRUE Finance
## 427 Todd Male 134408 3.56 TRUE Human Resources
## 428 Frank Male <NA> 8.037 TRUE Sales
## 429 Christopher Male 68028 7.869 TRUE Finance
## 430 Rose Female 149903 5.63 FALSE Human Resources
## 431 Andrea Female 79123 19.422 FALSE Distribution
## 432 Charles Male 104014 7.077 FALSE Legal
## 433 Jessica <NA> 121160 12.993 FALSE Client Services
## 434 Wanda Female 65362 7.132 TRUE Legal
## 435 Joyce Female 50701 14.227 TRUE <NA>
## 436 Billy Male 144709 10.069 TRUE Distribution
## 437 Kathryn Female 53061 11.864 TRUE Finance
## 438 Denise Female 36697 11.196 TRUE Sales
## 439 Jason Male 69244 6.22 TRUE <NA>
## 440 Nicholas Male 101036 2.826 TRUE Human Resources
## 441 Aaron Male 52119 11.343 TRUE Client Services
## 442 Louise Female 46666 1.886 TRUE Business Development
## 443 Julie Female 73437 2.518 TRUE Finance
## 444 Kathy Female 86318 18.492 TRUE Sales
## 445 <NA> Male 76409 7.008 <NA> Distribution
## 446 Chris Male 71642 1.496 FALSE <NA>
## 447 Cheryl Female 67150 15.85 TRUE Marketing
## 448 Gregory Male 142208 11.204 TRUE Engineering
## 449 Wayne Male 81183 17.066 FALSE Client Services
## 450 Beverly Female 107163 3.665 TRUE Human Resources
## 451 Willie Male 55038 19.691 n.a. Legal
## 452 Terry <NA> 140002 19.49 TRUE Marketing
## 453 Scott Male 146812 1.965 TRUE Marketing
## 454 Annie Female 40119 4.338 TRUE Human Resources
## 455 <NA> Male 136602 10.429 <NA> Marketing
## 456 Ruth Female 69579 18.029 TRUE Human Resources
## 457 Deborah <NA> 101457 6.662 FALSE Engineering
## 458 Patricia Female 121232 16.624 FALSE Legal
## 459 Albert Male 102626 15.843 FALSE Finance
## 460 Charles <NA> 137171 5.574 TRUE Marketing
## 461 Tina Female 88276 14.248 FALSE Legal
## 462 Jeremy Male 43354 13.946 FALSE Marketing
## 463 Craig Male 125556 7.996 FALSE Engineering
## 464 Jose Male 59862 3.269 FALSE Product
## 465 Lawrence Male 74640 18.641 FALSE Marketing
## 466 Helen Female 52875 4.188 FALSE Legal
## 467 Walter Male 58789 5.461 FALSE Sales
## 468 Amy Female 122897 8.222 TRUE Engineering
## 469 Janice Female 136032 10.696 TRUE Marketing
## 470 Stephen Male 141958 9.078 TRUE Legal
## 471 Ryan Male 139917 11.466 FALSE Distribution
## 472 Sharon Female 147635 14.693 FALSE Human Resources
## 473 Joshua Male 68230 8.047 FALSE Client Services
## 474 Clarence Male 124365 8.457 FALSE Business Development
## 475 Jonathan Male 104749 11.364 FALSE Engineering
## 476 Stephanie Female 122121 7.937 TRUE Engineering
## 477 Kathy Female 143541 8.461 FALSE Human Resources
## 478 Albert Male 137840 9.705 FALSE Business Development
## 479 Michael Male 73354 18.357 FALSE Business Development
## 480 Richard Male 47647 18.787 TRUE <NA>
## 481 Amy Female 106249 15.967 FALSE Sales
## 482 <NA> Female 93847 1.085 <NA> Business Development
## 483 Beverly <NA> 104815 3.38 FALSE Product
## 484 <NA> Female 115436 3.099 <NA> Human Resources
## 485 Joe Male 50645 11.119 FALSE Marketing
## 486 Ashley Female 142415 1.985 TRUE Finance
## 487 Howard Male 37984 2.021 FALSE Distribution
## 488 Cheryl Female 71751 15.918 FALSE Business Development
## 489 Robert Male 135882 19.944 FALSE Legal
## 490 Sharon Female 46007 19.731 TRUE Client Services
## 491 Judith Female 117055 7.461 FALSE Marketing
## 492 Nicholas <NA> 58478 6.525 TRUE <NA>
## 493 Jerry Male 121357 18.845 FALSE Business Development
## 494 Craig Male 44857 13.266 FALSE Finance
## 495 Barbara Female 47322 7.25 TRUE Sales
## 496 Eugene Male 81077 2.117 FALSE Sales
## 497 Johnny Male 76394 5.437 TRUE Marketing
## 498 Benjamin Male 114356 7.123 FALSE Client Services
## 499 Diana Female 41831 4.548 FALSE Business Development
## 500 Barbara <NA> 90187 14.764 TRUE Distribution
## 501 Christine Female 72613 11.126 FALSE Legal
## 502 Sean Male 42748 9.765 FALSE Distribution
## 503 Sarah Female 37259 1.763 FALSE Client Services
## 504 Roger Male 125033 4.887 TRUE Marketing
## 505 <NA> Female 38275 10.494 <NA> Distribution
## 506 <NA> Male 71520 13.248 <NA> Engineering
## 507 Donald Male 106472 6.499 TRUE Client Services
## 508 Clarence Male 103684 18.654 TRUE Engineering
## 509 Scott Male 96111 15.592 FALSE Human Resources
## 510 Ann Female 90719 6.22 FALSE Business Development
## 511 Frank Male 140303 17.07 FALSE Legal
## 512 Joe Male 62161 8.13 TRUE Business Development
## 513 Wanda Female 78883 19.695 FALSE <NA>
## 514 Jimmy Male 63549 19.624 FALSE <NA>
## 515 Amy <NA> 63888 18.115 TRUE Product
## 516 Larry Male 97370 10.458 FALSE Sales
## 517 Gloria Female 66224 15.979 TRUE Legal
## 518 Ruth Female 97915 3.997 TRUE Human Resources
## 519 Diana Female 86883 18.003 TRUE Product
## 520 Raymond Male 37812 3.178 FALSE Human Resources
## 521 Peter Male 56580 8.411 TRUE <NA>
## 522 <NA> <NA> 83895 17.971 <NA> Engineering
## 523 Catherine Female 58047 14.858 TRUE Distribution
## 524 Diane Female 124889 15.026 TRUE Sales
## 525 Peter <NA> 118840 14.509 TRUE Client Services
## 526 Steve Male 67780 9.54 TRUE Human Resources
## 527 Barbara Female 144677 8.696 FALSE Finance
## 528 Helen <NA> 45724 1.022 FALSE Product
## 529 Jimmy <NA> 86676 7.175 TRUE Product
## 530 Christopher Male 82401 7.252 FALSE Client Services
## 531 Kathleen Female 35575 14.595 FALSE Distribution
## 532 Virginia Female 123649 10.154 TRUE Marketing
## 533 Lisa Female 38078 10.28 TRUE Product
## 534 Earl Male 52620 13.773 FALSE Product
## 535 Gerald <NA> 133366 12.292 FALSE Legal
## 536 Louise Female 91462 8.205 FALSE Marketing
## 537 Clarence Male 146589 4.905 TRUE Business Development
## 538 Cynthia Female 51633 13.472 TRUE Business Development
## 539 Adam Male 45181 3.491 FALSE Human Resources
## 540 Justin Male 62454 3.459 TRUE Sales
## 541 <NA> Female 84746 4.82 <NA> Finance
## 542 Ruby Female 147362 7.851 TRUE Engineering
## 543 Amanda Female 80803 14.077 TRUE Distribution
## 544 Anna Female 117293 2.366 FALSE Client Services
## 545 Roger Male 105689 13.32 TRUE Marketing
## 546 Julie <NA> 93302 9.048 TRUE Product
## 547 Joseph Male 102555 3.672 TRUE Product
## 548 Evelyn Female 51525 10.366 FALSE Finance
## 549 Janice Female 41190 3.311 TRUE Sales
## 550 Sara Female 75484 3.186 FALSE Finance
## 551 Gregory Male 82726 16.304 TRUE Marketing
## 552 Philip Male 122319 19.122 FALSE Engineering
## 553 Barbara Female 127297 11.905 TRUE Product
## 554 Amanda Female 109290 13.853 FALSE Engineering
## 555 Phyllis <NA> 99150 6.007 FALSE Client Services
## 556 Anne Female 71930 18.451 TRUE Product
## 557 Arthur Male 66819 6.639 TRUE Marketing
## 558 Jane Female 42424 18.115 FALSE Distribution
## 559 Linda Female 51431 13.295 FALSE Engineering
## 560 Ashley Female 120675 6.51 FALSE Human Resources
## 561 Shawn Male 96610 2.097 TRUE Client Services
## 562 Kathleen Female 71430 8.572 FALSE Engineering
## 563 Sara <NA> 87713 18.863 TRUE Legal
## 564 Randy Male 86723 14.842 FALSE Engineering
## 565 Andrew Male 43414 7.563 TRUE Client Services
## 566 Marilyn Female 87145 17.33 FALSE Human Resources
## 567 Johnny Male 91124 12.986 TRUE Distribution
## 568 <NA> Female 48141 12.605 <NA> <NA>
## 569 Susan Female 90829 19.142 FALSE Marketing
## 570 Ruby Female 101262 6.773 FALSE Client Services
## 571 Kelly Female 41427 1.431 FALSE Product
## 572 Jonathan Male 56993 18.623 FALSE Marketing
## 573 Raymond Male 114244 16.69 FALSE Human Resources
## 574 Kimberly Female 81800 5.435 TRUE <NA>
## 575 <NA> Female 118736 7.421 <NA> Client Services
## 576 John Male 66077 5.809 TRUE Product
## 577 Michael Male 35013 14.879 FALSE Product
## 578 Jerry Male 98393 11.393 FALSE Client Services
## 579 Amanda Female 107111 1.438 TRUE Product
## 580 Harold Male 65673 1.187 TRUE Legal
## 581 Harry Male 65482 18.089 FALSE <NA>
## 582 Ernest Male 81919 15.118 FALSE Marketing
## 583 Harry Male 59277 10.2 FALSE Finance
## 584 Diane Female 49501 13.506 FALSE Business Development
## 585 Jack Male 70367 18.266 TRUE Legal
## 586 Shirley Female 132156 2.754 FALSE Marketing
## 587 Rose Female 56961 7.585 FALSE Marketing
## 588 Cheryl Female 98841 8.945 TRUE Client Services
## 589 Cynthia Female 78226 2.419 FALSE Human Resources
## 590 Sandra Female 116931 9.657 TRUE Human Resources
## 591 Andrea Female 87575 13.346 TRUE Business Development
## 592 Rachel Female 110924 7.808 FALSE Distribution
## 593 Justin Male 78351 15.221 FALSE Sales
## 594 Marie Female 125574 4.644 FALSE Sales
## 595 Louis Male 95198 2.075 FALSE Business Development
## 596 Nancy Female 121006 3.512 TRUE Finance
## 597 Mark Male 95728 6.752 TRUE Business Development
## 598 Teresa Female 69740 8.294 FALSE Distribution
## 599 Frances Female 91996 11.506 FALSE Product
## 600 <NA> Female 98385 10.925 <NA> Human Resources
## 601 Barbara <NA> 90556 15.749 TRUE Distribution
## 602 Christine <NA> 50366 9.862 TRUE Marketing
## 603 Bobby <NA> 84232 15.704 TRUE Engineering
## 604 Carl <NA> 100888 12.49 TRUE Business Development
## 605 Bruce Male 141335 15.427 TRUE Engineering
## 606 Rose Female 97691 2.142 FALSE Client Services
## 607 Mildred Female 47266 10.256 FALSE Client Services
## 608 <NA> Male 139754 12.74 <NA> Sales
## 609 <NA> Female 116236 17.274 <NA> Engineering
## 610 Todd Male 103405 15.91 FALSE Sales
## 611 Alan Male 41453 10.084 FALSE Product
## 612 Elizabeth <NA> 52730 12.525 FALSE Marketing
## 613 Julie <NA> 60361 7.099 TRUE Distribution
## 614 Teresa <NA> 63103 11.378 FALSE Finance
## 615 Eric Male 65168 11.513 FALSE Distribution
## 616 Edward Male 73105 6.083 TRUE Engineering
## 617 Kimberly Female 37916 12.929 TRUE Distribution
## 618 Kathy Female 45682 1.451 TRUE Marketing
## 619 Peter Male 69297 1.268 FALSE Finance
## 620 Beverly Female 59070 19.064 TRUE Engineering
## 621 Ralph Male 81215 8.4 FALSE Finance
## 622 Justin Male 128036 18.824 FALSE Client Services
## 623 Irene Female 89780 8.999 TRUE Sales
## 624 Irene Female 125018 9.601 TRUE Legal
## 625 Jeremy Male 133033 12.2 FALSE Sales
## 626 Cynthia Female 82408 8.701 TRUE Marketing
## 627 <NA> Female 131755 2.93 <NA> <NA>
## 628 Anne Female 128305 16.636 FALSE Marketing
## 629 <NA> <NA> 147309 1.702 <NA> Distribution
## 630 Debra Female 74911 19.513 FALSE Marketing
## 631 Michael Male 47079 2.617 FALSE Legal
## 632 Jason <NA> 82873 1.869 FALSE Client Services
## 633 Rebecca Female 134673 6.878 FALSE Engineering
## 634 Andrea Female 123591 6.5 TRUE Engineering
## 635 Carl Male 75598 19.289 FALSE <NA>
## 636 Randy Male 89831 13.047 TRUE <NA>
## 637 Marilyn Female 92430 2.924 FALSE Engineering
## 638 Wayne Male 126956 18.396 FALSE Human Resources
## 639 Frances Female 112467 1.433 FALSE Business Development
## 640 Amanda <NA> 46665 19.391 TRUE Client Services
## 641 Kathleen Female 42553 3.756 TRUE Distribution
## 642 Amanda Female 102081 5.203 FALSE Sales
## 643 Amanda <NA> 135118 15.108 FALSE Finance
## 644 Shawn Male 71975 9.77 FALSE Human Resources
## 645 Katherine Female 149908 18.912 FALSE Finance
## 646 Anna <NA> 45418 10.162 FALSE Marketing
## 647 Elizabeth Female 79145 19.78 FALSE Finance
## 648 Donald Male 122920 5.32 FALSE <NA>
## 649 Gregory Male 128031 15.857 TRUE Business Development
## 650 Ruth Female 59969 14.064 TRUE Finance
## 651 Cynthia Female 35381 11.749 FALSE Finance
## 652 Deborah Female 113129 17.371 FALSE Client Services
## 653 Willie Male 141932 1.017 TRUE Engineering
## 654 Debra Female 42296 16.922 FALSE Product
## 655 Carl Male 63395 11.411 FALSE Legal
## 656 Walter Male 125382 13.613 TRUE Client Services
## 657 Lisa Female 113592 17.108 TRUE Marketing
## 658 Rebecca Female 46750 11.367 TRUE Sales
## 659 Jessica Female 90285 13.591 TRUE Client Services
## 660 Steve <NA> 53692 4.785 TRUE Marketing
## 661 Mary <NA> 100341 6.662 FALSE Distribution
## 662 Craig Male 123876 4.225 FALSE Engineering
## 663 Katherine Female 41643 4.659 TRUE Distribution
## 664 Andrea Female 113760 12.866 TRUE Distribution
## 665 Timothy Male 49473 12.463 FALSE Legal
## 666 Anthony Male 146141 3.645 TRUE Distribution
## 667 Harold <NA> 79459 7.776 TRUE Business Development
## 668 Douglas <NA> 104496 14.771 TRUE Marketing
## 669 James Male 67789 17.105 TRUE Legal
## 670 Joseph <NA> 86564 11.879 TRUE <NA>
## 671 Martha Female 135758 14.782 TRUE Client Services
## 672 Laura Female 84672 3.96 FALSE Distribution
## 673 Robert Male 111580 10.982 FALSE Human Resources
## 674 Ralph Male 50455 16.248 FALSE Business Development
## 675 <NA> Male 88733 1.932 <NA> Human Resources
## 676 Diane Female 130577 12.791 FALSE Marketing
## 677 Annie Female 138925 9.801 TRUE Marketing
## 678 Jacqueline Female 62371 15.77 TRUE Client Services
## 679 Brenda Female 87715 2.675 FALSE Product
## 680 Lori Female 66029 3.345 TRUE Client Services
## 681 Albert Male 86818 14.301 TRUE Engineering
## 682 Frank Male 75147 17.398 FALSE Product
## 683 Deborah Female 105573 5.268 TRUE Product
## 684 Robin <NA> 41230 6.611 TRUE Distribution
## 685 Alice Female 117787 10.485 FALSE <NA>
## 686 <NA> Male 88086 19.387 <NA> Finance
## 687 Paul <NA> 91462 18.704 FALSE Finance
## 688 <NA> Male 139959 8.992 <NA> Distribution
## 689 Brian Male 93901 17.821 TRUE Legal
## 690 Julie <NA> 50529 17.263 FALSE Finance
## 691 Bruce Male 134988 13.926 TRUE Sales
## 692 Joan Female 120941 3.694 TRUE Finance
## 693 Harold Male 118753 8.162 FALSE Sales
## 694 Alice Female 92799 2.782 FALSE Sales
## 695 Barbara Female 85718 13.326 FALSE Client Services
## 696 Sarah Female 109517 5.146 FALSE Human Resources
## 697 Samuel Male 85550 11.593 TRUE Marketing
## 698 <NA> Male 136655 9.801 <NA> Distribution
## 699 Barbara <NA> 43312 8.218 TRUE Legal
## 700 Amy <NA> 102839 10.385 TRUE Distribution
## 701 Frank Male 78891 7.927 TRUE Distribution
## 702 Robin <NA> 93201 11.712 TRUE Legal
## 703 Debra Female 84693 6.976 TRUE Finance
## 704 Brandon <NA> 73587 9.769 TRUE Human Resources
## 705 Thomas Male 65251 11.211 FALSE Distribution
## 706 Harry Male 129148 15.193 TRUE Engineering
## 707 Todd Male 128175 18.473 TRUE <NA>
## 708 Patricia Female 75825 7.839 FALSE Engineering
## 709 Steve Male 51821 1.197 TRUE Legal
## 710 <NA> Male 60411 12.048 <NA> Human Resources
## 711 Edward Male 58327 17.095 TRUE Finance
## 712 Karen Female 46478 16.552 FALSE Engineering
## 713 Martin <NA> 123963 15.745 TRUE Engineering
## 714 Ann Female 79796 9.851 FALSE Distribution
## 715 Jonathan Male 83809 12.922 FALSE Distribution
## 716 Peter Male 77933 13.132 TRUE Engineering
## 717 Eric Male 51070 13.806 TRUE Business Development
## 718 Jason <NA> 97480 11.518 FALSE Human Resources
## 719 Terry Male 58357 7.969 FALSE Engineering
## 720 John Male 67165 13.001 FALSE Engineering
## 721 Marie Female 145988 18.685 TRUE Human Resources
## 722 Harold Male 147417 11.626 TRUE Business Development
## 723 Joshua Male 95003 5.197 TRUE Marketing
## 724 Brandon <NA> 144187 11.416 TRUE Legal
## 725 Andrea Female 37888 13.47 FALSE Engineering
## 726 Jeremy Male 131513 1.876 TRUE Finance
## 727 Daniel Male 77287 13 TRUE <NA>
## 728 Roy Male 46875 12.942 TRUE Sales
## 729 Dorothy Female 82744 19.111 TRUE Client Services
## 730 Steven Male 43252 18.892 FALSE Client Services
## 731 Nicole Female 66047 18.674 TRUE Marketing
## 732 Ann Female 118431 12.772 TRUE Finance
## 733 Henry Male 59943 1.432 FALSE Finance
## 734 <NA> Male 136681 3.655 <NA> Business Development
## 735 <NA> Female 138807 1.4 <NA> Human Resources
## 736 Barbara Female 82884 6.837 TRUE Business Development
## 737 Judy Female 48668 11.716 TRUE Product
## 738 Lawrence Male 122971 14.618 FALSE Product
## 739 Joseph Male 107050 12.737 FALSE Business Development
## 740 Carlos Male 138598 14.737 FALSE Sales
## 741 Russell <NA> 149456 3.533 FALSE Marketing
## 742 Jane Female 128540 15.941 FALSE Legal
## 743 Martin Male 61117 2.844 FALSE Client Services
## 744 Steven Male 100949 13.813 TRUE Human Resources
## 745 Judy <NA> 46829 5.19 TRUE Finance
## 746 James Male 69111 14.625 TRUE Business Development
## 747 Gloria Female 46602 1.027 TRUE Business Development
## 748 Ernest Male 61181 16.559 FALSE Business Development
## 749 Martha Female 94963 19.626 TRUE Business Development
## 750 Janet <NA> 85789 9.712 FALSE Legal
## 751 Louis <NA> 145274 16.379 FALSE Product
## 752 Tina Female 102841 3.369 FALSE Client Services
## 753 Henry Male 49665 18.338 FALSE Business Development
## 754 Antonio Male 41928 5.478 TRUE <NA>
## 755 Lisa Female 128042 2.03 TRUE Legal
## 756 Denise Female 86150 3.997 FALSE Client Services
## 757 Stephen Male 121816 10.615 TRUE Distribution
## 758 Benjamin Male 123409 7.783 FALSE Product
## 759 Carl <NA> 98295 7.617 TRUE Legal
## 760 Ruth Female 59678 10.895 FALSE Distribution
## 761 Shawn Male 57871 4.02 TRUE Human Resources
## 762 Jennifer Female 132084 10.006 TRUE Engineering
## 763 Terry Male 35633 3.947 TRUE Distribution
## 764 Jacqueline Female 125298 3.019 TRUE Legal
## 765 Roger Male 115582 15.343 TRUE Sales
## 766 Alice <NA> 148339 11.479 TRUE Finance
## 767 Justin Male 112975 9.699 FALSE Human Resources
## 768 Janet Female 36927 18.769 FALSE Client Services
## 769 Robin Female 70248 9.66 TRUE Sales
## 770 Samuel <NA> 141305 9.849 TRUE Marketing
## 771 Katherine Female 72002 13.178 TRUE Product
## 772 Peter Male 102577 12.026 TRUE Product
## 773 Lillian Female 113554 18.018 TRUE Business Development
## 774 <NA> Male 47176 10.736 <NA> Finance
## 775 <NA> Female 106428 10.867 <NA> <NA>
## 776 Rose Female 75181 6.06 TRUE Finance
## 777 Bobby Male 79047 18.784 FALSE Human Resources
## 778 Kimberly Female 52970 4.513 FALSE Sales
## 779 Antonio Male 137979 5.266 FALSE Distribution
## 780 Doris Female 114360 17.799 TRUE Business Development
## 781 Steven Male 110306 16.843 TRUE Human Resources
## 782 Lawrence Male 46378 9.127 FALSE <NA>
## 783 <NA> Male 74104 17.68 <NA> Client Services
## 784 <NA> <NA> 132505 13.592 <NA> Product
## 785 Stephanie Female 96649 3.453 FALSE Sales
## 786 <NA> Female 57811 8.941 <NA> Marketing
## 787 Sandra Female 111468 2.58 TRUE Product
## 788 Kevin Male 141498 4.135 TRUE Sales
## 789 Michelle <NA> 124441 16.353 FALSE Business Development
## 790 Lisa Female 40121 6.293 FALSE Product
## 791 Fred Male 129712 11.058 FALSE Finance
## 792 Joseph Male 126010 19.601 FALSE Legal
## 793 Anne <NA> 122762 9.564 FALSE Distribution
## 794 Andrea Female 149105 13.707 TRUE Distribution
## 795 Nicole Female 44021 10.286 FALSE <NA>
## 796 Theresa <NA> 42025 3.319 TRUE Human Resources
## 797 Catherine Female 59970 12.801 FALSE Client Services
## 798 Rose Female 145001 19.85 FALSE Engineering
## 799 Lawrence Male 102589 17.952 TRUE Marketing
## 800 Raymond Male 47529 2.712 TRUE Product
## 801 Clarence Male 148941 11.517 FALSE Product
## 802 Ryan Male 91109 17.499 TRUE Human Resources
## 803 Jane Female 51923 13.623 FALSE Business Development
## 804 Lois Female 53954 19.075 FALSE Product
## 805 Shawn Male 39335 10.664 FALSE Distribution
## 806 Thomas Male 111371 15.081 TRUE Engineering
## 807 Kathryn Female 86676 6.081 FALSE Sales
## 808 Mary Female 115057 2.089 FALSE Finance
## 809 Julie Female 145357 3.459 FALSE Engineering
## 810 George Male 36749 19.754 FALSE Finance
## 811 Ralph Male 89854 7.227 FALSE Business Development
## 812 Judith Female 134048 6.818 TRUE Marketing
## 813 <NA> Male 141311 5.478 <NA> Product
## 814 Evelyn Female 123621 19.767 TRUE Marketing
## 815 Rachel Female 54941 3.221 TRUE Distribution
## 816 Maria <NA> 106562 4 FALSE Human Resources
## 817 Kelly Female 39371 4.068 FALSE Engineering
## 818 Ruby Female 83112 4.083 FALSE Marketing
## 819 Ann Female 96941 10.048 TRUE Distribution
## 820 Kenneth Male 47232 17.862 TRUE Legal
## 821 William Male 54058 5.182 TRUE Human Resources
## 822 <NA> Female 80399 12.254 <NA> Distribution
## 823 Deborah Female 118043 7.266 TRUE Business Development
## 824 <NA> Male 109411 9.494 <NA> Engineering
## 825 Julia Female 97566 2.147 FALSE Marketing
## 826 Robert <NA> 69267 5.89 TRUE Sales
## 827 <NA> <NA> 87103 5.665 <NA> <NA>
## 828 Jesse <NA> 98811 7.487 FALSE Legal
## 829 Cynthia Female 149684 7.864 FALSE Product
## 830 Johnny Male 71383 2.097 TRUE Human Resources
## 831 Michael Male 81206 19.908 TRUE Distribution
## 832 Kenneth Male 69112 7.588 TRUE Finance
## 833 Keith Male 120672 19.467 FALSE Legal
## 834 Gerald Male 96511 9.331 FALSE Human Resources
## 835 Carl <NA> 49325 2.071 TRUE Business Development
## 836 Douglas Male 132175 2.28 FALSE Engineering
## 837 Carolyn Female 118037 13.492 FALSE Finance
## 838 Joseph Male 139570 15.804 TRUE Finance
## 839 Billy Male 115280 9.153 FALSE Distribution
## 840 Joyce Female 51065 16.807 FALSE Human Resources
## 841 Lillian Female 103854 4.924 TRUE Distribution
## 842 Ruby Female 48354 19.501 FALSE Business Development
## 843 Sandra Female 132327 19.264 FALSE Product
## 844 Louise Female 106362 8.965 FALSE Sales
## 845 Maria <NA> 148857 8.738 FALSE Legal
## 846 Brenda Female 131131 11.682 FALSE Client Services
## 847 Stephen Male 129663 15.574 FALSE Human Resources
## 848 Nicole <NA> 41449 4.707 FALSE Finance
## 849 Bobby Male 93368 2.88 TRUE Product
## 850 Ernest Male 53335 9.192 FALSE Product
## 851 Charles Male 148291 6.002 FALSE <NA>
## 852 Bobby Male 147842 16.158 TRUE <NA>
## 853 Diana Female 105066 17.343 TRUE Business Development
## 854 Mildred Female 139284 11.39 TRUE <NA>
## 855 Harry Male 63046 10.411 FALSE Finance
## 856 Phillip <NA> 89700 2.277 TRUE <NA>
## 857 Bonnie <NA> 108946 12.211 FALSE Finance
## 858 Marie Female 98406 7.943 TRUE Product
## 859 Sean Male 108581 9.634 FALSE Human Resources
## 860 Robin Female 41808 19.239 FALSE Business Development
## 861 Phillip Male 36837 14.66 FALSE Marketing
## 862 Jerry Male 140850 18.855 FALSE Finance
## 863 Ronald Male 50426 18.536 TRUE Sales
## 864 Phillip Male 134120 6.842 FALSE Human Resources
## 865 Ryan Male 57292 6.01 FALSE <NA>
## 866 Karen Female 80633 16.306 FALSE Legal
## 867 Bonnie Female 131943 14.249 FALSE Client Services
## 868 Earl Male 48046 7.737 FALSE Marketing
## 869 Katherine Female 97443 13.657 FALSE Product
## 870 Matthew Male 135352 7.986 TRUE Business Development
## 871 Cynthia <NA> 107816 18.751 FALSE Marketing
## 872 Gerald Male 96329 2.469 TRUE Engineering
## 873 Brenda Female 73749 19.332 FALSE Business Development
## 874 Jason Male 75607 4.299 TRUE Sales
## 875 Melissa Female 98858 3.525 TRUE Human Resources
## 876 Beverly Female 76485 9.212 TRUE Engineering
## 877 Terry <NA> 41238 8.219 FALSE Marketing
## 878 <NA> Male 114896 13.823 <NA> Client Services
## 879 Jacqueline Female 125418 8.064 FALSE Distribution
## 880 Amy Female 75415 19.132 FALSE Client Services
## 881 Robert <NA> 90998 8.382 FALSE Finance
## 882 Ruby Female 142868 6.318 FALSE Marketing
## 883 Sara <NA> 135990 14.344 TRUE Distribution
## 884 Kathryn Female 57300 18.015 FALSE Marketing
## 885 Lisa Female 73706 18.53 FALSE Finance
## 886 Juan Male 85871 9.16 FALSE Product
## 887 Anthony Male 96795 14.837 FALSE Sales
## 888 David Male 92242 15.407 FALSE Legal
## 889 Marilyn Female 115149 11.934 TRUE Legal
## 890 Margaret Female 126924 1.552 TRUE Finance
## 891 <NA> Male 145329 7.1 <NA> Finance
## 892 Timothy Male 92587 8.475 FALSE Finance
## 893 Brandon Male 60263 2.709 FALSE Sales
## 894 Frank Male 91406 5.681 TRUE Business Development
## 895 Betty Female 37005 7.645 TRUE Marketing
## 896 Janice <NA> 139791 16.968 FALSE Business Development
## 897 Victor Male 45267 3.942 TRUE Sales
## 898 Kenneth <NA> 95296 10.146 FALSE Finance
## 899 Jane Female 59680 15.211 TRUE Business Development
## 900 Walter Male 144701 16.323 TRUE Marketing
## 901 Christina Female 35477 18.178 FALSE Human Resources
## 902 Patricia Female 119266 6.911 FALSE Distribution
## 903 <NA> Male 103877 6.322 <NA> Distribution
## 904 Heather Female 47605 14.955 TRUE Human Resources
## 905 Stephanie Female 136604 6.16 TRUE Human Resources
## 906 Alice Female 121250 4.363 TRUE Finance
## 907 Frances Female 35884 17.667 FALSE Sales
## 908 Elizabeth Female 137144 10.081 FALSE Finance
## 909 Janice Female 102697 3.283 FALSE Engineering
## 910 Donald Male 61999 6.466 FALSE Product
## 911 Melissa Female 45223 8.879 TRUE Legal
## 912 Carl Male 54033 15.528 TRUE Business Development
## 913 Joe Male 126120 1.02 FALSE <NA>
## 914 Lois Female 99747 6.168 FALSE Marketing
## 915 Ann Female 71958 5.272 TRUE Distribution
## 916 Todd Male 115566 6.716 TRUE Client Services
## 917 Marilyn Female 118369 7.696 TRUE Business Development
## 918 Shawn Male 51667 6.339 FALSE Product
## 919 Ryan Male 85858 19.475 FALSE Client Services
## 920 Sean Male 131423 8.957 FALSE Distribution
## 921 Rose Female 49538 9.828 FALSE Client Services
## 922 George Male 50369 18.9 TRUE Sales
## 923 Arthur Male 86615 3.412 TRUE Business Development
## 924 Irene <NA> 135369 4.38 FALSE Business Development
## 925 Deborah Female 60003 9.624 FALSE Client Services
## 926 <NA> Female 95866 19.388 <NA> Sales
## 927 Judith Female 109324 19.488 FALSE Distribution
## 928 Philip Male 103557 16.014 TRUE Business Development
## 929 Jeffrey Male 111376 2.673 TRUE Business Development
## 930 Theresa Female 75661 1.079 TRUE Legal
## 931 Nancy Female 85213 2.386 TRUE Marketing
## 932 Harold Male 140444 3.771 FALSE Distribution
## 933 Bonnie Female 90427 2.01 TRUE Client Services
## 934 Doris Female 141439 3.799 FALSE Business Development
## 935 Samuel Male 43694 3.787 TRUE Engineering
## 936 Alice Female 131952 12.09 FALSE Engineering
## 937 Maria Female 96250 10.056 FALSE Business Development
## 938 Aaron <NA> 63126 18.424 FALSE Client Services
## 939 Mark <NA> 44836 2.657 FALSE Client Services
## 940 Ralph <NA> 70635 2.147 FALSE Client Services
## 941 Andrew Male 137386 8.611 TRUE Distribution
## 942 William Male 104840 15.653 TRUE Engineering
## 943 Lori Female 75498 6.537 TRUE Marketing
## 944 Wayne Male 67471 2.728 FALSE Engineering
## 945 Kenneth Male 101914 1.905 TRUE Distribution
## 946 Gerald <NA> 93712 17.426 TRUE Distribution
## 947 <NA> Female 133472 16.941 <NA> Distribution
## 948 <NA> Male 107351 5.329 <NA> Marketing
## 949 Ashley Female 142410 11.048 TRUE Engineering
## 950 Scott Male 37385 8.226 TRUE Business Development
## 951 Paula Female 58423 10.833 FALSE Business Development
## 952 <NA> Female 143638 9.662 <NA> <NA>
## 953 Teresa Female 113425 11.907 TRUE Human Resources
## 954 Randy Male 57266 14.077 FALSE Product
## 955 Joe Male 119667 1.148 TRUE Finance
## 956 Sarah Female 127118 11.176 FALSE Legal
## 957 Beverly Female 80838 8.115 FALSE Engineering
## 958 Jeffrey Male 70990 15.901 TRUE Sales
## 959 Gloria Female 39833 9.631 FALSE Engineering
## 960 Albert Male 45094 5.85 TRUE Business Development
## 961 Stephen Male 93997 18.093 TRUE Business Development
## 962 Antonio <NA> 103050 3.05 FALSE Legal
## 963 Jonathan Male 121797 16.923 FALSE Product
## 964 Ann Female 89443 17.94 TRUE Sales
## 965 Bruce Male 35802 12.391 TRUE Sales
## 966 Catherine Female 68164 18.393 FALSE Client Services
## 967 Louis Male 93022 9.146 TRUE Human Resources
## 968 Thomas Male 105681 19.572 FALSE Engineering
## 969 Louise Female 43050 11.671 FALSE Distribution
## 970 Linda Female 44486 17.308 TRUE Engineering
## 971 Alice Female 63571 15.397 TRUE Product
## 972 Patrick Male 75423 5.368 TRUE Business Development
## 973 Victor <NA> 76381 11.159 TRUE Sales
## 974 Russell Male 137359 11.105 FALSE Business Development
## 975 Harry Male 67656 16.455 TRUE Client Services
## 976 Susan Female 92436 12.467 FALSE Sales
## 977 Denise Female 137954 4.195 TRUE Legal
## 978 Sarah Female 124566 5.949 FALSE Product
## 979 Sean Male 66146 11.178 FALSE Human Resources
## 980 Ernest Male 142935 13.198 TRUE Product
## 981 Kimberly Female 46233 8.862 TRUE Engineering
## 982 James Male 148985 19.28 FALSE Legal
## 983 Rose Female 91411 8.639 TRUE Human Resources
## 984 John Male 146907 11.738 FALSE Engineering
## 985 Maria Female 43455 13.04 FALSE Engineering
## 986 Stephen <NA> 85668 1.909 FALSE Legal
## 987 Donna Female 82871 17.999 FALSE Marketing
## 988 Gloria Female 136709 10.331 TRUE Finance
## 989 Alice Female 47638 11.209 FALSE Human Resources
## 990 Justin <NA> 38344 3.794 FALSE Legal
## 991 Robin Female 100765 10.982 TRUE Client Services
## 992 Rose Female 134505 11.051 TRUE Marketing
## 993 Anthony Male 112769 11.625 TRUE Finance
## 994 Tina Female 56450 19.04 TRUE Engineering
## 995 George Male 98874 4.479 TRUE Marketing
## 996 Henry <NA> 132483 16.655 FALSE Distribution
## 997 Phillip Male 42392 19.675 FALSE Finance
## 998 Russell Male 96914 1.421 FALSE Product
## 999 Larry Male 60500 11.985 FALSE Business Development
## 1000 Albert Male 129949 10.169 TRUE Sales
# Turn placeholder strings into real NA values in every column with
# `replace_with_na_all()`; values not in this list (e.g. "n.a.") stay as-is
employees %>%
  replace_with_na_all(condition = ~ .x %in% c("", " ", "na", "NaN", "?"))
## # A tibble: 1,000 x 6
## First.Name Gender Salary Bonus.. Senior.Management Team
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Douglas Male 97308 6.945 TRUE Marketing
## 2 Thomas Male 61933 <NA> TRUE <NA>
## 3 Maria Female 130590 11.858 FALSE Finance
## 4 Jerry Male <NA> 9.34 TRUE Finance
## 5 Larry Male 101004 1.389 TRUE Client Services
## 6 Dennis n.a. 115163 10.125 FALSE Legal
## 7 Ruby Female 65476 10.012 TRUE Product
## 8 <NA> Female 45906 11.598 <NA> Finance
## 9 Angela <NA> <NA> 18.523 TRUE Engineering
## 10 Frances Female 139852 7.524 TRUE Business Development
## # … with 990 more rows
Filling down missing values
Explicitly Missing: they are missing in the data and indicated with NA or something else.
Implicitly Missing: Not shown in the data, but implied (e.g. a missing level)
# Build the `frogger` data: one row per recorded name/time pair. Not every
# name has a row for every time, so some combinations are implicitly missing.
name <- rep(c("jesse", "andy", "nic", "dan", "alex"), times = c(4, 3, 2, 2, 4))
time <- c(
  "morning", "afternoon", "evening", "late_night", # jesse
  "morning", "afternoon", "late_night",            # andy
  "afternoon", "late_night",                       # nic
  "morning", "evening",                            # dan
  "morning", "afternoon", "evening", "late_night"  # alex
)
value <- c(
  6678, 800060, 475528, 143533,
  425115, 587468, 111000,
  588532, 915533,
  388148, 180912,
  552670, 98355, 266055, 121056
)
frogger <- data.frame(name = name, time = time, value = value)
# Compact overview of `frogger`: row/column counts, column types, first values
glimpse( frogger )
## Rows: 15
## Columns: 3
## $ name <chr> "jesse", "jesse", "jesse", "jesse", "andy", "andy", "andy", "ni…
## $ time <chr> "morning", "afternoon", "evening", "late_night", "morning", "af…
## $ value <dbl> 6678, 800060, 475528, 143533, 425115, 587468, 111000, 588532, 9…
# Expand to every time/name combination with `complete()`: pairings that were
# absent from the data (implicitly missing) become explicit rows with NA value
frogger_tidy <- complete(frogger, time, name)
frogger_tidy
## # A tibble: 20 x 3
## time name value
## <chr> <chr> <dbl>
## 1 afternoon alex 98355
## 2 afternoon andy 587468
## 3 afternoon dan NA
## 4 afternoon jesse 800060
## 5 afternoon nic 588532
## 6 evening alex 266055
## 7 evening andy NA
## 8 evening dan 180912
## 9 evening jesse 475528
## 10 evening nic NA
## 11 late_night alex 121056
## 12 late_night andy 111000
## 13 late_night dan NA
## 14 late_night jesse 143533
## 15 late_night nic 915533
## 16 morning alex 552670
## 17 morning andy 425115
## 18 morning dan 388148
## 19 morning jesse 6678
## 20 morning nic NA
# Fill the `name` column downward with `fill()` so any missing entry takes
# the most recent value above it
tidyr::fill(frogger, name)
## name time value
## 1 jesse morning 6678
## 2 jesse afternoon 800060
## 3 jesse evening 475528
## 4 jesse late_night 143533
## 5 andy morning 425115
## 6 andy afternoon 587468
## 7 andy late_night 111000
## 8 nic afternoon 588532
## 9 nic late_night 915533
## 10 dan morning 388148
## 11 dan evening 180912
## 12 alex morning 552670
## 13 alex afternoon 98355
## 14 alex evening 266055
## 15 alex late_night 121056
# Fill `name` downward first, then make every name/time pairing explicit
fill(frogger, name) %>%
  complete(name, time)
## # A tibble: 20 x 3
## name time value
## <chr> <chr> <dbl>
## 1 alex afternoon 98355
## 2 alex evening 266055
## 3 alex late_night 121056
## 4 alex morning 552670
## 5 andy afternoon 587468
## 6 andy evening NA
## 7 andy late_night 111000
## 8 andy morning 425115
## 9 dan afternoon NA
## 10 dan evening 180912
## 11 dan late_night NA
## 12 dan morning 388148
## 13 jesse afternoon 800060
## 14 jesse evening 475528
## 15 jesse late_night 143533
## 16 jesse morning 6678
## 17 nic afternoon 588532
## 18 nic evening NA
## 19 nic late_night 915533
## 20 nic morning NA
Missing data dependence
- MCAR: Missing Completely at Random.
- missingness has no association with any data you have observed or not observed
- Imputation is advisable
- deleting observations may reduce the sample size, limiting inference, but will not bias.
- MAR: Missing at Random
- missingness depends on data observed, but not data unobserved
- Should be imputing data
- deletion is not advisable and may lead to bias
- MNAR: Missing Not at Random
- missingness of the response is related to an unobserved value relevant to the assessment of interest.
- data will be biased by deletion and imputation
- inference can be limited, proceed with caution
# Restore the objects saved in the local .rda file, then preview `oceanbuoys`
# (column types and leading values)
load("oceanbuoys.rda")
glimpse(oceanbuoys)
## Rows: 736
## Columns: 8
## $ year <dbl> 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997…
## $ latitude <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ longitude <dbl> -110, -110, -110, -110, -110, -110, -110, -110, -110, -110…
## $ sea_temp_c <dbl> 27.59, 27.55, 27.57, 27.62, 27.65, 27.83, 28.01, 28.04, 28…
## $ air_temp_c <dbl> 27.15, 27.02, 27.00, 26.93, 26.84, 26.94, 27.04, 27.11, 27…
## $ humidity <dbl> 79.6, 75.8, 76.5, 76.2, 76.4, 76.7, 76.5, 78.3, 78.6, 76.9…
## $ wind_ew <dbl> -6.4, -5.3, -5.1, -4.9, -3.5, -4.4, -2.0, -3.7, -4.2, -3.6…
## $ wind_ns <dbl> 5.4, 5.3, 4.5, 2.5, 4.1, 1.6, 3.5, 4.5, 5.0, 3.5, 2.9, 1.8…
# Missingness map with rows ordered by year
vis_miss(arrange(oceanbuoys, year))
# Missingness map with rows ordered by latitude
vis_miss(arrange(oceanbuoys, latitude))
# Missingness map with rows ordered by east-west wind (`wind_ew`)
vis_miss(arrange(oceanbuoys, wind_ew))
# Missingness per variable, faceted by year
gg_miss_var(oceanbuoys, facet = year)
Testing missing relationships
Tools to explore missing data dependence
as_shadow() to explore missingness
- Coordinated names: each shadow matrix column inherits its feature's label with the suffix ’_NA’
- Clear Values: binary missing or !missing
bind_shadow() or nabular() to bind the shadow mat with the data == nabular data (a mix of NA and tabular data). This format is useful to do things like calculate summary statistics based on the missingness of a feature
# Mean Wind within each Ozone missingness group (`Ozone_NA`)
bind_shadow(airquality) %>%
  group_by(Ozone_NA) %>%
  summarise(mean = mean(Wind))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## Ozone_NA mean
## <fct> <dbl>
## 1 !NA 9.86
## 2 NA 10.3
Create Nabular Data
# Build the shadow matrix with `as_shadow()`: one `_NA` column per variable,
# each value marked as `NA` or `!NA`
obs <- oceanbuoys %>% as_shadow()
head(obs)
## # A tibble: 6 x 8
## year_NA latitude_NA longitude_NA sea_temp_c_NA air_temp_c_NA humidity_NA
## <fct> <fct> <fct> <fct> <fct> <fct>
## 1 !NA !NA !NA !NA !NA !NA
## 2 !NA !NA !NA !NA !NA !NA
## 3 !NA !NA !NA !NA !NA !NA
## 4 !NA !NA !NA !NA !NA !NA
## 5 !NA !NA !NA !NA !NA !NA
## 6 !NA !NA !NA !NA !NA !NA
## # … with 2 more variables: wind_ew_NA <fct>, wind_ns_NA <fct>
# Attach the shadow columns to the data with `bind_shadow()` to get nabular
# data, then check its dimensions
bob <- oceanbuoys %>% bind_shadow()
dim(bob)
## [1] 736 16
# With `only_miss = TRUE`, shadow columns are added only for the variables
# that actually contain missing values
bob_om <- oceanbuoys %>% bind_shadow(only_miss = TRUE)
dim(bob_om)
## [1] 736 11
Use nabular data to calculate some summary statistics about other features:
# Mean and standard deviation of `wind_ew`, split by whether humidity is
# missing (`humidity_NA`)
bind_shadow(oceanbuoys) %>%
  group_by(humidity_NA) %>%
  summarise(
    wind_ew_mean = mean(wind_ew),
    wind_ew_sd = sd(wind_ew)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 3
## humidity_NA wind_ew_mean wind_ew_sd
## <fct> <dbl> <dbl>
## 1 !NA -3.78 1.90
## 2 NA -3.30 2.31
# Mean and standard deviation of north-south wind (`wind_ns`), split by
# whether humidity is missing (`humidity_NA`)
bind_shadow(oceanbuoys) %>%
  group_by(humidity_NA) %>%
  summarise(
    wind_ns_mean = mean(wind_ns),
    wind_ns_sd = sd(wind_ns)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 3
## humidity_NA wind_ns_mean wind_ns_sd
## <fct> <dbl> <dbl>
## 1 !NA 2.78 2.06
## 2 NA 1.66 2.23
Add information to summarize the missingness of a dataset
# Summarise `wind_ew` (mean, sd) by air temperature missingness
# (`air_temp_c_NA`); `n_obs` counts the rows in each group, i.e. how many
# values are present vs. missing
bind_shadow(oceanbuoys) %>%
  group_by(air_temp_c_NA) %>%
  summarise(
    wind_ew_mean = mean(wind_ew),
    wind_ew_sd = sd(wind_ew),
    n_obs = n()
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 4
## air_temp_c_NA wind_ew_mean wind_ew_sd n_obs
## <fct> <dbl> <dbl> <int>
## 1 !NA -3.91 1.85 655
## 2 NA -2.17 2.14 81
# Summarise `wind_ew` (mean, sd, row count) for each combination of air
# temperature missingness and humidity missingness
bind_shadow(oceanbuoys) %>%
  group_by(air_temp_c_NA, humidity_NA) %>%
  summarise(
    wind_ew_mean = mean(wind_ew),
    wind_ew_sd = sd(wind_ew),
    n_obs = n()
  )
## `summarise()` regrouping output by 'air_temp_c_NA' (override with `.groups` argument)
## # A tibble: 4 x 5
## # Groups: air_temp_c_NA [2]
## air_temp_c_NA humidity_NA wind_ew_mean wind_ew_sd n_obs
## <fct> <fct> <dbl> <dbl> <int>
## 1 !NA !NA -4.01 1.74 565
## 2 !NA NA -3.24 2.31 90
## 3 NA !NA -2.06 2.08 78
## 4 NA NA -4.97 1.74 3
Visualizing missingness across one variable
Exploring conditional missings w/ggplot
- How to use nabular data to explore how values change according to other values going missing
ggplot2 visualizations:
- density plots
- box plots
- etc.
# Density of Temp across all of `airquality`
airquality %>%
  ggplot(aes(x = Temp)) +
  geom_density()
Create nabular data:
# Density of Temp, one coloured curve per Ozone missingness group
bind_shadow(airquality) %>%
  ggplot(aes(x = Temp, color = Ozone_NA)) +
  geom_density()
The values of Temperature do not change much whether data for Ozone are present or NA.
Here is a faceted version:
# Density of Temp, one panel per Ozone missingness group
bind_shadow(airquality) %>%
  ggplot(aes(x = Temp)) +
  geom_density() +
  facet_wrap(~Ozone_NA)
Another look with faceted scatter plots. This gives an idea of how sparse the NA data are compared to when the feature is present.
# Temp vs. Wind scatter plot, one panel per Ozone missingness group
bind_shadow(airquality) %>%
  ggplot(aes(x = Temp, y = Wind)) +
  geom_point() +
  facet_wrap(~Ozone_NA)
We can make the same point, perhaps more obviously, with a box plot.
# Box plot of Temp for each Ozone missingness group
bind_shadow(airquality) %>%
  ggplot(aes(x = Ozone_NA, y = Temp)) +
  geom_boxplot()
This shows how close the medians of the two distributions are.
Visualizing missingness with color:
# Temp vs. Wind scatter plot with points coloured by Ozone missingness
bind_shadow(airquality) %>%
  ggplot(aes(x = Temp, y = Wind, color = Ozone_NA)) +
  geom_point()
Visualize the missingness of two features
# Density of Temp coloured by Ozone missingness, with one panel per
# Solar.R missingness group
bind_shadow(airquality) %>%
  ggplot(aes(x = Temp, color = Ozone_NA)) +
  geom_density() +
  facet_wrap(~Solar.R_NA)
There doesn’t appear to be much difference in the distributions of Temperature when Solar.R info is in a given record. However, when Solar.R is missing, the temperatures are low.
Now to take a look at oceanbuoys
# Missingness map of `oceanbuoys` from `vis_miss()`, with the x-axis title,
# labels, and ticks hidden
vmob <- oceanbuoys %>%
  vis_miss() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
# Density of `wind_ew`, one coloured curve per air temperature
# missingness group (`air_temp_c_NA`)
bsob <- oceanbuoys %>%
  bind_shadow() %>%
  ggplot(aes(x = wind_ew, color = air_temp_c_NA)) +
  geom_density()
# Density of sea temperature, one coloured curve per humidity
# missingness group (`humidity_NA`)
bsob2 <- oceanbuoys %>%
  bind_shadow() %>%
  ggplot(aes(x = sea_temp_c, color = humidity_NA)) +
  geom_density()
# Stack the three plots in a single column
grid.arrange(vmob, bsob, bsob2, ncol = 1)
## Warning: Removed 3 rows containing non-finite values (stat_density).
# Density of east-west wind (`wind_ew`), one panel per air temperature
# missingness group (`air_temp_c_NA`)
ob1 <- bind_shadow(oceanbuoys) %>%
  ggplot(aes(x = wind_ew)) +
  geom_density() +
  facet_wrap(~air_temp_c_NA)
# Same panels, with the curves additionally coloured by humidity
# missingness (`humidity_NA`)
ob2 <- bind_shadow(oceanbuoys) %>%
  ggplot(aes(x = wind_ew, color = humidity_NA)) +
  geom_density() +
  facet_wrap(~air_temp_c_NA)
# Show both plots stacked in one column
grid.arrange(ob1, ob2, ncol = 1)
# Box plot of east-west wind (`wind_ew`) for each level of `air_temp_c_NA`.
ob1 <- ggplot( bind_shadow( oceanbuoys ),
               aes( x = air_temp_c_NA, y = wind_ew ) ) +
  geom_boxplot()
# Same box plot, split into panels by `humidity_NA`.
ob2 <- ggplot( bind_shadow( oceanbuoys ),
               aes( x = air_temp_c_NA, y = wind_ew ) ) +
  geom_boxplot() +
  facet_wrap( ~humidity_NA )
grid.arrange( ob1, ob2, ncol = 1 )
Visualizing missingness across two variables
The problem with visualizing missing data in 2D (e.g. scatterplot) is that rows with missing values are removed. ggplot2 is very kind to give a warning when rows are dropped:
# Plain scatter plot of Solar.R against Ozone on the raw data.
airquality %>%
  ggplot( aes( x = Ozone, y = Solar.R ) ) +
  geom_point()
## Warning: Removed 42 rows containing missing values (geom_point).
geom_miss_point() visualizes missing data by placing them in the margins of a figure
# Solar.R against Ozone, drawn with geom_miss_point() instead of geom_point().
airquality %>%
  ggplot( aes( x = Ozone, y = Solar.R ) ) +
  geom_miss_point()
# Ozone against Wind with geom_miss_point(), one panel per Month.
airquality %>%
  ggplot( aes( x = Wind, y = Ozone ) ) +
  geom_miss_point() +
  facet_wrap( ~ Month )
Visualize missingness with another missing variable
# Ozone against Wind with geom_miss_point(),
# one panel per level of the Solar.R shadow column.
ggplot( bind_shadow( airquality ),
        aes( x = Wind, y = Ozone ) ) +
  geom_miss_point() +
  facet_wrap( ~ Solar.R_NA )
# Air temperature against east-west wind, drawn with `geom_miss_point()`.
ob1 <- oceanbuoys %>%
  ggplot( aes( x = wind_ew, y = air_temp_c ) ) +
  geom_miss_point()
# Air temperature against humidity, drawn with `geom_miss_point()`.
ob2 <- oceanbuoys %>%
  ggplot( aes( x = humidity, y = air_temp_c ) ) +
  geom_miss_point()
# Place the two figures side by side.
grid.arrange( ob1, ob2, ncol = 2 )
# Air temperature against east-west wind with `geom_miss_point()`,
# one panel per year.
ob1 <- oceanbuoys %>%
  ggplot( aes( x = wind_ew, y = air_temp_c ) ) +
  geom_miss_point() +
  facet_wrap( ~year )
# Air temperature against humidity with `geom_miss_point()`,
# one panel per year.
ob2 <- oceanbuoys %>%
  ggplot( aes( x = humidity, y = air_temp_c ) ) +
  geom_miss_point() +
  facet_wrap( ~year )
grid.arrange( ob1, ob2, ncol = 1 )
# Air temperature against east-west wind with geom_miss_point(),
# one panel per level of the humidity shadow column.
oceanbuoys %>%
  bind_shadow() %>%
  ggplot( aes( x = wind_ew, y = air_temp_c ) ) +
  geom_miss_point() +
  facet_wrap( ~humidity_NA )
# Same figure laid out as a grid: humidity shadow level in rows, year in
# columns, via `facet_grid(humidity_NA ~ year)`.
oceanbuoys %>%
  bind_shadow() %>%
  ggplot( aes( x = wind_ew, y = air_temp_c ) ) +
  geom_miss_point() +
  facet_grid( humidity_NA ~ year )
Connecting the dots (Imputation)
Filling in the blanks
Performing and tracking imputation
Using imputations to understand data structure. Visualizing and exploring imputed values
- Imputing data to explore missingness
- tracking missing values
- visualize imputed values against data
impute_below() imputes missing values below the minimum value in the variable
impute_below( c( 5,6,7,NA,9,10 ) )
## [1] 5.00000 6.00000 7.00000 4.40271 9.00000 10.00000
Impute below only for the variables that satisfy a condition
summary( pedestrian )
## hourly_counts date_time year month
## Min. : 0.0 Min. :2016-01-01 00:00:00 Min. :2016 October: 5540
## 1st Qu.: 72.0 1st Qu.:2016-04-08 04:00:00 1st Qu.:2016 January: 2976
## Median : 277.0 Median :2016-07-15 08:00:00 Median :2016 March : 2976
## Mean : 701.8 Mean :2016-07-09 04:46:33 Mean :2016 May : 2976
## 3rd Qu.: 878.0 3rd Qu.:2016-10-11 21:00:00 3rd Qu.:2016 July : 2976
## Max. :11273.0 Max. :2016-12-31 23:00:00 Max. :2016 August : 2976
## NA's :2548 (Other):17280
## month_day week_day hour sensor_id
## Min. : 1.00 Sunday :5396 Min. : 0.0 Min. : 2.00
## 1st Qu.: 8.00 Monday :5376 1st Qu.: 6.0 1st Qu.: 2.00
## Median :16.00 Tuesday :5328 Median :12.0 Median : 7.00
## Mean :15.75 Wednesday:5328 Mean :11.5 Mean :11.15
## 3rd Qu.:23.00 Thursday :5352 3rd Qu.:18.0 3rd Qu.:13.00
## Max. :31.00 Friday :5424 Max. :23.0 Max. :23.00
## Saturday :5496
## sensor_name
## Length:37700
## Class :character
## Mode :character
##
##
##
##
# Apply impute_below to every column for which is.numeric() holds,
# then summarise the imputed data.
pedestrian_imp <- pedestrian %>%
  impute_below_if( is.numeric )
summary( pedestrian_imp )
## hourly_counts date_time year month
## Min. :-1409.0 Min. :2016-01-01 00:00:00 Min. :2016 October: 5540
## 1st Qu.: 44.0 1st Qu.:2016-04-08 04:00:00 1st Qu.:2016 January: 2976
## Median : 243.0 Median :2016-07-15 08:00:00 Median :2016 March : 2976
## Mean : 578.1 Mean :2016-07-09 04:46:33 Mean :2016 May : 2976
## 3rd Qu.: 804.0 3rd Qu.:2016-10-11 21:00:00 3rd Qu.:2016 July : 2976
## Max. :11273.0 Max. :2016-12-31 23:00:00 Max. :2016 August : 2976
## (Other):17280
## month_day week_day hour sensor_id
## Min. : 1.00 Sunday :5396 Min. : 0.0 Min. : 2.00
## 1st Qu.: 8.00 Monday :5376 1st Qu.: 6.0 1st Qu.: 2.00
## Median :16.00 Tuesday :5328 Median :12.0 Median : 7.00
## Mean :15.75 Wednesday:5328 Mean :11.5 Mean :11.15
## 3rd Qu.:23.00 Thursday :5352 3rd Qu.:18.0 3rd Qu.:13.00
## Max. :31.00 Friday :5424 Max. :23.0 Max. :23.00
## Saturday :5496
## sensor_name
## Length:37700
## Class :character
## Mode :character
##
##
##
##
Specify features to be imputed
glimpse( oceanbuoys )
## Rows: 736
## Columns: 8
## $ year <dbl> 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997…
## $ latitude <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ longitude <dbl> -110, -110, -110, -110, -110, -110, -110, -110, -110, -110…
## $ sea_temp_c <dbl> 27.59, 27.55, 27.57, 27.62, 27.65, 27.83, 28.01, 28.04, 28…
## $ air_temp_c <dbl> 27.15, 27.02, 27.00, 26.93, 26.84, 26.94, 27.04, 27.11, 27…
## $ humidity <dbl> 79.6, 75.8, 76.5, 76.2, 76.4, 76.7, 76.5, 78.3, 78.6, 76.9…
## $ wind_ew <dbl> -6.4, -5.3, -5.1, -4.9, -3.5, -4.4, -2.0, -3.7, -4.2, -3.6…
## $ wind_ns <dbl> 5.4, 5.3, 4.5, 2.5, 4.1, 1.6, 3.5, 4.5, 5.0, 3.5, 2.9, 1.8…
miss_var_summary( oceanbuoys )
## # A tibble: 8 x 3
## variable n_miss pct_miss
## <chr> <int> <dbl>
## 1 humidity 93 12.6
## 2 air_temp_c 81 11.0
## 3 sea_temp_c 3 0.408
## 4 year 0 0
## 5 latitude 0 0
## 6 longitude 0 0
## 7 wind_ew 0 0
## 8 wind_ns 0 0
# Impute below only in the three named variables, then re-check missingness.
oceanbuoys_imp <- oceanbuoys %>%
  impute_below_at( vars( humidity, air_temp_c, sea_temp_c ) )
miss_var_summary( oceanbuoys_imp )
## # A tibble: 8 x 3
## variable n_miss pct_miss
## <chr> <int> <dbl>
## 1 year 0 0
## 2 latitude 0 0
## 3 longitude 0 0
## 4 sea_temp_c 0 0
## 5 air_temp_c 0 0
## 6 humidity 0 0
## 7 wind_ew 0 0
## 8 wind_ns 0 0
glimpse( dat_hw )
## Rows: 100
## Columns: 2
## $ weight <dbl> NA, 91.20470, 81.57915, 76.84886, 111.01731, 90.15135, 63.1424…
## $ height <dbl> 2.3881462, 1.0014508, NA, NA, -0.2412422, 2.5207375, 1.4016896…
# Impute below in every column of dat_hw, then plot height against weight.
dat_hw_imp <- dat_hw %>%
  impute_below_all()
ob1 <- dat_hw_imp %>%
  ggplot( aes( x = weight, y = height ) ) +
  geom_point()
ob1
Tracking missing values can be handled by using bind_shadow():
# Attach the shadow columns first, then impute below in every column.
dat_hw_imp <- dat_hw %>%
  bind_shadow() %>%
  impute_below_all()
head( dat_hw_imp )
## # A tibble: 6 x 4
## weight height weight_NA height_NA
## <dbl> <dbl> <fct> <fct>
## 1 40.3 2.39 NA !NA
## 2 91.2 1.00 !NA !NA
## 3 81.6 -1.51 !NA NA
## 4 76.8 -1.65 !NA NA
## 5 111. -0.241 !NA !NA
## 6 90.2 2.52 !NA !NA
# Histogram of Ozone after impute_below_all(), filled by the Ozone shadow column.
aq_imp <- ggplot( impute_below_all( bind_shadow( airquality ) ),
                  aes( x = Ozone, fill = Ozone_NA ) ) +
  geom_histogram()
aq_imp
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Split the plot by missingness:
# Same histogram, with one panel per level of the Solar.R shadow column.
aq_imp <- ggplot( impute_below_all( bind_shadow( airquality ) ),
                  aes( x = Ozone, fill = Ozone_NA ) ) +
  geom_histogram() +
  facet_wrap( ~ Solar.R_NA )
aq_imp
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Visualize imputed value against data values using scatter plots:
# Shadow columns and the any_missing label are attached before imputing below;
# the scatter of Solar.R against Ozone is then coloured by any_missing.
aq_shadow_labelled <- function( df ) add_label_shadow( bind_shadow( df ) )
aq_imp <- ggplot( impute_below_all( aq_shadow_labelled( airquality ) ),
                  aes( x = Ozone, y = Solar.R, color = any_missing ) ) +
  geom_point()
rm( aq_shadow_labelled )
aq_imp
This successfully recreates the figure rendered by a call to
geom_miss_point()
# Impute every column of oceanbuoys with `impute_below_all`.
ocean_imp <- oceanbuoys %>%
  impute_below_all()
# Scatter of air temperature against east-west wind on the imputed data.
ocean_imp %>%
  ggplot( aes( x = wind_ew, y = air_temp_c ) ) +
  geom_point()
# Same imputation, but with shadow columns bound beforehand and the
# any_missing label added afterwards.
ocean_imp_track <- oceanbuoys %>%
  bind_shadow() %>%
  impute_below_all() %>%
  add_label_shadow()
# Scatter again, coloured by any_missing.
ocean_imp_track %>%
  ggplot( aes( x = wind_ew, y = air_temp_c, color = any_missing ) ) +
  geom_point()
ocean_imp_track
## # A tibble: 736 x 17
## year latitude longitude sea_temp_c air_temp_c humidity wind_ew wind_ns
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1997 0 -110 27.6 27.1 79.6 -6.40 5.40
## 2 1997 0 -110 27.5 27.0 75.8 -5.30 5.30
## 3 1997 0 -110 27.6 27 76.5 -5.10 4.5
## 4 1997 0 -110 27.6 26.9 76.2 -4.90 2.5
## 5 1997 0 -110 27.6 26.8 76.4 -3.5 4.10
## 6 1997 0 -110 27.8 26.9 76.7 -4.40 1.60
## 7 1997 0 -110 28.0 27.0 76.5 -2 3.5
## 8 1997 0 -110 28.0 27.1 78.3 -3.70 4.5
## 9 1997 0 -110 28.0 27.2 78.6 -4.20 5
## 10 1997 0 -110 28.0 27.2 76.9 -3.60 3.5
## # … with 726 more rows, and 9 more variables: year_NA <fct>, latitude_NA <fct>,
## # longitude_NA <fct>, sea_temp_c_NA <fct>, air_temp_c_NA <fct>,
## # humidity_NA <fct>, wind_ew_NA <fct>, wind_ns_NA <fct>, any_missing <chr>
# Bind shadow columns, impute below in every column, then label rows.
ocean_imp_track <- oceanbuoys %>%
  bind_shadow() %>%
  impute_below_all() %>%
  add_label_shadow()
# Air temperature against east-west wind, coloured by air_temp_c_NA.
ocean_imp_track %>%
  ggplot( aes( x = wind_ew, y = air_temp_c, color = air_temp_c_NA ) ) +
  geom_point()
# Air temperature against humidity, coloured by any_missing.
ocean_imp_track %>%
  ggplot( aes( x = humidity, y = air_temp_c, color = any_missing ) ) +
  geom_point()
# Histogram of air_temp_c, filled by `air_temp_c_NA`.
p <- ocean_imp_track %>%
  ggplot( aes( x = air_temp_c, fill = air_temp_c_NA ) ) +
  geom_histogram()
# Histogram of humidity, filled by humidity_NA.
p2 <- ocean_imp_track %>%
  ggplot( aes( x = humidity, fill = humidity_NA ) ) +
  geom_histogram()
# Air temperature histogram, one panel per year via `facet_wrap(~year)`.
p + facet_wrap( ~year )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Humidity histogram, one panel per year via `facet_wrap(~year)`.
p2 + facet_wrap( ~year )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
What makes good imputation?
To understand good imputations, let’s spend some time taking a look at bad imputations:
Imputation by the mean value of the data is particularly bad.
Imputing by the mean is bad because it artificially decreases the variance of the dataset and weakens the relationships between variables, while leaving the mean itself unchanged, as the following exercises demonstrate:
# Mean-impute airquality and compare Ozone between the levels of Ozone_NA.
# impute_mean_all() is applied BEFORE add_label_shadow(): the label column
# `any_missing` is character, and imputing it made mean() warn
# "argument is not numeric or logical: returning NA". Adding the label after
# the imputation avoids passing a character column to mean().
aq_imp <- airquality %>%
  bind_shadow( only_miss = TRUE ) %>% #bind only features with missing values
  impute_mean_all() %>%
  add_label_shadow() %>%
  ggplot( aes( x = Ozone_NA,
               y = Ozone ) ) +
  geom_boxplot()
aq_imp
The median is lower for the ‘not missing’ group than the NA group.
# Mean-impute airquality and plot Solar.R against Ozone, coloured by any_missing.
# impute_mean_all() is applied BEFORE add_label_shadow(): the label column
# `any_missing` is character, and imputing it made mean() warn
# "argument is not numeric or logical: returning NA". Adding the label after
# the imputation avoids passing a character column to mean().
aq_imp <- airquality %>%
  bind_shadow( only_miss = TRUE ) %>% #bind only features with missing values
  impute_mean_all() %>%
  add_label_shadow() %>%
  ggplot( aes( x = Ozone,
               y = Solar.R,
               color = any_missing ) ) +
  geom_point()
aq_imp
Exploring imputations for many variables:
# Mean-impute airquality with shadow columns attached, then reshape the
# Ozone and Solar.R values (and their shadows) into long form.
aq_imp <- airquality %>%
bind_shadow() %>% #bind a shadow column for every feature (only_miss is not set here)
impute_mean_all()
# Long format: one row per (variable, value) with the matching shadow value.
aq_imp_long <- shadow_long( aq_imp,
Ozone,
Solar.R )
head( aq_imp_long )
## # A tibble: 6 x 4
## variable value variable_NA value_NA
## <chr> <dbl> <chr> <chr>
## 1 Ozone 41 Ozone_NA !NA
## 2 Ozone 36 Ozone_NA !NA
## 3 Ozone 12 Ozone_NA !NA
## 4 Ozone 18 Ozone_NA !NA
## 5 Ozone 42.1 Ozone_NA NA
## 6 Ozone 28 Ozone_NA !NA
…and now to visualize:
# Histogram of the gathered values, filled by value_NA,
# with one panel per variable.
aq_imp_long %>%
  ggplot( aes( x = value, fill = value_NA ) ) +
  geom_histogram() +
  facet_wrap( ~ variable )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Bind shadow columns, mean-impute every column, then add the any_missing label.
ocean_imp_mean <- oceanbuoys %>%
  bind_shadow() %>%
  impute_mean_all() %>%
  add_label_shadow()
# Box plot of humidity for each level of humidity_NA.
ocean_imp_mean %>%
  ggplot( aes( x = humidity_NA, y = humidity ) ) +
  geom_boxplot()
# Box plot of air temperature for each level of air_temp_c_NA.
ocean_imp_mean %>%
  ggplot( aes( x = air_temp_c_NA, y = air_temp_c ) ) +
  geom_boxplot()
# Humidity against air temperature, coloured by any_missing.
ocean_imp_mean %>%
  ggplot( aes( x = air_temp_c, y = humidity, color = any_missing ) ) +
  geom_point()
# Same scatter, with one panel per year.
ocean_imp_mean %>%
  ggplot( aes( x = air_temp_c, y = humidity, color = any_missing ) ) +
  geom_point() +
  facet_wrap( ~year )
# Reshape humidity and air_temp_c (with their shadows) into long form.
ocean_imp_mean_gather <- ocean_imp_mean %>%
  shadow_long( humidity, air_temp_c )
# First rows of the long data.
head( ocean_imp_mean_gather )
## # A tibble: 6 x 4
## variable value variable_NA value_NA
## <chr> <chr> <chr> <chr>
## 1 air_temp_c 27.14999962 air_temp_c_NA !NA
## 2 air_temp_c 27.02000046 air_temp_c_NA !NA
## 3 air_temp_c 27 air_temp_c_NA !NA
## 4 air_temp_c 26.93000031 air_temp_c_NA !NA
## 5 air_temp_c 26.84000015 air_temp_c_NA !NA
## 6 air_temp_c 26.94000053 air_temp_c_NA !NA
# Histogram of the gathered values, filled by value_NA, one panel per variable.
# `value` is printed as <chr> above, so it is converted with as.numeric().
ocean_imp_mean_gather %>%
  ggplot( aes( x = as.numeric(value), fill = value_NA ) ) +
  geom_histogram() +
  facet_wrap( ~variable )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Performing imputations
Imputing using a linear regression model.
Try simputation
Using impute_lm from simputation is a powerful way to impute values for a dataset. However, the model used for imputation needs the same rigorous evaluation that statistical modelling approaches demand.
# Shadow columns (only for variables with missings) and the any_missing label
# are attached first; humidity is then imputed from a linear model on
# air_temp_c and wind_ew.
oceanbuoys_lmimp <- oceanbuoys %>%
  bind_shadow( only_miss = TRUE ) %>%
  add_label_shadow() %>%
  impute_lm( humidity ~ air_temp_c + wind_ew )
head( oceanbuoys_lmimp )
## # A tibble: 6 x 12
## year latitude longitude sea_temp_c air_temp_c humidity wind_ew wind_ns
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1997 0 -110 27.6 27.1 79.6 -6.40 5.40
## 2 1997 0 -110 27.5 27.0 75.8 -5.30 5.30
## 3 1997 0 -110 27.6 27 76.5 -5.10 4.5
## 4 1997 0 -110 27.6 26.9 76.2 -4.90 2.5
## 5 1997 0 -110 27.6 26.8 76.4 -3.5 4.10
## 6 1997 0 -110 27.8 26.9 76.7 -4.40 1.60
## # … with 4 more variables: sea_temp_c_NA <fct>, air_temp_c_NA <fct>,
## # humidity_NA <fct>, any_missing <chr>
# Convert the two columns that will be imputed to double.
airquality_type <- mutate( airquality,
                           Solar.R = as.double( Solar.R ),
                           Ozone = as.double( Ozone ) )
# Track missingness, then impute Solar.R and Ozone in turn, each from a
# linear model on Wind, Temp and Month.
aq_imp_lm <- add_label_shadow( bind_shadow( airquality_type ) ) %>%
  impute_lm( Solar.R ~ Wind + Temp + Month ) %>%
  impute_lm( Ozone ~ Wind + Temp + Month )
head( aq_imp_lm )
## # A tibble: 6 x 13
## Ozone Solar.R Wind Temp Month Day Ozone_NA Solar.R_NA Wind_NA Temp_NA
## <dbl> <dbl> <dbl> <int> <int> <int> <fct> <fct> <fct> <fct>
## 1 41 190 7.4 67 5 1 !NA !NA !NA !NA
## 2 36 118 8 72 5 2 !NA !NA !NA !NA
## 3 12 149 12.6 74 5 3 !NA !NA !NA !NA
## 4 18 313 11.5 62 5 4 !NA !NA !NA !NA
## 5 -9.04 138. 14.3 56 5 5 NA NA !NA !NA
## 6 28 178. 14.9 66 5 6 !NA NA !NA !NA
## # … with 3 more variables: Month_NA <fct>, Day_NA <fct>, any_missing <chr>
The bind_shadow() & add_label_shadow() functions are important for tracking which values were imputed. The imputed values for the last example are visualized below:
# Ozone against Solar.R after the linear-model imputation,
# coloured by any_missing.
aq_imp_lm %>%
  ggplot( aes( x = Solar.R, y = Ozone, color = any_missing ) ) +
  geom_point()
Build other linear model imputations and compare the results:
# "Small" imputation model: Wind and Temp as the only predictors.
aq_imp_lm_small <- add_label_shadow( bind_shadow( airquality_type ) ) %>%
  impute_lm( Solar.R ~ Wind + Temp ) %>%
  impute_lm( Ozone ~ Wind + Temp )
# "Large" imputation model: Wind, Temp, Month and Day as predictors.
aq_imp_lm_large <- add_label_shadow( bind_shadow( airquality_type ) ) %>%
  impute_lm( Solar.R ~ Wind + Temp + Month + Day ) %>%
  impute_lm( Ozone ~ Wind + Temp + Month + Day )
Bind the models, so that we can build visualizations:
# Stack the three imputed datasets; the list names end up in `imp_model`.
bound_models <- bind_rows( list( small = aq_imp_lm_small,
                                 medium = aq_imp_lm,
                                 large = aq_imp_lm_large ),
                           .id = 'imp_model' )
head( bound_models )
## # A tibble: 6 x 14
## imp_model Ozone Solar.R Wind Temp Month Day Ozone_NA Solar.R_NA Wind_NA
## <chr> <dbl> <dbl> <dbl> <int> <int> <int> <fct> <fct> <fct>
## 1 small 41 190 7.4 67 5 1 !NA !NA !NA
## 2 small 36 118 8 72 5 2 !NA !NA !NA
## 3 small 12 149 12.6 74 5 3 !NA !NA !NA
## 4 small 18 313 11.5 62 5 4 !NA !NA !NA
## 5 small -11.7 127. 14.3 56 5 5 NA NA !NA
## 6 small 28 160. 14.9 66 5 6 !NA NA !NA
## # … with 4 more variables: Temp_NA <fct>, Month_NA <fct>, Day_NA <fct>,
## # any_missing <chr>
# Keep the two imputed variables plus the two identifier columns, then gather
# everything except the identifiers into variable/value pairs.
bound_models_keep <- select( bound_models, Ozone, Solar.R, any_missing, imp_model )
bound_models_gather <- gather( bound_models_keep,
                               key = 'variable',
                               value = 'value',
                               -any_missing,
                               -imp_model )
rm( bound_models_keep )
head( bound_models_gather )
## # A tibble: 6 x 4
## any_missing imp_model variable value
## <chr> <chr> <chr> <dbl>
## 1 Not Missing small Ozone 41
## 2 Not Missing small Ozone 36
## 3 Not Missing small Ozone 12
## 4 Not Missing small Ozone 18
## 5 Missing small Ozone -11.7
## 6 Missing small Ozone 28
# Box plot of the values per imputation model, one panel per variable.
bound_models_gather %>%
  ggplot( aes( x = imp_model, y = value, color = imp_model ) ) +
  geom_boxplot() +
  facet_wrap( ~variable )
# Impute air_temp_c and humidity from linear models on wind_ew and wind_ns;
# shadow columns are bound first and the any_missing label is added last.
ocean_imp_lm_wind <- bind_shadow( oceanbuoys ) %>%
  impute_lm( air_temp_c ~ wind_ew + wind_ns ) %>%
  impute_lm( humidity ~ wind_ew + wind_ns ) %>%
  add_label_shadow()
# Humidity against air temperature after imputation, coloured by any_missing.
ocean_imp_lm_wind %>%
  ggplot( aes( x = air_temp_c, y = humidity, color = any_missing ) ) +
  geom_point()
# Stack the mean-imputed and wind-model-imputed datasets.
bound_models <- bind_rows( mean = ocean_imp_mean,
                           lm_wind = ocean_imp_lm_wind,
                           .id = "imp_model" )
# Humidity against air temperature, one panel per imputation model.
bound_models %>%
  ggplot( aes( x = air_temp_c, y = humidity, color = any_missing ) ) +
  geom_point() +
  facet_wrap( ~imp_model )
# Same imputation models with year added as a predictor.
ocean_imp_lm_wind_year <- oceanbuoys %>%
  bind_shadow() %>%
  impute_lm( air_temp_c ~ wind_ew + wind_ns + year ) %>%
  impute_lm( humidity ~ wind_ew + wind_ns + year ) %>%
  add_label_shadow()
# Stack the mean, lm_wind and lm_wind_year datasets.
bound_models <- bind_rows( mean = ocean_imp_mean,
                           lm_wind = ocean_imp_lm_wind,
                           lm_wind_year = ocean_imp_lm_wind_year,
                           .id = "imp_model" )
# Factor copy of imp_model with the levels in an explicit order.
bound_models[["imp_model_f"]] <- factor( bound_models[["imp_model"]],
                                         levels = c('mean','lm_wind','lm_wind_year') )
# Humidity against air temperature, coloured by any_missing,
# one panel per imputation model.
bound_models %>%
  ggplot( aes( x = air_temp_c, y = humidity, color = any_missing ) ) +
  geom_point() +
  facet_wrap( ~imp_model_f )
Evaluating imputations and models
Assessing inference from imputed data in a modelling context.
Compare the imputed data with a complete case analysis (which only uses rows with no missing values)
# Complete-case dataset: rows with any NA are removed before the shadow
# columns and the any_missing label are attached.
aq_cc <- add_label_shadow( bind_shadow( na.omit( airquality ) ) )
#dim( aq_cc )
# Imputed dataset: Ozone and Solar.R are filled from linear models on
# Temp, Wind, Month and Day.
aq_imp_lm <- airquality_type %>%
  bind_shadow() %>%
  add_label_shadow() %>%
  impute_lm( Ozone ~ Temp + Wind + Month + Day ) %>%
  impute_lm( Solar.R ~ Temp + Wind + Month + Day )
#dim( aq_imp_lm )
# Stack both datasets; `imp_model` records which one each row came from.
bound_models <- bind_rows( list( cc = aq_cc,
                                 imp_lm = aq_imp_lm ),
                           .id = 'imp_model' )
head( bound_models )
## # A tibble: 6 x 14
## imp_model Ozone Solar.R Wind Temp Month Day Ozone_NA Solar.R_NA Wind_NA
## <chr> <dbl> <dbl> <dbl> <int> <int> <int> <fct> <fct> <fct>
## 1 cc 41 190 7.4 67 5 1 !NA !NA !NA
## 2 cc 36 118 8 72 5 2 !NA !NA !NA
## 3 cc 12 149 12.6 74 5 3 !NA !NA !NA
## 4 cc 18 313 11.5 62 5 4 !NA !NA !NA
## 5 cc 23 299 8.6 65 5 7 !NA !NA !NA
## 6 cc 19 99 13.8 59 5 8 !NA !NA !NA
## # … with 4 more variables: Temp_NA <fct>, Month_NA <fct>, Day_NA <fct>,
## # any_missing <chr>
Now that the data is formatted, fit a linear model to each of the datasets
# Fit the same linear model of Temp to each dataset (complete cases and
# lm-imputed) and keep the fit, residuals, predictions and coefficient table
# as list-columns, one row per dataset.
model_summary <- bound_models %>%
  group_by( imp_model ) %>%
  nest() %>% # collapse the data so that each row represents one dataset
  mutate( mod = map( data,
                     # Temp is the response, so it is not listed among the
                     # predictors; R drops a response that also appears on
                     # the right-hand side, so the fitted terms are unchanged.
                     ~lm( Temp ~ Ozone + Solar.R + Wind + Day + Month,
                          data = . ) ), # fit a linear model to each row
          res = map( mod, residuals ), # residuals of each fit
          pred = map( mod, predict ), # predictions from each fit
          tidy = map( mod, broom::tidy ) ) # coefficient table of each fit
model_summary
## # A tibble: 2 x 6
## # Groups: imp_model [2]
## imp_model data mod res pred tidy
## <chr> <list> <list> <list> <list> <list>
## 1 cc <tibble [111 × 13]> <lm> <dbl [111]> <dbl [111]> <tibble [6 × 5]>
## 2 imp_lm <tibble [153 × 13]> <lm> <dbl [153]> <dbl [153]> <tibble [6 × 5]>
Explore the results from both approaches to fit a linear model to the data (with & w/out imputation)
# Expand the coefficient tables so both fits can be compared row by row.
unnest( select( model_summary, imp_model, tidy ),
        cols = c( tidy ) )
## # A tibble: 12 x 6
## # Groups: imp_model [2]
## imp_model term estimate std.error statistic p.value
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 cc (Intercept) 57.3 4.50 12.7 5.52e-23
## 2 cc Ozone 0.165 0.0239 6.92 3.66e-10
## 3 cc Solar.R 0.0108 0.00699 1.55 1.24e- 1
## 4 cc Wind -0.174 0.212 -0.821 4.13e- 1
## 5 cc Day -0.0892 0.0677 -1.32 1.91e- 1
## 6 cc Month 2.04 0.409 4.99 2.42e- 6
## 7 imp_lm (Intercept) 54.7 3.59 15.2 5.21e-32
## 8 imp_lm Ozone 0.196 0.0205 9.53 4.52e-17
## 9 imp_lm Solar.R 0.0102 0.00577 1.76 7.97e- 2
## 10 imp_lm Wind -0.00642 0.172 -0.0374 9.70e- 1
## 11 imp_lm Day -0.112 0.0538 -2.08 3.92e- 2
## 12 imp_lm Month 2.11 0.340 6.21 5.09e- 9
# Histogram of the residuals, with the bars of the two datasets side by side.
unnest( select( model_summary, imp_model, res ),
        cols = c( res ) ) %>%
  ggplot( aes( x = res, fill = imp_model ) ) +
  geom_histogram( position = 'dodge' )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Explore the predictions in the data
# Histogram of the predictions, with the bars of the two datasets side by side.
unnest( select( model_summary, imp_model, pred ),
        cols = c( pred ) ) %>%
  ggplot( aes( x = pred, fill = imp_model ) ) +
  geom_histogram( position = 'dodge' )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
and for the oceanbuoys dataset
# Complete-case dataset: rows with any NA are removed before the shadow
# columns and the any_missing label are attached.
ocean_cc <- add_label_shadow( bind_shadow( na.omit( oceanbuoys ) ) )
# Imputed dataset: sea_temp_c, air_temp_c and humidity are each filled from a
# linear model on wind, year, latitude and longitude.
ocean_imp_lm_all <- oceanbuoys %>%
  bind_shadow() %>%
  add_label_shadow() %>%
  impute_lm( sea_temp_c ~ wind_ew + wind_ns + year + latitude + longitude ) %>%
  impute_lm( air_temp_c ~ wind_ew + wind_ns + year + latitude + longitude ) %>%
  impute_lm( humidity ~ wind_ew + wind_ns + year + latitude + longitude )
# Stack the three datasets; `imp_model` records the source of each row.
bound_models <- bind_rows( cc = ocean_cc,
                           imp_lm_wind = ocean_imp_lm_wind,
                           imp_lm_all = ocean_imp_lm_all,
                           .id = "imp_model" )
# Column-wise overview of the stacked data.
glimpse( bound_models )
## Rows: 2,037
## Columns: 18
## $ imp_model <chr> "cc", "cc", "cc", "cc", "cc", "cc", "cc", "cc", "cc", "…
## $ year <dbl> 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1…
## $ latitude <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ longitude <dbl> -110, -110, -110, -110, -110, -110, -110, -110, -110, -…
## $ sea_temp_c <dbl> 27.59, 27.55, 27.57, 27.62, 27.65, 27.83, 28.01, 28.04,…
## $ air_temp_c <dbl> 27.15, 27.02, 27.00, 26.93, 26.84, 26.94, 27.04, 27.11,…
## $ humidity <dbl> 79.6, 75.8, 76.5, 76.2, 76.4, 76.7, 76.5, 78.3, 78.6, 7…
## $ wind_ew <dbl> -6.4, -5.3, -5.1, -4.9, -3.5, -4.4, -2.0, -3.7, -4.2, -…
## $ wind_ns <dbl> 5.4, 5.3, 4.5, 2.5, 4.1, 1.6, 3.5, 4.5, 5.0, 3.5, 2.9, …
## $ year_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ latitude_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ longitude_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ sea_temp_c_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ air_temp_c_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ humidity_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ wind_ew_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ wind_ns_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, …
## $ any_missing <chr> "Not Missing", "Not Missing", "Not Missing", "Not Missi…
# One row per dataset, holding the fitted model of sea_temp_c together with
# its residuals, predictions and coefficient table as list-columns.
model_summary <- bound_models %>%
  group_by( imp_model ) %>%
  nest() %>%
  mutate( mod = map( data,
                     ~lm( sea_temp_c ~ air_temp_c + humidity + year,
                          data = . ) ),
          res = map( mod, residuals ),
          pred = map( mod, predict ),
          tidy = map( mod, broom::tidy ) )
# Expand the coefficient tables to compare the fits side by side.
unnest( select( model_summary, imp_model, tidy ),
        cols = c( tidy ) )
## # A tibble: 12 x 6
## # Groups: imp_model [3]
## imp_model term estimate std.error statistic p.value
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 cc (Intercept) -735. 45.9 -16.0 8.19e- 48
## 2 cc air_temp_c 0.864 0.0231 37.4 2.64e-154
## 3 cc humidity 0.0341 0.00390 8.74 2.69e- 17
## 4 cc year 0.369 0.0232 15.9 3.46e- 47
## 5 imp_lm_wind (Intercept) -1742. 56.1 -31.0 1.83e-135
## 6 imp_lm_wind air_temp_c 0.365 0.0279 13.1 2.73e- 35
## 7 imp_lm_wind humidity 0.0225 0.00690 3.26 1.17e- 3
## 8 imp_lm_wind year 0.880 0.0283 31.1 6.79e-136
## 9 imp_lm_all (Intercept) -697. 51.8 -13.5 5.04e- 37
## 10 imp_lm_all air_temp_c 0.890 0.0255 35.0 2.90e-158
## 11 imp_lm_all humidity 0.0127 0.00463 2.75 6.03e- 3
## 12 imp_lm_all year 0.351 0.0262 13.4 1.12e- 36
The imp_lm_all model gives the highest estimate for air_temp_c
Final Lesson
Some Other Datasets to play with
# Raw-file URLs of two CSV datasets (ozoneNA.csv and ecological.csv) in the
# njtierney/user2018-missing-data-tutorial GitHub repository.
ozoneNA_url <- 'https://raw.githubusercontent.com/njtierney/user2018-missing-data-tutorial/master/ozoneNA.csv'
ecological_url <- 'https://raw.githubusercontent.com/njtierney/user2018-missing-data-tutorial/master/ecological.csv'