Simpsons Paradox: Two HR Examples

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(scales)
alpha = 0.5

set.seed(123)
n = 1000

Neuroticism = rnorm(n)
Performance = rnorm(n) + Neuroticism * 0.1

Performance = rescale(Performance, to = c(0, 100))
# summary(Performance)
Neuroticism = rescale(Neuroticism, to = c(0, 7))
# summary(Neuroticism)

data <- data.frame(
  Performance,
  Neuroticism
)

options = c("Technical","Service")
technical = 
  (data$Performance > mean(data$Performance) & 
     data$Neuroticism > mean(data$Neuroticism)) |
  (data$Performance < mean(data$Performance) & 
     data$Neuroticism < mean(data$Neuroticism))

data$Job[technical] <- sample(options, sum(technical), T, c(0.6, 0.2))
data$Job[is.na(data$Job)] <- sample(options, sum(is.na(data$Job)), T, c(0.2, 0.8))

p <- data %>% ggplot(aes(Neuroticism, Performance)) 
p + geom_point(alpha = alpha) + geom_smooth(method = 'lm')

p +
geom_point(aes(col = Job), alpha = alpha) + 
  geom_smooth(aes(col = Job), method = 'lm') +
  theme(legend.background = element_rect(fill = "transparent"),
        legend.justification = c(0, 1),
        legend.position = c(0, 1))

set.seed(123)
n = 1000

Education = rbinom(n, 2, 0.5)
Neuroticism = rnorm(n) + Education
Salary = Education * 2 + rnorm(n) - Neuroticism * 0.3

Salary = sample(10000:11000,1) + rescale(Salary, to = c(0, 100000))
# summary(Salary)
Neuroticism = rescale(Neuroticism, to = c(0, 7))
# summary(Neuroticism)
Education = factor(Education, labels = c("Low", "Medium", "High"))

data <- data.frame(
  Salary,
  Neuroticism,
  Education
)

p <- data %>% ggplot(aes(Neuroticism, Salary)) 
p + geom_point(alpha = alpha) + geom_smooth(method = 'lm')

p + 
  geom_point(aes(col = Education), alpha = alpha) + 
  geom_smooth(aes(col = Education), method = 'lm') +
  theme(legend.background = element_rect(fill = "transparent"),
        legend.justification = c(0, 1),
        legend.position = c(0, 1))

Simpsons Paradox: Two HR Examples

Paul van der Laken

27 september 2017