PART 1

1) Below these instruction, create a new code chunk, use the annotation “Disparate Impact”, and title the code chunk the same way.

2) Using the disparate impact Excel file in the Modules page on Canvas, provide evidence and a write up an explanation of whether adverse impact exists across age groups. Make sure you are using the appropriate test.

PART 2

5) Knit your file as an HTML and upload it to Canvas.

getwd()
## [1] "/Users/timmydaly/Desktop/MGT HR"
library(readxl)

di.data <- read_xlsx("DisparateImpact.xlsx")

print(di.data)
## # A tibble: 200 × 5
##    Gender Race       Age Cognitive_Test AgeGroup
##    <chr>  <chr>    <dbl> <chr>          <chr>   
##  1 Male   White       51 Fail           Over40  
##  2 Female Black       49 Pass           Over40  
##  3 Male   White       54 Pass           Over40  
##  4 Male   Black       59 Fail           Over40  
##  5 Male   Asian       35 Pass           Under40 
##  6 Female White       32 Pass           Under40 
##  7 Male   Black       49 Fail           Over40  
##  8 Male   Hispanic    38 Fail           Under40 
##  9 Male   White       36 Pass           Under40 
## 10 Female White       38 Pass           Under40 
## # ℹ 190 more rows
observed.table <- table(di.data$AgeGroup, di.data$Cognitive_Test)

print(observed.table)
##          
##           Fail Pass
##   Over40    60   48
##   Under40   16   76
observed.table[1,2]
## [1] 48
pass_Over40 <- observed.table[1,2]

fail_Over40 <- observed.table[1,1]

N_Over40 <- pass_Over40 + fail_Over40

pass_Under40 <- observed.table[2,2]

fail_Under40 <- observed.table[2,1]

N_Under40 <- pass_Under40 + fail_Under40

SR_Over40 <- pass_Over40 / N_Over40

print(SR_Over40)
## [1] 0.4444444
SR_Under40 <- pass_Under40 / N_Under40

print(SR_Under40)
## [1] 0.826087
IR <- SR_Under40 / SR_Over40

print(IR)
## [1] 1.858696
chisq.test(observed.table, correct=FALSE)
## 
##  Pearson's Chi-squared test
## 
## data:  observed.table
## X-squared = 30.713, df = 1, p-value = 2.992e-08
fisher.test(observed.table)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  observed.table
## p-value = 2.107e-08
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   2.946085 12.266635
## sample estimates:
## odds ratio 
##   5.880999
Criterion <- read_xlsx("Criterion.xlsx")

print(Criterion)
## # A tibble: 200 × 5
##    Gender Race       Age    IQ Job_Performance
##    <chr>  <chr>    <dbl> <dbl>           <dbl>
##  1 Male   White       51  91.0               3
##  2 Female Black       49  80.9               3
##  3 Male   White       54  98.5               4
##  4 Male   Black       59 100.                4
##  5 Male   Asian       35  82.2               2
##  6 Female White       32  96.4               4
##  7 Male   Black       49 105.                5
##  8 Male   Hispanic    38 112.                6
##  9 Male   White       36 108.                4
## 10 Female White       38  94.0               4
## # ℹ 190 more rows
library(lessR)
## 
## lessR 4.4.5                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")  Read data file, many formats available, e.g., Excel
##   d is default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, 
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including time series forecasting
##   Enter: news(package="lessR")
## 
## Interactive data analysis
##   Enter: interact()
ScatterPlot(x=IQ, y=Job_Performance, data=Criterion)

## 
## >>> Suggestions  or  enter: style(suggest=FALSE)
## Plot(IQ, Job_Performance, enhance=TRUE)  # many options
## Plot(IQ, Job_Performance, color="red")  # exterior edge color of points
## Plot(IQ, Job_Performance, fit="lm", fit_se=c(.90,.99))  # fit line, stnd errors
## Plot(IQ, Job_Performance, out_cut=.10)  # label top 10% from center as outliers 
## 
## 
## >>> Pearson's product-moment correlation 
##  
## Number of paired values with neither missing, n = 200 
## Sample Correlation of IQ and Job_Performance: r = 0.762 
##   
## Hypothesis Test of 0 Correlation:  t = 16.576,  df = 198,  p-value = 0.000 
## 95% Confidence Interval for Correlation:  0.697 to 0.815 
## 
ScatterPlot(x=IQ, y=Job_Performance, data=Criterion,

            xlab="IQ Test Score",

            ylab="Job Performance Score",

            ellipse=TRUE)
## [Ellipse with Murdoch and Chow's function ellipse from their ellipse package]

## 
## >>> Suggestions  or  enter: style(suggest=FALSE)
## Plot(IQ, Job_Performance, enhance=TRUE)  # many options
## Plot(IQ, Job_Performance, color="red")  # exterior edge color of points
## Plot(IQ, Job_Performance, fit="lm", fit_se=c(.90,.99))  # fit line, stnd errors
## Plot(IQ, Job_Performance, out_cut=.10)  # label top 10% from center as outliers 
## 
## 
## >>> Pearson's product-moment correlation 
##  
## Number of paired values with neither missing, n = 200 
## Sample Correlation of IQ and Job_Performance: r = 0.762 
##   
## Hypothesis Test of 0 Correlation:  t = 16.576,  df = 198,  p-value = 0.000 
## 95% Confidence Interval for Correlation:  0.697 to 0.815 
## 
cp_cor <- Correlation(x=IQ, y=Job_Performance, data=Criterion)

print(cp_cor)
## Correlation Analysis for Variables IQ and Job_Performance 
##   
## 
## >>> Pearson's product-moment correlation 
##  
## Number of paired values with neither missing, n = 200 
## Number of cases (rows of data) deleted: 0 
## 
## Sample Covariance: s = 9.025 
##  
## Sample Correlation: r = 0.762 
## 
## Hypothesis Test of 0 Correlation:  t = 16.576,  df = 198,  p-value = 0.000 
## 95% Confidence Interval for Correlation:  0.697 to 0.815
cp_cor$r^2
##      cor 
## 0.580644