5) Knit your file as an HTML and upload it to Canvas.
getwd()
## [1] "/Users/timmydaly/Desktop/MGT HR"
library(readxl)
di.data <- read_xlsx("DisparateImpact.xlsx")
print(di.data)
## # A tibble: 200 × 5
## Gender Race Age Cognitive_Test AgeGroup
## <chr> <chr> <dbl> <chr> <chr>
## 1 Male White 51 Fail Over40
## 2 Female Black 49 Pass Over40
## 3 Male White 54 Pass Over40
## 4 Male Black 59 Fail Over40
## 5 Male Asian 35 Pass Under40
## 6 Female White 32 Pass Under40
## 7 Male Black 49 Fail Over40
## 8 Male Hispanic 38 Fail Under40
## 9 Male White 36 Pass Under40
## 10 Female White 38 Pass Under40
## # ℹ 190 more rows
observed.table <- table(di.data$AgeGroup, di.data$Cognitive_Test)
print(observed.table)
##
## Fail Pass
## Over40 60 48
## Under40 16 76
observed.table[1,2]
## [1] 48
pass_Over40 <- observed.table[1,2]
fail_Over40 <- observed.table[1,1]
N_Over40 <- pass_Over40 + fail_Over40
pass_Under40 <- observed.table[2,2]
fail_Under40 <- observed.table[2,1]
N_Under40 <- pass_Under40 + fail_Under40
SR_Over40 <- pass_Over40 / N_Over40
print(SR_Over40)
## [1] 0.4444444
SR_Under40 <- pass_Under40 / N_Under40
print(SR_Under40)
## [1] 0.826087
IR <- SR_Under40 / SR_Over40
print(IR)
## [1] 1.858696
chisq.test(observed.table, correct=FALSE)
##
## Pearson's Chi-squared test
##
## data: observed.table
## X-squared = 30.713, df = 1, p-value = 2.992e-08
fisher.test(observed.table)
##
## Fisher's Exact Test for Count Data
##
## data: observed.table
## p-value = 2.107e-08
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 2.946085 12.266635
## sample estimates:
## odds ratio
## 5.880999
Criterion <- read_xlsx("Criterion.xlsx")
print(Criterion)
## # A tibble: 200 × 5
## Gender Race Age IQ Job_Performance
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Male White 51 91.0 3
## 2 Female Black 49 80.9 3
## 3 Male White 54 98.5 4
## 4 Male Black 59 100. 4
## 5 Male Asian 35 82.2 2
## 6 Female White 32 96.4 4
## 7 Male Black 49 105. 5
## 8 Male Hispanic 38 112. 6
## 9 Male White 36 108. 4
## 10 Female White 38 94.0 4
## # ℹ 190 more rows
library(lessR)
##
## lessR 4.4.5 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
ScatterPlot(x=IQ, y=Job_Performance, data=Criterion)

##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(IQ, Job_Performance, enhance=TRUE) # many options
## Plot(IQ, Job_Performance, color="red") # exterior edge color of points
## Plot(IQ, Job_Performance, fit="lm", fit_se=c(.90,.99)) # fit line, stnd errors
## Plot(IQ, Job_Performance, out_cut=.10) # label top 10% from center as outliers
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 200
## Sample Correlation of IQ and Job_Performance: r = 0.762
##
## Hypothesis Test of 0 Correlation: t = 16.576, df = 198, p-value = 0.000
## 95% Confidence Interval for Correlation: 0.697 to 0.815
##
ScatterPlot(x=IQ, y=Job_Performance, data=Criterion,
xlab="IQ Test Score",
ylab="Job Performance Score",
ellipse=TRUE)
## [Ellipse with Murdoch and Chow's function ellipse from their ellipse package]

##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(IQ, Job_Performance, enhance=TRUE) # many options
## Plot(IQ, Job_Performance, color="red") # exterior edge color of points
## Plot(IQ, Job_Performance, fit="lm", fit_se=c(.90,.99)) # fit line, stnd errors
## Plot(IQ, Job_Performance, out_cut=.10) # label top 10% from center as outliers
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 200
## Sample Correlation of IQ and Job_Performance: r = 0.762
##
## Hypothesis Test of 0 Correlation: t = 16.576, df = 198, p-value = 0.000
## 95% Confidence Interval for Correlation: 0.697 to 0.815
##
cp_cor <- Correlation(x=IQ, y=Job_Performance, data=Criterion)
print(cp_cor)
## Correlation Analysis for Variables IQ and Job_Performance
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 200
## Number of cases (rows of data) deleted: 0
##
## Sample Covariance: s = 9.025
##
## Sample Correlation: r = 0.762
##
## Hypothesis Test of 0 Correlation: t = 16.576, df = 198, p-value = 0.000
## 95% Confidence Interval for Correlation: 0.697 to 0.815
cp_cor$r^2
## cor
## 0.580644