#
# Statistics Project 2
#
# You will perform a statistical analysis to establish whether race has a significant impact on the rate of callbacks for resumes.
# Parameter of interest: difference in callback rates in the population at large between black and white applicants
# Point estimate: difference in callback rates in the sample of black and white applicants
# Answer the following questions in this notebook below and submit to your Github account.
# 1.What test is appropriate for this problem? Does CLT apply?
# 2.What are the null and alternate hypotheses?
# 3.Compute margin of error, confidence interval, and p-value.
# 4.Discuss statistical significance.
#
# Resources:
# http://www.stat.columbia.edu/~martin/W2024/R2.pdf
# https://stat.ethz.ch/R-manual/R-devel/library/stats/html/p.adjust.html
#
#
library(stats)
library(foreign)
# Load the data ----
# Build the file path explicitly instead of calling setwd(): changing the
# working directory is a global side effect that leaks into anything run
# after this script. Point `data_dir` at your own copy of the project.
data_dir <- path.expand(
  "~/sliderule/statistics_project2/statistics project 2"
)
data <- read.dta(
  file.path(data_dir, "data", "us_job_market_discrimination.dta")
)
# Use the inference() function provided by DataCamp.
# NOTE(review): this downloads an .Rdata file over plain HTTP and loads every
# object it contains into the workspace -- consider keeping a local copy.
load(url("http://assets.datacamp.com/course/dasi/inference.Rdata"))
# Clean data: store the callback indicator as logical
# (as.logical() maps 0 to FALSE and any other number to TRUE).
data$call <- as.logical(data$call)
# Subset for comparison: one data frame per applicant race code.
black <- subset(data, race == "b")
white <- subset(data, race == "w")
# 1. What test is appropriate for this problem? Does CLT apply?
# The outcome is binary (callback / no callback) in two groups, so a
# two-sample test for a difference in proportions is appropriate.
# check condition: samples are independent (assume yes)
# check condition: 10% population (total population of black/white applicants in country is >10x of sample)
# check condition: success and failure cases >10 each (yes, see table below)
nrow(black)
## [1] 2435
nrow(white)
## [1] 2435
# same number of rows: 2435
# Success/failure condition: count callbacks and non-callbacks per race so
# the ">10 each" claim is actually checked rather than assumed.
# Counts implied by the recorded summaries further down in this file:
#   b: 157 callbacks, 2278 without;  w: 235 callbacks, 2200 without.
table(race = data$race, callback = data$call)
# 2. What are the null and alternative hypotheses?
# H0: the callback rate is the same for resumes from black and white applicants
# HA: the callback rates differ (two-sided)
#
# Conclusion: There is a significant difference in callback rates, reject H0
#
# TODO: Contrast t.test() with inference()
# Student two-sample t-test (pooled variance) on the logical callback flags.
t.test(
  x = black$call,
  y = white$call,
  alternative = "two.sided",
  mu = 0,
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: black$call and white$call
## t = -4.1147, df = 4868, p-value = 3.941e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04729491 -0.01677080
## sample estimates:
## mean of x mean of y
## 0.06447639 0.09650924
# Welch variant (variances not assumed equal); the result is almost identical.
t.test(
  x = black$call,
  y = white$call,
  alternative = "two.sided",
  mu = 0,
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: black$call and white$call
## t = -4.1147, df = 4711.6, p-value = 3.943e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04729503 -0.01677067
## sample estimates:
## mean of x mean of y
## 0.06447639 0.09650924
# 3. Compute margin of error, confidence interval, and p-value.
#
# Conclusion: at 95% confidence, the callback rate for black applicants is
# lower than for white applicants by roughly 1.7 to 4.7 percentage points
# (margin of error about 1.5 points).
# TODO: Can we do multiple tests and use p-adjust to deal with FWER and FDR?
#
# inference() expects the response as `y` and the grouping (explanatory)
# variable as `x`. The previous call, inference(black$call, white$call, ...),
# used white$call as the grouping variable, so it cross-tabulated the two
# callback vectors by row position and reported a meaningless interval of
# (-0.3518, -0.2308). Stack the callbacks and group them by race instead.
callback <- c(black$call, white$call)
race_group <- factor(
  rep(c("b", "w"), times = c(nrow(black), nrow(white))),
  levels = c("b", "w")
)
inference(
  y = callback,
  x = race_group,
  est = "proportion",
  type = "ci",
  conflevel = 0.95,
  boot_method = "perc",
  method = "theoretical",
  success = TRUE
)
## The output recorded here came from the incorrect call and was removed;
## re-run to regenerate it. Hand check from the group counts
## (157 of 2435 callbacks for "b", 235 of 2435 for "w"):
##   observed difference (b - w) = 0.0645 - 0.0965 = -0.0320
##   standard error = sqrt(p1 * (1 - p1) / n1 + p2 * (1 - p2) / n2) = 0.0078
##   margin of error = 1.96 * 0.0078 = 0.0153
##   95% confidence interval = (-0.0473, -0.0168)
## This agrees with the t.test() interval (-0.0473, -0.0168).
## NOTE(review): assumes inference() reports first level minus second level,
## as the earlier "(FALSE-TRUE)" output suggests -- confirm on re-run.
# 4.Discuss statistical significance.
#
# The alternative hypothesis is supported: the 95% confidence interval for the difference in callback rates excludes zero, so the difference is statistically significant at the 5% level.
#
# Conclusion:
#
# TODO: Can we do multiple tests and use p-adjust to deal with FWER and FDR?
# Normal Q-Q plots of the raw callback flags, one plot per group.
# The plots look "weird" because each observation is only TRUE or FALSE, so
# the points fall on two horizontal bands. The raw data cannot be normal;
# the CLT concerns the sampling distribution of the proportion instead.
for (calls in list(black$call, white$call)) {
  qqnorm(calls)
  qqline(calls)
}

# Two-sided z-test for a difference in proportions (H0: p_b - p_w = 0).
#
# inference() expects the response as `y` and the grouping (explanatory)
# variable as `x`. The previous call, inference(black$call, white$call, ...),
# used white$call as the grouping variable, which paired unrelated rows and
# produced a spurious Z = -17.282 with p-value = 0. Stack the callbacks and
# group them by race instead.
callback <- c(black$call, white$call)
race_group <- factor(
  rep(c("b", "w"), times = c(nrow(black), nrow(white))),
  levels = c("b", "w")
)
inference(
  y = callback,
  x = race_group,
  est = "proportion",
  type = "ht",
  conflevel = 0.95,
  boot_method = "perc",
  method = "theoretical",
  alternative = "twosided",
  success = TRUE,
  null = 0
)
## The output recorded here came from the incorrect call and was removed;
## re-run to regenerate it. Hand check from the group counts
## (157 of 2435 callbacks for "b", 235 of 2435 for "w"):
##   observed difference (b - w) = -0.0320
##   pooled proportion = 392 / 4870 = 0.0805
##   standard error = sqrt(0.0805 * 0.9195 * (1 / 2435 + 1 / 2435)) = 0.0078
##   test statistic Z = -0.0320 / 0.0078 = about -4.11
##   two-sided p-value = about 4e-05
## This agrees with the t.test() results (t = -4.1147, p = 3.94e-05): reject H0.
