klinares — Jul 19, 2017, 11:35 AM
#####################################################
## Kevin L - Check in rate for SAS White IN study ##
################## April 21, 2017 ###################
############# response rates ################
######################################
# response rate by form.
# building CI:
# Form A = 190/435, Form B 189/437
formA_n = 435
formB_n = 437
z = 1.96 # building 95% CI
phat_formA = 190/formA_n
phat_formB = 189/formB_n
point_estimate = phat_formA - phat_formB
se = sqrt(
((phat_formA*(1-phat_formA))/formA_n) +
((phat_formB*(1-phat_formB))/formB_n) )
me = z*se # margin of error +/-
Conf_int_95 <- cbind(point_estimate - me, point_estimate + me)
#double check work
prop_test <- prop.test(x=c(190,189), n=c(435,437),
alternative="two.sided", correct=F, conf.level=.95)
#continuity correction chi sqr test turned off, set to 'T' to get se
prop_test$conf.int # in R
[1] -0.06151551 0.07009017
attr(,"conf.level")
[1] 0.95
Conf_int_95 # hand calculations
[,1] [,2]
[1,] -0.06151672 0.07009138
# power analysis
power.prop.test(p1 = 0.4367816, p2 = 0.4324943, sig.level = 0.05,
power = .70, alternative = c("two.sided"))
Two-sample comparison of proportions power calculation
n = 165023.4
p1 = 0.4367816
p2 = 0.4324943
sig.level = 0.05
power = 0.7
alternative = two.sided
NOTE: n is number in *each* group
# we would need 165,024 independant cases to reject null 70% of the time
# hypothesis test (method 2)
phat_pool = 379/872
se_pool = sqrt(
((phat_pool*(1-phat_pool))/formA_n) +
((phat_pool*(1-phat_pool))/formB_n) )
z_score = (point_estimate - 0) / se_pool
pnorm(z_score, lower.tail=F)*2 # same pvalue from our prop.test results
[1] 0.8983872
rm(list=ls()) # cleaning house
######################################
# response rate by sector
# building CI & Hyp test for automotive sector
prop_test_auto <- prop.test(x=c(59,70), n=c(141,142),
alternative="two.sided", correct=F, conf.level=.95)
phat_Auto_pool = 129/283
point_est_auto = 0.4929577 - 0.4184397
se_auto_pool = sqrt(
((phat_Auto_pool*(1-phat_Auto_pool))/142) +
((phat_Auto_pool*(1-phat_Auto_pool))/141) )
z_score_auto = (point_est_auto - 0) / se_auto_pool
pnorm(z_score_auto, lower.tail=F)*2 # same pvalue from our prop.test results
[1] 0.2082113
# building CI & Hyp test for beauty sector
prop_test_beauty <- prop.test(x=c(131,119), n=c(294,295),
alternative="two.sided", correct=F, conf.level=.95)
phat_beauty_pool = 250/589
point_est_beauty = 0.4455782 - 0.4033898
se_beauty_pool = sqrt(
((phat_beauty_pool*(1-phat_beauty_pool))/294) +
((phat_beauty_pool*(1-phat_beauty_pool))/295) )
z_score_beauty = (point_est_beauty - 0) / se_beauty_pool
pnorm(z_score_beauty, lower.tail=F)*2 # same pvalue from our prop.test results
[1] 0.300307
rm(list=ls()) # cleaning house
################################################################
# contingency table for estimating proportion of responses
library(gmodels)
Warning: package 'gmodels' was built under R version 3.3.3
Response <- matrix(c(54, 64, 119, 103), byrow=T, ncol=2)
Response
[,1] [,2]
[1,] 54 64
[2,] 119 103
colnames(Response) <- c('Form A', 'Form B')
rownames(Response) <- c('Auto', 'Beauty')
# compute joint and marginal probabilities
Response_joint <- CrossTable(Response, prop.chisq=F)
Cell Contents
|-------------------------|
| N |
| N / Row Total |
| N / Col Total |
| N / Table Total |
|-------------------------|
Total Observations in Table: 340
|
| Form A | Form B | Row Total |
-------------|-----------|-----------|-----------|
Auto | 54 | 64 | 118 |
| 0.458 | 0.542 | 0.347 |
| 0.312 | 0.383 | |
| 0.159 | 0.188 | |
-------------|-----------|-----------|-----------|
Beauty | 119 | 103 | 222 |
| 0.536 | 0.464 | 0.653 |
| 0.688 | 0.617 | |
| 0.350 | 0.303 | |
-------------|-----------|-----------|-----------|
Column Total | 173 | 167 | 340 |
| 0.509 | 0.491 | |
-------------|-----------|-----------|-----------|
Response_joint
$t
Form A Form B
Auto 54 64
Beauty 119 103
$prop.row
Form A Form B
Auto 0.4576271 0.5423729
Beauty 0.5360360 0.4639640
$prop.col
Form A Form B
Auto 0.3121387 0.3832335
Beauty 0.6878613 0.6167665
$prop.tbl
Form A Form B
Auto 0.1588235 0.1882353
Beauty 0.3500000 0.3029412
###########################################################################
# visualizing proportions
prop_responses_joint = Response_joint$prop.tbl
barplot(prop_responses_joint, density=15, col=c('seagreen2', 'royalblue'),
ylab='Proportion of company responses for each form type',
xlab='Forms', main='Proportion of responses given forms')
legend('topleft', c('Automotive', 'Beauty'), pch=15, density=15,
col=c('seagreen2', 'royalblue'))