# One-sample test of a proportion against a hypothesised value.
x <- 21     # count passed to prop.test as the number of successes
n <- 194    # number of trials
p0 <- 0.08  # proportion under the null hypothesis

# Sample proportion
phat <- x / n
phat
## [1] 0.1082474

# Two-sided test of H0: p = p0, with the continuity correction switched on.
prop.test(
  x = x,
  n = n,
  p = p0,
  alternative = "two.sided",
  correct = TRUE
)
##
## 1-sample proportions test with continuity correction
##
## data: x out of n, null probability p0
## X-squared = 1.7369, df = 1, p-value = 0.1875
## alternative hypothesis: true p is not equal to 0.08
## 95 percent confidence interval:
## 0.06981131 0.16277150
## sample estimates:
## p
## 0.1082474
# data: proportion and sample size for each of the two groups
p1 <- 0.080
n1 <- 11545
p2 <- 0.088
n2 <- 4691

# Counts are rebuilt from the proportions, so they are not whole numbers here
# (923.6 and 412.808).
x1 <- n1 * p1
x2 <- n2 * p2

# 1) 95% Confidence Interval for the difference (CA - OR)
# `correct` is left at its default, so this interval is continuity-corrected.
prop.test(
  x = c(x1, x2),
  n = c(n1, n2),
  alternative = "two.sided",
  conf.level = 0.95
)
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(x1, x2) out of c(n1, n2)
## X-squared = 2.7213, df = 1, p-value = 0.09902
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.01764785 0.00164785
## sample estimates:
## prop 1 prop 2
## 0.080 0.088

# 2) Hypothesis Test (H0: p1 = p2 vs HA: p1 does not equal p2)
# Same data, this time without the continuity correction.
prop.test(
  x = c(x1, x2),
  n = c(n1, n2),
  alternative = "two.sided",
  correct = FALSE
)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(x1, x2) out of c(n1, n2)
## X-squared = 2.8262, df = 1, p-value = 0.09274
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.017497954 0.001497954
## sample estimates:
## prop 1 prop 2
## 0.080 0.088
# Inputs
z <- qnorm(0.99)        # 98% CI leaves 1% in each tail, so z is about 2.33
p <- 0.5                # planning value for the proportion
ME <- 0.02              # target margin of error
cost_per_subject <- 20  # cost per participant

# sample size (round up): n = z^2 * p * (1 - p) / ME^2
n <- ceiling(z^2 * p * (1 - p) / ME^2)
n
## [1] 3383

# Total cost
total_cost <- cost_per_subject * n
total_cost
## [1] 67660
n = the minimum number of participants required; total cost = the amount of money the researcher must set aside to pay them.
# Two-sample comparison from summary statistics (mean, SD, n) of the
# "man" and "auto" groups, using unpooled variances.
xbar_man_city <- 27.88
sd_man_city <- 5.01
n_man <- 26

xbar_auto_city <- 22.92
sd_auto_city <- 5.29
n_auto <- 26

# Variance of each group's sample mean; shared by the SE and the df formula.
var_mean_auto <- sd_auto_city^2 / n_auto
var_mean_man <- sd_man_city^2 / n_man

# Point estimate and its standard error
diff_city <- xbar_man_city - xbar_auto_city
se_city <- sqrt(var_mean_auto + var_mean_man)

# Welch-Satterthwaite approximation to the degrees of freedom
df_city <- (var_mean_auto + var_mean_man)^2 /
  (var_mean_auto^2 / (n_auto - 1) + var_mean_man^2 / (n_man - 1))

# Test statistic and two-sided p-value
t_city <- diff_city / se_city
p_city <- 2 * pt(-abs(t_city), df_city)

# 95% CI
tcrit95 <- qt(0.975, df_city)
ci_city <- diff_city + c(-1, 1) * tcrit95 * se_city

t_city
df_city
p_city
ci_city
## [1] 3.471249
## [1] 49.85285
## [1] 0.001079155
## [1] 2.089799 7.830201
# Sample size per group for a two-sided, two-sample t test that detects a
# difference of 0.5 (SD 2.2) at the 5% level with 80% power.
# The result is stored so the per-group n can be reused below.
power_res <- power.t.test(
  delta = 0.5,
  sd = 2.2,
  sig.level = 0.05,
  power = 0.80,
  type = "two.sample",
  alternative = "two.sided"
)
power_res
##
## Two-sample t test power calculation
##
## n = 304.872
## delta = 0.5
## sd = 2.2
## sig.level = 0.05
## power = 0.8
## alternative = two.sided
##
## NOTE: n is number in *each* group

# A fractional sample size cannot be recruited; round up so the achieved
# power is not below the 0.80 target.
n_per_group <- ceiling(power_res$n)
n_per_group
## [1] 305
# 95% t-interval for a mean, built from summary statistics.
n <- 5534   # sample size
x <- 23.44  # centre of the interval (used as the sample mean)
sd <- 4.72  # sample standard deviation

# Standard error of the mean and the t critical value on n - 1 df
se <- sd / sqrt(n)
tcrit <- qt(0.975, df = n - 1) # 95% CI

# Both limits at once: x -/+ tcrit * se
ci_bounds <- x + c(-1, 1) * tcrit * se
lower <- ci_bounds[1]
upper <- ci_bounds[2]
ci_bounds
## [1] 23.31562 23.56438
Assumptions: (1) The sample is random and representative of the population. (2) Individual observations are independent. (3) Because n = 5534 is large, the Central Limit Theorem implies that the sampling distribution of the sample mean is approximately normal.
# Cell proportions of the 2 x 3 Outcome-by-Position table (they sum to 1).
p <- rbind(
  Download   = c(Pos1 = 0.138, Pos2 = 0.146, Pos3 = 0.121),
  NoDownload = c(Pos1 = 0.183, Pos2 = 0.185, Pos3 = 0.227)
)
names(dimnames(p)) <- c("Outcome", "Position")

# Turn proportions into whole-number counts for a total of N observations.
N <- 701
cts <- round(p * N)

# Rounding each cell separately can leave the grand total off by a few units.
delta <- N - sum(cts)
if (delta != 0) {
  # add/subtract the leftover to the largest cell to force totals to 701
  largest_cell <- which.max(cts) # first maximum, in column-major order
  cts[largest_cell] <- cts[largest_cell] + delta
}
cts
## Position
## Outcome Pos1 Pos2 Pos3
## Download 97 102 85
## NoDownload 128 130 159
colSums(cts) # totals by position
## Pos1 Pos2 Pos3
## 225 232 244
rowSums(cts) # totals by outcome
## Download NoDownload
## 284 417
## B) One-way test: were group totals balanced? (H0: equal allocation to 3 positions)
# Column totals of the count table are the observed group sizes.
group_totals <- colSums(cts)

# Goodness-of-fit test against equal probabilities of 1/3 per position.
chisq_equal_alloc <- chisq.test(
  group_totals,
  p = rep(1 / 3, times = 3)
)
chisq_equal_alloc
##
## Chi-squared test for given probabilities
##
## data: group_totals
## X-squared = 0.7903, df = 2, p-value = 0.6736

# Expected count per position under H0
chisq_equal_alloc[["expected"]]
## Pos1 Pos2 Pos3
## 233.6667 233.6667 233.6667

# Standardized residuals: which positions deviate most
chisq_equal_alloc[["stdres"]]
## Pos1 Pos2 Pos3
## -0.6943834 -0.1335353 0.8279187
## C) Two-way test: does click rate depend on position? (H0: independence)
# Pearson chi-squared test on the 2 x 3 table of counts. The continuity
# correction is for 2 x 2 tables, so it is switched off explicitly here.
chisq_indep <- chisq.test(
  cts,
  correct = FALSE
)
chisq_indep
##
## Pearson's Chi-squared test
##
## data: cts
## X-squared = 5.0405, df = 2, p-value = 0.08044

# Expected counts under independence
chisq_indep[["expected"]]
## Position
## Outcome Pos1 Pos2 Pos3
## Download 91.15549 93.99144 98.85307
## NoDownload 133.84451 138.00856 145.14693

# Standardized residuals: cells driving the signal
chisq_indep[["stdres"]]
## Position
## Outcome Pos1 Pos2 Pos3
## Download 0.9631702 1.309404 -2.237395
## NoDownload -0.9631702 -1.309404 2.237395

# Column-wise shares: outcome rates within each position
prop.table(cts, margin = 2)
## Position
## Outcome Pos1 Pos2 Pos3
## Download 0.4311111 0.4396552 0.3483607
## NoDownload 0.5688889 0.5603448 0.6516393