HW4

Q1

# One-sample proportion problem: x occurrences observed in a sample of size n,
# tested against the null proportion p0.
p0 <- 0.08
n <- 194
x <- 21

# Point estimate of the proportion.
phat <- x / n
print(phat)

## [1] 0.1082474

# Two-sided test of H0: p = p0. correct = TRUE requests the continuity
# correction, which is why the printed title says "with continuity correction".
prop.test(x, n, p = p0, correct = TRUE, alternative = "two.sided")

## 
##  1-sample proportions test with continuity correction
## 
## data:  x out of n, null probability p0
## X-squared = 1.7369, df = 1, p-value = 0.1875
## alternative hypothesis: true p is not equal to 0.08
## 95 percent confidence interval:
##  0.06981131 0.16277150
## sample estimates:
##         p 
## 0.1082474

Q2

# Reported proportions and sample sizes for the two groups.
# Group 1 is presumably CA and group 2 OR, going by the "(CA - OR)" label on
# the interval below - confirm against the problem statement.
n1 <- 11545
p1 <- 0.080
n2 <- 4691
p2 <- 0.088

# prop.test() takes counts, so each proportion is turned back into one.
# The products are left unrounded, so the printed estimates equal p1 and p2.
x1 <- n1 * p1
x2 <- n2 * p2

# 1) 95% confidence interval for the difference in proportions (CA - OR).
#    `correct` is not set here, so the default continuity correction applies.
prop.test(
  x = c(x1, x2),
  n = c(n1, n2),
  conf.level = 0.95,
  alternative = "two.sided"
)

## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(x1, x2) out of c(n1, n2)
## X-squared = 2.7213, df = 1, p-value = 0.09902
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.01764785  0.00164785
## sample estimates:
## prop 1 prop 2 
##  0.080  0.088

# 2) Hypothesis test of H0: p1 = p2 against HA: p1 != p2, this time without
#    the continuity correction.
prop.test(
  x = c(x1, x2),
  n = c(n1, n2),
  correct = FALSE,
  alternative = "two.sided"
)

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(x1, x2) out of c(n1, n2)
## X-squared = 2.8262, df = 1, p-value = 0.09274
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.017497954  0.001497954
## sample estimates:
## prop 1 prop 2 
##  0.080  0.088

Q3

# Planning inputs for the sample-size calculation.
ME <- 0.02               # target margin of error
p <- 0.5                 # proportion plugged into p * (1 - p)
z <- qnorm(0.99)         # 98% CI leaves 1% in each tail, so z is about 2.33
cost_per_subject <- 20

# Smallest whole n that meets the margin of error (always rounded up).
n <- ceiling(z^2 * p * (1 - p) / ME^2)
print(n)

## [1] 3383

# Total cost of paying all n subjects.
total_cost <- cost_per_subject * n
print(total_cost)

## [1] 67660

n = the minimum number of participants required; total cost = the amount of money the researcher must set aside to pay them.

Q4

# Summary statistics per group: mean, standard deviation, and sample size.
xbar_auto_city <- 22.92
sd_auto_city   <- 5.29
n_auto         <- 26
xbar_man_city  <- 27.88
sd_man_city    <- 5.01
n_man          <- 26

# Each group's variance contribution, s^2 / n.
var_auto <- sd_auto_city^2 / n_auto
var_man  <- sd_man_city^2 / n_man

# Difference in means (man - auto) and its unpooled standard error.
diff_city <- xbar_man_city - xbar_auto_city
se_city   <- sqrt(var_auto + var_man)

# Degrees of freedom from the Welch-Satterthwaite approximation.
df_city <- (var_auto + var_man)^2 /
  (var_auto^2 / (n_auto - 1) + var_man^2 / (n_man - 1))

# Test statistic and two-sided p-value.
t_city <- diff_city / se_city
p_city <- 2 * pt(-abs(t_city), df_city)

# 95% confidence interval for the difference in means.
tcrit95 <- qt(0.975, df_city)
half_width <- tcrit95 * se_city
ci_city <- c(diff_city - half_width, diff_city + half_width)

t_city
df_city
p_city
ci_city

## [1] 3.471249

## [1] 49.85285

## [1] 0.001079155

## [1] 2.089799 7.830201

Q5

# Solve for the per-group sample size (n is left unspecified) of a two-sided,
# two-sample t test with 80% power at the 5% significance level, for a true
# difference of 0.5 and a standard deviation of 2.2.
power.t.test(
  power = 0.80,
  sig.level = 0.05,
  delta = 0.5,
  sd = 2.2,
  alternative = "two.sided",
  type = "two.sample"
)

## 
##      Two-sample t test power calculation 
## 
##               n = 304.872
##           delta = 0.5
##              sd = 2.2
##       sig.level = 0.05
##           power = 0.8
##     alternative = two.sided
## 
## NOTE: n is number in *each* group

# NOTE(review): this calculation sits under the Q5 heading but looks like a
# separate problem (possibly Q6, which has no heading here) - confirm.
# 95% t-based confidence interval for a mean, built from summary statistics.
n <- 5534
sample_mean <- 23.44
sample_sd <- 4.72

# Standard error of the mean and the t critical value on n - 1 df.
se <- sample_sd / sqrt(n)
tcrit <- qt(0.975, df = n - 1)  # upper 2.5% point, i.e. a 95% interval

# The interval is the sample mean plus or minus the margin of error.
margin <- tcrit * se
lower <- sample_mean - margin
upper <- sample_mean + margin
c(lower, upper)

## [1] 23.31562 23.56438

Assumptions: 1. The sample is random and representative of the population. 2. Individual observations are independent. 3. Because n = 5534 (large), the sampling distribution of the sample mean is approximately normal.

Q7

# Joint proportions for every outcome/position combination
# (rows = outcome, columns = position).
p <- rbind(
  Download   = c(Pos1 = 0.138, Pos2 = 0.146, Pos3 = 0.121),
  NoDownload = c(Pos1 = 0.183, Pos2 = 0.185, Pos3 = 0.227)
)
names(dimnames(p)) <- c("Outcome", "Position")

# Overall sample size.
N <- 701

# Turn the proportions into whole-number cell counts.
cts <- round(N * p)

# Rounding can leave the grand total different from N; any leftover is added
# to (or removed from) the first largest cell so the counts sum to exactly N.
delta <- N - sum(cts)
if (delta != 0) {
  largest <- which.max(cts)
  cts[largest] <- cts[largest] + delta
}
print(cts)

##             Position
## Outcome      Pos1 Pos2 Pos3
##   Download     97  102   85
##   NoDownload  128  130  159

# Totals by position.
colSums(cts)

## Pos1 Pos2 Pos3 
##  225  232  244

# Totals by outcome.
rowSums(cts)

##   Download NoDownload 
##        284        417

## B) One-way test: were group totals balanced? (H0: equal allocation to 3 positions)
# Goodness-of-fit test of the position totals against equal shares of 1/3.
group_totals <- colSums(cts)
equal_share <- rep(1 / 3, times = 3)
chisq_equal_alloc <- chisq.test(group_totals, p = equal_share)
print(chisq_equal_alloc)

## 
##  Chi-squared test for given probabilities
## 
## data:  group_totals
## X-squared = 0.7903, df = 2, p-value = 0.6736

# Expected count per position under equal allocation.
chisq_equal_alloc[["expected"]]

##     Pos1     Pos2     Pos3 
## 233.6667 233.6667 233.6667

# Standardized residuals: which positions deviate most.
chisq_equal_alloc[["stdres"]]

##       Pos1       Pos2       Pos3 
## -0.6943834 -0.1335353  0.8279187

## C) Two-way test: does click rate depend on position? (H0: independence)
# Pearson chi-squared test on the 2x3 count table. correct = FALSE is spelled
# out, although the continuity correction only ever applies to 2x2 tables.
chisq_indep <- chisq.test(cts, correct = FALSE)
print(chisq_indep)

## 
##  Pearson's Chi-squared test
## 
## data:  cts
## X-squared = 5.0405, df = 2, p-value = 0.08044

# Expected counts under independence.
chisq_indep[["expected"]]

##             Position
## Outcome           Pos1      Pos2      Pos3
##   Download    91.15549  93.99144  98.85307
##   NoDownload 133.84451 138.00856 145.14693

# Standardized residuals: the cells driving the signal.
chisq_indep[["stdres"]]

##             Position
## Outcome            Pos1      Pos2      Pos3
##   Download    0.9631702  1.309404 -2.237395
##   NoDownload -0.9631702 -1.309404  2.237395

# Share of each outcome within each position (each column sums to 1).
prop.table(cts, margin = 2)

##             Position
## Outcome           Pos1      Pos2      Pos3
##   Download   0.4311111 0.4396552 0.3483607
##   NoDownload 0.5688889 0.5603448 0.6516393

HW4

2025-11-12

Q1

Q2

Q3

Q4

Q5

Q7