Chapter 5 - Inference for Numerical Data
Practice: 5.5, 5.13, 5.19, 5.31, 5.45 Graded: 5.6, 5.14, 5.20, 5.32, 5.48
n <- 25
mean <- (65+77)/2
m.err <- (77-65)/2
# margin of error = t*sd/sqrt(n)
df <- n - 1
t <- qt(.95, df) # critical value for a 90% CI: 5% in each tail
# solve the margin-of-error formula for sd
sd <- sqrt(n) * m.err / t
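As a quick check, the recovered mean and standard deviation should reproduce the original interval (65, 77):
mean + c(-1, 1) * qt(.95, df) * sd / sqrt(n) # lower and upper bounds of the 90% CI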
# a. required sample size: n = (z * sd / m.err)^2
z <- qnorm(.95, mean = 0, sd = 1) # critical z for a 90% confidence interval
sd <- 250
m.err <- 25
n.a <- ((z*sd) / m.err) ^ 2
# b. larger or smaller sample for higher confidence?
# c. required sample size for a 99% confidence interval
z <- qnorm(.995, mean = 0, sd = 1) # critical z for a 99% confidence interval
n.c <- ((z*sd) / m.err) ^ 2
a. n.a = 270.5543454, so at least 271 subjects should be enrolled.
b. Larger. A higher confidence level uses a larger critical value, so a larger sample is needed to keep the same margin of error.
c. n.c = 663.4896601, so at least 664 subjects should be enrolled.
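Rounding the required sample sizes up to whole subjects:
ceiling(n.a) # 271 subjects for 90% confidence
ceiling(n.c) # 664 subjects for 99% confidence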
No. There is no clear difference between the average reading and writing scores; the differences in scores are centered around 0.
The reading and writing scores of each student are paired, but across students we can reasonably assume the observations (and hence the score differences) are independent.
n <- 200
mean.diff <- -0.545
df <- n-1
sd.diff <- 8.887
se <- sd.diff/sqrt(n)
t <- ((mean.diff - 0) / se)
p <- 2 * pt(t, df) # two-sided p-value (t is negative, so pt gives the lower tail)
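For comparison, a 95% confidence interval for the average difference built from the same quantities; consistent with the test result, it should contain 0:
mean.diff + c(-1, 1) * qt(.975, df) * se # 95% CI for the average difference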
If there truly is a difference, this would be a Type II error: the test shows no evidence of a difference when one actually exists, possibly because the sample is not large enough to detect it.
Yes. Since we failed to reject the null hypothesis of no difference (null value 0), we would expect a confidence interval for the average difference between the reading and writing scores to include 0.
n <- 26
auto.mean <- 16.12
auto.sd <- 3.58
man.mean <- 19.85
man.sd <- 4.51
mean.diff <- auto.mean - man.mean
se <- sqrt((auto.sd^2 / n) + (man.sd^2/n))
t <- (mean.diff - 0) / se
df <- n - 1 # conservative df: smaller sample size minus 1 (both groups have n = 26)
p <- pt(t, df) * 2 # two-tailed p-value (t is negative, so pt gives the lower tail)
The p-value 0.0028836 is smaller than 0.05, so we reject H0: the data provide convincing evidence of a difference in average fuel efficiency between automatic and manual transmissions.
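As a complementary check, a 95% confidence interval for the difference in means (automatic - manual), built from the same quantities, should not contain 0:
mean.diff + c(-1, 1) * qt(.975, df) * se # 95% CI for the difference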
# dfG = k - 1, degrees of freedom between groups
# dfE = n - k, degrees of freedom within groups (error)
# SSG = sum of squares between groups
# MSG = SSG / dfG, mean square between groups; measures the variability of the group means
# SSE = sum of squared errors
# MSE = SSE / dfE, mean square error; measures the variability within the groups
# SST = total sum of squares = SSG + SSE
# F = MSG / MSE, the ratio of between-group to within-group variability
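For reference, a minimal self-contained sketch (simulated data with hypothetical names, not the exercise data) of the table layout that aov() reports for a one-way ANOVA with k = 5 groups; the code below rebuilds the same columns by hand:
# illustrative only: simulate 5 groups and let aov() build the ANOVA table
set.seed(42)
toy <- data.frame(
  group = factor(rep(paste0("g", 1:5), each = 30)), # 5 hypothetical groups
  hours = rnorm(150, mean = 40, sd = 15)            # simulated response
)
summary(aov(hours ~ group, data = toy)) # columns: Df, Sum Sq, Mean Sq, F value, Pr(>F)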
mean <- c(38.67, 39.6, 41.39, 42.55, 40.85)
sd <- c(15.81, 14.97, 18.1, 13.62, 15.51)
n <- c(121, 546, 97, 253, 155)
df <- data.frame(mean, sd, n)
n <- sum(df$n)
k <- 5
# degrees of freedom
dof <- k - 1 # between groups
dof.res <- n - k # residual (within groups)
# recover the F statistic from the reported tail probability Pr(>F)
prf <- 0.0682
f.stat <- qf(1 - prf, dof, dof.res)
# F = MSG / MSE, so MSE = MSG / F
msg <- 501.54 # mean square between groups, given in the exercise
mse <- msg / f.stat
# MSG = SSG / dfG, so SSG = dfG * MSG
ssg <- dof * msg
sse <- 267382 # sum of squared errors, given in the exercise
# SST = SSG + SSE
sst <- ssg + sse
# dof total = dof + dof residual
dft <- dof + dof.res
degree <- c(dof, ssg, msg, f.stat, prf)
residuals <- c(dof.res, sse, mse, NA, NA)
total <- c(dof+dof.res, ssg+sse, NA, NA, NA)
table <- rbind(degree, residuals, total)
colnames(table) <- c("DF", "SUM SQ", "MEAN SQ", "F value", "Pr(>F)")
library(knitr)
kable(table)
|           |   DF |    SUM SQ |  MEAN SQ |  F value | Pr(>F) |
|-----------|-----:|----------:|---------:|---------:|-------:|
| degree    |    4 |   2006.16 | 501.5400 | 2.188931 | 0.0682 |
| residuals | 1167 | 267382.00 | 229.1255 |       NA |     NA |
| total     | 1171 | 269388.16 |       NA |       NA |     NA |
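As a consistency check on the reconstruction, the tail probability implied by the recovered F statistic should match the reported value:
pf(f.stat, dof, dof.res, lower.tail = FALSE) # should return approximately 0.0682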