\[ \begin{eqnarray*} Null\;Hypothesis\;H_0: \mu=5\\ Alternative\;Hypothesis\;H_1: \mu\neq5\\ \end{eqnarray*} \]
\[ \begin{equation} T = \frac{\overline{X}-\mu}{s/\sqrt{n}}\\ \overline{X}: mean\;of\;x,\quad s: standard\;deviation\;of\;x\\ Under\;H_0,\quad t-distribution\; with\; degrees\;of\;freedom: n-1 \end{equation} \]
# Draw a single sample of size N from a normal distribution with mean `mu`
# and standard deviation `sigma`, then show the raw values.
N <- 10
mu <- 7
sigma <- 3
x <- rnorm(n = N, mean = mu, sd = sigma)
x
## [1] 8.441327 8.913165 7.278627 5.989883 4.065244 13.788764 10.685814
## [8] 8.740634 2.090527 7.499690
# Sample mean and sample standard deviation of the draw.
mean(x)
## [1] 7.749367
sd(x)
## [1] 3.281054
\[ \begin{equation} Z = \frac{\overline{X}-\mu}{\sigma/\sqrt{n}}\\ Under\;H_0,\quad Z \thicksim N(0, 1)\\ \end{equation} \]
# Simulate the distribution of the Z statistic when H0 (mu = 5) is true.
# Each replicate draws 10 observations from N(5, 3^2), stores the sample mean,
# and standardises it with the known sigma: z = (xbar - mu) / (sigma / sqrt(n)).
n_rep <- 10000
mu <- 5
# Preallocate the result vectors; growing them from NULL copies on every append.
xbar <- numeric(n_rep)
z <- numeric(n_rep)
for (i in seq_len(n_rep)) {
  x1 <- rnorm(10, 5, 3) # under H0
  xbar[i] <- mean(x1)
  z[i] <- (xbar[i] - mu) / (3 / sqrt(10))
}
# The simulated statistic should have mean close to 0 and sd close to 1.
mean(z)
## [1] -0.01079737
sd(z)
## [1] 0.9964213
hist(z)
# One-sample t statistic for H0: mu = 5, computed from the sample `x` of
# size `N` defined earlier in the script.
t <- (mean(x) - 5) / (sd(x) / sqrt(N))
t
## [1] 2.649839
# Two-sided p-value: twice the tail area beyond |t| under t(N - 1).
# This replaces a scalar ifelse() that evaluated pt() up to three times.
pvalue <- 2 * pt(abs(t), df = N - 1, lower.tail = FALSE)
pvalue
## [1] 0.0264832
# 1. Random Sampling
# Draw N observations from N(mu, sigma^2).
N <- 10
mu <- 7
sigma <- 3
x <- rnorm(N, mu, sigma)
mean(x)
## [1] 6.956676
sd(x)
## [1] 3.02519
# 2. Inference
# Hand-computed one-sample t statistic for H0: mu = 5.
t <- (mean(x) - 5) / (sd(x) / sqrt(N))
# Two-sided p-value: twice the tail area beyond |t| under t(N - 1).
# This replaces a scalar ifelse() that evaluated pt() up to three times.
pvalue <- 2 * pt(abs(t), df = N - 1, lower.tail = FALSE)
pvalue
## [1] 0.07114768
# Cross-check the hand computation against the built-in test.
t.test(x, alternative = "two.sided", mu = 5, conf.level = 0.95)
##
## One Sample t-test
##
## data: x
## t = 2.0453, df = 9, p-value = 0.07115
## alternative hypothesis: true mean is not equal to 5
## 95 percent confidence interval:
## 4.792585 9.120766
## sample estimates:
## mean of x
## 6.956676
# 1. Random Sampling
# A fresh draw of N observations from N(mu, sigma^2).
N <- 10
mu <- 7
sigma <- 3
x <- rnorm(N, mu, sigma)
mean(x)
## [1] 8.360525
sd(x)
## [1] 2.818144
# 2. Inference
# One-sample t statistic for H0: mu = 5.
t <- (mean(x) - 5) / (sd(x) / sqrt(N))
# Two-sided p-value: twice the tail area beyond |t| under t(N - 1).
# This replaces a scalar ifelse() that evaluated pt() up to three times.
pvalue <- 2 * pt(abs(t), df = N - 1, lower.tail = FALSE)
pvalue
## [1] 0.004410407
# Empirical power of the one-sample t test: repeat the sample-and-test
# procedure and count how often H0: mu = 5 is rejected at the 5% level.
# The constants do not change between replicates, so they are set once.
N <- 10
mu <- 7
sigma <- 3
n_rep <- 100
# Preallocate the rejection indicators (1 = rejected, 0 = not rejected).
ind <- numeric(n_rep)
for (i in seq_len(n_rep)) {
  # 1. Random Sampling
  x <- rnorm(N, mu, sigma)
  # 2. Inference
  t <- (mean(x) - 5) / (sd(x) / sqrt(N))
  # Two-sided p-value: twice the tail area beyond |t| under t(N - 1).
  pvalue <- 2 * pt(abs(t), df = N - 1, lower.tail = FALSE)
  ind[i] <- as.numeric(pvalue < 0.05)
}
# Number of rejections out of n_rep replicates.
sum(ind)
## [1] 47
True negative: When the null hypothesis (H0) is true, we make the correct decision, that is, we do not reject H0.
Type II (beta) error and false negative are the same.
Specificity (the probability of a true negative) equals (1 - probability of a Type I (alpha) error).
Sensitivity and Power are the same.
# Solve for the sample size: `n` is left NULL, so power.t.test() returns the n
# needed for the requested power with the given delta, sd and sig.level.
power.t.test(
  n = NULL,
  delta = 2,
  sd = 3,
  sig.level = 0.05,
  power = 0.8,
  type = "one.sample",
  alternative = "two.sided"
)
##
## One-sample t test power calculation
##
## n = 19.66697
## delta = 2
## sd = 3
## sig.level = 0.05
## power = 0.8
## alternative = two.sided
# Solve for the power instead: `power` is left NULL and n is fixed at 20.
power.t.test(
  n = 20,
  delta = 2,
  sd = 3,
  sig.level = 0.05,
  power = NULL,
  type = "one.sample",
  alternative = "two.sided"
)
##
## One-sample t test power calculation
##
## n = 20
## delta = 2
## sd = 3
## sig.level = 0.05
## power = 0.8072909
## alternative = two.sided
\[ \begin{eqnarray*} Null\;Hypothesis\;H_0: \mu_1=\mu_2\\ Alternative\;Hypothesis\;H_1: \mu_1\neq\mu_2\\ \ \\ \ \\ \end{eqnarray*} \]
\[ \begin{equation} \ \\ \ \\ T = \frac{(\overline{X_1}-\overline{X_2})-(\mu_1-\mu_2)}{s_{pooled}\sqrt{\frac{1}{n_1} + \frac{1}{n_2}}}\\ \overline{X_1}: mean\;of\;group\;1, \quad \overline{X_2}: mean\;of\;group\;2\\ \ \\ \ s_{pooled}: pooled\;standard\;deviation\\ \ s_{pooled} = \sqrt{\frac{(n_1-1)s_{x_1}^2+(n_2-1)s_{x_2}^2}{n_1+n_2-2}}\\ Under\;H_0,\quad t-distribution\; with\; degrees\;of\;freedom\;(Pooled): n_1+n_2-2 \end{equation} \ \\ \ \\ \]
\[ \begin{equation} \ \\ \ \\ T = \frac{(\overline{X_1}-\overline{X_2})-(\mu_1-\mu_2)}{\sqrt{\frac{s_1^2}{n_1} + \frac{s_2^2}{n_2}}}\\ \overline{X_1}: mean\;of\;group\;1, \quad \overline{X_2}: mean\;of\;group\;2\\ \ \\ \ s_1: standard\;deviation\;of\;x_1, \quad s_2: standard\;deviation\;of\;x_2\\ Under\;H_0,\quad t-distribution\; with\; degrees\;of\;freedom\;(Satterthwaite): \frac{(\frac{s_1^2}{n_1}+\frac{s_2^2}{n_2})^2}{\frac{(s_1^2/n_1)^2}{n_1-1}+\frac{(s_2^2/n_2)^2}{n_2-1}} \ \\ \ \\ \end{equation} \]
# Two independent samples with different means and different variances.
N1 <- 10
mu1 <- 7
sigma1 <- 2
N2 <- 10
mu2 <- 5
sigma2 <- 1
x1 <- rnorm(N1, mu1, sigma1)
x1
## [1] 6.300602 5.895936 9.077546 4.914760 5.871402 13.305813 10.886115
## [8] 9.295387 5.996030 7.809036
x2 <- rnorm(N2, mu2, sigma2)
x2
## [1] 4.365016 3.096977 5.735381 4.804516 6.355678 4.927263 5.239364
## [8] 5.403059 4.509544 5.580119
mean(x1)
## [1] 7.935263
mean(x2)
## [1] 5.001692
sd(x1)
## [1] 2.681118
sd(x2)
## [1] 0.8971352
# Sample standard deviations, computed once and reused below.
s1 <- sd(x1)
s2 <- sd(x2)
# Unequal-variance t statistic for H0: mu1 - mu2 = 0.
t <- (mean(x1) - mean(x2) - 0) / sqrt(s1^2 / N1 + s2^2 / N2)
t
## [1] 3.281217
# Satterthwaite approximation to the degrees of freedom.
satterthwaite <- (s1^2 / N1 + s2^2 / N2)^2 /
  ((s1^2 / N1)^2 / (N1 - 1) + (s2^2 / N2)^2 / (N2 - 1))
# Two-sided p-value: twice the tail area beyond |t|.
# This replaces a scalar ifelse() that evaluated pt() up to three times.
pvalue <- 2 * pt(abs(t), df = satterthwaite, lower.tail = FALSE)
pvalue
## [1] 0.007326015
# 1. Random Sampling
# Two independent normal samples with unequal variances.
N1 <- 10
mu1 <- 7
sigma1 <- 2
N2 <- 10
mu2 <- 5
sigma2 <- 1
x1 <- rnorm(N1, mu1, sigma1)
x2 <- rnorm(N2, mu2, sigma2)
mean(x1)
## [1] 6.910588
mean(x2)
## [1] 5.061426
sd(x1)
## [1] 2.13581
sd(x2)
## [1] 0.8626888
# 2. Inference
# Sample standard deviations, computed once and reused below.
s1 <- sd(x1)
s2 <- sd(x2)
# Unequal-variance t statistic for H0: mu1 - mu2 = 0.
t <- (mean(x1) - mean(x2) - 0) / sqrt(s1^2 / N1 + s2^2 / N2)
# Satterthwaite approximation to the degrees of freedom.
satterthwaite <- (s1^2 / N1 + s2^2 / N2)^2 /
  ((s1^2 / N1)^2 / (N1 - 1) + (s2^2 / N2)^2 / (N2 - 1))
# Two-sided p-value: twice the tail area beyond |t|.
# This replaces a scalar ifelse() that evaluated pt() up to three times.
pvalue <- 2 * pt(abs(t), df = satterthwaite, lower.tail = FALSE)
pvalue
## [1] 0.02620317
# Cross-check against the built-in test; FALSE is spelled out because `F` is
# an ordinary variable that can be reassigned.
t.test(x1, x2, alternative = "two.sided", mu = 0, conf.level = 0.95, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: x1 and x2
## t = 2.5386, df = 11.861, p-value = 0.0262
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.2600049 3.4383194
## sample estimates:
## mean of x mean of y
## 6.910588 5.061426
# 1. Random Sampling
# A fresh pair of independent normal samples with unequal variances.
N1 <- 10
mu1 <- 7
sigma1 <- 2
N2 <- 10
mu2 <- 5
sigma2 <- 1
x1 <- rnorm(N1, mu1, sigma1)
x2 <- rnorm(N2, mu2, sigma2)
mean(x1)
## [1] 7.329867
mean(x2)
## [1] 4.696522
sd(x1)
## [1] 2.432369
sd(x2)
## [1] 0.9042499
# 2. Inference
# Sample standard deviations, computed once and reused below.
s1 <- sd(x1)
s2 <- sd(x2)
# Unequal-variance t statistic for H0: mu1 - mu2 = 0.
t <- (mean(x1) - mean(x2) - 0) / sqrt(s1^2 / N1 + s2^2 / N2)
# Satterthwaite approximation to the degrees of freedom.
satterthwaite <- (s1^2 / N1 + s2^2 / N2)^2 /
  ((s1^2 / N1)^2 / (N1 - 1) + (s2^2 / N2)^2 / (N2 - 1))
# Two-sided p-value: twice the tail area beyond |t|.
# This replaces a scalar ifelse() that evaluated pt() up to three times.
pvalue <- 2 * pt(abs(t), df = satterthwaite, lower.tail = FALSE)
pvalue
## [1] 0.007939017
# Empirical power of the unequal-variance two-sample t test: repeat the
# sample-and-test procedure and count rejections of H0: mu1 = mu2 at the 5% level.
# The constants do not change between replicates, so they are set once.
N1 <- 10
mu1 <- 7
sigma1 <- 2
N2 <- 10
mu2 <- 5
sigma2 <- 1
n_rep <- 100
# Preallocate the rejection indicators (1 = rejected, 0 = not rejected).
ind <- numeric(n_rep)
for (i in seq_len(n_rep)) {
  # 1. Random Sampling
  x1 <- rnorm(N1, mu1, sigma1)
  x2 <- rnorm(N2, mu2, sigma2)
  # 2. Inference
  s1 <- sd(x1)
  s2 <- sd(x2)
  t <- (mean(x1) - mean(x2) - 0) / sqrt(s1^2 / N1 + s2^2 / N2)
  # Satterthwaite approximation to the degrees of freedom.
  satterthwaite <- (s1^2 / N1 + s2^2 / N2)^2 /
    ((s1^2 / N1)^2 / (N1 - 1) + (s2^2 / N2)^2 / (N2 - 1))
  # Two-sided p-value; the bare `pvalue` statement that used to follow was
  # dropped because nothing is auto-printed inside a for loop.
  pvalue <- 2 * pt(abs(t), df = satterthwaite, lower.tail = FALSE)
  ind[i] <- as.numeric(pvalue < 0.05)
}
# Number of rejections out of n_rep replicates.
sum(ind)
## [1] 79
# Solve for the per-group sample size: `n` is left NULL, so power.t.test()
# returns the n needed for the requested power.
# NOTE(review): sd = 3 here, while the two-sample simulation in this document
# uses sigma1 = 2 and sigma2 = 1 -- confirm this value is intended.
power.t.test(
  n = NULL,
  delta = 2,
  sd = 3,
  sig.level = 0.05,
  power = 0.8,
  type = "two.sample",
  alternative = "two.sided"
)
##
## Two-sample t test power calculation
##
## n = 36.3058
## delta = 2
## sd = 3
## sig.level = 0.05
## power = 0.8
## alternative = two.sided
##
## NOTE: n is number in *each* group
# Solve for the power instead: `power` is left NULL and n is fixed at 37 per group.
power.t.test(
  n = 37,
  delta = 2,
  sd = 3,
  sig.level = 0.05,
  power = NULL,
  type = "two.sample",
  alternative = "two.sided"
)
##
## Two-sample t test power calculation
##
## n = 37
## delta = 2
## sd = 3
## sig.level = 0.05
## power = 0.8075859
## alternative = two.sided
##
## NOTE: n is number in *each* group