\[H_0:\mu \geq 81 \quad v.s. \quad H_1:\mu < 81\]
# Load the example 6-3 workspace; the test below reads `example6_3` from it.
load(file = "D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch6\\example6_3.RData")
# BSDA supplies z.test().
library(BSDA)
## Loading required package: lattice
##
## Attaching package: 'BSDA'
## The following object is masked from 'package:datasets':
##
## Orange
# Left-tailed one-sample z-test against mu = 81. The sample standard
# deviation is passed as sigma.x in place of a known population sigma.
z.test(
  example6_3$PM2.5值,
  mu = 81,
  sigma.x = sd(example6_3$PM2.5值),
  alternative = "less",
  conf.level = 0.95
)
##
## One-sample z-Test
##
## data: example6_3$PM2.5值
## z = -1.1856, p-value = 0.1179
## alternative hypothesis: true mean is less than 81
## 95 percent confidence interval:
## NA 81.56174
## sample estimates:
## mean of x
## 79.55
#左侧检验只显示置信区间的上限
#这里显然,总体方差未知
\[H_0:\mu =5 \quad v.s. \quad H_1:\mu \neq 5\]
# Load the example 6-4 workspace; the test below reads `example6_4` from it.
load(file = "D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch6\\example6_4.RData")
# The population variance is unknown here, so a t-test is used.
# Two-sided one-sample t-test against mu = 5 (t.test's default alternative).
t.test(example6_4$厚度, mu = 5)
##
## One Sample t-test
##
## data: example6_4$厚度
## t = -5.6273, df = 19, p-value = 1.998e-05
## alternative hypothesis: true mean is not equal to 5
## 95 percent confidence interval:
## 4.725612 4.874388
## sample estimates:
## mean of x
## 4.8
\[d=\frac{|\overline{x}-\mu_0|}{s}\] Cohen 的 d 统计量表示样本均值与假设的总体均值相差多少个标准差。
当 \(d<0.20\) 时,效应量非常小,几乎为0;
当 \(0.20\leq d<0.50\) 时,效应量小;
当 \(0.50\leq d<0.80\) 时,效应量中;
当 \(d\geq 0.80\) 时,效应量大。
# lsr supplies cohensD().
library(lsr)
# One-sample effect size of the thickness values relative to mu = 5.
cohensD(example6_4$厚度, mu = 5)
## [1] 1.258306
\[H_0:\mu_1-\mu_2 =0 \quad v.s. \quad H_1:\mu_1-\mu_2 \neq 0\]
# Load the example 6-5 workspace; the test below reads `example6_5` from it.
load(file = "D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch6\\example6_5.RData")
library(BSDA)
# The population variances are unknown here, so each sample's standard
# deviation is passed as sigma.x / sigma.y in the two-sided two-sample z-test.
z.test(
  example6_5$男生上网时间,
  example6_5$女生上网时间,
  sigma.x = sd(example6_5$男生上网时间),
  sigma.y = sd(example6_5$女生上网时间),
  alternative = "two.sided"
)
##
## Two-sample z-Test
##
## data: example6_5$男生上网时间 and example6_5$女生上网时间
## z = 1.1188, p-value = 0.2632
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1712448 0.6268003
## sample estimates:
## mean of x mean of y
## 3.058333 2.830556
\[H_0:\mu_1-\mu_2 =0 \quad v.s. \quad H_1:\mu_1-\mu_2 \neq 0\]
# Load the example 6-6 workspace; the test below reads `example6_6` from it.
load(file = "D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch6\\example6_6.RData")
# Two-sample t-test for the case where the two population variances are equal.
t.test(example6_6$甲企业, example6_6$乙企业, var.equal = TRUE)
##
## Two Sample t-test
##
## data: example6_6$甲企业 and example6_6$乙企业
## t = 3.4943, df = 38, p-value = 0.001225
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 135.24 507.76
## sample estimates:
## mean of x mean of y
## 8487.5 8166.0
# Two-sample t-test for the case where the two population variances differ
# (R reports this as the Welch test).
t.test(example6_6$甲企业, example6_6$乙企业, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: example6_6$甲企业 and example6_6$乙企业
## t = 3.4943, df = 33.683, p-value = 0.001353
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 134.4528 508.5472
## sample estimates:
## mean of x mean of y
## 8487.5 8166.0
\[d=|t|\sqrt{\frac{n_1+n_2}{n_1n_2}}\]
# lsr supplies cohensD().
library(lsr)
# Effect size for the difference between the two independent samples.
cohensD(example6_6$甲企业, example6_6$乙企业)
## [1] 1.104985
\[H_0:\mu_1-\mu_2 =0 \quad v.s. \quad H_1:\mu_1-\mu_2 \neq 0\]
# Load the example 6-7 workspace; the test below reads `example6_7` from it.
load(file = "D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch6\\example6_7.RData")
# Paired t-test on the two columns (old vs. new drink).
t.test(example6_7$旧款饮料, example6_7$新款饮料, paired = TRUE)
##
## Paired t-test
##
## data: example6_7$旧款饮料 and example6_7$新款饮料
## t = -2.7508, df = 9, p-value = 0.02245
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.3690538 -0.2309462
## sample estimates:
## mean of the differences
## -1.3
\[d=\frac{|\overline{d}|}{s_d}=\frac{|t|}{\sqrt{n}}\]
# lsr supplies cohensD().
library(lsr)
# Effect size for the paired samples, computed with method = "paired".
cohensD(example6_7$旧款饮料, example6_7$新款饮料, method = "paired")
## [1] 0.8698945
\[H_0:\pi \leq 25\% \quad v.s. \quad H_1:\pi > 25\%\] 检验统计量: \[z=\frac{p-\pi_0}{\sqrt{\frac{\pi_0(1-\pi_0)}{n}}}\]
# One-sample proportion z-test: sample proportion 450/2000 against pi_0 = 0.25.
n <- 2000
p <- 450 / 2000
pi_0 <- 0.25
# The standard error is evaluated at the hypothesised proportion pi_0.
z <- (p - pi_0) / sqrt(pi_0 * (1 - pi_0) / n)
# Upper-tail probability of the standard normal at z (right-tailed test).
p_value <- 1 - pnorm(z)
data.frame(z, p_value)
## z p_value
## 1 -2.581989 0.9950884
\[H_0:\pi_1-\pi_2 \geq 0 \quad v.s. \quad H_1:\pi_1-\pi_2 < 0\] 检验统计量: \[z=\frac{p_1-p_2}{\sqrt{p(1-p)(\frac{1}{n_1}+\frac{1}{n_2})}}\] 其中 \[p=\frac{p_1n_1+p_2n_2}{n_1+n_2}\]
# Two-sample proportion z-test using the pooled proportion.
n1 <- 200
n2 <- 200
p1 <- 0.27
p2 <- 0.35
# Pooled proportion: the sample proportions weighted by their sample sizes.
p <- (p1 * n1 + p2 * n2) / (n1 + n2)
z <- (p1 - p2) / sqrt(p * (1 - p) * (1 / n1 + 1 / n2))
# Lower-tail probability of the standard normal at z (left-tailed test).
p_value <- pnorm(z)
data.frame(z, p_value)
## z p_value
## 1 -1.729755 0.04183703
\[H_0:\pi_1-\pi_2 \geq 8\% \quad v.s. \quad H_1:\pi_1-\pi_2 < 8\%\]
检验统计量: \[z=\frac{(p_1-p_2)-d_0}{\sqrt{\frac{p_1(1-p_1)}{n_1}+\frac{p_2(1-p_2)}{n_2}}}\]
# Two-sample proportion z-test of the difference p1 - p2 against the
# hypothesised difference d_0, using the unpooled standard error.
n1 <- 300
n2 <- 300
p1 <- 33 / 300
p2 <- 84 / 300
d_0 <- 0.08
# Use d_0 itself rather than repeating the literal 0.08, so that changing the
# hypothesised difference in one place actually changes the statistic.
z <- ((p1 - p2) - d_0) / sqrt(p1 * (1 - p1) / n1 + p2 * (1 - p2) / n2)
# Lower-tail probability of the standard normal at z (left-tailed test).
p_value <- pnorm(z)
data.frame(z, p_value)
## z p_value
## 1 -7.91229 1.26348e-15
\[H_0:\sigma^2 \leq 16 \quad v.s. \quad H_1:\sigma^2 > 16\]
# Load the example 6-11 workspace; the test below reads `example6_11` from it.
load(file = "D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch6\\example6_11.RData")
# Attaching TeachingDemos masks BSDA's z.test (see the message below).
library(TeachingDemos)
##
## Attaching package: 'TeachingDemos'
## The following object is masked from 'package:BSDA':
##
## z.test
# Right-tailed one-sample chi-squared test of the variance against 16.
sigma.test(example6_11$填装量, sigmasq = 16, alternative = "greater")
##
## One sample Chi-squared test for variance
##
## data: example6_11$填装量
## X-squared = 2.9741, df = 9, p-value = 0.9653
## alternative hypothesis: true variance is greater than 16
## 95 percent confidence interval:
## 2.812522 Inf
## sample estimates:
## var of example6_11$填装量
## 5.287222
\[H_0:\frac{\sigma_1^2}{\sigma_2^2} = 1 \quad v.s. \quad H_1:\frac{\sigma_1^2}{\sigma_2^2} \neq 1\]
# Load the example 6-6 workspace; the test below reads `example6_6` from it.
load(file = "D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch6\\example6_6.RData")
# Two-sided F test comparing the variances of the first two columns.
# Selecting a column with [, i] gives the same data as selecting it with $.
var.test(example6_6[, 1], example6_6[, 2], alternative = "two.sided")
##
## F test to compare two variances
##
## data: example6_6[, 1] and example6_6[, 2]
## F = 0.47273, num df = 19, denom df = 19, p-value = 0.111
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1871127 1.1943320
## sample estimates:
## ratio of variances
## 0.4727311
# Load the example 6-3 workspace; the plots below read `example6_3` from it.
load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch6\\example6_3.RData")

# Put the two plots side by side. par() returns the previous values of the
# settings it changes; keep them so they can be restored afterwards instead
# of leaking the 1x2 layout, margins and cex into later plots.
old_par <- par(mfrow = c(1, 2), mai = c(0.7, 0.7, 0.2, 0.1), cex = 0.8)

# Normal Q-Q plot (datax = TRUE puts the data on the x axis).
qqnorm(example6_3$PM2.5值, xlab = "期望正态值", ylab = "观测值", datax = TRUE, main = "正态Q-Q图")
qqline(example6_3$PM2.5值, datax = TRUE, col = "red")

# Normal P-P plot: empirical cumulative probabilities against the cumulative
# probabilities of a normal distribution with the sample mean and sd.
f <- ecdf(example6_3$PM2.5值)
p1 <- f(example6_3$PM2.5值)
p2 <- pnorm(example6_3$PM2.5值, mean(example6_3$PM2.5值), sd(example6_3$PM2.5值))
plot(p1, p2, xlab = "观测的累积概率", ylab = "期望的累积概率", main = "正态P-P图")
# Reference line y = x.
abline(a = 0, b = 1, col = "red")

# Restore the graphics settings that were in effect before this section.
par(old_par)
\[H_0:厚度服从正态分布 \quad v.s. \quad H_1:厚度不服从正态分布\]
# Load the example 6-4 workspace; the test below reads `example6_4` from it.
load(file = "D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch6\\example6_4.RData")
# Shapiro-Wilk normality test on the thickness values.
shapiro.test(example6_4$厚度)
##
## Shapiro-Wilk normality test
##
## data: example6_4$厚度
## W = 0.91377, p-value = 0.07522
\[H_0:F(x)=F_0(x)—总体分布与指定的理论分布无显著差异 \quad v.s. \\ H_1:F(x) \neq F_0(x)—总体分布与指定的理论分布有显著差异\] 检验统计量: \[D=\max \{|S(x_i)-F_0(x_i)|\} \quad \text{or} \quad D=\max \{|S(x_i)-F_0(x_i)|,\;|S(x_{i-1})-F_0(x_i)|\}\] 其中 \(S(x)\) 为样本的经验分布函数,\(F_0(x)\) 为指定的理论分布函数。
小样本下,统计量D服从Kolmogorov分布;大样本下,用正态分布近似,此时统计量: \[z=\sqrt{n}D\] K-S检验要求理论分布的参数已知(此处即正态总体的均值和方差已知);当参数未知时,用样本均值、样本方差代替。 \[H_0:厚度服从正态分布 \quad v.s. \quad H_1:厚度不服从正态分布\]
# Load the example 6-4 workspace; the test below reads `example6_4` from it.
load(file = "D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch6\\example6_4.RData")
# One-sample Kolmogorov-Smirnov test against a normal distribution whose
# mean and sd are taken from the sample itself.
ks.test(example6_4$厚度, "pnorm", mean(example6_4$厚度), sd(example6_4$厚度))
## Warning in ks.test(example6_4$厚度, "pnorm", mean(example6_4$厚度),
## sd(example6_4$厚度)): ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: example6_4$厚度
## D = 0.23538, p-value = 0.2178
## alternative hypothesis: two-sided
\[H_0:M =5 \quad v.s. \quad H_1:M \neq 5\]
# Load the example 6-4 workspace; the test below reads `example6_4` from it.
load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch6\\example6_4.RData")
# One-sample Wilcoxon signed rank test of the location against 5.
# The argument is spelled out as `mu` instead of relying on partial matching
# of `m`, which is fragile and harder to read.
wilcox.test(example6_4$厚度, mu = 5)
## Warning in wilcox.test.default(example6_4$厚度, m = 5): cannot compute exact p-
## value with ties
## Warning in wilcox.test.default(example6_4$厚度, m = 5): cannot compute exact p-
## value with zeroes
##
## Wilcoxon signed rank test with continuity correction
##
## data: example6_4$厚度
## V = 2, p-value = 0.0005812
## alternative hypothesis: true location is not equal to 5
\[H_0:M_甲=M_乙 \quad v.s. \quad H_1:M_甲 \neq M_乙\]
# Load the example 6-6 workspace; the test below reads `example6_6` from it.
load(file = "D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch6\\example6_6.RData")
# Two-sample Wilcoxon rank sum test comparing the two independent columns.
wilcox.test(example6_6$甲企业, example6_6$乙企业)
##
## Wilcoxon rank sum exact test
##
## data: example6_6$甲企业 and example6_6$乙企业
## W = 319, p-value = 0.0009334
## alternative hypothesis: true location shift is not equal to 0
\[H_0:M_d=0 \quad (两个总体相同) \quad v.s. \quad H_1:M_d \neq 0 \quad (两个总体不相同)\]
# Load the example 6-7 workspace; the test below reads `example6_7` from it.
load(file = "D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch6\\example6_7.RData")
# Paired Wilcoxon signed rank test on the two columns (old vs. new drink).
wilcox.test(example6_7$旧款饮料, example6_7$新款饮料, paired = TRUE)
## Warning in wilcox.test.default(example6_7$旧款饮料, example6_7$新款饮料, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(example6_7$旧款饮料, example6_7$新款饮料, :
## cannot compute exact p-value with zeroes
##
## Wilcoxon signed rank test with continuity correction
##
## data: example6_7$旧款饮料 and example6_7$新款饮料
## V = 5.5, p-value = 0.04759
## alternative hypothesis: true location shift is not equal to 0