教学目标

理解t检验和方差分析的前提条件
掌握不同情形下替代t检验和方差分析的非参数检验方法
均采用之前例子来说明程序。

情形1：因变量是称名或顺序变量 → 卡方检验

南方和北方同学高考所选志愿情况是否有差异？

# Import the survey data.
# The same file can also be loaded interactively through the menu
# File -> Import Dataset and selecting the file.
library(haven)
# read_sav() returns the data as a tibble, i.e. a data.frame subclass.
eee_data <- read_sav(
  file = "https://gitee.com/vv_victorwei/r-language-data-analysis/raw/master/Untitled2.sav"
)
# Preview the first six rows of eee_data to get an overview.
head(eee_data, n = 6L)

## # A tibble: 6 × 55
##   V1    time_used IP      id    gender   age birth…¹ roman…² hukou  hair homet…³
##   <chr>     <dbl> <chr>   <chr>  <dbl> <dbl>   <dbl>   <dbl> <dbl> <dbl>   <dbl>
## 1 175         594 223.66… 2225…      1    23       1       0     0     1       0
## 2 124         178 101.88… 2222…      1    23       1       0     0     1       0
## 3 138         207 211.16… 2225…      1    22       0       0     1     1       0
## 4 74          176 58.246… 2122…      1    23       1       0     1     1       0
## 5 149         232 120.25… 2222…      1    22       0       0     0     1       0
## 6 11          142 58.40.… 1601…      1    23       1       0     1     1       0
## # … with 44 more variables: family_income <dbl>, minzhu <dbl>, zhiyuan <dbl>,
## #   height <dbl>, weight <dbl>, zhengzhi <dbl>, personality <dbl>, a1 <dbl>,
## #   a2 <dbl>, a3 <dbl>, a4 <dbl>, a5 <dbl>, a6 <dbl>, a7 <dbl>, a8 <dbl>,
## #   a9 <dbl>, a10 <dbl>, a11 <dbl>, a12 <dbl>, a13 <dbl>, b1 <dbl>, b2 <dbl>,
## #   b3 <dbl>, b4 <dbl>, b5 <dbl>, b6 <dbl>, importance_ranking <chr>,
## #   importance_ranking_d <dbl>, daode <dbl>, zhishi <dbl>, jineng <dbl>,
## #   yishu <dbl>, food <chr>, chuan_food <dbl>, yue_food <dbl>, …

# Cross-tabulate hometown (rows) against zhiyuan (columns).
table(eee_data[["hometown"]], eee_data[["zhiyuan"]])

##    
##      1  2  3
##   0 48 17 38
##   1 30 15 27

# summary() of a contingency table reports the chi-squared test of
# independence between the two cross-classified variables.
summary(
  table(eee_data[["hometown"]], eee_data[["zhiyuan"]])
)

## Number of cases in table: 175 
## Number of factors: 2 
## Test for independence of all factors:
##  Chisq = 0.67, df = 2, p-value = 0.7153

情形2：两组独立样本均值差异，但总体分布未知+小样本（每组<30）

Wilcoxon秩和检验

# Classroom example: load the two-group score data and preview it.
library(haven)
e2_gender <- read_sav(
  file = "https://gitee.com/vv_victorwei/r-language-data-analysis/raw/master/%E5%9D%87%E5%80%BC%E5%88%86%E6%9E%90/4.5%20E2%20gender%20difference%20.sav"
)
head(e2_gender, n = 6L)

## # A tibble: 6 × 2
##   group     score
##   <dbl+lbl> <dbl>
## 1 1 [boy]    70  
## 2 1 [boy]    68.3
## 3 1 [boy]    86.7
## 4 1 [boy]    70  
## 5 1 [boy]    75.0
## 6 1 [boy]    58.0

# Turn the numeric group codes 1 and 2 into a factor labelled male / female.
e2_gender[["group_f"]] <- factor(
  e2_gender[["group"]],
  levels = c(1, 2),
  labels = c("male", "female")
)


# Wilcoxon rank-sum test: compare score between the two independent groups.
wilcox.test(score ~ group_f, data = e2_gender)

## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  score by group_f
## W = 129, p-value = 0.0565
## alternative hypothesis: true location shift is not equal to 0

情形3：两组配对样本均值差异，但总体分布未知+小样本（<30）

Wilcoxon符号秩检验

# Classroom example: load the pretest / posttest data and preview it.
library(haven)
e3_test <- read_sav(
  file = "https://gitee.com/vv_victorwei/r-language-data-analysis/raw/master/%E5%9D%87%E5%80%BC%E5%88%86%E6%9E%90/4.5%20E3%20pretest%20and%20posttest.sav"
)
head(e3_test, n = 6L)

## # A tibble: 6 × 3
##    学号 pretest posttest
##   <dbl>   <dbl>    <dbl>
## 1     4    25.8       30
## 2    96    41.5       43
## 3    12    45.5       50
## 4    13    45.7       50
## 5    77    49.4       55
## 6     6    50.4       56

# Wilcoxon signed-rank test on the pretest and posttest scores;
# paired = TRUE treats the two vectors as matched pairs.
# The argument expressions are kept as written because the printed
# "data:" label of the result is built from them.
wilcox.test(e3_test$pretest,e3_test$posttest, paired = TRUE)

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  e3_test$pretest and e3_test$posttest
## V = 1301, p-value = 2.59e-05
## alternative hypothesis: true location shift is not equal to 0

情形4：三组独立样本均值差异，但总体分布非正态+小样本

Kruskal-Wallis检验

# Load the three-group comparison data and preview the first six rows.
library(haven)
e1_3groups <- read_sav(
  file = "https://gitee.com/vv_victorwei/r-language-data-analysis/raw/master/%E5%9D%87%E5%80%BC%E5%88%86%E6%9E%90/4.6%20E1%20three%20groups%20comparison.sav"
)
head(e1_3groups, n = 6L)

## # A tibble: 6 × 2
##   grade     testscore
##   <dbl+lbl>     <dbl>
## 1 1 [小班]         70
## 2 1 [小班]         80
## 3 1 [小班]         75
## 4 1 [小班]         86
## 5 1 [小班]         77
## 6 1 [小班]         75

# Turn the numeric grade codes 1, 2 and 3 into a factor with readable labels.
e1_3groups[["grade_f"]] <- factor(
  e1_3groups[["grade"]],
  levels = c(1, 2, 3),
  labels = c("class 1", "class 2", "class 3")
)

# Kruskal-Wallis rank sum test: compare testscore across the three groups.
kruskal.test(testscore ~ grade_f, data = e1_3groups)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  testscore by grade_f
## Kruskal-Wallis chi-squared = 9.9827, df = 2, p-value = 0.006797

情形5：三组相关样本均值差异，但总体分布非正态+小样本

Friedman检验

library(haven)
# read_sav() returns the data as a tibble, i.e. a data.frame subclass.
three_waves <- read_sav(
  file = "https://gitee.com/vv_victorwei/r-language-data-analysis/raw/master/%E5%9D%87%E5%80%BC%E5%88%86%E6%9E%90/4.6%20E3%20R%20ANOVA.sav"
)
# Preview the first six rows of three_waves to get an overview.
head(three_waves, n = 6L)

## # A tibble: 6 × 3
##   score_t1 score_t2 score_t3
##      <dbl>    <dbl>    <dbl>
## 1       64       75       76
## 2       62       70       71
## 3       70       72       68
## 4       60       73       74
## 5       89       92       93
## 6       76       82       83

# Reshape the repeated-measures data from wide to long format.
# The formula interface of friedman.test() needs one row per observation
# with the columns ID, TIMEPOINT and SCORE, so the t1, t2 and t3 scores are
# stacked: the participant ids are repeated once per wave and each time
# point is repeated once per participant.

# Derive the number of participants from the data instead of hard-coding it,
# so the stacked id / time vectors stay aligned with score if the data set
# changes size.
n_subjects <- nrow(three_waves)
id <- rep(seq_len(n_subjects), times = 3)
time <- rep(1:3, each = n_subjects)
score <- c(three_waves$score_t1, three_waves$score_t2, three_waves$score_t3)
# Convert the design variables to factors.
time <- factor(time)
id <- factor(id)
# Combine the three variables into one data frame.
three_waves_new <- data.frame(id, time, score)

# Friedman rank sum test: score by time point, blocked by participant id.
friedman.test(score ~ time | id, data = three_waves_new)

## 
##  Friedman rank sum test
## 
## data:  score and time and id
## Friedman chi-squared = 10.842, df = 2, p-value = 0.004422

专题四、非参数检验

Wei Wei

2022-10-19

教学目标

情形1：因变量是称名或顺序变量 → 卡方检验

情形2：两组独立样本均值差异，但总体分布未知+小样本（每组<30）

情形3：两组配对样本均值差异，但总体分布未知+小样本（<30）

情形4：三组独立样本均值差异，但总体分布非正态+小样本

情形5：三组相关样本均值差异，但总体分布非正态+小样本