library(tidyverse)

讀取資料

# Read the data set from dta.csv in the working directory; the first row of
# the file is treated as the column names.
# `header = TRUE` is spelled out in full: the original `h=T` relied on partial
# argument matching and on `T`, which is an ordinary, reassignable variable.
vocdta <- read.csv("dta.csv", header = TRUE)

檢視前六筆資料

# Preview the first six rows of the freshly loaded data.
head(vocdta, n = 6L)
##   ID gender T1 T2 T3
## 1  1      2 13  9 13
## 2  2      2 13  8 12
## 3  3      2 11  7  7
## 4  4      2 14  9 10
## 5  5      2  7  6  5
## 6  6      2 13 10 11

# Reshape from wide to long format: the measurement columns T1..T3 (columns
# 3:5 in the original call) are stacked so that each row holds one ID at one
# time point. The former column name goes into `time`, the value into `nvoc`.
# The original selector was `c(, 3:5)`; the stray leading comma passes an
# empty argument, so the columns are now selected explicitly by name.
vocdta <- pivot_longer(
  vocdta,
  cols = T1:T3,
  names_to = "time",
  values_to = "nvoc"
)

檢視long data前六筆資料

# Preview the first six rows of the long-format data.
head(vocdta, n = 6L)
## # A tibble: 6 x 4
##      ID gender time   nvoc
##   <int>  <int> <chr> <int>
## 1     1      2 T1       13
## 2     1      2 T2        9
## 3     1      2 T3       13
## 4     2      2 T1       13
## 5     2      2 T2        8
## 6     2      2 T3       12

檢視long data資料結構

# Show the column types and dimensions of the long-format data.
utils::str(vocdta)
## tibble [63 x 4] (S3: tbl_df/tbl/data.frame)
##  $ ID    : int [1:63] 1 1 1 2 2 2 3 3 3 4 ...
##  $ gender: int [1:63] 2 2 2 2 2 2 2 2 2 2 ...
##  $ time  : chr [1:63] "T1" "T2" "T3" "T1" ...
##  $ nvoc  : int [1:63] 13 9 13 13 8 12 11 7 7 14 ...

# Turn the grouping columns into factors: `time` (character) and `ID`
# (integer) are both categorical identifiers, not quantities.
vocdta <- mutate(
  vocdta,
  time = as.factor(time),
  ID = as.factor(ID)
)

# Reshape the long data back to wide format: one row per ID, with one column
# per level of `time`, filled with the matching `nvoc` values.
vocwdta <- vocdta %>%
  pivot_wider(names_from = time, values_from = nvoc)

檢視wide data資料

# Show the column types and dimensions of the wide-format data.
utils::str(vocwdta)
## tibble [21 x 5] (S3: tbl_df/tbl/data.frame)
##  $ ID    : Factor w/ 21 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ gender: int [1:21] 2 2 2 2 2 2 2 2 2 2 ...
##  $ T1    : int [1:21] 13 13 11 14 7 13 8 15 13 15 ...
##  $ T2    : int [1:21] 9 8 7 9 6 10 7 11 9 9 ...
##  $ T3    : int [1:21] 13 12 7 10 5 11 8 15 8 13 ...

畫出boxplot圖看三個時間點的題數狀況

# Plot nvoc against time from the long-format data. `time` was converted to a
# factor earlier, so the formula interface of plot() draws one box plot per
# time point. `frame.plot = FALSE` suppresses the box drawn around the plot
# region.
plot(nvoc ~ time, data = vocdta, 
      frame.plot = FALSE)

看出T2、T3有outlier

# gplots provides plotmeans(), which plots the mean of nvoc for each time
# point together with its confidence interval bars.
library(gplots)
# `frame.plot = FALSE` is written out in full (the original `frame = F` used a
# partially matched argument name and the reassignable `F`), matching the
# box plot call above.
plotmeans(nvoc ~ time, data = vocdta, frame.plot = FALSE)

看三個時間點的答題數平均

# Mean of nvoc within each time point.
aggregate(
  nvoc ~ time,
  data = vocdta,
  FUN = mean
)
##   time      nvoc
## 1   T1 11.857143
## 2   T2  8.285714
## 3   T3  9.333333

看三個時間點的答題數標準差

# Standard deviation of nvoc within each time point.
aggregate(
  nvoc ~ time,
  data = vocdta,
  FUN = sd
)
##   time     nvoc
## 1   T1 3.054271
## 2   T2 2.552310
## 3   T3 3.022141

因為本次資料大於兩組,因此需使用變異數分析(變異數分析主要是用來檢定多組相互獨立樣本的母體平均數是否具有顯著差異)。需注意:本資料為同一批受試者在三個時間點(T1、T2、T3)的重複測量,各組之間並非完全獨立,較嚴謹的做法是採用重複量數變異數分析;以下的單因子變異數分析僅作為初步比較。

# One-way ANOVA: test whether mean nvoc differs across the levels of `time`.
# NOTE(review): every ID appears at each time point, so the observations look
# like repeated measures rather than independent groups. A within-subject
# model such as aov(nvoc ~ time + Error(ID / time), data = vocdta) may be more
# appropriate -- confirm before changing, because the printed table below
# belongs to this one-way fit.
time_fit <- aov(nvoc ~ time, data = vocdta)
summary(time_fit)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## time         2  141.6   70.78   8.501 0.000561 ***
## Residuals   60  499.5    8.33                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

結論:因為 p-value(0.000561)小於顯著水準 0.05,因此拒絕「三個時間點平均答題數皆相等」的虛無假設,可判定至少有一個時間點的平均答題數與其他時間點具有顯著差異。