library(tidyverse)
讀取資料
# Load the raw data from "dta.csv" in the working directory, using the first
# row as column names. `header = TRUE` is spelled out in full: the original
# `h=T` relied on partial argument matching, and `T` is an ordinary variable
# that can be reassigned.
vocdta <- read.csv("dta.csv", header = TRUE)
檢視前六筆資料
# Preview the first six rows of the freshly loaded data.
head(vocdta, n = 6L)
## ID gender T1 T2 T3
## 1 1 2 13 9 13
## 2 2 2 13 8 12
## 3 3 2 11 7 7
## 4 4 2 14 9 10
## 5 5 2 7 6 5
## 6 6 2 13 10 11
# Convert to long format: columns 3 to 5 are stacked so that their former
# names go into `time` and their values go into `nvoc`.
# The original selection `c(, 3:5)` carried a stray leading comma, i.e. an
# empty argument (base `c(, 3:5)` is an error); the positions are now passed
# directly.
vocdta <- pivot_longer(
  vocdta,
  cols = 3:5,
  names_to = "time",
  values_to = "nvoc"
)
檢視long data前六筆資料
# Preview the first six rows of the long-format data.
head(vocdta, n = 6L)
## # A tibble: 6 x 4
## ID gender time nvoc
## <int> <int> <chr> <int>
## 1 1 2 T1 13
## 2 1 2 T2 9
## 3 1 2 T3 13
## 4 2 2 T1 13
## 5 2 2 T2 8
## 6 2 2 T3 12
檢視long data資料結構
# Show the column types and dimensions of the long-format data.
utils::str(vocdta)
## tibble [63 x 4] (S3: tbl_df/tbl/data.frame)
## $ ID : int [1:63] 1 1 1 2 2 2 3 3 3 4 ...
## $ gender: int [1:63] 2 2 2 2 2 2 2 2 2 2 ...
## $ time : chr [1:63] "T1" "T2" "T3" "T1" ...
## $ nvoc : int [1:63] 13 9 13 13 8 12 11 7 7 14 ...
# Make the measurement occasion (`time`) and the subject identifier (`ID`)
# categorical by converting both columns to factors.
vocdta[["time"]] <- as.factor(vocdta[["time"]])
vocdta[["ID"]] <- as.factor(vocdta[["ID"]])
# Reshape the long data back to wide format: every distinct value of `time`
# becomes its own column, filled with the corresponding `nvoc` value.
vocwdta <- vocdta %>%
  pivot_wider(names_from = "time", values_from = "nvoc")
檢視wide data資料
# Show the column types and dimensions of the wide-format data.
utils::str(vocwdta)
## tibble [21 x 5] (S3: tbl_df/tbl/data.frame)
## $ ID : Factor w/ 21 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ gender: int [1:21] 2 2 2 2 2 2 2 2 2 2 ...
## $ T1 : int [1:21] 13 13 11 14 7 13 8 15 13 15 ...
## $ T2 : int [1:21] 9 8 7 9 6 10 7 11 9 9 ...
## $ T3 : int [1:21] 13 12 7 10 5 11 8 15 8 13 ...
畫出 boxplot,觀察三個時間點的答題數分布
# Plot nvoc against time with the surrounding frame suppressed. Because
# `time` is a factor at this point, the formula method draws one box per
# time point.
plot(nvoc ~ time, data = vocdta, frame.plot = FALSE)
看出T2、T3有outlier
library(gplots)
# Plot the mean of nvoc at each time point with gplots::plotmeans(), without
# the surrounding frame. `FALSE` is written out because `F` is an ordinary
# variable that can be reassigned.
plotmeans(nvoc ~ time, data = vocdta, frame = FALSE)
看三個時間點的答題數平均
# Mean of nvoc within each time point.
aggregate(nvoc ~ time, data = vocdta, FUN = mean)
## time nvoc
## 1 T1 11.857143
## 2 T2 8.285714
## 3 T3 9.333333
看三個時間點的答題數標準差
# Standard deviation of nvoc within each time point.
aggregate(nvoc ~ time, data = vocdta, FUN = sd)
## time nvoc
## 1 T1 3.054271
## 2 T2 2.552310
## 3 T3 3.022141
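因為本次資料的組數大於兩組(T1、T2、T3 三個時間點),因此需使用變異數分析(變異數分析主要是用來檢定多組相互獨立樣本的母體平均數是否具有顯著差異)。需注意:本資料是同一批受試者在三個時間點的重複測量,各組之間並非相互獨立,較嚴謹的做法是採用重複量數變異數分析(repeated-measures ANOVA);以下的單因子變異數分析僅作為初步檢定。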
# One-way ANOVA: tests whether mean nvoc differs across the levels of `time`.
# NOTE(review): each ID appears to contribute one row per time point, so the
# three groups are probably not independent. A repeated-measures model such
# as aov(nvoc ~ time + Error(ID / time), data = vocdta) may be more
# appropriate -- confirm before relying on this F test.
voc_aov <- aov(nvoc ~ time, data = vocdta)
summary(voc_aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## time 2 141.6 70.78 8.501 0.000561 ***
## Residuals 60 499.5 8.33
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
結論:因為 p-value(0.000561)小於顯著水準 0.05,因此拒絕「三個時間點的平均答題數皆相等」的虛無假設,可判定至少有兩個時間點的平均答題數具有顯著差異。