load data
# Import the raw data; the first row of raw.csv supplies the column names.
data <- read.csv("raw.csv", header = TRUE)
# Preview the first rows to check the import.
head(data)
## ID Gender T1 T2 T3
## 1 1 2 13 9 13
## 2 2 2 13 8 12
## 3 3 2 11 7 7
## 4 4 2 14 9 10
## 5 5 2 7 6 5
## 6 6 2 13 10 11
convert to long data
library(tidyverse)
## Warning: 套件 'tidyverse' 是用 R 版本 4.1.3 來建造的
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## Warning: 套件 'ggplot2' 是用 R 版本 4.1.3 來建造的
## Warning: 套件 'tibble' 是用 R 版本 4.1.3 來建造的
## Warning: 套件 'tidyr' 是用 R 版本 4.1.3 來建造的
## Warning: 套件 'readr' 是用 R 版本 4.1.3 來建造的
## Warning: 套件 'purrr' 是用 R 版本 4.1.3 來建造的
## Warning: 套件 'dplyr' 是用 R 版本 4.1.3 來建造的
## Warning: 套件 'stringr' 是用 R 版本 4.1.3 來建造的
## Warning: 套件 'forcats' 是用 R 版本 4.1.3 來建造的
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Reshape to long format: stack the three score columns (T1, T2, T3) into
# name/value pairs, giving one row per ID and time point. Columns are
# selected by name so the call does not depend on column positions.
ldata <- pivot_longer(
  data,
  cols = c(T1, T2, T3),
  names_to = "time",
  values_to = "total"
)
head(ldata)
## # A tibble: 6 x 4
## ID Gender time total
## <int> <int> <chr> <int>
## 1 1 2 T1 13
## 2 1 2 T2 9
## 3 1 2 T3 13
## 4 2 2 T1 13
## 5 2 2 T2 8
## 6 2 2 T3 12
# Check the structure of the long data (dimensions and column types).
str(ldata)
## tibble [63 x 4] (S3: tbl_df/tbl/data.frame)
## $ ID : int [1:63] 1 1 1 2 2 2 3 3 3 4 ...
## $ Gender: int [1:63] 2 2 2 2 2 2 2 2 2 2 ...
## $ time : chr [1:63] "T1" "T2" "T3" "T1" ...
## $ total : int [1:63] 13 9 13 13 8 12 11 7 7 14 ...
convert time (character) and ID (integer) to factor variables
# Recode the time label and the subject ID as factors.
ldata <- ldata %>%
  mutate(across(c(time, ID), as.factor))
convert to wide data
# Spread the long data back to wide format: one row per ID, with one
# column per time point holding its total.
wdata <- ldata %>%
  pivot_wider(names_from = time, values_from = total)
str(wdata)
## tibble [21 x 5] (S3: tbl_df/tbl/data.frame)
## $ ID : Factor w/ 21 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Gender: int [1:21] 2 2 2 2 2 2 2 2 2 2 ...
## $ T1 : int [1:21] 13 13 11 14 7 13 8 15 13 15 ...
## $ T2 : int [1:21] 9 8 7 9 6 10 7 11 9 9 ...
## $ T3 : int [1:21] 13 12 7 10 5 11 8 15 8 13 ...
visualize the data and compute descriptive statistics
library(gplots)
## Warning: 套件 'gplots' 是用 R 版本 4.1.3 來建造的
##
## 載入套件:'gplots'
## 下列物件被遮斷自 'package:stats':
##
## lowess
# Plot total against time, drawn without a surrounding frame.
plot(total ~ time, data = ldata, frame.plot = FALSE)
# Plot the mean of total at each time point. `frame` is not a graphical
# parameter and raised warnings in the low-level plotting calls; bty = "n"
# is the graphical parameter that suppresses the box.
plotmeans(total ~ time, data = ldata, bty = "n")
# Mean of total at each time point.
aggregate(total ~ time, data = ldata, mean)
## time total
## 1 T1 11.857143
## 2 T2 8.285714
## 3 T3 9.333333
# Standard deviation of total at each time point.
aggregate(total ~ time, data = ldata, sd)
## time total
## 1 T1 3.054271
## 2 T2 2.552310
## 3 T3 3.022141
run one-way ANOVA (ignoring the repeated-measures structure)
# One-way ANOVA of total on time with a single residual term, i.e. the three
# scores from the same ID are treated as independent observations.
summary(aov(total ~ time, data = ldata))
## Df Sum Sq Mean Sq F value Pr(>F)
## time 2 141.6 70.78 8.501 0.000561 ***
## Residuals 60 499.5 8.33
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ANOVA for repeated measures
# Repeated-measures ANOVA: Error(ID) gives each subject its own error
# stratum, so time is tested against the within-subject residuals.
summary(aov(total ~ time + Error(ID), data = ldata))
##
## Error: ID
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 20 431.1 21.55
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## time 2 141.56 70.78 41.36 1.83e-10 ***
## Residuals 40 68.44 1.71
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
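Based on the F value and p-value (F(2, 40) = 41.36, p < .001), the mean total differs significantly across the three time points. Note that this omnibus test only shows that at least one time point differs from the others; pairwise comparisons would be needed to identify which ones.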
# Overall total per participant: the sum of the three time-point scores.
data$total <- data$T1 + data$T2 + data$T3
# Gender is a group code (1/2), so store it as a factor to have it treated
# as a grouping variable rather than a numeric predictor in the models below.
data$Gender <- factor(data$Gender)
# Mean overall total in each gender group.
aggregate(total ~ Gender, data = data, mean)
## Gender total
## 1 1 19.66667
## 2 2 31.11111
# Standard deviation of the overall total in each gender group.
aggregate(total ~ Gender, data = data, sd)
## Gender total
## 1 1 10.69268
## 2 2 6.54297
# Plot the mean overall total for each gender group. `frame` is not a
# graphical parameter and raised warnings in the low-level plotting calls;
# bty = "n" is the graphical parameter that suppresses the box.
plotmeans(total ~ Gender, data = data, bty = "n")
# ANOVA of the overall total on Gender.
summary(aov(total ~ Gender, data = data))
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 336.8 336.8 6.69 0.0181 *
## Residuals 19 956.4 50.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The total (T1 + T2 + T3) differs significantly between the two gender groups (F(1, 19) = 6.69, p = 0.018).