See the my toy data (here)[https://github.com/ecosang/misc/blob/master/ttt.csv]
library(tidyverse)
library(magrittr)
dat <- read_csv("https://raw.githubusercontent.com/ecosang/misc/master/ttt.csv")
## Warning: Duplicated column names deduplicated: 'E' => 'E_1' [4], 'E' =>
## 'E_2' [5], 'A' => 'A_1' [6], 'A' => 'A_2' [7], 'E' => 'E_3' [8], 'E' =>
## 'E_4' [9], 'A' => 'A_3' [10], 'A' => 'A_4' [11], 'R' => 'R_1' [13], 'R' =>
## 'R_2' [14], 'A' => 'A_5' [15], 'A' => 'A_6' [16], 'R' => 'R_3' [17], 'A' =>
## 'A_7' [18], 'A' => 'A_8' [19], 'E' => 'E_5' [20], 'E' => 'E_6' [21], 'R' =>
## 'R_4' [22]
# https://raw.githubusercontent.com/ecosang/misc/master/ttt.csv
knitr::kable(dat)
| 1 |
20.00 |
NA |
21.25 |
NA |
25.00 |
20.00 |
NA |
20.00 |
NA |
23.75 |
NA |
23.75 |
21.25 |
NA |
NA |
25.00 |
22.50 |
25.0 |
20.0 |
NA |
25.00 |
| 2 |
20.00 |
NA |
17.50 |
NA |
23.75 |
21.25 |
NA |
20.00 |
18.75 |
21.25 |
NA |
22.50 |
NA |
22.50 |
25.00 |
22.50 |
22.50 |
20.0 |
22.5 |
22.5 |
21.25 |
| 3 |
22.50 |
25.00 |
23.75 |
25.00 |
25.00 |
23.75 |
NA |
NA |
NA |
25.00 |
NA |
25.00 |
NA |
23.75 |
23.75 |
NA |
23.75 |
25.0 |
25.0 |
25.0 |
NA |
| 4 |
22.50 |
25.00 |
23.75 |
NA |
25.00 |
NA |
NA |
21.25 |
25.00 |
20.00 |
25.0 |
NA |
NA |
NA |
NA |
25.00 |
25.00 |
25.0 |
22.5 |
25.0 |
22.50 |
| 5 |
21.25 |
18.75 |
16.25 |
NA |
22.50 |
21.25 |
NA |
NA |
20.00 |
20.00 |
NA |
NA |
NA |
25.00 |
25.00 |
16.25 |
21.25 |
NA |
20.0 |
NA |
NA |
| 6 |
21.25 |
NA |
16.25 |
20.00 |
18.75 |
20.00 |
NA |
21.25 |
20.00 |
NA |
25.0 |
NA |
NA |
23.75 |
NA |
NA |
18.75 |
20.0 |
NA |
20.0 |
20.00 |
| 7 |
NA |
17.50 |
NA |
21.25 |
20.00 |
20.00 |
NA |
NA |
21.25 |
NA |
22.5 |
NA |
NA |
23.75 |
NA |
NA |
18.75 |
20.0 |
NA |
22.5 |
17.50 |
| 8 |
18.75 |
NA |
16.25 |
25.00 |
20.00 |
16.25 |
NA |
NA |
20.00 |
21.25 |
22.5 |
NA |
NA |
22.50 |
22.50 |
22.50 |
20.00 |
20.0 |
22.5 |
22.5 |
18.75 |
| 9 |
22.50 |
NA |
16.25 |
18.75 |
23.75 |
21.25 |
NA |
23.75 |
NA |
23.75 |
22.5 |
NA |
NA |
NA |
NA |
NA |
NA |
22.5 |
NA |
25.0 |
17.50 |
| 10 |
21.25 |
21.25 |
21.25 |
NA |
22.50 |
21.25 |
NA |
20.00 |
20.00 |
22.50 |
NA |
22.50 |
NA |
NA |
NA |
22.50 |
20.00 |
20.0 |
NA |
NA |
NA |
The read_csv doesn’t allow duplicated col names.
# it is similar to reshape2::melt (colmun name => variable)
new_dat <- dat %>% gather(2:dim(dat)[2], key = "subject", value = "score")
knitr::kable(new_dat %>% head())
| 1 |
A |
20 |
| 2 |
A |
20 |
| 3 |
A |
22.5 |
| 4 |
A |
22.5 |
| 5 |
A |
21.25 |
| 6 |
A |
21.25 |
# change subject as factor (e.g., A, A, A, E, E, E,... since read_csv
# doesn't allow duplicated colnames)
new_dat <- new_dat %>% mutate(subject = sapply(1:length(new_dat$subject), function(x) strsplit(new_dat$subject,
"_")[[x]][1]) %>% as.factor())
# change score as numeric
new_dat %<>% mutate(score = as.numeric(score))
knitr::kable(new_dat %>% head())
| 1 |
A |
20.00 |
| 2 |
A |
20.00 |
| 3 |
A |
22.50 |
| 4 |
A |
22.50 |
| 5 |
A |
21.25 |
| 6 |
A |
21.25 |
# sort data by score
sort_dat <- new_dat %>% arrange(subject, stu, desc(score))
# remove NA ? # it is not desired. I am not sure.
sort_dat <- sort_dat[sort_dat$score %>% is.na() %>% not(), ]
knitr::kable(sort_dat %>% head())
| 1 |
A |
25.00 |
| 1 |
A |
25.00 |
| 1 |
A |
23.75 |
| 1 |
A |
22.50 |
| 1 |
A |
20.00 |
| 1 |
A |
20.00 |
final_dat <- sort_dat %>% group_by(stu, subject) %>% mutate(idx = row_number())
knitr::kable(final_dat %>% head())
| 1 |
A |
25.00 |
1 |
| 1 |
A |
25.00 |
2 |
| 1 |
A |
23.75 |
3 |
| 1 |
A |
22.50 |
4 |
| 1 |
A |
20.00 |
5 |
| 1 |
A |
20.00 |
6 |
# filter top 3
output <- final_dat %>% filter(idx <= 3) %>% group_by(stu, subject) %>% summarize(top3_mean_score = mean(score))
knitr::kable(output)
| 1 |
A |
24.58333 |
| 1 |
E |
20.41667 |
| 1 |
R |
24.58333 |
| 2 |
A |
23.75000 |
| 2 |
E |
21.66667 |
| 2 |
R |
22.08333 |
| 3 |
A |
25.00000 |
| 3 |
E |
25.00000 |
| 3 |
R |
25.00000 |
| 4 |
A |
25.00000 |
| 4 |
E |
24.58333 |
| 4 |
R |
24.16667 |
| 5 |
A |
24.16667 |
| 5 |
E |
18.33333 |
| 5 |
R |
16.25000 |
| 6 |
A |
21.66667 |
| 6 |
E |
20.41667 |
| 6 |
R |
22.50000 |
| 7 |
A |
21.66667 |
| 7 |
E |
20.41667 |
| 7 |
R |
20.00000 |
| 8 |
A |
22.08333 |
| 8 |
E |
23.33333 |
| 8 |
R |
21.25000 |
| 9 |
A |
23.33333 |
| 9 |
E |
22.50000 |
| 9 |
R |
20.00000 |
| 10 |
A |
22.08333 |
| 10 |
E |
20.83333 |
| 10 |
R |
22.50000 |