Code IQ: Rではじめる統計解析 by Tokyo.R (2nd attempt)

Question (in Japanese):

https://codeiq.jp/ace/aad34210/q62

1.

## Load the exam results from the working directory; the first CSV column
## is used as the row names of the data frame rather than as a data column.
exam <- read.csv("exam_results.csv", row.names = 1)

2.

## Keep only the five subject score columns.
subject.columns <- c("national_language", "math", "english", "society", "science")
five.subjects <- exam[, subject.columns]

## Spread of a vector: largest value minus smallest value.
range.value <- function(x) {
    bounds <- range(x)
    bounds[2] - bounds[1]
}

## One named summary function per output column.
stat.functions <- list(mean = mean, sd = sd, max = max, min = min,
                       range = range.value)

## Rows are subjects, columns are statistics.
out2 <- sapply(stat.functions, function(stat) sapply(five.subjects, stat))
out2
                    mean     sd max min range
national_language 48.759 29.000 100   0   100
math              49.729 28.734 100   0   100
english           50.882 29.103 100   0   100
society           49.771 29.286 100   0   100
science           49.672 29.752 100   0   100

3.

library(reshape2)
## Reshape to long format: one row per student/subject pair.
exam.melt <- melt(exam,
                  id.vars = c("student_no", "city"),
                  variable.name = "subject",
                  value.name = "score")

library(plyr)
## For one subject's rows, build a city-by-statistic matrix.
summarize.by.city <- function(subject.rows) {
    stat.list <- c(mean = mean, sd = sd, max = max, min = min, range = range.value)
    sapply(stat.list, function(stat) {
        tapply(subject.rows$score, subject.rows$city, stat)
    })
}

## One summary matrix per subject, returned as a named list.
out3 <- dlply(exam.melt, "subject", summarize.by.city)
lapply(out3, invisible)
$national_language
    mean     sd max min range
A 47.950 29.984  99   0    99
B 55.581 25.948  97   0    97
C 45.917 28.353  99   1    98
D 52.158 28.257  99   1    98
E 48.737 29.928  96   0    96
F 50.500 34.220  98   1    97
G 47.692 28.168  96   1    95
H 41.263 31.715 100   0   100
I 43.714 28.323 100   1    99
J 48.268 30.435  99   1    98
K 50.688 30.523 100   2    98
L 48.464 30.287 100   0   100
M 49.703 28.117  89   0    89
N 52.619 29.529 100   1    99
O 47.324 30.873  99   0    99
P 46.175 30.391 100   0   100
Q 45.353 28.020 100   0   100
R 54.000 30.018  99   1    98
S 46.919 25.056  99   3    96
T 51.447 26.947  99   9    90
U 53.095 27.274  96   5    91
V 49.344 27.885 100   8    92
W 45.812 30.807 100   0   100
X 42.700 27.884 100   2    98
Y 48.940 29.201  99   1    98
Z 50.053 28.729 100   6    94

$math
    mean     sd max min range
A 52.525 26.549 100   0   100
B 46.395 27.947  99   1    98
C 47.889 29.720  96   2    94
D 41.605 25.954  99   3    96
E 44.421 28.993  91   1    90
F 52.763 27.768  95   2    93
G 50.564 29.816  99   0    99
H 49.158 28.602  98   1    97
I 56.486 27.759  99   0    99
J 54.927 29.720 100   0   100
K 51.396 28.739  96   1    95
L 49.500 30.737  99   4    95
M 54.054 29.080 100   2    98
N 47.952 30.977  99   2    97
O 56.459 30.735  98   1    97
P 42.700 30.283  99   2    97
Q 38.824 25.104  98   6    92
R 50.359 29.594  99   0    99
S 56.595 26.120 100   2    98
T 47.974 26.158  97   1    96
U 46.690 31.227  99   0    99
V 50.656 31.468 100   2    98
W 55.875 26.792  97   8    89
X 52.075 30.114  96   6    90
Y 47.480 28.326 100   4    96
Z 49.316 27.402  97   0    97

$english
    mean     sd max min range
A 51.300 31.894  99   1    98
B 49.907 26.245  98   6    92
C 49.722 27.976  99   3    96
D 55.079 26.660  98   7    91
E 48.421 28.226  97   1    96
F 45.632 29.471 100   4    96
G 55.077 28.225 100   5    95
H 46.763 29.912  95   0    95
I 46.886 31.419  97   0    97
J 57.488 28.642  99   2    97
K 51.750 30.582  97   1    96
L 55.786 29.759  98   1    97
M 45.027 24.438  91   3    88
N 47.786 30.524 100   3    97
O 54.838 30.203  99   0    99
P 57.350 28.911  99   0    99
Q 61.206 26.159  99   0    99
R 52.872 31.168  99   3    96
S 54.676 33.233  99   1    98
T 48.289 29.745 100   0   100
U 43.524 26.275  94   2    92
V 48.969 27.599  97   3    94
W 47.719 29.107  98   7    91
X 50.450 30.691  98   0    98
Y 49.520 29.743 100   2    98
Z 48.553 29.079  97   4    93

$society
    mean     sd max min range
A 45.625 28.337  99   0    99
B 51.116 32.807 100   3    97
C 50.444 31.527  95   3    92
D 49.395 27.679  99   2    97
E 50.579 29.389 100   3    97
F 56.421 27.653  94   3    91
G 45.564 28.092  94   4    90
H 52.447 29.891  96   0    96
I 52.514 30.116 100   0   100
J 47.927 28.376  97   1    96
K 48.062 26.216  99   6    93
L 31.036 24.312  88   1    87
M 52.405 28.265 100   5    95
N 55.333 30.520 100   1    99
O 50.838 31.759  96   2    94
P 54.300 30.515  99   3    96
Q 54.088 30.585  96   0    96
R 53.974 28.160  99   0    99
S 44.000 31.441  99   1    98
T 46.974 27.462  99   3    96
U 48.476 28.133  99   8    91
V 58.906 27.697 100   5    95
W 48.750 29.409  91   0    91
X 41.700 28.437  93   0    93
Y 51.960 30.374 100   3    97
Z 47.895 30.235 100   0   100

$science
    mean     sd max min range
A 48.475 30.079  99   5    94
B 46.279 29.618  96   1    95
C 52.167 31.656 100   0   100
D 56.579 31.007  98   1    97
E 48.947 34.554  99   1    98
F 48.158 29.138 100   1    99
G 49.641 31.792  96   0    96
H 54.763 29.730 100   2    98
I 40.543 28.015 100   1    99
J 51.268 28.183  95   0    95
K 48.229 28.956  98   0    98
L 53.214 28.919 100   0   100
M 44.054 27.818  97   1    96
N 52.024 32.153  99   0    99
O 58.568 27.843  99   0    99
P 46.225 30.783  98   0    98
Q 45.588 29.763  98   4    94
R 52.077 29.318 100   1    99
S 52.189 29.453  98   0    98
T 48.816 28.425  96   2    94
U 41.976 28.128  99   0    99
V 42.375 31.424  98   0    98
W 59.844 30.416 100   3    97
X 55.250 25.295  98   0    98
Y 43.880 29.198  98   0    98
Z 53.842 30.168  99   3    96

4.

library(ggplot2)

## Turn the summary matrix into a data frame and carry the subject names
## (its row names) as a factor whose levels fix the order on the x axis.
subject.levels <- c("national_language", "math", "english", "society", "science")
out2.df <- data.frame(out2)
out2.df[["subject"]] <- factor(rownames(out2.df), levels = subject.levels)

## Point at the mean, vertical bar from the minimum to the maximum score.
ggplot(out2.df,
       aes(x = subject, y = mean, ymin = min, ymax = max, color = subject)) +
    geom_pointrange() +
    labs(title = "mean, max, and min scores", y = "score")

plot of chunk unnamed-chunk-5

5.

## Read one exam results file and summarize the five subject columns.
##
## Args:
##   year: text inserted into the file name between "exam_results" and
##         ".csv". The default "" reads "exam_results.csv". The file is
##         looked up relative to the current working directory.
##
## Returns:
##   A numeric matrix with one row per subject (national_language, math,
##   english, society, science) and one column per statistic
##   (mean, sd, max, min, range).
##
## Stops with an error naming the missing columns if the file does not
## contain all five subject columns.
read.and.summarize <- function(year = "") {

    subjects <- c("national_language", "math", "english", "society", "science")

    path <- paste0("exam_results", year, ".csv")
    exam <- read.csv(path)

    ## Fail early with a message that names the problem instead of the
    ## generic "undefined columns selected" raised by the subsetting below.
    missing.subjects <- setdiff(subjects, names(exam))
    if (length(missing.subjects) > 0) {
        stop("columns missing from ", path, ": ",
             paste(missing.subjects, collapse = ", "),
             call. = FALSE)
    }

    five.subjects <- exam[, subjects]

    range.value <- function(x) diff(range(x))

    stats <- c(mean = mean, sd = sd, max = max, min = min, range = range.value)

    ## vapply pins the result shape: each statistic must yield exactly one
    ## number per subject, so the result is always a subjects x stats matrix.
    vapply(stats,
           function(FUN) vapply(five.subjects, FUN, numeric(1)),
           numeric(length(subjects)))
}

## Pass the year as a string; it is inserted into the file name as "exam_results<year>.csv".
## read.and.summarize()