Data

Search for Best Configuration

# Search seeds M1..M2 for the random Red/Black split of the class roll.
#
# For every seed k the roster is split into two groups from a random
# permutation (even rank -> "Red", odd rank -> "Black") and the split is
# scored with red_and_black().  The `$Xsum` component of each score is
# collected into `Xsum` (one entry per seed) and the `$Values` component
# into `Values_mat` (one row per seed).
M1 <- 1
M2 <- 1000000
seeds <- M1:M2
# The roster size is the same for every seed, so compute it once.
N <- nrow(class_roll)
# Preallocate one slot per seed.  Growing a vector with c() and a matrix with
# rbind() inside the loop copies the whole object on every iteration, which
# is quadratic in the number of seeds.
xsum_list <- vector("list", length(seeds))
values_list <- vector("list", length(seeds))
for (i in seq_along(seeds)) {
  k <- seeds[i]
  set.seed(k)
  class_roll$group <- factor(
    sample(seq_len(N)) %% 2,
    levels = c(0, 1),
    labels = c("Red", "Black")
  )
  # Score the split once and reuse the result for both outputs.
  # NOTE(review): assumes red_and_black() gives the same $Values when called
  # again on the same data -- confirm against its definition.
  score <- red_and_black(class_roll)
  xsum_list[[i]] <- score$Xsum
  values_list[[i]] <- score$Values
}
Xsum <- unlist(xsum_list)
Values_mat <- do.call(rbind, values_list)
# Label the six columns of Values_mat X1..X6 and print their pairwise
# correlations across seeds, rounded to four decimals.
colnames(Values_mat) <- sprintf("X%d", 1:6)
# Values_mat
# pairs(Values_mat)
round(cor(Values_mat), 4)
##         X1      X2      X3      X4      X5     X6
## X1  1.0000  0.0009  0.0353 -0.0027  0.0057 -4e-04
## X2  0.0009  1.0000 -0.0006  0.0000  0.0014 -2e-03
## X3  0.0353 -0.0006  1.0000 -0.0008  0.0053 -5e-04
## X4 -0.0027  0.0000 -0.0008  1.0000 -0.0029 -2e-03
## X5  0.0057  0.0014  0.0053 -0.0029  1.0000  1e-04
## X6 -0.0004 -0.0020 -0.0005 -0.0020  0.0001  1e+00
# Tag every total with the seed that produced it, then print the summary
# statistics rounded to two decimals.
Xsum <- setNames(Xsum, M1:M2)
round(summary(Xsum), 2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.75   16.38   20.36   21.03   24.96   67.20
# Standard deviation of the totals, rounded to two decimals.
round(sd(Xsum), 2)
## [1] 6.49
# List the seeds whose total is at most 6 (values rounded to two decimals).
# which() is kept so that missing totals, if any, are dropped rather than
# shown as NA.
round(Xsum[which(Xsum <= 6)], 2)
##    872   2832   3337   4099   5107   6991   7338   9831  10980  12184  15650 
##   5.45   5.77   5.41   5.88   5.45   5.92   5.96   4.35   5.64   4.87   5.88 
##  17357  17776  19485  19855  20177  20361  20778  23621  24079  27357  31520 
##   4.58   5.53   5.71   4.90   3.21   5.15   5.33   6.00   5.62   6.00   5.95 
##  34490  34968  36427  37132  39741  40971  41478  42450  46976  47397  48006 
##   5.66   3.19   5.95   4.60   5.86   4.95   5.65   5.99   5.13   5.39   5.69 
##  48363  49392  64430  70277  71340  74234  74386  74757  78449  80843  83023 
##   4.81   5.96   5.42   5.53   5.73   5.72   3.90   5.22   5.78   5.69   5.26 
##  84851  85469  92774  94529  96544  97580 104923 106185 107943 114379 117930 
##   4.98   4.98   4.28   4.03   4.25   5.95   5.55   6.00   5.44   5.69   4.63 
## 118851 119021 119812 120164 122623 123641 125252 129757 136162 136251 136806 
##   5.34   5.94   4.81   5.91   5.63   4.80   5.95   5.95   4.45   5.56   5.44 
## 138177 143054 144347 145180 146752 149156 152316 155299 156627 157627 162366 
##   5.58   6.00   5.83   6.00   5.98   5.81   5.60   4.43   5.39   5.34   5.67 
## 163116 164400 168700 170097 170454 170850 172237 172878 173635 173760 175247 
##   5.52   4.13   5.61   5.51   4.94   5.55   5.59   5.28   5.07   5.72   5.98 
## 175405 175524 182101 182236 183768 185446 185583 185595 189492 195561 197896 
##   5.45   5.66   5.80   5.43   3.27   4.74   5.35   5.36   5.97   4.81   4.79 
## 201797 202160 210417 211182 211289 214729 216467 217767 218384 221544 222233 
##   4.14   5.65   5.80   5.96   5.45   5.18   5.04   4.67   5.17   5.16   4.70 
## 223963 226603 234998 238381 246047 247252 248582 253184 254786 255352 259006 
##   5.82   5.24   5.45   5.31   5.58   4.63   5.90   5.71   5.42   5.71   5.13 
## 259014 263307 263956 265065 265502 266001 267875 272308 273171 273813 273908 
##   5.88   4.33   5.32   4.91   4.43   5.58   4.39   5.49   4.66   5.21   3.68 
## 273916 274795 278739 280451 281040 284097 290847 292723 294056 296624 299209 
##   4.87   4.41   5.86   5.85   5.98   5.69   5.20   4.51   5.84   5.39   5.92 
## 299756 302977 303106 304927 305884 307156 307518 307784 307937 309217 310131 
##   5.13   5.35   5.72   5.89   5.17   5.98   4.63   5.77   5.86   4.38   5.00 
## 313231 313270 316366 318981 319184 323455 326786 326824 329013 329850 331312 
##   4.60   4.82   5.45   4.87   3.78   4.65   5.89   5.89   5.94   5.55   5.69 
## 331466 331773 332138 333421 334115 334255 334582 335605 337470 337900 338229 
##   5.75   5.68   5.43   5.92   5.45   5.27   4.62   5.09   5.13   5.83   4.76 
## 338468 340091 341130 343133 344317 345547 345629 345941 346043 346549 346679 
##   5.50   5.22   4.53   5.57   5.96   5.95   4.63   5.78   5.08   4.59   5.99 
## 349961 352207 353314 355512 355680 356101 362990 364968 365357 365529 367531 
##   5.69   5.61   5.41   5.60   5.25   5.74   5.94   5.28   5.65   5.63   5.82 
## 368010 373002 374121 381493 382216 383344 384165 387474 389129 389895 390357 
##   5.34   5.68   5.61   5.97   5.93   5.29   5.82   5.84   5.53   5.87   4.74 
## 393628 395484 395486 396368 400795 402586 406335 407675 412310 419938 421425 
##   5.99   5.67   4.89   5.63   4.60   5.48   5.55   4.73   5.74   4.65   5.84 
## 421827 422395 422805 430966 431744 432935 434658 435161 437083 437353 439996 
##   6.00   5.75   5.47   5.41   5.62   5.25   5.24   5.52   5.32   5.99   5.89 
## 442899 443350 455078 455385 455875 456551 456718 457873 459132 460448 461386 
##   4.99   5.15   5.57   5.85   5.67   5.70   5.98   3.34   5.94   4.60   5.70 
## 461549 468022 468324 468963 470268 470299 471429 474691 477594 477845 479695 
##   5.77   5.95   5.84   5.68   5.72   4.94   4.41   5.94   4.40   5.57   5.64 
## 481849 482627 486825 488389 490635 494911 495985 497044 500214 500327 504363 
##   5.52   5.73   5.81   5.50   5.69   4.98   5.12   4.38   5.70   4.78   5.91 
## 507290 512428 514707 515567 517580 519111 519192 519816 528537 529905 531184 
##   5.99   5.67   4.90   4.10   4.88   5.99   4.63   5.42   5.74   5.93   5.60 
## 531786 534141 535141 536794 538231 539396 540413 541481 545800 546278 546878 
##   3.94   5.83   4.90   4.72   5.50   4.92   5.93   4.95   5.62   5.30   5.22 
## 550118 550605 552621 553967 554219 555188 556648 556763 557210 558151 559578 
##   5.07   5.35   4.21   5.97   4.43   5.30   5.24   4.43   5.98   5.99   5.94 
## 560522 566611 567938 568606 570569 576823 577034 577080 578267 579974 580178 
##   5.17   5.19   4.86   5.33   5.75   5.98   5.80   4.17   5.34   4.74   4.66 
## 582808 582824 586832 588176 590482 590554 592793 593120 593370 593768 593958 
##   5.61   5.30   5.21   4.61   5.98   5.03   5.34   5.74   4.84   5.86   4.60 
## 594294 595331 597540 602936 603854 604006 604124 606301 606865 608169 612948 
##   5.86   5.84   5.80   3.98   4.92   5.72   5.64   5.48   5.07   5.54   5.31 
## 613557 613903 615374 615545 617099 617495 620936 628634 631057 631307 631924 
##   5.30   5.35   4.59   5.62   5.89   5.17   5.98   4.63   5.77   5.30   4.49 
## 632293 632419 636242 640856 642456 642663 647252 649274 649985 651062 651737 
##   5.07   5.38   5.23   3.24   5.45   5.63   4.42   5.64   5.75   5.51   5.96 
## 652197 652301 653095 653372 657307 658051 659194 660344 660694 662232 663811 
##   5.52   5.84   5.55   4.59   5.12   5.60   4.48   5.42   5.98   5.72   5.79 
## 663953 664021 664090 666973 671256 673033 674207 676405 682022 682694 687244 
##   5.15   4.59   5.54   5.78   5.34   5.06   5.45   4.39   4.16   4.58   3.43 
## 688015 689554 694834 695500 698162 698684 700293 700436 701024 705238 705269 
##   3.86   5.09   5.48   4.84   5.84   5.58   4.27   5.93   4.30   4.58   5.20 
## 705399 706031 708714 710846 711972 712865 715364 719059 720498 720904 720974 
##   4.66   5.35   5.77   5.72   5.92   5.84   5.09   4.13   5.93   5.48   5.38 
## 723372 723632 724527 727544 728625 729656 730239 733813 734623 736848 739674 
##   5.93   4.43   5.85   5.73   4.57   4.42   5.96   5.85   5.09   5.61   5.84 
## 742654 745698 748655 750919 753871 756540 762466 768824 771323 771700 771912 
##   5.75   3.97   5.85   5.05   5.72   4.93   5.99   5.31   5.43   5.96   4.80 
## 772876 773393 774147 774777 776107 777682 784297 784825 787227 787684 788800 
##   5.75   5.29   5.73   3.35   5.38   5.51   5.12   5.78   5.74   5.98   5.40 
## 790141 791451 793458 795484 796189 796451 797757 799091 799733 804988 805279 
##   5.67   5.00   4.69   5.61   5.60   5.93   5.77   4.81   5.56   5.36   5.77 
## 807844 809013 813791 814966 823945 825913 826120 829031 829469 834599 835735 
##   5.58   6.00   5.57   5.71   5.41   5.43   5.17   4.46   5.15   4.82   5.84 
## 836183 836825 837673 840518 841536 842912 843403 843874 852789 852793 853050 
##   5.71   5.86   5.60   5.56   5.93   4.81   3.75   5.98   5.74   5.99   4.97 
## 853825 863283 863493 865133 871225 873764 874585 874890 875550 876622 877032 
##   5.53   5.03   4.97   5.71   5.24   5.31   5.89   5.55   5.36   5.10   5.31 
## 877578 878552 880252 882613 885030 885350 886024 887148 892615 892708 893038 
##   5.43   5.85   5.84   5.95   5.44   5.88   5.28   5.94   5.78   5.80   5.42 
## 894232 894243 894542 900326 905375 906111 906256 906961 907059 908105 909362 
##   5.24   5.89   5.53   5.95   4.51   5.91   5.35   5.61   5.71   5.61   4.64 
## 912241 912676 913148 923657 923755 933173 934015 934037 934141 934602 935300 
##   5.65   5.99   5.96   5.54   2.75   5.79   5.91   4.79   4.32   5.83   5.46 
## 936218 937856 939056 939661 940337 941294 945693 948136 949897 950876 951099 
##   5.96   5.73   4.65   5.66   4.94   5.43   4.78   4.97   4.46   4.81   5.69 
## 951365 953396 954699 955882 956864 957281 957936 957944 958242 960199 961473 
##   4.49   5.94   5.29   5.21   5.50   5.76   3.19   5.63   5.73   5.70   4.72 
## 963039 968216 968449 969822 973184 974114 975099 977702 978200 979105 980882 
##   3.77   5.82   4.08   3.90   5.93   5.67   4.27   5.45   4.57   5.38   4.54 
## 981643 985401 987618 989618 994125 995337 996117 997224 997317 998571 
##   4.43   5.68   5.78   5.82   5.83   5.97   4.97   5.69   5.39   5.59
# Seed label (a character string, taken from names(Xsum)) of the split with
# the smallest total.  which.min() always yields a single position -- the
# first one if several seeds tie -- whereas matching doubles with `==`
# against min(Xsum) can return more than one seed.
Xmin <- names(Xsum)[which.min(Xsum)]
Xmin
## [1] "923755"

Plot

# Compare the simulated distribution of Xsum with a chi-square density.
# NOTE(review): df = 21 presumably is the sum of the degrees of freedom of
# the six contingency tables (6 + 1 + 1 + 1 + 9 + 3) -- confirm.

# Grid on which the reference density is drawn.
x <- seq(0, 50, by = 0.1)

# Density-scaled histogram.  `freq = FALSE` and `breaks = 30` spell out what
# the partially matched `prob = TRUE` and the S-compatibility argument
# `nclass = 30` requested.
hist(
  Xsum,
  freq = FALSE,
  breaks = 30,
  xlim = c(0, 50),
  ylim = c(0, 0.065)
)
lines(x, dchisq(x, df = 21), col = "red")
legend(
  "topright",
  inset = 0.05,
  legend = c("Xsum", "Chi-square(21)"),
  col = c("black", "red"),
  lty = 1
)

# Kernel density estimate with the same reference curve.
plot(density(Xsum), xlim = c(0, 50), main = "Density Estimation of Xsum")
lines(x, dchisq(x, df = 21), col = "red")
legend(
  "topright",
  inset = 0.05,
  legend = c("Xsum", "Chi-square(21)"),
  col = c("black", "red"),
  lty = 1
)

Randomization

# Re-create the split for the seed stored in Xmin and print its score.
# The group is derived from a random permutation of 1..N: even rank -> "Red",
# odd rank -> "Black".
set.seed(Xmin)
N <- nrow(class_roll)
class_roll$group <- factor(
  sample(1:N) %% 2,
  levels = c(0, 1),
  labels = c("Red", "Black")
)
red_and_black(class_roll)
## $Values
## [1] 0.84416303 0.31845373 0.07386091 0.04853833 1.42736489 0.04256239
## 
## $Xsum
## [1] 2.754943

학번

# Group x entrance-year table.
# Ids that compare <= 2015 are replaced by "2015"; all other ids are kept
# as they are.
# NOTE(review): the comparison is made on the raw id value -- confirm the
# type of class_roll$id.
class_roll$id_2 <- ifelse(class_roll$id <= 2015, "2015", class_roll$id)
# The first four characters of id_2 are used as the year.
tbl1 <- table(class_roll$group, substr(class_roll$id_2, 1, 4))
colnames(tbl1) <- c("2015 이전", 2016:2021)
pander(tbl1)
  2015 이전 2016 2017 2018 2019 2020 2021
Red 18 31 26 53 20 51 109
Black 19 27 29 55 18 47 113
# Chi-square statistic of tbl1.  A simulated p-value is requested, but only
# the statistic (the first element of the test result) is kept.
X1min <- chisq.test(tbl1, simulate.p.value = TRUE)[[1]]
X1min
## X-squared 
##  0.844163

학번 홀짝

# Group x parity-of-id table: remainder 1 is labelled "홀", remainder 0 "짝".
tbl2 <- factor(
  as.numeric(class_roll$id) %% 2,
  levels = c(1, 0),
  labels = c("홀", "짝")
) %>%
  table(class_roll$group, .)
pander(tbl2)
 
Red 146 162
Black 153 155
# Chi-square statistic of tbl2.  A simulated p-value is requested, but only
# the statistic (the first element of the test result) is kept.
X2min <- chisq.test(tbl2, simulate.p.value = TRUE)[[1]]
X2min
## X-squared 
## 0.3184537

학적 상태

# Group x enrolment-status table.
tbl3 <- class_roll[["status"]] %>%
  table(class_roll[["group"]], .)
pander(tbl3)
  학생 휴학
Red 279 29
Black 277 31
# Chi-square statistic of tbl3.  A simulated p-value is requested, but only
# the statistic (the first element of the test result) is kept.
X3min <- chisq.test(tbl3, simulate.p.value = TRUE)[[1]]
X3min
##  X-squared 
## 0.07386091

e-mail 서비스업체

# Group x e-mail provider table.  The part after the first "@" of each
# address is compared with "naver.com"; everything else is pooled into a
# single second category.
tbl4 <- factor(
  ifelse(
    sapply(strsplit(class_roll$email, "@", fixed = TRUE), "[", 2) ==
      "naver.com",
    "네이버",
    "기타서비스"
  ),
  levels = c("네이버", "기타서비스")
) %>%
  table(class_roll$group, .)
pander(tbl4)
  네이버 기타서비스
Red 258 50
Black 260 48
# Chi-square statistic of tbl4.  A simulated p-value is requested, but only
# the statistic (the first element of the test result) is kept.
X4min <- chisq.test(tbl4, simulate.p.value = TRUE)[[1]]
X4min
##  X-squared 
## 0.04853833

전화번호의 분포

# Group x phone-number-range table.
# Characters 8-11 of cell_no are read as a number and binned into ten ranges
# of width 1000, labelled "0000~0999", ..., "9000~9999".
cut_label <- paste(paste0(0:9, "000"), paste0(0:9, "999"),
                   sep = "~")
tbl5 <- class_roll$cell_no %>%
  substr(start = 8, stop = 11) %>%
  # as.numeric() is vectorized; no element-wise sapply() is needed.
  as.numeric() %>%
  # right = FALSE makes the bins [0, 1000), [1000, 2000), ..., which is what
  # the labels say.  With cut()'s default right-closed bins a value of 0
  # becomes NA and 1000 is counted under "0000~0999".
  # NOTE(review): red_and_black() is not visible here; if it bins phone
  # numbers with right-closed intervals, its table can differ from this one
  # on boundary values -- confirm.
  cut(labels = cut_label,
      breaks = seq(0, 10000, by = 1000),
      right = FALSE) %>%
  table(class_roll$group, .)
tbl5 %>%
  pander
  0000~0999 1000~1999 2000~2999 3000~3999 4000~4999 5000~5999 6000~6999 7000~7999 8000~8999 9000~9999
Red 24 30 32 29 25 31 27 37 41 32
Black 23 32 36 24 26 33 28 37 35 34
# Chi-square statistic of tbl5.  A simulated p-value is requested, but only
# the statistic (the first element of the test result) is kept.
X5min <- chisq.test(tbl5, simulate.p.value = TRUE)[[1]]
X5min
## X-squared 
##  1.427365

성씨 분포

# Group x family-name table.  The first character of each name is taken as
# the family name; the three names listed below keep their own category and
# every other name is pooled into the fourth level.
f_name <- substring(class_roll$name, first = 1, last = 1)
tbl6 <- factor(
  ifelse(f_name %in% c("김", "이", "박"), f_name, "기타"),
  levels = c("김", "이", "박", "기타")
) %>%
  table(class_roll$group, .)
pander(tbl6)
  김 이 박 기타
Red 64 47 24 173
Black 64 48 25 171
# Chi-square statistic of tbl6.  A simulated p-value is requested, but only
# the statistic (the first element of the test result) is kept.
X6min <- chisq.test(tbl6, simulate.p.value = TRUE)[[1]]
X6min
##  X-squared 
## 0.04256239

Sum of Chi_Squares

# Total of the six statistics, added left to right so the name carried by
# the first statistic is kept on the result.
Xsum_min <- Reduce(`+`, list(X1min, X2min, X3min, X4min, X5min, X6min))
Xsum_min
## X-squared 
##  2.754943