Data
Search for Best Configuration
# Try every seed from M1 to M2. For each seed, split the class roll at random
# into "Red"/"Black" and record the balance statistics returned by
# red_and_black(): $Xsum (a single number) and $Values (a numeric vector).
M1 <- 1
M2 <- 1000000
n_seeds <- M2 - M1 + 1
# The roll size does not change inside the loop, so compute it once.
N <- nrow(class_roll)
# Preallocate the results. Growing them with c()/rbind() copies the whole
# object on every iteration, which is quadratic over a million seeds.
Xsum <- numeric(n_seeds)
Values_mat <- NULL
for (k in M1:M2) {
  set.seed(k)
  class_roll$group <-
    (sample(seq_len(N)) %% 2) %>%
    factor(levels = c(0, 1), labels = c("Red", "Black"))
  # Call red_and_black() once per seed and reuse both components.
  # NOTE(review): assumes $Xsum and $Values do not depend on the RNG state
  # left by a previous call -- confirm against red_and_black().
  result <- red_and_black(class_roll)
  row <- k - M1 + 1
  if (is.null(Values_mat)) {
    # The number of columns is only known once the first result is in.
    Values_mat <- matrix(
      NA_real_,
      nrow = n_seeds,
      ncol = length(result$Values)
    )
  }
  Xsum[row] <- result$Xsum
  Values_mat[row, ] <- result$Values
}
# Label the six statistic columns X1..X6.
colnames(Values_mat) <- paste0("X", seq_len(6))
# Optional inspection, left disabled:
# Values_mat
# pairs(Values_mat)
# Pairwise correlations between the six statistics, to four decimals.
round(cor(Values_mat), 4)
## X1 X2 X3 X4 X5 X6
## X1 1.0000 0.0009 0.0353 -0.0027 0.0057 -4e-04
## X2 0.0009 1.0000 -0.0006 0.0000 0.0014 -2e-03
## X3 0.0353 -0.0006 1.0000 -0.0008 0.0053 -5e-04
## X4 -0.0027 0.0000 -0.0008 1.0000 -0.0029 -2e-03
## X5 0.0057 0.0014 0.0053 -0.0029 1.0000 1e-04
## X6 -0.0004 -0.0020 -0.0005 -0.0020 0.0001 1e+00
# Tag every Xsum entry with the seed that produced it, then print the
# five-number summary and mean to two decimals.
Xsum <- setNames(Xsum, M1:M2)
round(summary(Xsum), 2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.75 16.38 20.36 21.03 24.96 67.20
# Standard deviation of Xsum across all seeds, to two decimals.
round(sd(Xsum), 2)
## [1] 6.49
# Show the entries of Xsum that are at most 6, to two decimals.
# which() is kept so that any NA entries are dropped rather than shown.
round(Xsum[which(Xsum <= 6)], 2)
## 872 2832 3337 4099 5107 6991 7338 9831 10980 12184 15650
## 5.45 5.77 5.41 5.88 5.45 5.92 5.96 4.35 5.64 4.87 5.88
## 17357 17776 19485 19855 20177 20361 20778 23621 24079 27357 31520
## 4.58 5.53 5.71 4.90 3.21 5.15 5.33 6.00 5.62 6.00 5.95
## 34490 34968 36427 37132 39741 40971 41478 42450 46976 47397 48006
## 5.66 3.19 5.95 4.60 5.86 4.95 5.65 5.99 5.13 5.39 5.69
## 48363 49392 64430 70277 71340 74234 74386 74757 78449 80843 83023
## 4.81 5.96 5.42 5.53 5.73 5.72 3.90 5.22 5.78 5.69 5.26
## 84851 85469 92774 94529 96544 97580 104923 106185 107943 114379 117930
## 4.98 4.98 4.28 4.03 4.25 5.95 5.55 6.00 5.44 5.69 4.63
## 118851 119021 119812 120164 122623 123641 125252 129757 136162 136251 136806
## 5.34 5.94 4.81 5.91 5.63 4.80 5.95 5.95 4.45 5.56 5.44
## 138177 143054 144347 145180 146752 149156 152316 155299 156627 157627 162366
## 5.58 6.00 5.83 6.00 5.98 5.81 5.60 4.43 5.39 5.34 5.67
## 163116 164400 168700 170097 170454 170850 172237 172878 173635 173760 175247
## 5.52 4.13 5.61 5.51 4.94 5.55 5.59 5.28 5.07 5.72 5.98
## 175405 175524 182101 182236 183768 185446 185583 185595 189492 195561 197896
## 5.45 5.66 5.80 5.43 3.27 4.74 5.35 5.36 5.97 4.81 4.79
## 201797 202160 210417 211182 211289 214729 216467 217767 218384 221544 222233
## 4.14 5.65 5.80 5.96 5.45 5.18 5.04 4.67 5.17 5.16 4.70
## 223963 226603 234998 238381 246047 247252 248582 253184 254786 255352 259006
## 5.82 5.24 5.45 5.31 5.58 4.63 5.90 5.71 5.42 5.71 5.13
## 259014 263307 263956 265065 265502 266001 267875 272308 273171 273813 273908
## 5.88 4.33 5.32 4.91 4.43 5.58 4.39 5.49 4.66 5.21 3.68
## 273916 274795 278739 280451 281040 284097 290847 292723 294056 296624 299209
## 4.87 4.41 5.86 5.85 5.98 5.69 5.20 4.51 5.84 5.39 5.92
## 299756 302977 303106 304927 305884 307156 307518 307784 307937 309217 310131
## 5.13 5.35 5.72 5.89 5.17 5.98 4.63 5.77 5.86 4.38 5.00
## 313231 313270 316366 318981 319184 323455 326786 326824 329013 329850 331312
## 4.60 4.82 5.45 4.87 3.78 4.65 5.89 5.89 5.94 5.55 5.69
## 331466 331773 332138 333421 334115 334255 334582 335605 337470 337900 338229
## 5.75 5.68 5.43 5.92 5.45 5.27 4.62 5.09 5.13 5.83 4.76
## 338468 340091 341130 343133 344317 345547 345629 345941 346043 346549 346679
## 5.50 5.22 4.53 5.57 5.96 5.95 4.63 5.78 5.08 4.59 5.99
## 349961 352207 353314 355512 355680 356101 362990 364968 365357 365529 367531
## 5.69 5.61 5.41 5.60 5.25 5.74 5.94 5.28 5.65 5.63 5.82
## 368010 373002 374121 381493 382216 383344 384165 387474 389129 389895 390357
## 5.34 5.68 5.61 5.97 5.93 5.29 5.82 5.84 5.53 5.87 4.74
## 393628 395484 395486 396368 400795 402586 406335 407675 412310 419938 421425
## 5.99 5.67 4.89 5.63 4.60 5.48 5.55 4.73 5.74 4.65 5.84
## 421827 422395 422805 430966 431744 432935 434658 435161 437083 437353 439996
## 6.00 5.75 5.47 5.41 5.62 5.25 5.24 5.52 5.32 5.99 5.89
## 442899 443350 455078 455385 455875 456551 456718 457873 459132 460448 461386
## 4.99 5.15 5.57 5.85 5.67 5.70 5.98 3.34 5.94 4.60 5.70
## 461549 468022 468324 468963 470268 470299 471429 474691 477594 477845 479695
## 5.77 5.95 5.84 5.68 5.72 4.94 4.41 5.94 4.40 5.57 5.64
## 481849 482627 486825 488389 490635 494911 495985 497044 500214 500327 504363
## 5.52 5.73 5.81 5.50 5.69 4.98 5.12 4.38 5.70 4.78 5.91
## 507290 512428 514707 515567 517580 519111 519192 519816 528537 529905 531184
## 5.99 5.67 4.90 4.10 4.88 5.99 4.63 5.42 5.74 5.93 5.60
## 531786 534141 535141 536794 538231 539396 540413 541481 545800 546278 546878
## 3.94 5.83 4.90 4.72 5.50 4.92 5.93 4.95 5.62 5.30 5.22
## 550118 550605 552621 553967 554219 555188 556648 556763 557210 558151 559578
## 5.07 5.35 4.21 5.97 4.43 5.30 5.24 4.43 5.98 5.99 5.94
## 560522 566611 567938 568606 570569 576823 577034 577080 578267 579974 580178
## 5.17 5.19 4.86 5.33 5.75 5.98 5.80 4.17 5.34 4.74 4.66
## 582808 582824 586832 588176 590482 590554 592793 593120 593370 593768 593958
## 5.61 5.30 5.21 4.61 5.98 5.03 5.34 5.74 4.84 5.86 4.60
## 594294 595331 597540 602936 603854 604006 604124 606301 606865 608169 612948
## 5.86 5.84 5.80 3.98 4.92 5.72 5.64 5.48 5.07 5.54 5.31
## 613557 613903 615374 615545 617099 617495 620936 628634 631057 631307 631924
## 5.30 5.35 4.59 5.62 5.89 5.17 5.98 4.63 5.77 5.30 4.49
## 632293 632419 636242 640856 642456 642663 647252 649274 649985 651062 651737
## 5.07 5.38 5.23 3.24 5.45 5.63 4.42 5.64 5.75 5.51 5.96
## 652197 652301 653095 653372 657307 658051 659194 660344 660694 662232 663811
## 5.52 5.84 5.55 4.59 5.12 5.60 4.48 5.42 5.98 5.72 5.79
## 663953 664021 664090 666973 671256 673033 674207 676405 682022 682694 687244
## 5.15 4.59 5.54 5.78 5.34 5.06 5.45 4.39 4.16 4.58 3.43
## 688015 689554 694834 695500 698162 698684 700293 700436 701024 705238 705269
## 3.86 5.09 5.48 4.84 5.84 5.58 4.27 5.93 4.30 4.58 5.20
## 705399 706031 708714 710846 711972 712865 715364 719059 720498 720904 720974
## 4.66 5.35 5.77 5.72 5.92 5.84 5.09 4.13 5.93 5.48 5.38
## 723372 723632 724527 727544 728625 729656 730239 733813 734623 736848 739674
## 5.93 4.43 5.85 5.73 4.57 4.42 5.96 5.85 5.09 5.61 5.84
## 742654 745698 748655 750919 753871 756540 762466 768824 771323 771700 771912
## 5.75 3.97 5.85 5.05 5.72 4.93 5.99 5.31 5.43 5.96 4.80
## 772876 773393 774147 774777 776107 777682 784297 784825 787227 787684 788800
## 5.75 5.29 5.73 3.35 5.38 5.51 5.12 5.78 5.74 5.98 5.40
## 790141 791451 793458 795484 796189 796451 797757 799091 799733 804988 805279
## 5.67 5.00 4.69 5.61 5.60 5.93 5.77 4.81 5.56 5.36 5.77
## 807844 809013 813791 814966 823945 825913 826120 829031 829469 834599 835735
## 5.58 6.00 5.57 5.71 5.41 5.43 5.17 4.46 5.15 4.82 5.84
## 836183 836825 837673 840518 841536 842912 843403 843874 852789 852793 853050
## 5.71 5.86 5.60 5.56 5.93 4.81 3.75 5.98 5.74 5.99 4.97
## 853825 863283 863493 865133 871225 873764 874585 874890 875550 876622 877032
## 5.53 5.03 4.97 5.71 5.24 5.31 5.89 5.55 5.36 5.10 5.31
## 877578 878552 880252 882613 885030 885350 886024 887148 892615 892708 893038
## 5.43 5.85 5.84 5.95 5.44 5.88 5.28 5.94 5.78 5.80 5.42
## 894232 894243 894542 900326 905375 906111 906256 906961 907059 908105 909362
## 5.24 5.89 5.53 5.95 4.51 5.91 5.35 5.61 5.71 5.61 4.64
## 912241 912676 913148 923657 923755 933173 934015 934037 934141 934602 935300
## 5.65 5.99 5.96 5.54 2.75 5.79 5.91 4.79 4.32 5.83 5.46
## 936218 937856 939056 939661 940337 941294 945693 948136 949897 950876 951099
## 5.96 5.73 4.65 5.66 4.94 5.43 4.78 4.97 4.46 4.81 5.69
## 951365 953396 954699 955882 956864 957281 957936 957944 958242 960199 961473
## 4.49 5.94 5.29 5.21 5.50 5.76 3.19 5.63 5.73 5.70 4.72
## 963039 968216 968449 969822 973184 974114 975099 977702 978200 979105 980882
## 3.77 5.82 4.08 3.90 5.93 5.67 4.27 5.45 4.57 5.38 4.54
## 981643 985401 987618 989618 994125 995337 996117 997224 997317 998571
## 4.43 5.68 5.78 5.82 5.83 5.97 4.97 5.69 5.39 5.59
# Name of the entry with the smallest Xsum. which.min() returns exactly one
# index (the first on ties), so Xmin is always a single value and is safe
# to pass to set.seed() later.
Xmin <- names(Xsum)[which.min(Xsum)]
Xmin
## [1] "923755"
Plot
# Grid on [0, 50] on which the reference density is drawn.
x <- seq(from = 0, to = 50, by = 0.1)

# Histogram of Xsum on the density scale, with the chi-square(21) density
# overlaid in red.
hist(
  Xsum,
  probability = TRUE,
  nclass = 30,
  xlim = c(0, 50),
  ylim = c(0, 0.065)
)
lines(x, dchisq(x, df = 21), col = "red")
legend(
  "topright",
  legend = c("Xsum", "Chi-square(21)"),
  col = c("black", "red"),
  lty = 1,
  inset = 0.05
)

# Kernel density estimate of Xsum with the same reference curve.
plot(
  density(Xsum),
  main = "Density Estimation of Xsum",
  xlim = c(0, 50)
)
lines(x, dchisq(x, df = 21), col = "red")
legend(
  "topright",
  legend = c("Xsum", "Chi-square(21)"),
  col = c("black", "red"),
  lty = 1,
  inset = 0.05
)

Randomization
# Re-create the split for the selected seed. Xmin holds the seed as a
# character string, so convert it to an integer explicitly.
set.seed(as.integer(Xmin))
N <- nrow(class_roll)
class_roll$group <- factor(
  sample(1:N) %% 2,
  levels = c(0, 1),
  labels = c("Red", "Black")
)
# Print the balance statistics for this split.
red_and_black(class_roll)
## $Values
## [1] 0.84416303 0.31845373 0.07386091 0.04853833 1.42736489 0.04256239
##
## $Xsum
## [1] 2.754943
학번
# Entries of id that compare <= 2015 are replaced by "2015"; all others are
# kept unchanged.
# NOTE(review): if id is stored as character this comparison is lexical --
# confirm that this is the intended way to pool the early years.
class_roll$id_2 <- ifelse(class_roll$id <= 2015, "2015", class_roll$id)
# Cross-tabulate group against the first four characters of id_2.
tbl1 <- table(class_roll$group, substr(class_roll$id_2, 1, 4))
colnames(tbl1) <- c("2015 이전", 2016:2021)
pander(tbl1)
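|       | 2015 이전 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 |
|:------|----------:|-----:|-----:|-----:|-----:|-----:|-----:|
| Red   |        18 |   31 |   26 |   53 |   20 |   51 |  109 |
| Black |        19 |   27 |   29 |   55 |   18 |   47 |  113 |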
# Chi-square statistic for tbl1: the first element of the chisq.test()
# result, run with a simulated p-value.
X1min <- chisq.test(tbl1, simulate.p.value = TRUE)[[1]]
X1min
## X-squared
## 0.844163
학번 홀짝
# Parity of the numeric id (remainder 1 -> "홀", remainder 0 -> "짝"),
# cross-tabulated against group.
tbl2 <- factor(
  as.numeric(class_roll$id) %% 2,
  levels = c(1, 0),
  labels = c("홀", "짝")
) %>%
  table(class_roll$group, .)
pander(tbl2)
|       |  홀 |  짝 |
|:------|----:|----:|
| Red   | 146 | 162 |
| Black | 153 | 155 |
# Chi-square statistic for tbl2: the first element of the chisq.test()
# result, run with a simulated p-value.
X2min <- chisq.test(tbl2, simulate.p.value = TRUE)[[1]]
X2min
## X-squared
## 0.3184537
학적 상태
# Cross-tabulate group against the status column.
tbl3 <- class_roll$status %>% table(class_roll$group, .)
pander(tbl3)
# Chi-square statistic for tbl3: the first element of the chisq.test()
# result, run with a simulated p-value.
X3min <- chisq.test(tbl3, simulate.p.value = TRUE)[[1]]
X3min
## X-squared
## 0.07386091
e-mail 서비스업체
# Take the part of each e-mail address after the first "@", label it
# "네이버" when it equals "naver.com" and "기타서비스" otherwise, then
# cross-tabulate against group.
tbl4 <- ifelse(
  vapply(
    strsplit(class_roll$email, "@", fixed = TRUE),
    `[`,
    character(1),
    2
  ) == "naver.com",
  "네이버",
  "기타서비스"
) %>%
  factor(levels = c("네이버", "기타서비스")) %>%
  table(class_roll$group, .)
pander(tbl4)
# Chi-square statistic for tbl4: the first element of the chisq.test()
# result, run with a simulated p-value.
X4min <- chisq.test(tbl4, simulate.p.value = TRUE)[[1]]
X4min
## X-squared
## 0.04853833
전화번호의 분포
# Labels for the ten 1000-wide bins: "0000~0999", "1000~1999", ...,
# "9000~9999".
cut_label <- paste(paste0(0:9, "000"), paste0(0:9, "999"), sep = "~")
# Characters 8 to 11 of cell_no are read as a number and binned.
# right = FALSE makes the bins [0, 1000), [1000, 2000), ..., so they match
# the labels. With cut()'s default right-closed bins, 1000 would be counted
# under "0000~0999" and 0000 would become NA.
# NOTE(review): if red_and_black() bins cell_no with the default cut()
# settings, apply the same change there so the statistics stay comparable.
tbl5 <- class_roll$cell_no %>%
  substr(start = 8, stop = 11) %>%
  as.numeric() %>%
  cut(
    breaks = seq(0, 10000, by = 1000),
    labels = cut_label,
    right = FALSE
  ) %>%
  table(class_roll$group, .)
tbl5 %>%
  pander()
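|       | 0000~0999 | 1000~1999 | 2000~2999 | 3000~3999 | 4000~4999 | 5000~5999 | 6000~6999 | 7000~7999 | 8000~8999 | 9000~9999 |
|:------|----------:|----------:|----------:|----------:|----------:|----------:|----------:|----------:|----------:|----------:|
| Red   |        24 |        30 |        32 |        29 |        25 |        31 |        27 |        37 |        41 |        32 |
| Black |        23 |        32 |        36 |        24 |        26 |        33 |        28 |        37 |        35 |        34 |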
# Chi-square statistic for tbl5: the first element of the chisq.test()
# result, run with a simulated p-value.
X5min <- chisq.test(tbl5, simulate.p.value = TRUE)[[1]]
X5min
## X-squared
## 1.427365
성씨 분포
# First character of each name.
f_name <- substring(class_roll$name, first = 1, last = 1)
# Keep "김", "이" and "박" as their own categories, pool everything else
# into "기타", and cross-tabulate against group.
tbl6 <- ifelse(f_name %in% c("김", "이", "박"), f_name, "기타") %>%
  factor(levels = c("김", "이", "박", "기타")) %>%
  table(class_roll$group, .)
pander(tbl6)
|       |  김 |  이 |  박 | 기타 |
|:------|----:|----:|----:|-----:|
| Red   |  64 |  47 |  24 |  173 |
| Black |  64 |  48 |  25 |  171 |
# Chi-square statistic for tbl6: the first element of the chisq.test()
# result, run with a simulated p-value.
X6min <- chisq.test(tbl6, simulate.p.value = TRUE)[[1]]
X6min
## X-squared
## 0.04256239
Sum of Chi_Squares
# Add the six statistics left to right. `+` is used rather than sum() so
# the result keeps the name carried by X1min.
Xsum_min <- Reduce(
  `+`,
  list(X1min, X2min, X3min, X4min, X5min, X6min)
)
Xsum_min
## X-squared
## 2.754943