Data
Search for Best Configuration
# Range of candidate random seeds to scan (1,000,000 consecutive integers).
M1 <- 6000001
M2 <- 7000000
# Score every candidate seed with red_and_black() (defined outside this
# section; presumably the sum of the chi-square statistics computed below).
# vapply() is used instead of sapply() so the result is guaranteed to be a
# numeric vector and the scan stops immediately if any call returns something
# other than a single number.
Xsum <- vapply(M1:M2, red_and_black, numeric(1))
# Name each score by its seed so the seed can be recovered from the result.
names(Xsum) <- M1:M2
# Summary statistics of the scores, rounded to two decimals.
Xsum %>%
  summary %>%
  round(2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.94 16.36 20.36 21.03 24.98 66.80
# Standard deviation of the scores over all scanned seeds, to two decimals.
round(sd(Xsum), 2)
## [1] 6.5
# Seeds whose score is at most 5, shown with their scores (two decimals).
# which() converts the logical mask to positions before subsetting.
round(Xsum[which(Xsum <= 5)], 2)
## 6001314 6003188 6015041 6044584 6047857 6052567 6061663 6062903 6066215 6071182
## 4.27 4.95 4.59 4.43 4.26 4.51 4.27 4.67 4.07 3.62
## 6080564 6091575 6092144 6104876 6112351 6117966 6118713 6127742 6128067 6130231
## 4.95 4.85 4.94 4.96 4.94 3.01 4.08 4.76 4.57 4.91
## 6134976 6137865 6142178 6145637 6152527 6154813 6159876 6166688 6175296 6204692
## 4.14 4.74 4.64 3.88 4.97 4.74 4.65 4.73 4.85 4.92
## 6204847 6208675 6218670 6220020 6223526 6231485 6232695 6244760 6251672 6265867
## 4.56 4.84 4.94 4.67 4.11 4.93 4.96 4.96 4.12 4.84
## 6270946 6276347 6281013 6284939 6288274 6291333 6300203 6301798 6304377 6314112
## 4.97 4.79 4.64 4.73 4.65 4.57 3.33 4.96 4.42 3.47
## 6321218 6345157 6364749 6381020 6386216 6406986 6412035 6414954 6415879 6419752
## 4.99 4.33 4.70 4.98 4.38 4.85 3.42 4.99 4.97 4.60
## 6425830 6426053 6436529 6447190 6495569 6504032 6527706 6528341 6529613 6532852
## 4.29 4.67 4.69 4.70 4.35 3.39 4.60 3.17 4.91 4.40
## 6533253 6534526 6535932 6541274 6551107 6556068 6557320 6557365 6562253 6573601
## 4.82 4.19 4.19 5.00 4.76 4.64 4.33 4.75 4.77 4.70
## 6575193 6575909 6578220 6579968 6581972 6587225 6592552 6611006 6614563 6618686
## 4.38 4.35 4.13 4.64 4.16 2.94 4.77 4.83 4.96 4.62
## 6631168 6633624 6642924 6643452 6662807 6664536 6677421 6688302 6698035 6703998
## 4.97 4.03 4.82 4.75 4.73 4.76 4.65 4.61 4.95 4.62
## 6713496 6728370 6729717 6736067 6739154 6744817 6745073 6745157 6766924 6768064
## 4.79 4.88 5.00 4.06 4.70 4.60 4.77 4.69 4.43 4.90
## 6768787 6775365 6784802 6794919 6808448 6812467 6831091 6833245 6843605 6863214
## 4.91 4.79 4.32 4.93 4.26 4.62 4.76 4.02 4.10 4.66
## 6886558 6903446 6923137 6926136 6926640 6928369 6937487 6941244 6959961 6977797
## 4.41 4.79 3.37 4.71 4.65 4.84 4.47 4.65 3.90 4.82
## 6979643 6979766 6991091 6991820
## 4.92 4.07 4.91 3.47
# Seed (kept as a character string, i.e. the element name) with the smallest
# score. which.min() always yields exactly one position -- the first one when
# several seeds tie for the minimum -- so Xmin is a single value that can be
# used as a seed.
Xmin <- names(which.min(Xsum))
Xmin
## [1] "6587225"
Plot
# Histogram of the scores on the density scale, with the chi-square density
# on 21 degrees of freedom drawn on top in red for comparison.
hist(
  Xsum,
  probability = TRUE,
  nclass = 30,
  xlim = c(0, 50),
  ylim = c(0, 0.065)
)
# Grid on which the reference density is evaluated.
x <- seq(from = 0, to = 50, by = 0.1)
lines(x, dchisq(x, df = 21), col = "red")
legend(
  "topright",
  inset = 0.05,
  legend = c("Xsum", "Chi-square(21)"),
  col = c("black", "red"),
  lty = 1
)

# Same comparison using a kernel density estimate instead of a histogram.
plot(
  density(Xsum),
  xlim = c(0, 50),
  ylim = c(0, 0.065),
  main = "Density Estimation of Xsum"
)
lines(x, dchisq(x, df = 21), col = "red")
legend(
  "topright",
  inset = 0.05,
  legend = c("Xsum", "Chi-square(21)"),
  col = c("black", "red"),
  lty = 1
)

Randomization
# Reproduce the group assignment for the selected seed.
set.seed(Xmin)
N <- nrow(class_roll)
# Shuffle the integers 1..N and label each row by the parity of its shuffled
# number: even (remainder 0) -> "Red", odd (remainder 1) -> "Black".
class_roll$group <- factor(
  sample(1:N) %% 2,
  levels = c(0, 1),
  labels = c("Red", "Black")
)
# Score returned by red_and_black() for this seed.
red_and_black(Xmin)
## [1] 2.935213
학번
# Replace every id that compares <= 2016 with the single label "2016";
# all other ids are kept unchanged.
class_roll$id_2 <- ifelse(class_roll$id <= 2016, "2016", class_roll$id)
# Cross-tabulate group against the first four characters of id_2
# (presumably the admission year -- confirm against the id format).
tbl1 <- table(class_roll$group, substr(class_roll$id_2, 1, 4))
# Column labels: one pooled column followed by the years 2017 to 2022.
colnames(tbl1) <- c("2016 이전", 2017:2022)
pander(tbl1)
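|       | 2016 이전 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
|:------|----------:|-----:|-----:|-----:|-----:|-----:|-----:|
| Red   |        14 |   26 |   59 |   64 |   44 |   71 |  228 |
| Black |        16 |   32 |   53 |   67 |   40 |   65 |  234 |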
# Chi-square statistic of the group-by-year table. Only the statistic (the
# first component of the test result) is kept.
X1min <- chisq.test(tbl1, simulate.p.value = TRUE)$statistic
X1min
## X-squared
## 1.676272
학번 홀짝
# Cross-tabulate group against the parity of the numeric id:
# remainder 1 -> "홀" (odd), remainder 0 -> "짝" (even).
tbl2 <- table(
  class_roll$group,
  factor(
    as.numeric(class_roll$id) %% 2,
    levels = c(1, 0),
    labels = c("홀", "짝")
  ),
  # Same dimension labels as the piped call table(class_roll$group, .).
  dnn = c("", ".")
)
pander(tbl2)
|       |  홀 |  짝 |
|:------|----:|----:|
| Red   | 265 | 241 |
| Black | 273 | 234 |
# Chi-square statistic of the group-by-parity table. With
# simulate.p.value = TRUE, chisq.test() applies no continuity correction to
# this 2 x 2 table.
X2min <- chisq.test(tbl2, simulate.p.value = TRUE)$statistic
X2min
## X-squared
## 0.2211301
학적 상태
# Cross-tabulate group against the status column.
tbl3 <- table(
  class_roll$group,
  class_roll$status,
  # Same dimension labels as the piped call table(class_roll$group, .).
  dnn = c("", ".")
)
pander(tbl3)
# Chi-square statistic of the group-by-status table.
X3min <- chisq.test(tbl3, simulate.p.value = TRUE)$statistic
X3min
## X-squared
## 8.925952e-05
e-mail 서비스업체
# Cross-tabulate group against the e-mail provider: "네이버" when the part
# after "@" is exactly "naver.com", "기타서비스" otherwise.
tbl4 <- class_roll$email %>%
  strsplit("@", fixed = TRUE) %>%
  # Second piece of each split address. vapply() pins the result to a
  # character vector (one string per address, NA when there is no second
  # piece), whereas sapply() changes its return type with the input.
  vapply("[", character(1), 2) %>%
  `==`("naver.com") %>%
  ifelse("네이버", "기타서비스") %>%
  factor(levels = c("네이버", "기타서비스")) %>%
  table(class_roll$group, .)
tbl4 %>%
  pander
# Chi-square statistic of the group-by-provider table (first component of
# the test result).
X4min <- tbl4 %>%
  chisq.test(simulate.p.value = TRUE) %>%
  `[[`(1)
X4min
## X-squared
## 0.4808161
전화번호의 분포
# Bin labels "0000~0999", "1000~1999", ..., "9000~9999".
cut_label <- paste(paste0(0:9, "000"), paste0(0:9, "999"), sep = "~")
# Cross-tabulate group against characters 8 to 11 of cell_no (presumably the
# last four digits of the phone number -- confirm against the data format),
# binned in steps of 1000.
tbl5 <- class_roll$cell_no %>%
  substr(start = 8, stop = 11) %>%
  as.numeric %>%
  # right = FALSE makes the bins [0, 1000), [1000, 2000), ..., [9000, 10000)
  # so that they agree with the labels. With cut()'s default right-closed
  # bins, 0 became NA and 1000, 2000, ... fell into the bin below their label.
  # NOTE(review): red_and_black() is defined outside this section; if it bins
  # with the default, update it the same way so both scores stay comparable.
  cut(labels = cut_label,
      breaks = seq(0, 10000, by = 1000),
      right = FALSE) %>%
  table(class_roll$group, .)
tbl5 %>%
  pander
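|       | 0000~0999 | 1000~1999 | 2000~2999 | 3000~3999 | 4000~4999 | 5000~5999 | 6000~6999 | 7000~7999 | 8000~8999 | 9000~9999 |
|:------|----------:|----------:|----------:|----------:|----------:|----------:|----------:|----------:|----------:|----------:|
| Red   |        47 |        45 |        44 |        50 |        58 |        52 |        59 |        58 |        43 |        50 |
| Black |        46 |        47 |        48 |        50 |        59 |        53 |        57 |        56 |        42 |        49 |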
# Chi-square statistic of the group-by-phone-number-bin table.
X5min <- chisq.test(tbl5, simulate.p.value = TRUE)$statistic
X5min
## X-squared
## 0.3366642
성씨 분포
# First character of each name (presumably the family name -- confirm).
f_name <- substring(class_roll$name, first = 1, last = 1)
# Cross-tabulate group against that character, keeping "김", "이" and "박"
# as their own categories and pooling everything else into "기타".
tbl6 <- table(
  class_roll$group,
  factor(
    ifelse(f_name %in% c("김", "이", "박"), f_name, "기타"),
    levels = c("김", "이", "박", "기타")
  ),
  # Same dimension labels as the piped call table(class_roll$group, .).
  dnn = c("", ".")
)
pander(tbl6)
|       |  김 | 이 | 박 | 기타 |
|:------|----:|---:|---:|-----:|
| Red   | 117 | 71 | 37 |  281 |
| Black | 117 | 71 | 41 |  278 |
# Chi-square statistic of the group-by-name-initial table.
X6min <- chisq.test(tbl6, simulate.p.value = TRUE)$statistic
X6min
## X-squared
## 0.2202414
Sum of Chi_Squares
# Total of the six chi-square statistics, accumulated left to right in the
# order X1min, ..., X6min (the result keeps the name of the first term).
Xsum_min <- Reduce(`+`, list(X1min, X2min, X3min, X4min, X5min, X6min))
Xsum_min
## X-squared
## 2.935213