東大の過去5年の男女別の志願者数と合格者数。
(Source: http://www.mext.go.jp/…/afield…/2018/09/10/1409128_002_1.pdf)
a=matrix(c(314,70,384,84,16,100,27,23,319,64,383,80,21,101,25,33,320,68,388,86,14,100,27,21,331,60,391,84,16,100,25,27,324,69,393,83,17,100,26,25,327,69,396,84,17,101),8,)
## Warning in matrix(c(314, 70, 384, 84, 16, 100, 27, 23, 319, 64, 383, 80, :
## データ長 [46] が行数 [8] を整数で割った、もしくは掛けた値ではありません
dat=t(a)
dat=dat[,c(1,2,4,5)]
row.names(dat) = c("H30","H29","H28","H27","H26","H25")
colnames(dat) = c("M_cand","F_cand","M_suc","F_suc")
clude_dat = apply(dat,sum,MARGIN = 2)
clude_mat=matrix(clude_dat,2,2)
# フィッシャーの正確検定で合格率の有意差を検討
fisher.test(clude_mat)
##
## Fisher's Exact Test for Count Data
##
## data: clude_mat
## p-value = 0.9033
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.7595872 1.2443186
## sample estimates:
## odds ratio
## 0.9752324
シミュレーション関数を定義
sim(): simulation 関数 変更可能パラメターは以下の通り
-n:男女各人口 -M_sample_p: 男性の大学進学率(受験のフィールドにあがる割合)
-F_sample_p: 女性の大学進学率(受験のフィールドにあがる割合)
-M_p: 男性が東大への出願を決定する閾値(受験人口中のパーセンタイルで指定)
-F_p: 女性が東大への出願を決定する閾値(受験人口中のパーセンタイルで指定)
-top: 合格定員
戻り値
-Fem_P: 女性の入学者割合
-suc_P: 男女の合格者割合
sim_main:シミュレーションの実行関数
戻り値: 全シミュレーション中の男性合格率/女性合格率 が1以上の割合
sim <- function(n=2000,M_sample_p = 0.556,F_sample_p =0.482, M_p = 0.75,F_p=0.80, top = 50){
Male <- rnorm(n = n,mean = 50,sd=25)
Fem <- rnorm(n = n,mean = 50,sd=25)
Male <- sample(Male,size = n*M_sample_p)
Fem_sam <- sample(Fem,size = n*F_sample_p)
pop = c(Male,Fem_sam)
QM=quantile(pop,probs = M_p)
QF_S= quantile(pop,probs = F_p)
M_U_QM= Male[Male>=QM]
F_U_QFS= Fem_sam[Fem_sam>=QF_S]
M_df = data.frame(val = M_U_QM)
M_df$sex <- "M"
F_df = data.frame(val = F_U_QFS)
F_df$sex <- "F"
library(dplyr)
df <- rbind.data.frame(M_df,F_df)
df <- df %>%
dplyr::arrange(order = desc(val))
df$suc <- 0
df$suc[1:top] <- 1
test <- fisher.test(table(df$sex,df$suc))
F_suc_p=prop.table(table(df$sex,df$suc),margin = 1)[1,2]
M_suc_p=prop.table(table(df$sex,df$suc),margin = 1)[2,2]
df=df[1:top,]
tab=table(df[,"sex"])
return(list(Fem_P= tab[1]/(tab[2]+tab[1]),
Suc_P=c(M_suc_p,F_suc_p)))
}
sim_main<- function(B = 2000,n=20000,M_sample_p = 0.556,F_sample_p =0.482, M_p = 0.75,F_p=0.80, top = 100){
library(doParallel)
cl <- makeCluster(detectCores())
registerDoParallel(cl)
vec_2<- foreach(i = 1:B, .combine=c, .export = "sim") %dopar% {
res=sim(n=n,M_sample_p=M_sample_p,F_sample_p=F_sample_p, M_p = M_p, F_p=F_p ,top= top)
res[[2]][1]/res[[2]][2]
}
stopCluster(cl)
# for(i in 1:B){
# res=sim(n=n,M_sample_p=M_sample_p,F_sample_p=F_sample_p, M_p = M_p, F_p=F_p ,top= top)
# vec_1[i]=res[[1]][1]
# vec_2[i]=res[[2]][1]/res[[2]][2]
# }
#hist(vec_1,breaks = 15)
#hist(vec_2,breaks = 30,xlim=c(0,1.5))
return(prop.table(table(vec_2 >=1.00)))
}
sim_main(M_sample_p = 0.556,F_sample_p =0.482,top= 100)
## Warning: package 'doParallel' was built under R version 3.5.3
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
##
## FALSE TRUE
## 0.8595 0.1405
sim_main(M_sample_p = 0.556,F_sample_p =0.556,top= 100)
##
## FALSE TRUE
## 0.8655 0.1345
sim_main(M_sample_p = 0.556,F_sample_p =0.600,top= 100)
##
## FALSE TRUE
## 0.8745 0.1255
#
sim_main(M_p=0.75,F_p=0.75,top= 100)
##
## FALSE TRUE
## 0.4955 0.5045
sim_main(M_p=0.75,F_p=0.80,top= 100)
##
## FALSE TRUE
## 0.876 0.124
sim_main(M_p=0.75,F_p=0.85,top= 100)
##
## FALSE TRUE
## 0.995 0.005
pnorm(q=70,mean=50,sd =10) = 0.9963189
pnorm(q=76.8,mean=50,sd =10) = 0.9963189
sim_main(n=550000,M_p=pnorm(q=75,mean=50,sd =10),F_p=pnorm(q=75,mean=50,sd =10),top= 100)
##
## FALSE TRUE
## 0.4945 0.5055
sim_main(n=550000,M_p=pnorm(q=75,mean=50,sd =10),F_p=pnorm(q=76,mean=50,sd =10),top= 100)
##
## FALSE TRUE
## 0.9295 0.0705
sim_main(n=550000,M_p=pnorm(q=75,mean=50,sd =10),F_p=pnorm(q=77,mean=50,sd =10),top= 100)
##
## FALSE TRUE
## 0.999 0.001
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=Japanese_Japan.932 LC_CTYPE=Japanese_Japan.932
## [3] LC_MONETARY=Japanese_Japan.932 LC_NUMERIC=C
## [5] LC_TIME=Japanese_Japan.932
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] doParallel_1.0.14 iterators_1.0.10 foreach_1.4.4
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.18 codetools_0.2-15 digest_0.6.18 rprojroot_1.3-2
## [5] backports_1.1.3 magrittr_1.5 evaluate_0.11 stringi_1.2.4
## [9] rmarkdown_1.10 tools_3.5.1 stringr_1.3.1 yaml_2.2.0
## [13] compiler_3.5.1 htmltools_0.3.6 knitr_1.20