library(ggplot2)
library(reshape2)
library(scales)
library(xtable)
library(bayesm)
以下の資料のデータを参考にサンプルデータを生成する
http://eprints.lib.hokudai.ac.jp/dspace/bitstream/2115/39717/2/kubo2009IEICE.pdf
batting_average_data <- read.csv("batting_average.csv",
header = T)
batting_average_data$batter <- as.factor(batting_average_data$batter)
真の打率から、サンプルデータを作成
set.seed(5)
batting_average_raw_data <- do.call(rbind, lapply(1:nrow(batting_average_data),
function(i) {
d <- batting_average_data[i, ]
m <- d$batting_average_true
n <- d$bats
y <- rbinom(n, 1, m)
data.frame(id = d$batter, hit = y, x = 1)
}))
batting_average_data2 <- dcast(batting_average_raw_data,
id ~ "batting_average", function(x) {
sum(x)/length(x)
}, value.var = "hit")
batting_average_data2$batting_average_true <- batting_average_data$batting_average_true
print.xtable(xtable::xtable(batting_average_data2),
type = "html")
| id | batting_average | batting_average_true | |
|---|---|---|---|
| 1 | 1 | 0.23 | 0.27 |
| 2 | 2 | 0.43 | 0.28 |
| 3 | 3 | 0.26 | 0.28 |
| 4 | 4 | 0.26 | 0.29 |
| 5 | 5 | 0.30 | 0.29 |
| 6 | 6 | 0.33 | 0.29 |
| 7 | 7 | 0.25 | 0.29 |
| 8 | 8 | 0.20 | 0.29 |
| 9 | 9 | 0.40 | 0.30 |
| 10 | 10 | 0.33 | 0.30 |
| 11 | 11 | 0.30 | 0.30 |
| 12 | 12 | 0.22 | 0.30 |
| 13 | 13 | 0.31 | 0.30 |
| 14 | 14 | 0.39 | 0.31 |
| 15 | 15 | 0.32 | 0.31 |
| 16 | 16 | 0.28 | 0.31 |
| 17 | 17 | 0.35 | 0.31 |
| 18 | 18 | 0.24 | 0.32 |
| 19 | 19 | 0.31 | 0.32 |
| 20 | 20 | 0.39 | 0.33 |
ggplot(batting_average_data2, aes(x = batting_average_true,
y = batting_average, col = id)) + geom_point() + geom_abline() +
xlab("真の打率") + ylab("割り算推定打率") +
ylim(0.15, 0.45) + ggtitle("割り算推定打率と真の打率")
id.list <- unique(batting_average_raw_data$id)
lgtdata <- lapply(id.list, function(id) {
d <- batting_average_raw_data[as.numeric(batting_average_raw_data$id) ==
as.numeric(id), ]
list(X = matrix(d$x, nrow = nrow(d), ncol = 1), y = d$hit,
beta = matrix(1, nrow = 1, ncol = nrow(d)))
})
R <- 10000
set.seed(5)
out <- rhierBinLogit(Data = list(lgtdata = lgtdata),
Mcmc = list(R = R))
beta <- rowMeans(out$betadraw[, , seq(9000, 10000,
10)])
batting_average_data2$rhierBinLogitResult <- exp(beta)/(1 +
exp(beta))
ggplot(batting_average_data2, aes(x = batting_average_true,
y = rhierBinLogitResult, col = id)) + geom_point() +
geom_abline() + xlab("真の打率") + ylab("階層ベイズモデル推定打率") +
ylim(0.15, 0.45) + ggtitle("階層ベイズモデル推定打率と真の打率")
batting_average_data.melt <- melt(batting_average_data2,
id.vars = c("batting_average_true"), measure.vars = c("batting_average",
"rhierBinLogitResult"))
ggplot(batting_average_data.melt, aes(x = batting_average_true,
y = value, col = variable)) + geom_point() + geom_abline() +
xlab("真の打率") + ylab("推定打率") + ylim(0.15,
0.45) + ggtitle("推定打率と真の打率")