library("dplyr")
library("reshape2")
library("ggplot2")
library("stringr")
library("scales")
library("rstan")
# http://hosho.ees.hokudai.ac.jp/~kubo/ce/IwanamiBook.html
d <- read.csv("./data7a.csv")
dim(d)
## [1] 100 2
head(d)
## id y
## 1 1 0
## 2 2 2
## 3 3 7
## 4 4 8
## 5 5 1
## 6 6 7
prob <- seq(0.1,0.8,0.01)
logL <- function(x){
sum(dbinom(d$y,size = 8,log = T,prob=x))
}
# 最尤推定
plot(prob,sapply(prob,logL)) # 0.5あたりが最大
ml <-glm(data=d,formula = cbind(y,8-y)~1, family=binomial)
1/(1+exp(-ml$coefficients)) # 0.5035
## (Intercept)
## 0.50375
d$expected <- rbinom(n = 100,size = 8,prob=0.504)
# p.225 10.1(B)
ggplot(data=d, aes(alpha=.5))+
geom_histogram(aes(x=y), binwidth=1, fill="black")+
geom_histogram(aes(x=expected), binwidth=1, fill="white")
# 分散
var(d$y)
## [1] 9.928384
var(d$expected)
## [1] 1.737374
// midori_study chap.10
// 二項分布、個体差+切片のモデル
data {
int<lower=0> N; // sample size
int<lower=0> y[N]; // response variable
}
parameters {
real beta;
real r[N];
real<lower=0> s;
}
transformed parameters {
real q[N];
for (i in 1:N) {
q[i] <- inv_logit(beta + r[i]); // 生存確率
}
}
model {
for (i in 1:N) {
y[i] ~ binomial(8, q[i]); // 二項分布
}
beta ~ normal(0, 100); // 無情報事前分布
s ~ uniform(0, 1.0e+4); // 無情報事前分布
r ~ normal(0, s); // 階層事前分布 テキストだとtau値(sの逆数)だしていたけど、使わず。。
}
dat<-list(N=nrow(d),y=d$y)
result.stan <- stan("./binom.stan",
data = dat,
iter = 1600,
warmup= 100,
chains = 3)
##
## TRANSLATING MODEL 'binom' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'binom' NOW.
##
## SAMPLING FOR MODEL 'binom' NOW (CHAIN 1).
##
## Iteration: 1 / 1600 [ 0%] (Warmup)
## Iteration: 101 / 1600 [ 6%] (Sampling)
## Iteration: 260 / 1600 [ 16%] (Sampling)
## Iteration: 420 / 1600 [ 26%] (Sampling)
## Iteration: 580 / 1600 [ 36%] (Sampling)
## Iteration: 740 / 1600 [ 46%] (Sampling)
## Iteration: 900 / 1600 [ 56%] (Sampling)
## Iteration: 1060 / 1600 [ 66%] (Sampling)
## Iteration: 1220 / 1600 [ 76%] (Sampling)
## Iteration: 1380 / 1600 [ 86%] (Sampling)
## Iteration: 1540 / 1600 [ 96%] (Sampling)
## Iteration: 1600 / 1600 [100%] (Sampling)
## # Elapsed Time: 0.305 seconds (Warm-up)
## # 3.549 seconds (Sampling)
## # 3.854 seconds (Total)
##
##
## SAMPLING FOR MODEL 'binom' NOW (CHAIN 2).
##
## Iteration: 1 / 1600 [ 0%] (Warmup)
## Iteration: 101 / 1600 [ 6%] (Sampling)
## Iteration: 260 / 1600 [ 16%] (Sampling)
## Iteration: 420 / 1600 [ 26%] (Sampling)
## Iteration: 580 / 1600 [ 36%] (Sampling)
## Iteration: 740 / 1600 [ 46%] (Sampling)
## Iteration: 900 / 1600 [ 56%] (Sampling)
## Iteration: 1060 / 1600 [ 66%] (Sampling)
## Iteration: 1220 / 1600 [ 76%] (Sampling)
## Iteration: 1380 / 1600 [ 86%] (Sampling)
## Iteration: 1540 / 1600 [ 96%] (Sampling)
## Iteration: 1600 / 1600 [100%] (Sampling)
## # Elapsed Time: 0.29 seconds (Warm-up)
## # 3.542 seconds (Sampling)
## # 3.832 seconds (Total)
##
##
## SAMPLING FOR MODEL 'binom' NOW (CHAIN 3).
##
## Iteration: 1 / 1600 [ 0%] (Warmup)
## Iteration: 101 / 1600 [ 6%] (Sampling)
## Iteration: 260 / 1600 [ 16%] (Sampling)
## Iteration: 420 / 1600 [ 26%] (Sampling)
## Iteration: 580 / 1600 [ 36%] (Sampling)
## Iteration: 740 / 1600 [ 46%] (Sampling)
## Iteration: 900 / 1600 [ 56%] (Sampling)
## Iteration: 1060 / 1600 [ 66%] (Sampling)
## Iteration: 1220 / 1600 [ 76%] (Sampling)
## Iteration: 1380 / 1600 [ 86%] (Sampling)
## Iteration: 1540 / 1600 [ 96%] (Sampling)
## Iteration: 1600 / 1600 [100%] (Sampling)
## # Elapsed Time: 0.275 seconds (Warm-up)
## # 3.523 seconds (Sampling)
## # 3.798 seconds (Total)
# result.stan
plot(result.stan)
post <- rstan::extract(result.stan, permuted=F)
post.m <- melt(post)
summary(post.m)
## iterations chains parameters value
## Min. : 1.0 chain:1:304500 beta : 4500 Min. :-479.0415
## 1st Qu.: 375.8 chain:2:304500 r[1] : 4500 1st Qu.: -0.0967
## Median : 750.5 chain:3:304500 r[2] : 4500 Median : 0.3892
## Mean : 750.5 r[3] : 4500 Mean : -1.9267
## 3rd Qu.:1125.2 r[4] : 4500 3rd Qu.: 0.9739
## Max. :1500.0 r[5] : 4500 Max. : 16.4126
## (Other):886500
post.m %>% filter(parameters == 'beta') %>% qplot(data=., x=value,geom="density")+geom_vline(x=0,col="darkgreen")
post.m %>% filter(parameters == 's') %>% qplot(data=., x=value,geom="density")+geom_vline(x=0,col="darkgreen")
post.m %>% filter(parameters == 'r[1]') %>% qplot(data=., x=value,geom="density")+geom_vline(x=0,col="darkgreen")
post.m %>% filter(parameters == 'r[2]') %>% qplot(data=., x=value,geom="density")+geom_vline(x=0,col="darkgreen")
post.m %>% filter(parameters == 'r[3]') %>% qplot(data=., x=value,geom="density")+geom_vline(x=0,col="darkgreen")
post.m %>% filter(str_detect(parameters,"q\\[")) %>% qplot(data=., x=value,geom="density")+geom_vline(x=0,col="darkgreen")
post.m %>% filter(str_detect(parameters,"q\\[")) %>%
mutate(y_pred=value*8, y_fact=round(y_pred)) %>% # 生存確率 --> 生存個数
group_by(chains, iterations, y_fact) %>%
summarise(count=length(value)) %>% ## 個数ごとの件数
group_by(y_fact) %>%
summarise( # パーセンタイル計算
lwr=quantile(count,.025),
median_cnt=quantile(count,.5),
mean_cnt=mean(count),
upr=quantile(count,.975)
) -> mcmc.dist
mcmc.dist
## Source: local data frame [9 x 5]
##
## y_fact lwr median_cnt mean_cnt upr
## 1 0 11.000 17 17.476889 23
## 2 1 8.000 14 14.516444 21
## 3 2 4.000 9 8.871305 14
## 4 3 2.000 6 6.352994 11
## 5 4 2.000 5 5.447808 10
## 6 5 2.000 6 5.911156 10
## 7 6 4.000 8 8.216000 14
## 8 7 8.475 15 14.662667 21
## 9 8 13.000 19 18.576889 25
ggplot(data=mcmc.dist, aes(x=y_fact))+
# geom_errorbar(aes(ymin=lwr,ymax=upr))+
geom_line(aes(y=upr),col="blue")+
geom_line(aes(y=lwr),col="blue")+
geom_point(aes(y=median_cnt))
library("dplyr")
library("reshape2")
library("ggplot2")
library("stringr")
library("scales")
library("rstan")
d <- read.csv("./d1.csv")
head(d)
## id pot f y
## 1 1 A C 6
## 2 2 A C 3
## 3 3 A C 19
## 4 4 A C 5
## 5 5 A C 0
## 6 6 A C 19
summary(d)
## id pot f y
## Min. : 1.00 A :10 C:50 Min. : 0.00
## 1st Qu.: 25.75 B :10 T:50 1st Qu.: 1.00
## Median : 50.50 C :10 Median : 3.00
## Mean : 50.50 D :10 Mean : 5.52
## 3rd Qu.: 75.25 E :10 3rd Qu.: 7.00
## Max. :100.00 F :10 Max. :37.00
## (Other):40
plot(d)
qplot(data=d,x=pot,y=y,geom="boxplot",fill=f)
// midori_study chap.10
// ポアソン分布 個体差,場所差,切片 モデル
data {
int<lower=0> N_sample; // number of observations
int<lower=0> N_pot; // number of pots
int<lower=0> y[N_sample];
int<lower=0> f[N_sample]; // f(C:1, T:2)
int<lower=0> pot[N_sample];
}
parameters {
real beta1;
real beta2;
real r[N_sample];
real rp[N_pot];
real<lower=0> sigma[2];
}
transformed parameters {
real<lower=0> lambda[N_sample];
for (i in 1:N_sample) {
lambda[i] <- exp(beta1 + beta2 * f[i] + r[i] + rp[pot[i]]);
}
}
model {
for (i in 1:N_sample) {
y[i] ~ poisson(lambda[i]);
}
beta1 ~ normal(0, 100);
beta2 ~ normal(0, 100);
sigma[1] ~ uniform(0, 1.0e+4);
sigma[2] ~ uniform(0, 1.0e+4);
r ~ normal(0, sigma[1]);
rp ~ normal(0, sigma[2]);
}
dat<-list(
N_sample=nrow(d),
N_pot=length(unique(d$pot)),
y=d$y,
f=as.integer(d$f),
pot=as.integer(d$pot)
)
poisson.stan <- stan("./poisson.stan",
data = dat,
iter = 1600,
warmup= 100,
chains = 3)
##
## TRANSLATING MODEL 'poisson' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'poisson' NOW.
##
## SAMPLING FOR MODEL 'poisson' NOW (CHAIN 1).
##
## Iteration: 1 / 1600 [ 0%] (Warmup)
## Iteration: 101 / 1600 [ 6%] (Sampling)
## Iteration: 260 / 1600 [ 16%] (Sampling)
## Iteration: 420 / 1600 [ 26%] (Sampling)
## Iteration: 580 / 1600 [ 36%] (Sampling)
## Iteration: 740 / 1600 [ 46%] (Sampling)
## Iteration: 900 / 1600 [ 56%] (Sampling)
## Iteration: 1060 / 1600 [ 66%] (Sampling)
## Iteration: 1220 / 1600 [ 76%] (Sampling)
## Iteration: 1380 / 1600 [ 86%] (Sampling)
## Iteration: 1540 / 1600 [ 96%] (Sampling)
## Iteration: 1600 / 1600 [100%] (Sampling)
## # Elapsed Time: 0.679 seconds (Warm-up)
## # 8.956 seconds (Sampling)
## # 9.635 seconds (Total)
##
##
## SAMPLING FOR MODEL 'poisson' NOW (CHAIN 2).
##
## Iteration: 1 / 1600 [ 0%] (Warmup)
## Iteration: 101 / 1600 [ 6%] (Sampling)
## Iteration: 260 / 1600 [ 16%] (Sampling)
## Iteration: 420 / 1600 [ 26%] (Sampling)
## Iteration: 580 / 1600 [ 36%] (Sampling)
## Iteration: 740 / 1600 [ 46%] (Sampling)
## Iteration: 900 / 1600 [ 56%] (Sampling)
## Iteration: 1060 / 1600 [ 66%] (Sampling)
## Iteration: 1220 / 1600 [ 76%] (Sampling)
## Iteration: 1380 / 1600 [ 86%] (Sampling)
## Iteration: 1540 / 1600 [ 96%] (Sampling)
## Iteration: 1600 / 1600 [100%] (Sampling)
## # Elapsed Time: 0.596 seconds (Warm-up)
## # 8.827 seconds (Sampling)
## # 9.423 seconds (Total)
##
##
## SAMPLING FOR MODEL 'poisson' NOW (CHAIN 3).
##
## Iteration: 1 / 1600 [ 0%] (Warmup)
## Iteration: 101 / 1600 [ 6%] (Sampling)
## Iteration: 260 / 1600 [ 16%] (Sampling)
## Iteration: 420 / 1600 [ 26%] (Sampling)
## Iteration: 580 / 1600 [ 36%] (Sampling)
## Iteration: 740 / 1600 [ 46%] (Sampling)
## Iteration: 900 / 1600 [ 56%] (Sampling)
## Iteration: 1060 / 1600 [ 66%] (Sampling)
## Iteration: 1220 / 1600 [ 76%] (Sampling)
## Iteration: 1380 / 1600 [ 86%] (Sampling)
## Iteration: 1540 / 1600 [ 96%] (Sampling)
## Iteration: 1600 / 1600 [100%] (Sampling)
## # Elapsed Time: 0.583 seconds (Warm-up)
## # 8.585 seconds (Sampling)
## # 9.168 seconds (Total)
# poisson.stan
plot(poisson.stan)
poisson.post <- rstan::extract(poisson.stan, permuted=F)
post.m <- melt(poisson.post)
head(post.m)
## iterations chains parameters value
## 1 1 chain:1 beta1 1.618219
## 2 2 chain:1 beta1 2.882007
## 3 3 chain:1 beta1 3.135146
## 4 4 chain:1 beta1 2.736473
## 5 5 chain:1 beta1 3.246207
## 6 6 chain:1 beta1 1.822471
summary(post.m)
## iterations chains parameters value
## Min. : 1.0 chain:1:322500 beta1 : 4500 Min. : -6.2105
## 1st Qu.: 375.8 chain:2:322500 beta2 : 4500 1st Qu.: -0.0568
## Median : 750.5 chain:3:322500 r[1] : 4500 Median : 0.7807
## Mean : 750.5 r[2] : 4500 Mean : 5.7127
## 3rd Qu.:1125.2 r[3] : 4500 3rd Qu.: 2.5710
## Max. :1500.0 r[4] : 4500 Max. :701.0707
## (Other):940500
post.m %>% filter(parameters == 'beta1') %>% qplot(data=., x=value)+geom_vline(x=0,col="darkgreen")
post.m %>% filter(parameters == 'beta2') %>% qplot(data=., x=value)+geom_vline(x=0,col="darkgreen")
post.m %>% filter(parameters == 'sigma[1]') %>% qplot(data=., x=value)+geom_vline(x=0,col="darkgreen")
post.m %>% filter(parameters == 'sigma[2]') %>% qplot(data=., x=value)+geom_vline(x=0,col="darkgreen")