library("dplyr")
library("reshape2")
library("ggplot2")
library("stringr")
library("scales")
library("rstan")

chap.10

個体差と生存種子数

# http://hosho.ees.hokudai.ac.jp/~kubo/ce/IwanamiBook.html
d <- read.csv("./data7a.csv")
dim(d)
## [1] 100   2
head(d)
##   id y
## 1  1 0
## 2  2 2
## 3  3 7
## 4  4 8
## 5  5 1
## 6  6 7
prob <- seq(0.1,0.8,0.01)
logL <- function(x){
  sum(dbinom(d$y,size = 8,log = T,prob=x))
}
# 最尤推定
plot(prob,sapply(prob,logL)) # 0.5あたりが最大

ml <-glm(data=d,formula = cbind(y,8-y)~1, family=binomial)
1/(1+exp(-ml$coefficients)) # 0.5035
## (Intercept) 
##     0.50375
d$expected <- rbinom(n = 100,size = 8,prob=0.504)

# p.225 10.1(B)
ggplot(data=d, aes(alpha=.5))+
  geom_histogram(aes(x=y), binwidth=1, fill="black")+
  geom_histogram(aes(x=expected), binwidth=1, fill="white")

# 分散
var(d$y)
## [1] 9.928384
var(d$expected)
## [1] 1.737374

Stanのコード(二項分布、個体差、切片)

// midori_study chap.10
// 二項分布、個体差+切片のモデル
data {
  int<lower=0> N;     // sample size
  int<lower=0> y[N];  // response variable
}
parameters {
  real beta;
  real r[N];
  real<lower=0> s;
}
transformed parameters {
  real q[N];
  
  for (i in 1:N) {
    q[i] <- inv_logit(beta + r[i]); // 生存確率
  }


}
model {
  for (i in 1:N) {
    y[i] ~ binomial(8, q[i]); // 二項分布
  }
  beta ~ normal(0, 100);     // 無情報事前分布 
  s ~ uniform(0, 1.0e+4);    // 無情報事前分布
  r ~ normal(0, s);    // 階層事前分布 テキストだとtau値(sの逆数)だしていたけど、使わず。。
}
dat<-list(N=nrow(d),y=d$y)
result.stan <- stan("./binom.stan",
  data = dat,
  iter = 1600,
  warmup= 100,
  chains = 3)
## 
## TRANSLATING MODEL 'binom' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'binom' NOW.
## 
## SAMPLING FOR MODEL 'binom' NOW (CHAIN 1).
## 
## Iteration:    1 / 1600 [  0%]  (Warmup)
## Iteration:  101 / 1600 [  6%]  (Sampling)
## Iteration:  260 / 1600 [ 16%]  (Sampling)
## Iteration:  420 / 1600 [ 26%]  (Sampling)
## Iteration:  580 / 1600 [ 36%]  (Sampling)
## Iteration:  740 / 1600 [ 46%]  (Sampling)
## Iteration:  900 / 1600 [ 56%]  (Sampling)
## Iteration: 1060 / 1600 [ 66%]  (Sampling)
## Iteration: 1220 / 1600 [ 76%]  (Sampling)
## Iteration: 1380 / 1600 [ 86%]  (Sampling)
## Iteration: 1540 / 1600 [ 96%]  (Sampling)
## Iteration: 1600 / 1600 [100%]  (Sampling)
## #  Elapsed Time: 0.305 seconds (Warm-up)
## #                3.549 seconds (Sampling)
## #                3.854 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'binom' NOW (CHAIN 2).
## 
## Iteration:    1 / 1600 [  0%]  (Warmup)
## Iteration:  101 / 1600 [  6%]  (Sampling)
## Iteration:  260 / 1600 [ 16%]  (Sampling)
## Iteration:  420 / 1600 [ 26%]  (Sampling)
## Iteration:  580 / 1600 [ 36%]  (Sampling)
## Iteration:  740 / 1600 [ 46%]  (Sampling)
## Iteration:  900 / 1600 [ 56%]  (Sampling)
## Iteration: 1060 / 1600 [ 66%]  (Sampling)
## Iteration: 1220 / 1600 [ 76%]  (Sampling)
## Iteration: 1380 / 1600 [ 86%]  (Sampling)
## Iteration: 1540 / 1600 [ 96%]  (Sampling)
## Iteration: 1600 / 1600 [100%]  (Sampling)
## #  Elapsed Time: 0.29 seconds (Warm-up)
## #                3.542 seconds (Sampling)
## #                3.832 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'binom' NOW (CHAIN 3).
## 
## Iteration:    1 / 1600 [  0%]  (Warmup)
## Iteration:  101 / 1600 [  6%]  (Sampling)
## Iteration:  260 / 1600 [ 16%]  (Sampling)
## Iteration:  420 / 1600 [ 26%]  (Sampling)
## Iteration:  580 / 1600 [ 36%]  (Sampling)
## Iteration:  740 / 1600 [ 46%]  (Sampling)
## Iteration:  900 / 1600 [ 56%]  (Sampling)
## Iteration: 1060 / 1600 [ 66%]  (Sampling)
## Iteration: 1220 / 1600 [ 76%]  (Sampling)
## Iteration: 1380 / 1600 [ 86%]  (Sampling)
## Iteration: 1540 / 1600 [ 96%]  (Sampling)
## Iteration: 1600 / 1600 [100%]  (Sampling)
## #  Elapsed Time: 0.275 seconds (Warm-up)
## #                3.523 seconds (Sampling)
## #                3.798 seconds (Total)
# result.stan
plot(result.stan)

post <- rstan::extract(result.stan, permuted=F)
post.m <- melt(post)
summary(post.m)
##    iterations         chains         parameters         value          
##  Min.   :   1.0   chain:1:304500   beta   :  4500   Min.   :-479.0415  
##  1st Qu.: 375.8   chain:2:304500   r[1]   :  4500   1st Qu.:  -0.0967  
##  Median : 750.5   chain:3:304500   r[2]   :  4500   Median :   0.3892  
##  Mean   : 750.5                    r[3]   :  4500   Mean   :  -1.9267  
##  3rd Qu.:1125.2                    r[4]   :  4500   3rd Qu.:   0.9739  
##  Max.   :1500.0                    r[5]   :  4500   Max.   :  16.4126  
##                                    (Other):886500
post.m %>% filter(parameters == 'beta') %>% qplot(data=., x=value,geom="density")+geom_vline(x=0,col="darkgreen")

post.m %>% filter(parameters == 's')    %>% qplot(data=., x=value,geom="density")+geom_vline(x=0,col="darkgreen")

post.m %>% filter(parameters == 'r[1]') %>% qplot(data=., x=value,geom="density")+geom_vline(x=0,col="darkgreen")

post.m %>% filter(parameters == 'r[2]') %>% qplot(data=., x=value,geom="density")+geom_vline(x=0,col="darkgreen")

post.m %>% filter(parameters == 'r[3]') %>% qplot(data=., x=value,geom="density")+geom_vline(x=0,col="darkgreen")

post.m %>% filter(str_detect(parameters,"q\\[")) %>% qplot(data=., x=value,geom="density")+geom_vline(x=0,col="darkgreen")

post.m %>% filter(str_detect(parameters,"q\\[")) %>% 
  mutate(y_pred=value*8, y_fact=round(y_pred)) %>%  # 生存確率 --> 生存個数
  group_by(chains, iterations, y_fact) %>% 
  summarise(count=length(value)) %>% ## 個数ごとの件数
  group_by(y_fact) %>% 
  summarise( # パーセンタイル計算
    lwr=quantile(count,.025),
    median_cnt=quantile(count,.5),
    mean_cnt=mean(count),
    upr=quantile(count,.975)
  ) -> mcmc.dist
mcmc.dist
## Source: local data frame [9 x 5]
## 
##   y_fact    lwr median_cnt  mean_cnt upr
## 1      0 11.000         17 17.476889  23
## 2      1  8.000         14 14.516444  21
## 3      2  4.000          9  8.871305  14
## 4      3  2.000          6  6.352994  11
## 5      4  2.000          5  5.447808  10
## 6      5  2.000          6  5.911156  10
## 7      6  4.000          8  8.216000  14
## 8      7  8.475         15 14.662667  21
## 9      8 13.000         19 18.576889  25
ggplot(data=mcmc.dist, aes(x=y_fact))+
  # geom_errorbar(aes(ymin=lwr,ymax=upr))+
  geom_line(aes(y=upr),col="blue")+
  geom_line(aes(y=lwr),col="blue")+
  geom_point(aes(y=median_cnt))

個体差+場所差

library("dplyr")
library("reshape2")
library("ggplot2")
library("stringr")
library("scales")
library("rstan")

d <- read.csv("./d1.csv")
head(d)
##   id pot f  y
## 1  1   A C  6
## 2  2   A C  3
## 3  3   A C 19
## 4  4   A C  5
## 5  5   A C  0
## 6  6   A C 19
summary(d)
##        id              pot     f            y        
##  Min.   :  1.00   A      :10   C:50   Min.   : 0.00  
##  1st Qu.: 25.75   B      :10   T:50   1st Qu.: 1.00  
##  Median : 50.50   C      :10          Median : 3.00  
##  Mean   : 50.50   D      :10          Mean   : 5.52  
##  3rd Qu.: 75.25   E      :10          3rd Qu.: 7.00  
##  Max.   :100.00   F      :10          Max.   :37.00  
##                   (Other):40
plot(d)

qplot(data=d,x=pot,y=y,geom="boxplot",fill=f)

stan コード

// midori_study chap.10
// ポアソン分布 個体差,場所差,切片 モデル
data {
  int<lower=0> N_sample;       // number of observations
  int<lower=0> N_pot;          // number of pots
  int<lower=0>      y[N_sample];
  int<lower=0>      f[N_sample];  // f(C:1, T:2)
  int<lower=0>    pot[N_sample];
}
parameters {
  real beta1;
  real beta2;
  real r[N_sample];
  real rp[N_pot];
  real<lower=0> sigma[2];
}
transformed parameters {
  real<lower=0> lambda[N_sample];

  for (i in 1:N_sample) {
    lambda[i] <- exp(beta1 + beta2 * f[i] + r[i] + rp[pot[i]]);
  }
}
model {
  for (i in 1:N_sample) {
    y[i] ~ poisson(lambda[i]);
  }
  beta1 ~ normal(0, 100);
  beta2 ~ normal(0, 100);
  sigma[1] ~ uniform(0, 1.0e+4);
  sigma[2] ~ uniform(0, 1.0e+4);
  r ~ normal(0, sigma[1]);
  rp ~ normal(0, sigma[2]);
  
}
dat<-list(
  N_sample=nrow(d),
  N_pot=length(unique(d$pot)),
  y=d$y,
  f=as.integer(d$f),
  pot=as.integer(d$pot)
  )
poisson.stan <- stan("./poisson.stan",
  data = dat,
  iter = 1600,
  warmup= 100,
  chains = 3)
## 
## TRANSLATING MODEL 'poisson' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'poisson' NOW.
## 
## SAMPLING FOR MODEL 'poisson' NOW (CHAIN 1).
## 
## Iteration:    1 / 1600 [  0%]  (Warmup)
## Iteration:  101 / 1600 [  6%]  (Sampling)
## Iteration:  260 / 1600 [ 16%]  (Sampling)
## Iteration:  420 / 1600 [ 26%]  (Sampling)
## Iteration:  580 / 1600 [ 36%]  (Sampling)
## Iteration:  740 / 1600 [ 46%]  (Sampling)
## Iteration:  900 / 1600 [ 56%]  (Sampling)
## Iteration: 1060 / 1600 [ 66%]  (Sampling)
## Iteration: 1220 / 1600 [ 76%]  (Sampling)
## Iteration: 1380 / 1600 [ 86%]  (Sampling)
## Iteration: 1540 / 1600 [ 96%]  (Sampling)
## Iteration: 1600 / 1600 [100%]  (Sampling)
## #  Elapsed Time: 0.679 seconds (Warm-up)
## #                8.956 seconds (Sampling)
## #                9.635 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'poisson' NOW (CHAIN 2).
## 
## Iteration:    1 / 1600 [  0%]  (Warmup)
## Iteration:  101 / 1600 [  6%]  (Sampling)
## Iteration:  260 / 1600 [ 16%]  (Sampling)
## Iteration:  420 / 1600 [ 26%]  (Sampling)
## Iteration:  580 / 1600 [ 36%]  (Sampling)
## Iteration:  740 / 1600 [ 46%]  (Sampling)
## Iteration:  900 / 1600 [ 56%]  (Sampling)
## Iteration: 1060 / 1600 [ 66%]  (Sampling)
## Iteration: 1220 / 1600 [ 76%]  (Sampling)
## Iteration: 1380 / 1600 [ 86%]  (Sampling)
## Iteration: 1540 / 1600 [ 96%]  (Sampling)
## Iteration: 1600 / 1600 [100%]  (Sampling)
## #  Elapsed Time: 0.596 seconds (Warm-up)
## #                8.827 seconds (Sampling)
## #                9.423 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'poisson' NOW (CHAIN 3).
## 
## Iteration:    1 / 1600 [  0%]  (Warmup)
## Iteration:  101 / 1600 [  6%]  (Sampling)
## Iteration:  260 / 1600 [ 16%]  (Sampling)
## Iteration:  420 / 1600 [ 26%]  (Sampling)
## Iteration:  580 / 1600 [ 36%]  (Sampling)
## Iteration:  740 / 1600 [ 46%]  (Sampling)
## Iteration:  900 / 1600 [ 56%]  (Sampling)
## Iteration: 1060 / 1600 [ 66%]  (Sampling)
## Iteration: 1220 / 1600 [ 76%]  (Sampling)
## Iteration: 1380 / 1600 [ 86%]  (Sampling)
## Iteration: 1540 / 1600 [ 96%]  (Sampling)
## Iteration: 1600 / 1600 [100%]  (Sampling)
## #  Elapsed Time: 0.583 seconds (Warm-up)
## #                8.585 seconds (Sampling)
## #                9.168 seconds (Total)
# poisson.stan
plot(poisson.stan)

poisson.post <- rstan::extract(poisson.stan, permuted=F)
post.m <- melt(poisson.post)
head(post.m)
##   iterations  chains parameters    value
## 1          1 chain:1      beta1 1.618219
## 2          2 chain:1      beta1 2.882007
## 3          3 chain:1      beta1 3.135146
## 4          4 chain:1      beta1 2.736473
## 5          5 chain:1      beta1 3.246207
## 6          6 chain:1      beta1 1.822471
summary(post.m)
##    iterations         chains         parameters         value         
##  Min.   :   1.0   chain:1:322500   beta1  :  4500   Min.   : -6.2105  
##  1st Qu.: 375.8   chain:2:322500   beta2  :  4500   1st Qu.: -0.0568  
##  Median : 750.5   chain:3:322500   r[1]   :  4500   Median :  0.7807  
##  Mean   : 750.5                    r[2]   :  4500   Mean   :  5.7127  
##  3rd Qu.:1125.2                    r[3]   :  4500   3rd Qu.:  2.5710  
##  Max.   :1500.0                    r[4]   :  4500   Max.   :701.0707  
##                                    (Other):940500
post.m %>% filter(parameters == 'beta1') %>% qplot(data=., x=value)+geom_vline(x=0,col="darkgreen")

post.m %>% filter(parameters == 'beta2') %>% qplot(data=., x=value)+geom_vline(x=0,col="darkgreen")

post.m %>% filter(parameters == 'sigma[1]') %>% qplot(data=., x=value)+geom_vline(x=0,col="darkgreen")

post.m %>% filter(parameters == 'sigma[2]') %>% qplot(data=., x=value)+geom_vline(x=0,col="darkgreen")