setwd("c:/users/mark/documents/work/stemibayes")
library(rethinking)
## Loading required package: rstan
## Loading required package: ggplot2
## Loading required package: StanHeaders
## rstan (Version 2.12.1, packaged: 2016-09-11 13:07:50 UTC, GitRev: 85f7a56811da)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())
## Loading required package: parallel
## rethinking (Version 1.59)
par(mfrow=c(1,1))
Data
data<-read.csv("old02.csv",header=TRUE)
data$WorkingHours<-0
data$WorkingHours[data$TOD>8&data$TOD<18&data$DOW!=6&data$DOW!=7]<-1
dataf<-data.frame(data$WorkingHours,data$DoorToLab)
colnames(dataf)=c("WorkingHours","DoorToLab")
Visulaization
lab<-c("0"="After-hours","1"="Working Hours")
ggplot(data,aes(x=TOD,y=DoorToLab))+
geom_point()+facet_grid(~WorkingHours,labeller = labeller(WorkingHours=lab))

ggplot(data, aes(x=factor(WorkingHours), y=DoorToLab)) +
geom_boxplot() +scale_x_discrete(labels=c("After-hours","Working Hours"))+
xlab("Time Window")+ylab("Door To Lab Time (minutes)")+
stat_summary(fun.y=mean, geom="point", shape=5, size=4)

library(plyr)
cdat<-ddply(dataf, "WorkingHours", summarise, day.median=median(DoorToLab))
ggplot(data,aes(x=DoorToLab))+
geom_histogram(aes(y=..density..),color="black",fill="red")+
facet_grid(~WorkingHours,labeller = labeller(WorkingHours=lab))+
geom_text(data=cdat,aes(x=200,y=0.01,
label=paste("Median: ",round(day.median))))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Model
ahe<-map2stan(
alist(
DoorToLab~dlnorm(mu,sigma),
mu<-a+inhours[WorkingHours],
a~dnorm(0,1),
inhours[WorkingHours]~dnorm(0,sig),
sigma~dcauchy(0,1),
sig~dcauchy(0,1)
),
data=dataf,
iter=1e4
)
##
## SAMPLING FOR MODEL 'DoorToLab ~ dlnorm(mu, sigma)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 1, Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 1, Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 1, Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 1, Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 1, Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 1, Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 1, Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 1, Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 1, Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 1, Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 1, Iteration: 10000 / 10000 [100%] (Sampling)
## Elapsed Time: 6.232 seconds (Warm-up)
## 7.737 seconds (Sampling)
## 13.969 seconds (Total)
##
## [1] "The following numerical problems occured the indicated number of times after warmup on chain 1"
## count
## Exception thrown at line 22: lognormal_log: Scale parameter is inf, but must be finite! 6
## [1] "When a numerical problem occurs, the Hamiltonian proposal gets rejected."
## [1] "See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected"
## [1] "If the number in the 'count' column is small, do not ask about this message on stan-users."
##
## SAMPLING FOR MODEL 'DoorToLab ~ dlnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
## performed for num_warmup < 20
##
##
## Chain 1, Iteration: 1 / 1 [100%] (Sampling)
## Elapsed Time: 0 seconds (Warm-up)
## 0.001 seconds (Sampling)
## 0.001 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 500 / 5000 ]
[ 1000 / 5000 ]
[ 1500 / 5000 ]
[ 2000 / 5000 ]
[ 2500 / 5000 ]
[ 3000 / 5000 ]
[ 3500 / 5000 ]
[ 4000 / 5000 ]
[ 4500 / 5000 ]
[ 5000 / 5000 ]
Diagnostics
precis(ahe,depth=2)
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 1.22 1.16 -0.45 3.21 836 1
## inhours[1] 2.36 1.17 0.34 4.05 837 1
## inhours[2] 1.71 1.16 -0.20 3.45 806 1
## sigma 1.04 0.05 0.95 1.11 1731 1
## sig 2.55 2.73 0.19 4.56 1333 1
plot(ahe)
dashboard(ahe)


Prediction
new_data<-data.frame(WorkingHours=c(1,2))
pr<-exp(link(ahe,data=new_data,n=5000))
## [ 500 / 5000 ]
[ 1000 / 5000 ]
[ 1500 / 5000 ]
[ 2000 / 5000 ]
[ 2500 / 5000 ]
[ 3000 / 5000 ]
[ 3500 / 5000 ]
[ 4000 / 5000 ]
[ 4500 / 5000 ]
[ 5000 / 5000 ]
v1<-rep(c("After-hours","Working Hours"),each=5000)
v2<-as.vector(pr)
vd<-data.frame(v1,v2)
vl<-c(median(pr[,1]),median(pr[,2]))
vld<-data.frame(c("After-Hours","Working Hours"),vl)
colnames(vd)<-c("WorkingHours","DoorToLab")
ggplot(vd,aes(x=DoorToLab))+
geom_histogram(aes(y=..density..),color="black",fill="red")+
geom_vline(data=vld,aes(xintercept=vl),color="blue", linetype="dashed", size=1)+
facet_wrap(~WorkingHours,ncol=1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Posterior Prediction of After-hours effect
ae<-pr[,1]-pr[,2]
aed<-data.frame(ae)
colnames(aed)<-c("AfterHoursEffect")
p0<-length(ae[ae<0])/5000
ggplot(aed,aes(x=AfterHoursEffect))+
geom_histogram(aes(y=..density..),color="black",fill="red")+
xlab("Door To Lab: After-hours-Working Hours (minutes)")+
geom_text(aes(x=27,y=0.1,
label=
paste("Median (5 th, 95 th percentile): ",
round(median(ae)),
"(",
round(quantile(ae,c(0.05))),
",",
round(quantile(ae,c(0.95))),
")")),size=4,color="blue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
