setwd("c:/users/mark/documents/work/stemibayes")
library(rethinking)
## Loading required package: rstan
## Loading required package: ggplot2
## Loading required package: StanHeaders
## rstan (Version 2.12.1, packaged: 2016-09-11 13:07:50 UTC, GitRev: 85f7a56811da)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())
## Loading required package: parallel
## rethinking (Version 1.59)
par(mfrow=c(1,1))

Data

data<-read.csv("old02.csv",header=TRUE)
data$WorkingHours<-0
data$WorkingHours[data$TOD>8&data$TOD<18&data$DOW!=6&data$DOW!=7]<-1
dataf<-data.frame(data$WorkingHours,data$DoorToLab)
colnames(dataf)=c("WorkingHours","DoorToLab")

Visulaization

lab<-c("0"="After-hours","1"="Working Hours")

ggplot(data,aes(x=TOD,y=DoorToLab))+
  geom_point()+facet_grid(~WorkingHours,labeller = labeller(WorkingHours=lab))

ggplot(data, aes(x=factor(WorkingHours), y=DoorToLab)) +
  geom_boxplot() +scale_x_discrete(labels=c("After-hours","Working Hours"))+
  xlab("Time Window")+ylab("Door To Lab Time (minutes)")+
    stat_summary(fun.y=mean, geom="point", shape=5, size=4)

library(plyr)

  cdat<-ddply(dataf, "WorkingHours", summarise, day.median=median(DoorToLab))

ggplot(data,aes(x=DoorToLab))+
  geom_histogram(aes(y=..density..),color="black",fill="red")+
  facet_grid(~WorkingHours,labeller = labeller(WorkingHours=lab))+
  geom_text(data=cdat,aes(x=200,y=0.01,
                          label=paste("Median: ",round(day.median))))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Model

ahe<-map2stan(
  alist(
    DoorToLab~dlnorm(mu,sigma),
    mu<-a+inhours[WorkingHours],
    a~dnorm(0,1),
    inhours[WorkingHours]~dnorm(0,sig),
    sigma~dcauchy(0,1),
    sig~dcauchy(0,1)
  ),
  data=dataf,
  iter=1e4
  
)
## 
## SAMPLING FOR MODEL 'DoorToLab ~ dlnorm(mu, sigma)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1, Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1, Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 1, Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 1, Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 1, Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 1, Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 1, Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1, Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1, Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1, Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1, Iteration: 10000 / 10000 [100%]  (Sampling)
##  Elapsed Time: 6.232 seconds (Warm-up)
##                7.737 seconds (Sampling)
##                13.969 seconds (Total)
## 
## [1] "The following numerical problems occured the indicated number of times after warmup on chain 1"
##                                                                                         count
## Exception thrown at line 22: lognormal_log: Scale parameter is inf, but must be finite!     6
## [1] "When a numerical problem occurs, the Hamiltonian proposal gets rejected."
## [1] "See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected"
## [1] "If the number in the 'count' column is small,  do not ask about this message on stan-users."
## 
## SAMPLING FOR MODEL 'DoorToLab ~ dlnorm(mu, sigma)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 0 seconds (Warm-up)
##                0.001 seconds (Sampling)
##                0.001 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 500 / 5000 ]
[ 1000 / 5000 ]
[ 1500 / 5000 ]
[ 2000 / 5000 ]
[ 2500 / 5000 ]
[ 3000 / 5000 ]
[ 3500 / 5000 ]
[ 4000 / 5000 ]
[ 4500 / 5000 ]
[ 5000 / 5000 ]

Diagnostics

precis(ahe,depth=2)
##            Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a          1.22   1.16      -0.45       3.21   836    1
## inhours[1] 2.36   1.17       0.34       4.05   837    1
## inhours[2] 1.71   1.16      -0.20       3.45   806    1
## sigma      1.04   0.05       0.95       1.11  1731    1
## sig        2.55   2.73       0.19       4.56  1333    1
plot(ahe)
dashboard(ahe)

Prediction

new_data<-data.frame(WorkingHours=c(1,2))
pr<-exp(link(ahe,data=new_data,n=5000))
## [ 500 / 5000 ]
[ 1000 / 5000 ]
[ 1500 / 5000 ]
[ 2000 / 5000 ]
[ 2500 / 5000 ]
[ 3000 / 5000 ]
[ 3500 / 5000 ]
[ 4000 / 5000 ]
[ 4500 / 5000 ]
[ 5000 / 5000 ]
v1<-rep(c("After-hours","Working Hours"),each=5000)
v2<-as.vector(pr)
vd<-data.frame(v1,v2)
vl<-c(median(pr[,1]),median(pr[,2]))
vld<-data.frame(c("After-Hours","Working Hours"),vl)
colnames(vd)<-c("WorkingHours","DoorToLab")
ggplot(vd,aes(x=DoorToLab))+
    geom_histogram(aes(y=..density..),color="black",fill="red")+
  geom_vline(data=vld,aes(xintercept=vl),color="blue", linetype="dashed", size=1)+
    facet_wrap(~WorkingHours,ncol=1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Posterior Prediction of After-hours effect

ae<-pr[,1]-pr[,2]
aed<-data.frame(ae)
colnames(aed)<-c("AfterHoursEffect")
p0<-length(ae[ae<0])/5000
ggplot(aed,aes(x=AfterHoursEffect))+
  geom_histogram(aes(y=..density..),color="black",fill="red")+
  xlab("Door To Lab: After-hours-Working Hours (minutes)")+
  geom_text(aes(x=27,y=0.1,
                label=
                  paste("Median (5 th, 95 th percentile): ",
                        round(median(ae)),
                        "(",
                        round(quantile(ae,c(0.05))),
                        ",",
                        round(quantile(ae,c(0.95))),
                              ")")),size=4,color="blue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.