Data

## Loading required package: rstan
## Loading required package: ggplot2
## Loading required package: StanHeaders
## rstan (Version 2.12.1, packaged: 2016-09-11 13:07:50 UTC, GitRev: 85f7a56811da)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())
## Loading required package: parallel
## rethinking (Version 1.59)
dat<-read.csv("old02.csv",header=TRUE)

Door to Lab Time

Visualization

lbl<-c(
  "1"="Monday",
"2"="Tuesday",
"3"="Wednesday",
"4"="Thursday",
"5"="Friday",
"6"="Saturday",
"7"="Sunday"
)
dat$day<-factor(dat$DOW)
ggplot(dat,aes(x=TOD,y=DoorToLab))+geom_point()+facet_wrap(~day,labeller=labeller(day=lbl))

Model

dfr<-data.frame(dat$DOW,dat$TOD,dat$DoorToLab)
colnames(dfr)<-c("DOW","TOD","DoorToLab")

model1<-map2stan(
  alist(
    DoorToLab~dlnorm(mu,s),
    mu<-a+dn[DOW]+b1*TOD+b2*TOD*TOD,
    a~dnorm(0,1),
    dn[DOW]~dnorm(0,1),
    b1~dnorm(0,1),
    b2~dnorm(0,1),
    s~dcauchy(0,1)
  
  ),
  data=dfr,
  iter=1e4
  )
## 
## SAMPLING FOR MODEL 'DoorToLab ~ dlnorm(mu, s)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1, Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1, Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 1, Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 1, Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 1, Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 1, Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 1, Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1, Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1, Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1, Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1, Iteration: 10000 / 10000 [100%]  (Sampling)
##  Elapsed Time: 7.887 seconds (Warm-up)
##                6.763 seconds (Sampling)
##                14.65 seconds (Total)
## 
## [1] "The following numerical problems occured the indicated number of times after warmup on chain 1"
##                                                                                         count
## Exception thrown at line 25: lognormal_log: Scale parameter is inf, but must be finite!     8
## [1] "When a numerical problem occurs, the Hamiltonian proposal gets rejected."
## [1] "See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected"
## [1] "If the number in the 'count' column is small,  do not ask about this message on stan-users."
## 
## SAMPLING FOR MODEL 'DoorToLab ~ dlnorm(mu, s)' NOW (CHAIN 1).
## WARNING: No variance estimation is
##          performed for num_warmup < 20
## 
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
##  Elapsed Time: 0 seconds (Warm-up)
##                0 seconds (Sampling)
##                0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 500 / 5000 ]
[ 1000 / 5000 ]
[ 1500 / 5000 ]
[ 2000 / 5000 ]
[ 2500 / 5000 ]
[ 3000 / 5000 ]
[ 3500 / 5000 ]
[ 4000 / 5000 ]
[ 4500 / 5000 ]
[ 5000 / 5000 ]
precis(model1, depth=2)
##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a      3.45   0.42       2.78       4.13  1033    1
## dn[1]  0.67   0.39       0.06       1.31   981    1
## dn[2]  0.04   0.40      -0.61       0.65  1045    1
## dn[3]  0.41   0.39      -0.20       1.05   983    1
## dn[4]  0.44   0.40      -0.19       1.07  1025    1
## dn[5]  0.52   0.39      -0.08       1.17   958    1
## dn[6]  0.82   0.39       0.21       1.45   984    1
## dn[7]  0.62   0.40      -0.05       1.22  1039    1
## b1    -0.13   0.05      -0.21      -0.06  1583    1
## b2     0.01   0.00       0.00       0.01  1647    1
## s      1.04   0.05       0.96       1.12  2415    1

Diagnostics

plot(model1)
dashboard(model1)

Prediction

pl.fun<-function(t){
  ma<-matrix(data=NA,nrow=1000,ncol=7)
  for(i in 1:1000){
    for(j in 1:7){
      ma[i,j]<-j
    }
  }
  v1<-factor(as.vector(ma))
  nd<-data.frame(DOW=1:7,TOD=t )
  lf<-link(model1,data=nd)
  v2<-exp(as.vector(lf))
  gd<-data.frame(v1,v2)
  colnames(gd)<-c("Day","DoorToLab")
  library(plyr)
  cdat<-ddply(gd, "Day", summarise, day.median=median(DoorToLab))
  ggplot(gd,aes(x=DoorToLab))+
    geom_histogram(aes(y=..density..),fill="red")+
    geom_density(alpha=0.2,fill="#FF6666")+
    xlim(0,120)+
    xlab("Time (minutes)")+
    geom_vline(data=cdat,aes(xintercept=day.median))+
    geom_text(data=cdat,aes(x=100,y=0.04,
      label=paste("Median: ",round(day.median))))+
    facet_wrap(~Day,labeller = labeller(Day=lbl),ncol = 1)+
    ggtitle(paste("Time: ",t,":00 hours"))
}

Visualization

pl.fun(2)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing non-finite values (stat_density).

pl.fun(8)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

pl.fun(12)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

pl.fun(16)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

pl.fun(20)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

pl.fun(23)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.