a05-scent.r

s_rathwell — Oct 10, 2013, 9:06 PM

#Assignment 5 - Effect of Scent on the Time to Complete a Maze
#Sarah Rathwell - 301084687
#
#Objective - Is there evidence that mean completion time differs between scents
#
cat("Investigate the relationship between completion time and scent ", date(), '\n')  
Investigate the relationship between completion time and scent  Thu Oct 10 21:06:24 2013 

#set seed and randomize scents 1-4 (1:plain,2:apple,3:banana,4:skunk) for 16 subjects(blocks)
set.seed(1:16)
a<-sample(1:4,4,replace=F)
b<-sample(1:4,4,replace=F)
c<-sample(1:4,4,replace=F)
d<-sample(1:4,4,replace=F)
e<-sample(1:4,4,replace=F)
f<-sample(1:4,4,replace=F)
g<-sample(1:4,4,replace=F)
h<-sample(1:4,4,replace=F)
i<-sample(1:4,4,replace=F)
j<-sample(1:4,4,replace=F)
k<-sample(1:4,4,replace=F)
l<-sample(1:4,4,replace=F)
m<-sample(1:4,4,replace=F)
n<-sample(1:4,4,replace=F)
o<-sample(1:4,4,replace=F)
p<-sample(1:4,4,replace=F)
matrix(c(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p),nrow=16,ncol=4,byrow=TRUE)
      [,1] [,2] [,3] [,4]
 [1,]    2    4    3    1
 [2,]    1    3    2    4
 [3,]    3    1    4    2
 [4,]    3    2    4    1
 [5,]    3    4    1    2
 [6,]    4    1    2    3
 [7,]    2    4    1    3
 [8,]    4    2    1    3
 [9,]    2    1    4    3
[10,]    4    1    2    3
[11,]    4    2    3    1
[12,]    3    4    1    2
[13,]    3    4    1    2
[14,]    2    1    3    4
[15,]    2    4    3    1
[16,]    4    1    3    2

#read in data
scent<-read.csv(file="/Users/s_rathwell/Documents/SFU/Fa2013/STAT300/assign 5/a05-scent-data.csv",header=F)
names(scent) <- c('dummy', 'num','day', 'subject', 'sex', 'scent', 'time')
scent$dummy <- NULL # get rid of first column

#check output
scent[1:5,]
  num day subject    sex  scent time
1   1   1       a female  apple   91
2   2   1       b female  plain  103
3   3   1       c female banana  148
4   4   1       d female banana  127
5   5   1       e female banana  120

#define factors
scent$subject<- factor(scent$subject)
scent$sex<- factor(scent$sex, levels=c('female','male'))
scent$scent<- factor(scent$scent, levels=c('plain','banana','apple','skunk'))
scent$time<- as.numeric(as.character(scent$time))  # some values are missing
Warning: NAs introduced by coercion
str(scent)
'data.frame':   64 obs. of  6 variables:
 $ num    : int  1 2 3 4 5 6 7 8 9 10 ...
 $ day    : int  1 1 1 1 1 1 1 1 2 2 ...
 $ subject: Factor w/ 16 levels "a","b","c","d",..: 1 2 3 4 5 6 7 8 1 2 ...
 $ sex    : Factor w/ 2 levels "female","male": 1 1 1 1 1 1 1 1 1 1 ...
 $ scent  : Factor w/ 4 levels "plain","banana",..: 3 1 2 2 2 4 3 4 4 2 ...
 $ time   : num  91 103 148 127 120 NA 149 153 134 NA ...

# How many missing values?
cat("Number of missing values in time   is :", sum(is.na(scent$time)), '\n')
Number of missing values in time   is : 7 

#check sample dispersions
boxplot(time ~ scent, data=scent, range=0,notch=TRUE,main="Comparison of maze completion times by scent", sub='Whiskers extend to range of data; data values jittered to avoid overplotting',xlab='Scent', ylab='Time (seconds)')
Warning: some notches went outside hinges ('box'): maybe set notch=FALSE
stripchart(time ~ scent, data=scent, add=TRUE,vertical=TRUE, method="jitter", jitter=.1)

plot of chunk unnamed-chunk-1


#check blocks (subject)
#design is incomplete but connected
xtabs(~subject+scent,data=scent[!is.na(scent$time),])
       scent
subject plain banana apple skunk
      a     1      1     1     1
      b     1      0     1     1
      c     1      1     1     1
      d     1      1     1     1
      e     1      1     0     1
      f     1      1     1     0
      g     1      0     1     1
      h     1      1     1     1
      i     0      1     1     1
      j     1      1     1     1
      k     1      1     1     1
      l     1      1     1     1
      m     1      1     1     1
      n     1      1     1     1
      o     1      0     1     1
      p     1      0     1     1

#report summary statistics
library(doBy)
Loading required package: multcomp
Loading required package: mvtnorm
Loading required package: survival
Loading required package: splines
Loading required package: MASS
report<-summaryBy(time~scent,data=scent[!is.na(scent$time),],FUN=c(length,mean,sd))
report$time.mean.se<-report$time.sd/sqrt(report$time.length)
report
   scent time.length time.mean time.sd time.mean.se
1  plain          15     139.1   22.80        5.888
2 banana          12     141.3   17.05        4.921
3  apple          15     134.7   26.33        6.799
4  skunk          15     161.1   22.50        5.809

#check interactions between block effects and treatment effects (data is relatively parallel)
plot(as.numeric(scent$scent),scent$time,type="n",main="Profile plot of completion time by scent",xlab='scent',ylab='time')
lapply(split(scent,scent$subject),function(x){lines((as.numeric(x$scent)),x$time,type='l')})

plot of chunk unnamed-chunk-1

$a
NULL

$b
NULL

$c
NULL

$d
NULL

$e
NULL

$f
NULL

$g
NULL

$h
NULL

$i
NULL

$j
NULL

$k
NULL

$l
NULL

$m
NULL

$n
NULL

$o
NULL

$p
NULL

#set up linear model with blocks included; omit the NA times
model<-lm(time~subject+scent,data=scent[!is.na(scent$time),])

#type three ss anova (unbalanced design)--*intra block only
library(car)
Anova(model,type=3)
Anova Table (Type III tests)

Response: time
            Sum Sq Df F value  Pr(>F)    
(Intercept)  46479  1  104.97 1.7e-12 ***
subject      10445 15    1.57  0.1286    
scent         6535  3    4.92  0.0055 ** 
Residuals    16825 38                    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
layout(matrix(1:4,2,2))
#check assumptions for linear model
plot(model)

plot of chunk unnamed-chunk-1

layout(1)

#analyze for both intra and inter subject variation (subjects are random blocks)
library(lmerTest)
Loading required package: Matrix
Loading required package: lattice
Loading required package: lme4

Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step
model2<-lmer(time~scent+(1|subject),data=scent[!is.na(scent$time),])
lmerTest::anova(model2)
Analysis of Variance Table of type 3 with  Satterthwaite  approximation for degrees of freedom
      Df Sum Sq Mean Sq F value Denom Pr(>F)   
scent  3   6271    2090    4.75  40.4 0.0062 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model2)
Linear mixed model fit by REML ['merModLmerTest']
Formula: time ~ scent + (1 | subject) 
   Data: scent[!is.na(scent$time), ] 

REML criterion at convergence: 490.6 

Random effects:
 Groups   Name        Variance Std.Dev.
 subject  (Intercept)  74.1     8.61   
 Residual             439.7    20.97   
Number of obs: 57, groups: subject, 16

Fixed effects:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   139.27       5.84   23.83   <2e-16 ***
scentbanana     2.52       8.21    0.31   0.7608    
scentapple     -5.06       7.69   -0.66   0.5144    
scentskunk     22.04       7.69    2.87   0.0066 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scntbn scntpp
scentbanana -0.617              
scentapple  -0.658  0.468       
scentskunk  -0.658  0.468  0.500

#check pairwise contrasts 
library(lsmeans)
Loading required package: plyr

Attaching package: 'lsmeans'

The following object is masked from 'package:lmerTest':

    lsmeans
lsmeans::lsmeans(model2,pairwise~scent,adjust='tukey')
Loading required package: pbkrtest
Loading required package: parallel
$`scent lsmeans`
  scent lsmean    SE    df lower.CL upper.CL
  plain  139.3 5.606 60.79    128.1    150.5
 banana  141.8 6.294 58.49    129.2    154.4
  apple  134.2 5.606 60.79    123.0    145.4
  skunk  161.3 5.606 60.79    150.1    172.5

$`scent pairwise differences`
               estimate    SE    df t.ratio p.value
plain - banana   -2.515 8.227 40.57 -0.3057 0.98993
plain - apple     5.057 7.693 39.00  0.6573 0.91232
plain - skunk   -22.037 7.693 39.00 -2.8646 0.03251
banana - apple    7.572 8.227 40.57  0.9204 0.79409
banana - skunk  -19.522 8.227 40.57 -2.3730 0.09867
apple - skunk   -27.094 7.693 39.00 -3.5219 0.00586
    p values are adjusted using the tukey method for 4 means 

#plot comparisons of means--*intra block only
result<-aov(time~subject+scent,data=scent[!is.na(scent$time),])
mcp<-TukeyHSD(result,which="scent",ordered=T)
mcp
  Tukey multiple comparisons of means
    95% family-wise confidence level
    factor levels have been ordered

Fit: aov(formula = time ~ subject + scent, data = scent[!is.na(scent$time), ])

$scent
               diff      lwr   upr  p adj
plain-apple   6.244 -14.3971 26.89 0.8481
banana-apple  9.329 -12.5645 31.22 0.6646
skunk-apple  27.733   7.0918 48.37 0.0047
banana-plain  3.085 -18.8090 24.98 0.9813
skunk-plain  21.489   0.8473 42.13 0.0386
skunk-banana 18.404  -3.4895 40.30 0.1262
plot(mcp)
abline(v=o,lty=2)

plot of chunk unnamed-chunk-1


#test comparisons for linear hypothesis and plot cld levels--*intra block only
library(multcomp)
result2<-glht(result,linfct=mcp(scent="Tukey"))
result2cld<-cld(result2)
summary(result2)

     Simultaneous Tests for General Linear Hypotheses

Multiple Comparisons of Means: Tukey Contrasts


Fit: aov(formula = time ~ subject + scent, data = scent[!is.na(scent$time), 
    ])

Linear Hypotheses:
                    Estimate Std. Error t value Pr(>|t|)   
banana - plain == 0     3.04       8.41    0.36    0.984   
apple - plain == 0     -6.39       7.77   -0.82    0.843   
skunk - plain == 0     21.98       7.77    2.83    0.036 * 
apple - banana == 0    -9.42       8.41   -1.12    0.679   
skunk - banana == 0    18.94       8.41    2.25    0.128   
skunk - apple == 0     28.36       7.77    3.65    0.004 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- single-step method)
result2cld
 plain banana  apple  skunk 
   "a"   "ab"    "a"    "b" 
plot(result2cld,main="Multiple Comparison Results",xlab="variety",ylab="phosphor",mai=c(1,1,1.25,1),notch=T)
Warning: some notches went outside hinges ('box'): maybe set notch=FALSE

plot of chunk unnamed-chunk-1