s_rathwell — Oct 10, 2013, 9:06 PM
#Assignment 5 - Effect of Scent on the Time to Complete a Maze
#Sarah Rathwell - 301084687
#
#Objective - Is there evidence that mean completion time differs between scents
#
cat("Investigate the relationship between completion time and scent ", date(), '\n')
Investigate the relationship between completion time and scent Thu Oct 10 21:06:24 2013
#set seed and randomize scents 1-4 (1:plain,2:apple,3:banana,4:skunk) for 16 subjects(blocks)
set.seed(1:16)
a<-sample(1:4,4,replace=F)
b<-sample(1:4,4,replace=F)
c<-sample(1:4,4,replace=F)
d<-sample(1:4,4,replace=F)
e<-sample(1:4,4,replace=F)
f<-sample(1:4,4,replace=F)
g<-sample(1:4,4,replace=F)
h<-sample(1:4,4,replace=F)
i<-sample(1:4,4,replace=F)
j<-sample(1:4,4,replace=F)
k<-sample(1:4,4,replace=F)
l<-sample(1:4,4,replace=F)
m<-sample(1:4,4,replace=F)
n<-sample(1:4,4,replace=F)
o<-sample(1:4,4,replace=F)
p<-sample(1:4,4,replace=F)
matrix(c(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p),nrow=16,ncol=4,byrow=TRUE)
[,1] [,2] [,3] [,4]
[1,] 2 4 3 1
[2,] 1 3 2 4
[3,] 3 1 4 2
[4,] 3 2 4 1
[5,] 3 4 1 2
[6,] 4 1 2 3
[7,] 2 4 1 3
[8,] 4 2 1 3
[9,] 2 1 4 3
[10,] 4 1 2 3
[11,] 4 2 3 1
[12,] 3 4 1 2
[13,] 3 4 1 2
[14,] 2 1 3 4
[15,] 2 4 3 1
[16,] 4 1 3 2
#read in data
scent<-read.csv(file="/Users/s_rathwell/Documents/SFU/Fa2013/STAT300/assign 5/a05-scent-data.csv",header=F)
names(scent) <- c('dummy', 'num','day', 'subject', 'sex', 'scent', 'time')
scent$dummy <- NULL # get rid of first column
#check output
scent[1:5,]
num day subject sex scent time
1 1 1 a female apple 91
2 2 1 b female plain 103
3 3 1 c female banana 148
4 4 1 d female banana 127
5 5 1 e female banana 120
#define factors
scent$subject<- factor(scent$subject)
scent$sex<- factor(scent$sex, levels=c('female','male'))
scent$scent<- factor(scent$scent, levels=c('plain','banana','apple','skunk'))
scent$time<- as.numeric(as.character(scent$time)) # some values are missing
Warning: NAs introduced by coercion
str(scent)
'data.frame': 64 obs. of 6 variables:
$ num : int 1 2 3 4 5 6 7 8 9 10 ...
$ day : int 1 1 1 1 1 1 1 1 2 2 ...
$ subject: Factor w/ 16 levels "a","b","c","d",..: 1 2 3 4 5 6 7 8 1 2 ...
$ sex : Factor w/ 2 levels "female","male": 1 1 1 1 1 1 1 1 1 1 ...
$ scent : Factor w/ 4 levels "plain","banana",..: 3 1 2 2 2 4 3 4 4 2 ...
$ time : num 91 103 148 127 120 NA 149 153 134 NA ...
# How many missing values?
cat("Number of missing values in time is :", sum(is.na(scent$time)), '\n')
Number of missing values in time is : 7
#check sample dispersions
boxplot(time ~ scent, data=scent, range=0,notch=TRUE,main="Comparison of maze completion times by scent", sub='Whiskers extend to range of data; data values jittered to avoid overplotting',xlab='Scent', ylab='Time (seconds)')
Warning: some notches went outside hinges ('box'): maybe set notch=FALSE
stripchart(time ~ scent, data=scent, add=TRUE,vertical=TRUE, method="jitter", jitter=.1)
#check blocks (subject)
#design is incomplete but connected
xtabs(~subject+scent,data=scent[!is.na(scent$time),])
scent
subject plain banana apple skunk
a 1 1 1 1
b 1 0 1 1
c 1 1 1 1
d 1 1 1 1
e 1 1 0 1
f 1 1 1 0
g 1 0 1 1
h 1 1 1 1
i 0 1 1 1
j 1 1 1 1
k 1 1 1 1
l 1 1 1 1
m 1 1 1 1
n 1 1 1 1
o 1 0 1 1
p 1 0 1 1
#report summary statistics
library(doBy)
Loading required package: multcomp
Loading required package: mvtnorm
Loading required package: survival
Loading required package: splines
Loading required package: MASS
report<-summaryBy(time~scent,data=scent[!is.na(scent$time),],FUN=c(length,mean,sd))
report$time.mean.se<-report$time.sd/sqrt(report$time.length)
report
scent time.length time.mean time.sd time.mean.se
1 plain 15 139.1 22.80 5.888
2 banana 12 141.3 17.05 4.921
3 apple 15 134.7 26.33 6.799
4 skunk 15 161.1 22.50 5.809
#check interactions between block effects and treatment effects (data is relatively parallel)
plot(as.numeric(scent$scent),scent$time,type="n",main="Profile plot of completion time by scent",xlab='scent',ylab='time')
lapply(split(scent,scent$subject),function(x){lines((as.numeric(x$scent)),x$time,type='l')})
$a
NULL
$b
NULL
$c
NULL
$d
NULL
$e
NULL
$f
NULL
$g
NULL
$h
NULL
$i
NULL
$j
NULL
$k
NULL
$l
NULL
$m
NULL
$n
NULL
$o
NULL
$p
NULL
#set up linear model with blocks included; omit the NA times
model<-lm(time~subject+scent,data=scent[!is.na(scent$time),])
#type three ss anova (unbalanced design)--*intra block only
library(car)
Anova(model,type=3)
Anova Table (Type III tests)
Response: time
Sum Sq Df F value Pr(>F)
(Intercept) 46479 1 104.97 1.7e-12 ***
subject 10445 15 1.57 0.1286
scent 6535 3 4.92 0.0055 **
Residuals 16825 38
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
layout(matrix(1:4,2,2))
#check assumptions for linear model
plot(model)
layout(1)
#analyze for both intra and inter subject variation (subjects are random blocks)
library(lmerTest)
Loading required package: Matrix
Loading required package: lattice
Loading required package: lme4
Attaching package: 'lmerTest'
The following object is masked from 'package:lme4':
lmer
The following object is masked from 'package:stats':
step
model2<-lmer(time~scent+(1|subject),data=scent[!is.na(scent$time),])
lmerTest::anova(model2)
Analysis of Variance Table of type 3 with Satterthwaite approximation for degrees of freedom
Df Sum Sq Mean Sq F value Denom Pr(>F)
scent 3 6271 2090 4.75 40.4 0.0062 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model2)
Linear mixed model fit by REML ['merModLmerTest']
Formula: time ~ scent + (1 | subject)
Data: scent[!is.na(scent$time), ]
REML criterion at convergence: 490.6
Random effects:
Groups Name Variance Std.Dev.
subject (Intercept) 74.1 8.61
Residual 439.7 20.97
Number of obs: 57, groups: subject, 16
Fixed effects:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 139.27 5.84 23.83 <2e-16 ***
scentbanana 2.52 8.21 0.31 0.7608
scentapple -5.06 7.69 -0.66 0.5144
scentskunk 22.04 7.69 2.87 0.0066 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) scntbn scntpp
scentbanana -0.617
scentapple -0.658 0.468
scentskunk -0.658 0.468 0.500
#check pairwise contrasts
library(lsmeans)
Loading required package: plyr
Attaching package: 'lsmeans'
The following object is masked from 'package:lmerTest':
lsmeans
lsmeans::lsmeans(model2,pairwise~scent,adjust='tukey')
Loading required package: pbkrtest
Loading required package: parallel
$`scent lsmeans`
scent lsmean SE df lower.CL upper.CL
plain 139.3 5.606 60.79 128.1 150.5
banana 141.8 6.294 58.49 129.2 154.4
apple 134.2 5.606 60.79 123.0 145.4
skunk 161.3 5.606 60.79 150.1 172.5
$`scent pairwise differences`
estimate SE df t.ratio p.value
plain - banana -2.515 8.227 40.57 -0.3057 0.98993
plain - apple 5.057 7.693 39.00 0.6573 0.91232
plain - skunk -22.037 7.693 39.00 -2.8646 0.03251
banana - apple 7.572 8.227 40.57 0.9204 0.79409
banana - skunk -19.522 8.227 40.57 -2.3730 0.09867
apple - skunk -27.094 7.693 39.00 -3.5219 0.00586
p values are adjusted using the tukey method for 4 means
#plot comparisons of means--*intra block only
result<-aov(time~subject+scent,data=scent[!is.na(scent$time),])
mcp<-TukeyHSD(result,which="scent",ordered=T)
mcp
Tukey multiple comparisons of means
95% family-wise confidence level
factor levels have been ordered
Fit: aov(formula = time ~ subject + scent, data = scent[!is.na(scent$time), ])
$scent
diff lwr upr p adj
plain-apple 6.244 -14.3971 26.89 0.8481
banana-apple 9.329 -12.5645 31.22 0.6646
skunk-apple 27.733 7.0918 48.37 0.0047
banana-plain 3.085 -18.8090 24.98 0.9813
skunk-plain 21.489 0.8473 42.13 0.0386
skunk-banana 18.404 -3.4895 40.30 0.1262
plot(mcp)
abline(v=o,lty=2)
#test comparisons for linear hypothesis and plot cld levels--*intra block only
library(multcomp)
result2<-glht(result,linfct=mcp(scent="Tukey"))
result2cld<-cld(result2)
summary(result2)
Simultaneous Tests for General Linear Hypotheses
Multiple Comparisons of Means: Tukey Contrasts
Fit: aov(formula = time ~ subject + scent, data = scent[!is.na(scent$time),
])
Linear Hypotheses:
Estimate Std. Error t value Pr(>|t|)
banana - plain == 0 3.04 8.41 0.36 0.984
apple - plain == 0 -6.39 7.77 -0.82 0.843
skunk - plain == 0 21.98 7.77 2.83 0.036 *
apple - banana == 0 -9.42 8.41 -1.12 0.679
skunk - banana == 0 18.94 8.41 2.25 0.128
skunk - apple == 0 28.36 7.77 3.65 0.004 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- single-step method)
result2cld
plain banana apple skunk
"a" "ab" "a" "b"
plot(result2cld,main="Multiple Comparison Results",xlab="variety",ylab="phosphor",mai=c(1,1,1.25,1),notch=T)
Warning: some notches went outside hinges ('box'): maybe set notch=FALSE