IMPORTANT! Run the chunk below to get correct Type III SS. Also loads necessary libraries.

# The following is required to calculate Type III SS when running ANOVA.
# See Note 0
# options("contrasts")
options(contrasts=c('contr.sum','contr.poly'))

# car for Anova().
library(tidyverse); library(car)
## Warning: package 'car' was built under R version 3.5.3

1. One sample T test, Week 5 lab

paper <- read.csv("data/paper.csv")[2]
#H0: mean <= 1600
#Ha: mean > 1600

t.test(paper, mu=1600, alternative="greater")
## 
##  One Sample t-test
## 
## data:  paper
## t = 4.5171, df = 29, p-value = 4.843e-05
## alternative hypothesis: true mean is greater than 1600
## 95 percent confidence interval:
##  1677.149      Inf
## sample estimates:
## mean of x 
##  1723.667

2. Two sample T test, HW5 #6.6

stream <- read.csv("data/stream.csv")
#Part A
#Equal variances will yield Pooled variance T test, whereas unequal will yield Satterthwaite.

#H0: dowmstream - upstream >= -.5
#Ha: downstream - upstream < -.5

t.test(stream$downstream, stream$upstream, 
       alternative="less", mu=-.5, conf.level=.01,
       var.equal=T)
## 
##  Two Sample t-test
## 
## data:  stream$downstream and stream$upstream
## t = -6.9625, df = 28, p-value = 7.166e-08
## alternative hypothesis: true difference in means is less than -0.5
## 1 percent confidence interval:
##       -Inf -1.402898
## sample estimates:
## mean of x mean of y 
##  3.740000  4.906667
#Part B
shapiro.test(stream$downstream)
## 
##  Shapiro-Wilk normality test
## 
## data:  stream$downstream
## W = 0.95141, p-value = 0.5469
shapiro.test(stream$upstream)
## 
##  Shapiro-Wilk normality test
## 
## data:  stream$upstream
## W = 0.94347, p-value = 0.428
layout(mat = matrix(c(1,2), 2, 1),  height = c(3,1)) #matrix(data, nrow, ncol)
par(mar=c(0, 3, 2, 1)) #margins for each plot
hist(stream$downstream, xlim=c(3,5), col="pink", main="Distribution of Downstream")
boxplot(stream$downstream, horizontal=T, outline=T, frame=F, col="green1", width=10, ylim=c(3,5), axes=F)

qqnorm(stream$downstream)
qqline(stream$downstream)

layout(mat = matrix(c(1,2), 2, 1),  height = c(3,1))

par(mar=c(0, 3, 2, 1))
hist(stream$upstream, xlim=c(4,6), col="pink", main="Distribution of Upstream")
boxplot(stream$upstream, horizontal=T, outline=T, frame=F, col="green1", width=10, ylim=c(4,6), axes=F)

qqnorm(stream$upstream)
qqline(stream$upstream)

3. Paired T test, HW5 #6.28

repairpersons <- read.csv("data/ex6-28.txt")
#Part A
#H0: After - Before >= 0
#Ha: After - Before < 0

t.test(repairpersons$X.After., repairpersons$X.Before., 
       alternative="less", paired=T)
## 
##  Paired t-test
## 
## data:  repairpersons$X.After. and repairpersons$X.Before.
## t = -0.86098, df = 9, p-value = 0.2058
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##      -Inf 2.917599
## sample estimates:
## mean of the differences 
##                  -2.584
#Part B
#95% Confidence interval

t.test(repairpersons$X.After., repairpersons$X.Before., paired=T)
## 
##  Paired t-test
## 
## data:  repairpersons$X.After. and repairpersons$X.Before.
## t = -0.86098, df = 9, p-value = 0.4116
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -9.373261  4.205261
## sample estimates:
## mean of the differences 
##                  -2.584
#Part C
repairpersons <- mutate(repairpersons, diff = X.After. - X.Before.)
shapiro.test(repairpersons$diff)
## 
##  Shapiro-Wilk normality test
## 
## data:  repairpersons$diff
## W = 0.9398, p-value = 0.5508
layout(mat = matrix(c(1,2), 2, 1),  height = c(3,1))
par(mar=c(0, 3, 2, 1))
hist(repairpersons$diff, xlim=c(-17,19), col="pink", main="Distribution of After-Before")
boxplot(repairpersons$diff, horizontal=T, outline=T, frame=F, col="green1", width=10, ylim=c(-17,19), axes=F)

qqnorm(repairpersons$diff)
qqline(repairpersons$diff)

4. Wilcoxon Rank Sum / Mann Whitney U non-parametric test for 2 independent means, Week 7 lab

See Note 4 for processing hairy strings

golf <- read.csv("data/golf.csv")
#The base fn wilcox.test cannot handle ties, so another package is necessary.
library(exactRankTests)
## Warning: package 'exactRankTests' was built under R version 3.5.3
##  Package 'exactRankTests' is no longer under development.
##  Please consider using package 'coin' instead.
#H0: medians/distributions are the same
#Ha: medians/distributions are different
wilcox.exact(score ~ gender, data=golf)
## 
##  Exact Wilcoxon rank sum test
## 
## data:  score by gender
## W = 3, p-value = 0.003497
## alternative hypothesis: true mu is not equal to 0

5. Wilcoxon Signed Rank np test for paired continuous data, Week 7 lab

mining <- read.csv("data/mining.csv")
wilcox.exact(mining$after, mining$before, paired=T)
## 
##  Exact Wilcoxon signed rank test
## 
## data:  mining$after and mining$before
## V = 89, p-value = 0.0007324
## alternative hypothesis: true mu is not equal to 0
#The value V = 89 corresponds to the sum of ranks assigned to the differences with positive sign. See Note 5

6. One Way ANOVA, Week 9 lab

See Note 6 for references

oil <- read.csv("data/oil.csv")

Using car package

library(car) #easier to calculate type 3 SS using this package than base R

Anova(lm(visc ~ type, data=oil), type=3)
## Anova Table (Type III tests)
## 
## Response: visc
##             Sum Sq Df  F value    Pr(>F)    
## (Intercept)  54140  1 255.8093 7.329e-13 ***
## type           231  2   0.5454     0.588    
## Residuals     4233 20                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#HOV Levene test. SAS does Levene's a different way, see Note 6.
leveneTest(visc ~ type, data=oil)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  2  5.5299 0.01225 *
##       20                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7. Kruskal Wallis np 1 way test, Week 9 lab

kruskal.test(visc ~ type, data = oil) 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  visc by type
## Kruskal-Wallis chi-squared = 1.2054, df = 2, p-value = 0.5473

8. Chi Square test for independence, week 9 lab

de <- read.csv("data/de.csv")
#Chi-square function requires data to be in a table
tbl <- table(de$exposure, de$disease)

#Chi Sq Test for Independence
chisq.test(tbl, correct=F) #Default of correct=T yields Continuity Adj Chi Sq
## Warning in chisq.test(tbl, correct = F): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  tbl
## X-squared = 9.1146, df = 1, p-value = 0.002536
#Contingency Table
library(gmodels)
CrossTable(de$exposure, de$disease)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  28 
## 
##  
##              | de$disease 
##  de$exposure |  negative |  Positive | Row Total | 
## -------------|-----------|-----------|-----------|
##      exposed |         5 |        15 |        20 | 
##              |     1.488 |     1.116 |           | 
##              |     0.250 |     0.750 |     0.714 | 
##              |     0.417 |     0.938 |           | 
##              |     0.179 |     0.536 |           | 
## -------------|-----------|-----------|-----------|
##   notExposed |         7 |         1 |         8 | 
##              |     3.720 |     2.790 |           | 
##              |     0.875 |     0.125 |     0.286 | 
##              |     0.583 |     0.062 |           | 
##              |     0.250 |     0.036 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |        12 |        16 |        28 | 
##              |     0.429 |     0.571 |           | 
## -------------|-----------|-----------|-----------|
## 
## 
#Fisher Exact Test
fisher.test(de$exposure, de$disease)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  de$exposure and de$disease
## p-value = 0.004236
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.0009998694 0.5759906625
## sample estimates:
## odds ratio 
## 0.05433825

9. Chi Square goodness of fit test, Week 10 lab

gator <- read.csv("data/gator.csv")

#data must be summarized for chisq test
gator_count <- gator %>% 
  group_by(type) %>% 
  summarise(count = n())
#chisq.test(x = observed, p = expected)
#F I O
chisq.test(x=gator_count$count, p=c(1/2, 3/10, 1/5))
## 
##  Chi-squared test for given probabilities
## 
## data:  gator_count$count
## X-squared = 0.42938, df = 2, p-value = 0.8068

10. Linear Contrasts, Week 3 lab (first lab)

See Note 10 for reference

scores <- read.csv("data/scores.csv")

#The order of the contrast weights maps to the order of the levels. Here, I set the order to be alphabetical.
levels(scores$method) <- c("computer", "control", "instructor", "piano")
#Calculate inverse matrix of contrasts
c1 <- c(1, 0, 0, -1) #computer vs piano
c2 <- c(0, 1, -1, 0) #control vs instructor
c3 <- c(-1, 3, -1,-1) #control vs all

mat_temp <- rbind(constant=1/4, c1, c2, c3)
mat <- solve(mat_temp)

mat <- mat[, -1]

#Calculate contrasts.
model <- lm(score ~ method, data=scores, contrasts=list(method = mat))
summary(model)
## 
## Call:
## lm(formula = score ~ method, data = scores, contrasts = list(method = mat))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.300 -1.971 -0.028  2.352  7.912 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.9610     0.3150  12.576  < 2e-16 ***
## methodc1      0.9720     0.8908   1.091    0.278    
## methodc2     -7.5120     0.8908  -8.433 3.42e-13 ***
## methodc3    -16.2920     2.1821  -7.466 3.76e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.15 on 96 degrees of freedom
## Multiple R-squared:  0.433,  Adjusted R-squared:  0.4153 
## F-statistic: 24.44 on 3 and 96 DF,  p-value: 7.807e-12

11. Tukey, Week 3 lab (first lab, same as above)

Different package used, see Note 11

library(TukeyC)
## Warning: package 'TukeyC' was built under R version 3.5.3
## Loading required package: doBy
## Warning: package 'doBy' was built under R version 3.5.3
#Tukey won't take lm or Anova, only aov.
fit <- aov(score ~ method, data=scores)

#This package matches SAS output. TukeyC(x, which)
tukes <- TukeyC(fit, "method")
tukes
## Results
##            Means G1 G2 G3
## instructor  7.40  a      
## computer    4.76     b   
## piano       3.79     b   
## control    -0.11        c
## 
## Sig.level
##  0.05
## 
## Diff_Prob
##            instructor computer piano control
## instructor      0.000    2.636 3.608   7.512
## computer        0.020    0.000 0.972   4.876
## piano           0.001    0.696 0.000   3.904
## control         0.000    0.000 0.000   0.000
## 
## MSD
##            instructor computer piano control
## instructor      0.000    2.329 2.329   2.329
## computer        2.329    0.000 2.329   2.329
## piano           2.329    2.329 0.000   2.329
## control         2.329    2.329 2.329   0.000
plot(tukes)

12. Two Way ANOVA, Week 4 lab (second lab)

See Note 12

soil <- read.csv("data/soil.csv")

soil$calcium <- as.factor(soil$calcium)
soil$ph <- as.factor(soil$ph)

Using car package

#Two way Anova
soil_fit <- lm(diameter ~ calcium * ph, data=soil)
Anova(soil_fit, type=3)
## Anova Table (Type III tests)
## 
## Response: diameter
##              Sum Sq Df    F value    Pr(>F)    
## (Intercept) 1790.70  1 26420.1680 < 2.2e-16 ***
## calcium        1.47  2    10.8238 0.0004462 ***
## ph             4.46  3    21.9385 4.635e-07 ***
## calcium:ph     3.26  6     8.0041 8.186e-05 ***
## Residuals      1.63 24                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residuals test for normality
shapiro.test(soil_fit$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  soil_fit$residuals
## W = 0.98385, p-value = 0.8659
layout(mat = matrix(c(1,2), 2, 1),  height = c(3,1))
par(mar=c(0, 3, 2, 1))
hist(soil_fit$residuals, xlim=c(-1,1), col="pink", main="Distribution of Residuals")
boxplot(soil_fit$residuals, horizontal=T, outline=T, frame=F, col="green1", width=10, ylim=c(-1,1), axes=F)

qqnorm(soil_fit$residuals)
qqline(soil_fit$residuals)

Another example from HW4 #14-8

#Another example from HW4 #14-8
attention <- read.csv("data/attention.csv")

#Two way anova
Anova(lm(span ~ age * product, data=attention), type=3)
## Anova Table (Type III tests)
## 
## Response: span
##             Sum Sq Df  F value    Pr(>F)    
## (Intercept)  44608  1 303.2284 < 2.2e-16 ***
## age           1303  2   4.4287 0.0165575 *  
## product       2018  1  13.7202 0.0005001 ***
## age:product   1384  2   4.7049 0.0130735 *  
## Residuals     7944 54                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

13. Completely Randomized Block Design, HW3 #15-6

music <- read.csv("data/music.csv")

music$subject <- as.factor(music$subject)
Anova(lm(typing_efficiency ~ type_music + subject, data=music), type=3)
## Anova Table (Type III tests)
## 
## Response: typing_efficiency
##             Sum Sq Df   F value    Pr(>F)    
## (Intercept) 9557.3  1 4041.0201 < 2.2e-16 ***
## type_music    31.0  2    6.5436 0.0119776 *  
## subject      149.3  6   10.5235 0.0003462 ***
## Residuals     28.4 12                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

14. Latin Square Design (LSD), HW3 #15-40

lights <- read.csv("data/lights.csv")

lights$intersection <- as.factor(lights$intersection)
lights$period <- as.factor(lights$period)
Anova(lm(minutes ~ signaling + intersection + period, data=lights), type=3)
## Anova Table (Type III tests)
## 
## Response: minutes
##               Sum Sq Df   F value    Pr(>F)    
## (Intercept)  13372.6  1 2289.3339 4.545e-15 ***
## signaling       76.3  4    3.2648   0.04976 *  
## intersection    18.2  4    0.7801   0.55927    
## period        1091.7  4   46.7222 3.202e-07 ***
## Residuals       70.1 12                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Bonus tukey
fit <- aov(minutes ~ signaling + intersection + period, data=lights)
tukes <- TukeyC(fit, "signaling")
tukes
## Results
##     Means G1 G2
## 'B' 24.88  a   
## 'E' 24.14  a  b
## 'A' 24.12  a  b
## 'D' 22.50  a  b
## 'C' 20.00     b
## 
## Sig.level
##  0.05
## 
## Diff_Prob
##       'B'   'E'   'A'   'D'  'C'
## 'B' 0.000 0.740 0.760 2.380 4.88
## 'E' 0.987 0.000 0.020 1.640 4.14
## 'A' 0.986 1.000 0.000 1.620 4.12
## 'D' 0.549 0.817 0.823 0.000 2.50
## 'C' 0.050 0.111 0.113 0.504 0.00
## 
## MSD
##       'B'   'E'   'A'   'D'   'C'
## 'B' 0.000 4.872 4.872 4.872 4.872
## 'E' 4.872 0.000 4.872 4.872 4.872
## 'A' 4.872 4.872 0.000 4.872 4.872
## 'D' 4.872 4.872 4.872 0.000 4.872
## 'C' 4.872 4.872 4.872 4.872 0.000
plot(tukes)

15. ANCOVA, Week 5 lab

oysters <- read.csv("data/oysters.csv")

oysters$treatment <- as.factor(oysters$treatment)
oysters$replication <- as.factor(oysters$replication)
#ANCOVA
Anova(lm(final_weight ~ initial_weight + treatment, data=oysters), type=3)
## Anova Table (Type III tests)
## 
## Response: final_weight
##                 Sum Sq Df  F value    Pr(>F)    
## (Intercept)      1.718  1   5.6959 0.0316708 *  
## initial_weight 156.040  1 517.3840 1.867e-12 ***
## treatment       12.089  4  10.0212 0.0004819 ***
## Residuals        4.222 14                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Test for heteroskedasticity (interaction term)
Anova(lm(final_weight ~ initial_weight * treatment, data=oysters), type=3)
## Anova Table (Type III tests)
## 
## Response: final_weight
##                          Sum Sq Df  F value    Pr(>F)    
## (Intercept)               0.064  1   0.2275    0.6436    
## initial_weight           68.529  1 241.8091 2.472e-08 ***
## treatment                 1.696  4   1.4963    0.2752    
## initial_weight:treatment  1.388  4   1.2247    0.3602    
## Residuals                 2.834 10                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

16. Random Effect and Mixed Effect Models, HW5 #17-10

See Note 16 for all the testing

pesticide <- read.csv("data/pesticide.csv")

pesticide$location <- as.factor(pesticide$location)
pesticide$chemical <- as.factor(pesticide$chemical)
pest_fit <- lm(number_killed ~ location * chemical, data=pesticide)
Anova(pest_fit, type=3)
## Anova Table (Type III tests)
## 
## Response: number_killed
##                    Sum Sq Df   F value    Pr(>F)    
## (Intercept)       1686.10  1 4869.6094 < 2.2e-16 ***
## location             3.81  4    2.7520  0.056700 .  
## chemical           180.13  3  173.4130 1.759e-14 ***
## location:chemical   16.16 12    3.8889  0.003652 ** 
## Residuals            6.92 20                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# When you have random effect, divide the individual effects by interaction term.
# Chemical is random and interacts with Location, so calculating the F-stat for both involves dividing by mean square of the interaction, not the error.

# F-value for Location = ms_location/ms_location:chemical
(3.81/4)/(16.16/12) 
## [1] 0.707302
pf((3.81/4)/(16.16/12), 4, 12, lower.tail=F)
## [1] 0.6022125
# F-value for Chemical = ms_chemical/ms_location:chemical
(180.13/3)/(16.16/12)
## [1] 44.58663
pf((180.13/3)/(16.16/12), 3, 12, lower.tail=F)
## [1] 8.802727e-07

Varcomp, see Note 16 for reference.

library(VCA)
## Warning: package 'VCA' was built under R version 3.5.3
#Look at VC column in fit$aov.tab
fit <- remlMM(number_killed~(location)*chemical, pesticide, cov=T)
print(fit)
## 
## 
## REML-Estimation of Mixed Model:
## -------------------------------
## 
##  [Fixed Effects]
## 
##       int chemical1 chemical2 chemical3 chemical4 
##      5.71      2.85     -2.34      2.62      0.00 
## 
## 
##  [Variance Components]
## 
##   Name              DF        VC       %Total    SD       CV[%]    
## 1 total             24.594337 0.797191 100       0.892856 13.752109
## 2 location          0         0        2.1e-05   0.00041  0.006316 
## 3 location:chemical 5.989053  0.450943 56.566506 0.671523 10.343059
## 4 error             20        0.346248 43.433473 0.588428 9.063201 
##   Var(VC) 
## 1 0.05168 
## 2 0.016228
## 3 0.067907
## 4 0.011989
## 
## Mean: 6.4925 (N = 40) 
## 
## Experimental Design: balanced  |  Method: REML

17. Nested Factors, Week 8 lab #1

drug <- read.csv("data/drug.csv")

drug$site <- as.factor(drug$site)
drug$batch <- as.factor(drug$batch)
drug$tablet <- as.factor(drug$tablet)
# Specifying nested term: site/batch == batch(site) in SAS
drug_fit <- Anova(lm(y ~ site + site/batch, data=drug), type=3)
drug_fit
## Anova Table (Type III tests)
## 
## Response: y
##             Sum Sq Df    F value    Pr(>F)    
## (Intercept) 763.06  1 63105.9683 < 2.2e-16 ***
## site          0.02  1     1.5096 0.2311153    
## site:batch    0.45  4     9.3869 0.0001028 ***
## Residuals     0.29 24                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Batch is a random effect
# F-stat for site = MS_site / MS_site:batch
# Anova() rounds
0.02/(0.45/4)
## [1] 0.1777778
# But if you grab from fit object, you can get exact answers
x <- (drug_fit$`Sum Sq`[2]/drug_fit$Df[2])/(drug_fit$`Sum Sq`[3]/drug_fit$Df[3])
x
## [1] 0.1608176
pf(x, 1, 4, lower.tail=F)
## [1] 0.7089034

Week 8 lab, #2

engi <- read.csv("data/engi.csv")

engi$device <- as.factor(engi$device)
engi$rep <- as.factor(engi$rep)
engi$facility <- as.factor(engi$facility)
engi$tester <- as.factor(engi$tester)
# Tester is nested within facility
engi_fit <- Anova(lm(y ~ facility + device + facility:device + facility/tester, data=engi), type=3)
engi_fit
## Anova Table (Type III tests)
## 
## Response: y
##                  Sum Sq Df   F value    Pr(>F)    
## (Intercept)     269.327  1 5191.8434 < 2.2e-16 ***
## facility          0.935  1   18.0281 0.0001462 ***
## device            0.009  2    0.0843 0.9193022    
## facility:device   0.225  2    2.1727 0.1285749    
## facility:tester   1.106  6    3.5542 0.0072376 ** 
## Residuals         1.867 36                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Finding F-stat and p-value for Facility, which has random effect Tester nested within it.
# Approx
(0.935)/(1.106/6)
## [1] 5.072333
pf((0.935)/(1.106/6),1,6,lower.tail=F)
## [1] 0.06524507
# Exact
x <- engi_fit$`Sum Sq`[2]/(engi_fit$`Sum Sq`[5]/engi_fit$Df[5])
x
## [1] 5.072316
pf(x, 1, 6, lower.tail=F)
## [1] 0.0652454

18. Crossover Design, Week 9 lab

bp <- read.csv("data/bp.csv")
bp$sequence<- as.factor(bp$sequence)
bp$patient<- as.factor(bp$patient)
bp$period<- as.factor(bp$period)
# Since interaction is insignificant, rerun model without it.
bp_fit <- Anova(lm(y ~ sequence + treatment + period + sequence/patient, data=bp), type=3)
bp_fit
## Anova Table (Type III tests)
## 
## Response: y
##                   Sum Sq Df   F value    Pr(>F)    
## (Intercept)      224.500  1 1655.4711 < 2.2e-16 ***
## sequence           0.234  2    0.8624    0.4373    
## treatment          9.517  2   35.0901 2.879e-07 ***
## period             0.017  2    0.0635    0.9387    
## sequence:patient   0.669  9    0.5483    0.8221    
## Residuals          2.712 20                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Patient is nested within Sequence, and thus is considered a random effect.
# F-stat for Sequence = MS_seq / MS_seq:pat
(bp_fit$`Sum Sq`[2]/bp_fit$Df[2])/(bp_fit$`Sum Sq`[5]/bp_fit$Df[5])
## [1] 1.572852
pf((bp_fit$`Sum Sq`[2]/bp_fit$Df[2])/(bp_fit$`Sum Sq`[5]/bp_fit$Df[5]), 2,9,lower.tail = F)
## [1] 0.259531

Note 4

#Use gsub for messier strings. Here it's unnecessary.
#golf <- read.table(text=gsub("(?<=[a-z])\\s+", " ", golf_text, perl=TRUE), header=FALSE, col.names = c("gender", "score"))

Note 5

#Calculation of V:
diff <- mining$after - mining$before
diff <- diff[diff!=0] #delete all differences = 0, as per Signed Rank procedure
diff.rank <- rank(abs(diff))
diff.rank.sign <- diff.rank * sign(diff[])
ranks.pos <- sum(diff.rank.sign[diff.rank.sign > 0]) 
ranks.neg <- -sum(diff.rank.sign[diff.rank.sign < 0]) 

Note 6

Referenced used for ANOVA: https://www.r-bloggers.com/anova-%E2%80%93-type-iiiiii-ss-explained/

#SAS calculates Leven using Squared deviations. R and it's packages seem to do Absolute instead.
#The following replicates what SAS does.
#https://stackoverflow.com/questions/22396543/levenes-test-using-the-squared-residuals

oil.lm <- lm(visc ~ type, data = oil)
anova(lm(residuals(oil.lm)^2 ~ oil$type))

#HOV Levene using absolute deviations
leveneTest(visc ~ type, data=oil)

Using ezANOVA package

library(ez)

#ezANOVA requires an ID column, wid. This is so that it can handle repeated measures.
oil$id <- as.factor(c(1:length(oil$type)))


av <- ezANOVA(data=oil, dv=visc, wid=id ,between=type, detail=T, type=3, white.adjust=T)
print(av)

ANT example

data(ANT)
b_anova_full <- ezANOVA(data=ANT,
                        dv=rt,
                        wid=subnum,
                        between=group,
                        within_full=.(cue, flank),
                        type=3,
                        detailed=T,
                        return_aov=T)
print(b_anova_full)

Note 11

#Base R uses TukeyHSD, which yields less elegant output.
tukes <- TukeyHSD(fit, "method")
tukes
plot(tukes)

Note 12

Using ezANOVA package

soil$id <- as.factor(c(1:length(soil$diameter))) #ezANOVA requires an id column
soil_anova <- ezANOVA(data=soil, dv=diameter, wid=id, between=.(calcium, ph), type=3, detailed=T, return_aov=T)

resids <- resid(soil_anova$aov) #bc we used return_aov=T, we can call the base aov() and output the residuals

shapiro.test(resids)

layout(mat = matrix(c(1,2), 2, 1),  height = c(3,1))
par(mar=c(0, 3, 2, 1))
hist(resids, xlim=c(-1,1), col="pink", main="Distribution of Residuals")
boxplot(resids, horizontal=T, outline=T, frame=F, col="green1", width=10, ylim=c(-1,1), axes=F)

qqnorm(resids)
qqline(resids)

Note 16

reference: https://mcfromnz.wordpress.com/2011/03/02/anova-type-iiiiii-ss-explained/ Varcomp reference: https://stats.stackexchange.com/questions/161225/estimates-of-the-variance-of-the-variance-component-of-a-mixed-effects-model Testing on paint

paint <- read.csv("data/CH17/ex17-1.txt")
colnames(paint) <- c("batch", "percentage")

Testing on pesticide

#Both of these attempts have chemical F = 48.11, but answer should be 173.41 if divided by ms_error, or 44.49 if divided by ms_interaction

#attempt 1 with interaction, wrong
library(afex)
mixed(number_killed ~ chemical + (1|location) + (1|location:chemical), data=pesticide)

#with no interaction, CORRECT, but that means interaction is wrong...
model2 <- lmer_alt(number_killed ~ chemical + (1|location), data=pesticide)
anova(model2)

#attempt 2 with interaction, wrong
model3 <- lmer_alt(number_killed ~ chemical + (1|location) + (1|chemical:location), data=pesticide)
summary(model3)

Working solution (but answer is type 2)

#Make F table manually
fit <- lm(number_killed ~ location * chemical, data=pesticide)
results <- Anova(fit) #this is the anova table. The following replaces F tests with MS_main/MS_interaction
Df <- results$Df
SumSq <- results$"Sum Sq"
MeanSq <- SumSq/results$Df
Fvalue <- results$"F value"
Pvalue <- results$"Pr(>F)"
Error.Term <- MeanSq[3]
df.error <- Df[3]

Fvalue[1] <- MeanSq[1]/Error.Term
Pvalue[1] <- 1 - pf(Fvalue[1], Df[1], df.error)

Fvalue[2] <- MeanSq[2]/Error.Term
Pvalue[2] <- 1 - pf(Fvalue[2], Df[2], df.error)

Ftable <- cbind(Df, SumSq, MeanSq, Fvalue, Pvalue)
rownames(Ftable) <- c("Locations", "Chemicals", "Locations:Chemicals", "Residuals")
print(Ftable)

Misc things

library(lmerTest) 

coef(fit)


#other attempts
model <- lmerTest::lmer(number_killed ~ chemical + (1|location) + (1|location:chemical), data=pesticide)
rand(model)

linearHypothesis(model, "chemical=0") #doesnt work

Note 18

For carryover, unused and unfinished

carryover <- trt_str
carryover$period <- as.factor(as.numeric(trt_str$period) + 1)

Regarding multicolinearity: https://stats.stackexchange.com/questions/112442/what-are-aliased-coefficients