# The following is required to calculate Type III SS when running ANOVA.
# See Note 0
# options("contrasts")
options(contrasts=c('contr.sum','contr.poly'))
# car for Anova().
library(tidyverse); library(car)
## Warning: package 'car' was built under R version 3.5.3
paper <- read.csv("data/paper.csv")[2]
#H0: mean <= 1600
#Ha: mean > 1600
t.test(paper, mu=1600, alternative="greater")
##
## One Sample t-test
##
## data: paper
## t = 4.5171, df = 29, p-value = 4.843e-05
## alternative hypothesis: true mean is greater than 1600
## 95 percent confidence interval:
## 1677.149 Inf
## sample estimates:
## mean of x
## 1723.667
stream <- read.csv("data/stream.csv")
#Part A
#Equal variances will yield Pooled variance T test, whereas unequal will yield Satterthwaite.
#H0: dowmstream - upstream >= -.5
#Ha: downstream - upstream < -.5
t.test(stream$downstream, stream$upstream,
alternative="less", mu=-.5, conf.level=.01,
var.equal=T)
##
## Two Sample t-test
##
## data: stream$downstream and stream$upstream
## t = -6.9625, df = 28, p-value = 7.166e-08
## alternative hypothesis: true difference in means is less than -0.5
## 1 percent confidence interval:
## -Inf -1.402898
## sample estimates:
## mean of x mean of y
## 3.740000 4.906667
#Part B
shapiro.test(stream$downstream)
##
## Shapiro-Wilk normality test
##
## data: stream$downstream
## W = 0.95141, p-value = 0.5469
shapiro.test(stream$upstream)
##
## Shapiro-Wilk normality test
##
## data: stream$upstream
## W = 0.94347, p-value = 0.428
layout(mat = matrix(c(1,2), 2, 1), height = c(3,1)) #matrix(data, nrow, ncol)
par(mar=c(0, 3, 2, 1)) #margins for each plot
hist(stream$downstream, xlim=c(3,5), col="pink", main="Distribution of Downstream")
boxplot(stream$downstream, horizontal=T, outline=T, frame=F, col="green1", width=10, ylim=c(3,5), axes=F)
qqnorm(stream$downstream)
qqline(stream$downstream)
layout(mat = matrix(c(1,2), 2, 1), height = c(3,1))
par(mar=c(0, 3, 2, 1))
hist(stream$upstream, xlim=c(4,6), col="pink", main="Distribution of Upstream")
boxplot(stream$upstream, horizontal=T, outline=T, frame=F, col="green1", width=10, ylim=c(4,6), axes=F)
qqnorm(stream$upstream)
qqline(stream$upstream)
repairpersons <- read.csv("data/ex6-28.txt")
#Part A
#H0: After - Before >= 0
#Ha: After - Before < 0
t.test(repairpersons$X.After., repairpersons$X.Before.,
alternative="less", paired=T)
##
## Paired t-test
##
## data: repairpersons$X.After. and repairpersons$X.Before.
## t = -0.86098, df = 9, p-value = 0.2058
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 2.917599
## sample estimates:
## mean of the differences
## -2.584
#Part B
#95% Confidence interval
t.test(repairpersons$X.After., repairpersons$X.Before., paired=T)
##
## Paired t-test
##
## data: repairpersons$X.After. and repairpersons$X.Before.
## t = -0.86098, df = 9, p-value = 0.4116
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -9.373261 4.205261
## sample estimates:
## mean of the differences
## -2.584
#Part C
repairpersons <- mutate(repairpersons, diff = X.After. - X.Before.)
shapiro.test(repairpersons$diff)
##
## Shapiro-Wilk normality test
##
## data: repairpersons$diff
## W = 0.9398, p-value = 0.5508
layout(mat = matrix(c(1,2), 2, 1), height = c(3,1))
par(mar=c(0, 3, 2, 1))
hist(repairpersons$diff, xlim=c(-17,19), col="pink", main="Distribution of After-Before")
boxplot(repairpersons$diff, horizontal=T, outline=T, frame=F, col="green1", width=10, ylim=c(-17,19), axes=F)
qqnorm(repairpersons$diff)
qqline(repairpersons$diff)
See Note 4 for processing hairy strings
golf <- read.csv("data/golf.csv")
#The base fn wilcox.test cannot handle ties, so another package is necessary.
library(exactRankTests)
## Warning: package 'exactRankTests' was built under R version 3.5.3
## Package 'exactRankTests' is no longer under development.
## Please consider using package 'coin' instead.
#H0: medians/distributions are the same
#Ha: medians/distributions are different
wilcox.exact(score ~ gender, data=golf)
##
## Exact Wilcoxon rank sum test
##
## data: score by gender
## W = 3, p-value = 0.003497
## alternative hypothesis: true mu is not equal to 0
mining <- read.csv("data/mining.csv")
wilcox.exact(mining$after, mining$before, paired=T)
##
## Exact Wilcoxon signed rank test
##
## data: mining$after and mining$before
## V = 89, p-value = 0.0007324
## alternative hypothesis: true mu is not equal to 0
#The value V = 89 corresponds to the sum of ranks assigned to the differences with positive sign. See Note 5
See Note 6 for references
oil <- read.csv("data/oil.csv")
Using car package
library(car) #easier to calculate type 3 SS using this package than base R
Anova(lm(visc ~ type, data=oil), type=3)
## Anova Table (Type III tests)
##
## Response: visc
## Sum Sq Df F value Pr(>F)
## (Intercept) 54140 1 255.8093 7.329e-13 ***
## type 231 2 0.5454 0.588
## Residuals 4233 20
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#HOV Levene test. SAS does Levene's a different way, see Note 6.
leveneTest(visc ~ type, data=oil)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 5.5299 0.01225 *
## 20
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
kruskal.test(visc ~ type, data = oil)
##
## Kruskal-Wallis rank sum test
##
## data: visc by type
## Kruskal-Wallis chi-squared = 1.2054, df = 2, p-value = 0.5473
de <- read.csv("data/de.csv")
#Chi-square function requires data to be in a table
tbl <- table(de$exposure, de$disease)
#Chi Sq Test for Independence
chisq.test(tbl, correct=F) #Default of correct=T yields Continuity Adj Chi Sq
## Warning in chisq.test(tbl, correct = F): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: tbl
## X-squared = 9.1146, df = 1, p-value = 0.002536
#Contingency Table
library(gmodels)
CrossTable(de$exposure, de$disease)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 28
##
##
## | de$disease
## de$exposure | negative | Positive | Row Total |
## -------------|-----------|-----------|-----------|
## exposed | 5 | 15 | 20 |
## | 1.488 | 1.116 | |
## | 0.250 | 0.750 | 0.714 |
## | 0.417 | 0.938 | |
## | 0.179 | 0.536 | |
## -------------|-----------|-----------|-----------|
## notExposed | 7 | 1 | 8 |
## | 3.720 | 2.790 | |
## | 0.875 | 0.125 | 0.286 |
## | 0.583 | 0.062 | |
## | 0.250 | 0.036 | |
## -------------|-----------|-----------|-----------|
## Column Total | 12 | 16 | 28 |
## | 0.429 | 0.571 | |
## -------------|-----------|-----------|-----------|
##
##
#Fisher Exact Test
fisher.test(de$exposure, de$disease)
##
## Fisher's Exact Test for Count Data
##
## data: de$exposure and de$disease
## p-value = 0.004236
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.0009998694 0.5759906625
## sample estimates:
## odds ratio
## 0.05433825
gator <- read.csv("data/gator.csv")
#data must be summarized for chisq test
gator_count <- gator %>%
group_by(type) %>%
summarise(count = n())
#chisq.test(x = observed, p = expected)
#F I O
chisq.test(x=gator_count$count, p=c(1/2, 3/10, 1/5))
##
## Chi-squared test for given probabilities
##
## data: gator_count$count
## X-squared = 0.42938, df = 2, p-value = 0.8068
See Note 10 for reference
scores <- read.csv("data/scores.csv")
#The order of the contrast weights maps to the order of the levels. Here, I set the order to be alphabetical.
levels(scores$method) <- c("computer", "control", "instructor", "piano")
#Calculate inverse matrix of contrasts
c1 <- c(1, 0, 0, -1) #computer vs piano
c2 <- c(0, 1, -1, 0) #control vs instructor
c3 <- c(-1, 3, -1,-1) #control vs all
mat_temp <- rbind(constant=1/4, c1, c2, c3)
mat <- solve(mat_temp)
mat <- mat[, -1]
#Calculate contrasts.
model <- lm(score ~ method, data=scores, contrasts=list(method = mat))
summary(model)
##
## Call:
## lm(formula = score ~ method, data = scores, contrasts = list(method = mat))
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.300 -1.971 -0.028 2.352 7.912
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.9610 0.3150 12.576 < 2e-16 ***
## methodc1 0.9720 0.8908 1.091 0.278
## methodc2 -7.5120 0.8908 -8.433 3.42e-13 ***
## methodc3 -16.2920 2.1821 -7.466 3.76e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.15 on 96 degrees of freedom
## Multiple R-squared: 0.433, Adjusted R-squared: 0.4153
## F-statistic: 24.44 on 3 and 96 DF, p-value: 7.807e-12
Different package used, see Note 11
library(TukeyC)
## Warning: package 'TukeyC' was built under R version 3.5.3
## Loading required package: doBy
## Warning: package 'doBy' was built under R version 3.5.3
#Tukey won't take lm or Anova, only aov.
fit <- aov(score ~ method, data=scores)
#This package matches SAS output. TukeyC(x, which)
tukes <- TukeyC(fit, "method")
tukes
## Results
## Means G1 G2 G3
## instructor 7.40 a
## computer 4.76 b
## piano 3.79 b
## control -0.11 c
##
## Sig.level
## 0.05
##
## Diff_Prob
## instructor computer piano control
## instructor 0.000 2.636 3.608 7.512
## computer 0.020 0.000 0.972 4.876
## piano 0.001 0.696 0.000 3.904
## control 0.000 0.000 0.000 0.000
##
## MSD
## instructor computer piano control
## instructor 0.000 2.329 2.329 2.329
## computer 2.329 0.000 2.329 2.329
## piano 2.329 2.329 0.000 2.329
## control 2.329 2.329 2.329 0.000
plot(tukes)
See Note 12
soil <- read.csv("data/soil.csv")
soil$calcium <- as.factor(soil$calcium)
soil$ph <- as.factor(soil$ph)
Using car package
#Two way Anova
soil_fit <- lm(diameter ~ calcium * ph, data=soil)
Anova(soil_fit, type=3)
## Anova Table (Type III tests)
##
## Response: diameter
## Sum Sq Df F value Pr(>F)
## (Intercept) 1790.70 1 26420.1680 < 2.2e-16 ***
## calcium 1.47 2 10.8238 0.0004462 ***
## ph 4.46 3 21.9385 4.635e-07 ***
## calcium:ph 3.26 6 8.0041 8.186e-05 ***
## Residuals 1.63 24
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residuals test for normality
shapiro.test(soil_fit$residuals)
##
## Shapiro-Wilk normality test
##
## data: soil_fit$residuals
## W = 0.98385, p-value = 0.8659
layout(mat = matrix(c(1,2), 2, 1), height = c(3,1))
par(mar=c(0, 3, 2, 1))
hist(soil_fit$residuals, xlim=c(-1,1), col="pink", main="Distribution of Residuals")
boxplot(soil_fit$residuals, horizontal=T, outline=T, frame=F, col="green1", width=10, ylim=c(-1,1), axes=F)
qqnorm(soil_fit$residuals)
qqline(soil_fit$residuals)
Another example from HW4 #14-8
#Another example from HW4 #14-8
attention <- read.csv("data/attention.csv")
#Two way anova
Anova(lm(span ~ age * product, data=attention), type=3)
## Anova Table (Type III tests)
##
## Response: span
## Sum Sq Df F value Pr(>F)
## (Intercept) 44608 1 303.2284 < 2.2e-16 ***
## age 1303 2 4.4287 0.0165575 *
## product 2018 1 13.7202 0.0005001 ***
## age:product 1384 2 4.7049 0.0130735 *
## Residuals 7944 54
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
music <- read.csv("data/music.csv")
music$subject <- as.factor(music$subject)
Anova(lm(typing_efficiency ~ type_music + subject, data=music), type=3)
## Anova Table (Type III tests)
##
## Response: typing_efficiency
## Sum Sq Df F value Pr(>F)
## (Intercept) 9557.3 1 4041.0201 < 2.2e-16 ***
## type_music 31.0 2 6.5436 0.0119776 *
## subject 149.3 6 10.5235 0.0003462 ***
## Residuals 28.4 12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lights <- read.csv("data/lights.csv")
lights$intersection <- as.factor(lights$intersection)
lights$period <- as.factor(lights$period)
Anova(lm(minutes ~ signaling + intersection + period, data=lights), type=3)
## Anova Table (Type III tests)
##
## Response: minutes
## Sum Sq Df F value Pr(>F)
## (Intercept) 13372.6 1 2289.3339 4.545e-15 ***
## signaling 76.3 4 3.2648 0.04976 *
## intersection 18.2 4 0.7801 0.55927
## period 1091.7 4 46.7222 3.202e-07 ***
## Residuals 70.1 12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Bonus tukey
fit <- aov(minutes ~ signaling + intersection + period, data=lights)
tukes <- TukeyC(fit, "signaling")
tukes
## Results
## Means G1 G2
## 'B' 24.88 a
## 'E' 24.14 a b
## 'A' 24.12 a b
## 'D' 22.50 a b
## 'C' 20.00 b
##
## Sig.level
## 0.05
##
## Diff_Prob
## 'B' 'E' 'A' 'D' 'C'
## 'B' 0.000 0.740 0.760 2.380 4.88
## 'E' 0.987 0.000 0.020 1.640 4.14
## 'A' 0.986 1.000 0.000 1.620 4.12
## 'D' 0.549 0.817 0.823 0.000 2.50
## 'C' 0.050 0.111 0.113 0.504 0.00
##
## MSD
## 'B' 'E' 'A' 'D' 'C'
## 'B' 0.000 4.872 4.872 4.872 4.872
## 'E' 4.872 0.000 4.872 4.872 4.872
## 'A' 4.872 4.872 0.000 4.872 4.872
## 'D' 4.872 4.872 4.872 0.000 4.872
## 'C' 4.872 4.872 4.872 4.872 0.000
plot(tukes)
oysters <- read.csv("data/oysters.csv")
oysters$treatment <- as.factor(oysters$treatment)
oysters$replication <- as.factor(oysters$replication)
#ANCOVA
Anova(lm(final_weight ~ initial_weight + treatment, data=oysters), type=3)
## Anova Table (Type III tests)
##
## Response: final_weight
## Sum Sq Df F value Pr(>F)
## (Intercept) 1.718 1 5.6959 0.0316708 *
## initial_weight 156.040 1 517.3840 1.867e-12 ***
## treatment 12.089 4 10.0212 0.0004819 ***
## Residuals 4.222 14
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Test for heteroskedasticity (interaction term)
Anova(lm(final_weight ~ initial_weight * treatment, data=oysters), type=3)
## Anova Table (Type III tests)
##
## Response: final_weight
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.064 1 0.2275 0.6436
## initial_weight 68.529 1 241.8091 2.472e-08 ***
## treatment 1.696 4 1.4963 0.2752
## initial_weight:treatment 1.388 4 1.2247 0.3602
## Residuals 2.834 10
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
See Note 16 for all the testing
pesticide <- read.csv("data/pesticide.csv")
pesticide$location <- as.factor(pesticide$location)
pesticide$chemical <- as.factor(pesticide$chemical)
pest_fit <- lm(number_killed ~ location * chemical, data=pesticide)
Anova(pest_fit, type=3)
## Anova Table (Type III tests)
##
## Response: number_killed
## Sum Sq Df F value Pr(>F)
## (Intercept) 1686.10 1 4869.6094 < 2.2e-16 ***
## location 3.81 4 2.7520 0.056700 .
## chemical 180.13 3 173.4130 1.759e-14 ***
## location:chemical 16.16 12 3.8889 0.003652 **
## Residuals 6.92 20
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# When you have random effect, divide the individual effects by interaction term.
# Chemical is random and interacts with Location, so calculating the F-stat for both involves dividing by mean square of the interaction, not the error.
# F-value for Location = ms_location/ms_location:chemical
(3.81/4)/(16.16/12)
## [1] 0.707302
pf((3.81/4)/(16.16/12), 4, 12, lower.tail=F)
## [1] 0.6022125
# F-value for Chemical = ms_chemical/ms_location:chemical
(180.13/3)/(16.16/12)
## [1] 44.58663
pf((180.13/3)/(16.16/12), 3, 12, lower.tail=F)
## [1] 8.802727e-07
Varcomp, see Note 16 for reference.
library(VCA)
## Warning: package 'VCA' was built under R version 3.5.3
#Look at VC column in fit$aov.tab
fit <- remlMM(number_killed~(location)*chemical, pesticide, cov=T)
print(fit)
##
##
## REML-Estimation of Mixed Model:
## -------------------------------
##
## [Fixed Effects]
##
## int chemical1 chemical2 chemical3 chemical4
## 5.71 2.85 -2.34 2.62 0.00
##
##
## [Variance Components]
##
## Name DF VC %Total SD CV[%]
## 1 total 24.594337 0.797191 100 0.892856 13.752109
## 2 location 0 0 2.1e-05 0.00041 0.006316
## 3 location:chemical 5.989053 0.450943 56.566506 0.671523 10.343059
## 4 error 20 0.346248 43.433473 0.588428 9.063201
## Var(VC)
## 1 0.05168
## 2 0.016228
## 3 0.067907
## 4 0.011989
##
## Mean: 6.4925 (N = 40)
##
## Experimental Design: balanced | Method: REML
drug <- read.csv("data/drug.csv")
drug$site <- as.factor(drug$site)
drug$batch <- as.factor(drug$batch)
drug$tablet <- as.factor(drug$tablet)
# Specifying nested term: site/batch == batch(site) in SAS
drug_fit <- Anova(lm(y ~ site + site/batch, data=drug), type=3)
drug_fit
## Anova Table (Type III tests)
##
## Response: y
## Sum Sq Df F value Pr(>F)
## (Intercept) 763.06 1 63105.9683 < 2.2e-16 ***
## site 0.02 1 1.5096 0.2311153
## site:batch 0.45 4 9.3869 0.0001028 ***
## Residuals 0.29 24
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Batch is a random effect
# F-stat for site = MS_site / MS_site:batch
# Anova() rounds
0.02/(0.45/4)
## [1] 0.1777778
# But if you grab from fit object, you can get exact answers
x <- (drug_fit$`Sum Sq`[2]/drug_fit$Df[2])/(drug_fit$`Sum Sq`[3]/drug_fit$Df[3])
x
## [1] 0.1608176
pf(x, 1, 4, lower.tail=F)
## [1] 0.7089034
Week 8 lab, #2
engi <- read.csv("data/engi.csv")
engi$device <- as.factor(engi$device)
engi$rep <- as.factor(engi$rep)
engi$facility <- as.factor(engi$facility)
engi$tester <- as.factor(engi$tester)
# Tester is nested within facility
engi_fit <- Anova(lm(y ~ facility + device + facility:device + facility/tester, data=engi), type=3)
engi_fit
## Anova Table (Type III tests)
##
## Response: y
## Sum Sq Df F value Pr(>F)
## (Intercept) 269.327 1 5191.8434 < 2.2e-16 ***
## facility 0.935 1 18.0281 0.0001462 ***
## device 0.009 2 0.0843 0.9193022
## facility:device 0.225 2 2.1727 0.1285749
## facility:tester 1.106 6 3.5542 0.0072376 **
## Residuals 1.867 36
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Finding F-stat and p-value for Facility, which has random effect Tester nested within it.
# Approx
(0.935)/(1.106/6)
## [1] 5.072333
pf((0.935)/(1.106/6),1,6,lower.tail=F)
## [1] 0.06524507
# Exact
x <- engi_fit$`Sum Sq`[2]/(engi_fit$`Sum Sq`[5]/engi_fit$Df[5])
x
## [1] 5.072316
pf(x, 1, 6, lower.tail=F)
## [1] 0.0652454
bp <- read.csv("data/bp.csv")
bp$sequence<- as.factor(bp$sequence)
bp$patient<- as.factor(bp$patient)
bp$period<- as.factor(bp$period)
# Since interaction is insignificant, rerun model without it.
bp_fit <- Anova(lm(y ~ sequence + treatment + period + sequence/patient, data=bp), type=3)
bp_fit
## Anova Table (Type III tests)
##
## Response: y
## Sum Sq Df F value Pr(>F)
## (Intercept) 224.500 1 1655.4711 < 2.2e-16 ***
## sequence 0.234 2 0.8624 0.4373
## treatment 9.517 2 35.0901 2.879e-07 ***
## period 0.017 2 0.0635 0.9387
## sequence:patient 0.669 9 0.5483 0.8221
## Residuals 2.712 20
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Patient is nested within Sequence, and thus is considered a random effect.
# F-stat for Sequence = MS_seq / MS_seq:pat
(bp_fit$`Sum Sq`[2]/bp_fit$Df[2])/(bp_fit$`Sum Sq`[5]/bp_fit$Df[5])
## [1] 1.572852
pf((bp_fit$`Sum Sq`[2]/bp_fit$Df[2])/(bp_fit$`Sum Sq`[5]/bp_fit$Df[5]), 2,9,lower.tail = F)
## [1] 0.259531
Reference for changing options: https://www.r-bloggers.com/ensuring-r-generates-the-same-anova-f-values-as-spss/)
#Use gsub for messier strings. Here it's unnecessary.
#golf <- read.table(text=gsub("(?<=[a-z])\\s+", " ", golf_text, perl=TRUE), header=FALSE, col.names = c("gender", "score"))
#Calculation of V:
diff <- mining$after - mining$before
diff <- diff[diff!=0] #delete all differences = 0, as per Signed Rank procedure
diff.rank <- rank(abs(diff))
diff.rank.sign <- diff.rank * sign(diff[])
ranks.pos <- sum(diff.rank.sign[diff.rank.sign > 0])
ranks.neg <- -sum(diff.rank.sign[diff.rank.sign < 0])
Referenced used for ANOVA: https://www.r-bloggers.com/anova-%E2%80%93-type-iiiiii-ss-explained/
#SAS calculates Leven using Squared deviations. R and it's packages seem to do Absolute instead.
#The following replicates what SAS does.
#https://stackoverflow.com/questions/22396543/levenes-test-using-the-squared-residuals
oil.lm <- lm(visc ~ type, data = oil)
anova(lm(residuals(oil.lm)^2 ~ oil$type))
#HOV Levene using absolute deviations
leveneTest(visc ~ type, data=oil)
Using ezANOVA package
library(ez)
#ezANOVA requires an ID column, wid. This is so that it can handle repeated measures.
oil$id <- as.factor(c(1:length(oil$type)))
av <- ezANOVA(data=oil, dv=visc, wid=id ,between=type, detail=T, type=3, white.adjust=T)
print(av)
ANT example
data(ANT)
b_anova_full <- ezANOVA(data=ANT,
dv=rt,
wid=subnum,
between=group,
within_full=.(cue, flank),
type=3,
detailed=T,
return_aov=T)
print(b_anova_full)
Reference, see DIY Contrasts: https://rstudio-pubs-static.s3.amazonaws.com/65059_586f394d8eb84f84b1baaf56ffb6b47f.html
#Base R uses TukeyHSD, which yields less elegant output.
tukes <- TukeyHSD(fit, "method")
tukes
plot(tukes)
Using ezANOVA package
soil$id <- as.factor(c(1:length(soil$diameter))) #ezANOVA requires an id column
soil_anova <- ezANOVA(data=soil, dv=diameter, wid=id, between=.(calcium, ph), type=3, detailed=T, return_aov=T)
resids <- resid(soil_anova$aov) #bc we used return_aov=T, we can call the base aov() and output the residuals
shapiro.test(resids)
layout(mat = matrix(c(1,2), 2, 1), height = c(3,1))
par(mar=c(0, 3, 2, 1))
hist(resids, xlim=c(-1,1), col="pink", main="Distribution of Residuals")
boxplot(resids, horizontal=T, outline=T, frame=F, col="green1", width=10, ylim=c(-1,1), axes=F)
qqnorm(resids)
qqline(resids)
reference: https://mcfromnz.wordpress.com/2011/03/02/anova-type-iiiiii-ss-explained/ Varcomp reference: https://stats.stackexchange.com/questions/161225/estimates-of-the-variance-of-the-variance-component-of-a-mixed-effects-model Testing on paint
paint <- read.csv("data/CH17/ex17-1.txt")
colnames(paint) <- c("batch", "percentage")
Testing on pesticide
#Both of these attempts have chemical F = 48.11, but answer should be 173.41 if divided by ms_error, or 44.49 if divided by ms_interaction
#attempt 1 with interaction, wrong
library(afex)
mixed(number_killed ~ chemical + (1|location) + (1|location:chemical), data=pesticide)
#with no interaction, CORRECT, but that means interaction is wrong...
model2 <- lmer_alt(number_killed ~ chemical + (1|location), data=pesticide)
anova(model2)
#attempt 2 with interaction, wrong
model3 <- lmer_alt(number_killed ~ chemical + (1|location) + (1|chemical:location), data=pesticide)
summary(model3)
Working solution (but answer is type 2)
#Make F table manually
fit <- lm(number_killed ~ location * chemical, data=pesticide)
results <- Anova(fit) #this is the anova table. The following replaces F tests with MS_main/MS_interaction
Df <- results$Df
SumSq <- results$"Sum Sq"
MeanSq <- SumSq/results$Df
Fvalue <- results$"F value"
Pvalue <- results$"Pr(>F)"
Error.Term <- MeanSq[3]
df.error <- Df[3]
Fvalue[1] <- MeanSq[1]/Error.Term
Pvalue[1] <- 1 - pf(Fvalue[1], Df[1], df.error)
Fvalue[2] <- MeanSq[2]/Error.Term
Pvalue[2] <- 1 - pf(Fvalue[2], Df[2], df.error)
Ftable <- cbind(Df, SumSq, MeanSq, Fvalue, Pvalue)
rownames(Ftable) <- c("Locations", "Chemicals", "Locations:Chemicals", "Residuals")
print(Ftable)
Misc things
library(lmerTest)
coef(fit)
#other attempts
model <- lmerTest::lmer(number_killed ~ chemical + (1|location) + (1|location:chemical), data=pesticide)
rand(model)
linearHypothesis(model, "chemical=0") #doesnt work
For carryover, unused and unfinished
carryover <- trt_str
carryover$period <- as.factor(as.numeric(trt_str$period) + 1)
Regarding multicolinearity: https://stats.stackexchange.com/questions/112442/what-are-aliased-coefficients