Section 1: Importation and Descriptive Analysis
diet = read.csv("/Users/rehan/Downloads/diet.csv",row.names=1)
diet$weight.loss = diet$initial.weight - diet$final.weight
diet$diet.type = factor(diet$diet.type,levels=c("A","B","C"))
diet$gender = factor(diet$gender,levels=c("Female","Male"))
boxplot(weight.loss~diet.type,data=diet,col="light gray",
ylab = "Weight loss (kg)", xlab = "Diet type")
abline(h=0,col="blue")

Section 2: ANOVA
diet.fisher = aov(weight.loss~diet.type,data=diet)
diet.welch = oneway.test(weight.loss~diet.type,data=diet)
diet.kruskal = kruskal.test(weight.loss~diet.type,data=diet)
summary(diet.fisher)
## Df Sum Sq Mean Sq F value Pr(>F)
## diet.type 2 60.5 30.264 5.383 0.0066 **
## Residuals 73 410.4 5.622
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(diet.welch)
##
## One-way analysis of means (not assuming equal variances)
##
## data: weight.loss and diet.type
## F = 5.2693, num df = 2.00, denom df = 48.48, p-value = 0.008497
print(diet.kruskal)
##
## Kruskal-Wallis rank sum test
##
## data: weight.loss by diet.type
## Kruskal-Wallis chi-squared = 9.4159, df = 2, p-value = 0.009023
summary(aov(weight.loss~diet.type,data=diet[diet$diet.type!="B",]))
## Df Sum Sq Mean Sq F value Pr(>F)
## diet.type 1 43.4 43.4 8.036 0.00664 **
## Residuals 49 264.6 5.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
t.test(weight.loss~diet.type,data=diet[diet$diet.type!="B",],var.equal = TRUE)
##
## Two Sample t-test
##
## data: weight.loss by diet.type
## t = -2.8348, df = 49, p-value = 0.006644
## alternative hypothesis: true difference in means between group A and group C is not equal to 0
## 95 percent confidence interval:
## -3.1582988 -0.5379975
## sample estimates:
## mean in group A mean in group C
## 3.300000 5.148148
Section 3: Model Check
# mean and median weight loss per group:
mean_group = tapply(diet$weight.loss,diet$diet.type,mean)
median_group = tapply(diet$weight.loss,diet$diet.type,median)
mean_group
## A B C
## 3.300000 3.268000 5.148148
# residuals
diet$resid.mean = (diet$weight.loss - mean_group[as.numeric(diet$diet.type)])
diet$resid.median = (diet$weight.loss - median_group[as.numeric(diet$diet.type)])
diet[1:10,]
## gender age height diet.type initial.weight final.weight weight.loss
## 1 Female 22 159 A 58 54.2 3.8
## 2 Female 46 192 A 60 54.0 6.0
## 3 Female 55 170 A 64 63.3 0.7
## 4 Female 33 171 A 64 61.1 2.9
## 5 Female 50 170 A 65 62.2 2.8
## 6 Female 50 201 A 66 64.0 2.0
## 7 Female 37 174 A 67 65.0 2.0
## 8 Female 28 176 A 69 60.5 8.5
## 9 Female 28 165 A 70 68.1 1.9
## 10 Female 45 165 A 70 66.9 3.1
## resid.mean resid.median
## 1 0.5 0.75
## 2 2.7 2.95
## 3 -2.6 -2.35
## 4 -0.4 -0.15
## 5 -0.5 -0.25
## 6 -1.3 -1.05
## 7 -1.3 -1.05
## 8 5.2 5.45
## 9 -1.4 -1.15
## 10 -0.2 0.05
par(mfrow=c(1,2),mar=c(4.5,4.5,2,0))
#
boxplot(resid.mean~diet.type,data=diet,main="Residual boxplot per group",col="light gray",xlab="Diet type",ylab="Residuals")
abline(h=0,col="blue")
#
col_group = rainbow(nlevels(diet$diet.type))
qqnorm(diet$resid.mean,col=col_group[as.numeric(diet$diet.type)])
qqline(diet$resid.mean)
legend("top",legend=levels(diet$diet.type),col=col_group,pch=21,ncol=3,box.lwd=NA)

shapiro.test(diet$resid.mean)
##
## Shapiro-Wilk normality test
##
## data: diet$resid.mean
## W = 0.99175, p-value = 0.9088
bartlett.test(diet$resid.mean~as.numeric(diet$diet.type))
##
## Bartlett test of homogeneity of variances
##
## data: diet$resid.mean by as.numeric(diet$diet.type)
## Bartlett's K-squared = 0.21811, df = 2, p-value = 0.8967
Section 4: Mutiple Comparisons
plot(TukeyHSD(diet.fisher))

t.test(weight.loss~diet.type,data=diet[diet$diet.type!="C",],var.equal = TRUE)
##
## Two Sample t-test
##
## data: weight.loss by diet.type
## t = 0.0475, df = 47, p-value = 0.9623
## alternative hypothesis: true difference in means between group A and group B is not equal to 0
## 95 percent confidence interval:
## -1.323275 1.387275
## sample estimates:
## mean in group A mean in group B
## 3.300 3.268
Section 5: Two-way ANOVA
diet.fisher = aov(weight.loss~diet.type*gender,data=diet)
summary(diet.fisher)
## Df Sum Sq Mean Sq F value Pr(>F)
## diet.type 2 60.5 30.264 5.629 0.00541 **
## gender 1 0.2 0.169 0.031 0.85991
## diet.type:gender 2 33.9 16.952 3.153 0.04884 *
## Residuals 70 376.3 5.376
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(weight.loss~diet.type*gender,data=diet))
## Analysis of Variance Table
##
## Response: weight.loss
## Df Sum Sq Mean Sq F value Pr(>F)
## diet.type 2 60.53 30.2635 5.6292 0.005408 **
## gender 1 0.17 0.1687 0.0314 0.859910
## diet.type:gender 2 33.90 16.9520 3.1532 0.048842 *
## Residuals 70 376.33 5.3761
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1