Section 1: Importation and Descriptive Analysis

diet = read.csv("/Users/rehan/Downloads/diet.csv",row.names=1)
diet$weight.loss = diet$initial.weight - diet$final.weight 
diet$diet.type   = factor(diet$diet.type,levels=c("A","B","C"))
diet$gender      = factor(diet$gender,levels=c("Female","Male"))
boxplot(weight.loss~diet.type,data=diet,col="light gray",
        ylab = "Weight loss (kg)", xlab = "Diet type")
abline(h=0,col="blue")

Section 2: ANOVA

diet.fisher  = aov(weight.loss~diet.type,data=diet)
diet.welch   = oneway.test(weight.loss~diet.type,data=diet)
diet.kruskal = kruskal.test(weight.loss~diet.type,data=diet)

summary(diet.fisher)
##             Df Sum Sq Mean Sq F value Pr(>F)   
## diet.type    2   60.5  30.264   5.383 0.0066 **
## Residuals   73  410.4   5.622                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(diet.welch)
## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  weight.loss and diet.type
## F = 5.2693, num df = 2.00, denom df = 48.48, p-value = 0.008497
print(diet.kruskal)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  weight.loss by diet.type
## Kruskal-Wallis chi-squared = 9.4159, df = 2, p-value = 0.009023
summary(aov(weight.loss~diet.type,data=diet[diet$diet.type!="B",]))
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## diet.type    1   43.4    43.4   8.036 0.00664 **
## Residuals   49  264.6     5.4                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
t.test(weight.loss~diet.type,data=diet[diet$diet.type!="B",],var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  weight.loss by diet.type
## t = -2.8348, df = 49, p-value = 0.006644
## alternative hypothesis: true difference in means between group A and group C is not equal to 0
## 95 percent confidence interval:
##  -3.1582988 -0.5379975
## sample estimates:
## mean in group A mean in group C 
##        3.300000        5.148148

Section 3: Model Check

# mean and median weight loss per group:
mean_group   = tapply(diet$weight.loss,diet$diet.type,mean)
median_group = tapply(diet$weight.loss,diet$diet.type,median)
mean_group
##        A        B        C 
## 3.300000 3.268000 5.148148
# residuals
diet$resid.mean   = (diet$weight.loss - mean_group[as.numeric(diet$diet.type)])
diet$resid.median = (diet$weight.loss - median_group[as.numeric(diet$diet.type)])
diet[1:10,]
##    gender age height diet.type initial.weight final.weight weight.loss
## 1  Female  22    159         A             58         54.2         3.8
## 2  Female  46    192         A             60         54.0         6.0
## 3  Female  55    170         A             64         63.3         0.7
## 4  Female  33    171         A             64         61.1         2.9
## 5  Female  50    170         A             65         62.2         2.8
## 6  Female  50    201         A             66         64.0         2.0
## 7  Female  37    174         A             67         65.0         2.0
## 8  Female  28    176         A             69         60.5         8.5
## 9  Female  28    165         A             70         68.1         1.9
## 10 Female  45    165         A             70         66.9         3.1
##    resid.mean resid.median
## 1         0.5         0.75
## 2         2.7         2.95
## 3        -2.6        -2.35
## 4        -0.4        -0.15
## 5        -0.5        -0.25
## 6        -1.3        -1.05
## 7        -1.3        -1.05
## 8         5.2         5.45
## 9        -1.4        -1.15
## 10       -0.2         0.05
par(mfrow=c(1,2),mar=c(4.5,4.5,2,0)) 
#
boxplot(resid.mean~diet.type,data=diet,main="Residual boxplot per group",col="light gray",xlab="Diet type",ylab="Residuals")
abline(h=0,col="blue")
#
col_group = rainbow(nlevels(diet$diet.type))
qqnorm(diet$resid.mean,col=col_group[as.numeric(diet$diet.type)])
qqline(diet$resid.mean)
legend("top",legend=levels(diet$diet.type),col=col_group,pch=21,ncol=3,box.lwd=NA)

shapiro.test(diet$resid.mean)
## 
##  Shapiro-Wilk normality test
## 
## data:  diet$resid.mean
## W = 0.99175, p-value = 0.9088
bartlett.test(diet$resid.mean~as.numeric(diet$diet.type))
## 
##  Bartlett test of homogeneity of variances
## 
## data:  diet$resid.mean by as.numeric(diet$diet.type)
## Bartlett's K-squared = 0.21811, df = 2, p-value = 0.8967

Section 4: Mutiple Comparisons

plot(TukeyHSD(diet.fisher))

t.test(weight.loss~diet.type,data=diet[diet$diet.type!="C",],var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  weight.loss by diet.type
## t = 0.0475, df = 47, p-value = 0.9623
## alternative hypothesis: true difference in means between group A and group B is not equal to 0
## 95 percent confidence interval:
##  -1.323275  1.387275
## sample estimates:
## mean in group A mean in group B 
##           3.300           3.268

Section 5: Two-way ANOVA

diet.fisher = aov(weight.loss~diet.type*gender,data=diet)
summary(diet.fisher)
##                  Df Sum Sq Mean Sq F value  Pr(>F)   
## diet.type         2   60.5  30.264   5.629 0.00541 **
## gender            1    0.2   0.169   0.031 0.85991   
## diet.type:gender  2   33.9  16.952   3.153 0.04884 * 
## Residuals        70  376.3   5.376                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(weight.loss~diet.type*gender,data=diet))
## Analysis of Variance Table
## 
## Response: weight.loss
##                  Df Sum Sq Mean Sq F value   Pr(>F)   
## diet.type         2  60.53 30.2635  5.6292 0.005408 **
## gender            1   0.17  0.1687  0.0314 0.859910   
## diet.type:gender  2  33.90 16.9520  3.1532 0.048842 * 
## Residuals        70 376.33  5.3761                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1