library(car) #leveneTest
## Loading required package: carData
# Load the RatWeightGain dataset (path is relative to the working directory).
# Strings are read as factors explicitly: since R 4.0.0 read.csv() defaults to
# stringsAsFactors = FALSE, while the grouping columns (SOURCE, TYPE) are used
# as factors by the procedures below (e.g. leveneTest(), TukeyHSD()).
ratdata <- read.csv("RatWeightGain.csv", stringsAsFactors = TRUE)
# Mean and standard deviation of weight gain for each SOURCE.
# mean() and sd() are handed to aggregate() directly: each already returns a
# single number per group, so no anonymous wrapper function is needed.
ratsourcem <- aggregate(WEIGHTGAIN ~ SOURCE, data = ratdata, FUN = mean)
ratsourcesd <- aggregate(WEIGHTGAIN ~ SOURCE, data = ratdata, FUN = sd)
ratsourcem
## SOURCE WEIGHTGAIN
## 1 Beef 89.6
## 2 Cereal 84.9
ratsourcesd
## SOURCE WEIGHTGAIN
## 1 Beef 17.71232
## 2 Cereal 14.99438
# Mean and standard deviation of weight gain for each TYPE.
# mean() and sd() are handed to aggregate() directly: each already returns a
# single number per group, so no anonymous wrapper function is needed.
rattypem <- aggregate(WEIGHTGAIN ~ TYPE, data = ratdata, FUN = mean)
rattypesd <- aggregate(WEIGHTGAIN ~ TYPE, data = ratdata, FUN = sd)
rattypem
## TYPE WEIGHTGAIN
## 1 High 92.95
## 2 Low 81.55
rattypesd
## TYPE WEIGHTGAIN
## 1 High 16.36259
## 2 Low 14.63045
# Boxplot of weight gain by SOURCE. The bold line in each box is the group
# median, not the mean. Beef is centred near 90 and Cereal somewhat below
# that (group means: Beef 89.6, Cereal 84.9).
# `vertical` is an argument of stripchart(), not of boxplot() (whose
# orientation is set by `horizontal` and is vertical by default), so it is
# not passed here. pch = 19 sets the symbol used for outlying points.
boxplot(WEIGHTGAIN ~ SOURCE, data = ratdata, pch = 19,
        main = "WEIGHTGAIN Vs SOURCE", xlab = "SOURCE", ylab = "WEIGHTGAIN")

# Boxplot of weight gain by TYPE. The bold line in each box is the group
# median, not the mean. The High diet is centred near 95 and the Low diet
# below 85 (group means: High 92.95, Low 81.55).
# `vertical` is an argument of stripchart(), not of boxplot() (whose
# orientation is set by `horizontal` and is vertical by default), so it is
# not passed here. pch = 19 sets the symbol used for outlying points.
boxplot(WEIGHTGAIN ~ TYPE, data = ratdata, pch = 19,
        main = "WEIGHTGAIN Vs TYPE", xlab = "TYPE", ylab = "WEIGHTGAIN")

# ---- One-way analysis: WEIGHTGAIN by SOURCE ----
# Levene's test for homogeneity of variance.
#   H0: the group variances are equal
#   H1: the group variances are not all equal
# Result: p = 0.5833 > 0.05, so there is no significant difference between
# the variances of the SOURCE groups.
leveneTest(WEIGHTGAIN ~ SOURCE, data = ratdata)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.3062 0.5833
## 38
# One-way ANOVA, fitted as a linear model.
#   H0: all group means are equal
#   H1: the group means are not all equal
analysis <- lm(formula = WEIGHTGAIN ~ SOURCE, data = ratdata)
# The ANOVA table reports the degrees of freedom, sums of squares and mean
# squares (variability between sources and variability within a source).
# F < 1 and p = 0.3708 > 0.05, so we fail to reject H0: there is no
# significant evidence that mean weight gain differs between sources.
anova(analysis)
## Analysis of Variance Table
##
## Response: WEIGHTGAIN
## Df Sum Sq Mean Sq F value Pr(>F)
## SOURCE 1 220.9 220.90 0.8203 0.3708
## Residuals 38 10232.6 269.28
# Residual diagnostics for the SOURCE model (`analysis`).
# Panel 1 of plot.lm: residuals against fitted values.
plot(analysis, which = 1)

# Panel 2 of plot.lm: normal Q-Q plot of the residuals.
plot(analysis, which = 2)

# Histogram of the standardized residuals. Original reading of the plot:
# roughly evenly spread with a positive skew -- TODO confirm against the figure.
resids <- rstandard(analysis)
hist(resids)

# We failed to reject the null hypothesis for SOURCE, so a Tukey test is not required to examine the difference in means.
# ---- One-way analysis: WEIGHTGAIN by TYPE ----
# Levene's test for homogeneity of variance.
#   H0: the group variances are equal
#   H1: the group variances are not all equal
# Result: p = 0.6542 > 0.05, so there is no significant difference between
# the variances of the TYPE groups.
leveneTest(WEIGHTGAIN ~ TYPE, data = ratdata)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.2038 0.6542
## 38
# One-way ANOVA, fitted as a linear model.
#   H0: all group means are equal
#   H1: the group means are not all equal
analysis2 <- lm(formula = WEIGHTGAIN ~ TYPE, data = ratdata)
# The ANOVA table reports the degrees of freedom, sums of squares and mean
# squares (variability between types and variability within a type).
# F > 1 and p = 0.02565 < 0.05, so we reject H0: mean weight gain is not the
# same for the two diet types.
anova(analysis2)
## Analysis of Variance Table
##
## Response: WEIGHTGAIN
## Df Sum Sq Mean Sq F value Pr(>F)
## TYPE 1 1299.6 1299.60 5.3949 0.02565 *
## Residuals 38 9153.9 240.89
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residual diagnostics for the TYPE model (`analysis2`).
# Panel 1 of plot.lm: residuals against fitted values.
plot(analysis2, which = 1)

# Panel 2 of plot.lm: normal Q-Q plot of the residuals.
plot(analysis2, which = 2)

# Histogram of the standardized residuals. Original reading of the plot:
# the distribution is left-skewed -- TODO confirm against the figure.
# Note: this overwrites the `resids` computed earlier for the SOURCE model.
resids <- rstandard(analysis2)
hist(resids)

# H0 was rejected for TYPE, i.e. the diet types differ in effectiveness.
# Tukey's HSD test shows which levels differ and by how much.
# TukeyHSD() needs an aov fit. The model is fitted with aov() from its
# formula and data instead of passing the lm object `analysis2` as aov()'s
# `formula` argument; it is the same one-way model, so the estimates are
# unchanged and only the echoed "Fit:" call differs.
# Result: Low - High = -11.4 with adjusted p = 0.0256 < 0.05, so the Low and
# High diet types differ significantly in mean weight gain.
TukeyHSD(aov(WEIGHTGAIN ~ TYPE, data = ratdata))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = WEIGHTGAIN ~ TYPE, data = ratdata)
##
## $TYPE
## diff lwr upr p adj
## Low-High -11.4 -21.33588 -1.464119 0.0256485
#__________________________________________________________________________________________________________
# Research question: is weight gain related to the source and the type of
# diet, and do the two factors interact?
# H0 = There is no interaction between source and type of diet
# H1 = There is an interaction between source and type of diet
# Cross-tabulate SOURCE by TYPE. If every cell holds the same count the design
# is balanced; here each of the four cells has 10 observations.
table(ratdata$SOURCE, ratdata$TYPE)
##
## High Low
## Beef 10 10
## Cereal 10 10
# Visualise weight gain for each SOURCE x TYPE combination. Per the original
# reading of the plot, the Beef/High group has a median of about 104 and the
# Beef/Low group a median of about 84. An interaction is suspected but is not
# evident from the boxplot alone.
boxplot(WEIGHTGAIN ~ SOURCE * TYPE, data = ratdata)

# Interaction plot of the mean weight gain per SOURCE, one trace per TYPE.
# Labels are supplied explicitly: by default interaction.plot() builds them
# from the argument expressions, which would print "ratdata$SOURCE",
# "ratdata$TYPE" and "ratdata$WEIGHTGAIN" on the figure.
interaction.plot(
  x.factor = ratdata$SOURCE,
  trace.factor = ratdata$TYPE,
  response = ratdata$WEIGHTGAIN,
  xlab = "SOURCE",
  ylab = "Mean of WEIGHTGAIN",
  trace.label = "TYPE"
)

# Two-way ANOVA. Start with the model that includes the SOURCE:TYPE
# interaction; if the interaction turns out not to be significant, the
# additive model (SOURCE + TYPE) can be used instead.
aovres3 <- aov(formula = WEIGHTGAIN ~ SOURCE * TYPE, data = ratdata)
summary(aovres3)
## Df Sum Sq Mean Sq F value Pr(>F)
## SOURCE 1 221 220.9 0.988 0.3269
## TYPE 1 1300 1299.6 5.812 0.0211 *
## SOURCE:TYPE 1 884 883.6 3.952 0.0545 .
## Residuals 36 8049 223.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Interpret results:
# From the ANOVA results, based on the p-values and a significance level of 0.05:
# 1. The p-value of SOURCE (0.3269) > 0.05, which indicates that the source
# of diet is not associated with a significant difference in weight gain.
# 2. The p-value of TYPE (0.0211) < 0.05, which indicates that the type of
# diet is associated with a significant difference in weight gain.
# 3. The p-value for the SOURCE:TYPE interaction (0.0545) is slightly above
# 0.05, so at the 5% level we fail to reject H0: there is no significant
# evidence that the effect of source on weight gain depends on the type of
# diet. The result is borderline (it would be significant at the 10% level),
# so a possible interaction should not be ruled out entirely.
# The ANOVA is not significant for source of diet, but a significant
# difference exists for type of diet, so we perform Tukey's test for type.
# Low-High: adjusted p < 0.05, so the two diet types differ significantly.
# Tukey's HSD for the TYPE factor of the two-way fit, at 95% family-wise
# confidence: Low - High = -11.4 with adjusted p = 0.0211.
TukeyHSD(aovres3, which = "TYPE", conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = WEIGHTGAIN ~ SOURCE * TYPE, data = ratdata)
##
## $TYPE
## diff lwr upr p adj
## Low-High -11.4 -20.99 -1.81 0.0211449
# 1. Check the homogeneity-of-variance assumption
# 1.1 Residuals vs. fitted plot
# Original reading of the plot: the linearity assumption looks reasonable.
# Observations 6, 33 and 11 stand out as outliers, with residuals of about
# -30, -29 and -31 respectively.
# Since outliers exist, removing them may help to meet the test assumptions.
plot(aovres3, 1)

# 1.2 Levene's test across the four SOURCE x TYPE groups.
# p = 0.9202 > 0.05, so homogeneity of variance can be assumed across the
# treatment groups.
library(car)
leveneTest(WEIGHTGAIN ~ SOURCE * TYPE, data = ratdata)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.1635 0.9202
## 36
# 2. Check the normality assumption
# Normal Q-Q plot of the residuals: quantiles of the residuals are plotted
# against quantiles of the normal distribution, with a reference line.
# If the residuals are normally distributed the points should roughly follow
# the straight line. Original reading of the plot: observations 6, 33 and 11
# fall below the line as outliers.
plot(aovres3, 2)

# Histogram of the raw residuals of the two-way fit. Original reading of the
# plot: the distribution looks roughly symmetric -- TODO confirm against the figure.
aov_residuals <- residuals(aovres3)
hist(aov_residuals)
