# ---- Analysis 1: calorie intake by helpedinprep group ----
# Loads the kids' calorie data, checks the distribution of calorieintake
# (skewness, kurtosis, Shapiro-Wilk), checks equal variances across the two
# helpedinprep groups, and compares the groups with a one-way ANOVA.
# Lines starting with `##` are the recorded console output of the call above.
library(moments) # provides agostino.test() and anscombe.test()

kidscalories <- read.csv("~/Downloads/kidscalories.csv")
summary(kidscalories)
## helpedinprep calorieintake
## Min. :1.000 Min. :139.7
## 1st Qu.:1.000 1st Qu.:300.7
## Median :1.000 Median :404.0
## Mean :1.468 Mean :391.8
## 3rd Qu.:2.000 3rd Qu.:447.5
## Max. :2.000 Max. :635.2

# Spread of the response.
var(kidscalories$calorieintake)
## [1] 12169.81
sd(kidscalories$calorieintake)
## [1] 110.3168

# Normality checks on the response: density plot, then formal tests.
plot(density(kidscalories$calorieintake))
agostino.test(kidscalories$calorieintake)
##
## D'Agostino skewness test
##
## data: kidscalories$calorieintake
## skew = -0.011821, z = -0.037082, p-value = 0.9704
## alternative hypothesis: data have a skewness
anscombe.test(kidscalories$calorieintake)
##
## Anscombe-Glynn kurtosis test
##
## data: kidscalories$calorieintake
## kurt = 2.89410, z = 0.25439, p-value = 0.7992
## alternative hypothesis: kurtosis is not equal to 3
shapiro.test(kidscalories$calorieintake)
##
## Shapiro-Wilk normality test
##
## data: kidscalories$calorieintake
## W = 0.97936, p-value = 0.5663

# helpedinprep is stored as the numeric codes 1/2 (see summary above); the
# grouping tests below need it as a factor.
helpedinprep <- factor(kidscalories$helpedinprep)
bartlett.test(kidscalories$calorieintake, helpedinprep)
##
## Bartlett test of homogeneity of variances
##
## data: kidscalories$calorieintake and helpedinprep
## Bartlett's K-squared = 0.079795, df = 1, p-value = 0.7776

# One-way ANOVA, fitted once and reused below. `data = kidscalories` is left
# out on purpose: with it, `helpedinprep` in the formula would resolve to the
# numeric column of the data frame rather than the factor defined above, and
# TukeyHSD() would have no factor to compare.
model1 <- aov(kidscalories$calorieintake ~ helpedinprep)
summary(model1)
## Df Sum Sq Mean Sq F value Pr(>F)
## helpedinprep 1 83755 83755 7.917 0.00724 **
## Residuals 45 476056 10579
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Residual normality check; qqline() adds the reference line to judge against.
qqnorm(residuals(model1))
qqline(residuals(model1))

# Pairwise comparison on the same fit (the `Fit:` line echoes model1's call).
TukeyHSD(model1)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = kidscalories$calorieintake ~ helpedinprep)
##
## $helpedinprep
## diff lwr upr p adj
## 2-1 -84.60051 -145.1586 -24.04243 0.0072362

# Per-group mean and standard deviation of calorie intake.
tapply(kidscalories$calorieintake, kidscalories$helpedinprep, mean)
## 1 2
## 431.3996 346.7991
tapply(kidscalories$calorieintake, kidscalories$helpedinprep, sd)
## 1 2
## 105.70124 99.50114
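The calorie-intake data are consistent with a normal distribution: the skewness, kurtosis, and Shapiro-Wilk tests are all non-significant (p > 0.05). The group means and standard deviations show that group 1 has a higher calorie intake than group 2 (431.4 vs. 346.8), and the one-way ANOVA shows that this difference is significant (p = 0.007).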
# ---- Analysis 2: cholesterol Before vs. After, by Margarine type ----
# Loads the cholesterol workbook, checks the distribution of Before and After,
# log-transforms both, compares them with a paired t-test, and then compares
# the before-after change between the two Margarine groups with a one-way
# ANOVA. Lines starting with `##` are the recorded console output.
library(readxl) # read_excel()
library(readr)
library(moments) # agostino.test(), anscombe.test()

CholestoralData <- read_excel("~/Downloads/CholestoralData.xlsx")
summary(CholestoralData)
## ID Before After Margarine
## Min. : 1.00 Min. : 3.910 Min. : 3.660 Length:40
## 1st Qu.:10.75 1st Qu.: 6.530 1st Qu.: 5.290 Class :character
## Median :20.50 Median : 7.860 Median : 6.415 Mode :character
## Mean :20.50 Mean : 8.932 Mean : 6.886
## 3rd Qu.:30.25 3rd Qu.:10.380 3rd Qu.: 7.690
## Max. :40.00 Max. :17.730 Max. :12.100

# Normality checks on the raw measurements.
plot(density(CholestoralData$Before))
plot(density(CholestoralData$After))
agostino.test(CholestoralData$Before)
##
## D'Agostino skewness test
##
## data: CholestoralData$Before
## skew = 0.89653, z = 2.38360, p-value = 0.01714
## alternative hypothesis: data have a skewness
agostino.test(CholestoralData$After)
##
## D'Agostino skewness test
##
## data: CholestoralData$After
## skew = 0.73814, z = 2.01770, p-value = 0.04362
## alternative hypothesis: data have a skewness
anscombe.test(CholestoralData$Before)
##
## Anscombe-Glynn kurtosis test
##
## data: CholestoralData$Before
## kurt = 3.05270, z = 0.53771, p-value = 0.5908
## alternative hypothesis: kurtosis is not equal to 3
anscombe.test(CholestoralData$After)
##
## Anscombe-Glynn kurtosis test
##
## data: CholestoralData$After
## kurt = 2.58600, z = -0.27481, p-value = 0.7835
## alternative hypothesis: kurtosis is not equal to 3
shapiro.test(CholestoralData$Before)
##
## Shapiro-Wilk normality test
##
## data: CholestoralData$Before
## W = 0.91834, p-value = 0.00683
shapiro.test(CholestoralData$After)
##
## Shapiro-Wilk normality test
##
## data: CholestoralData$After
## W = 0.91706, p-value = 0.006231

# Both variables are positively skewed and fail Shapiro-Wilk (p < 0.01), so
# the remaining steps work on the log scale. Note that `Before` and `After`
# from here on are the log-transformed vectors, not the data-frame columns.
Before <- log(CholestoralData$Before)
plot(density(Before))
After <- log(CholestoralData$After)
plot(density(After))

# Paired comparison of the log-transformed measurements.
t.test(Before, After, paired = TRUE, alternative = "two.sided")
##
## Paired t-test
##
## data: Before and After
## t = 4.7191, df = 39, p-value = 3.026e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1379106 0.3448133
## sample estimates:
## mean of the differences
## 0.241362

# Equal-variance check across Margarine groups.
margarine <- factor(CholestoralData$Margarine)
bartlett.test(After, margarine)
##
## Bartlett test of homogeneity of variances
##
## data: After and margarine
## Bartlett's K-squared = 0.93119, df = 1, p-value = 0.3346

# The ANOVA below models the before-after change rather than After, so the
# equal-variance assumption is also checked on that response. This call was
# not part of the recorded run, which is why no output follows it.
difference <- Before - After
bartlett.test(difference, margarine)

# One-way ANOVA of the change by margarine type, fitted once and reused.
# `difference` and `margarine` are workspace vectors, not columns of
# CholestoralData, so no `data` argument is passed.
model3 <- aov(difference ~ margarine)
summary(model3)
## Df Sum Sq Mean Sq F value Pr(>F)
## margarine 1 1.506 1.5059 22.22 3.23e-05 ***
## Residuals 38 2.575 0.0678
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Residual normality check; qqline() adds the reference line to judge against.
qqnorm(residuals(model3))
qqline(residuals(model3))
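Based on the results above, cholesterol is significantly lower After than Before (paired t-test on the log scale, p < 0.001), and the size of this change differs between the two margarine groups (one-way ANOVA, p < 0.001); group A has a higher cholesterol level than group B.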
# ---- Analysis 3: espresso cereme by brew method ----
# Loads the espresso data, checks the distribution of cereme, log-transforms
# it, checks equal variances across the three brewmethod groups, and compares
# the groups with a one-way ANOVA followed by Tukey's HSD.
# Lines starting with `##` are the recorded console output of the call above.
library(moments) # provides agostino.test() and anscombe.test()

espressodata <- read.csv("~/Downloads/EspressoData.csv")

# Normality checks on the raw response.
plot(density(espressodata$cereme))
agostino.test(espressodata$cereme)
##
## D'Agostino skewness test
##
## data: espressodata$cereme
## skew = 0.54679, z = 1.32790, p-value = 0.1842
## alternative hypothesis: data have a skewness
anscombe.test(espressodata$cereme)
##
## Anscombe-Glynn kurtosis test
##
## data: espressodata$cereme
## kurt = 2.33130, z = -0.58842, p-value = 0.5563
## alternative hypothesis: kurtosis is not equal to 3
shapiro.test(espressodata$cereme)
##
## Shapiro-Wilk normality test
##
## data: espressodata$cereme
## W = 0.92201, p-value = 0.04414

# Shapiro-Wilk rejects normality at the 5% level, so the remaining steps use
# the log of cereme. `cereme` below is the log-transformed vector.
cereme <- log(espressodata$cereme)
plot(density(cereme))

# Equal-variance check across brew methods.
brewmethod <- factor(espressodata$brewmethod)
bartlett.test(cereme, brewmethod)
##
## Bartlett test of homogeneity of variances
##
## data: cereme and brewmethod
## Bartlett's K-squared = 1.3633, df = 2, p-value = 0.5058

# One-way ANOVA, fitted once and reused for the table, the residual plot and
# the post-hoc test instead of refitting the same model each time.
model <- aov(cereme ~ brewmethod)
summary(model)
## Df Sum Sq Mean Sq F value Pr(>F)
## brewmethod 2 1.9797 0.9898 24.41 1.64e-06 ***
## Residuals 24 0.9732 0.0405
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Residual normality check; qqline() adds the reference line to judge against.
qqnorm(residuals(model))
qqline(residuals(model))

# Pairwise comparisons between brew methods.
tukeytest <- TukeyHSD(model)
tukeytest
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = cereme ~ brewmethod)
##
## $brewmethod
## diff lwr upr p adj
## 2-1 0.6506828 0.41362616 0.8877395 0.0000013
## 3-1 0.2139631 -0.02309354 0.4510198 0.0823041
## 3-2 -0.4367197 -0.67377635 -0.1996630 0.0003265
plot(tukeytest)

# Per-group mean and standard deviation, on the log scale.
tapply(cereme, brewmethod, mean)
## 1 2 3
## 3.452541 4.103223 3.666504
tapply(cereme, brewmethod, sd)
## 1 2 3
## 0.2475991 0.1698051 0.1775045
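Based on the results above, brew method 2 produces the most crema and method 1 the least. Method 2 differs significantly from both other methods, whereas the difference between methods 1 and 3 is not significant (Tukey-adjusted p = 0.08).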