Preparation
# Observations for the four fluids (six per fluid). Each resfN holds the
# residuals of fN about that fluid's own mean.
f1 <- c(17.6, 18.9, 16.3, 17.4, 20.1, 21.6)
resf1 <- f1- mean(f1)
f2 <- c(16.9, 15.3, 18.6, 17.1, 19.5, 20.3)
resf2 <- f2- mean(f2)
f3 <- c(21.4, 23.6, 19.4, 18.5, 20.5, 22.3)
resf3 <- f3- mean(f3)
f4 <- c(19.3, 21.1, 16.9, 17.5, 18.3, 19.8)
resf4 <- f4- mean(f4)
# One column per fluid, then stack() into long format so the model formula
# can refer to the observations as `values` and the fluid label as `ind`.
datf <- data.frame(f1,f2,f3,f4)
stackdat <- stack(datf)
An ANOVA test is conducted to see whether the mean lives of the fluids differ. The null hypothesis is that all fluid means are equal, while the alternative is that at least one mean differs. From the returned p-value (0.0525), we fail to reject the null hypothesis at alpha = 0.05.
# One-way ANOVA of the stacked observations (`values`) by fluid (`ind`);
# summary() prints the ANOVA table.
faov <- (aov(values~ind,data=stackdat)) # ANOVA
summary(faov)
## Df Sum Sq Mean Sq F value Pr(>F)
## ind 3 30.17 10.05 3.047 0.0525 .
## Residuals 20 65.99 3.30
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the p-value of the ANOVA test (0.0525), we fail to reject the null hypothesis at alpha = 0.05: there is not enough evidence that the fluid means differ.
We can use Tukey’s HSD to test for which fluid has the longest life
# Tukey HSD pairwise comparisons of the fluid means from the fitted ANOVA.
TukeyHSD(faov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = values ~ ind, data = stackdat)
##
## $ind
## diff lwr upr p adj
## f2-f1 -0.7000000 -3.63540073 2.2354007 0.9080815
## f3-f1 2.3000000 -0.63540073 5.2354007 0.1593262
## f4-f1 0.1666667 -2.76873407 3.1020674 0.9985213
## f3-f2 3.0000000 0.06459927 5.9354007 0.0440578
## f4-f2 0.8666667 -2.06873407 3.8020674 0.8413288
## f4-f3 -2.1333333 -5.06873407 0.8020674 0.2090635
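From Tukey's HSD, fluid 3 has the largest mean life, but it differs significantly only from fluid 2 (p adj = 0.044); its differences from fluids 1 and 4 are not significant.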
Variance will be analyzed using boxplots
# Side-by-side boxplots of the per-fluid residuals, used to compare spread.
boxplot(resf1,resf2,resf3,resf4,main= "Plots of Residuals")
As all boxes are generally of the same size, the constant-variance assumption appears to be valid.
# Failure-time observations for five materials (four per material). Each
# resmN holds the residuals of mN about that material's own mean.
m1 <- c(110, 157, 194, 178)
resm1 <- m1- mean(m1)
m2 <- c(1, 2, 4, 18)
resm2 <- m2- mean(m2)
m3 <-c(880, 1256, 5276, 4355)
resm3 <- m3- mean(m3)
m4 <-c(495, 7040, 5307, 10050)
resm4 <- m4- mean(m4)
m5 <- c(7, 5, 29, 2)
resm5 <- m5- mean(m5)
An ANOVA test is used to test whether the material types have different failure times. The null hypothesis is that material type makes no difference to failure time, while the alternative is that it does.
# Stack the material columns into long format and fit a one-way ANOVA.
# NOTE(review): m5 (and resm5) are defined above, but m5 is not included in
# this data frame, so the model compares only m1-m4 -- confirm whether
# leaving out the fifth material is intended.
datm <- data.frame(m1,m2,m3,m4)
stackmdat <- stack(datm)
maov <- aov(values~ind, data= stackmdat)
summary(maov)
## Df Sum Sq Mean Sq F value Pr(>F)
## ind 3 87746579 29248860 5.615 0.0122 *
## Residuals 12 62505200 5208767
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
With a P value of 0.0122 we can reject the null that the means are equal and therefore material type is statistically significant.
# One plot per material: its residuals against the observation index 1-4,
# with a horizontal reference line at the mean of those residuals.
plot(c(1,2,3,4),resm1)
abline(h=mean(resm1))
plot(c(1,2,3,4),resm2)
abline(h=mean(resm2))
plot(c(1,2,3,4),resm3)
abline(h=mean(resm3))
plot(c(1,2,3,4),resm4)
abline(h=mean(resm4))
plot(c(1,2,3,4),resm5)
abline(h=mean(resm5))
# Pool the residuals of all five materials and draw a normal Q-Q plot.
# NOTE(review): resm5 is pooled here although m5 is not part of the data
# frame the ANOVA was fitted to -- confirm which set of materials is meant.
material.resiudals <- c(resm1,resm2,resm3,resm4,resm5)
qqnorm(material.resiudals)
If the points appear randomly scattered in the residual vs. predicted value plots, we can assume equal variance. However, from the normal probability plot (NPP), the residuals do not appear to be normally distributed.
Tukey's HSD will be used to determine which material lasts the longest.
# Tukey HSD pairwise comparisons of the material means from the fitted ANOVA.
TukeyHSD(maov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = values ~ ind, data = stackmdat)
##
## $ind
## diff lwr upr p adj
## m2-m1 -153.50 -4944.7429 4637.743 0.9996734
## m3-m1 2782.00 -2009.2429 7573.243 0.3540570
## m4-m1 5563.25 772.0071 10354.493 0.0217119
## m3-m2 2935.50 -1855.7429 7726.743 0.3116642
## m4-m2 5716.75 925.5071 10507.993 0.0183769
## m4-m3 2781.25 -2009.9929 7572.493 0.3542729
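From the results of the test, material 4 has the highest mean failure time; it differs significantly from materials 1 and 2 (p adj of about 0.02 each) but not from material 3 (p adj = 0.35).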
# Observations for the three methods (five per method). Each ressN holds the
# residuals of sN about that method's own mean.
s1 <- c(31 ,10 ,21, 4, 1)
ress1 <- s1-mean(s1)
s2 <-c(62, 40, 24, 30, 35)
ress2 <- s2-mean(s2)
s3 <-c(53, 27, 120, 97, 68)
ress3 <- s3- mean(s3)
ANOVA is used to determine if all methods have the same effect
# Stack the three method columns into long format (`values`, `ind`) and fit
# a one-way ANOVA of the observations by method.
dats <- data.frame(s1,s2,s3)
stacks <- stack(dats)
saov <- aov(values~ind,data= stacks)
summary(saov)
## Df Sum Sq Mean Sq F value Pr(>F)
## ind 2 8964 4482 7.914 0.00643 **
## Residuals 12 6796 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the P value we reject the null that the methods have no effect on particle count
# One plot per method: its residuals against the observation index 1-5,
# with a horizontal reference line at the mean of those residuals.
plot(c(1,2,3,4,5),ress1,xlab = "Observation")
abline(h=mean(ress1))
plot(c(1,2,3,4,5),ress2,xlab = "Observation")
abline(h=mean(ress2))
plot(c(1,2,3,4,5),ress3,xlab = "Observation")
abline(h=mean(ress3))
# Pool the residuals of all three methods and draw a normal Q-Q plot.
method.resiudals <- c(ress1,ress2,ress3)
qqnorm(method.resiudals)
Since the residuals appear approximately normally distributed in the normal probability plot, the normality assumption is reasonable; from the residual plots, the same is true of the constant-variance assumption.
##Part C Tukeys HSD is conducted to observe which method is best
# Tukey HSD pairwise comparisons of the method means from the fitted ANOVA.
TukeyHSD(saov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = values ~ ind, data = stacks)
##
## $ind
## diff lwr upr p adj
## s2-s1 24.8 -15.354081 64.95408 0.2644362
## s3-s1 59.6 19.445919 99.75408 0.0049747
## s3-s2 34.8 -5.354081 74.95408 0.0924469
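Method 3 has the highest mean; it differs significantly from method 1 (p adj = 0.005) but not from method 2 (p adj = 0.092), and methods 1 and 2 do not differ significantly.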
#Problem 3.51
# Kruskal-Wallis rank sum test of the stacked fluid observations by fluid.
kruskal.test(values~ind, stackdat)
##
## Kruskal-Wallis rank sum test
##
## data: values by ind
## Kruskal-Wallis chi-squared = 6.2177, df = 3, p-value = 0.1015
From the Kruskal-Wallis test, no statistically significant difference between the fluids was found (p-value = 0.1015).
# Kruskal-Wallis rank sum test of the stacked material observations by material.
kruskal.test(values~ind,data= stackmdat)
##
## Kruskal-Wallis rank sum test
##
## data: values by ind
## Kruskal-Wallis chi-squared = 13.059, df = 3, p-value = 0.004511
The Kruskal-Wallis test gives a smaller p-value (0.0045) than the parametric ANOVA (0.0122), which again indicates that the materials differ.
# Question 3.23 ----
# Life of four fluids, six observations per fluid.

# Prep: raw observations, one vector per fluid
f1 <- c(17.6, 18.9, 16.3, 17.4, 20.1, 21.6)
f2 <- c(16.9, 15.3, 18.6, 17.1, 19.5, 20.3)
f3 <- c(21.4, 23.6, 19.4, 18.5, 20.5, 22.3)
f4 <- c(19.3, 21.1, 16.9, 17.5, 18.3, 19.8)

# Residuals: each observation minus the mean of its own fluid
resf1 <- f1 - mean(f1)
resf2 <- f2 - mean(f2)
resf3 <- f3 - mean(f3)
resf4 <- f4 - mean(f4)

# Wide data frame (one column per fluid), then long format with the
# observations in `values` and the fluid label in `ind`
datf <- data.frame(f1, f2, f3, f4)
stackdat <- stack(datf)

# Part A
# One-way ANOVA of fluid life by fluid type
faov <- aov(values ~ ind, data = stackdat)
summary(faov)
# With a p-value of 0.0525 we do not reject the null hypothesis that the
# fluid means are equal at alpha = 0.05
#Part B
# Now we perform tukey's HSD to compare the means,
library(car)
# Pairwise comparisons of the fluid means (95% family-wise confidence level).
# The interactive help lookup that used to precede this call was removed: it
# only opened the TukeyHSD help page and played no part in the analysis.
TukeyHSD(faov)
# Fluid 3 has the largest sample mean, so it would be the choice for longest
# life. Only the f3-f2 difference is significant at the 5% family-wise level
# (p adj of about 0.044); fluid 3 is not significantly better than fluids 1
# and 4.
# Part C
# Constant variance is checked with side-by-side boxplots of the per-fluid
# residuals; `names` labels each box with its fluid instead of 1-4.
boxplot(resf1, resf2, resf3, resf4,
        names = c("f1", "f2", "f3", "f4"),
        main = "Plots of Residuals")
# As all boxes are generally of the same size, the constant-variance
# assumption appears to be valid
#---------------------------------------------------------------------------------------------------
# Question 3.28 ----
# Failure times for five materials, four observations per material.

# Prep: raw observations and the residuals about each material's own mean
m1 <- c(110, 157, 194, 178)
resm1 <- m1 - mean(m1)
m2 <- c(1, 2, 4, 18)
resm2 <- m2 - mean(m2)
m3 <- c(880, 1256, 5276, 4355)
resm3 <- m3 - mean(m3)
m4 <- c(495, 7040, 5307, 10050)
resm4 <- m4 - mean(m4)
m5 <- c(7, 5, 29, 2)
resm5 <- m5 - mean(m5)

# Part A
# One-way ANOVA testing whether the materials have different failure times.
# All five materials enter the model (m5 included), so the ANOVA, the Tukey
# comparisons and the residual checks below all refer to the same data.
datm <- data.frame(m1, m2, m3, m4, m5)
stackmdat <- stack(datm)
maov <- aov(values ~ ind, data = stackmdat)
summary(maov)
# With five materials the F test is on (4, 15) degrees of freedom and its
# p-value is below 0.01, so we reject the null that the means are equal

# Part B
# Per-material residuals against observation index, with a reference line at
# the mean of those residuals
plot(c(1, 2, 3, 4), resm1)
abline(h = mean(resm1))
plot(c(1, 2, 3, 4), resm2)
abline(h = mean(resm2))
plot(c(1, 2, 3, 4), resm3)
abline(h = mean(resm3))
plot(c(1, 2, 3, 4), resm4)
abline(h = mean(resm4))
plot(c(1, 2, 3, 4), resm5)
abline(h = mean(resm5))

# Residuals against predicted values; for a one-way layout the predicted
# value of an observation is the mean of its material
plot(fitted(maov), residuals(maov),
     xlab = "Fitted value (material mean)", ylab = "Residual")
abline(h = 0)

# Normal probability plot of the pooled residuals, with a reference line
material.resiudals <- c(resm1, resm2, resm3, resm4, resm5)
qqnorm(material.resiudals)
qqline(material.resiudals)
# Reading the plots: a similar vertical scatter at every fitted value
# supports equal variance, and points close to the reference line in the
# normal probability plot support normality. Neither plot tests whether the
# means differ; that is the job of the F test in Part A.
# The raw data already differ in spread by orders of magnitude (m2 ranges
# over 1-18, m4 over 495-10050), so constant variance is doubtful.
# NOTE(review): consider re-running the analysis on a transformed response,
# e.g. log(values) -- confirm against the assignment.

# Part C
# Tukey's HSD will be used to determine which material lasts the longest.
TukeyHSD(maov)
# Material 4 has the largest sample mean failure time; use the adjusted
# p-values above to see which pairwise differences are significant.
#_______________________________________________________________________________________________________________
# Problem 3.29 ----
# Particle counts under three methods, five observations per method.

# Prep: raw observations, one vector per method
s1 <- c(31, 10, 21, 4, 1)
s2 <- c(62, 40, 24, 30, 35)
s3 <- c(53, 27, 120, 97, 68)

# Residuals about each method's own mean
ress1 <- s1 - mean(s1)
ress2 <- s2 - mean(s2)
ress3 <- s3 - mean(s3)

# Part A
# One-way ANOVA: do all methods have the same effect on particle count?
dats <- data.frame(s1, s2, s3)
stacks <- stack(dats)
saov <- aov(values ~ ind, data = stacks)
summary(saov)
# The p-value is below 0.05, so we reject the null that the methods have no
# effect on particle count

# Part B
# Residuals of each method against observation index, with a reference line
# at the mean of those residuals
plot(1:5, ress1, xlab = "Observation")
abline(h = mean(ress1))
plot(1:5, ress2, xlab = "Observation")
abline(h = mean(ress2))
plot(1:5, ress3, xlab = "Observation")
abline(h = mean(ress3))

# Normal probability plot of the pooled residuals
method.resiudals <- c(ress1, ress2, ress3)
qqnorm(method.resiudals)
# The residuals are judged to be approximately normal from the normal
# probability plot, and the residual plots are taken to support constant
# variance as well

# Part C
# Tukey's HSD is conducted to see which methods differ
TukeyHSD(saov)
# Method 3 has the largest mean; only the s3-s1 difference is significant at
# the 5% family-wise level (p adj of about 0.005)
#_______________________________________________________________________________________________
# Problem 3.51 ----
# Kruskal-Wallis rank sum test on the stacked fluid data
kruskal.test(values ~ ind, data = stackdat)
# The Kruskal-Wallis test finds no statistically significant difference
# between the fluids
# __________________________________________________________________
# Problem 3.52 ----
# Kruskal-Wallis rank sum test on the stacked material data
kruskal.test(values ~ ind, data = stackmdat)
# The Kruskal-Wallis test is significant, and more strongly so than the
# parametric ANOVA, indicating that the materials differ