## Warning: package 'car' was built under R version 4.2.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Question 3.23
# One-way ANOVA comparing the four fluids (six observations per fluid,
# listed fluid by fluid).
data <- c(
  17.6, 18.9, 16.3, 17.4, 20.1, 21.6,
  16.9, 15.3, 18.6, 17.1, 19.5, 20.3,
  21.4, 23.6, 19.4, 18.5, 20.5, 22.3,
  19.3, 21.1, 16.9, 17.5, 18.3, 19.8
)
labels <- rep(c("fluid 1", "fluid 2", "fluid 3", "fluid 4"), each = 6)
insulating <- data.frame(data, labels)
# The fluid label is the categorical treatment factor.
insulating$labels <- factor(insulating$labels)
anova.model <- aov(data ~ labels, data = insulating)
summary(anova.model)
## Df Sum Sq Mean Sq F value Pr(>F)
## labels 3 30.17 10.05 3.047 0.0525 .
## Residuals 20 65.99 3.30
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We cannot conclude that the fluids differ at an alpha of 0.05 (p = 0.0525), although the result is very close to significant.
Based on the outcome of the ANOVA, I would choose any of the fluids, as we cannot say for certain that one has a higher mean than the others.
# Diagnostic plots for the fitted fluid model (used to judge the residuals).
plot(anova.model)
Question 3.28
# One-way ANOVA of time across the material groups in df.
anova.model2 <- aov(time ~ material, data = df)
summary(anova.model2)
## Df Sum Sq Mean Sq F value Pr(>F)
## material 4 103191489 25797872 6.191 0.00379 **
## Residuals 15 62505657 4167044
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Diagnostic plots for the material model; the answer below reads normality
# and constant variance off these.
plot(anova.model2)
b/c) The residual plots show that the residuals are not normally distributed and that the variance is not constant across materials. The ANOVA assumptions are therefore violated, and the untransformed data should not be analyzed with ANOVA. Looking at the data makes it evident that the times for materials 2, 1, and 5 are drastically lower than those for materials 3 and 4, making a formal statistical analysis unnecessary to see that difference.
Question 3.29
# One-way ANOVA of count across the three methods (five observations per
# method, listed method by method).
count <- c(
  31, 10, 21, 4, 1,
  62, 40, 21, 30, 35,
  53, 27, 120, 97, 68
)
method <- rep(c(1, 2, 3), each = 5)
df2 <- data.frame(count, method)
# The method code is a label, not a quantity, so make it a factor.
df2$method <- factor(df2$method)
anova.model3 <- aov(count ~ method, data = df2)
summary(anova.model3)
## Df Sum Sq Mean Sq F value Pr(>F)
## method 2 8985 4492 7.826 0.00668 **
## Residuals 12 6888 574
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Diagnostic plots for the method model; part b) below interprets them.
plot(anova.model3)
b) The residual plots raise slight concern about constant variance, while the normal probability plot suggests normality except for the two values at the end, which could indicate a slight deviation from normality.
Question 3.51
# Rank-based alternative to the one-way ANOVA on the same fluid data.
kruskal.test(data ~ labels, data = insulating)
##
## Kruskal-Wallis rank sum test
##
## data: data by labels
## Kruskal-Wallis chi-squared = 6.2177, df = 3, p-value = 0.1015
Based on the Kruskal-Wallis test (p = 0.1015), we would fail to reject the hypothesis that the fluid types have the same lifespan. This is the same conclusion that was reached in Question 3.23.
Question 3.52
The results are comparable to each other, since the Kruskal-Wallis test does not require the data to be normal or to have constant variance. This test could even be more effective if you concluded from the residual plots that the variance was not constant.
Question 4.3
# Twenty observations laid out chemical by chemical (rows below), with the
# bolt index cycling 1-5 inside each chemical.
observations <- c(
  73, 68, 74, 71, 67,
  73, 67, 75, 72, 70,
  75, 68, 78, 73, 68,
  73, 71, 75, 75, 69
)
bolt <- rep(seq(1, 5), times = 4)
chemical <- rep(c(1, 2, 3, 4), each = 5)
# as.fixed() is from GAD; both terms are declared fixed for gad().
bolt <- as.fixed(bolt)
chemical <- as.fixed(chemical)
df3 <- data.frame(observations, bolt)
\(H_{0}\): \(\mu_{1}=\mu_{2}=\mu_{3}=\mu_{4}\)
\(H_{a}\): at least one \(\mu_{i}\) differs
# Additive model: chemical is the treatment, bolt is the block.
model1 <- lm(observations ~ chemical + bolt)
gad(model1)
## $anova
## Analysis of Variance Table
##
## Response: observations
## Df Sum Sq Mean Sq F value Pr(>F)
## chemical 3 12.95 4.317 2.3761 0.1211
## bolt 4 157.00 39.250 21.6055 2.059e-05 ***
## Residuals 12 21.80 1.817
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
At an alpha of 0.05, we would fail to reject the null hypothesis that the chemicals have equal means (p = 0.1211).
Question 4.16 - Estimating Ti and Bj from 4.3
# Effect estimates are computed as (group mean - grand mean).
grand_mean <- mean(observations)
# observations is ordered chemical by chemical, five values per chemical.
c1 <- observations[1:5]
c2 <- observations[6:10]
c3 <- observations[11:15]
# Chemical 4 occupies positions 16-20. Starting the slice at 15 would also
# pull in the last chemical 3 value and distort the chemical 4 mean.
c4 <- observations[16:20]
mc1 <- mean(c1)
mc2 <- mean(c2)
mc3 <- mean(c3)
mc4 <- mean(c4)
# Rows of df3 for each bolt; filter() here is dplyr's, which masks
# stats::filter once dplyr is attached.
b1 <- filter(df3, bolt == 1)
b2 <- filter(df3, bolt == 2)
b3 <- filter(df3, bolt == 3)
b4 <- filter(df3, bolt == 4)
b5 <- filter(df3, bolt == 5)
mb1 <- mean(b1$observations)
mb2 <- mean(b2$observations)
mb3 <- mean(b3$observations)
mb4 <- mean(b4$observations)
mb5 <- mean(b5$observations)
Estimating \(\tau_{i}\) which corresponds to the chemical used
T1 <- mc1 - grand_mean
T2 <- mc2 - grand_mean
T3 <- mc3 - grand_mean
T4 <- mc4 - grand_mean
T1
## [1] -1.15
T2
## [1] -0.35
T3
## [1] 0.65
T4
## [1] 0.85
Estimating \(\beta_{j}\), which is the effect of the bolt used
# Bolt effect = bolt mean minus grand mean.
Beta1 <- mb1 - grand_mean
Beta2 <- mb2 - grand_mean
Beta3 <- mb3 - grand_mean
Beta4 <- mb4 - grand_mean
Beta5 <- mb5 - grand_mean
Beta1
## [1] 1.75
Beta2
## [1] -3.25
Beta3
## [1] 3.75
Beta4
## [1] 1
Beta5
## [1] -3.25
Question 4.22
# Each row of five values below is one batch; day cycles 1-5 within a batch
# and ing is the ingredient letter used for that batch/day cell.
value <- c(
  8, 7, 1, 7, 3,
  11, 2, 7, 3, 8,
  4, 9, 10, 1, 5,
  6, 8, 6, 6, 10,
  4, 2, 3, 8, 8
)
ing <- c(
  "A", "B", "D", "C", "E",
  "C", "E", "A", "D", "B",
  "B", "A", "C", "E", "D",
  "D", "C", "E", "B", "A",
  "E", "D", "B", "A", "C"
)
batch <- rep(c(1, 2, 3, 4, 5), each = 5)
day <- rep(seq(1, 5), times = 5)
dat <- data.frame(batch, day, value, ing)
# Batch and day are labels, so convert both to factors in one pass.
block_cols <- c("batch", "day")
dat[block_cols] <- lapply(dat[block_cols], as.factor)
dat$chem <- as.factor(dat$ing)
\(H_{0}\): \(\mu_{1}=\mu_{2}=\mu_{3}=\mu_{4}=\mu_{5}\)
\(H_{a}\): at least one \(\mu_{i}\) differs
# Ingredient is the treatment; batch and day are the two blocking terms.
# Note that this reuses (overwrites) the earlier anova.model object.
anova.model <- aov(value ~ ing + batch + day, data = dat)
summary(anova.model)
## Df Sum Sq Mean Sq F value Pr(>F)
## ing 4 141.44 35.36 11.309 0.000488 ***
## batch 4 15.44 3.86 1.235 0.347618
## day 4 12.24 3.06 0.979 0.455014
## Residuals 12 37.52 3.13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Reject the null hypothesis at an alpha of 0.05 (p = 0.000488). The ingredients do change the reaction time of the chemical process.
#Complete Code
#3.23
library(car)
library(agricolae)
library(tidyr)
library(dplyr)
library(pwr)
library(GAD)
# One-way ANOVA comparing the four fluids (six observations per fluid,
# listed fluid by fluid).
data <- c(
  17.6, 18.9, 16.3, 17.4, 20.1, 21.6,
  16.9, 15.3, 18.6, 17.1, 19.5, 20.3,
  21.4, 23.6, 19.4, 18.5, 20.5, 22.3,
  19.3, 21.1, 16.9, 17.5, 18.3, 19.8
)
labels <- rep(c("fluid 1", "fluid 2", "fluid 3", "fluid 4"), each = 6)
insulating <- data.frame(data, labels)
# The fluid label is the categorical treatment factor.
insulating$labels <- factor(insulating$labels)
anova.model <- aov(data ~ labels, data = insulating)
summary(anova.model)
# Diagnostic plots for the fitted model.
plot(anova.model)
# 3.28
# One-way ANOVA of time across five materials (four observations each,
# listed material by material).
time <- c(
  110, 157, 194, 178,
  1, 2, 4, 18,
  880, 1256, 5276, 4355,
  495, 7040, 5307, 10050,
  7, 5, 29, 2
)
material <- rep(
  c("Material 1", "Material 2", "Material 3", "Material 4", "Material 5"),
  each = 4
)
df <- data.frame(time, material)
df$material <- factor(df$material)
anova.model2 <- aov(time ~ material, data = df)
summary(anova.model2)
# Diagnostic plots for the fitted model.
plot(anova.model2)
# 3.29
# One-way ANOVA of count across the three methods (five observations per
# method, listed method by method).
count <- c(
  31, 10, 21, 4, 1,
  62, 40, 21, 30, 35,
  53, 27, 120, 97, 68
)
method <- rep(c(1, 2, 3), each = 5)
df2 <- data.frame(count, method)
# The method code is a label, not a quantity, so make it a factor.
df2$method <- factor(df2$method)
anova.model3 <- aov(count ~ method, data = df2)
summary(anova.model3)
# Diagnostic plots for the fitted model.
plot(anova.model3)
# 3.51
# Rank-based alternative to the one-way ANOVA on the same fluid data.
kruskal.test(data ~ labels, data = insulating)
# 4.3
# Twenty observations laid out chemical by chemical (rows below), with the
# bolt index cycling 1-5 inside each chemical.
observations <- c(
  73, 68, 74, 71, 67,
  73, 67, 75, 72, 70,
  75, 68, 78, 73, 68,
  73, 71, 75, 75, 69
)
bolt <- rep(seq(1, 5), times = 4)
chemical <- rep(c(1, 2, 3, 4), each = 5)
# as.fixed() is from GAD; both terms are declared fixed for gad().
bolt <- as.fixed(bolt)
chemical <- as.fixed(chemical)
df3 <- data.frame(observations, bolt)
# Additive model: chemical is the treatment, bolt is the block.
model1 <- lm(observations ~ chemical + bolt)
gad(model1)
# 4.16
# Estimate the chemical effects (T1-T4) and bolt effects (Beta1-Beta5) from
# the 4.3 data as (group mean - grand mean).
grand_mean <- mean(observations)
# observations is ordered chemical by chemical, five values per chemical.
c1 <- observations[1:5]
c2 <- observations[6:10]
c3 <- observations[11:15]
# Chemical 4 occupies positions 16-20. Starting the slice at 15 would also
# pull in the last chemical 3 value and distort the chemical 4 mean.
c4 <- observations[16:20]
mc1 <- mean(c1)
mc2 <- mean(c2)
mc3 <- mean(c3)
mc4 <- mean(c4)
# Rows of df3 for each bolt; filter() here is dplyr's, which masks
# stats::filter once dplyr is attached.
b1 <- filter(df3, bolt == 1)
b2 <- filter(df3, bolt == 2)
b3 <- filter(df3, bolt == 3)
b4 <- filter(df3, bolt == 4)
b5 <- filter(df3, bolt == 5)
mb1 <- mean(b1$observations)
mb2 <- mean(b2$observations)
mb3 <- mean(b3$observations)
mb4 <- mean(b4$observations)
mb5 <- mean(b5$observations)
# Chemical effects.
T1 <- mc1 - grand_mean
T2 <- mc2 - grand_mean
T3 <- mc3 - grand_mean
T4 <- mc4 - grand_mean
T1
T2
T3
T4
# Bolt effects.
Beta1 <- mb1 - grand_mean
Beta2 <- mb2 - grand_mean
Beta3 <- mb3 - grand_mean
Beta4 <- mb4 - grand_mean
Beta5 <- mb5 - grand_mean
Beta1
Beta2
Beta3
Beta4
Beta5
# 4.22
# Each row of five values below is one batch; day cycles 1-5 within a batch
# and chem is the ingredient letter used for that batch/day cell.
value <- c(
  8, 7, 1, 7, 3,
  11, 2, 7, 3, 8,
  4, 9, 10, 1, 5,
  6, 8, 6, 6, 10,
  4, 2, 3, 8, 8
)
chem <- c(
  "A", "B", "D", "C", "E",
  "C", "E", "A", "D", "B",
  "B", "A", "C", "E", "D",
  "D", "C", "E", "B", "A",
  "E", "D", "B", "A", "C"
)
batch <- rep(c(1, 2, 3, 4, 5), each = 5)
day <- rep(seq(1, 5), times = 5)
dat <- data.frame(batch, day, value, chem)
# All three design columns are labels, so convert them to factors in one pass.
design_cols <- c("batch", "day", "chem")
dat[design_cols] <- lapply(dat[design_cols], as.factor)
# Ingredient is the treatment; batch and day are the two blocking terms.
# Note that this reuses (overwrites) the earlier anova.model object.
anova.model <- aov(value ~ chem + batch + day, data = dat)
summary(anova.model)