Module 4 Homework

## Warning: package 'car' was built under R version 4.2.3

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.2.3

## Warning: package 'tidyr' was built under R version 4.2.3

## Warning: package 'dplyr' was built under R version 4.2.3

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Question 3.23

# Responses for the four fluids: six observations per fluid, listed fluid by fluid.
data <- c(
  17.6, 18.9, 16.3, 17.4, 20.1, 21.6,
  16.9, 15.3, 18.6, 17.1, 19.5, 20.3,
  21.4, 23.6, 19.4, 18.5, 20.5, 22.3,
  19.3, 21.1, 16.9, 17.5, 18.3, 19.8
)
labels <- rep(paste("fluid", 1:4), each = 6)

# Store the grouping column as a factor so aov() treats it as the treatment.
insulating <- data.frame(data, labels = as.factor(labels))

# One-way ANOVA of the response on fluid type.
anova.model <- aov(data ~ labels, data = insulating)
summary(anova.model)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## labels       3  30.17   10.05   3.047 0.0525 .
## Residuals   20  65.99    3.30                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We cannot conclude that the fluids differ at an alpha of 0.05, although the result is very close (p = 0.0525).
Based on the outcome of the anova, I would choose any of the fluids as we cannot say for certain that one is higher than the others.

# Diagnostic plots for the Question 3.23 ANOVA fit, used to check the model assumptions.
plot(anova.model)

The basic assumptions appear to be correct as the graphs show a constant variance and a mostly normal distribution.

Question 3.28

# One-way ANOVA of time on material. `df` is not created in this section;
# it is built from the raw data in the "Complete Code" listing (#3.28).
anova.model2 <- aov(time~material, data = df)
summary(anova.model2)

##             Df    Sum Sq  Mean Sq F value  Pr(>F)   
## material     4 103191489 25797872   6.191 0.00379 **
## Residuals   15  62505657  4167044                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The five materials do not all have the same effect on the mean failure time, since the p-value (0.00379) is very small. This also makes sense because the observed values differ wildly from one material to another.

# Diagnostic plots for the Question 3.28 ANOVA fit, used to check the model assumptions.
plot(anova.model2)

b/c) These plots show that the residuals are not normally distributed and do not have constant variance. Therefore this data should not be analyzed with ANOVA as it stands. Looking at the data makes it evident that materials 1, 2, and 5 are drastically lower than the others, making a formal statistical analysis of them unnecessary.

Question 3.29

# Particle counts: five observations per method, listed method by method.
count <- c(
  31, 10, 21, 4, 1,
  62, 40, 21, 30, 35,
  53, 27, 120, 97, 68
)
method <- rep(c(1, 2, 3), each = 5)

# The method column is stored as a factor so aov() treats it as a grouping variable.
df2 <- data.frame(count, method = as.factor(method))

# One-way ANOVA of count on method.
anova.model3 <- aov(count ~ method, data = df2)
summary(anova.model3)

##             Df Sum Sq Mean Sq F value  Pr(>F)   
## method       2   8985    4492   7.826 0.00668 **
## Residuals   12   6888     574                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The methods do not all have the same effect on the mean particle count, since the p-value (0.00668) is very small.

# Diagnostic plots for the Question 3.29 ANOVA fit, used to check the model assumptions.
plot(anova.model3)

b) The residual plots raise slight concern about the variability, while the normal plot suggests normality except for the two values at the end, which could indicate a slight deviation from normality.

Based on b I would perform the same test and come to the same conclusions

Question 3.51

 # Kruskal-Wallis rank sum test on the same fluid data used in Question 3.23.
 kruskal.test(data~labels, data=insulating)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  data by labels
## Kruskal-Wallis chi-squared = 6.2177, df = 3, p-value = 0.1015

Based on the Kruskal-Wallis test, we fail to reject the hypothesis that the fluid types have the same lifespan. This is the same conclusion that was reached in Question 3.23.

Question 3.52

The results are comparable to each other since the Kruskal-Wallis test does not require the data to be normal or have constant variance. This test could even be more effective if you concluded from the graphs that the variance was not constant.

Question 4.3

# Responses listed chemical by chemical; within each chemical the five values
# correspond to bolts 1 through 5.
observations <- c(
  73, 68, 74, 71, 67,
  73, 67, 75, 72, 70,
  75, 68, 78, 73, 68,
  73, 71, 75, 75, 69
)

# Both design factors are declared as fixed effects for GAD.
bolt <- as.fixed(rep(seq(1, 5), times = 4))
chemical <- as.fixed(rep(c(1, 2, 3, 4), each = 5))

df3 <- data.frame(observations, bolt)

\(H_{o}\): \(\mu_{1}\)=\(\mu_{2}\)=\(\mu_{3}\)=\(\mu_{4}\)

\(H_{a}\): At least one \(\mu_{i}\) differs

# Additive linear model with chemical and bolt as the two fixed factors;
# gad() prints the analysis of variance table for the fit.
model1 <- lm(observations~chemical+bolt)
gad(model1)

## $anova
## Analysis of Variance Table
## 
## Response: observations
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## chemical   3  12.95   4.317  2.3761    0.1211    
## bolt       4 157.00  39.250 21.6055 2.059e-05 ***
## Residuals 12  21.80   1.817                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

At an alpha of 0.05, we fail to reject the null hypothesis that the chemicals have equal means (p = 0.1211).

Question 4.16 - Estimating Ti and Bj from 4.3

# Grand mean of all 20 responses.
grand_mean <- mean(observations)

# The responses are stored chemical by chemical, five per chemical, so each
# chemical is a consecutive, non-overlapping slice of length 5.
c1 <- observations[1:5]
c2 <- observations[6:10]
c3 <- observations[11:15]
# Chemical 4 starts at index 16; starting at 15 would also pull in the last
# chemical-3 value and distort the chemical-4 mean.
c4 <- observations[16:20]
mc1 <- mean(c1)
mc2 <- mean(c2)
mc3 <- mean(c3)
mc4 <- mean(c4)

# Bolt means: pick the rows of df3 that belong to each bolt, then average them.
b1 <- filter(df3, bolt == 1)
b2 <- filter(df3, bolt == 2)
b3 <- filter(df3, bolt == 3)
b4 <- filter(df3, bolt == 4)
b5 <- filter(df3, bolt == 5)
mb1 <- mean(b1$observations)
mb2 <- mean(b2$observations)
mb3 <- mean(b3$observations)
mb4 <- mean(b4$observations)
mb5 <- mean(b5$observations)

Estimating \(\tau_{i}\) which corresponds to the chemical used

# Each chemical's effect estimate is its mean minus the grand mean.
T1 <- mc1-grand_mean
T2 <- mc2-grand_mean
T3 <- mc3-grand_mean
T4 <- mc4-grand_mean
T1

## [1] -1.15

T2

## [1] -0.35

T3

## [1] 0.65

T4

## [1] 0.85

Estimating \(\beta_{j}\), which is the effect of the bolt used

# Each bolt's effect estimate is its mean minus the grand mean.
Beta1 <- mb1-grand_mean
Beta2 <- mb2-grand_mean
Beta3 <- mb3-grand_mean
Beta4 <- mb4-grand_mean
Beta5 <- mb5-grand_mean
Beta1

## [1] 1.75

Beta2

## [1] -3.25

Beta3

## [1] 3.75

Beta4

## [1] 1

Beta5

## [1] -3.25

Question 4.22

# Responses for the 5 x 5 layout, listed batch by batch (days 1-5 within each batch).
value <- c(
  8, 7, 1, 7, 3,
  11, 2, 7, 3, 8,
  4, 9, 10, 1, 5,
  6, 8, 6, 6, 10,
  4, 2, 3, 8, 8
)
# Ingredient (A-E) applied to each run, in the same order as `value`.
ing <- c(
  "A", "B", "D", "C", "E",
  "C", "E", "A", "D", "B",
  "B", "A", "C", "E", "D",
  "D", "C", "E", "B", "A",
  "E", "D", "B", "A", "C"
)
batch <- c(rep(1, 5), rep(2, 5), rep(3, 5), rep(4, 5), rep(5, 5))
day <- c(rep(seq(1, 5), 5))
dat <- data.frame(batch, day, value, ing)

# Convert all three classification columns to factors in place. The treatment
# column is `ing` (the name the model formula uses), so convert that column
# itself rather than adding a duplicate column under another name.
dat$batch <- as.factor(dat$batch)
dat$day <- as.factor(dat$day)
dat$ing <- as.factor(dat$ing)

\(H_{o}\): \(\mu_{1}\)=\(\mu_{2}\)=\(\mu_{3}\)=\(\mu_{4}\)=\(\mu_{5}\)

\(H_{a}\): At least one \(\mu_{i}\) differs

# Latin-square ANOVA: ingredient effect with batch and day as additional factors.
# NOTE: this reuses the name anova.model, overwriting the Question 3.23 fit.
anova.model <- aov(value~ing+batch+day, data=dat)
summary(anova.model)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## ing          4 141.44   35.36  11.309 0.000488 ***
## batch        4  15.44    3.86   1.235 0.347618    
## day          4  12.24    3.06   0.979 0.455014    
## Residuals   12  37.52    3.13                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Reject the null hypothesis at an alpha of 0.05 (p = 0.000488). The ingredients do change the reaction time of the chemical process.

#Complete Code

#3.23
library(car)
library(agricolae)
library(tidyr)
library(dplyr)
library(pwr)
library(GAD)

# Responses for the four fluids: six observations per fluid, listed fluid by fluid.
data <- c(
  17.6, 18.9, 16.3, 17.4, 20.1, 21.6,
  16.9, 15.3, 18.6, 17.1, 19.5, 20.3,
  21.4, 23.6, 19.4, 18.5, 20.5, 22.3,
  19.3, 21.1, 16.9, 17.5, 18.3, 19.8
)
labels <- rep(paste("fluid", 1:4), each = 6)

# Store the grouping column as a factor so aov() treats it as the treatment.
insulating <- data.frame(data, labels = as.factor(labels))

# One-way ANOVA of the response on fluid type, followed by its diagnostic plots.
anova.model <- aov(data ~ labels, data = insulating)
summary(anova.model)
plot(anova.model)

#3.28
# Failure times: four observations per material, listed material by material.
time <- c(
  110, 157, 194, 178,
  1, 2, 4, 18,
  880, 1256, 5276, 4355,
  495, 7040, 5307, 10050,
  7, 5, 29, 2
)
material <- rep(paste("Material", 1:5), each = 4)

# The material column is stored as a factor so aov() treats it as the treatment.
df <- data.frame(time, material = as.factor(material))

# One-way ANOVA of time on material, followed by its diagnostic plots.
anova.model2 <- aov(time ~ material, data = df)
summary(anova.model2)
plot(anova.model2)


#3.29
# Particle counts: five observations per method, listed method by method.
count <- c(
  31, 10, 21, 4, 1,
  62, 40, 21, 30, 35,
  53, 27, 120, 97, 68
)
method <- rep(c(1, 2, 3), each = 5)

# The method column is stored as a factor so aov() treats it as a grouping variable.
df2 <- data.frame(count, method = as.factor(method))

# One-way ANOVA of count on method, followed by its diagnostic plots.
anova.model3 <- aov(count ~ method, data = df2)
summary(anova.model3)
plot(anova.model3)

#3.51
# Kruskal-Wallis rank sum test on the fluid data frame built in the first section of this script.
kruskal.test(data~labels, data=insulating)

#4.3
# Responses listed chemical by chemical; within each chemical the five values
# correspond to bolts 1 through 5.
observations <- c(
  73, 68, 74, 71, 67,
  73, 67, 75, 72, 70,
  75, 68, 78, 73, 68,
  73, 71, 75, 75, 69
)

# Both design factors are declared as fixed effects for GAD.
bolt <- as.fixed(rep(seq(1, 5), times = 4))
chemical <- as.fixed(rep(c(1, 2, 3, 4), each = 5))
df3 <- data.frame(observations, bolt)

# Additive model with chemical and bolt; gad() prints its ANOVA table.
model1 <- lm(observations ~ chemical + bolt)
gad(model1)

#4.16
# Grand mean of all 20 responses.
grand_mean <- mean(observations)

# The responses are stored chemical by chemical, five per chemical, so each
# chemical is a consecutive, non-overlapping slice of length 5.
c1 <- observations[1:5]
c2 <- observations[6:10]
c3 <- observations[11:15]
# Chemical 4 starts at index 16; starting at 15 would also pull in the last
# chemical-3 value and distort the chemical-4 mean.
c4 <- observations[16:20]
mc1 <- mean(c1)
mc2 <- mean(c2)
mc3 <- mean(c3)
mc4 <- mean(c4)

# Bolt means: pick the rows of df3 that belong to each bolt, then average them.
b1 <- filter(df3, bolt == 1)
b2 <- filter(df3, bolt == 2)
b3 <- filter(df3, bolt == 3)
b4 <- filter(df3, bolt == 4)
b5 <- filter(df3, bolt == 5)
mb1 <- mean(b1$observations)
mb2 <- mean(b2$observations)
mb3 <- mean(b3$observations)
mb4 <- mean(b4$observations)
mb5 <- mean(b5$observations)

# Chemical effect estimates: chemical mean minus grand mean.
T1 <- mc1 - grand_mean
T2 <- mc2 - grand_mean
T3 <- mc3 - grand_mean
T4 <- mc4 - grand_mean
T1
T2
T3
T4

# Bolt effect estimates: bolt mean minus grand mean.
Beta1 <- mb1 - grand_mean
Beta2 <- mb2 - grand_mean
Beta3 <- mb3 - grand_mean
Beta4 <- mb4 - grand_mean
Beta5 <- mb5 - grand_mean
Beta1
Beta2
Beta3
Beta4
Beta5

#4.22
# Responses for the 5 x 5 layout, listed batch by batch (days 1-5 within each batch).
value <- c(
  8, 7, 1, 7, 3,
  11, 2, 7, 3, 8,
  4, 9, 10, 1, 5,
  6, 8, 6, 6, 10,
  4, 2, 3, 8, 8
)
# Treatment letter (A-E) for each run, in the same order as `value`.
chem <- c(
  "A", "B", "D", "C", "E",
  "C", "E", "A", "D", "B",
  "B", "A", "C", "E", "D",
  "D", "C", "E", "B", "A",
  "E", "D", "B", "A", "C"
)
batch <- rep(c(1, 2, 3, 4, 5), each = 5)
day <- rep(1:5, times = 5)

# All three classification columns are stored as factors.
dat <- data.frame(
  batch = as.factor(batch),
  day = as.factor(day),
  value = value,
  chem = as.factor(chem)
)

# ANOVA with the treatment first, then batch and day.
anova.model <- aov(value ~ chem + batch + day, data = dat)
summary(anova.model)

Module 4 Homework

Clay Cleavinger

10/11/2024