Problem 3.23

# Problem 3.23 data, entered by hand: four fluid types, six observations each

# Treatment labels: levels 1-4, six consecutive observations per level
Fluid_Type <- factor(rep(seq_len(4), each = 6))

# Observed life, listed one fluid type per row
Life <- c(
  17.6, 18.9, 16.3, 17.4, 20.1, 21.6, # Fluid Type 1
  16.9, 15.3, 18.6, 17.1, 19.5, 20.3, # Fluid Type 2
  21.4, 23.6, 19.4, 18.5, 20.5, 22.3, # Fluid Type 3
  19.3, 21.1, 16.9, 17.5, 18.3, 19.8  # Fluid Type 4
)

# Assemble the analysis data frame (columns: Fluid_Type, Life)
data <- data.frame(Fluid_Type = Fluid_Type, Life = Life)

# Preview the first six rows
head(data, n = 6L)

##   Fluid_Type Life
## 1          1 17.6
## 2          1 18.9
## 3          1 16.3
## 4          1 17.4
## 5          1 20.1
## 6          1 21.6

Perform ANOVA

# One-way ANOVA of Life on Fluid_Type, fitted to the data frame `data`
model <- aov(Life ~ Fluid_Type, data = data)

# Print the ANOVA table (Df, Sum Sq, Mean Sq, F value, Pr(>F))
print(summary(model))

##             Df Sum Sq Mean Sq F value Pr(>F)  
## Fluid_Type   3  30.16   10.05   3.047 0.0525 .
## Residuals   20  65.99    3.30                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Answer for part (a)

While the result is borderline (p-value = 0.0525), we fail to reject the null hypothesis at the 0.05 significance level. Based on this analysis, we cannot conclude that the mean life differs among the fluid types.

# Report the decision at the 5% level, using the p-value in the first
# (Fluid_Type) row of the ANOVA table
cat(
  if (summary(model)[[1]][["Pr(>F)"]][[1L]] < 0.05) {
    "There is a significant difference between the fluids."
  } else {
    "There is no significant difference between the fluids."
  }
)

## There is no significant difference between the fluids.

Part (b)

# Mean life per fluid type: one row per level of Fluid_Type
mean_life <- aggregate(Life ~ Fluid_Type, data, mean)

# Display the mean life for each fluid type
mean_life

##   Fluid_Type     Life
## 1          1 18.65000
## 2          2 17.95000
## 3          3 20.95000
## 4          4 18.81667

# Row with the largest mean life (which.max() keeps the first one on ties)
best_fluid <- mean_life[which.max(mean_life$Life), ]

# Fluid_Type is a factor, and cat() prints a factor's internal integer code
# rather than its label. Convert to character so the message always shows the
# level label, even if the levels are ever renamed or reordered.
cat(
  "The fluid with the longest life is Fluid Type",
  as.character(best_fluid$Fluid_Type),
  "with an average life of", best_fluid$Life, "hours."
)

## The fluid with the longest life is Fluid Type 3 with an average life of 20.95 hours.

part (c)

# Kruskal-Wallis rank sum test of Life across the Fluid_Type groups
kruskal_test <- kruskal.test(Life ~ Fluid_Type, data = data)

# Print the test statistic, degrees of freedom and p-value
print(kruskal_test)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  Life by Fluid_Type
## Kruskal-Wallis chi-squared = 6.2177, df = 3, p-value = 0.1015

interpretation

Since the p-value (0.1015) is greater than 0.05, we fail to reject the null hypothesis and conclude that there is no significant difference between the fluid types.

# Keep the Kruskal-Wallis p-value for reporting
p_value <- kruskal_test$p.value

# Choose the message by the 5% decision rule, then print it with the p-value
cat(
  if (p_value < 0.05) {
    "There is a significant difference between the fluid types (p-value ="
  } else {
    "There is no significant difference between the fluid types (p-value ="
  },
  p_value, ").\n"
)

## There is no significant difference between the fluid types (p-value = 0.1014857 ).

Problem 3.28

# Problem 3.28 data: five materials, four failure-time observations each

# Treatment labels: levels 1-5, four consecutive observations per level
Material <- factor(rep(seq_len(5), each = 4))

# Failure times, listed one material per row
Failure_Time <- c(
  110, 157, 194, 178,     # Material 1
  1, 2, 4, 18,            # Material 2
  880, 1256, 5276, 4355,  # Material 3
  495, 7040, 5307, 10050, # Material 4
  7, 5, 29, 2             # Material 5
)

# Assemble the analysis data frame (columns: Material, Failure_Time)
data <- data.frame(Material = Material, Failure_Time = Failure_Time)

# Preview the first six rows
head(data, n = 6L)

##   Material Failure_Time
## 1        1          110
## 2        1          157
## 3        1          194
## 4        1          178
## 5        2            1
## 6        2            2

perform ANOVA

# One-way ANOVA of Failure_Time on Material
model <- aov(Failure_Time ~ Material, data = data)

# Print the ANOVA table
print(summary(model))

##             Df    Sum Sq  Mean Sq F value  Pr(>F)   
## Material     4 103191489 25797872   6.191 0.00379 **
## Residuals   15  62505657  4167044                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# p-value from the first (Material) row of the ANOVA table
p_value <- summary(model)[[1]][["Pr(>F)"]][[1L]]

# Choose the message by the 5% decision rule, then print it with the p-value
cat(
  if (p_value < 0.05) {
    "There is a significant difference between the materials (p-value ="
  } else {
    "There is no significant difference between the materials (p-value ="
  },
  p_value, ").\n"
)

## There is a significant difference between the materials (p-value = 0.003785956 ).

part (b): Plot residuals vs predicted and Q-Q plot

# Pull the residuals and fitted values stored on the fitted ANOVA object
residuals <- model[["residuals"]]
fitted_values <- model[["fitted.values"]]

# Scatter of residuals against fitted values, with a zero reference line
plot(
  fitted_values, residuals,
  main = "Residuals vs Fitted Values",
  xlab = "Fitted Values",
  ylab = "Residuals"
)
abline(h = 0, col = "red")

# Normal Q-Q plot of the residuals with its reference line
qqnorm(residuals)
qqline(residuals)

Part (c)

# Kruskal-Wallis rank sum test of Failure_Time across the Material groups
kruskal_test <- kruskal.test(Failure_Time ~ Material, data = data)

# Print the test statistic, degrees of freedom and p-value
print(kruskal_test)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  Failure_Time by Material
## Kruskal-Wallis chi-squared = 16.873, df = 4, p-value = 0.002046

# Choose the message by the 5% decision rule, then print it with the p-value
cat(
  if (kruskal_test$p.value < 0.05) {
    "Based on the Kruskal-Wallis test, there is a significant difference between the materials (p-value ="
  } else {
    "Based on the Kruskal-Wallis test, there is no significant difference between the materials (p-value ="
  },
  kruskal_test$p.value, ").\n"
)

## Based on the Kruskal-Wallis test, there is a significant difference between the materials (p-value = 0.002045539 ).

### Problem 3.29, part (a)

# Problem 3.29 data: three methods, five counts each

# Treatment labels: levels 1-3, five consecutive observations per level
Method <- factor(rep(seq_len(3), each = 5))

# Observed counts, listed one method per row
Count <- c(
  31, 10, 21, 4, 1,    # Method 1
  62, 40, 24, 30, 35,  # Method 2
  53, 27, 120, 97, 68  # Method 3
)

# Assemble the analysis data frame (columns: Method, Count)
data <- data.frame(Method = Method, Count = Count)

# Preview the first six rows
head(data, n = 6L)

##   Method Count
## 1      1    31
## 2      1    10
## 3      1    21
## 4      1     4
## 5      1     1
## 6      2    62

Perform ANOVA

# One-way ANOVA of Count on Method
model <- aov(Count ~ Method, data = data)

# Print the ANOVA table
print(summary(model))

##             Df Sum Sq Mean Sq F value  Pr(>F)   
## Method       2   8964    4482   7.914 0.00643 **
## Residuals   12   6796     566                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# p-value from the first (Method) row of the ANOVA table
p_value <- summary(model)[[1]][["Pr(>F)"]][[1L]]

# Choose the message by the 5% decision rule, then print it with the p-value
cat(
  if (p_value < 0.05) {
    "There is a significant difference between the methods (p-value ="
  } else {
    "There is no significant difference between the methods (p-value ="
  },
  p_value, ").\n"
)

## There is a significant difference between the methods (p-value = 0.006430238 ).

#part (b) Residual Analysis

# Pull the residuals and fitted values stored on the fitted ANOVA object
residuals <- model[["residuals"]]
fitted_values <- model[["fitted.values"]]

# Scatter of residuals against fitted values, with a zero reference line
plot(
  fitted_values, residuals,
  main = "Residuals vs Fitted Values",
  xlab = "Fitted Values",
  ylab = "Residuals"
)
abline(h = 0, col = "blue")

# Normal Q-Q plot of the residuals with its reference line
qqnorm(residuals)
qqline(residuals)

# part (C)

# Kruskal-Wallis rank sum test of Count across the Method groups
kruskal_test <- kruskal.test(Count ~ Method, data = data)

# Print the test statistic, degrees of freedom and p-value
print(kruskal_test)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  Count by Method
## Kruskal-Wallis chi-squared = 8.54, df = 2, p-value = 0.01398

# Choose the message by the 5% decision rule, then print it with the p-value
cat(
  if (kruskal_test$p.value < 0.05) {
    "Based on the Kruskal-Wallis test, there is a significant difference between the methods (p-value ="
  } else {
    "Based on the Kruskal-Wallis test, there is no significant difference between the methods (p-value ="
  },
  kruskal_test$p.value, ").\n"
)

## Based on the Kruskal-Wallis test, there is a significant difference between the methods (p-value = 0.01398178 ).

Problem 3.51

# Problem 3.51 data: four fluid types, six observations each

# Treatment labels: levels 1-4, six consecutive observations per level
Fluid_Type <- factor(rep(seq_len(4), each = 6))

# Observed life, listed one fluid type per row
Life <- c(
  17.6, 18.9, 16.3, 17.4, 20.1, 21.6, # Fluid Type 1
  16.9, 15.3, 18.6, 17.1, 19.5, 20.3, # Fluid Type 2
  21.4, 23.6, 19.4, 18.5, 20.5, 22.3, # Fluid Type 3
  19.3, 21.1, 16.9, 17.5, 18.3, 19.8  # Fluid Type 4
)

# Assemble the analysis data frame (columns: Fluid_Type, Life)
data <- data.frame(Fluid_Type = Fluid_Type, Life = Life)

# Preview the first six rows
head(data, n = 6L)

##   Fluid_Type Life
## 1          1 17.6
## 2          1 18.9
## 3          1 16.3
## 4          1 17.4
## 5          1 20.1
## 6          1 21.6

# Kruskal-Wallis rank sum test of Life across the Fluid_Type groups
kruskal_test <- kruskal.test(Life ~ Fluid_Type, data = data)

# Print the test statistic, degrees of freedom and p-value
print(kruskal_test)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  Life by Fluid_Type
## Kruskal-Wallis chi-squared = 6.2177, df = 3, p-value = 0.1015

# Keep the Kruskal-Wallis p-value for reporting
p_value <- kruskal_test$p.value

# Choose the message by the 5% decision rule, then print it with the p-value
cat(
  if (p_value < 0.05) {
    "There is a significant difference between the fluid types (p-value ="
  } else {
    "There is no significant difference between the fluid types (p-value ="
  },
  p_value, ").\n"
)

## There is no significant difference between the fluid types (p-value = 0.1014857 ).

# One-way ANOVA on the same data, for comparison with the rank-based test
anova_model <- aov(Life ~ Fluid_Type, data = data)

# Print the ANOVA table
print(summary(anova_model))

##             Df Sum Sq Mean Sq F value Pr(>F)  
## Fluid_Type   3  30.16   10.05   3.047 0.0525 .
## Residuals   20  65.99    3.30                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Problem 3.52

# Problem 3.52 data: four fluid types, six observations each

# Treatment labels: levels 1-4, six consecutive observations per level
Fluid_Type <- factor(rep(seq_len(4), each = 6))

# Observed life, listed one fluid type per row
Life <- c(
  17.6, 18.9, 16.3, 17.4, 20.1, 21.6, # Fluid Type 1
  16.9, 15.3, 18.6, 17.1, 19.5, 20.3, # Fluid Type 2
  21.4, 23.6, 19.4, 18.5, 20.5, 22.3, # Fluid Type 3
  19.3, 21.1, 16.9, 17.5, 18.3, 19.8  # Fluid Type 4
)

# Assemble the analysis data frame (columns: Fluid_Type, Life)
data <- data.frame(Fluid_Type = Fluid_Type, Life = Life)

# Preview the first six rows
head(data, n = 6L)

##   Fluid_Type Life
## 1          1 17.6
## 2          1 18.9
## 3          1 16.3
## 4          1 17.4
## 5          1 20.1
## 6          1 21.6

# Kruskal-Wallis rank sum test of Life across the Fluid_Type groups
kruskal_test <- kruskal.test(Life ~ Fluid_Type, data = data)

# Print the test statistic, degrees of freedom and p-value
print(kruskal_test)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  Life by Fluid_Type
## Kruskal-Wallis chi-squared = 6.2177, df = 3, p-value = 0.1015

# Keep the Kruskal-Wallis p-value; the comparison step further down reads it
kruskal_p_value <- kruskal_test$p.value

# Choose the message by the 5% decision rule, then print it with the p-value
cat(
  if (kruskal_p_value < 0.05) {
    "There is a significant difference between the fluid types based on the Kruskal-Wallis test (p-value ="
  } else {
    "There is no significant difference between the fluid types based on the Kruskal-Wallis test (p-value ="
  },
  kruskal_p_value, ").\n"
)

## There is no significant difference between the fluid types based on the Kruskal-Wallis test (p-value = 0.1014857 ).

#Perform ANOVA (for Comparison)

# One-way ANOVA of Life on Fluid_Type
anova_model <- aov(Life ~ Fluid_Type, data = data)

# Print the ANOVA table
print(summary(anova_model))

##             Df Sum Sq Mean Sq F value Pr(>F)  
## Fluid_Type   3  30.16   10.05   3.047 0.0525 .
## Residuals   20  65.99    3.30                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# p-value from the first (Fluid_Type) row; the comparison step reads it
anova_p_value <- summary(anova_model)[[1]][["Pr(>F)"]][[1L]]

# Choose the message by the 5% decision rule, then print it with the p-value
cat(
  if (anova_p_value < 0.05) {
    "There is a significant difference between the fluid types based on ANOVA (p-value ="
  } else {
    "There is no significant difference between the fluid types based on ANOVA (p-value ="
  },
  anova_p_value, ").\n"
)

## There is no significant difference between the fluid types based on ANOVA (p-value = 0.05246316 ).

Compare results

# Compare the 5%-level decisions of the Kruskal-Wallis test and the ANOVA.
# Both p-values are single numbers, so the scalar, short-circuiting `&&` is the
# right operator for an `if` condition; elementwise `&` is meant for vectors.
if (kruskal_p_value < 0.05 && anova_p_value < 0.05) {
  # Both tests reject the null hypothesis
  cat("Both the Kruskal-Wallis test and ANOVA conclude that there is a significant difference between the fluid types.\n")
} else if (kruskal_p_value >= 0.05 && anova_p_value >= 0.05) {
  # Neither test rejects the null hypothesis
  cat("Both the Kruskal-Wallis test and ANOVA conclude that there is no significant difference between the fluid types.\n")
} else {
  # Exactly one of the two tests rejects
  cat("The results from the Kruskal-Wallis test and ANOVA differ, suggesting that the assumptions of ANOVA may not be met.\n")
}

## Both the Kruskal-Wallis test and ANOVA conclude that there is no significant difference between the fluid types.

Problem 4.3

Input data

# Problem 4.3 data: tensile strength of cloth for 4 chemicals on 5 bolts

# Chemical varies slowest (five consecutive rows each), Bolt cycles 1-5
Chemical <- factor(rep(seq_len(4), each = 5))
Bolt <- factor(rep(seq_len(5), times = 4))

# Strength readings, one chemical per row and bolts 1-5 across
Strength <- c(
  73, 68, 74, 71, 67, # Chemical 1
  73, 67, 75, 72, 70, # Chemical 2
  75, 68, 78, 73, 68, # Chemical 3
  73, 71, 75, 75, 69  # Chemical 4
)

# Assemble the analysis data frame (columns: Chemical, Bolt, Strength)
data <- data.frame(Chemical = Chemical, Bolt = Bolt, Strength = Strength)

# Preview the first six rows
head(data, n = 6L)

##   Chemical Bolt Strength
## 1        1    1       73
## 2        1    2       68
## 3        1    3       74
## 4        1    4       71
## 5        1    5       67
## 6        2    1       73

Perform Two-way Anova(randomized block design)

# Additive ANOVA for the randomized block design:
# Chemical is the treatment factor, Bolt is the blocking factor
model <- aov(Strength ~ Chemical + Bolt, data = data)

# Print the ANOVA table
print(summary(model))

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Chemical     3  12.95    4.32   2.376    0.121    
## Bolt         4 157.00   39.25  21.606 2.06e-05 ***
## Residuals   12  21.80    1.82                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Interpretations

# p-value from the first (Chemical) row of the ANOVA table
p_value_chemical <- summary(model)[[1]][["Pr(>F)"]][[1L]]

# Choose the message by the 5% decision rule, then print it with the p-value
cat(
  if (p_value_chemical < 0.05) {
    "There is a significant effect of the chemicals on the tensile strength (p-value ="
  } else {
    "There is no significant effect of the chemicals on the tensile strength (p-value ="
  },
  p_value_chemical, ").\n"
)

## There is no significant effect of the chemicals on the tensile strength (p-value = 0.1211445 ).

Check assumptions of ANOVA

# Pull the residuals and fitted values stored on the fitted ANOVA object
residuals <- model[["residuals"]]
fitted_values <- model[["fitted.values"]]

# Scatter of residuals against fitted values, with a zero reference line
plot(
  fitted_values, residuals,
  main = "Residuals vs Fitted Values",
  xlab = "Fitted Values",
  ylab = "Residuals"
)
abline(h = 0, col = "red")

# Normal Q-Q plot

# Normal Q-Q plot of the residuals with its reference line
qqnorm(residuals)
qqline(residuals)

# Conclusion: The p-value for the Chemical factor is 0.1211, which is greater than the significance level of 0.05. Therefore, we fail to reject the null hypothesis: there is no evidence of a difference in mean tensile strength among the four chemicals.

Problem 4.16

Input data

# Problem 4.16 data: tensile strength of cloth for 4 chemicals on 5 bolts

# Chemical varies slowest (five consecutive rows each), Bolt cycles 1-5
Chemical <- factor(rep(seq_len(4), each = 5))
Bolt <- factor(rep(seq_len(5), times = 4))

# Strength readings, one chemical per row and bolts 1-5 across
Strength <- c(
  73, 68, 74, 71, 67, # Chemical 1
  73, 67, 75, 72, 70, # Chemical 2
  75, 68, 78, 73, 68, # Chemical 3
  73, 71, 75, 75, 69  # Chemical 4
)

# Assemble the analysis data frame (columns: Chemical, Bolt, Strength)
data <- data.frame(Chemical = Chemical, Bolt = Bolt, Strength = Strength)

# Preview the first six rows
head(data, n = 6L)

##   Chemical Bolt Strength
## 1        1    1       73
## 2        1    2       68
## 3        1    3       74
## 4        1    4       71
## 5        1    5       67
## 6        2    1       73

Estimate the Overall Mean

# Grand mean of all Strength observations (the estimate of mu)
mu <- mean(data[["Strength"]])
print(mu)

## [1] 71.75

Estimate the chemical effects

# Mean Strength within each level of Chemical
chemical_means <- aggregate(Strength ~ Chemical, data = data, FUN = mean)

# Chemical effects (tau_i): each chemical mean minus the grand mean
tau <- chemical_means[["Strength"]] - mu
print(tau)

## [1] -1.15 -0.35  0.65  0.85

Estimate the Bolt Effects

# Mean Strength within each level of Bolt
bolt_means <- aggregate(Strength ~ Bolt, data = data, FUN = mean)

# Bolt effects (beta_j): each bolt mean minus the grand mean
beta <- bolt_means[["Strength"]] - mu
print(beta)

## [1]  1.75 -3.25  3.75  1.00 -3.25

Display the results

Chemical Effects

# Chemical levels, in the same order as the entries of tau
print(chemical_means[["Chemical"]])

## [1] 1 2 3 4
## Levels: 1 2 3 4

# Estimated chemical effects
print(tau)

## [1] -1.15 -0.35  0.65  0.85

Bolt Effects

# Bolt levels, in the same order as the entries of beta
print(bolt_means[["Bolt"]])

## [1] 1 2 3 4 5
## Levels: 1 2 3 4 5

# Estimated bolt effects
print(beta)

## [1]  1.75 -3.25  3.75  1.00 -3.25

Problem 4.22

Input data

# Problem 4.22 data: 5 x 5 Latin square (rows = Batch, columns = Day,
# letters = Ingredient)

# Batch varies slowest (five consecutive rows each), Day cycles 1-5
Batch <- factor(rep(seq_len(5), each = 5))
Day <- factor(rep(seq_len(5), times = 5))

# Ingredient assigned to each (Batch, Day) cell, one batch per row
Ingredient <- factor(c(
  "A", "B", "D", "C", "E", # Batch 1
  "C", "E", "A", "D", "B", # Batch 2
  "B", "A", "C", "E", "D", # Batch 3
  "D", "C", "E", "B", "A", # Batch 4
  "E", "D", "B", "A", "C"  # Batch 5
))

# Reaction times in the same cell order as Ingredient
Reaction_Time <- c(
  8, 7, 1, 7, 3,   # Batch 1
  11, 2, 7, 3, 8,  # Batch 2
  4, 9, 10, 1, 5,  # Batch 3
  6, 8, 6, 6, 10,  # Batch 4
  4, 2, 3, 8, 8    # Batch 5
)

# Assemble the analysis data frame
data <- data.frame(
  Batch = Batch,
  Day = Day,
  Ingredient = Ingredient,
  Reaction_Time = Reaction_Time
)

# Preview the first six rows
head(data, n = 6L)

##   Batch Day Ingredient Reaction_Time
## 1     1   1          A             8
## 2     1   2          B             7
## 3     1   3          D             1
## 4     1   4          C             7
## 5     1   5          E             3
## 6     2   1          C            11

Perform Two-way ANOVA(Latin Square Design)

# Latin square ANOVA: additive model with the treatment factor (Ingredient)
# and the two blocking factors (Batch and Day)
model <- aov(Reaction_Time ~ Ingredient + Batch + Day, data = data)

# Print the ANOVA table
print(summary(model))

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Ingredient   4 141.44   35.36  11.309 0.000488 ***
## Batch        4  15.44    3.86   1.235 0.347618    
## Day          4  12.24    3.06   0.979 0.455014    
## Residuals   12  37.52    3.13                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Interpretation of ANOVA

# p-value from the first (Ingredient) row of the ANOVA table
p_value_ingredient <- summary(model)[[1]][["Pr(>F)"]][[1L]]

# Choose the message by the 5% decision rule, then print it with the p-value
cat(
  if (p_value_ingredient < 0.05) {
    "There is a significant difference in reaction times due to the ingredients (p-value ="
  } else {
    "There is no significant difference in reaction times due to the ingredients (p-value ="
  },
  p_value_ingredient, ").\n"
)

## There is a significant difference in reaction times due to the ingredients (p-value = 0.0004876512 ).

Analyze Residuals (Check ANOVA assumptions)

# Pull the residuals and fitted values stored on the fitted ANOVA object
residuals <- model[["residuals"]]
fitted_values <- model[["fitted.values"]]

# Scatter of residuals against fitted values, with a zero reference line
plot(
  fitted_values, residuals,
  main = "Residuals vs Fitted Values",
  xlab = "Fitted Values",
  ylab = "Residuals"
)
abline(h = 0, col = "green")

Normal Q-Q plot

# Normal Q-Q plot of the residuals with its reference line
qqnorm(residuals)
qqline(residuals)

## Conclusion

# State the conclusion for the Ingredient factor at the 5% level
cat(
  if (p_value_ingredient < 0.05) {
    "We conclude that the different ingredients significantly affect the reaction time.\n"
  } else {
    "We conclude that the different ingredients do not significantly affect the reaction time.\n"
  }
)

## We conclude that the different ingredients significantly affect the reaction time.

# Remind the reader to inspect the diagnostic plots
cat("Check the residual plots above to confirm if the ANOVA assumptions (normality and equal variance) are satisfied.")

## Check the residual plots above to confirm if the ANOVA assumptions (normality and equal variance) are satisfied.

HW-Module 4

Juan Carlos Soto

2024-10-11

Problem 3.23

Perform ANOVA

Answer for part (a)

Part (b)

part (c)

interpretation

Problem 3.28

perform ANOVA

part (b): Plot residuals vs predicted and Q-Q plot

Part (c)

Perform ANOVA

Problem 3.51

Problem 3.52

Compare results

Problem 4.3

Input data

Perform Two-way Anova(randomized block design)

Interpretations

Check assumptions of ANOVA

Problem 4.16

Input data

Estimate the Overall Mean

Estimate the chemical effects

Estimate the Bolt Effects

Display the results

Chemical Effects

Bolt Effects

Problem 4.22

Input data

Perform Two-way ANOVA(Latin Square Design)

Interpretation of ANOVA

Analyze Residuals (Check ANOVA assumptions)

Normal Q-Q plot