Homework 4

Question 3.23 Part a

# Part (a) - ANOVA test to see if there is a significant difference between the fluids
library(GAD)

# Observed life (hours): 24 readings entered fluid by fluid, six per fluid
life_hours <- c(
  17.6, 18.9, 16.3, 17.4, 20.1, 21.6,
  16.9, 15.3, 18.6, 17.1, 19.5, 20.3,
  21.4, 23.6, 19.4, 18.5, 20.5, 22.3,
  19.3, 21.1, 16.9, 17.5, 18.3, 19.8
)

# Treatment factor: fluid types 1-4, declared fixed for GAD
fluid_type <- as.fixed(rep(1:4, each = 6))

# Second additive term: position (1-6) of each reading within its fluid,
# declared fixed via as.fixed().
# NOTE(review): this index is only the order in which the readings are listed.
# The 3.51 section analyses these same 24 values as a one-way layout
# (20 residual df instead of 15). Confirm the experiment really was blocked;
# if it was completely randomized, `bolt` should be dropped from the model.
bolt <- as.fixed(rep(1:6, times = 4))

# Additive linear model: fluid effect plus the position term
model <- lm(life_hours ~ fluid_type + bolt)

# ANOVA table computed by GAD from the factor declarations above
anova_results <- gad(model)

# Show the table
print(anova_results)

## $anova
## Analysis of Variance Table
## 
## Response: life_hours
##            Df Sum Sq Mean Sq F value  Pr(>F)  
## fluid_type  3 30.165 10.0550  4.5470 0.01857 *
## bolt        5 32.823  6.5647  2.9686 0.04655 *
## Residuals  15 33.170  2.2113                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Part b

# Part (b) - Average life per fluid (the best fluid is picked from these means)

# Group means of life_hours, one per level of fluid_type
mean_life <- tapply(X = life_hours, INDEX = fluid_type, FUN = mean)

# Show the per-fluid averages
print(mean_life)

##        1        2        3        4 
## 18.65000 17.95000 20.95000 18.81667

# Identify the fluid with the maximum average life.
# which.max() returns the *position* of the largest mean, not the fluid label.
# Look the label up by that position so the message stays correct even if the
# fluid levels are ever coded as something other than 1..k in order.
best_fluid <- which.max(mean_life)
cat("Fluid type with the longest life is:", names(mean_life)[best_fluid], "\n")

## Fluid type with the longest life is: 3

Part c

# Part (c) - Residual diagnostics for the fitted fluid-life model

# Pull fitted values and residuals out of `model`
fitted_values <- fitted.values(model)
residuals <- resid(model)

# Normality check: normal Q-Q plot with reference line
qqnorm(residuals)
qqline(residuals)

# Constant-variance check: residuals against fitted values, zero line in red
plot(fitted_values, residuals, main = "Residuals vs Fitted")
abline(h = 0, col = "red")

# Shape of the residual distribution
hist(residuals, main = "Histogram of Residuals", xlab = "Residuals")

# Five-number summary and mean of the residuals
summary(residuals)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -3.28333 -0.80000 -0.05417  0.00000  0.81667  2.01667

Question 3.28

Part a

# Failure times entered material by material: two values for material 1,
# four values for each of materials 2-5 (18 observations in total).
# NOTE(review): material 1 has only two readings while every other material
# has four - confirm against the problem's data table that none were dropped.
failure_time <- c(
  110, 157,
  1, 2, 4, 18,
  880, 1256, 5276, 4355,
  495, 7040, 5307, 10050,
  7, 5, 29, 2
)

# Material label (1 to 5) for each observation, matching the row sizes above
material <- factor(rep(1:5, times = c(2, 4, 4, 4, 4)))

# One-way ANOVA; aov() accepts unequal group sizes
model_unbalanced <- aov(failure_time ~ material)

# Show the ANOVA table
summary(model_unbalanced)

##             Df   Sum Sq  Mean Sq F value Pr(>F)  
## material     4 97630527 24407632   5.077  0.011 *
## Residuals   13 62502773  4807906                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Part b and c

# Residual diagnostics for the material failure-time model

# Fitted values and residuals of model_unbalanced
fitted_unbalanced <- fitted.values(model_unbalanced)
residuals_unbalanced <- resid(model_unbalanced)

# Normal Q-Q plot of the residuals with reference line
qqnorm(residuals_unbalanced)
qqline(residuals_unbalanced)

# Residuals against fitted values, with a red zero line
plot(fitted_unbalanced, residuals_unbalanced, main = "Residuals vs Fitted")
abline(h = 0, col = "red")

# Distribution of the residuals
hist(residuals_unbalanced, main = "Histogram of Residuals", xlab = "Residuals")

Question 3.29 part a

# Particle counts entered method by method: two values each for methods 1
# and 2, five values for method 3.
# NOTE(review): the group sizes are 2 / 2 / 5 - confirm against the problem's
# data table that no readings for methods 1 and 2 were left out.
particle_count <- c(
  31, 10,
  62, 40,
  53, 27, 120, 97, 68
)

# Method label (1 to 3) for each count, matching the row sizes above
method <- factor(rep(1:3, times = c(2, 2, 5)))

# One-way ANOVA; aov() accepts unequal group sizes
model_particle <- aov(particle_count ~ method)

# Show the ANOVA table
summary(model_particle)

##             Df Sum Sq Mean Sq F value Pr(>F)
## method       2   4014  2006.9    2.08  0.206
## Residuals    6   5789   964.8

Part b

# Residual diagnostics for the particle-count model

# Fitted values and residuals of model_particle
fitted_particle <- fitted.values(model_particle)
residuals_particle <- resid(model_particle)

# Normal Q-Q plot of the residuals with reference line
qqnorm(residuals_particle)
qqline(residuals_particle)

# Residuals against fitted values, with a red zero line
plot(fitted_particle, residuals_particle, main = "Residuals vs Fitted")
abline(h = 0, col = "red")

# Distribution of the residuals
hist(residuals_particle, main = "Histogram of Residuals", xlab = "Residuals")

# Five-number summary and mean of the residuals
summary(residuals_particle)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     -46     -11      -5       0      11      47

Part c

# Part (c) - Rank-based comparison of the methods

# Kruskal-Wallis rank sum test of particle_count across the levels of method;
# used here as the non-parametric counterpart of the one-way ANOVA
kruskal_test <- kruskal.test(particle_count ~ method)

# Show the test statistic, degrees of freedom and p-value
kruskal_test

## 
##  Kruskal-Wallis rank sum test
## 
## data:  particle_count by method
## Kruskal-Wallis chi-squared = 3.36, df = 2, p-value = 0.1864

Question 3.51

# Life (hours) of the four fluid types, six observations per type
life <- c(
  17.6, 18.9, 16.3, 17.4, 20.1, 21.6,   # Fluid Type 1
  16.9, 15.3, 18.6, 17.1, 19.5, 20.3,   # Fluid Type 2
  21.4, 23.6, 19.4, 18.5, 20.5, 22.3,   # Fluid Type 3
  19.3, 21.1, 16.9, 17.5, 18.3, 19.8    # Fluid Type 4
)

# Fluid label (1 to 4) for each observation, in the same order as `life`
fluid_type <- factor(rep(seq_len(4), each = 6))

# Kruskal-Wallis rank sum test of life across fluid types
kruskal_test <- kruskal.test(life ~ fluid_type)

# Show the test statistic, degrees of freedom and p-value
kruskal_test

## 
##  Kruskal-Wallis rank sum test
## 
## data:  life by fluid_type
## Kruskal-Wallis chi-squared = 6.2177, df = 3, p-value = 0.1015

# Life (hours) of the four fluid types, six observations per type
life <- c(
  17.6, 18.9, 16.3, 17.4, 20.1, 21.6,   # Fluid Type 1
  16.9, 15.3, 18.6, 17.1, 19.5, 20.3,   # Fluid Type 2
  21.4, 23.6, 19.4, 18.5, 20.5, 22.3,   # Fluid Type 3
  19.3, 21.1, 16.9, 17.5, 18.3, 19.8    # Fluid Type 4
)

# Fluid label (1 to 4): four levels, each repeated six times in a row
fluid_type <- gl(4, 6)

# Parametric analysis: one-way ANOVA and its summary table
anova_result <- aov(life ~ fluid_type)
anova_summary <- summary(anova_result)

# Rank-based analysis of the same data for comparison
kruskal_test <- kruskal.test(life ~ fluid_type)

# Show the ANOVA table first, then the Kruskal-Wallis result
print(anova_summary)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## fluid_type   3  30.17   10.05   3.047 0.0525 .
## Residuals   20  65.99    3.30                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

print(kruskal_test)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  life by fluid_type
## Kruskal-Wallis chi-squared = 6.2177, df = 3, p-value = 0.1015

Question 4.3

library(GAD)  # Load the GAD library

# Tensile strength of cloth. Each row of five values belongs to one chemical
# (see the `chemical` factor below); the five positions within a row are
# bolts 1-5.
tensile_strength <- c(
  73, 68, 74, 71, 67,   # Chemical 1
  73, 67, 75, 72, 70,   # Chemical 2
  75, 68, 78, 73, 68,   # Chemical 3
  73, 71, 75, 75, 69    # Chemical 4
)

# Long-format data frame: one row per (chemical, bolt) observation
data <- data.frame(
  tensile_strength = tensile_strength,
  chemical = gl(4, 5),        # chemicals 1-4, five consecutive values each
  bolt = gl(5, 1, 20)         # bolts 1-5, cycling within every chemical
)

# Additive two-factor ANOVA: chemical is the treatment, bolt is the block
anova_result <- aov(tensile_strength ~ chemical + bolt, data = data)
anova_summary <- summary(anova_result)

# Show the ANOVA table
print(anova_summary)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## chemical     3  12.95    4.32   2.376    0.121    
## bolt         4 157.00   39.25  21.606 2.06e-05 ***
## Residuals   12  21.80    1.82                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Question 4.16

# Load the GAD library
library(GAD)  # Load the GAD library

# Tensile strength of cloth. Each row of five values belongs to one chemical
# (see the `chemical` factor below); the five positions within a row are
# bolts 1-5.
tensile_strength <- c(73, 68, 74, 71, 67,   # Chemical 1
                      73, 67, 75, 72, 70,   # Chemical 2
                      75, 68, 78, 73, 68,   # Chemical 3
                      73, 71, 75, 75, 69)   # Chemical 4

# Long-format data frame: one row per (chemical, bolt) observation
data <- data.frame(
  tensile_strength = tensile_strength,
  chemical = factor(rep(1:4, each = 5)),  # 4 chemicals, 5 values each
  bolt = factor(rep(1:5, times = 4))      # 5 bolts, cycling within a chemical
)

# Additive model y_ij = mu + tau_i + beta_j + e_ij
# (chemical = treatment, bolt = block)
anova_result <- aov(tensile_strength ~ chemical + bolt, data = data)
anova_summary <- summary(anova_result)

# Parameter estimates for the balanced design: the grand mean, and each
# factor-level mean expressed as a deviation from the grand mean
overall_mean <- mean(data$tensile_strength)
chemical_means <- tapply(data$tensile_strength, data$chemical, mean)
bolt_means <- tapply(data$tensile_strength, data$bolt, mean)

tau_i <- chemical_means - overall_mean
beta_j <- bolt_means - overall_mean

# Report the estimates; without this the section produces no visible answer
cat("Overall mean (mu):", overall_mean, "\n")
cat("Chemical effects (tau_i):\n")
print(tau_i)
cat("Bolt effects (beta_j):\n")
print(beta_j)

Question 4.22

# Load necessary library
library(GAD)

# Reaction times from a 5 x 5 Latin square, entered row by row.
#
# The Ingredient column must follow the Latin-square layout: every letter
# occurs exactly once in each row and once in each column. Generating it with
# rep(c("A", ..., "E"), times = 5) makes it an exact copy of the column factor
# (Batch), so aov() cannot separate the two: the Batch term is dropped and the
# residual keeps 16 df instead of 12.
#
# NOTE(review): the letter layout below is taken from the textbook table for
# this problem - verify it against the assignment. That table labels the rows
# as batches and the columns as days; if so, the Day/Batch labels here are
# swapped. The swap does not affect the Ingredient F-test, but the two
# nuisance rows of the table would trade names.
data <- data.frame(
  ReactionTime = c(8, 7, 1, 7, 3,     # Day 1
                   11, 2, 7, 3, 8,    # Day 2
                   4, 9, 10, 1, 5,    # Day 3
                   6, 8, 6, 6, 10,    # Day 4
                   4, 2, 3, 8, 8),    # Day 5
  Day = factor(rep(1:5, each = 5)),     # row index: 5 levels
  Batch = factor(rep(1:5, times = 5)),  # column index: 5 levels
  Ingredient = factor(c("A", "B", "D", "C", "E",
                        "C", "E", "A", "D", "B",
                        "B", "A", "C", "E", "D",
                        "D", "C", "E", "B", "A",
                        "E", "D", "B", "A", "C"))
)

# Guard: a valid Latin square has each ingredient once per row and per column
stopifnot(
  all(table(data$Ingredient, data$Day) == 1),
  all(table(data$Ingredient, data$Batch) == 1)
)

# Latin-square ANOVA: the treatment (Ingredient) plus two blocking factors.
# Any previously saved output for this section predates the corrected layout
# and must be regenerated.
anova_result <- aov(ReactionTime ~ Ingredient + Day + Batch, data = data)

# Summary of the ANOVA
anova_summary <- summary(anova_result)

# Output the ANOVA summary
anova_summary

##             Df Sum Sq Mean Sq F value Pr(>F)
## Ingredient   4  12.24    3.06   0.274  0.891
## Day          4  15.44    3.86   0.345  0.844
## Residuals   16 178.96   11.19

Homework 4

Carlos Mas

2024-10-11