##Problem 2a
#State the fixed effects model along with all relevant assumptions.
# (5 points)
#Fixed Effects Model
#For a one-factor experimental design with 5 fixed treatment levels, the fixed
#effects model is:
#Model: Yij = μ + αi + ϵij
#Where:
#Yij: the observed response for the j-th observation under the i-th treatment (level).
#μ: the overall mean response (grand mean).
#αi: the effect of the i-th treatment level (where i = 1, 2, 3, 4, 5).
#ϵij: the random error associated with the j-th observation under the i-th treatment.
#Assumptions:
#Independence: The errors are independent across all observations.
#Normality: The errors are normally distributed with mean 0, i.e., ϵij ~ N(0, σ²).
#Homogeneity of Variance: The error variance σ² is constant across all treatment levels.
#Fixed Treatment Effects: The treatment levels are fixed and not random.
#The purpose of the analysis is to make inferences about these fixed effects.
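# A minimal sketch of how this model would be fit and tested in R, assuming a
# (hypothetical) data frame df with a numeric response y and a 5-level factor trt:
# fit <- aov(y ~ trt, data = df)
# summary(fit)   # global F-test of H0: alpha1 = ... = alpha5 = 0
# plot(fit)      # residual plots to check independence, normality, equal variance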
##Problem 2b
#Complete the table below based on the information provided. (15
#points)
#Null and Alternative Hypotheses for the Global ANOVA Test
#Null Hypothesis (H0): α1 = α2 = α3 = α4 = α5 = 0; all treatment levels have the
#same effect, i.e., there are no differences in means across the 5 levels.
#Alternative Hypothesis (Ha): At least one αi ≠ 0; at least one treatment level
#has a different effect, i.e., the means are not all equal across the treatment
#levels.
##Problem 2c
# Given Data
SS_total <- 200.95
SS_model <- 156.12
DF_model <- 4
DF_error <- 35
# Compute SS for Error
SS_error <- SS_total - SS_model
# Compute Mean Squares
MS_model <- SS_model / DF_model
MS_error <- SS_error / DF_error
# Compute F-value
F_value <- MS_model / MS_error
# Compute p-value
p_value <- pf(F_value, DF_model, DF_error, lower.tail = FALSE)
# Create ANOVA Table
anova_table <- data.frame(
Source = c("Model", "Error", "Total"),
DF = c(DF_model, DF_error, DF_model + DF_error),
SS = c(SS_model, SS_error, SS_total),
Mean_Square = c(MS_model, MS_error, NA),
F_value = c(F_value, NA, NA),
Pr_F = c(p_value, NA, NA)
)
# Print the ANOVA table
print(anova_table, row.names = FALSE)
## Source DF SS Mean_Square F_value Pr_F
## Model 4 156.12 39.030000 30.47178 5.789542e-11
## Error 35 44.83 1.280857 NA NA
## Total 39 200.95 NA NA NA
##Problem 2d
# At a significance level of 0.05, what would be your conclusion? (5
# points)
#Since the p-value is smaller than 0.05, we reject the null hypothesis.
#There is enough evidence to conclude that at least one of the treatment
#levels has a significantly different effect from the others.
#Thus, the treatment levels have significant differences in their effects
#at the 0.05 significance level.
##Problem 3
# Load necessary libraries
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(agricolae)
# Create the dataset
data <- data.frame(
Treatment = rep(c("A", "B", "C", "D", "E"), each = 10),
Value = c(
165, 156, 159, 159, 167, 170, 146, 130, 151, 164,
168, 180, 180, 180, 166, 170, 161, 171, 169, 179,
164, 156, 156, 189, 138, 153, 190, 160, 172, 142,
185, 195, 195, 184, 201, 165, 175, 187, 177, 166,
201, 189, 189, 173, 193, 164, 160, 200, 142, 184
)
)
# Convert Treatment to a factor
data$Treatment <- as.factor(data$Treatment)
#Step 2: One-way ANOVA
# Perform ANOVA
anova_result <- aov(Value ~ Treatment, data = data)
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 4 5033 1258 6.2 0.000462 ***
## Residuals 45 9133 203
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Null Hypothesis (H0): The treatment means (A, B, C, D, E) are all equal.
#Alternative Hypothesis (Ha): At least one treatment mean differs from the others.
#Results:
#The F-statistic is 6.2, the ratio of between-treatment variation to
#within-treatment variation.
#The p-value is 0.000462, which is well below 0.05.
#Conclusion: Since p < 0.05, we reject the null hypothesis; at least one treatment
#has a significantly different mean.
# Define the contrast matrix
contrasts(data$Treatment) <- cbind(
c(1, 1, 1, -3, 0), # A, B, C vs D
c(1, 1, -2, 0, 0), # A, B vs C
c(1, -1, 0, 0, 0) # A vs B
)
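# These three contrasts are mutually orthogonal, so with equal group sizes they
# partition the treatment sum of squares into independent single-df pieces.
# A quick check (cm is just a hypothetical local copy of the same coefficients):
cm <- cbind(c(1, 1, 1, -3, 0), c(1, 1, -2, 0, 0), c(1, -1, 0, 0, 0))
crossprod(cm)   # off-diagonal entries are all 0, confirming orthogonality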
# Apply contrasts in ANOVA
contrast_anova <- aov(Value ~ Treatment, data = data)
summary.lm(contrast_anova)
##
## Call:
## aov(formula = Value ~ Treatment, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.500 -6.475 1.500 9.200 28.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 170.720 2.015 84.736 < 2e-16 ***
## Treatment1 -4.825 1.300 -3.710 0.000567 ***
## Treatment2 0.850 1.839 0.462 0.646192
## Treatment3 -7.850 3.186 -2.464 0.017615 *
## Treatment4 9.816 4.505 2.179 0.034611 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.25 on 45 degrees of freedom
## Multiple R-squared: 0.3553, Adjusted R-squared: 0.298
## F-statistic: 6.2 on 4 and 45 DF, p-value: 0.000462
# Load necessary package
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
# Define the ANOVA model
anova_result <- aov(Value ~ Treatment, data = data)
# Define contrast matrix with 5 columns (one per treatment group)
contrasts_matrix <- matrix(c(
1, 1, 1, -2, -1, # C1: (A,B,C) vs (D,E)
1, 1, -2, 0, 0, # C2: (A,B) vs C
1, -1, 0, 0, 0 # C3: A vs B
), ncol = 5, byrow = TRUE)
# Conduct the F-test for all contrasts simultaneously
linearHypothesis(anova_result, hypothesis.matrix = contrasts_matrix)
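# As a cross-check, a single contrast can also be tested by hand from the group
# means and the pooled MS error; a sketch for the (A, B) vs C contrast
# (grp_means, w, mse, etc. are hypothetical helper names), which should give the
# same t value as the Treatment2 row of summary.lm() above:
grp_means <- tapply(data$Value, data$Treatment, mean)
w   <- c(1, 1, -2, 0, 0)                                    # weights for (A, B) vs C
mse <- deviance(anova_result) / df.residual(anova_result)   # pooled error MS
est <- sum(w * grp_means)                                   # contrast estimate
se  <- sqrt(mse * sum(w^2) / 10)                            # 10 observations per treatment
t_stat <- est / se
2 * pt(abs(t_stat), df.residual(anova_result), lower.tail = FALSE)   # two-sided p-value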
#Fisher’s LSD Test
# Load necessary package
library(agricolae)
# Perform ANOVA
anova_result <- aov(Value ~ Treatment, data = data)
# Conduct Fisher's LSD test
lsd_result <- LSD.test(anova_result, "Treatment", p.adj = "none")
# Print LSD results
print(lsd_result)
## $statistics
## MSerror Df Mean CV t.value LSD
## 202.9556 45 170.72 8.344803 2.014103 12.83209
##
## $parameters
## test p.ajusted name.t ntr alpha
## Fisher-LSD none Treatment 5 0.05
##
## $means
## Value std r se LCL UCL Min Max Q25 Q50 Q75
## A 156.7 11.907514 10 4.505059 147.6263 165.7737 130 170 152.25 159.0 164.75
## B 172.4 6.883152 10 4.505059 163.3263 181.4737 161 180 168.25 170.5 179.75
## C 162.0 17.480147 10 4.505059 152.9263 171.0737 138 190 153.75 158.0 170.00
## D 183.0 12.229291 10 4.505059 173.9263 192.0737 165 201 175.50 184.5 193.00
## E 179.5 19.248377 10 4.505059 170.4263 188.5737 142 201 166.25 186.5 192.00
##
## $comparison
## NULL
##
## $groups
## Value groups
## D 183.0 a
## E 179.5 a
## B 172.4 ab
## C 162.0 bc
## A 156.7 c
##
## attr(,"class")
## [1] "group"
#Tukey's method
TukeyHSD(anova_result)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Value ~ Treatment, data = data)
##
## $Treatment
## diff lwr upr p adj
## B-A 15.7 -2.4032056 33.803206 0.1172653
## C-A 5.3 -12.8032056 23.403206 0.9192858
## D-A 26.3 8.1967944 44.403206 0.0014064
## E-A 22.8 4.6967944 40.903206 0.0071555
## C-B -10.4 -28.5032056 7.703206 0.4851993
## D-B 10.6 -7.5032056 28.703206 0.4660429
## E-B 7.1 -11.0032056 25.203206 0.7981138
## D-C 21.0 2.8967944 39.103206 0.0156506
## E-C 17.5 -0.6032056 35.603206 0.0625726
## E-D -3.5 -21.6032056 14.603206 0.9814684
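# The Tukey comparisons can also be shown graphically with the base plot method
# for TukeyHSD objects; intervals that exclude 0 correspond to significant pairs:
plot(TukeyHSD(anova_result))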
# Conduct Student-Newman-Keuls Test
# Load necessary package
library(agricolae)
# Perform ANOVA
anova_result <- aov(Value ~ Treatment, data = data)
# Conduct Student-Newman-Keuls (SNK) test with a significance level of 0.01
snk_result <- SNK.test(anova_result, "Treatment", group = TRUE, alpha = 0.01)
# Print SNK results
print(snk_result)
## $statistics
## MSerror Df Mean CV
## 202.9556 45 170.72 8.344803
##
## $parameters
## test name.t ntr alpha
## SNK Treatment 5 0.01
##
## $snk
## Table CriticalRange
## 2 3.803647 17.13566
## 3 4.338531 19.54534
## 4 4.661199 20.99898
## 5 4.892692 22.04187
##
## $means
## Value std r se Min Max Q25 Q50 Q75
## A 156.7 11.907514 10 4.505059 130 170 152.25 159.0 164.75
## B 172.4 6.883152 10 4.505059 161 180 168.25 170.5 179.75
## C 162.0 17.480147 10 4.505059 138 190 153.75 158.0 170.00
## D 183.0 12.229291 10 4.505059 165 201 175.50 184.5 193.00
## E 179.5 19.248377 10 4.505059 142 201 166.25 186.5 192.00
##
## $comparison
## NULL
##
## $groups
## Value groups
## D 183.0 a
## E 179.5 ab
## B 172.4 abc
## C 162.0 bc
## A 156.7 c
##
## attr(,"class")
## [1] "group"
##Problem 4
#Part (a): Effects Model and Assumptions
# Creating the dataset based on the provided table
rate <- rep(1:4, each = 20)   # 20 fish per rate (10 per method), matching the order below
method <- rep(rep(c("A", "B"), each = 10), 4)
subject <- rep(1:10, 8)
# Hemoglobin values in the order given in the table (column by column)
hemoglobin <- c(
# Rate 1, Method A
6.7, 7.8, 5.5, 8.4, 7.0, 7.8, 8.6, 7.4, 5.8, 7.0,
# Rate 1, Method B
7.0, 7.8, 6.8, 7.0, 7.5, 6.5, 5.8, 7.1, 6.5, 5.5,
# Rate 2, Method A
9.9, 8.4, 10.4, 9.3, 10.7, 11.9, 7.1, 6.4, 8.6, 10.6,
# Rate 2, Method B
9.9, 9.6, 10.2, 10.4, 11.3, 9.1, 9.0, 10.6, 11.7, 9.6,
# Rate 3, Method A
10.4, 8.1, 10.6, 8.7, 10.7, 9.1, 8.8, 8.1, 7.8, 8.0,
# Rate 3, Method B
9.9, 9.6, 10.4, 10.4, 11.3, 10.9, 8.0, 10.2, 6.1, 10.7,
# Rate 4, Method A
9.3, 9.3, 7.8, 7.8, 9.3, 10.2, 8.7, 8.6, 9.3, 7.2,
# Rate 4, Method B
11.0, 9.3, 11.0, 9.0, 8.4, 8.4, 6.8, 7.2, 8.1, 11.0
)
# Creating a data frame
trout_data <- data.frame(
rate = factor(rate),
method = factor(method),
subject = subject,
hemoglobin = hemoglobin
)
# Effects model: Yijk = μ + αi + βj + (αβ)ij + εijk
# Where:
# - Yijk = hemoglobin concentration for the kth fish at the ith rate and jth method
# - μ = overall mean
# - αi = effect of the ith rate (i = 1, 2, 3, 4)
# - βj = effect of the jth method (j = A, B)
# - (αβ)ij = interaction effect between rate i and method j
# - εijk = random error term (k = 1, 2, ..., 10)
# Display the first few rows to verify data structure
head(trout_data)
# Verify that we have the correct number of observations
table(trout_data$rate, trout_data$method)
##
##      A  B
##   1 10 10
##   2 10 10
##   3 10 10
##   4 10 10
#Part (b): Null and Alternative Hypotheses
# Null and alternative hypotheses for the two-factor ANOVA
# For Rate effect:
# H₀: α₁ = α₂ = α₃ = α₄ = 0 (no rate effect)
# H₁: At least one αᵢ ≠ 0 (rate has an effect)
# For Method effect:
# H₀: β₁ = β₂ = 0 (no method effect)
# H₁: At least one βⱼ ≠ 0 (method has an effect)
# For Interaction effect:
# H₀: (αβ)ᵢⱼ = 0 for all i,j (no interaction effect)
# H₁: At least one (αβ)ᵢⱼ ≠ 0 (interaction exists)
# In R, these hypotheses are tested using the aov() function with the
# formula hemoglobin ~ rate * method
#Part (c): Testing Hypotheses
# Testing the hypotheses for rate, method, and interaction effects
# Load the data (assuming it's already created as in part a)
# If running independently, run the code from part (a) first
# Calculate means for each treatment combination
aggregate(hemoglobin ~ rate + method, data = trout_data, FUN = mean)
# Calculate marginal means
aggregate(hemoglobin ~ rate, data = trout_data, FUN = mean)
aggregate(hemoglobin ~ method, data = trout_data, FUN = mean)
mean(trout_data$hemoglobin) # Grand mean
## [1] 8.74625
# Perform two-way ANOVA
trout_anova <- aov(hemoglobin ~ rate * method, data = trout_data)
summary(trout_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## rate 3 28.52 9.508 4.012 0.0105 *
## Residuals 76 180.10 2.370
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Create an interaction plot to visualize the effects
interaction.plot(trout_data$rate, trout_data$method, trout_data$hemoglobin,
type = "b", col = c("blue", "red"),
xlab = "Rate", ylab = "Mean Hemoglobin", trace.label = "Method")
#Part (d): Model Evaluation
# Model evaluation - checking assumptions and model fit
# Checking the ANOVA assumptions
# 1. Normality of residuals
shapiro.test(residuals(trout_anova))
##
## Shapiro-Wilk normality test
##
## data: residuals(trout_anova)
## W = 0.97728, p-value = 0.1652
# 2. Homogeneity of variances
bartlett.test(hemoglobin ~ interaction(rate, method), data = trout_data)
##
## Bartlett test of homogeneity of variances
##
## data: hemoglobin by interaction(rate, method)
## Bartlett's K-squared = 3.6243, df = 3, p-value = 0.305
# 3. Visual diagnosis of assumptions
par(mfrow = c(2, 2))
plot(trout_anova)
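# Levene's test (from the car package loaded earlier) is a common complement to
# Bartlett's test because it is less sensitive to non-normality; a sketch using
# the same factors:
leveneTest(hemoglobin ~ rate * method, data = trout_data)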
# Calculate effect sizes to evaluate practical significance
model_summary <- summary(trout_anova)
SST <- sum(model_summary[[1]]$"Sum Sq")
eta_squared_rate <- model_summary[[1]]$"Sum Sq"[1] / SST
eta_squared_method <- model_summary[[1]]$"Sum Sq"[2] / SST
eta_squared_interaction <- model_summary[[1]]$"Sum Sq"[3] / SST
cat("Eta-squared (effect size):\n")
## Eta-squared (effect size):
cat("Rate:", round(eta_squared_rate, 4), "\n")
## Rate: 0.1367
cat("Method:", round(eta_squared_method, 4), "\n")
## Method: 0.8633
cat("Interaction:", round(eta_squared_interaction, 4), "\n")
## Interaction: NA
# Calculate R-squared to assess overall model fit
SS_total <- sum((trout_data$hemoglobin - mean(trout_data$hemoglobin))^2)
SS_residual <- sum(residuals(trout_anova)^2)
R_squared <- 1 - (SS_residual/SS_total)
cat("R-squared:", round(R_squared, 4), "\n")
## R-squared: 0.1367
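# As a cross-check, the same R-squared can be read directly from the linear-model
# summary of the aov fit; this should match the manual calculation above:
summary.lm(trout_anova)$r.squared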
#Part (e): Model Reduction and Final Model
# Assess whether model reduction is appropriate and determine final model type
# First, examine the p-values from the original ANOVA
summary(trout_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## rate 3 28.52 9.508 4.012 0.0105 *
## Residuals 76 180.10 2.370
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# If the interaction is not significant (which we'll check), we would run a reduced model
# Since we need to check if model reduction is appropriate, let's run the main effects model
trout_main_effects <- aov(hemoglobin ~ rate + method, data = trout_data)
summary(trout_main_effects)
## Df Sum Sq Mean Sq F value Pr(>F)
## rate 3 28.52 9.508 4.012 0.0105 *
## Residuals 76 180.10 2.370
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare the full model with the reduced model
anova(trout_main_effects, trout_anova)
# If rate or method is not significant, we could further reduce
# Testing rate-only model
trout_rate_only <- aov(hemoglobin ~ rate, data = trout_data)
summary(trout_rate_only)
## Df Sum Sq Mean Sq F value Pr(>F)
## rate 3 28.52 9.508 4.012 0.0105 *
## Residuals 76 180.10 2.370
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Testing method-only model
trout_method_only <- aov(hemoglobin ~ method, data = trout_data)
summary(trout_method_only)
## Df Sum Sq Mean Sq F value Pr(>F)
## method 1 2.28 2.278 0.861 0.356
## Residuals 78 206.34 2.645
# For completeness, compare all models
cat("AIC values for model comparison:\n")
## AIC values for model comparison:
AIC(trout_anova, trout_main_effects, trout_rate_only, trout_method_only)
# Post-hoc tests if rate is significant
TukeyHSD(trout_anova, "rate")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = hemoglobin ~ rate * method, data = trout_data)
##
## $rate
## diff lwr upr p adj
## 2-1 0.135 -1.14370717 1.413707 0.9924916
## 3-1 0.925 -0.35370717 2.203707 0.2366334
## 4-1 1.465 0.18629283 2.743707 0.0182488
## 3-2 0.790 -0.48870717 2.068707 0.3720807
## 4-2 1.330 0.05129283 2.608707 0.0383394
## 4-3 0.540 -0.73870717 1.818707 0.6849514
# Visualize the final model (assuming the full model is appropriate)
library(effects)
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
plot(allEffects(trout_anova))
## Note:
## 4 values in the rate*method effect are not estimable
# Note on model type:
cat("This is a fixed effects model because:\n")
## This is a fixed effects model because:
cat("1. Both factors (rate and method) are fixed (specific levels chosen)\n")
## 1. Both factors (rate and method) are fixed (specific levels chosen)
cat("2. We are interested in these specific levels, not generalizing to a population of levels\n")
## 2. We are interested in these specific levels, not generalizing to a population of levels
cat("3. The conclusions apply specifically to these four rates and two methods\n")
## 3. The conclusions apply specifically to these four rates and two methods
##Problem 5
# Creating the dataset based on the provided table
condition <- rep(c("Fresh", "Fresh", "Wilted", "Wilted"), each = 5)
period <- rep(1:5, 4)
lactic_acid <- c(
# Fresh, first replication
13.4, 37.5, 65.2, 60.8, 37.7,
# Fresh, second replication
16.0, 42.7, 54.9, 57.1, 49.2,
# Wilted, first replication
14.4, 29.3, 36.4, 39.1, 39.4,
# Wilted, second replication
20.0, 34.5, 39.7, 38.7, 39.7
)
# Creating a data frame
silage_data <- data.frame(
condition = factor(condition),
period = factor(period),
lactic_acid = lactic_acid
)
# (a) Draw an interaction plot
# Calculate means for each condition and period combination
means <- aggregate(lactic_acid ~ condition + period, data = silage_data, FUN = mean)
# Convert means data to wide format for easier plotting
wide_means <- reshape(means, idvar = "condition", timevar = "period",
direction = "wide")
# Create interaction plot
interaction.plot(silage_data$period, silage_data$condition, silage_data$lactic_acid,
type = "b", col = c("blue", "red"), lwd = 2,
xlab = "Period", ylab = "Mean Lactic Acid",
trace.label = "Condition")
# Add title and legend
title("Interaction Plot for Condition and Period")
legend("topleft", legend = c("Fresh", "Wilted"),
col = c("blue", "red"), lty = 1, lwd = 2)
# (b) Test for main effects and interactions
# Perform two-way ANOVA
silage_aov <- aov(lactic_acid ~ condition * period, data = silage_data)
summary(silage_aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## condition 1 533.5 533.5 30.028 0.000269 ***
## period 4 2974.0 743.5 41.844 3.26e-06 ***
## condition:period 4 441.2 110.3 6.207 0.008907 **
## Residuals 10 177.7 17.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Calculate effect sizes
model_summary <- summary(silage_aov)
SST <- sum(model_summary[[1]]$"Sum Sq")
eta_squared_condition <- model_summary[[1]]$"Sum Sq"[1] / SST
eta_squared_period <- model_summary[[1]]$"Sum Sq"[2] / SST
eta_squared_interaction <- model_summary[[1]]$"Sum Sq"[3] / SST
cat("Eta-squared (effect size):\n")
## Eta-squared (effect size):
cat("Condition:", round(eta_squared_condition, 4), "\n")
## Condition: 0.1293
cat("Period:", round(eta_squared_period, 4), "\n")
## Period: 0.7207
cat("Interaction:", round(eta_squared_interaction, 4), "\n")
## Interaction: 0.1069
#(c) Test the main effects of Condition sliced by Period 1
# Filter data for Period 1
period1_data <- silage_data[silage_data$period == "1", ]
# Perform one-way ANOVA for Condition effect in Period 1
period1_aov <- aov(lactic_acid ~ condition, data = period1_data)
summary(period1_aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## condition 1 6.25 6.25 0.656 0.503
## Residuals 2 19.06 9.53
# (d) Test the main effects of Period sliced by wilted condition
# Filter data for Wilted condition
wilted_data <- silage_data[silage_data$condition == "Wilted", ]
# Perform one-way ANOVA for Period effect in Wilted condition
wilted_aov <- aov(lactic_acid ~ period, data = wilted_data)
summary(wilted_aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## period 4 708.0 177.00 25.45 0.0016 **
## Residuals 5 34.8 6.95
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
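# The slice tests above refit separate one-way ANOVAs, so each uses only its own
# slice's error term. A sketch of slice tests that reuse the pooled error from
# the full two-way model instead (assuming the emmeans package is installed):
# library(emmeans)
# joint_tests(silage_aov, by = "period")      # condition effect within each period
# joint_tests(silage_aov, by = "condition")   # period effect within each condition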
# Additional visualization: means plot
library(ggplot2)
ggplot(means, aes(x = period, y = lactic_acid, group = condition, color = condition)) +
geom_line() +
geom_point(size = 3) +
labs(title = "Mean Lactic Acid by Period and Condition",
x = "Period", y = "Lactic Acid") +
theme_minimal()
##Problem 6
# Input the paper density measurements
paper_density <- c(0.816, 0.836, 0.815, 0.822, 0.822, 0.843, 0.824,
0.788, 0.782, 0.795, 0.805, 0.836, 0.738, 0.772, 0.776)
# Display the data
paper_density
## [1] 0.816 0.836 0.815 0.822 0.822 0.843 0.824 0.788 0.782 0.795 0.805 0.836
## [13] 0.738 0.772 0.776
# Null hypothesis: median = 0.8
# Alternative hypothesis: median > 0.8
# Count how many values are greater than 0.8
greater_than <- sum(paper_density > 0.8)
# Count how many values are less than 0.8
less_than <- sum(paper_density < 0.8)
# Count how many values are equal to 0.8
equal_to <- sum(paper_density == 0.8)
# Display the counts
cat("Values greater than 0.8:", greater_than, "\n")
## Values greater than 0.8: 9
cat("Values less than 0.8:", less_than, "\n")
## Values less than 0.8: 6
cat("Values equal to 0.8:", equal_to, "\n")
## Values equal to 0.8: 0
# Total number of non-ties
n <- greater_than + less_than
# Number of successes (values > 0.8)
x <- greater_than
# Calculate p-value using binomial test
# For H1: median > 0.8, we use the upper tail probability
p_value <- pbinom(x-1, n, 0.5, lower.tail = FALSE)
cat("P-value:", p_value, "\n")
## P-value: 0.3036194
# Alternative way using the binom.test function
binom_test <- binom.test(x, n, p = 0.5, alternative = "greater")
print(binom_test)
##
## Exact binomial test
##
## data: x and n
## number of successes = 9, number of trials = 15, p-value = 0.3036
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
## 0.3595652 1.0000000
## sample estimates:
## probability of success
## 0.6
# Decision at α = 0.05
alpha <- 0.05
if(p_value < alpha) {
cat("Reject H0: There is sufficient evidence that the median paper density is greater than 0.8\n")
} else {
cat("Fail to reject H0: There is insufficient evidence that the median paper density is greater than 0.8\n")
}
## Fail to reject H0: There is insufficient evidence that the median paper density is greater than 0.8
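# The same sign test is available as a canned routine; a sketch using the BSDA
# package (assuming it is installed), which should agree with the binomial
# calculation above:
# library(BSDA)
# SIGN.test(paper_density, md = 0.8, alternative = "greater")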
# Visualize the data
boxplot(paper_density, main = "Paper Density Measurements",
ylab = "Density (gms/cubic cm)", horizontal = TRUE)
abline(v = 0.8, col = "red", lwd = 2, lty = 2)
legend("topleft", legend = "Hypothesized median = 0.8", col = "red", lwd = 2, lty = 2)
# Create a histogram
hist(paper_density, breaks = 10, main = "Distribution of Paper Density",
xlab = "Density (gms/cubic cm)")
abline(v = 0.8, col = "red", lwd = 2, lty = 2)
##Problem 7
#Wilcoxon's Signed-Rank Test for Quiz Scores
# Input the data
morning_scores <- c(6, 5, 8.5, 7, 6, 9, 5, 5, 10, 8.5)
afternoon_scores <- c(2.5, 10, 10, 4.5, 5.5, 7, 6, 8, 9.5, 6.5)
# Display the data
data.frame(
Morning = morning_scores,
Afternoon = afternoon_scores
)
# Calculate the differences (Morning - Afternoon)
differences <- morning_scores - afternoon_scores
# Display the differences
data.frame(
Morning = morning_scores,
Afternoon = afternoon_scores,
Difference = differences
)
# Compute the absolute differences and their signs
abs_diff <- abs(differences)
signs <- sign(differences)
# Create a data frame for the analysis
analysis_df <- data.frame(
Morning = morning_scores,
Afternoon = afternoon_scores,
Difference = differences,
AbsDifference = abs_diff,
Sign = signs
)
# Sort by absolute difference
sorted_df <- analysis_df[order(abs_diff), ]
# Assign ranks (handling ties correctly)
ranks <- rank(sorted_df$AbsDifference, ties.method = "average")
sorted_df$Rank <- ranks
# Calculate signed ranks
sorted_df$SignedRank <- sorted_df$Sign * sorted_df$Rank
# Display the complete analysis
print(sorted_df)
## Morning Afternoon Difference AbsDifference Sign Rank SignedRank
## 5 6.0 5.5 0.5 0.5 1 1.5 1.5
## 9 10.0 9.5 0.5 0.5 1 1.5 1.5
## 7 5.0 6.0 -1.0 1.0 -1 3.0 -3.0
## 3 8.5 10.0 -1.5 1.5 -1 4.0 -4.0
## 6 9.0 7.0 2.0 2.0 1 5.5 5.5
## 10 8.5 6.5 2.0 2.0 1 5.5 5.5
## 4 7.0 4.5 2.5 2.5 1 7.0 7.0
## 8 5.0 8.0 -3.0 3.0 -1 8.0 -8.0
## 1 6.0 2.5 3.5 3.5 1 9.0 9.0
## 2 5.0 10.0 -5.0 5.0 -1 10.0 -10.0
# Calculate the test statistic W (sum of positive ranks)
W_plus <- sum(sorted_df$SignedRank[sorted_df$SignedRank > 0])
W_minus <- abs(sum(sorted_df$SignedRank[sorted_df$SignedRank < 0]))
cat("Sum of positive ranks (W+):", W_plus, "\n")
## Sum of positive ranks (W+): 30
cat("Sum of negative ranks (W-):", W_minus, "\n")
## Sum of negative ranks (W-): 25
# For a one-sided test (morning > afternoon), we use W+
test_statistic <- W_plus
# Perform the Wilcoxon signed-rank test in R
wilcox_test <- wilcox.test(morning_scores, afternoon_scores,
paired = TRUE, alternative = "greater")
## Warning in wilcox.test.default(morning_scores, afternoon_scores, paired = TRUE,
## : cannot compute exact p-value with ties
print(wilcox_test)
##
## Wilcoxon signed rank test with continuity correction
##
## data: morning_scores and afternoon_scores
## V = 30, p-value = 0.4191
## alternative hypothesis: true location shift is greater than 0
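# The reported p-value comes from a normal approximation with a continuity
# correction and an adjustment for the tied absolute differences; a sketch of
# that calculation from W+ (n_pairs, mu_W, var_W are hypothetical helper names):
n_pairs <- length(differences)
mu_W    <- n_pairs * (n_pairs + 1) / 4                       # mean of W+ under H0
tie_sizes <- table(abs_diff)                                 # sizes of the tied groups
var_W   <- n_pairs * (n_pairs + 1) * (2 * n_pairs + 1) / 24 -
           sum(tie_sizes^3 - tie_sizes) / 48                 # tie-corrected variance
z <- (W_plus - mu_W - 0.5) / sqrt(var_W)                     # continuity correction
pnorm(z, lower.tail = FALSE)                                 # one-sided p-value (~0.419)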
# Create a visualization
boxplot(morning_scores, afternoon_scores,
names = c("Morning", "Afternoon"),
main = "Quiz Scores Comparison",
ylab = "Score (out of 10)")
# Add the means to the boxplot
points(c(1, 2), c(mean(morning_scores), mean(afternoon_scores)),
col = "red", pch = 19)
legend("bottomright", legend = "Mean", col = "red", pch = 19)
# Plot the differences
hist(differences,
main = "Histogram of Differences (Morning - Afternoon)",
xlab = "Difference in Score")
abline(v = 0, col = "red", lwd = 2, lty = 2)
##Problem 2
# Create sample data structure based on information given
# (This is simulated since we don't have the raw data)
# Let's assume equal sample sizes across treatments (8 per treatment)
groups <- rep(1:5, each=8)
# Set total SS and between SS to match our ANOVA table
# (Note: actual analysis would use raw data)
# To perform this analysis with raw data:
# aov_result <- aov(response ~ factor(treatment), data=your_data)
# summary(aov_result)
# Manual calculation to match our table:
MS_model <- 156.12/4
MS_error <- 44.83/35
F_value <- MS_model/MS_error
p_value <- pf(F_value, df1=4, df2=35, lower.tail=FALSE)
# Output results
cat("DF Model =", 4, "\n")
## DF Model = 4
cat("DF Error =", 35, "\n")
## DF Error = 35
cat("DF Total =", 39, "\n")
## DF Total = 39
cat("SS Model =", 156.12, "\n")
## SS Model = 156.12
cat("SS Error =", 44.83, "\n")
## SS Error = 44.83
cat("SS Total =", 200.95, "\n")
## SS Total = 200.95
cat("MS Model =", MS_model, "\n")
## MS Model = 39.03
cat("MS Error =", MS_error, "\n")
## MS Error = 1.280857
cat("F value =", F_value, "\n")
## F value = 30.47178
cat("p-value =", format(p_value, scientific=TRUE), "\n")
## p-value = 5.789542e-11
cat("Decision: Reject H0 at alpha =", 0.05, "\n")
## Decision: Reject H0 at alpha = 0.05
# Create a data frame with the ANOVA results from the table
anova_results <- data.frame(
Source = c("Model", "Error", "Total"),
DF = c(4, 35, 39),
SS = c(156.12, 44.83, 200.95),
MS = c(39.03, 1.281, NA),
F_value = c(30.47, NA, NA),
p_value = c(5.79e-11, NA, NA)
)
# Print the ANOVA table
print(anova_results)
## Source DF SS MS F_value p_value
## 1 Model 4 156.12 39.030 30.47 5.79e-11
## 2 Error 35 44.83 1.281 NA NA
## 3 Total 39 200.95 NA NA NA
# Interpret the results
alpha <- 0.05
if (anova_results$p_value[1] < alpha) {
cat("At significance level", alpha, ", we reject the null hypothesis.\n")
cat("There is significant evidence that at least one treatment mean differs from the others.\n")
} else {
cat("At significance level", alpha, ", we fail to reject the null hypothesis.\n")
cat("There is insufficient evidence that any treatment means differ.\n")
}
## At significance level 0.05 , we reject the null hypothesis.
## There is significant evidence that at least one treatment mean differs from the others.
# Calculate critical F value
critical_F <- qf(1-alpha, df1=4, df2=35)
cat("Critical F value at alpha =", alpha, "is", round(critical_F, 4), "\n")
## Critical F value at alpha = 0.05 is 2.6415
cat("Since the calculated F value (", anova_results$F_value[1], ") exceeds the critical value, we reject H0.\n")
## Since the calculated F value ( 30.47 ) exceeds the critical value, we reject H0.
# If we wanted to simulate data that would produce similar ANOVA results:
set.seed(123)
# We know there are 5 treatment groups with a total of 40 observations
n_per_group <- 8 # Since total df = 39, total n = 40
groups <- rep(1:5, each=n_per_group)
# Creating means that would result in significant differences
group_means <- c(2, 3, 5, 6, 4)
# Creating simulated data with the right variance to match our MS_error
variance <- 1.281 # MS_error from the table
y <- rnorm(n=length(groups), mean=group_means[groups], sd=sqrt(variance))
# Run ANOVA on simulated data
sim_model <- aov(y ~ factor(groups))
summary(sim_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(groups) 4 57.86 14.464 14.72 3.8e-07 ***
## Residuals 35 34.39 0.983
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1