# Load necessary libraries
library(ggplot2) # For QQ plot
library(car) # For Levene's test
## Loading required package: carData
library(stats) # For Shapiro-Wilk test
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(rstatix) # For normality tests
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
library(dunn.test)
library(ggpubr)
# Read the CSV file
df <- read.csv('/Users/rafael/Desktop/UCSB_Grad/BIOE 212/Homework/HW5_Sample_Sheet _Group_two.csv') # Replace with actual file path
# Ensure treatment column is a factor
df$Treatment <- as.factor(df$Treatment)
# Function to compute residuals for a single dataset (one-sample model)
compute_residuals <- function(y) {
x <- seq_along(y) # Index values (1, 2, 3, ...)
model <- lm(y ~ x) # Fit linear model
predicted <- predict(model) # Compute predicted values
return(y - predicted) # Compute residuals manually
}
# Function to compute variance for a single dataset
compute_single_group_variance <- function(y) {
return(var(y)) # Compute variance of the dataset
}
# Function to perform Levene's test for homogeneity of variances
perform_levene_test <- function(df, value_col, group_col) {
long_df <- data.frame(values = df[[value_col]], group = df[[group_col]]) # Convert to long format
levene_test <- leveneTest(values ~ group, data = long_df) # Levene's test
print("Levene's Test for Equal Variances:")
print(levene_test)
return(levene_test)
}
# Function to plot QQ plot for residuals
plot_qq_residuals <- function(residuals, title) {
qq_plot <- ggplot(data.frame(residuals = residuals), aes(sample = residuals)) +
stat_qq() +
stat_qq_line() +
ggtitle(title) +
theme_minimal()
print(qq_plot)
}
# Function to plot homoscedasticity check (Residuals vs Fitted Values)
plot_homoscedasticity <- function(anova_model, title) {
residuals_anova <- residuals(anova_model)
fitted_values <- fitted(anova_model)
homoscedasticity_plot <- ggplot(data.frame(Fitted = fitted_values, Residuals = residuals_anova),
aes(x = Fitted, y = Residuals)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "blue") +
ggtitle(title) +
theme_minimal()
print(homoscedasticity_plot)
}
# Compute residuals for each treatment group
residuals_list <- lapply(split(df$`nuclear_cytoplasmic_brachyury_ratio`, df$Treatment), compute_residuals)
# Perform Shapiro-Wilk test for each treatment's residuals
shapiro_results <- lapply(residuals_list, shapiro.test)
print(shapiro_results)
## $Treatment_A
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.96523, p-value = 0.6526
##
##
## $Treatment_B
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.97855, p-value = 0.9569
##
##
## $Treatment_D
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.97749, p-value = 0.7557
##
##
## $Treatment_F
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.92199, p-value = 0.03022
##
##
## $Treatment_H
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.95669, p-value = 0.48
# Compute variance for each treatment group
variances <- sapply(split(df$`nuclear_cytoplasmic_brachyury_ratio`, df$Treatment), compute_single_group_variance)
print(variances)
## Treatment_A Treatment_B Treatment_D Treatment_F Treatment_H
## 0.005368700 0.009660559 0.027971679 0.027720539 0.005805254
# Perform Levene's test for homogeneity of variances
levene_test <- perform_levene_test(df, "nuclear_cytoplasmic_brachyury_ratio", "Treatment")
## [1] "Levene's Test for Equal Variances:"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 3.0986 0.01869 *
## 105
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Perform one-way ANOVA
anova_model <- aov(`nuclear_cytoplasmic_brachyury_ratio` ~ Treatment, data = df)
anova_result <- summary(anova_model)
print(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 4 0.8976 0.22439 12.31 3.04e-08 ***
## Residuals 105 1.9143 0.01823
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# If ANOVA is significant, perform Tukey's post-hoc test
if (anova_result[[1]]$"Pr(>F)"[1] < 0.05) {
posthoc <- TukeyHSD(anova_model)
print(posthoc)
}
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = nuclear_cytoplasmic_brachyury_ratio ~ Treatment, data = df)
##
## $Treatment
## diff lwr upr p adj
## Treatment_B-Treatment_A 0.05508394 -0.090073451 0.20024134 0.8297043
## Treatment_D-Treatment_A 0.24067423 0.132480299 0.34886816 0.0000001
## Treatment_F-Treatment_A 0.09699584 -0.011198095 0.20518977 0.1010740
## Treatment_H-Treatment_A 0.20244779 0.083927276 0.32096831 0.0000650
## Treatment_D-Treatment_B 0.18559029 0.048734586 0.32244599 0.0025095
## Treatment_F-Treatment_B 0.04191189 -0.094943808 0.17876760 0.9141267
## Treatment_H-Treatment_B 0.14736385 0.002206455 0.29252124 0.0447808
## Treatment_F-Treatment_D -0.14367839 -0.240449989 -0.04690680 0.0007080
## Treatment_H-Treatment_D -0.03822644 -0.146420373 0.06996749 0.8633936
## Treatment_H-Treatment_F 0.10545195 -0.002741979 0.21364589 0.0598591
# Plot homoscedasticity (Residuals vs Fitted Values)
plot_homoscedasticity(anova_model, "Residuals vs Fitted Values")
## `geom_smooth()` using formula = 'y ~ x'

# Plot QQ plot for residuals of the ANOVA model
plot_qq_residuals(residuals(anova_model), "QQ Plot of ANOVA Residuals")

# Plot results
ggplot(df, aes(x = Treatment, y = `nuclear_cytoplasmic_brachyury_ratio`)) +
stat_summary(fun = mean, geom = "bar", fill = "lightblue", color = "black", alpha = 0.7) + # Mean as bar
stat_summary(fun.data = function(x) {
mean_x <- mean(x)
sd_x <- sd(x)
data.frame(y = mean_x, ymin = mean_x - sd_x, ymax = mean_x + sd_x)
}, geom = "errorbar", width = 0.2, color = "black") + # SD error bars
theme_minimal() +
labs(title = "Mean Nuclear/Cytoplasmic Brachyury Ratio by Treatment Group",
x = "Treatment Group",
y = "Nuclear/Cytoplasmic Brachyury Ratio")

# Standard Curve Data
concentration <- c(0.46, 0.46, 0.34, 0.34, 0.17, 0.17, 0.11, 0.11, 0.027, 0.027, 0.013, 0.013, 0.007, 0.007, 0, 0) # mM
absorbance <- c(1.111, 1.112, 0.876, 0.832, 0.551, 0.505, 0.398, 0.387, 0.221, 0.293, 0.2, 0.198, 0.187, 0.19, 0.165, 0.177) # Average absorbance from wells
standard_model <- lm(log(concentration + 1) ~ absorbance)
# Get R-squared value
r_squared <- summary(standard_model)$r.squared
# Plot the standard curve
ggplot(data = data.frame(concentration, absorbance), aes(x = concentration, y = absorbance)) +
geom_point(size = 3, color = "blue") +
geom_smooth(method = "lm", se = FALSE, color = "red") +
ggtitle("Standard Curve: Absorbance vs. ORO Concentration") +
xlab("Concentration of ORO (mM)") +
ylab("Absorbance") +
annotate("text", x = 0.2, y = 2.5, label = paste("y =", round(coef(standard_model)[2], 3), "*x +", round(coef(standard_model)[1], 3))) +
annotate("text", x = 0.2, y = 2.3, label = paste("R² =", round(r_squared, 3))) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Function to calculate concentration from absorbance using the standard curve equation
predict_concentration <- function(abs_value) {
(abs_value - coef(standard_model)[1]) / coef(standard_model)[2]
}
# Step 1: Compute Residuals
compute_residuals <- function(y) {
x <- seq_along(y) # Index values (1, 2, 3, ...)
model <- lm(y ~ x) # Fit linear model
predicted <- predict(model) # Compute predicted values
return(y - predicted) # Compute residuals manually
}
# Function to plot Q-Q plot for each group
plot_qq_by_group <- function(df, value_col, group_col) {
unique_groups <- unique(df[[group_col]])
for (group in unique_groups) {
group_data <- df[df[[group_col]] == group, value_col] # Subset data for the group
qq_plot <- ggplot(data.frame(residuals = group_data), aes(sample = residuals)) +
stat_qq() +
stat_qq_line() +
ggtitle(paste("Q-Q Plot for", group)) +
theme_minimal()
print(qq_plot)
}
}
# Function to perform Shapiro-Wilk test for each group
perform_shapiro_by_group <- function(df, value_col, group_col) {
unique_groups <- unique(df[[group_col]])
shapiro_results <- list()
for (group in unique_groups) {
group_data <- df[df[[group_col]] == group, value_col] # Subset data for the group
shapiro_test <- shapiro.test(group_data) # Perform test
shapiro_results[[group]] <- shapiro_test
print(paste("Shapiro-Wilk Test for", group))
print(shapiro_test)
}
return(shapiro_results)
}
# Step 4: Compute Variance for Each Group
compute_group_variances <- function(df, value_col, group_col) {
variances <- aggregate(df[[value_col]], by = list(df[[group_col]]), FUN = var)
colnames(variances) <- c("Group", "Variance")
print("Variances for Each Group:")
print(variances)
return(variances)
}
# Step 5: Create Homoscedasticity Plot (Residuals vs Fitted Values)
plot_homoscedasticity <- function(anova_model, title) {
residuals_anova <- residuals(anova_model)
fitted_values <- fitted(anova_model)
homoscedasticity_plot <- ggplot(data.frame(Fitted = fitted_values, Residuals = residuals_anova),
aes(x = Fitted, y = Residuals)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "blue") +
ggtitle(title) +
theme_minimal()
print(homoscedasticity_plot)
}
# Step 6: Perform Levene's test for homogeneity of variances
perform_levene_test <- function(df, value_col, group_col) {
long_df <- data.frame(values = df[[value_col]], group = df[[group_col]]) # Convert to long format
levene_test <- leveneTest(values ~ group, data = long_df) # Levene's test
print("Levene's Test for Equal Variances:")
print(levene_test)
return(levene_test)
}
# Load Sample Data
sample_data <- data.frame(
Group = rep(c("Myo", "Osteo", "Carti", "Teno", "Uni", "Blank"), each = 8),
Replicate = rep(rep(1:4, each = 2), times = 6),
Well = c(
"A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8",
"A9", "A10", "A11", "A12", "B1", "B2", "B3", "B4",
"B5", "B6", "B7", "B8", "B9", "B10", "B11", "B12",
"C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8",
"C9", "C10", "C11", "C12", "D1", "D2", "D3", "D4",
"D5", "D6", "D7", "D8", "D9", "D10", "D11", "D12"
),
Absorbance = c(
0.187, 0.175, 0.174, 0.187, 0.178, 0.172, 0.181, 0.179,
0.22, 0.226, 0.214, 0.23, 0.217, 0.225, 0.234, 0.227,
0.194, 0.207, 0.206, 0.203, 0.2, 0.192, 0.208, 0.192,
0.209, 0.208, 0.195, 0.189, 0.191, 0.2, 0.195, 0.194,
0.11, 0.291, 0.143, 0.224, 0.185, 0.422, 0.289, 0.404,
0.172, 0.17, 0.165, 0.164, 0.171, 0.163, 0.166, 0.168
)
)
sample_data$Group <- as.factor(sample_data$Group)
str(sample_data) # Ensure Group is a factor and Absorbance is numeric
## 'data.frame': 48 obs. of 4 variables:
## $ Group : Factor w/ 6 levels "Blank","Carti",..: 3 3 3 3 3 3 3 3 4 4 ...
## $ Replicate : int 1 1 2 2 3 3 4 4 1 1 ...
## $ Well : chr "A1" "A2" "A3" "A4" ...
## $ Absorbance: num 0.187 0.175 0.174 0.187 0.178 0.172 0.181 0.179 0.22 0.226 ...
summary(sample_data) # Check for missing values or outliers
## Group Replicate Well Absorbance
## Blank:8 Min. :1.00 Length:48 Min. :0.1100
## Carti:8 1st Qu.:1.75 Class :character 1st Qu.:0.1747
## Myo :8 Median :2.50 Mode :character Median :0.1940
## Osteo:8 Mean :2.50 Mean :0.2045
## Teno :8 3rd Qu.:3.25 3rd Qu.:0.2147
## Uni :8 Max. :4.00 Max. :0.4220
# Convert absorbance to concentration
sample_data$Concentration <- sapply(sample_data$Absorbance, predict_concentration)
# Perform Analysis
residuals_data <- compute_residuals(sample_data$Absorbance)
# Run Q-Q plots for each treatment group
plot_qq_by_group(sample_data, "Absorbance", "Group")






# Run Shapiro-Wilk tests for each treatment group
shapiro_results <- perform_shapiro_by_group(sample_data, "Absorbance", "Group")
## [1] "Shapiro-Wilk Test for Myo"
##
## Shapiro-Wilk normality test
##
## data: group_data
## W = 0.91687, p-value = 0.405
##
## [1] "Shapiro-Wilk Test for Osteo"
##
## Shapiro-Wilk normality test
##
## data: group_data
## W = 0.9721, p-value = 0.9139
##
## [1] "Shapiro-Wilk Test for Carti"
##
## Shapiro-Wilk normality test
##
## data: group_data
## W = 0.86708, p-value = 0.1411
##
## [1] "Shapiro-Wilk Test for Teno"
##
## Shapiro-Wilk normality test
##
## data: group_data
## W = 0.88514, p-value = 0.2107
##
## [1] "Shapiro-Wilk Test for Uni"
##
## Shapiro-Wilk normality test
##
## data: group_data
## W = 0.93804, p-value = 0.5919
##
## [1] "Shapiro-Wilk Test for Blank"
##
## Shapiro-Wilk normality test
##
## data: group_data
## W = 0.93635, p-value = 0.5756
group_variances <- compute_group_variances(sample_data, "Absorbance", "Group")
## [1] "Variances for Each Group:"
## Group Variance
## 1 Blank 1.141071e-05
## 2 Carti 4.592857e-05
## 3 Myo 3.183929e-05
## 4 Osteo 4.498214e-05
## 5 Teno 5.541071e-05
## 6 Uni 1.312486e-02
# Fit ANOVA model to check homoscedasticity
anova_model <- aov(Absorbance ~ Group, data = sample_data)
plot_homoscedasticity(anova_model, "Homoscedasticity Check: Residuals vs Fitted")
## `geom_smooth()` using formula = 'y ~ x'

# Perform Levene's Test for homogeneity of variances
perform_levene_test(sample_data, "Absorbance", "Group")
## [1] "Levene's Test for Equal Variances:"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 18.833 8.773e-10 ***
## 42
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 18.833 8.773e-10 ***
## 42
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Ensure Group is a factor
sample_data$Group <- as.factor(sample_data$Group)
# Perform Welch’s ANOVA
welch_anova <- oneway.test(Absorbance ~ Group, data = sample_data, var.equal = FALSE)
# Print Welch’s ANOVA results
print("Welch's ANOVA Results:")
## [1] "Welch's ANOVA Results:"
print(welch_anova)
##
## One-way analysis of means (not assuming equal variances)
##
## data: Absorbance and Group
## F = 98.5, num df = 5.000, denom df = 18.977, p-value = 6.699e-13
# Ensure Group is a factor
sample_data$Group <- as.factor(sample_data$Group)
# Perform Games-Howell post-hoc test
games_howell_results <- sample_data %>%
games_howell_test(Absorbance ~ Group)
# Print full Games-Howell test results
print("Full Games-Howell Test Results:")
## [1] "Full Games-Howell Test Results:"
print(games_howell_results)
## # A tibble: 15 × 8
## .y. group1 group2 estimate conf.low conf.high p.adj p.adj.signif
## * <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Absorbance Blank Carti 0.0329 0.0236 0.0421 2 e-6 ****
## 2 Absorbance Blank Myo 0.0117 0.00388 0.0196 3 e-3 **
## 3 Absorbance Blank Osteo 0.0568 0.0476 0.0659 7.25e-9 ****
## 4 Absorbance Blank Teno 0.0302 0.0202 0.0403 1.36e-5 ****
## 5 Absorbance Blank Uni 0.0911 -0.0624 0.245 3.23e-1 ns
## 6 Absorbance Carti Myo -0.0211 -0.0314 -0.0109 1.24e-4 ***
## 7 Absorbance Carti Osteo 0.0239 0.0128 0.0349 6.58e-5 ****
## 8 Absorbance Carti Teno -0.00263 -0.0143 0.00906 9.74e-1 ns
## 9 Absorbance Carti Uni 0.0582 -0.0952 0.212 7.09e-1 ns
## 10 Absorbance Myo Osteo 0.0450 0.0348 0.0552 1.42e-8 ****
## 11 Absorbance Myo Teno 0.0185 0.00755 0.0294 9.46e-4 ***
## 12 Absorbance Myo Uni 0.0794 -0.0741 0.233 4.43e-1 ns
## 13 Absorbance Osteo Teno -0.0265 -0.0381 -0.0149 3.81e-5 ****
## 14 Absorbance Osteo Uni 0.0344 -0.119 0.188 9.48e-1 ns
## 15 Absorbance Teno Uni 0.0609 -0.0926 0.214 6.75e-1 ns
# Extract significant comparisons (p < 0.05)
significant_comparisons <- games_howell_results %>%
filter(p.adj < 0.05) %>%
select(group1, group2, p.adj, p.adj.signif)
# ✅ Convert comparisons into correct format for ggplot (list of vectors)
sig_comparisons_list <- split(significant_comparisons[, c("group1", "group2")], seq(nrow(significant_comparisons)))
sig_comparisons_list <- lapply(sig_comparisons_list, as.character) # Ensure correct type
# ✅ Dynamically adjust y-position for significance bars
# Adjust space for significance bars dynamically
max_y <- max(sample_data$Absorbance) -0.1 # Start position for bars
spacing_factor <- 0.05 # 🔹 Increase or decrease this for more or less spacing
y_positions <- seq(max_y, max_y + (length(sig_comparisons_list) - 1) * spacing_factor, by = spacing_factor)
# ✅ Prevent error if no significant comparisons exist
if (length(sig_comparisons_list) > 0) {
# Create the boxplot with significance bars
ggplot(sample_data, aes(x = Group, y = Absorbance, fill = Group)) +
geom_boxplot(alpha = 0.7, outlier.shape = NA) + # Remove outliers for clarity
geom_jitter(width = 0.2, alpha = 0.6, size = 2, color = "black") + # Improve visibility of points
ggtitle("Quantified Absorbance of Stain Across Groups") +
xlab("Group") +
ylab("Absorbance") +
theme_minimal() +
# Welch’s ANOVA p-value annotation
stat_compare_means(
method = "anova",
label.y = max_y + 0.5,
label.x.npc = "center"
) +
# Games-Howell Post-Hoc Test Significance Bars
geom_signif(
comparisons = sig_comparisons_list,
map_signif_level = TRUE, # Uses significance levels (*, **, ***)
y_position = y_positions,
tip_length = 0.03, # Adjusts length of bar tips
textsize = 5 # Adjusts significance label size
) +
# Formatting improvements
theme(
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "right"
)
} else {
print("No significant pairwise comparisons found. No significance bars added to plot.")
}
## Warning in wilcox.test.default(c(0.172, 0.17, 0.165, 0.164, 0.171, 0.163, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.172, 0.17, 0.165, 0.164, 0.171, 0.163, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.172, 0.17, 0.165, 0.164, 0.171, 0.163, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.194, 0.207, 0.206, 0.203, 0.2, 0.192, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.194, 0.207, 0.206, 0.203, 0.2, 0.192, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.187, 0.175, 0.174, 0.187, 0.178, 0.172, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.187, 0.175, 0.174, 0.187, 0.178, 0.172, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.22, 0.226, 0.214, 0.23, 0.217, 0.225, :
## cannot compute exact p-value with ties
