# Load necessary libraries
library(ggplot2) # For QQ plot
library(car) # For Levene's test
## Loading required package: carData
library(stats) # For Shapiro-Wilk test
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(rstatix) # For normality tests
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
library(dunn.test)
library(ggpubr)
# Read the CSV file
df <- read.csv('/Users/rafael/Desktop/UCSB_Grad/BIOE 212/Homework/HW5_Sample_Sheet _Group_two.csv') # Replace with actual file path
# Ensure treatment column is a factor
df$Treatment <- as.factor(df$Treatment)
# Function to compute residuals for a single dataset (one-sample model)
compute_residuals <- function(y) {
x <- seq_along(y) # Index values (1, 2, 3, ...)
model <- lm(y ~ x) # Fit linear model
predicted <- predict(model) # Compute predicted values
return(y - predicted) # Compute residuals manually
}
# Function to compute variance for a single dataset
compute_single_group_variance <- function(y) {
return(var(y)) # Compute variance of the dataset
}
# Function to perform Levene's test for homogeneity of variances
perform_levene_test <- function(df, value_col, group_col) {
long_df <- data.frame(values = df[[value_col]], group = df[[group_col]]) # Convert to long format
levene_test <- leveneTest(values ~ group, data = long_df) # Levene's test
print("Levene's Test for Equal Variances:")
print(levene_test)
return(levene_test)
}
# Function to plot QQ plot for residuals
plot_qq_residuals <- function(residuals, title) {
qq_plot <- ggplot(data.frame(residuals = residuals), aes(sample = residuals)) +
stat_qq() +
stat_qq_line() +
ggtitle(title) +
theme_minimal()
print(qq_plot)
}
# Function to plot homoscedasticity check (Residuals vs Fitted Values)
plot_homoscedasticity <- function(anova_model, title) {
residuals_anova <- residuals(anova_model)
fitted_values <- fitted(anova_model)
homoscedasticity_plot <- ggplot(data.frame(Fitted = fitted_values, Residuals = residuals_anova),
aes(x = Fitted, y = Residuals)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "blue") +
ggtitle(title) +
theme_minimal()
print(homoscedasticity_plot)
}
# Compute residuals for each treatment group
residuals_list <- lapply(split(df$`nuclear_cytoplasmic_brachyury_ratio`, df$Treatment), compute_residuals)
# Perform Shapiro-Wilk test for each treatment's residuals
shapiro_results <- lapply(residuals_list, shapiro.test)
print(shapiro_results)
## $Treatment_A
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.96523, p-value = 0.6526
##
##
## $Treatment_B
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.97855, p-value = 0.9569
##
##
## $Treatment_D
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.97749, p-value = 0.7557
##
##
## $Treatment_F
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.92199, p-value = 0.03022
##
##
## $Treatment_H
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.95669, p-value = 0.48
# Compute variance for each treatment group
variances <- sapply(split(df$`nuclear_cytoplasmic_brachyury_ratio`, df$Treatment), compute_single_group_variance)
print(variances)
## Treatment_A Treatment_B Treatment_D Treatment_F Treatment_H
## 0.005368700 0.009660559 0.027971679 0.027720539 0.005805254
# Perform Levene's test for homogeneity of variances
levene_test <- perform_levene_test(df, "nuclear_cytoplasmic_brachyury_ratio", "Treatment")
## [1] "Levene's Test for Equal Variances:"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 3.0986 0.01869 *
## 105
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Perform one-way ANOVA
anova_model <- aov(`nuclear_cytoplasmic_brachyury_ratio` ~ Treatment, data = df)
anova_result <- summary(anova_model)
print(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 4 0.8976 0.22439 12.31 3.04e-08 ***
## Residuals 105 1.9143 0.01823
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# If ANOVA is significant, perform Tukey's post-hoc test
if (anova_result[[1]]$"Pr(>F)"[1] < 0.05) {
posthoc <- TukeyHSD(anova_model)
# Extract Tukey's test results into a data frame
tukey_results <- as.data.frame(posthoc$Treatment)
tukey_results$Comparison <- rownames(tukey_results)
# Select significant comparisons (p-value < 0.05)
tukey_significant <- tukey_results[tukey_results$`p adj` < 0.05, ]
print("Significant Comparisons from Tukey's Test:")
print(tukey_significant)
}
## [1] "Significant Comparisons from Tukey's Test:"
## diff lwr upr p adj
## Treatment_D-Treatment_A 0.2406742 0.132480299 0.3488682 1.263292e-07
## Treatment_H-Treatment_A 0.2024478 0.083927276 0.3209683 6.496919e-05
## Treatment_D-Treatment_B 0.1855903 0.048734586 0.3224460 2.509519e-03
## Treatment_H-Treatment_B 0.1473638 0.002206455 0.2925212 4.478078e-02
## Treatment_F-Treatment_D -0.1436784 -0.240449989 -0.0469068 7.080263e-04
## Comparison
## Treatment_D-Treatment_A Treatment_D-Treatment_A
## Treatment_H-Treatment_A Treatment_H-Treatment_A
## Treatment_D-Treatment_B Treatment_D-Treatment_B
## Treatment_H-Treatment_B Treatment_H-Treatment_B
## Treatment_F-Treatment_D Treatment_F-Treatment_D
# Extract significant comparisons into a list format
significant_comparisons <- lapply(strsplit(tukey_significant$Comparison, "-"), function(x) c(x[1], x[2]))
# Plot homoscedasticity (Residuals vs Fitted Values)
plot_homoscedasticity(anova_model, "Residuals vs Fitted Values")
## `geom_smooth()` using formula = 'y ~ x'

# Plot QQ plot for residuals of the ANOVA model
plot_qq_residuals(residuals(anova_model), "QQ Plot of ANOVA Residuals")

# Plot with significance bars using ggpubr
ggplot(df, aes(x = Treatment, y = `nuclear_cytoplasmic_brachyury_ratio`)) +
stat_summary(fun = mean, geom = "bar", fill = "lightblue", color = "black", alpha = 0.7) + # Mean as bar
stat_summary(fun.data = function(x) {
mean_x <- mean(x)
sd_x <- sd(x)
data.frame(y = mean_x, ymin = mean_x - sd_x, ymax = mean_x + sd_x)
}, geom = "errorbar", width = 0.2, color = "black") + # SD error bars
theme_minimal() +
labs(title = "Mean Nuclear/Cytoplasmic Brachyury Ratio by Treatment Group",
x = "Treatment Group",
y = "Nuclear/Cytoplasmic Brachyury Ratio") +
stat_compare_means(method = "anova", label.y = max(df$nuclear_cytoplasmic_brachyury_ratio) + 0.5) + # ANOVA p-value at the top
stat_compare_means(comparisons = significant_comparisons,
method = "t.test",
label = "p.signif",
p.adjust.method = "BH") # Apply Benjamini-Hochberg correction

# Standard Curve Data
concentration <- c(0.46, 0.34, 0.17, 0.11, 0.027, 0.013, 0.007, 0) # mM
absorbance <- c(1.111, 1.112, 0.876, 0.832, 0.551, 0.505, 0.398, 0.387) # Average absorbance from wells
standard_model <- lm(log(concentration + 1) ~ absorbance)
# Get R-squared value
r_squared <- summary(standard_model)$r.squared
# Plot the standard curve
ggplot(data = data.frame(concentration, absorbance), aes(x = concentration, y = absorbance)) +
geom_point(size = 3, color = "blue") +
geom_smooth(method = "lm", se = FALSE, color = "red") +
ggtitle("Standard Curve: Absorbance vs. ORO Concentration") +
xlab("Concentration of ORO (mM)") +
ylab("Absorbance") +
annotate("text", x = 0.2, y = 2.5, label = paste("y =", round(coef(standard_model)[2], 3), "*x +", round(coef(standard_model)[1], 3))) +
annotate("text", x = 0.2, y = 2.3, label = paste("R² =", round(r_squared, 3))) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Function to calculate concentration from absorbance using the standard curve equation
predict_concentration <- function(abs_value) {
(abs_value - coef(standard_model)[1]) / coef(standard_model)[2]
}
# Step 1: Compute Residuals
compute_residuals <- function(y) {
x <- seq_along(y) # Index values (1, 2, 3, ...)
model <- lm(y ~ x) # Fit linear model
predicted <- predict(model) # Compute predicted values
return(y - predicted) # Compute residuals manually
}
# Function to plot Q-Q plot for each group
plot_qq_by_group <- function(df, value_col, group_col) {
unique_groups <- unique(df[[group_col]])
for (group in unique_groups) {
group_data <- df[df[[group_col]] == group, value_col] # Subset data for the group
qq_plot <- ggplot(data.frame(residuals = group_data), aes(sample = residuals)) +
stat_qq() +
stat_qq_line() +
ggtitle(paste("Q-Q Plot for", group)) +
theme_minimal()
print(qq_plot)
}
}
# Function to perform Shapiro-Wilk test for each group
perform_shapiro_by_group <- function(df, value_col, group_col) {
unique_groups <- unique(df[[group_col]])
shapiro_results <- list()
for (group in unique_groups) {
group_data <- df[df[[group_col]] == group, value_col] # Subset data for the group
shapiro_test <- shapiro.test(group_data) # Perform test
shapiro_results[[group]] <- shapiro_test
print(paste("Shapiro-Wilk Test for", group))
print(shapiro_test)
}
return(shapiro_results)
}
# Step 4: Compute Variance for Each Group
compute_group_variances <- function(df, value_col, group_col) {
variances <- aggregate(df[[value_col]], by = list(df[[group_col]]), FUN = var)
colnames(variances) <- c("Group", "Variance")
print("Variances for Each Group:")
print(variances)
return(variances)
}
# Step 5: Create Homoscedasticity Plot (Residuals vs Fitted Values)
plot_homoscedasticity <- function(anova_model, title) {
residuals_anova <- residuals(anova_model)
fitted_values <- fitted(anova_model)
homoscedasticity_plot <- ggplot(data.frame(Fitted = fitted_values, Residuals = residuals_anova),
aes(x = Fitted, y = Residuals)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "blue") +
ggtitle(title) +
theme_minimal()
print(homoscedasticity_plot)
}
# Step 6: Perform Levene's test for homogeneity of variances
perform_levene_test <- function(df, value_col, group_col) {
long_df <- data.frame(values = df[[value_col]], group = df[[group_col]]) # Convert to long format
levene_test <- leveneTest(values ~ group, data = long_df) # Levene's test
print("Levene's Test for Equal Variances:")
print(levene_test)
return(levene_test)
}
# Load Sample Data
sample_data <- data.frame(
Group = rep(c("Myo", "Osteo", "Carti", "Teno", "Uni", "Blank"), each = 8),
Replicate = rep(rep(1:4, each = 2), times = 6),
Well = c(
"A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8",
"A9", "A10", "A11", "A12", "B1", "B2", "B3", "B4",
"B5", "B6", "B7", "B8", "B9", "B10", "B11", "B12",
"C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8",
"C9", "C10", "C11", "C12", "D1", "D2", "D3", "D4",
"D5", "D6", "D7", "D8", "D9", "D10", "D11", "D12"
),
Absorbance = c(
0.187, 0.175, 0.174, 0.187, 0.178, 0.172, 0.181, 0.179,
0.217, 0.225, 0.234, 0.227, 0.194, 0.207, 0.206, 0.203,
0.209, 0.208, 0.195, 0.189, 0.191, 0.200, 0.195, 0.194,
0.185, 0.422, 0.289, 0.404, 0.172, 0.170, 0.165, 0.164,
0.171, 0.163, 0.166, 0.168, 0.172, 0.181, 0.179, 0.220,
0.226, 0.214, 0.230, 0.217, 0.225, 0.234, 0.227, 0.194
)
)
# Convert absorbance to concentration
sample_data$Concentration <- sapply(sample_data$Absorbance, predict_concentration)
# Perform Analysis
residuals_data <- compute_residuals(sample_data$Absorbance)
# Run Q-Q plots for each treatment group
plot_qq_by_group(sample_data, "Absorbance", "Group")






# Run Shapiro-Wilk tests for each treatment group
shapiro_results <- perform_shapiro_by_group(sample_data, "Absorbance", "Group")
## [1] "Shapiro-Wilk Test for Myo"
##
## Shapiro-Wilk normality test
##
## data: group_data
## W = 0.91687, p-value = 0.405
##
## [1] "Shapiro-Wilk Test for Osteo"
##
## Shapiro-Wilk normality test
##
## data: group_data
## W = 0.95543, p-value = 0.7656
##
## [1] "Shapiro-Wilk Test for Carti"
##
## Shapiro-Wilk normality test
##
## data: group_data
## W = 0.88514, p-value = 0.2107
##
## [1] "Shapiro-Wilk Test for Teno"
##
## Shapiro-Wilk normality test
##
## data: group_data
## W = 0.74551, p-value = 0.007285
##
## [1] "Shapiro-Wilk Test for Uni"
##
## Shapiro-Wilk normality test
##
## data: group_data
## W = 0.73032, p-value = 0.004937
##
## [1] "Shapiro-Wilk Test for Blank"
##
## Shapiro-Wilk normality test
##
## data: group_data
## W = 0.86129, p-value = 0.1237
group_variances <- compute_group_variances(sample_data, "Absorbance", "Group")
## [1] "Variances for Each Group:"
## Group Variance
## 1 Blank 1.601250e-04
## 2 Carti 5.541071e-05
## 3 Myo 3.183929e-05
## 4 Osteo 1.904107e-04
## 5 Teno 1.229227e-02
## 6 Uni 3.322857e-04
# Fit ANOVA model to check homoscedasticity
anova_model <- aov(Absorbance ~ Group, data = sample_data)
plot_homoscedasticity(anova_model, "Homoscedasticity Check: Residuals vs Fitted")
## `geom_smooth()` using formula = 'y ~ x'

# Perform Levene's Test for homogeneity of variances
perform_levene_test(sample_data, "Absorbance", "Group")
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## [1] "Levene's Test for Equal Variances:"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 3.6817 0.00748 **
## 42
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 3.6817 0.00748 **
## 42
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Perform Kruskal-Wallis
kruskal_test <- kruskal.test(Absorbance ~ Group, data = sample_data)
# Print Kruskal-Wallis results
print("Kruskal-Wallis Test:")
## [1] "Kruskal-Wallis Test:"
print(kruskal_test)
##
## Kruskal-Wallis rank sum test
##
## data: Absorbance by Group
## Kruskal-Wallis chi-squared = 22.119, df = 5, p-value = 0.0004971
# Perform Benjamini-Hochberg post-hoc test
# Perform pairwise Dunn’s test
# Perform Dunn’s test with Benjamini-Hochberg correction
dunn_results <- dunn.test(sample_data$Concentration, sample_data$Group, method = "bh")
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 22.1187, df = 5, p-value = 0
##
##
## Comparison of x by group
## (Benjamini-Hochberg)
## Col Mean-|
## Row Mean | Blank Carti Myo Osteo Teno
## ---------+-------------------------------------------------------
## Carti | 1.572111
## | 0.0966
## |
## Myo | 3.322871 1.750760
## | 0.0033* 0.0857
## |
## Osteo | 0.482352 -1.089759 -2.840519
## | 0.3373 0.1724 0.0084*
## |
## Teno | 2.179518 0.607406 -1.143353 1.697165
## | 0.0439 0.3136 0.1724 0.0841
## |
## Uni | 3.698034 2.125923 0.375162 3.215682 1.518516
## | 0.0016* 0.0419 0.3538 0.0033* 0.0967
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
# Extract pairwise comparisons & adjusted p-values
significance_table <- data.frame(
group1 = sapply(strsplit(dunn_results$comparisons, " - "), `[`, 1),
group2 = sapply(strsplit(dunn_results$comparisons, " - "), `[`, 2),
p.adj = dunn_results$P.adjusted
)
# Print significance results
print(significance_table)
## group1 group2 p.adj
## 1 Blank Carti 0.096603940
## 2 Blank Myo 0.003341095
## 3 Carti Myo 0.085700566
## 4 Blank Osteo 0.337261964
## 5 Carti Osteo 0.172387072
## 6 Myo Osteo 0.008445025
## 7 Blank Teno 0.043939812
## 8 Carti Teno 0.313604463
## 9 Myo Teno 0.172426201
## 10 Osteo Teno 0.084061260
## 11 Blank Uni 0.001629564
## 12 Carti Uni 0.041887057
## 13 Myo Uni 0.353769646
## 14 Osteo Uni 0.003253370
## 15 Teno Uni 0.096663154
ggplot(sample_data, aes(x = Group, y = Concentration, fill = Group)) +
geom_boxplot(alpha = 0.7) +
geom_jitter(width = 0.2, alpha = 0.6) +
ggtitle("Quantified Concentration of Stain Across Groups") +
xlab("Group") +
ylab("Stain Concentration (mM)") +
theme_minimal() +
stat_compare_means(
method = "kruskal.test",
label.y = max(sample_data$Concentration) + 0.4,
label.x = 5 # Centers the label horizontally
) +
stat_compare_means(
comparisons = list(c("Myo", "Osteo"), c("Myo", "Carti"), c("Myo", "Teno"),
c("Myo", "Uni"), c("Myo", "Blank"), c("Osteo", "Carti")),
method = "wilcox.test", # Dunn’s test is based on Wilcoxon test
p.adjust.method = "BH",
label = "p.signif"
) +
theme(plot.title = element_text(hjust = 0.5)) # Center title
## Warning in wilcox.test.default(c(0.86354511408227, 0.837169049328785,
## 0.834971043932661, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.86354511408227, 0.837169049328785,
## 0.834971043932661, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.86354511408227, 0.837169049328785,
## 0.834971043932661, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.86354511408227, 0.837169049328785,
## 0.834971043932661, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.86354511408227, 0.837169049328785,
## 0.834971043932661, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.929485275965982, 0.947069319134971,
## 0.966851367700085, : cannot compute exact p-value with ties
