1 + 1[1] 2
Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.
When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:
1 + 1[1] 2
You can add options to executable code like this
[1] 4
The echo: false option disables the printing of code (only output is displayed).
data2 <- read.delim("C:/Users/ch11574/data2.txt", header = TRUE, sep = "\t")
original_names <- c("ID", "Sex (Female/Male)", "Baseline Compliance (seconds)", "Post Treatment Compliance (seconds)",
"Weight (kg)", "IRT (Celsius)", "Ambient Air Temperature (Celcius)",
"Blood Cortisol Level (mcg/dl)", "Heart Rate (Beats Per Minute)", "Standardised Stress Score",
"Standardised_Stress_Score_Abs", "Standardised_Stress_Score_Scaled", "Sex", "Compliance_Change")
current_names <- colnames(data2)
# Create a named vector for renaming
rename_vector <- setNames(original_names, current_names)
# Rename columns
colnames(data2) <- rename_vector[colnames(data2)]
# Check the column names to ensure they are correctly renamed
print(names(data2)) [1] "ID" "Sex (Female/Male)"
[3] "Baseline Compliance (seconds)" "Post Treatment Compliance (seconds)"
[5] "Weight (kg)" "IRT (Celsius)"
[7] "Ambient Air Temperature (Celcius)" "Blood Cortisol Level (mcg/dl)"
[9] "Heart Rate (Beats Per Minute)" "Standardised Stress Score"
[11] "Standardised_Stress_Score_Abs" "Standardised_Stress_Score_Scaled"
[13] "Sex" "Compliance_Change"
library(GGally)Warning: package 'GGally' was built under R version 4.4.2
Loading required package: ggplot2
Warning: package 'ggplot2' was built under R version 4.4.2
Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
library(ggplot2)
library(dplyr)Warning: package 'dplyr' was built under R version 4.4.2
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# Check for missing values and handle them
data2 <- data2 %>% na.omit() # Remove rows with missing values
# Ensure the columns have the correct types
data2$`Sex (Female/Male)` <- as.factor(data2$`Sex (Female/Male)`) # Ensure 'Sex' is a factor (categorical)
# Specify columns to include in the pairwise plot, including 'Sex'
columns_to_plot <- c("Baseline Compliance (seconds)", "Heart Rate (Beats Per Minute)", "IRT (Celsius)",
"Blood Cortisol Level (mcg/dl)", "Ambient Air Temperature (Celcius)", "Weight (kg)",
"Sex (Female/Male)")
# Ensure the columns exist in the dataset
columns_to_plot <- columns_to_plot[columns_to_plot %in% colnames(data2)]
# Custom function for boxplots (with outliers)
custom_boxplot <- function(data, mapping, ...) {
ggplot(data = data, mapping = mapping) +
geom_boxplot(outlier.shape = 16, outlier.size = 2, alpha = 0.7) + # Add boxplot with outliers
theme_minimal() # Minimal theme
}
# Generate the pairwise plot
pairwise_plot <- ggpairs(
data2,
aes(color = `Sex (Female/Male)`), # Group by 'Sex'
columns = columns_to_plot, # Columns to include, including 'Sex'
lower = list(
continuous = "smooth", # Regression lines for lower panels
combo = "facetdensity", # Density for combos
discrete = "facetbar" # Bars for discrete variables
),
upper = list(
continuous = wrap("cor", size = 3, digits = 2), # Correlation in upper panels
combo = custom_boxplot # Use custom boxplot function for combos
),
diag = list(
continuous = "densityDiag", # Density on diagonal
discrete = "barDiag" # Bars for discrete variables
)
)
# Customize the theme
pairwise_plot <- pairwise_plot +
theme_minimal() +
theme(panel.grid = element_blank())
# Save the plot to a file (optional)
ggsave("pairwise_plot_with_boxplots_clean.png", pairwise_plot, width = 12, height = 8)
# Display the plot
print(pairwise_plot)library(ggplot2)
library(reshape2)Warning: package 'reshape2' was built under R version 4.4.2
library(Hmisc)Warning: package 'Hmisc' was built under R version 4.4.2
Attaching package: 'Hmisc'
The following objects are masked from 'package:dplyr':
src, summarize
The following objects are masked from 'package:base':
format.pval, units
library(dplyr)
# Assume data2 is already loaded and contains the required columns
# Calculate correlations and p-values
corr_test <- rcorr(as.matrix(data2[, c("Baseline Compliance (seconds)", "Heart Rate (Beats Per Minute)", "IRT (Celsius)",
"Blood Cortisol Level (mcg/dl)", "Ambient Air Temperature (Celcius)", "Weight (kg)")]))
# Extract p-values
p_matrix <- corr_test$P
# Convert the p-values matrix to a long format data frame
p_values_long <- melt(p_matrix, na.rm = TRUE)
# Filter out duplicate combinations (upper triangle)
p_values_long <- p_values_long %>%
filter(as.integer(Var1) < as.integer(Var2))
# Create a new column to indicate significance
p_values_long$significant <- ifelse(p_values_long$value < 0.05, "Significant", "Not Significant")
# Create the heatmap, coloring only significant p-values
heatmap <- ggplot(p_values_long, aes(Var1, Var2, fill = significant)) +
geom_tile(color = "white") +
scale_fill_manual(values = c("Significant" = "red", "Not Significant" = "grey90"), name = "Significance") +
geom_text(aes(label = sprintf("%.3f", value)), color = "black", size = 3) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
coord_fixed() +
labs(title = "P-Value Heatmap with Significant Values Highlighted", x = "Variables", y = "Variables")
# Save the heatmap to a file (optional)
ggsave("significant_p_value_heatmap.png", heatmap, width = 10, height = 8, dpi = 300)
# Display the heatmap
print(heatmap)library(ggplot2)
library(reshape2)
library(Hmisc)
# Subset data to only include numeric columns
vars <- c(
"Baseline Compliance (seconds)",
"Heart Rate (Beats Per Minute)",
"IRT (Celsius)",
"Blood Cortisol Level (mcg/dl)",
"Ambient Air Temperature (Celcius)",
"Weight (kg)"
)
# Ensure we're working only with numeric data
data_subset <- data2[, c(vars, "Sex (Female/Male)")]
data_subset <- na.omit(data_subset) # Remove rows with missing data
# Split the data by Sex
data_male <- data_subset %>% filter(`Sex (Female/Male)` == "Male")
data_female <- data_subset %>% filter(`Sex (Female/Male)` == "Female")
# Calculate correlation results using rcorr() function for each group
correlation_results_male <- rcorr(as.matrix(data_male[, vars]), type = "pearson")
correlation_results_female <- rcorr(as.matrix(data_female[, vars]), type = "pearson")
# Extract p-values matrix (p) for each group
p_matrix_male <- correlation_results_male$P
p_matrix_female <- correlation_results_female$P
# Convert the p-values matrices to long format data frames
p_values_long_male <- melt(p_matrix_male, na.rm = TRUE)
p_values_long_female <- melt(p_matrix_female, na.rm = TRUE)
# Filter out duplicate combinations (upper triangle) for both matrices
p_values_long_male <- p_values_long_male %>% filter(as.integer(Var1) < as.integer(Var2))
p_values_long_female <- p_values_long_female %>% filter(as.integer(Var1) < as.integer(Var2))
# Add a new column to indicate significance for both data frames
p_values_long_male$significant <- ifelse(p_values_long_male$value < 0.05, "Significant", "Not Significant")
p_values_long_female$significant <- ifelse(p_values_long_female$value < 0.05, "Significant", "Not Significant")
# Combine the p-values with indication of sex
p_values_long_male$Sex <- "Male"
p_values_long_female$Sex <- "Female"
p_values_combined <- merge(p_values_long_male, p_values_long_female, by = c("Var1", "Var2"), suffixes = c("_Male", "_Female"))
# Create a combined significance column
p_values_combined$significance <- apply(p_values_combined, 1, function(row) {
male_sig <- ifelse(row["significant_Male"] == "Significant", "M", "")
female_sig <- ifelse(row["significant_Female"] == "Significant", "F", "")
paste0(male_sig, female_sig)
})
# Create the heatmap
heatmap <- ggplot(p_values_combined, aes(Var1, Var2)) +
geom_tile(aes(fill = significance), color = "white") +
scale_fill_manual(values = c("M" = "blue", "F" = "pink", "MF" = "purple", "Not Significant" = "grey90"),
name = "Significance",
labels = c("M" = "Male", "F" = "Female", "MF" = "Both", "Not Significant" = "None")) +
geom_text(aes(label = sprintf("M: %.3f\nF: %.3f", value_Male, value_Female)), color = "black", size = 3) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
coord_fixed() +
labs(title = "P-Value Heatmap: Male and Female Comparison",
x = "Variables",
y = "Variables")
# Save the heatmap to a file (optional)
ggsave("merged_significant_p_value_heatmap.png", heatmap, width = 12, height = 8, dpi = 300)
# Display the heatmap
print(heatmap)library(ggplot2)
library(reshape2)
library(broom)
# Step 1: Subset data to only include the specified numeric columns
vars <- c(
"Baseline Compliance (seconds)",
"Heart Rate (Beats Per Minute)",
"IRT (Celsius)",
"Blood Cortisol Level (mcg/dl)",
"Ambient Air Temperature (Celcius)",
"Weight (kg)"
)
# Ensure we're working only with numeric data
data_subset <- data2[, c(vars, "Sex (Female/Male)"), drop = FALSE]
data_subset <- na.omit(data_subset) # Remove rows with missing data
# Step 2: Perform t-tests for each continuous variable
t_tests <- lapply(vars, function(var) t.test(data_subset[[var]] ~ data_subset$`Sex (Female/Male)`))
# Extract t-values and p-values from the t-tests
t_values <- sapply(t_tests, function(test) test$statistic)
p_values <- sapply(t_tests, function(test) test$p.value)
means_male <- sapply(vars, function(var) mean(data_subset[data_subset$`Sex (Female/Male)` == "Male", var], na.rm = TRUE))
means_female <- sapply(vars, function(var) mean(data_subset[data_subset$`Sex (Female/Male)` == "Female", var], na.rm = TRUE))
# Step 3: Create a data frame for the t-values and p-values
t_test_results <- data.frame(
Variable = vars,
T_Value = t_values,
P_Value = p_values,
Significant = ifelse(p_values < 0.05, "Significant", "Not Significant"),
Mean_Male = means_male,
Mean_Female = means_female,
Direction = ifelse(means_male > means_female, "Male > Female", "Female > Male")
)
# Step 4: Convert the data frame to long format for ggplot2
t_test_results_long <- melt(t_test_results, id.vars = "Variable", variable.name = "Metric", value.name = "Value")
# Ensure that all values are numeric
t_test_results_long$Value <- as.numeric(as.character(t_test_results_long$Value))Warning: NAs introduced by coercion
# Create a function to add stars based on significance
add_stars <- function(p_value) {
if (p_value < 0.001) {
"***"
} else if (p_value < 0.01) {
"**"
} else if (p_value < 0.05) {
"*"
} else {
""
}
}
# Add stars to p-values
p_values_data <- t_test_results_long %>%
filter(Metric == "P_Value") %>%
mutate(Stars = sapply(Value, add_stars))
# Add stars to t-values based on p-value thresholds
t_values_data <- t_test_results_long %>%
filter(Metric == "T_Value") %>%
mutate(Stars = sapply(t_test_results_long %>% filter(Metric == "P_Value") %>% .$Value, add_stars))
# Step 5: Create the heatmap
heatmap <- ggplot() +
geom_tile(data = t_values_data, aes(Variable, Metric, fill = as.numeric(Value)), color = "white") +
scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
limit = c(-max(abs(t_values_data$Value)), max(abs(t_values_data$Value))),
space = "Lab", name = "T-Value") +
geom_text(data = t_values_data, aes(Variable, Metric, label = sprintf("%.2f%s", as.numeric(Value), Stars)), color = "black", size = 3) +
geom_text(data = p_values_data, aes(Variable, Metric, label = sprintf("%.3f%s", as.numeric(Value), Stars)), color = "black", size = 3, nudge_y = 0.3) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
coord_fixed() +
labs(title = "T-Values Heatmap with Significant Differences Highlighted",
x = "Variables",
y = "Metric") +
theme(legend.title = element_text(face = "bold"),
legend.text = element_text(size = 10))
# Add the direction of the differences to the legend
heatmap <- heatmap + guides(fill = guide_legend(title = "T-Value\n(Male > Female in Blue, Female > Male in Red)"))
# Save the heatmap to a file (optional)
ggsave("t_values_significant_heatmap.png", heatmap, width = 12, height = 8, dpi = 300)
# Display the heatmap
print(heatmap)library(GGally)
library(ggplot2)
library(dplyr)
# Check for missing values and handle them
data2 <- data2 %>% na.omit() # Remove rows with missing values
# Ensure the columns have the correct types
data2$`Sex (Female/Male)` <- as.factor(data2$`Sex (Female/Male)`) # Ensure 'Sex' is a factor (categorical)
# Specify columns to include in the pairwise plot, including 'Sex'
columns_to_plot <- c("Baseline Compliance (seconds)", "Post Treatment Compliance (seconds)", "Compliance_Change", "Standardised Stress Scrore", "Weight (kg)", "Sex (Female/Male)")
# Ensure the columns exist in the dataset
columns_to_plot <- columns_to_plot[columns_to_plot %in% colnames(data2)]
# Custom function for boxplots (without individual data points)
custom_boxplot <- function(data, mapping, ...) {
ggplot(data = data, mapping = mapping) +
geom_boxplot(outlier.shape = 16, outlier.size = 2, alpha = 0.7) + # Add boxplot with outliers
theme_minimal() # Minimal theme
}
# Generate the pairwise plot
pairwise_plot <- ggpairs(
data2,
aes(color = `Sex (Female/Male)`), # Group by 'Sex'
columns = columns_to_plot, # Columns to include, including 'Sex'
lower = list(
continuous = "smooth", # Regression lines for lower panels
combo = "facetdensity", # Density for combos
discrete = "facetbar" # Bars for discrete variables
),
upper = list(
continuous = wrap("cor", size = 3, digits = 2), # Correlation in upper panels
combo = custom_boxplot # Use custom boxplot function for combos
),
diag = list(
continuous = "densityDiag", # Density on diagonal
discrete = "barDiag" # Bars for discrete variables
)
)
# Customize the theme
pairwise_plot <- pairwise_plot +
theme_minimal() +
theme(panel.grid = element_blank())
# Save the plot to a file (optional)
ggsave("pairwise_plot_with_boxplots_clean_POST.png", pairwise_plot, width = 12, height = 8)
# Display the plot
print(pairwise_plot)library(ggplot2)
library(reshape2)
library(Hmisc)
# Step 1: Subset data to only include the specified numeric columns
vars <- c(
"Baseline Compliance (seconds)",
"Post Treatment Compliance (seconds)",
"Compliance_Change",
"Weight (kg)"
)
# Ensure we're working only with numeric data
data_subset <- data2[, vars]
data_subset <- na.omit(data_subset) # Remove rows with missing data
# Step 2: Calculate correlation results using rcorr() function
correlation_results <- rcorr(as.matrix(data_subset), type = "pearson")
# Step 3: Extract p-values matrix (p)
p_matrix <- correlation_results$P
# Step 4: Convert the p-values matrix to a long format data frame
p_values_long <- melt(p_matrix, na.rm = TRUE)
# Filter out duplicate combinations (upper triangle)
p_values_long <- p_values_long %>% filter(as.integer(Var1) < as.integer(Var2))
# Create a new column to indicate significance
p_values_long$significant <- ifelse(p_values_long$value < 0.05, "Significant", "Not Significant")
# Create the heatmap, coloring only significant p-values
heatmap <- ggplot(p_values_long, aes(Var1, Var2, fill = significant)) +
geom_tile(color = "white") +
scale_fill_manual(values = c("Significant" = "red", "Not Significant" = "grey90"), name = "Significance") +
geom_text(aes(label = sprintf("%.3f", value)), color = "black", size = 3) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
coord_fixed() +
labs(title = "P-Value Heatmap with Significant Values Highlighted",
x = "Variables",
y = "Variables")
# Save the heatmap to a file (optional)
ggsave("whole_cohort_significant_p_value_heatmap.png", heatmap, width = 10, height = 8, dpi = 300)
# Display the heatmap
print(heatmap)library(ggplot2)
library(reshape2)
library(Hmisc)
# Subset data to only include numeric columns
vars <- c(
"Baseline Compliance (seconds)",
"Post Treatment Compliance (seconds)",
"Compliance_Change",
"Weight (kg)")
# Ensure we're working only with numeric data
data_subset <- data2[, c(vars, "Sex (Female/Male)")]
data_subset <- na.omit(data_subset) # Remove rows with missing data
# Split the data by Sex
data_male <- data_subset %>% filter(`Sex (Female/Male)` == "Male")
data_female <- data_subset %>% filter(`Sex (Female/Male)` == "Female")
# Calculate correlation results using rcorr() function for each group
correlation_results_male <- rcorr(as.matrix(data_male[, vars]), type = "pearson")
correlation_results_female <- rcorr(as.matrix(data_female[, vars]), type = "pearson")
# Extract p-values matrix (p) for each group
p_matrix_male <- correlation_results_male$P
p_matrix_female <- correlation_results_female$P
# Convert the p-values matrices to long format data frames
p_values_long_male <- melt(p_matrix_male, na.rm = TRUE)
p_values_long_female <- melt(p_matrix_female, na.rm = TRUE)
# Filter out duplicate combinations (upper triangle) for both matrices
p_values_long_male <- p_values_long_male %>% filter(as.integer(Var1) < as.integer(Var2))
p_values_long_female <- p_values_long_female %>% filter(as.integer(Var1) < as.integer(Var2))
# Add a new column to indicate significance for both data frames
p_values_long_male$significant <- ifelse(p_values_long_male$value < 0.05, "Significant", "Not Significant")
p_values_long_female$significant <- ifelse(p_values_long_female$value < 0.05, "Significant", "Not Significant")
# Combine the p-values with indication of sex
p_values_long_male$Sex <- "Male"
p_values_long_female$Sex <- "Female"
p_values_combined <- merge(p_values_long_male, p_values_long_female, by = c("Var1", "Var2"), suffixes = c("_Male", "_Female"))
# Create a combined significance column
p_values_combined$significance <- apply(p_values_combined, 1, function(row) {
male_sig <- ifelse(row["significant_Male"] == "Significant", "M", "")
female_sig <- ifelse(row["significant_Female"] == "Significant", "F", "")
paste0(male_sig, female_sig)
})
# Create the heatmap
heatmap <- ggplot(p_values_combined, aes(Var1, Var2)) +
geom_tile(aes(fill = significance), color = "white") +
scale_fill_manual(values = c("M" = "blue", "F" = "pink", "MF" = "purple", "Not Significant" = "grey90"),
name = "Significance",
labels = c("M" = "Male", "F" = "Female", "MF" = "Both", "Not Significant" = "None")) +
geom_text(aes(label = sprintf("M: %.3f\nF: %.3f", value_Male, value_Female)), color = "black", size = 3) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
coord_fixed() +
labs(title = "P-Value Heatmap: Male and Female Comparison",
x = "Variables",
y = "Variables")
# Save the heatmap to a file (optional)
ggsave("merged_significant_p_value_heatmap.png", heatmap, width = 12, height = 8, dpi = 300)
# Display the heatmap
print(heatmap)library(ggplot2)
library(reshape2)
library(broom)
# Step 1: Subset data to only include the specified numeric columns
vars <- c(
"Baseline Compliance (seconds)",
"Post Treatment Compliance (seconds)",
"Compliance_Change",
"Standardised Stress Score",
"Weight (kg)"
)
# Ensure we're working only with numeric data
data_subset <- data2[, c(vars, "Sex (Female/Male)"), drop = FALSE]
data_subset <- na.omit(data_subset) # Remove rows with missing data
# Step 2: Perform t-tests for each continuous variable
t_tests <- lapply(vars, function(var) t.test(data_subset[[var]] ~ data_subset$`Sex (Female/Male)`))
# Extract t-values and p-values from the t-tests
t_values <- sapply(t_tests, function(test) test$statistic)
p_values <- sapply(t_tests, function(test) test$p.value)
means_male <- sapply(vars, function(var) mean(data_subset[data_subset$`Sex (Female/Male)` == "Male", var], na.rm = TRUE))
means_female <- sapply(vars, function(var) mean(data_subset[data_subset$`Sex (Female/Male)` == "Female", var], na.rm = TRUE))
# Step 3: Create a data frame for the t-values and p-values
t_test_results <- data.frame(
Variable = vars,
T_Value = t_values,
P_Value = p_values,
Significant = ifelse(p_values < 0.05, "Significant", "Not Significant"),
Mean_Male = means_male,
Mean_Female = means_female,
Direction = ifelse(means_male > means_female, "Male > Female", "Female > Male")
)
# Step 4: Convert the data frame to long format for ggplot2
t_test_results_long <- melt(t_test_results, id.vars = "Variable", variable.name = "Metric", value.name = "Value")
# Ensure that all values are numeric
t_test_results_long$Value <- as.numeric(as.character(t_test_results_long$Value))Warning: NAs introduced by coercion
# Create a function to add stars based on significance
add_stars <- function(p_value) {
if (p_value < 0.001) {
"***"
} else if (p_value < 0.01) {
"**"
} else if (p_value < 0.05) {
"*"
} else {
""
}
}
# Add stars to p-values
p_values_data <- t_test_results_long %>%
filter(Metric == "P_Value") %>%
mutate(Stars = sapply(Value, add_stars))
# Add stars to t-values based on p-value thresholds
t_values_data <- t_test_results_long %>%
filter(Metric == "T_Value") %>%
mutate(Stars = sapply(t_test_results_long %>% filter(Metric == "P_Value") %>% .$Value, add_stars))
# Step 5: Create the heatmap
heatmap <- ggplot() +
geom_tile(data = t_values_data, aes(Variable, Metric, fill = as.numeric(Value)), color = "white") +
scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
limit = c(-max(abs(t_values_data$Value)), max(abs(t_values_data$Value))),
space = "Lab", name = "T-Value") +
geom_text(data = t_values_data, aes(Variable, Metric, label = sprintf("%.2f%s", as.numeric(Value), Stars)), color = "black", size = 3) +
geom_text(data = p_values_data, aes(Variable, Metric, label = sprintf("%.3f%s", as.numeric(Value), Stars)), color = "black", size = 3, nudge_y = 0.3) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
coord_fixed() +
labs(title = "T-Values Heatmap with Significant Differences Highlighted",
x = "Variables",
y = "Metric") +
theme(legend.title = element_text(face = "bold"),
legend.text = element_text(size = 10))
# Add the direction of the differences to the legend
heatmap <- heatmap + guides(fill = guide_legend(title = "T-Value\n(Male > Female in Blue, Female > Male in Red)"))
# Save the heatmap to a file (optional)
ggsave("t_values_significant_heatmap.png", heatmap, width = 12, height = 8, dpi = 300)
# Display the heatmap
print(heatmap)# Load necessary libraries
# Load necessary libraries
library(ggplot2)
library(dplyr)
library(tidyr)Warning: package 'tidyr' was built under R version 4.4.2
Attaching package: 'tidyr'
The following object is masked from 'package:reshape2':
smiths
library(corrplot)corrplot 0.95 loaded
# Assuming data2 is already loaded in your R environment
# Convert 'Sex (Female/Male)' column to a factor if it's not already
data2$`Sex (Female/Male)` <- as.factor(data2$`Sex (Female/Male)`)
# Summary statistics for Baseline Compliance and Post Treatment Compliance
summary_stats <- data2 %>%
summarise(
baseline_mean = mean(`Baseline Compliance (seconds)`, na.rm = TRUE),
baseline_sd = sd(`Baseline Compliance (seconds)`, na.rm = TRUE),
post_treatment_mean = mean(`Post Treatment Compliance (seconds)`, na.rm = TRUE),
post_treatment_sd = sd(`Post Treatment Compliance (seconds)`, na.rm = TRUE)
)
print(summary_stats) baseline_mean baseline_sd post_treatment_mean post_treatment_sd
1 35.1249 1.430335 29.94207 1.046642
# Create a new data frame for long format to use in box plots
data_long <- data2 %>%
gather(key = "Treatment_Phase", value = "Compliance",
`Baseline Compliance (seconds)`, `Post Treatment Compliance (seconds)`)
# Define your Documents directory
documents_dir <- "~/Documents/"
# Box plot for the whole cohort: Baseline vs Post Treatment Compliance
plot1 <- ggplot(data_long, aes(x = Treatment_Phase, y = Compliance, fill = Treatment_Phase)) +
geom_boxplot() +
labs(
title = "Comparison of Baseline and Post Treatment Compliance (Whole Cohort)",
x = "Treatment Phase",
y = "Compliance (seconds)"
) +
theme_minimal()
ggsave(filename = paste0(documents_dir, "Whole_Cohort_Box_Plot.png"), plot = plot1, width = 12, height = 8)
# Box plot by Sex: Baseline vs Post Treatment Compliance for each gender
plot2 <- ggplot(data_long, aes(x = Treatment_Phase, y = Compliance, fill = Treatment_Phase)) +
geom_boxplot() +
facet_wrap(~`Sex (Female/Male)`) +
labs(
title = "Comparison of Baseline and Post Treatment Compliance by Sex",
x = "Treatment Phase",
y = "Compliance (seconds)"
) +
theme_minimal()
ggsave(filename = paste0(documents_dir, "Sex_Box_Plot.png"), plot = plot2, width = 12, height = 8)
# Statistical comparison: Paired t-test for the whole cohort
paired_t_test_cohort <- t.test(
data2$`Baseline Compliance (seconds)`,
data2$`Post Treatment Compliance (seconds)`,
paired = TRUE
)
print(paired_t_test_cohort)
Paired t-test
data: data2$`Baseline Compliance (seconds)` and data2$`Post Treatment Compliance (seconds)`
t = 268.28, df = 497, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
5.144875 5.220788
sample estimates:
mean difference
5.182831
# Statistical comparison: Paired t-test by Sex
paired_t_test_sex <- data2 %>%
group_by(`Sex (Female/Male)`) %>%
summarise(
t_test_result = list(t.test(
`Baseline Compliance (seconds)`,
`Post Treatment Compliance (seconds)`,
paired = TRUE
))
)
print(paired_t_test_sex)# A tibble: 2 × 2
`Sex (Female/Male)` t_test_result
<fct> <list>
1 Female <htest>
2 Male <htest>
# Correlation analysis for the whole cohort
correlation_full_cohort <- cor(data2$`Baseline Compliance (seconds)`, data2$`Post Treatment Compliance (seconds)`, use = "complete.obs")
print(paste("Correlation for the full cohort: ", round(correlation_full_cohort, 2)))[1] "Correlation for the full cohort: 0.99"
# Correlation analysis by Sex category
correlation_by_sex <- data2 %>%
group_by(`Sex (Female/Male)`) %>%
summarise(
correlation = cor(`Baseline Compliance (seconds)`, `Post Treatment Compliance (seconds)`, use = "complete.obs")
)
print(correlation_by_sex)# A tibble: 2 × 2
`Sex (Female/Male)` correlation
<fct> <dbl>
1 Female 0.982
2 Male 0.991
# Visualize the relationship between Baseline and Post Treatment for full cohort and by Sex category
plot3 <- ggplot(data2, aes(x = `Baseline Compliance (seconds)`, y = `Post Treatment Compliance (seconds)`)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "blue") +
labs(
title = "Relationship between Baseline Compliance and Post Treatment Compliance",
x = "Baseline Compliance (seconds)",
y = "Post Treatment Compliance (seconds)"
) +
theme_minimal()
ggsave(filename = paste0(documents_dir, "Relationship_Full_Cohort.png"), plot = plot3, width = 12, height = 8)`geom_smooth()` using formula = 'y ~ x'
# Visualize the relationship by Sex category
plot4 <- ggplot(data2, aes(x = `Baseline Compliance (seconds)`, y = `Post Treatment Compliance (seconds)`, color = `Sex (Female/Male)`)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(
title = "Relationship between Baseline Compliance and Post Treatment Compliance by Sex",
x = "Baseline Compliance (seconds)",
y = "Post Treatment Compliance (seconds)"
) +
theme_minimal()
ggsave(filename = paste0(documents_dir, "Relationship_By_Sex.png"), plot = plot4, width = 12, height = 8)`geom_smooth()` using formula = 'y ~ x'
# Box plot for the change in compliance by Sex (Baseline - Post-Treatment)
data2$Compliance_Change <- data2$`Post Treatment Compliance (seconds)` - data2$`Baseline Compliance (seconds)`
plot5 <- ggplot(data2, aes(x = `Sex (Female/Male)`, y = Compliance_Change, fill = `Sex (Female/Male)`)) +
geom_boxplot() +
labs(
title = "Change in Compliance by Sex",
x = "Sex",
y = "Change in Compliance (seconds)"
) +
theme_minimal()
ggsave(filename = paste0(documents_dir, "Change_By_Sex.png"), plot = plot5, width = 12, height = 8)# Load necessary libraries
library(ggplot2)
library(dplyr)
# Assuming data2 is already loaded in your R environment
data2$`Sex (Female/Male)` <- as.factor(data2$`Sex (Female/Male)`)
# Fit a linear regression model
linear_model <- lm(`Post Treatment Compliance (seconds)` ~ `Baseline Compliance (seconds)`, data = data2)
summary(linear_model)
Call:
lm(formula = `Post Treatment Compliance (seconds)` ~ `Baseline Compliance (seconds)`,
data = data2)
Residuals:
Min 1Q Median 3Q Max
-1.01780 -0.02830 0.03085 0.06742 2.60626
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.571268 0.184969 24.71 <2e-16 ***
`Baseline Compliance (seconds)` 0.722302 0.005262 137.28 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.1678 on 496 degrees of freedom
Multiple R-squared: 0.9744, Adjusted R-squared: 0.9743
F-statistic: 1.884e+04 on 1 and 496 DF, p-value: < 2.2e-16
# Fit a sigmoidal (logistic) model
sigmoid_model <- nls(`Post Treatment Compliance (seconds)` ~ SSlogis(`Baseline Compliance (seconds)`, Asym, xmid, scal), data = data2)
summary(sigmoid_model)
Formula: `Post Treatment Compliance (seconds)` ~ SSlogis(`Baseline Compliance (seconds)`,
Asym, xmid, scal)
Parameters:
Estimate Std. Error t value Pr(>|t|)
Asym 41.1991 1.1558 35.65 <2e-16 ***
xmid 24.0942 0.3441 70.02 <2e-16 ***
scal 11.2337 0.8467 13.27 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.1603 on 495 degrees of freedom
Number of iterations to convergence: 6
Achieved convergence tolerance: 1.941e-06
# Create a sequence of baseline compliance values for prediction
baseline_seq <- seq(min(data2$`Baseline Compliance (seconds)`), max(data2$`Baseline Compliance (seconds)`), length.out = 100)
# Ensure the column name is correctly referenced and exists in new_data
new_data <- data.frame(`Baseline Compliance (seconds)` = baseline_seq)
# Debugging step: Print new_data to verify
print(new_data) Baseline.Compliance..seconds.
1 30.78000
2 30.87394
3 30.96788
4 31.06182
5 31.15576
6 31.24970
7 31.34364
8 31.43758
9 31.53152
10 31.62545
11 31.71939
12 31.81333
13 31.90727
14 32.00121
15 32.09515
16 32.18909
17 32.28303
18 32.37697
19 32.47091
20 32.56485
21 32.65879
22 32.75273
23 32.84667
24 32.94061
25 33.03455
26 33.12848
27 33.22242
28 33.31636
29 33.41030
30 33.50424
31 33.59818
32 33.69212
33 33.78606
34 33.88000
35 33.97394
36 34.06788
37 34.16182
38 34.25576
39 34.34970
40 34.44364
41 34.53758
42 34.63152
43 34.72545
44 34.81939
45 34.91333
46 35.00727
47 35.10121
48 35.19515
49 35.28909
50 35.38303
51 35.47697
52 35.57091
53 35.66485
54 35.75879
55 35.85273
56 35.94667
57 36.04061
58 36.13455
59 36.22848
60 36.32242
61 36.41636
62 36.51030
63 36.60424
64 36.69818
65 36.79212
66 36.88606
67 36.98000
68 37.07394
69 37.16788
70 37.26182
71 37.35576
72 37.44970
73 37.54364
74 37.63758
75 37.73152
76 37.82545
77 37.91939
78 38.01333
79 38.10727
80 38.20121
81 38.29515
82 38.38909
83 38.48303
84 38.57697
85 38.67091
86 38.76485
87 38.85879
88 38.95273
89 39.04667
90 39.14061
91 39.23455
92 39.32848
93 39.42242
94 39.51636
95 39.61030
96 39.70424
97 39.79818
98 39.89212
99 39.98606
100 40.08000
library(ggplot2)
library(reshape2)
library(broom)
# Step 1: Calculate the difference between Baseline and Post Treatment Compliance
data_subset <- data2[, c("Baseline Compliance (seconds)", "Post Treatment Compliance (seconds)", "Sex (Female/Male)")]
data_subset <- na.omit(data_subset) # Remove rows with missing data
data_subset$Compliance_Change <- data_subset$`Post Treatment Compliance (seconds)` - data_subset$`Baseline Compliance (seconds)`
# Ensure the 'Sex (Female/Male)' column has exactly two levels: "Male" and "Female"
data_subset <- data_subset %>% filter(`Sex (Female/Male)` %in% c("Male", "Female"))
# Step 2: Perform t-tests for the full cohort and by sex
t_test_full <- t.test(data_subset$Compliance_Change)
t_test_male <- t.test(data_subset$Compliance_Change[data_subset$`Sex (Female/Male)` == "Male"])
t_test_female <- t.test(data_subset$Compliance_Change[data_subset$`Sex (Female/Male)` == "Female"])
# Extract t-values, p-values, and means
t_values <- c(Full = t_test_full$statistic, Male = t_test_male$statistic, Female = t_test_female$statistic)
p_values <- c(Full = t_test_full$p.value, Male = t_test_male$p.value, Female = t_test_female$p.value)
means <- c(Full = mean(data_subset$Compliance_Change),
Male = mean(data_subset$Compliance_Change[data_subset$`Sex (Female/Male)` == "Male"]),
Female = mean(data_subset$Compliance_Change[data_subset$`Sex (Female/Male)` == "Female"]))
# Determine if pre or post-treatment values are higher
baseline_means <- c(Full = mean(data_subset$`Baseline Compliance (seconds)`),
Male = mean(data_subset$`Baseline Compliance (seconds)`[data_subset$`Sex (Female/Male)` == "Male"]),
Female = mean(data_subset$`Baseline Compliance (seconds)`[data_subset$`Sex (Female/Male)` == "Female"]))
post_treatment_means <- c(Full = mean(data_subset$`Post Treatment Compliance (seconds)`),
Male = mean(data_subset$`Post Treatment Compliance (seconds)`[data_subset$`Sex (Female/Male)` == "Male"]),
Female = mean(data_subset$`Post Treatment Compliance (seconds)`[data_subset$`Sex (Female/Male)` == "Female"]))
direction <- ifelse(post_treatment_means > baseline_means, "Post > Pre", "Pre > Post")
# Step 3: Create a data frame for the t-values, p-values, means, and direction
t_test_results <- data.frame(
Group = c("Full Cohort", "Male", "Female"),
T_Value = t_values,
P_Value = p_values,
Mean = means,
Significant = ifelse(p_values < 0.05, "Significant", "Not Significant"),
Direction = direction
)
# Create a function to add stars based on significance
add_stars <- function(p_value) {
if (p_value < 0.001) {
"***"
} else if (p_value < 0.01) {
"**"
} else if (p_value < 0.05) {
"*"
} else {
""
}
}
# Add stars to p-values
t_test_results$P_Stars <- sapply(t_test_results$P_Value, add_stars)
# Add stars to t-values
t_test_results$T_Stars <- sapply(t_test_results$P_Value, add_stars)
# Step 4: Remove unnecessary rows and convert to long format for ggplot2
t_test_results_long <- melt(t_test_results[, c("Group", "T_Value", "P_Value", "Mean")], id.vars = "Group", variable.name = "Metric", value.name = "Value")
# Ensure that all values are numeric where appropriate
t_test_results_long$Value <- as.numeric(as.character(t_test_results_long$Value))
# Separate stars data for T-Values and P-Values
t_stars_data <- t_test_results$T_Stars
p_stars_data <- t_test_results$P_Stars
# Step 5: Create the heatmap with corrected label alignment and length consistency
heatmap <- ggplot(t_test_results_long, aes(Group, Metric, fill = as.numeric(Value))) +
geom_tile(color = "white") +
scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
limit = c(-max(abs(t_test_results_long$Value)), max(abs(t_test_results_long$Value))),
space = "Lab", name = "Value") +
geom_text(data = t_test_results_long %>% filter(Metric == "T_Value"),
aes(Group, Metric, label = sprintf("%.2f%s", as.numeric(Value), t_stars_data[match(Group, t_test_results$Group)])),
color = "black", size = 3, vjust = -0.3) +
geom_text(data = t_test_results_long %>% filter(Metric == "P_Value"),
aes(Group, Metric, label = sprintf("%.3f%s", as.numeric(Value), p_stars_data[match(Group, t_test_results$Group)])),
color = "black", size = 3, vjust = 0.3) +
geom_text(data = t_test_results_long %>% filter(Metric == "Mean"),
aes(Group, Metric, label = sprintf("%.2f", as.numeric(Value))), color = "black", size = 3, vjust = 1.3) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
coord_fixed() +
labs(title = "T-Values and P-Values Heatmap for Compliance Change",
x = "Groups",
y = "Metric") +
theme(legend.title = element_text(face = "bold"),
legend.text = element_text(size = 10))
# Add the direction of the differences to the legend
heatmap <- heatmap + guides(fill = guide_legend(title = "T-Value\n(Post > Pre in Red, Pre > Post in Blue)"))
# Save the heatmap to a file (optional)
ggsave("compliance_change_heatmap.png", heatmap, width = 12, height = 8, dpi = 300)
# Display the heatmap
print(heatmap)# Compare AIC values
AIC(linear_model)[1] -360.7006
AIC(sigmoid_model)[1] -404.8558
# Load necessary libraries
library(ggplot2)
library(dplyr)
# Create the scatterplot with points colored by Sex
plot <- ggplot(data2, aes(x = `Baseline Compliance (seconds)`, y = `Post Treatment Compliance (seconds)`, color = `Sex (Female/Male)`)) +
geom_point() + # Scatterplot
stat_smooth(method = "nls", formula = y ~ SSlogis(x, Asym, xmid, scal), se = FALSE) +
labs(title = "Scatterplot with Sigmoid Curve",
x = "Baseline Compliance (seconds)",
y = "Post Treatment Compliance (seconds)",
color = "Sex") +
theme_minimal()
# Save the plot as a PNG file in the Documents folder
ggsave(filename = "~/Documents/scatterplot_sigmoid_curve.png", plot = plot, width = 10, height = 6, dpi = 300)Warning: Failed to fit group 1.
Caused by error in `method()`:
! number of iterations exceeded maximum of 50
print(plot)Warning: Failed to fit group 1.
Caused by error in `method()`:
! number of iterations exceeded maximum of 50
# Load necessary libraries
library(ggplot2)
library(dplyr)
# Fit the logistic regression model
model <- nls(`Post Treatment Compliance (seconds)` ~ SSlogis(`Baseline Compliance (seconds)`, Asym, xmid, scal), data = data2)
# Summary of the model to see the parameters
summary(model)
Formula: `Post Treatment Compliance (seconds)` ~ SSlogis(`Baseline Compliance (seconds)`,
Asym, xmid, scal)
Parameters:
Estimate Std. Error t value Pr(>|t|)
Asym 41.1991 1.1558 35.65 <2e-16 ***
xmid 24.0942 0.3441 70.02 <2e-16 ***
scal 11.2337 0.8467 13.27 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.1603 on 495 degrees of freedom
Number of iterations to convergence: 6
Achieved convergence tolerance: 1.941e-06
# Function to predict post-treatment compliance based on baseline compliance
predict_compliance <- function(baseline) {
Asym <- coef(model)["Asym"]
xmid <- coef(model)["xmid"]
scal <- coef(model)["scal"]
Asym / (1 + exp((xmid - baseline) / scal))
}
##Example usage: Predicting post-treatment compliance for a new baseline compliance value
new_baseline <- 35 # Replace this with your actual baseline compliance value
predicted_post_treatment <- predict_compliance(new_baseline)
print(predicted_post_treatment) Asym
29.88098
library(ggplot2)
library(reshape2)
library(psych)Warning: package 'psych' was built under R version 4.4.2
Attaching package: 'psych'
The following object is masked from 'package:Hmisc':
describe
The following objects are masked from 'package:ggplot2':
%+%, alpha
library(FactoMineR)Warning: package 'FactoMineR' was built under R version 4.4.2
library(factoextra)Warning: package 'factoextra' was built under R version 4.4.2
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Step 1: Prepare Data
vars <- c(
"Baseline Compliance (seconds)",
"Heart Rate (Beats Per Minute)",
"IRT (Celsius)",
"Blood Cortisol Level (mcg/dl)",
"Weight (kg)",
"Sex (Female/Male)",
"Ambient Air Temperature (Celcius)"
)
data_subset <- data2[, vars]
data_subset <- na.omit(data_subset) # Remove rows with missing data
# Encode Sex as a numeric variable for PCA
data_subset$`Sex (Female/Male)` <- as.numeric(factor(data_subset$`Sex (Female/Male)`, levels = c("Female", "Male")))
# Step 2: Perform PCA
pca_result <- PCA(data_subset, scale.unit = TRUE, graph = FALSE)
# Step 3: Interpret Results
# Print PCA summary to see the variance explained by each component
print(pca_result)**Results for the Principal Component Analysis (PCA)**
The analysis was performed on 498 individuals, described by 7 variables
*The results are available in the following objects:
name description
1 "$eig" "eigenvalues"
2 "$var" "results for the variables"
3 "$var$coord" "coord. for the variables"
4 "$var$cor" "correlations variables - dimensions"
5 "$var$cos2" "cos2 for the variables"
6 "$var$contrib" "contributions of the variables"
7 "$ind" "results for the individuals"
8 "$ind$coord" "coord. for the individuals"
9 "$ind$cos2" "cos2 for the individuals"
10 "$ind$contrib" "contributions of the individuals"
11 "$call" "summary statistics"
12 "$call$centre" "mean of the variables"
13 "$call$ecart.type" "standard error of the variables"
14 "$call$row.w" "weights for the individuals"
15 "$call$col.w" "weights for the variables"
# Step 4: Create Visuals
# Scree Plot
fviz_eig(pca_result, addlabels = TRUE, ylim = c(0, 50)) +
labs(title = "Scree Plot", x = "Principal Components", y = "Percentage of Variance Explained")# Biplot
fviz_pca_biplot(pca_result,
repel = TRUE,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
addEllipses = TRUE, label = "var") +
labs(title = "PCA Biplot")# Step 5: Extract PCA results for detailed analysis
# Contribution of variables to PCs
contrib <- pca_result$var$contrib
# Print the contributions to assess which variables are most influential in the PCs
print(contrib) Dim.1 Dim.2 Dim.3
Baseline Compliance (seconds) 48.777304622 0.2744583 0.13435490
Heart Rate (Beats Per Minute) 47.205924420 0.2661722 0.36434048
IRT (Celsius) 0.080662520 41.5720099 5.44239408
Blood Cortisol Level (mcg/dl) 0.001988482 18.4253230 39.86148730
Weight (kg) 0.004983144 14.5214501 27.69799434
Sex (Female/Male) 3.014068274 8.5945862 0.07459062
Ambient Air Temperature (Celcius) 0.915068538 16.3460004 26.42483828
Dim.4 Dim.5
Baseline Compliance (seconds) 0.3181619 0.01761312
Heart Rate (Beats Per Minute) 3.3724379 0.00172521
IRT (Celsius) 4.4977211 1.22601161
Blood Cortisol Level (mcg/dl) 0.7131504 0.12078170
Weight (kg) 12.1585346 42.27662004
Sex (Female/Male) 78.2268230 8.93084365
Ambient Air Temperature (Celcius) 0.7131711 47.42640465
library(FactoMineR)
library(factoextra)
# Step 1: Prepare Data
data_subset <- data2[, c("Baseline Compliance (seconds)", "Heart Rate (Beats Per Minute)", "IRT (Celsius)", "Blood Cortisol Level (mcg/dl)")]
data_subset <- na.omit(data_subset) # Remove rows with missing data
# Step 2: Perform PCA
pca_result <- PCA(data_subset, scale.unit = TRUE, graph = FALSE)
# Step 3: Extract Principal Components
# Get the scores for the first two principal components
pc_scores <- pca_result$ind$coord[, 1:2]
# Convert to a data frame and add to the original data
pc_scores_df <- as.data.frame(pc_scores)
colnames(pc_scores_df) <- c("PC1_Score", "PC2_Score")
# Add the scores to the original dataset
data_combined <- cbind(data_subset, pc_scores_df)
# Step 4: Standardize the scores
data_combined$PC1_Standardized <- scale(data_combined$PC1_Score)
data_combined$PC2_Standardized <- scale(data_combined$PC2_Score)
# Step 5: Create the Combined Stress Score
data_combined$Stress_Score <- rowMeans(data_combined[, c("PC1_Standardized", "PC2_Standardized")])
# View the first few rows of the dataset with the combined stress score
head(data_combined) Baseline Compliance (seconds) Heart Rate (Beats Per Minute) IRT (Celsius)
1 37.17 153.2263 37.73957
2 34.55 149.6390 35.68413
3 36.33 149.4406 34.79854
4 35.34 150.2971 36.15973
5 37.40 149.0318 33.61477
6 33.55 146.8415 36.38343
Blood Cortisol Level (mcg/dl) PC1_Score PC2_Score PC1_Standardized
1 64.11183 2.0297384 0.45015896 1.5477482
2 73.68676 -0.4574603 -0.28182430 -0.3488299
3 54.43466 0.3960235 -1.49251301 0.3019821
4 86.32615 0.1389429 0.48806683 0.1059489
5 107.97796 0.7861604 0.09535008 0.5994754
6 108.86475 -1.9762299 1.41777556 -1.5069460
PC2_Standardized Stress_Score
1 0.4233912 0.9855697
2 -0.2650662 -0.3069481
3 -1.4037639 -0.5508909
4 0.4590450 0.2824969
5 0.0896803 0.3445779
6 1.3334706 -0.0867377
# Histogram of Combined Stress Score
histogram <- ggplot(data_combined, aes(x = Stress_Score)) +
geom_histogram(binwidth = 0.5, fill = "#00AFBB", color = "black", alpha = 0.7) +
labs(title = "Distribution of Combined Stress Score", x = "Stress Score", y = "Frequency") +
theme_minimal()
ggsave("histogram_combined_stress_score.png", histogram, width = 8, height = 6)
# Display Histogram
print(histogram)library(ggplot2)
library(Hmisc)
# Calculate correlations and p-values
corr_test <- rcorr(as.matrix(data_combined[, c("Stress_Score", "Baseline Compliance (seconds)", "Heart Rate (Beats Per Minute)", "IRT (Celsius)", "Blood Cortisol Level (mcg/dl)")]), type = "pearson")
# Extract r-values and p-values
r_values <- corr_test$r
p_values <- corr_test$P
# Filter for rows/columns related to Stress_Score, and ensure the correct indices are used
r_values_filtered <- r_values["Stress_Score", -which(colnames(r_values) == "Stress_Score")]
p_values_filtered <- p_values["Stress_Score", -which(colnames(p_values) == "Stress_Score")]
# Convert to data frames and ensure rownames are preserved
r_values_df <- data.frame(Variable = names(r_values_filtered), R_Value = as.numeric(r_values_filtered), stringsAsFactors = FALSE)
p_values_df <- data.frame(Variable = names(p_values_filtered), P_Value = as.numeric(p_values_filtered), stringsAsFactors = FALSE)
# Function to add significance stars for p-values
add_p_stars <- function(p_value) {
if (p_value < 0.001) {
"***"
} else if (p_value < 0.01) {
"**"
} else if (p_value < 0.05) {
"*"
} else {
""
}
}
# Function to add significance stars for r-values based on the strength of correlation
add_r_stars <- function(r_value) {
if (abs(r_value) >= 0.80) {
"****"
} else if (abs(r_value) >= 0.60) {
"***"
} else if (abs(r_value) >= 0.40) {
"**"
} else if (abs(r_value) >= 0.20) {
"*"
} else {
""
}
}
# Add stars to p-values
p_values_df$Stars <- sapply(p_values_df$P_Value, add_p_stars)
# Add stars to r-values
r_values_df$R_Stars <- sapply(r_values_df$R_Value, add_r_stars)
# Merge r-values and p-values data frames
results_df <- merge(r_values_df, p_values_df, by = "Variable")
# Visualize the table
table_plot <- ggplot(results_df, aes(x = Variable)) +
geom_tile(aes(y = 1, fill = as.factor(R_Stars)), color = "white") +
geom_text(aes(y = 1, label = sprintf("%.2f%s", R_Value, R_Stars)), hjust = 0.5, vjust = 0.5, color = "black") +
geom_text(aes(y = 2, label = sprintf("%.3f%s", P_Value, Stars)), hjust = 0.5, vjust = 0.5, color = "black") +
scale_fill_manual(values = c(
"*" = "#FFFFCC",
"**" = "#FFD700",
"***" = "#FFA500",
"****" = "#FF4500"
)) +
scale_y_continuous(breaks = 1:2, labels = c("R Value", "P Value")) +
labs(title = "Correlation of Combined Stress Score with Individual Stress Measures",
x = "Variables",
y = "") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.text.y = element_text(size = 12, face = "bold"),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
legend.position = "none")
ggsave("correlation_table_combined_stress_score.png", table_plot, width = 10, height = 5)
# Display the table
print(table_plot)