Horse Tidy

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Running Code

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

1 + 1
[1] 2

You can add options to executable code like this

[1] 4

The echo: false option disables the printing of code (only output is displayed).

data2 <- read.delim("C:/Users/ch11574/data2.txt", header = TRUE, sep = "\t")

original_names <- c("ID", "Sex (Female/Male)", "Baseline Compliance (seconds)", "Post Treatment Compliance (seconds)",
                    "Weight (kg)", "IRT (Celsius)", "Ambient Air Temperature (Celcius)",
                    "Blood Cortisol Level (mcg/dl)", "Heart Rate (Beats Per Minute)", "Standardised Stress Score",
                    "Standardised_Stress_Score_Abs", "Standardised_Stress_Score_Scaled", "Sex", "Compliance_Change")

current_names <- colnames(data2)

# Create a named vector for renaming
rename_vector <- setNames(original_names, current_names)

# Rename columns
colnames(data2) <- rename_vector[colnames(data2)]

# Check the column names to ensure they are correctly renamed
print(names(data2))
 [1] "ID"                                  "Sex (Female/Male)"                  
 [3] "Baseline Compliance (seconds)"       "Post Treatment Compliance (seconds)"
 [5] "Weight (kg)"                         "IRT (Celsius)"                      
 [7] "Ambient Air Temperature (Celcius)"   "Blood Cortisol Level (mcg/dl)"      
 [9] "Heart Rate (Beats Per Minute)"       "Standardised Stress Score"          
[11] "Standardised_Stress_Score_Abs"       "Standardised_Stress_Score_Scaled"   
[13] "Sex"                                 "Compliance_Change"                  
library(GGally)
Warning: package 'GGally' was built under R version 4.4.2
Loading required package: ggplot2
Warning: package 'ggplot2' was built under R version 4.4.2
Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2
library(ggplot2)
library(dplyr)
Warning: package 'dplyr' was built under R version 4.4.2

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# Check for missing values and handle them
data2 <- data2 %>% na.omit()  # Remove rows with missing values

# Ensure the columns have the correct types
data2$`Sex (Female/Male)` <- as.factor(data2$`Sex (Female/Male)`)  # Ensure 'Sex' is a factor (categorical)

# Specify columns to include in the pairwise plot, including 'Sex'
columns_to_plot <- c("Baseline Compliance (seconds)",  "Heart Rate (Beats Per Minute)", "IRT (Celsius)", 
                     "Blood Cortisol Level (mcg/dl)", "Ambient Air Temperature (Celcius)", "Weight (kg)", 
                     "Sex (Female/Male)")

# Ensure the columns exist in the dataset
columns_to_plot <- columns_to_plot[columns_to_plot %in% colnames(data2)]

# Custom function for boxplots (with outliers)
custom_boxplot <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) +
    geom_boxplot(outlier.shape = 16, outlier.size = 2, alpha = 0.7) +  # Add boxplot with outliers
    theme_minimal()                                           # Minimal theme
}

# Generate the pairwise plot
pairwise_plot <- ggpairs(
  data2,
  aes(color = `Sex (Female/Male)`),  # Group by 'Sex'
  columns = columns_to_plot,  # Columns to include, including 'Sex'
  lower = list(
    continuous = "smooth",  # Regression lines for lower panels
    combo = "facetdensity",  # Density for combos
    discrete = "facetbar"  # Bars for discrete variables
  ),
  upper = list(
    continuous = wrap("cor", size = 3, digits = 2),  # Correlation in upper panels
    combo = custom_boxplot  # Use custom boxplot function for combos
  ),
  diag = list(
    continuous = "densityDiag",  # Density on diagonal
    discrete = "barDiag"  # Bars for discrete variables
  )
)

# Customize the theme
pairwise_plot <- pairwise_plot +
  theme_minimal() +
  theme(panel.grid = element_blank())

# Save the plot to a file (optional)
ggsave("pairwise_plot_with_boxplots_clean.png", pairwise_plot, width = 12, height = 8)

# Display the plot
print(pairwise_plot)

library(ggplot2)
library(reshape2)
Warning: package 'reshape2' was built under R version 4.4.2
library(Hmisc)
Warning: package 'Hmisc' was built under R version 4.4.2

Attaching package: 'Hmisc'
The following objects are masked from 'package:dplyr':

    src, summarize
The following objects are masked from 'package:base':

    format.pval, units
library(dplyr)

# Assume data2 is already loaded and contains the required columns
# Calculate correlations and p-values
corr_test <- rcorr(as.matrix(data2[, c("Baseline Compliance (seconds)",  "Heart Rate (Beats Per Minute)", "IRT (Celsius)", 
                     "Blood Cortisol Level (mcg/dl)", "Ambient Air Temperature (Celcius)", "Weight (kg)")]))

# Extract p-values
p_matrix <- corr_test$P

# Convert the p-values matrix to a long format data frame
p_values_long <- melt(p_matrix, na.rm = TRUE)

# Filter out duplicate combinations (upper triangle)
p_values_long <- p_values_long %>%
  filter(as.integer(Var1) < as.integer(Var2))

# Create a new column to indicate significance
p_values_long$significant <- ifelse(p_values_long$value < 0.05, "Significant", "Not Significant")

# Create the heatmap, coloring only significant p-values
heatmap <- ggplot(p_values_long, aes(Var1, Var2, fill = significant)) +
  geom_tile(color = "white") +
  scale_fill_manual(values = c("Significant" = "red", "Not Significant" = "grey90"), name = "Significance") +
  geom_text(aes(label = sprintf("%.3f", value)), color = "black", size = 3) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
  coord_fixed() +
  labs(title = "P-Value Heatmap with Significant Values Highlighted", x = "Variables", y = "Variables")

# Save the heatmap to a file (optional)
ggsave("significant_p_value_heatmap.png", heatmap, width = 10, height = 8, dpi = 300)

# Display the heatmap
print(heatmap)

library(ggplot2)
library(reshape2)
library(Hmisc)

# Subset data to only include numeric columns
vars <- c(
  "Baseline Compliance (seconds)",
  "Heart Rate (Beats Per Minute)",
  "IRT (Celsius)",
  "Blood Cortisol Level (mcg/dl)",
  "Ambient Air Temperature (Celcius)",
  "Weight (kg)"
)

# Ensure we're working only with numeric data
data_subset <- data2[, c(vars, "Sex (Female/Male)")]
data_subset <- na.omit(data_subset)  # Remove rows with missing data

# Split the data by Sex
data_male <- data_subset %>% filter(`Sex (Female/Male)` == "Male")
data_female <- data_subset %>% filter(`Sex (Female/Male)` == "Female")

# Calculate correlation results using rcorr() function for each group
correlation_results_male <- rcorr(as.matrix(data_male[, vars]), type = "pearson")
correlation_results_female <- rcorr(as.matrix(data_female[, vars]), type = "pearson")

# Extract p-values matrix (p) for each group
p_matrix_male <- correlation_results_male$P
p_matrix_female <- correlation_results_female$P

# Convert the p-values matrices to long format data frames
p_values_long_male <- melt(p_matrix_male, na.rm = TRUE)
p_values_long_female <- melt(p_matrix_female, na.rm = TRUE)

# Filter out duplicate combinations (upper triangle) for both matrices
p_values_long_male <- p_values_long_male %>% filter(as.integer(Var1) < as.integer(Var2))
p_values_long_female <- p_values_long_female %>% filter(as.integer(Var1) < as.integer(Var2))

# Add a new column to indicate significance for both data frames
p_values_long_male$significant <- ifelse(p_values_long_male$value < 0.05, "Significant", "Not Significant")
p_values_long_female$significant <- ifelse(p_values_long_female$value < 0.05, "Significant", "Not Significant")

# Combine the p-values with indication of sex
p_values_long_male$Sex <- "Male"
p_values_long_female$Sex <- "Female"
p_values_combined <- merge(p_values_long_male, p_values_long_female, by = c("Var1", "Var2"), suffixes = c("_Male", "_Female"))

# Create a combined significance column
p_values_combined$significance <- apply(p_values_combined, 1, function(row) {
  male_sig <- ifelse(row["significant_Male"] == "Significant", "M", "")
  female_sig <- ifelse(row["significant_Female"] == "Significant", "F", "")
  paste0(male_sig, female_sig)
})

# Create the heatmap
heatmap <- ggplot(p_values_combined, aes(Var1, Var2)) +
  geom_tile(aes(fill = significance), color = "white") +
  scale_fill_manual(values = c("M" = "blue", "F" = "pink", "MF" = "purple", "Not Significant" = "grey90"),
                    name = "Significance",
                    labels = c("M" = "Male", "F" = "Female", "MF" = "Both", "Not Significant" = "None")) +
  geom_text(aes(label = sprintf("M: %.3f\nF: %.3f", value_Male, value_Female)), color = "black", size = 3) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
  coord_fixed() +
  labs(title = "P-Value Heatmap: Male and Female Comparison",
       x = "Variables",
       y = "Variables")

# Save the heatmap to a file (optional)
ggsave("merged_significant_p_value_heatmap.png", heatmap, width = 12, height = 8, dpi = 300)

# Display the heatmap
print(heatmap)

library(ggplot2)
library(reshape2)
library(broom)

# Step 1: Subset data to only include the specified numeric columns
vars <- c(
  "Baseline Compliance (seconds)",
  "Heart Rate (Beats Per Minute)",
  "IRT (Celsius)",
  "Blood Cortisol Level (mcg/dl)",
  "Ambient Air Temperature (Celcius)",
  "Weight (kg)"
)

# Ensure we're working only with numeric data
data_subset <- data2[, c(vars, "Sex (Female/Male)"), drop = FALSE]
data_subset <- na.omit(data_subset)  # Remove rows with missing data

# Step 2: Perform t-tests for each continuous variable
t_tests <- lapply(vars, function(var) t.test(data_subset[[var]] ~ data_subset$`Sex (Female/Male)`))

# Extract t-values and p-values from the t-tests
t_values <- sapply(t_tests, function(test) test$statistic)
p_values <- sapply(t_tests, function(test) test$p.value)
means_male <- sapply(vars, function(var) mean(data_subset[data_subset$`Sex (Female/Male)` == "Male", var], na.rm = TRUE))
means_female <- sapply(vars, function(var) mean(data_subset[data_subset$`Sex (Female/Male)` == "Female", var], na.rm = TRUE))

# Step 3: Create a data frame for the t-values and p-values
t_test_results <- data.frame(
  Variable = vars,
  T_Value = t_values,
  P_Value = p_values,
  Significant = ifelse(p_values < 0.05, "Significant", "Not Significant"),
  Mean_Male = means_male,
  Mean_Female = means_female,
  Direction = ifelse(means_male > means_female, "Male > Female", "Female > Male")
)

# Step 4: Convert the data frame to long format for ggplot2
t_test_results_long <- melt(t_test_results, id.vars = "Variable", variable.name = "Metric", value.name = "Value")

# Ensure that all values are numeric
t_test_results_long$Value <- as.numeric(as.character(t_test_results_long$Value))
Warning: NAs introduced by coercion
# Create a function to add stars based on significance
add_stars <- function(p_value) {
  if (p_value < 0.001) {
    "***"
  } else if (p_value < 0.01) {
    "**"
  } else if (p_value < 0.05) {
    "*"
  } else {
    ""
  }
}

# Add stars to p-values
p_values_data <- t_test_results_long %>% 
  filter(Metric == "P_Value") %>%
  mutate(Stars = sapply(Value, add_stars))

# Add stars to t-values based on p-value thresholds
t_values_data <- t_test_results_long %>% 
  filter(Metric == "T_Value") %>%
  mutate(Stars = sapply(t_test_results_long %>% filter(Metric == "P_Value") %>% .$Value, add_stars))

# Step 5: Create the heatmap
heatmap <- ggplot() +
  geom_tile(data = t_values_data, aes(Variable, Metric, fill = as.numeric(Value)), color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, 
                       limit = c(-max(abs(t_values_data$Value)), max(abs(t_values_data$Value))), 
                       space = "Lab", name = "T-Value") +
  geom_text(data = t_values_data, aes(Variable, Metric, label = sprintf("%.2f%s", as.numeric(Value), Stars)), color = "black", size = 3) +
  geom_text(data = p_values_data, aes(Variable, Metric, label = sprintf("%.3f%s", as.numeric(Value), Stars)), color = "black", size = 3, nudge_y = 0.3) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
  coord_fixed() +
  labs(title = "T-Values Heatmap with Significant Differences Highlighted",
       x = "Variables",
       y = "Metric") +
  theme(legend.title = element_text(face = "bold"),
        legend.text = element_text(size = 10))

# Add the direction of the differences to the legend
heatmap <- heatmap + guides(fill = guide_legend(title = "T-Value\n(Male > Female in Blue, Female > Male in Red)"))

# Save the heatmap to a file (optional)
ggsave("t_values_significant_heatmap.png", heatmap, width = 12, height = 8, dpi = 300)

# Display the heatmap
print(heatmap)

library(GGally)
library(ggplot2)
library(dplyr)

# Check for missing values and handle them
data2 <- data2 %>% na.omit()  # Remove rows with missing values

# Ensure the columns have the correct types
data2$`Sex (Female/Male)` <- as.factor(data2$`Sex (Female/Male)`)  # Ensure 'Sex' is a factor (categorical)

# Specify columns to include in the pairwise plot, including 'Sex'
columns_to_plot <- c("Baseline Compliance (seconds)", "Post Treatment Compliance (seconds)", "Compliance_Change", "Standardised Stress Scrore", "Weight (kg)",  "Sex (Female/Male)")

# Ensure the columns exist in the dataset
columns_to_plot <- columns_to_plot[columns_to_plot %in% colnames(data2)]

# Custom function for boxplots (without individual data points)
custom_boxplot <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) +
    geom_boxplot(outlier.shape = 16, outlier.size = 2, alpha = 0.7) +  # Add boxplot with outliers
    theme_minimal()                                            # Minimal theme
}

# Generate the pairwise plot
pairwise_plot <- ggpairs(
  data2,
  aes(color = `Sex (Female/Male)`),  # Group by 'Sex'
  columns = columns_to_plot,  # Columns to include, including 'Sex'
  lower = list(
    continuous = "smooth",  # Regression lines for lower panels
    combo = "facetdensity",  # Density for combos
    discrete = "facetbar"  # Bars for discrete variables
  ),
  upper = list(
    continuous = wrap("cor", size = 3, digits = 2),  # Correlation in upper panels
    combo = custom_boxplot  # Use custom boxplot function for combos
  ),
  diag = list(
    continuous = "densityDiag",  # Density on diagonal
    discrete = "barDiag"  # Bars for discrete variables
  )
)

# Customize the theme
pairwise_plot <- pairwise_plot +
  theme_minimal() +
  theme(panel.grid = element_blank())

# Save the plot to a file (optional)
ggsave("pairwise_plot_with_boxplots_clean_POST.png", pairwise_plot, width = 12, height = 8)

# Display the plot
print(pairwise_plot)

library(ggplot2)
library(reshape2)
library(Hmisc)

# Step 1: Subset data to only include the specified numeric columns
vars <- c(
  "Baseline Compliance (seconds)",
  "Post Treatment Compliance (seconds)",
  "Compliance_Change",
  
  "Weight (kg)"
)

# Ensure we're working only with numeric data
data_subset <- data2[, vars]
data_subset <- na.omit(data_subset)  # Remove rows with missing data

# Step 2: Calculate correlation results using rcorr() function
correlation_results <- rcorr(as.matrix(data_subset), type = "pearson")

# Step 3: Extract p-values matrix (p)
p_matrix <- correlation_results$P

# Step 4: Convert the p-values matrix to a long format data frame
p_values_long <- melt(p_matrix, na.rm = TRUE)

# Filter out duplicate combinations (upper triangle)
p_values_long <- p_values_long %>% filter(as.integer(Var1) < as.integer(Var2))

# Create a new column to indicate significance
p_values_long$significant <- ifelse(p_values_long$value < 0.05, "Significant", "Not Significant")

# Create the heatmap, coloring only significant p-values
heatmap <- ggplot(p_values_long, aes(Var1, Var2, fill = significant)) +
  geom_tile(color = "white") +
  scale_fill_manual(values = c("Significant" = "red", "Not Significant" = "grey90"), name = "Significance") +
  geom_text(aes(label = sprintf("%.3f", value)), color = "black", size = 3) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
  coord_fixed() +
  labs(title = "P-Value Heatmap with Significant Values Highlighted",
       x = "Variables",
       y = "Variables")

# Save the heatmap to a file (optional)
ggsave("whole_cohort_significant_p_value_heatmap.png", heatmap, width = 10, height = 8, dpi = 300)

# Display the heatmap
print(heatmap)

library(ggplot2)
library(reshape2)
library(Hmisc)

# Subset data to only include numeric columns
vars <- c(
"Baseline Compliance (seconds)",
  "Post Treatment Compliance (seconds)",
  "Compliance_Change",
    "Weight (kg)")

# Ensure we're working only with numeric data
data_subset <- data2[, c(vars, "Sex (Female/Male)")]
data_subset <- na.omit(data_subset)  # Remove rows with missing data

# Split the data by Sex
data_male <- data_subset %>% filter(`Sex (Female/Male)` == "Male")
data_female <- data_subset %>% filter(`Sex (Female/Male)` == "Female")

# Calculate correlation results using rcorr() function for each group
correlation_results_male <- rcorr(as.matrix(data_male[, vars]), type = "pearson")
correlation_results_female <- rcorr(as.matrix(data_female[, vars]), type = "pearson")

# Extract p-values matrix (p) for each group
p_matrix_male <- correlation_results_male$P
p_matrix_female <- correlation_results_female$P

# Convert the p-values matrices to long format data frames
p_values_long_male <- melt(p_matrix_male, na.rm = TRUE)
p_values_long_female <- melt(p_matrix_female, na.rm = TRUE)

# Filter out duplicate combinations (upper triangle) for both matrices
p_values_long_male <- p_values_long_male %>% filter(as.integer(Var1) < as.integer(Var2))
p_values_long_female <- p_values_long_female %>% filter(as.integer(Var1) < as.integer(Var2))

# Add a new column to indicate significance for both data frames
p_values_long_male$significant <- ifelse(p_values_long_male$value < 0.05, "Significant", "Not Significant")
p_values_long_female$significant <- ifelse(p_values_long_female$value < 0.05, "Significant", "Not Significant")

# Combine the p-values with indication of sex
p_values_long_male$Sex <- "Male"
p_values_long_female$Sex <- "Female"
p_values_combined <- merge(p_values_long_male, p_values_long_female, by = c("Var1", "Var2"), suffixes = c("_Male", "_Female"))

# Create a combined significance column
p_values_combined$significance <- apply(p_values_combined, 1, function(row) {
  male_sig <- ifelse(row["significant_Male"] == "Significant", "M", "")
  female_sig <- ifelse(row["significant_Female"] == "Significant", "F", "")
  paste0(male_sig, female_sig)
})

# Create the heatmap
heatmap <- ggplot(p_values_combined, aes(Var1, Var2)) +
  geom_tile(aes(fill = significance), color = "white") +
  scale_fill_manual(values = c("M" = "blue", "F" = "pink", "MF" = "purple", "Not Significant" = "grey90"),
                    name = "Significance",
                    labels = c("M" = "Male", "F" = "Female", "MF" = "Both", "Not Significant" = "None")) +
  geom_text(aes(label = sprintf("M: %.3f\nF: %.3f", value_Male, value_Female)), color = "black", size = 3) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
  coord_fixed() +
  labs(title = "P-Value Heatmap: Male and Female Comparison",
       x = "Variables",
       y = "Variables")

# Save the heatmap to a file (optional)
ggsave("merged_significant_p_value_heatmap.png", heatmap, width = 12, height = 8, dpi = 300)

# Display the heatmap
print(heatmap)

library(ggplot2)
library(reshape2)
library(broom)

# Step 1: Subset data to only include the specified numeric columns
vars <- c(
  "Baseline Compliance (seconds)",
  "Post Treatment Compliance (seconds)",
  "Compliance_Change",
  "Standardised Stress Score",
  "Weight (kg)"
)

# Ensure we're working only with numeric data
data_subset <- data2[, c(vars, "Sex (Female/Male)"), drop = FALSE]
data_subset <- na.omit(data_subset)  # Remove rows with missing data

# Step 2: Perform t-tests for each continuous variable
t_tests <- lapply(vars, function(var) t.test(data_subset[[var]] ~ data_subset$`Sex (Female/Male)`))

# Extract t-values and p-values from the t-tests
t_values <- sapply(t_tests, function(test) test$statistic)
p_values <- sapply(t_tests, function(test) test$p.value)
means_male <- sapply(vars, function(var) mean(data_subset[data_subset$`Sex (Female/Male)` == "Male", var], na.rm = TRUE))
means_female <- sapply(vars, function(var) mean(data_subset[data_subset$`Sex (Female/Male)` == "Female", var], na.rm = TRUE))

# Step 3: Create a data frame for the t-values and p-values
t_test_results <- data.frame(
  Variable = vars,
  T_Value = t_values,
  P_Value = p_values,
  Significant = ifelse(p_values < 0.05, "Significant", "Not Significant"),
  Mean_Male = means_male,
  Mean_Female = means_female,
  Direction = ifelse(means_male > means_female, "Male > Female", "Female > Male")
)

# Step 4: Convert the data frame to long format for ggplot2
t_test_results_long <- melt(t_test_results, id.vars = "Variable", variable.name = "Metric", value.name = "Value")

# Ensure that all values are numeric
t_test_results_long$Value <- as.numeric(as.character(t_test_results_long$Value))
Warning: NAs introduced by coercion
# Create a function to add stars based on significance
add_stars <- function(p_value) {
  if (p_value < 0.001) {
    "***"
  } else if (p_value < 0.01) {
    "**"
  } else if (p_value < 0.05) {
    "*"
  } else {
    ""
  }
}

# Add stars to p-values
p_values_data <- t_test_results_long %>% 
  filter(Metric == "P_Value") %>%
  mutate(Stars = sapply(Value, add_stars))

# Add stars to t-values based on p-value thresholds
t_values_data <- t_test_results_long %>% 
  filter(Metric == "T_Value") %>%
  mutate(Stars = sapply(t_test_results_long %>% filter(Metric == "P_Value") %>% .$Value, add_stars))

# Step 5: Create the heatmap
heatmap <- ggplot() +
  geom_tile(data = t_values_data, aes(Variable, Metric, fill = as.numeric(Value)), color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, 
                       limit = c(-max(abs(t_values_data$Value)), max(abs(t_values_data$Value))), 
                       space = "Lab", name = "T-Value") +
  geom_text(data = t_values_data, aes(Variable, Metric, label = sprintf("%.2f%s", as.numeric(Value), Stars)), color = "black", size = 3) +
  geom_text(data = p_values_data, aes(Variable, Metric, label = sprintf("%.3f%s", as.numeric(Value), Stars)), color = "black", size = 3, nudge_y = 0.3) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
  coord_fixed() +
  labs(title = "T-Values Heatmap with Significant Differences Highlighted",
       x = "Variables",
       y = "Metric") +
  theme(legend.title = element_text(face = "bold"),
        legend.text = element_text(size = 10))

# Add the direction of the differences to the legend
heatmap <- heatmap + guides(fill = guide_legend(title = "T-Value\n(Male > Female in Blue, Female > Male in Red)"))

# Save the heatmap to a file (optional)
ggsave("t_values_significant_heatmap.png", heatmap, width = 12, height = 8, dpi = 300)

# Display the heatmap
print(heatmap)

# Load necessary libraries
# Load necessary libraries
library(ggplot2)
library(dplyr)
library(tidyr)
Warning: package 'tidyr' was built under R version 4.4.2

Attaching package: 'tidyr'
The following object is masked from 'package:reshape2':

    smiths
library(corrplot)
corrplot 0.95 loaded
# Assuming data2 is already loaded in your R environment
# Convert 'Sex (Female/Male)' column to a factor if it's not already
data2$`Sex (Female/Male)` <- as.factor(data2$`Sex (Female/Male)`)

# Summary statistics for Baseline Compliance and Post Treatment Compliance
summary_stats <- data2 %>%
  summarise(
    baseline_mean = mean(`Baseline Compliance (seconds)`, na.rm = TRUE),
    baseline_sd = sd(`Baseline Compliance (seconds)`, na.rm = TRUE),
    post_treatment_mean = mean(`Post Treatment Compliance (seconds)`, na.rm = TRUE),
    post_treatment_sd = sd(`Post Treatment Compliance (seconds)`, na.rm = TRUE)
  )
print(summary_stats)
  baseline_mean baseline_sd post_treatment_mean post_treatment_sd
1       35.1249    1.430335            29.94207          1.046642
# Create a new data frame for long format to use in box plots
data_long <- data2 %>%
  gather(key = "Treatment_Phase", value = "Compliance", 
         `Baseline Compliance (seconds)`, `Post Treatment Compliance (seconds)`)

# Define your Documents directory
documents_dir <- "~/Documents/"

# Box plot for the whole cohort: Baseline vs Post Treatment Compliance
plot1 <- ggplot(data_long, aes(x = Treatment_Phase, y = Compliance, fill = Treatment_Phase)) +
  geom_boxplot() +
  labs(
    title = "Comparison of Baseline and Post Treatment Compliance (Whole Cohort)",
    x = "Treatment Phase",
    y = "Compliance (seconds)"
  ) +
  theme_minimal()
ggsave(filename = paste0(documents_dir, "Whole_Cohort_Box_Plot.png"), plot = plot1, width = 12, height = 8)

# Box plot by Sex: Baseline vs Post Treatment Compliance for each gender
plot2 <- ggplot(data_long, aes(x = Treatment_Phase, y = Compliance, fill = Treatment_Phase)) +
  geom_boxplot() +
  facet_wrap(~`Sex (Female/Male)`) +
  labs(
    title = "Comparison of Baseline and Post Treatment Compliance by Sex",
    x = "Treatment Phase",
    y = "Compliance (seconds)"
  ) +
  theme_minimal()
ggsave(filename = paste0(documents_dir, "Sex_Box_Plot.png"), plot = plot2, width = 12, height = 8)

# Statistical comparison: Paired t-test for the whole cohort
paired_t_test_cohort <- t.test(
  data2$`Baseline Compliance (seconds)`, 
  data2$`Post Treatment Compliance (seconds)`,
  paired = TRUE
)
print(paired_t_test_cohort)

    Paired t-test

data:  data2$`Baseline Compliance (seconds)` and data2$`Post Treatment Compliance (seconds)`
t = 268.28, df = 497, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 5.144875 5.220788
sample estimates:
mean difference 
       5.182831 
# Statistical comparison: Paired t-test by Sex
paired_t_test_sex <- data2 %>%
  group_by(`Sex (Female/Male)`) %>%
  summarise(
    t_test_result = list(t.test(
      `Baseline Compliance (seconds)`, 
      `Post Treatment Compliance (seconds)`, 
      paired = TRUE
    ))
  )
print(paired_t_test_sex)
# A tibble: 2 × 2
  `Sex (Female/Male)` t_test_result
  <fct>               <list>       
1 Female              <htest>      
2 Male                <htest>      
# Correlation analysis for the whole cohort
correlation_full_cohort <- cor(data2$`Baseline Compliance (seconds)`, data2$`Post Treatment Compliance (seconds)`, use = "complete.obs")
print(paste("Correlation for the full cohort: ", round(correlation_full_cohort, 2)))
[1] "Correlation for the full cohort:  0.99"
# Correlation analysis by Sex category
correlation_by_sex <- data2 %>%
  group_by(`Sex (Female/Male)`) %>%
  summarise(
    correlation = cor(`Baseline Compliance (seconds)`, `Post Treatment Compliance (seconds)`, use = "complete.obs")
  )
print(correlation_by_sex)
# A tibble: 2 × 2
  `Sex (Female/Male)` correlation
  <fct>                     <dbl>
1 Female                    0.982
2 Male                      0.991
# Visualize the relationship between Baseline and Post Treatment for full cohort and by Sex category
plot3 <- ggplot(data2, aes(x = `Baseline Compliance (seconds)`, y = `Post Treatment Compliance (seconds)`)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Relationship between Baseline Compliance and Post Treatment Compliance",
    x = "Baseline Compliance (seconds)",
    y = "Post Treatment Compliance (seconds)"
  ) +
  theme_minimal()
ggsave(filename = paste0(documents_dir, "Relationship_Full_Cohort.png"), plot = plot3, width = 12, height = 8)
`geom_smooth()` using formula = 'y ~ x'
# Visualize the relationship by Sex category
plot4 <- ggplot(data2, aes(x = `Baseline Compliance (seconds)`, y = `Post Treatment Compliance (seconds)`, color = `Sex (Female/Male)`)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between Baseline Compliance and Post Treatment Compliance by Sex",
    x = "Baseline Compliance (seconds)",
    y = "Post Treatment Compliance (seconds)"
  ) +
  theme_minimal()
ggsave(filename = paste0(documents_dir, "Relationship_By_Sex.png"), plot = plot4, width = 12, height = 8)
`geom_smooth()` using formula = 'y ~ x'
# Box plot for the change in compliance by Sex (Baseline - Post-Treatment)
data2$Compliance_Change <- data2$`Post Treatment Compliance (seconds)` - data2$`Baseline Compliance (seconds)`

plot5 <- ggplot(data2, aes(x = `Sex (Female/Male)`, y = Compliance_Change, fill = `Sex (Female/Male)`)) +
  geom_boxplot() +
  labs(
    title = "Change in Compliance by Sex",
    x = "Sex",
    y = "Change in Compliance (seconds)"
  ) +
  theme_minimal()
ggsave(filename = paste0(documents_dir, "Change_By_Sex.png"), plot = plot5, width = 12, height = 8)
# Load necessary libraries
library(ggplot2)
library(dplyr)

# Assuming data2 is already loaded in your R environment
data2$`Sex (Female/Male)` <- as.factor(data2$`Sex (Female/Male)`)

# Fit a linear regression model
linear_model <- lm(`Post Treatment Compliance (seconds)` ~ `Baseline Compliance (seconds)`, data = data2)
summary(linear_model)

Call:
lm(formula = `Post Treatment Compliance (seconds)` ~ `Baseline Compliance (seconds)`, 
    data = data2)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.01780 -0.02830  0.03085  0.06742  2.60626 

Coefficients:
                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                     4.571268   0.184969   24.71   <2e-16 ***
`Baseline Compliance (seconds)` 0.722302   0.005262  137.28   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1678 on 496 degrees of freedom
Multiple R-squared:  0.9744,    Adjusted R-squared:  0.9743 
F-statistic: 1.884e+04 on 1 and 496 DF,  p-value: < 2.2e-16
# Fit a sigmoidal (logistic) model
sigmoid_model <- nls(`Post Treatment Compliance (seconds)` ~ SSlogis(`Baseline Compliance (seconds)`, Asym, xmid, scal), data = data2)
summary(sigmoid_model)

Formula: `Post Treatment Compliance (seconds)` ~ SSlogis(`Baseline Compliance (seconds)`, 
    Asym, xmid, scal)

Parameters:
     Estimate Std. Error t value Pr(>|t|)    
Asym  41.1991     1.1558   35.65   <2e-16 ***
xmid  24.0942     0.3441   70.02   <2e-16 ***
scal  11.2337     0.8467   13.27   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1603 on 495 degrees of freedom

Number of iterations to convergence: 6 
Achieved convergence tolerance: 1.941e-06
# Create a sequence of baseline compliance values for prediction
baseline_seq <- seq(min(data2$`Baseline Compliance (seconds)`), max(data2$`Baseline Compliance (seconds)`), length.out = 100)

# Ensure the column name is correctly referenced and exists in new_data
new_data <- data.frame(`Baseline Compliance (seconds)` = baseline_seq)

# Debugging step: Print new_data to verify
print(new_data)
    Baseline.Compliance..seconds.
1                        30.78000
2                        30.87394
3                        30.96788
4                        31.06182
5                        31.15576
6                        31.24970
7                        31.34364
8                        31.43758
9                        31.53152
10                       31.62545
11                       31.71939
12                       31.81333
13                       31.90727
14                       32.00121
15                       32.09515
16                       32.18909
17                       32.28303
18                       32.37697
19                       32.47091
20                       32.56485
21                       32.65879
22                       32.75273
23                       32.84667
24                       32.94061
25                       33.03455
26                       33.12848
27                       33.22242
28                       33.31636
29                       33.41030
30                       33.50424
31                       33.59818
32                       33.69212
33                       33.78606
34                       33.88000
35                       33.97394
36                       34.06788
37                       34.16182
38                       34.25576
39                       34.34970
40                       34.44364
41                       34.53758
42                       34.63152
43                       34.72545
44                       34.81939
45                       34.91333
46                       35.00727
47                       35.10121
48                       35.19515
49                       35.28909
50                       35.38303
51                       35.47697
52                       35.57091
53                       35.66485
54                       35.75879
55                       35.85273
56                       35.94667
57                       36.04061
58                       36.13455
59                       36.22848
60                       36.32242
61                       36.41636
62                       36.51030
63                       36.60424
64                       36.69818
65                       36.79212
66                       36.88606
67                       36.98000
68                       37.07394
69                       37.16788
70                       37.26182
71                       37.35576
72                       37.44970
73                       37.54364
74                       37.63758
75                       37.73152
76                       37.82545
77                       37.91939
78                       38.01333
79                       38.10727
80                       38.20121
81                       38.29515
82                       38.38909
83                       38.48303
84                       38.57697
85                       38.67091
86                       38.76485
87                       38.85879
88                       38.95273
89                       39.04667
90                       39.14061
91                       39.23455
92                       39.32848
93                       39.42242
94                       39.51636
95                       39.61030
96                       39.70424
97                       39.79818
98                       39.89212
99                       39.98606
100                      40.08000
library(ggplot2)
library(reshape2)
library(broom)

# Step 1: Calculate the difference between Baseline and Post Treatment Compliance
data_subset <- data2[, c("Baseline Compliance (seconds)", "Post Treatment Compliance (seconds)", "Sex (Female/Male)")]
data_subset <- na.omit(data_subset)  # Remove rows with missing data
data_subset$Compliance_Change <- data_subset$`Post Treatment Compliance (seconds)` - data_subset$`Baseline Compliance (seconds)`

# Ensure the 'Sex (Female/Male)' column has exactly two levels: "Male" and "Female"
data_subset <- data_subset %>% filter(`Sex (Female/Male)` %in% c("Male", "Female"))

# Step 2: Perform t-tests for the full cohort and by sex
t_test_full <- t.test(data_subset$Compliance_Change)
t_test_male <- t.test(data_subset$Compliance_Change[data_subset$`Sex (Female/Male)` == "Male"])
t_test_female <- t.test(data_subset$Compliance_Change[data_subset$`Sex (Female/Male)` == "Female"])

# Extract t-values, p-values, and means
t_values <- c(Full = t_test_full$statistic, Male = t_test_male$statistic, Female = t_test_female$statistic)
p_values <- c(Full = t_test_full$p.value, Male = t_test_male$p.value, Female = t_test_female$p.value)
means <- c(Full = mean(data_subset$Compliance_Change), 
           Male = mean(data_subset$Compliance_Change[data_subset$`Sex (Female/Male)` == "Male"]), 
           Female = mean(data_subset$Compliance_Change[data_subset$`Sex (Female/Male)` == "Female"]))

# Determine if pre or post-treatment values are higher
baseline_means <- c(Full = mean(data_subset$`Baseline Compliance (seconds)`), 
                    Male = mean(data_subset$`Baseline Compliance (seconds)`[data_subset$`Sex (Female/Male)` == "Male"]), 
                    Female = mean(data_subset$`Baseline Compliance (seconds)`[data_subset$`Sex (Female/Male)` == "Female"]))
post_treatment_means <- c(Full = mean(data_subset$`Post Treatment Compliance (seconds)`), 
                          Male = mean(data_subset$`Post Treatment Compliance (seconds)`[data_subset$`Sex (Female/Male)` == "Male"]), 
                          Female = mean(data_subset$`Post Treatment Compliance (seconds)`[data_subset$`Sex (Female/Male)` == "Female"]))
direction <- ifelse(post_treatment_means > baseline_means, "Post > Pre", "Pre > Post")

# Step 3: Create a data frame for the t-values, p-values, means, and direction
t_test_results <- data.frame(
  Group = c("Full Cohort", "Male", "Female"),
  T_Value = t_values,
  P_Value = p_values,
  Mean = means,
  Significant = ifelse(p_values < 0.05, "Significant", "Not Significant"),
  Direction = direction
)

# Create a function to add stars based on significance
add_stars <- function(p_value) {
  if (p_value < 0.001) {
    "***"
  } else if (p_value < 0.01) {
    "**"
  } else if (p_value < 0.05) {
    "*"
  } else {
    ""
  }
}

# Add stars to p-values
t_test_results$P_Stars <- sapply(t_test_results$P_Value, add_stars)
# Add stars to t-values
t_test_results$T_Stars <- sapply(t_test_results$P_Value, add_stars)

# Step 4: Remove unnecessary rows and convert to long format for ggplot2
t_test_results_long <- melt(t_test_results[, c("Group", "T_Value", "P_Value", "Mean")], id.vars = "Group", variable.name = "Metric", value.name = "Value")

# Ensure that all values are numeric where appropriate
t_test_results_long$Value <- as.numeric(as.character(t_test_results_long$Value))

# Separate stars data for T-Values and P-Values
t_stars_data <- t_test_results$T_Stars
p_stars_data <- t_test_results$P_Stars

# Step 5: Create the heatmap with corrected label alignment and length consistency
heatmap <- ggplot(t_test_results_long, aes(Group, Metric, fill = as.numeric(Value))) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, 
                       limit = c(-max(abs(t_test_results_long$Value)), max(abs(t_test_results_long$Value))), 
                       space = "Lab", name = "Value") +
  geom_text(data = t_test_results_long %>% filter(Metric == "T_Value"), 
            aes(Group, Metric, label = sprintf("%.2f%s", as.numeric(Value), t_stars_data[match(Group, t_test_results$Group)])), 
            color = "black", size = 3, vjust = -0.3) +
  geom_text(data = t_test_results_long %>% filter(Metric == "P_Value"), 
            aes(Group, Metric, label = sprintf("%.3f%s", as.numeric(Value), p_stars_data[match(Group, t_test_results$Group)])), 
            color = "black", size = 3, vjust = 0.3) +
  geom_text(data = t_test_results_long %>% filter(Metric == "Mean"), 
            aes(Group, Metric, label = sprintf("%.2f", as.numeric(Value))), color = "black", size = 3, vjust = 1.3) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
  coord_fixed() +
  labs(title = "T-Values and P-Values Heatmap for Compliance Change",
       x = "Groups",
       y = "Metric") +
  theme(legend.title = element_text(face = "bold"),
        legend.text = element_text(size = 10))

# Add the direction of the differences to the legend
heatmap <- heatmap + guides(fill = guide_legend(title = "T-Value\n(Post > Pre in Red, Pre > Post in Blue)"))

# Save the heatmap to a file (optional)
ggsave("compliance_change_heatmap.png", heatmap, width = 12, height = 8, dpi = 300)

# Display the heatmap
print(heatmap)

# Compare AIC values
AIC(linear_model)
[1] -360.7006
AIC(sigmoid_model)
[1] -404.8558
# Load necessary libraries
library(ggplot2)
library(dplyr)

# Create the scatterplot with points colored by Sex
plot <- ggplot(data2, aes(x = `Baseline Compliance (seconds)`, y = `Post Treatment Compliance (seconds)`, color = `Sex (Female/Male)`)) +
  geom_point() +  # Scatterplot
  stat_smooth(method = "nls", formula = y ~ SSlogis(x, Asym, xmid, scal), se = FALSE) +
  labs(title = "Scatterplot with Sigmoid Curve",
       x = "Baseline Compliance (seconds)",
       y = "Post Treatment Compliance (seconds)",
       color = "Sex") +
  theme_minimal()

# Save the plot as a PNG file in the Documents folder
ggsave(filename = "~/Documents/scatterplot_sigmoid_curve.png", plot = plot, width = 10, height = 6, dpi = 300)
Warning: Failed to fit group 1.
Caused by error in `method()`:
! number of iterations exceeded maximum of 50
print(plot)
Warning: Failed to fit group 1.
Caused by error in `method()`:
! number of iterations exceeded maximum of 50

# Load necessary libraries
library(ggplot2)
library(dplyr)

# Fit the logistic regression model
model <- nls(`Post Treatment Compliance (seconds)` ~ SSlogis(`Baseline Compliance (seconds)`, Asym, xmid, scal), data = data2)

# Summary of the model to see the parameters
summary(model)

Formula: `Post Treatment Compliance (seconds)` ~ SSlogis(`Baseline Compliance (seconds)`, 
    Asym, xmid, scal)

Parameters:
     Estimate Std. Error t value Pr(>|t|)    
Asym  41.1991     1.1558   35.65   <2e-16 ***
xmid  24.0942     0.3441   70.02   <2e-16 ***
scal  11.2337     0.8467   13.27   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1603 on 495 degrees of freedom

Number of iterations to convergence: 6 
Achieved convergence tolerance: 1.941e-06
# Function to predict post-treatment compliance based on baseline compliance
predict_compliance <- function(baseline) {
  Asym <- coef(model)["Asym"]
  xmid <- coef(model)["xmid"]
  scal <- coef(model)["scal"]
  Asym / (1 + exp((xmid - baseline) / scal))
}

#
#Example usage: Predicting post-treatment compliance for a new baseline compliance value
new_baseline <- 35  # Replace this with your actual baseline compliance value
predicted_post_treatment <- predict_compliance(new_baseline)
print(predicted_post_treatment)
    Asym 
29.88098 
library(ggplot2)
library(reshape2)
library(psych)
Warning: package 'psych' was built under R version 4.4.2

Attaching package: 'psych'
The following object is masked from 'package:Hmisc':

    describe
The following objects are masked from 'package:ggplot2':

    %+%, alpha
library(FactoMineR)
Warning: package 'FactoMineR' was built under R version 4.4.2
library(factoextra)
Warning: package 'factoextra' was built under R version 4.4.2
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Step 1: Prepare Data
vars <- c(
  "Baseline Compliance (seconds)",
  "Heart Rate (Beats Per Minute)",
  "IRT (Celsius)",
  "Blood Cortisol Level (mcg/dl)",
  "Weight (kg)",
  "Sex (Female/Male)",
  "Ambient Air Temperature (Celcius)"
)

data_subset <- data2[, vars]
data_subset <- na.omit(data_subset)  # Remove rows with missing data

# Encode Sex as a numeric variable for PCA
data_subset$`Sex (Female/Male)` <- as.numeric(factor(data_subset$`Sex (Female/Male)`, levels = c("Female", "Male")))

# Step 2: Perform PCA
pca_result <- PCA(data_subset, scale.unit = TRUE, graph = FALSE)

# Step 3: Interpret Results
# Print PCA summary to see the variance explained by each component
print(pca_result)
**Results for the Principal Component Analysis (PCA)**
The analysis was performed on 498 individuals, described by 7 variables
*The results are available in the following objects:

   name               description                          
1  "$eig"             "eigenvalues"                        
2  "$var"             "results for the variables"          
3  "$var$coord"       "coord. for the variables"           
4  "$var$cor"         "correlations variables - dimensions"
5  "$var$cos2"        "cos2 for the variables"             
6  "$var$contrib"     "contributions of the variables"     
7  "$ind"             "results for the individuals"        
8  "$ind$coord"       "coord. for the individuals"         
9  "$ind$cos2"        "cos2 for the individuals"           
10 "$ind$contrib"     "contributions of the individuals"   
11 "$call"            "summary statistics"                 
12 "$call$centre"     "mean of the variables"              
13 "$call$ecart.type" "standard error of the variables"    
14 "$call$row.w"      "weights for the individuals"        
15 "$call$col.w"      "weights for the variables"          
# Step 4: Create Visuals

# Scree Plot
fviz_eig(pca_result, addlabels = TRUE, ylim = c(0, 50)) +
  labs(title = "Scree Plot", x = "Principal Components", y = "Percentage of Variance Explained")

# Biplot
fviz_pca_biplot(pca_result, 
                repel = TRUE,
                col.var = "contrib", # Color by contributions to the PC
                gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
                addEllipses = TRUE, label = "var") +
  labs(title = "PCA Biplot")

# Step 5: Extract PCA results for detailed analysis
# Contribution of variables to PCs
contrib <- pca_result$var$contrib

# Print the contributions to assess which variables are most influential in the PCs
print(contrib)
                                         Dim.1      Dim.2       Dim.3
Baseline Compliance (seconds)     48.777304622  0.2744583  0.13435490
Heart Rate (Beats Per Minute)     47.205924420  0.2661722  0.36434048
IRT (Celsius)                      0.080662520 41.5720099  5.44239408
Blood Cortisol Level (mcg/dl)      0.001988482 18.4253230 39.86148730
Weight (kg)                        0.004983144 14.5214501 27.69799434
Sex (Female/Male)                  3.014068274  8.5945862  0.07459062
Ambient Air Temperature (Celcius)  0.915068538 16.3460004 26.42483828
                                       Dim.4       Dim.5
Baseline Compliance (seconds)      0.3181619  0.01761312
Heart Rate (Beats Per Minute)      3.3724379  0.00172521
IRT (Celsius)                      4.4977211  1.22601161
Blood Cortisol Level (mcg/dl)      0.7131504  0.12078170
Weight (kg)                       12.1585346 42.27662004
Sex (Female/Male)                 78.2268230  8.93084365
Ambient Air Temperature (Celcius)  0.7131711 47.42640465
library(FactoMineR)
library(factoextra)

# Step 1: Prepare Data
data_subset <- data2[, c("Baseline Compliance (seconds)", "Heart Rate (Beats Per Minute)", "IRT (Celsius)", "Blood Cortisol Level (mcg/dl)")]
data_subset <- na.omit(data_subset)  # Remove rows with missing data

# Step 2: Perform PCA
pca_result <- PCA(data_subset, scale.unit = TRUE, graph = FALSE)

# Step 3: Extract Principal Components
# Get the scores for the first two principal components
pc_scores <- pca_result$ind$coord[, 1:2]

# Convert to a data frame and add to the original data
pc_scores_df <- as.data.frame(pc_scores)
colnames(pc_scores_df) <- c("PC1_Score", "PC2_Score")

# Add the scores to the original dataset
data_combined <- cbind(data_subset, pc_scores_df)

# Step 4: Standardize the scores
data_combined$PC1_Standardized <- scale(data_combined$PC1_Score)
data_combined$PC2_Standardized <- scale(data_combined$PC2_Score)

# Step 5: Create the Combined Stress Score
data_combined$Stress_Score <- rowMeans(data_combined[, c("PC1_Standardized", "PC2_Standardized")])

# View the first few rows of the dataset with the combined stress score
head(data_combined)
  Baseline Compliance (seconds) Heart Rate (Beats Per Minute) IRT (Celsius)
1                         37.17                      153.2263      37.73957
2                         34.55                      149.6390      35.68413
3                         36.33                      149.4406      34.79854
4                         35.34                      150.2971      36.15973
5                         37.40                      149.0318      33.61477
6                         33.55                      146.8415      36.38343
  Blood Cortisol Level (mcg/dl)  PC1_Score   PC2_Score PC1_Standardized
1                      64.11183  2.0297384  0.45015896        1.5477482
2                      73.68676 -0.4574603 -0.28182430       -0.3488299
3                      54.43466  0.3960235 -1.49251301        0.3019821
4                      86.32615  0.1389429  0.48806683        0.1059489
5                     107.97796  0.7861604  0.09535008        0.5994754
6                     108.86475 -1.9762299  1.41777556       -1.5069460
  PC2_Standardized Stress_Score
1        0.4233912    0.9855697
2       -0.2650662   -0.3069481
3       -1.4037639   -0.5508909
4        0.4590450    0.2824969
5        0.0896803    0.3445779
6        1.3334706   -0.0867377
# Histogram of Combined Stress Score
histogram <- ggplot(data_combined, aes(x = Stress_Score)) +
  geom_histogram(binwidth = 0.5, fill = "#00AFBB", color = "black", alpha = 0.7) +
  labs(title = "Distribution of Combined Stress Score", x = "Stress Score", y = "Frequency") +
  theme_minimal()
ggsave("histogram_combined_stress_score.png", histogram, width = 8, height = 6)

# Display Histogram
print(histogram)

library(ggplot2)
library(Hmisc)

# Calculate correlations and p-values
corr_test <- rcorr(as.matrix(data_combined[, c("Stress_Score", "Baseline Compliance (seconds)", "Heart Rate (Beats Per Minute)", "IRT (Celsius)", "Blood Cortisol Level (mcg/dl)")]), type = "pearson")

# Extract r-values and p-values
r_values <- corr_test$r
p_values <- corr_test$P

# Filter for rows/columns related to Stress_Score, and ensure the correct indices are used
r_values_filtered <- r_values["Stress_Score", -which(colnames(r_values) == "Stress_Score")]
p_values_filtered <- p_values["Stress_Score", -which(colnames(p_values) == "Stress_Score")]

# Convert to data frames and ensure rownames are preserved
r_values_df <- data.frame(Variable = names(r_values_filtered), R_Value = as.numeric(r_values_filtered), stringsAsFactors = FALSE)
p_values_df <- data.frame(Variable = names(p_values_filtered), P_Value = as.numeric(p_values_filtered), stringsAsFactors = FALSE)

# Function to add significance stars for p-values
add_p_stars <- function(p_value) {
  if (p_value < 0.001) {
    "***"
  } else if (p_value < 0.01) {
    "**"
  } else if (p_value < 0.05) {
    "*"
  } else {
    ""
  }
}

# Function to add significance stars for r-values based on the strength of correlation
add_r_stars <- function(r_value) {
  if (abs(r_value) >= 0.80) {
    "****"
  } else if (abs(r_value) >= 0.60) {
    "***"
  } else if (abs(r_value) >= 0.40) {
    "**"
  } else if (abs(r_value) >= 0.20) {
    "*"
  } else {
    ""
  }
}

# Add stars to p-values
p_values_df$Stars <- sapply(p_values_df$P_Value, add_p_stars)

# Add stars to r-values
r_values_df$R_Stars <- sapply(r_values_df$R_Value, add_r_stars)

# Merge r-values and p-values data frames
results_df <- merge(r_values_df, p_values_df, by = "Variable")

# Visualize the table
table_plot <- ggplot(results_df, aes(x = Variable)) +
  geom_tile(aes(y = 1, fill = as.factor(R_Stars)), color = "white") +
  geom_text(aes(y = 1, label = sprintf("%.2f%s", R_Value, R_Stars)), hjust = 0.5, vjust = 0.5, color = "black") +
  geom_text(aes(y = 2, label = sprintf("%.3f%s", P_Value, Stars)), hjust = 0.5, vjust = 0.5, color = "black") +
  scale_fill_manual(values = c(
    "*" = "#FFFFCC", 
    "**" = "#FFD700", 
    "***" = "#FFA500", 
    "****" = "#FF4500"
  )) +
  scale_y_continuous(breaks = 1:2, labels = c("R Value", "P Value")) +
  labs(title = "Correlation of Combined Stress Score with Individual Stress Measures",
       x = "Variables",
       y = "") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        axis.text.y = element_text(size = 12, face = "bold"),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        legend.position = "none")

ggsave("correlation_table_combined_stress_score.png", table_plot, width = 10, height = 5)

# Display the table
print(table_plot)