setwd("C:/Users/emily_2vx2daj/OneDrive - IL State University/Documents/Info/School/College/Grad School/ISU/Thesis/Kahn Lab 2025 - Budik Thesis Study")
#library downloads 
library(dplyr)
library(ggplot2)
library(tidyr) 
library(effectsize)
library(apaTables)
#read file
thesis <- read.csv("thesis.csv")
#ORDER OF FILE:
# #Demographics (First)
# #Creating ANOVAs and analyses (meat+potatoes)
# # Mean, SD, Correlations of Vairbles (Last)
# The following Variables all have #a Mean, #b ANOVA, 
# #c Summary #d Bar graph Plot #e Interaction plot if significant
# #1 Well-being, #2 Stress, 
# #3 Perceived Difficulty, #4, Perceived Enjoyment #5 Perceived Control 
# #6 Perceived Effort #7 Perceived Performance
# BEGIN DEMOGRAPHICS 

#AGE
age_summary <- thesis %>%
    summarise(
    Mean = mean(AGE, na.rm = TRUE),
    Median = median(AGE, na.rm = TRUE),
    SD = sd(AGE, na.rm = TRUE),
    Min = min(AGE, na.rm = TRUE),
    Max = max(AGE, na.rm = TRUE))
print(age_summary)
##       Mean Median       SD Min Max
## 1 19.50562     19 1.431176  17  26
# 89 total particip, find percentages by x/89 * 100 

#GENDER
  #1 = Male, 2 = Female, #3 = Other (nonbinary, fluid, thirdgender, etc.)
  gend_counts <- thesis %>%
  count(GEND, sort = TRUE)
  print(gend_counts)
##   GEND  n
## 1    2 69
## 2    1 19
## 3    3  1
#YEAR IN SCHOOL 
  #1 = Freshmen #2 = sophomore 
  #3 = junior #4 =  senior #5 = grad student
  yr_counts <- thesis %>%
  count(YR, sort = TRUE)
  print(yr_counts)
##   YR  n
## 1  1 28
## 2  3 25
## 3  2 21
## 4  4 15
#ETHNICITY
  #1 = Asian #2 = Biracial / Multiracial #3 = Black or African American
  #4 = Latinx #5 = American Indian or Alaskan Native 
  #6 = Pacific Islander / Native Hawaiian #7 = White #8 = Other 
  eth_counts <- thesis %>%
  count(ETH, sort = TRUE)
  print(eth_counts)
##   ETH  n
## 1   7 45
## 2   4 18
## 3   3 17
## 4   2  5
## 5   1  2
## 6   5  1
## 7   8  1
#ANOVA #creating circumstances, music yes/no, ae yes/no #ae yes/no
thesis <- 
  thesis %>% mutate(ae = case_when(COND %in% c(1, 2) ~ "No",
                                  COND %in% c(3, 4) ~ "Yes"))

#music yes/no 
thesis <- thesis %>% 
  mutate(mus = case_when(COND %in%c(1, 3) ~ "No", 
                         COND %in% c(2, 4) ~ "Yes"))

#creating them as factors for analysis 
thesis$ae <- as.factor(thesis$ae)
thesis$mus <- as.factor(thesis$mus)
#Begin wellbeing eval 
# Create a new column for wellbeing averages
thesis <- thesis %>%
  mutate(wellbeing_avg = rowMeans(select(., starts_with("WB_")), na.rm = TRUE) / 10)

#begin ANOVA for wellbeing
wellbeing_aov <- aov(wellbeing_avg ~ ae * mus, data = thesis)
summary(wellbeing_aov)
##             Df Sum Sq Mean Sq F value Pr(>F)
## ae           1   0.13   0.130   0.050  0.824
## mus          1   5.19   5.194   1.998  0.161
## ae:mus       1   2.20   2.205   0.848  0.360
## Residuals   85 221.00   2.600
#Summary for Wellbeing
wellbeing_summary <- thesis %>%
  group_by(ae, mus) %>%
  summarise(
    N = n(),
    Mean = mean(wellbeing_avg, na.rm = TRUE),
    SD = sd(wellbeing_avg, na.rm = TRUE),
    SE = SD / sqrt(N), # Standard Error for error bars
    .groups = 'drop'
  )
print(wellbeing_summary)
## # A tibble: 4 × 6
##   ae    mus       N  Mean    SD    SE
##   <fct> <fct> <int> <dbl> <dbl> <dbl>
## 1 No    No       22  7.33  1.54 0.328
## 2 No    Yes      22  6.53  1.34 0.285
## 3 Yes   No       23  7.09  1.47 0.307
## 4 Yes   Yes      22  6.92  2.02 0.431
#plot for WB
wellbeing_plot <- ggplot(wellbeing_summary, aes(x = ae, y = Mean, fill = mus)) +
  # Create the bar chart, specifying "dodge" for side-by-side bars
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), color = "black") +
  geom_errorbar(
  aes(ymin = Mean - SE, ymax = Mean + SE), 
  width = 0.2, # Width of the error bar caps
  position = position_dodge(width = 0.9)
  ) + labs(
    title = "Wellbeing Average",
    x = "Aesthetic Enrichment (AE)",
    y = "Average Wellbeing Score",
    fill = "Music" ) +
  scale_fill_manual(values = c("No" = "lightblue", "Yes" = "darkblue")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
print(wellbeing_plot)

#Begin Stress eval 
# Create a new column for stress averages
thesis <- thesis %>%
  mutate(stress_avgscores = rowMeans(select(., starts_with("DASS_")),
                                     na.rm = TRUE))
#begin ANOVA for stress
stress_aov <- aov(stress_avgscores ~ ae * mus, data = thesis)
print(summary(stress_aov))
##             Df Sum Sq Mean Sq F value Pr(>F)
## ae           1  0.155  0.1550   0.562  0.456
## mus          1  0.293  0.2925   1.060  0.306
## ae:mus       1  0.343  0.3432   1.244  0.268
## Residuals   85 23.454  0.2759
#Summary for Stress
stress_summary <- thesis %>%
  group_by(ae, mus) %>%
  summarise(
    N = n(),
    Mean = mean(stress_avgscores, na.rm = TRUE),
    SD = sd(stress_avgscores, na.rm = TRUE),
    SE = SD / sqrt(N),
    .groups = 'drop')

#begin plot for stress
stress_plot <- ggplot(stress_summary, aes(x = ae, y = Mean, fill = mus)) +
  # Create the bar chart
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), color = "black") +
  geom_errorbar(
  aes(ymin = Mean - SE, ymax = Mean + SE), 
  width = 0.2, 
  position = position_dodge(width = 0.9)
  ) + labs(
    title = "Stress Average",
    x = "Aesthetic Enrichment (AE)",
    y = "Average Stress Score (Lower is Better)",
    fill = "Music"
  ) +
  scale_fill_manual(values = c("No" = "lightcoral", "Yes" = "red4")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
print(stress_plot)

#Begin perceived difficulty eval 
#level of perceived difficulty 
mean(thesis$DIFF)
## [1] 2.865169
# ANOVA for perceived difficulty - 11/13; AE*, everything reaching sig 
difficulty_aov <- aov(DIFF ~ ae * mus, data = thesis)
print(summary(difficulty_aov))
##             Df Sum Sq Mean Sq F value Pr(>F)  
## ae           1   4.43   4.435   5.375 0.0228 *
## mus          1   2.75   2.751   3.334 0.0714 .
## ae:mus       1   3.07   3.070   3.722 0.0571 .
## Residuals   85  70.13   0.825                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
diff_ome <- omega_squared(difficulty_aov, partial = TRUE)

#Summary for perceived difficulty
diff_summary <- thesis %>%
  group_by(ae, mus) %>%
  summarise(
    N = n(),
    Mean = mean(DIFF, na.rm = TRUE),
    SD = sd(DIFF, na.rm = TRUE),
    SE = SD / sqrt(N),
    .groups = 'drop')

diff_plot <- ggplot(diff_summary, aes(x = ae, y = Mean, fill = mus)) +
  # Create the bar chart, specifying "dodge" for side-by-side bars
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), color = "black") +
  geom_errorbar(
  aes(ymin = Mean - SE, ymax = Mean + SE), 
  width = 0.2, # Width of the error bar caps
  position = position_dodge(width = 0.9) # Match bar position
  ) +
  labs(title = "Perceived Difficulty Average",
    x = "Aesthetic Enrichment (AE)",
    y = "Perceived Difficulty Score",
    fill = "Music" # 'fill' is used for the legend when bars are filled
  ) +
  scale_fill_manual(values = c("No" = "lightblue", "Yes" = "darkblue")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
print(diff_plot)

#diffifculty interaction plot since marginally signif trend
#Interaction Plot for perceived difficulty
diff_intplot <- ggplot(diff_summary, aes(x = ae, y = Mean, group = mus, color = mus)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  # Add error bars (using 95% CI is common, but SE is shown here for simplicity)
  geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), width = 0.15) +
  labs(
    title = "Perceived Difficulty Average Interaction",
    x = "Aesthetic Enrichment (AE)",
    y = "Average Perceived Difficulty Score",
    color = "Music"
  ) +
  scale_color_manual(values = c("No" = "red", "Yes" = "blue")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

# Display the plot
print(diff_intplot)

#Begin percevied enjoyment eval 
# avg level of perceived enjoyment
mean(thesis$ENJ)
## [1] 2.988764
#begin ANOVA for perceived enjoyment - 11/13 - 
enjoyment_aov <- aov(ENJ ~ ae * mus, data = thesis)
print(summary(enjoyment_aov))
##             Df Sum Sq Mean Sq F value Pr(>F)
## ae           1   0.55  0.5524   0.549  0.461
## mus          1   0.90  0.8969   0.891  0.348
## ae:mus       1   0.01  0.0078   0.008  0.930
## Residuals   85  85.53  1.0063
enj_summary <- thesis %>%
  group_by(ae, mus) %>%
  summarise(
    N = n(),
    Mean = mean(ENJ, na.rm = TRUE),
    SD = sd(ENJ, na.rm = TRUE),
    SE = SD / sqrt(N),
    .groups = 'drop')

enj_plot <- ggplot(enj_summary, aes(x = ae, y = Mean, fill = mus)) +
  # Create the bar chart, specifying "dodge" for side-by-side bars
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), color = "black") +
  geom_errorbar(
    aes(ymin = Mean - SE, ymax = Mean + SE), 
    width = 0.2, # Width of the error bar caps
    position = position_dodge(width = 0.9) # Match bar position
  ) +
  labs(title = "Perceived Enjoyment Average",
       x = "Aesthetic Enrichment (AE)",
       y = "Perceived Enjoyment Score",
       fill = "Music" # 'fill' is used for the legend when bars are filled
  ) +
  scale_fill_manual(values = c("No" = "lightblue", "Yes" = "darkblue")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
print(enj_plot)

#Begin perceived control eval 
#level of perceived control
mean(thesis$CNTRL)
## [1] 3.41573
#begin ANOVA for perceived control of the environment - 11/13 mus*, ae:mus*
control_aov <- aov(CNTRL ~ ae * mus, data = thesis)
print(summary(control_aov))
##             Df Sum Sq Mean Sq F value Pr(>F)  
## ae           1   3.09   3.091   3.412 0.0682 .
## mus          1   3.80   3.805   4.200 0.0435 *
## ae:mus       1   3.72   3.722   4.109 0.0458 *
## Residuals   85  77.00   0.906                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
control_ome <- omega_squared(control_aov, partial = TRUE)

#Summary for perceived control
cntrl_summary <- thesis %>%
  group_by(ae, mus) %>%
  summarise(
    N = n(),
    Mean = mean(CNTRL, na.rm = TRUE),
    SD = sd(CNTRL, na.rm = TRUE),
    SE = SD / sqrt(N),
    .groups = 'drop')

cntrl_plot <- ggplot(cntrl_summary, aes(x = ae, y = Mean, fill = mus)) +
  # Create the bar chart, specifying "dodge" for side-by-side bars
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), color = "black") +
  geom_errorbar(
  aes(ymin = Mean - SE, ymax = Mean + SE), 
  width = 0.2, # Width of the error bar caps
  position = position_dodge(width = 0.9) # Match bar position
  ) + labs(
    title = "Perceived Control Average",
    x = "Aesthetic Enrichment (AE)",
    y = "Perceived Control Score",
    fill = "Music" # 'fill' is used for the legend when bars are filled
  ) +
  scale_fill_manual(values = c("No" = "lightblue", "Yes" = "darkblue")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
print(cntrl_plot)

#Interaction Plot for CNTRL
cntrl_intplot <- ggplot(cntrl_summary, aes(x = ae, y = Mean, group = mus, color = mus)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  # Add error bars (using 95% CI is common, but SE is shown here for simplicity)
  geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), width = 0.15) +
  labs(
    title = "Perceived Control Average Interaction",
    x = "Aesthetic Enrichment (AE)",
    y = "Average Perceived Control Score",
    color = "Music"
  ) +
  scale_color_manual(values = c("No" = "red", "Yes" = "blue")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

# Display the plot
print(cntrl_intplot)

#Begin effort eval 
#level of perceived effort 
mean(thesis$EFF)
## [1] 3.595506
#begin ANOVA for perceived effort - 11/13 mus*
effort_aov <- aov(EFF ~ ae * mus, data = thesis)
print(summary(effort_aov))
##             Df Sum Sq Mean Sq F value Pr(>F)  
## ae           1   0.35  0.3518   0.695 0.4068  
## mus          1   2.06  2.0584   4.067 0.0469 *
## ae:mus       1   0.00  0.0043   0.008 0.9271  
## Residuals   85  43.02  0.5062                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
effort_ome <- omega_squared(effort_aov, partial = TRUE)

#Summary for effort
eff_summary <- thesis %>%
  group_by(ae, mus) %>%
  summarise(
    N = n(),
    Mean = mean(EFF, na.rm = TRUE),
    SD = sd(EFF, na.rm = TRUE),
    SE = SD / sqrt(N),
    .groups = 'drop')

#plot effort eval
eff_plot <- ggplot(eff_summary, aes(x = ae, y = Mean, fill = mus)) +
  # Create the bar chart, specifying "dodge" for side-by-side bars
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), color = "black") +
  geom_errorbar(
    aes(ymin = Mean - SE, ymax = Mean + SE), 
    width = 0.2, # Width of the error bar caps
    position = position_dodge(width = 0.9) # Match bar position
  ) + labs(
    title = "Effort Average",
    x = "Aesthetic Enrichment (AE)",
    y = "Effort Score",
    fill = "Music" # 'fill' is used for the legend when bars are filled
  ) +
  scale_fill_manual(values = c("No" = "lightblue", "Yes" = "darkblue")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
print(eff_plot)

#level of perceived performance
mean(thesis$SBP)
## [1] 3.101124
#begin ANOVA for perceived subjective performance - 11/13 - 
subjectiveperf_aov <- aov(SBP ~ ae * mus, data = thesis)
print(summary(subjectiveperf_aov))
##             Df Sum Sq Mean Sq F value Pr(>F)
## ae           1   0.09  0.0944   0.194  0.661
## mus          1   0.09  0.0924   0.190  0.664
## ae:mus       1   0.58  0.5770   1.187  0.279
## Residuals   85  41.33  0.4862
#Summary for perceived subjective performance
sbp_summary <- thesis %>%
  group_by(ae, mus) %>%
  summarise(
    N = n(),
    Mean = mean(SBP, na.rm = TRUE),
    SD = sd(SBP, na.rm = TRUE),
    SE = SD / sqrt(N),
    .groups = 'drop')

#plot Subjective performance
sbp_plot <- ggplot(sbp_summary, aes(x = ae, y = Mean, fill = mus)) +
  # Create the bar chart, specifying "dodge" for side-by-side bars
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), color = "black") +
  
  # Add error bars (SE)
  geom_errorbar(
    aes(ymin = Mean - SE, ymax = Mean + SE), 
    width = 0.2, # Width of the error bar caps
    position = position_dodge(width = 0.9) # Match bar position
  ) +
  
  labs(
    title = "Perceived Performance Average",
    x = "Aesthetic Enrichment (AE)",
    y = "Perceived Performance Score",
    fill = "Music" # 'fill' is used for the legend when bars are filled
  ) +
  scale_fill_manual(values = c("No" = "lightblue", "Yes" = "darkblue")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
print(sbp_plot)

#Begin Descriptive Statistics
#Mean / SD table
  descriptive_table <- thesis %>%
  # Select only the measures you're interested in
  select(
    `Well-being` = wellbeing_avg,
    `Stress` = stress_avgscores,
    `Perceived Difficulty` = DIFF,
    `Perceived Enjoyment` = ENJ,
    `Perceived Control` = CNTRL,
    `Perceived Effort` = EFF,
    `Perceived Performance` = SBP
  ) %>%
  summarise(across(
    .cols = everything(), 
    .fns = list(
      Mean = ~mean(.x, na.rm = TRUE),
      SD = ~sd(.x, na.rm = TRUE)
    ),
    .names = "{.col}---{.fn}" 
  )) %>%
  pivot_longer(
    cols = everything(),
    names_to = c("Measure", ".value"), # ".value" takes the part after "---" (Mean, SD)
    names_sep = "---"
  )
print(descriptive_table)
## # A tibble: 7 × 3
##   Measure                Mean    SD
##   <chr>                 <dbl> <dbl>
## 1 Well-being             6.97 1.61 
## 2 Stress                 1.71 0.525
## 3 Perceived Difficulty   2.87 0.956
## 4 Perceived Enjoyment    2.99 0.994
## 5 Perceived Control      3.42 0.998
## 6 Perceived Effort       3.60 0.719
## 7 Perceived Performance  3.10 0.692
#begin correlation analysis
correlation_data <- thesis %>%
  select(
    `Wellbeing` = wellbeing_avg,
    `Stress` = stress_avgscores,
    `Difficulty` = DIFF,
    `Enjoyment` = ENJ,
    `Control` = CNTRL,
    `Effort` = EFF,
    `Performance` = SBP
  )

# Create the correlation table
# This function calculates correlations, p-values, and formats it
correlation_results <- apa.cor.table(correlation_data, 
                                     filename = "apa_correlation_table.doc",
                                     show.conf.interval = FALSE) # Don't show confidence intervals
## The ability to suppress reporting of reporting confidence intervals has been deprecated in this version.
## The function argument show.conf.interval will be removed in a later version.
print(correlation_results)
## 
## 
## Means, standard deviations, and correlations with confidence intervals
##  
## 
##   Variable       M    SD   1           2            3            4          
##   1. Wellbeing   6.97 1.61                                                  
##                                                                             
##   2. Stress      1.71 0.52 -.10                                             
##                            [-.31, .11]                                      
##                                                                             
##   3. Difficulty  2.87 0.96 -.11        .55**                                
##                            [-.31, .10] [.39, .68]                           
##                                                                             
##   4. Enjoyment   2.99 0.99 .36**       -.33**       -.31**                  
##                            [.16, .53]  [-.51, -.13] [-.49, -.11]            
##                                                                             
##   5. Control     3.42 1.00 .27*        -.27*        -.26*        .33**      
##                            [.06, .45]  [-.45, -.06] [-.45, -.06] [.13, .50] 
##                                                                             
##   6. Effort      3.60 0.72 .25*        .10          .15          .15        
##                            [.05, .44]  [-.11, .30]  [-.06, .35]  [-.06, .35]
##                                                                             
##   7. Performance 3.10 0.69 .20         -.24*        -.43**       .38**      
##                            [-.01, .39] [-.42, -.03] [-.58, -.24] [.19, .55] 
##                                                                             
##   5           6          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##   -.05                   
##   [-.25, .16]            
##                          
##   .17         .20        
##   [-.04, .36] [-.01, .39]
##                          
## 
## Note. M and SD are used to represent mean and standard deviation, respectively.
## Values in square brackets indicate the 95% confidence interval.
## The confidence interval is a plausible range of population correlations 
## that could have caused the sample correlation (Cumming, 2014).
##  * indicates p < .05. ** indicates p < .01.
##