library(tidyverse)
library(tidyselect)
library(readxl)

# Read the "Model Evaluation" tab
model_evaluation <- read_excel("DataSet.xlsx", sheet = "Model Evaluation")

# Read the "Tagging data" tab
tagging_data <- read_excel("DataSet.xlsx", sheet = "Tagging Data") %>%
  mutate(ModelNum = case_when(
    model == "1- Initial Model" ~ "ModelOne",
    model == "7 - KE and temp after peer sharing" ~ "ModelSeven",
    model == "14 - model after experiment and peer sharing" ~ "ModelFourteen", TRUE ~ NA_character_))%>%
    filter(ModelNum %in% c("ModelOne", "ModelSeven", "ModelFourteen"))%>%
   mutate(IDModel = paste(user_id, ModelNum, sep = "_"))
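
# Optional sanity check (a minimal sketch, assuming each student-model pair is
# meant to appear once): duplicated IDModel keys here would silently multiply
# rows in the left_join further down.
sum(duplicated(tagging_data$IDModel))  # should be 0 if each pair appears once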


# Read the "IRR interation" tab
irr_interation <- read_excel("DataSet.xlsx", sheet = "IRR interation 14")

# For each model: select its columns, strip the trailing index ("1", "2", "3")
# from the column names, and tag the model. Column names, including the
# misspelled Keyvriables* ones, are taken verbatim from the workbook.
model1 <- model_evaluation %>%
  select(Prompt, ID, Teacher, Model1, KeyvariablesKE1, KeyvriablesPE1, 
         KeyVariablesIMF1, KeyvariablesEvaporationrate1, Keyvariablestemperature1, 
         Boundariesofthesystem1, Backbonetype1, Backboneevaluation1, IMFevaluation1, 
         Feedback1, `#brokenflows1`, Modelbehavior1) %>%
  rename(model = Model1) %>%
  rename_all(~ gsub("1", "", .))%>%
  mutate(ModelNum = "ModelOne")

model7 <- model_evaluation %>%
  select(Prompt, ID, Teacher, Model7peerreviews, KeyvariablesKE2, KeyvriablesPE2, 
         KeyVariablesIMF2, KeyvariablesEvaporationrate2, Keyvariablestemperature2, 
         Boundariesofthesystem2, Backbonetype2, Backboneevaluation2, IMFevaluation2, 
         Feedback2, `#brokenflows2`, Modelbehavior2) %>%
  rename(model = Model7peerreviews) %>%
  rename_all(~ gsub("2", "", .))%>%
  mutate(ModelNum = "ModelSeven")

model14 <- model_evaluation %>%
  select(Prompt, ID, Teacher, Model14peerreview2, KeyvariablesKE3, KeyvriablesPE3, 
         KeyVariablesIMF3, KeyvariablesEvaporationrate3, Keyvariablestemperature3, 
         Boundariesofthesystem3, Backbonetype3, Backboneevaluation3, IMFevaluation3, 
         Feedback3, `#brokenflows3`, Modelbehavior3) %>%
  rename(model = Model14peerreview2) %>%
  rename_all(~ gsub("3", "", .))%>%
  mutate(ModelNum = "ModelFourteen")

model_eval_combined <- rbind(model1, model7, model14) %>%
  mutate(IDModel = paste(ID, ModelNum, sep = "_"))
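
# Quick check (optional): confirm the three model slices were stacked as expected.
model_eval_combined %>% count(ModelNum)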

finalset <- model_eval_combined %>%
  left_join(tagging_data, by = "IDModel")
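# Both source tables carry `model` and `ModelNum`, so the join suffixes the
# duplicates with .x (model evaluation) and .y (tagging data); ModelNum.y is
# the grouping variable used from here on.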

# Convert the key-variable scores to numeric and sum them into keysum.
# (KeyvriablesPE is the misspelled source column name; it is renamed to
# KeyvariablesPE during the conversion.)
finalset <- finalset %>%
  mutate(
    KeyvariablesKE = as.numeric(KeyvariablesKE),
    KeyvariablesPE = as.numeric(KeyvriablesPE),
    KeyVariablesIMF = as.numeric(KeyVariablesIMF),
    KeyvariablesEvaporationrate = as.numeric(KeyvariablesEvaporationrate),
    Keyvariablestemperature = as.numeric(Keyvariablestemperature),
    keysum = KeyvariablesKE + KeyvariablesPE + KeyVariablesIMF +
             KeyvariablesEvaporationrate + Keyvariablestemperature
  )
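
# Note: `+` propagates NA, so keysum is NA whenever any component score is
# missing. If a partial sum were ever preferred (an assumption, not the choice
# made in this analysis), rowSums() over the five columns would provide it, e.g.:
# finalset %>% mutate(keysum = rowSums(across(c(KeyvariablesKE, KeyvariablesPE,
#   KeyVariablesIMF, KeyvariablesEvaporationrate, Keyvariablestemperature)),
#   na.rm = TRUE))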


write.csv(finalset, "finalset.csv")
library(psych)

# Convert the structural metrics to numeric and drop rows where the
# link-to-node ratio is missing or undefined ("Ration between links and nodes"
# is the column name as spelled in the workbook).
finalset <- finalset %>%
  mutate(
    `Ration between links and nodes` = as.numeric(`Ration between links and nodes`),
    nodes = as.numeric(nodes),
    links = as.numeric(links),
    keysum = as.numeric(keysum),
    `#brokenflows` = as.numeric(`#brokenflows`)
  ) %>%
  filter(!is.na(`Ration between links and nodes`) &
         !is.infinite(`Ration between links and nodes`) &
         !is.nan(`Ration between links and nodes`))

# Calculate summary statistics by 'ModelNum.y'
summary_stats <- describeBy(finalset %>% select(
  `Ration between links and nodes`,
  nodes,
  links,
  keysum,
  `#brokenflows`
), group = finalset$ModelNum.y, mat = TRUE)
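# mat = TRUE returns the statistics as one long table (a row per
# variable-by-group combination), which is the format shown below.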

# Convert to a data frame
summary_df <- as.data.frame(summary_stats)

# Write the summary statistics to a CSV file
write.csv(summary_df, file = "summary_statistics_by_ModelNum_y.csv", row.names = FALSE)

summary_df
##                                 item        group1 vars  n      mean        sd
## Ration between links and nodes1    1 ModelFourteen    1 34 0.9982865 0.2141836
## Ration between links and nodes2    2      ModelOne    1 36 1.3285494 0.3902497
## Ration between links and nodes3    3    ModelSeven    1 42 1.1445295 0.2438371
## nodes1                             4 ModelFourteen    2 34 8.1470588 1.7430790
## nodes2                             5      ModelOne    2 36 5.2777778 1.7502834
## nodes3                             6    ModelSeven    2 42 7.2380952 2.2611191
## links1                             7 ModelFourteen    3 34 8.5882353 2.6067238
## links2                             8      ModelOne    3 36 4.3333333 2.0563491
## links3                             9    ModelSeven    3 42 6.6428571 2.4872497
## keysum1                           10 ModelFourteen    4 34 4.2941176 1.1685114
## keysum2                           11      ModelOne    4 35 1.7142857 0.7100716
## keysum3                           12    ModelSeven    4 42 3.3333333 0.9283257
## #brokenflows1                     13 ModelFourteen    5 34 0.2352941 0.6059715
## #brokenflows2                     14      ModelOne    5 35 0.4857143 1.0395539
## #brokenflows3                     15    ModelSeven    5 42 0.3333333 1.1825286
##                                   median    trimmed      mad       min
## Ration between links and nodes1 1.000000 0.98087389 0.185325 0.5833333
## Ration between links and nodes2 1.333333 1.28500000 0.247100 0.7500000
## Ration between links and nodes3 1.125000 1.11474090 0.185325 0.7500000
## nodes1                          8.000000 8.14285714 1.482600 5.0000000
## nodes2                          5.000000 5.16666667 1.482600 3.0000000
## nodes3                          7.000000 7.00000000 1.482600 2.0000000
## links1                          9.000000 8.64285714 1.482600 3.0000000
## links2                          4.000000 4.13333333 1.482600 2.0000000
## links3                          6.000000 6.47058824 1.482600 1.0000000
## keysum1                         5.000000 4.50000000 0.000000 1.0000000
## keysum2                         2.000000 1.68965517 0.000000 0.0000000
## keysum3                         3.000000 3.44117647 1.482600 1.0000000
## #brokenflows1                   0.000000 0.10714286 0.000000 0.0000000
## #brokenflows2                   0.000000 0.24137931 0.000000 0.0000000
## #brokenflows3                   0.000000 0.05882353 0.000000 0.0000000
##                                       max     range        skew   kurtosis
## Ration between links and nodes1  1.666667  1.083333  0.98289687  1.4603271
## Ration between links and nodes2  3.000000  2.250000  2.29268109  7.5515598
## Ration between links and nodes3  2.000000  1.250000  1.34435018  2.3140269
## nodes1                          12.000000  7.000000  0.01475819 -0.5454316
## nodes2                           9.000000  6.000000  0.36199600 -0.8141643
## nodes3                          16.000000 14.000000  1.29281421  3.7976049
## links1                          14.000000 11.000000 -0.14716875 -0.4246446
## links2                           9.000000  7.000000  0.71131538 -0.4336245
## links3                          16.000000 15.000000  1.12442850  3.4042795
## keysum1                          5.000000  4.000000 -1.33873751  0.4359094
## keysum2                          3.000000  3.000000 -0.03908775 -0.4363537
## keysum3                          5.000000  4.000000 -0.85976630  0.6689679
## #brokenflows1                    3.000000  3.000000  3.04058895 10.1357474
## #brokenflows2                    4.000000  4.000000  2.02061574  2.9861396
## #brokenflows3                    7.000000  7.000000  4.53711120 21.6820707
##                                         se
## Ration between links and nodes1 0.03673219
## Ration between links and nodes2 0.06504161
## Ration between links and nodes3 0.03762489
## nodes1                          0.29893558
## nodes2                          0.29171390
## nodes3                          0.34889826
## links1                          0.44704944
## links2                          0.34272484
## links3                          0.38379096
## keysum1                         0.20039805
## keysum2                         0.12002401
## keysum3                         0.14324377
## #brokenflows1                   0.10392325
## #brokenflows2                   0.17571668
## #brokenflows3                   0.18246813
library(psych)
library(dplyr)
library(bannerCommenter)


banner("ANOVA DV=Ratio, IV=Model Number")
## 
## #################################################################
## ##               ANOVA DV=Ratio, IV=Model Number               ##
## #################################################################
# Prepare data
anovaratio <- na.omit(select(finalset, `Ration between links and nodes`, ModelNum.y)) 

# Convert columns to appropriate types
anovaratio$ModelNum.y <- as.factor(anovaratio$ModelNum.y)
anovaratio$`Ration between links and nodes` <- as.numeric(anovaratio$`Ration between links and nodes`)

anovaratio <- anovaratio %>%
  filter(!is.na(`Ration between links and nodes`) &
         !is.infinite(`Ration between links and nodes`) &
         !is.nan(`Ration between links and nodes`))

# Run ANOVA
ratio_anova_result <- aov(`Ration between links and nodes` ~ ModelNum.y, data = anovaratio)

# Print the summary of the ANOVA
summary(ratio_anova_result)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## ModelNum.y    2  1.922  0.9609   11.28 3.51e-05 ***
## Residuals   109  9.282  0.0852                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tukey_result <- TukeyHSD(ratio_anova_result)
tukey_result
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = `Ration between links and nodes` ~ ModelNum.y, data = anovaratio)
## 
## $ModelNum.y
##                                diff         lwr         upr     p adj
## ModelOne-ModelFourteen    0.3302629  0.16444241  0.49608332 0.0000199
## ModelSeven-ModelFourteen  0.1462430 -0.01372113  0.30620705 0.0805481
## ModelSeven-ModelOne      -0.1840199 -0.34150926 -0.02653055 0.0176427
# Extract means and standard deviations
group_means <- aggregate(`Ration between links and nodes` ~ ModelNum.y, data = anovaratio, FUN = mean)
group_sds <- aggregate(`Ration between links and nodes` ~ ModelNum.y, data = anovaratio, FUN = sd)
group_ns <- aggregate(`Ration between links and nodes` ~ ModelNum.y, data = anovaratio, FUN = length)

# Combine into a single data frame
group_stats <- merge(group_means, group_sds, by = "ModelNum.y")
group_stats <- merge(group_stats, group_ns, by = "ModelNum.y")
names(group_stats) <- c("Group", "Mean", "SD", "N")

# Calculate pooled standard deviation and Cohen's d for each pairwise comparison
pairwise_comparisons <- tukey_result$`ModelNum.y`
comparison_results <- data.frame(Comparison = rownames(pairwise_comparisons), 
                                 MeanDiff = pairwise_comparisons[, "diff"], 
                                 pValue = pairwise_comparisons[, "p adj"])

# Function to calculate Cohen's d
calculate_cohen_d <- function(mean1, mean2, sd1, sd2, n1, n2) {
  sd_pooled <- sqrt(((n1 - 1) * sd1^2 + (n2 - 1) * sd2^2) / (n1 + n2 - 2))
  return((mean1 - mean2) / sd_pooled)
}
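
# Worked example with hypothetical numbers (not from this dataset): two groups
# with means 1.0 and 1.3, SDs 0.2 and 0.4, and 30 students each give a pooled
# SD of sqrt(0.1) ~= 0.316 and d ~= -0.95:
# calculate_cohen_d(1.0, 1.3, 0.2, 0.4, 30, 30)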

# Calculate Cohen's d for each pairwise comparison
for (i in 1:nrow(comparison_results)) {
  groups <- strsplit(comparison_results$Comparison[i], "-")[[1]]
  group1 <- groups[1]
  group2 <- groups[2]
  
  mean1 <- group_stats$Mean[group_stats$Group == group1]
  mean2 <- group_stats$Mean[group_stats$Group == group2]
  sd1 <- group_stats$SD[group_stats$Group == group1]
  sd2 <- group_stats$SD[group_stats$Group == group2]
  n1 <- group_stats$N[group_stats$Group == group1]
  n2 <- group_stats$N[group_stats$Group == group2]
  
  cohen_d <- calculate_cohen_d(mean1, mean2, sd1, sd2, n1, n2)
  comparison_results$Cohen_d[i] <- cohen_d
}

# Print Cohen's d results
print(comparison_results)
##                                        Comparison   MeanDiff       pValue
## ModelOne-ModelFourteen     ModelOne-ModelFourteen  0.3302629 1.987428e-05
## ModelSeven-ModelFourteen ModelSeven-ModelFourteen  0.1462430 8.054807e-02
## ModelSeven-ModelOne           ModelSeven-ModelOne -0.1840199 1.764269e-02
##                             Cohen_d
## ModelOne-ModelFourteen    1.0410066
## ModelSeven-ModelFourteen  0.6328567
## ModelSeven-ModelOne      -0.5755936
##############################
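
# The ANOVA / Tukey / Cohen's d block above is repeated verbatim for every
# dependent variable below. A minimal reusable sketch (assuming the identical
# workflow is wanted for each DV; the results reported below come from the
# per-DV blocks as written, not from this helper):
anova_with_cohens_d <- function(data, dv, group = "ModelNum.y") {
  dat <- data[, c(dv, group)]
  names(dat) <- c("y", "g")
  dat$y <- suppressWarnings(as.numeric(dat$y))    # coerce DV, silencing NA warnings
  dat$g <- as.factor(dat$g)
  dat <- dat[is.finite(dat$y) & !is.na(dat$g), ]  # drop NA/NaN/Inf rows

  fit <- aov(y ~ g, data = dat)
  tk  <- TukeyHSD(fit)$g

  grp <- dat %>%
    group_by(g) %>%
    summarise(Mean = mean(y), SD = sd(y), N = n(), .groups = "drop")

  # assumes group labels contain no "-", as in the original loop
  d <- sapply(rownames(tk), function(cmp) {
    pair <- strsplit(cmp, "-")[[1]]
    a <- grp[grp$g == pair[1], ]
    b <- grp[grp$g == pair[2], ]
    sp <- sqrt(((a$N - 1) * a$SD^2 + (b$N - 1) * b$SD^2) / (a$N + b$N - 2))
    (a$Mean - b$Mean) / sp
  })

  list(anova   = summary(fit),
       tukey   = tk,
       effects = data.frame(Comparison = rownames(tk),
                            MeanDiff   = tk[, "diff"],
                            pValue     = tk[, "p adj"],
                            Cohen_d    = d))
}
# Example call: anova_with_cohens_d(finalset, "nodes")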

banner("ANOVA DV=Nodes, IV=Model Number")
## 
## #################################################################
## ##               ANOVA DV=Nodes, IV=Model Number               ##
## #################################################################
# Prepare data
anovanode <- na.omit(select(finalset, nodes, ModelNum.y)) 

# Convert columns to appropriate types
anovanode$ModelNum.y <- as.factor(anovanode$ModelNum.y)


# Run ANOVA
node_anova_result <- aov(nodes ~ ModelNum.y, data = anovanode)

# Print the summary of the ANOVA
summary(node_anova_result)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## ModelNum.y    2  152.4   76.19   19.91 4.26e-08 ***
## Residuals   109  417.1    3.83                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tukey_result <- TukeyHSD(node_anova_result)
tukey_result
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = nodes ~ ModelNum.y, data = anovanode)
## 
## $ModelNum.y
##                                diff        lwr        upr     p adj
## ModelOne-ModelFourteen   -2.8692810 -3.9808666 -1.7576955 0.0000000
## ModelSeven-ModelFourteen -0.9089636 -1.9812907  0.1633635 0.1136225
## ModelSeven-ModelOne       1.9603175  0.9045799  3.0160551 0.0000713
# Extract means and standard deviations
group_means <- aggregate(nodes ~ ModelNum.y, data = anovanode, FUN = mean)
group_sds <- aggregate(nodes ~ ModelNum.y, data = anovanode, FUN = sd)
group_ns <- aggregate(nodes ~ ModelNum.y, data = anovanode, FUN = length)

# Combine into a single data frame
group_stats <- merge(group_means, group_sds, by = "ModelNum.y")
group_stats <- merge(group_stats, group_ns, by = "ModelNum.y")
names(group_stats) <- c("Group", "Mean", "SD", "N")

# Calculate pooled standard deviation and Cohen's d for each pairwise comparison
pairwise_comparisons <- tukey_result$`ModelNum.y`
comparison_results <- data.frame(Comparison = rownames(pairwise_comparisons), 
                                 MeanDiff = pairwise_comparisons[, "diff"], 
                                 pValue = pairwise_comparisons[, "p adj"])

# Function to calculate Cohen's d
calculate_cohen_d <- function(mean1, mean2, sd1, sd2, n1, n2) {
  sd_pooled <- sqrt(((n1 - 1) * sd1^2 + (n2 - 1) * sd2^2) / (n1 + n2 - 2))
  return((mean1 - mean2) / sd_pooled)
}

# Calculate Cohen's d for each pairwise comparison
for (i in 1:nrow(comparison_results)) {
  groups <- strsplit(comparison_results$Comparison[i], "-")[[1]]
  group1 <- groups[1]
  group2 <- groups[2]
  
  mean1 <- group_stats$Mean[group_stats$Group == group1]
  mean2 <- group_stats$Mean[group_stats$Group == group2]
  sd1 <- group_stats$SD[group_stats$Group == group1]
  sd2 <- group_stats$SD[group_stats$Group == group2]
  n1 <- group_stats$N[group_stats$Group == group1]
  n2 <- group_stats$N[group_stats$Group == group2]
  
  cohen_d <- calculate_cohen_d(mean1, mean2, sd1, sd2, n1, n2)
  comparison_results$Cohen_d[i] <- cohen_d
}

# Print Cohen's d results
print(comparison_results)
##                                        Comparison   MeanDiff       pValue
## ModelOne-ModelFourteen     ModelOne-ModelFourteen -2.8692810 4.221266e-08
## ModelSeven-ModelFourteen ModelSeven-ModelFourteen -0.9089636 1.136225e-01
## ModelSeven-ModelOne           ModelSeven-ModelOne  1.9603175 7.132409e-05
##                             Cohen_d
## ModelOne-ModelFourteen   -1.6426013
## ModelSeven-ModelFourteen -0.4441840
## ModelSeven-ModelOne       0.9600909
##################################
banner("ANOVA DV=Links, IV=Model Number")
## 
## #################################################################
## ##               ANOVA DV=Links, IV=Model Number               ##
## #################################################################
# Prepare data
anovalinks <- na.omit(select(finalset, links, ModelNum.y)) 

# Convert columns to appropriate types
anovalinks$ModelNum.y <- as.factor(anovalinks$ModelNum.y)


# Run ANOVA
link_anova_result <- aov(links ~ ModelNum.y, data = anovalinks)

# Print the summary of the ANOVA
summary(link_anova_result)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## ModelNum.y    2  318.1  159.06    27.7 1.87e-10 ***
## Residuals   109  625.9    5.74                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tukey_result <- TukeyHSD(link_anova_result)
tukey_result
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = links ~ ModelNum.y, data = anovalinks)
## 
## $ModelNum.y
##                               diff       lwr       upr     p adj
## ModelOne-ModelFourteen   -4.254902 -5.616549 -2.893255 0.0000000
## ModelSeven-ModelFourteen -1.945378 -3.258935 -0.631821 0.0018188
## ModelSeven-ModelOne       2.309524  1.016288  3.602759 0.0001365
# Extract means and standard deviations
group_means <- aggregate(links ~ ModelNum.y, data = anovalinks, FUN = mean)
group_sds <- aggregate(links ~ ModelNum.y, data = anovalinks, FUN = sd)
group_ns <- aggregate(links ~ ModelNum.y, data = anovalinks, FUN = length)

# Combine into a single data frame
group_stats <- merge(group_means, group_sds, by = "ModelNum.y")
group_stats <- merge(group_stats, group_ns, by = "ModelNum.y")
names(group_stats) <- c("Group", "Mean", "SD", "N")

# Calculate pooled standard deviation and Cohen's d for each pairwise comparison
pairwise_comparisons <- tukey_result$`ModelNum.y`
comparison_results <- data.frame(Comparison = rownames(pairwise_comparisons), 
                                 MeanDiff = pairwise_comparisons[, "diff"], 
                                 pValue = pairwise_comparisons[, "p adj"])

# Function to calculate Cohen's d
calculate_cohen_d <- function(mean1, mean2, sd1, sd2, n1, n2) {
  sd_pooled <- sqrt(((n1 - 1) * sd1^2 + (n2 - 1) * sd2^2) / (n1 + n2 - 2))
  return((mean1 - mean2) / sd_pooled)
}

# Calculate Cohen's d for each pairwise comparison
for (i in 1:nrow(comparison_results)) {
  groups <- strsplit(comparison_results$Comparison[i], "-")[[1]]
  group1 <- groups[1]
  group2 <- groups[2]
  
  mean1 <- group_stats$Mean[group_stats$Group == group1]
  mean2 <- group_stats$Mean[group_stats$Group == group2]
  sd1 <- group_stats$SD[group_stats$Group == group1]
  sd2 <- group_stats$SD[group_stats$Group == group2]
  n1 <- group_stats$N[group_stats$Group == group1]
  n2 <- group_stats$N[group_stats$Group == group2]
  
  cohen_d <- calculate_cohen_d(mean1, mean2, sd1, sd2, n1, n2)
  comparison_results$Cohen_d[i] <- cohen_d
}

# Print Cohen's d results
print(comparison_results)
##                                        Comparison  MeanDiff       pValue
## ModelOne-ModelFourteen     ModelOne-ModelFourteen -4.254902 7.957901e-11
## ModelSeven-ModelFourteen ModelSeven-ModelFourteen -1.945378 1.818796e-03
## ModelSeven-ModelOne           ModelSeven-ModelOne  2.309524 1.365423e-04
##                             Cohen_d
## ModelOne-ModelFourteen   -1.8185918
## ModelSeven-ModelFourteen -0.7655284
## ModelSeven-ModelOne       1.0046371
##################################
banner("ANOVA DV=keysum, IV=Model Number")
## 
## ##################################################################
## ##               ANOVA DV=keysum, IV=Model Number               ##
## ##################################################################
# Prepare data
anovakeysum <- na.omit(select(finalset, keysum, ModelNum.y)) 

# Convert columns to appropriate types
anovakeysum$ModelNum.y <- as.factor(anovakeysum$ModelNum.y)


# Run ANOVA
keysum_anova_result <- aov(keysum ~ ModelNum.y, data = anovakeysum)

# Print the summary of the ANOVA
summary(keysum_anova_result)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## ModelNum.y    2 117.94   58.97    65.3 <2e-16 ***
## Residuals   108  97.54    0.90                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tukey_result <- TukeyHSD(keysum_anova_result)
tukey_result
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = keysum ~ ModelNum.y, data = anovakeysum)
## 
## $ModelNum.y
##                                diff       lwr        upr    p adj
## ModelOne-ModelFourteen   -2.5798319 -3.123645 -2.0360189 0.00e+00
## ModelSeven-ModelFourteen -0.9607843 -1.481789 -0.4397801 8.06e-05
## ModelSeven-ModelOne       1.6190476  1.102173  2.1359223 0.00e+00
# Extract means and standard deviations
group_means <- aggregate(keysum ~ ModelNum.y, data = anovakeysum, FUN = mean)
group_sds <- aggregate(keysum ~ ModelNum.y, data = anovakeysum, FUN = sd)
group_ns <- aggregate(keysum ~ ModelNum.y, data = anovakeysum, FUN = length)

# Combine into a single data frame
group_stats <- merge(group_means, group_sds, by = "ModelNum.y")
group_stats <- merge(group_stats, group_ns, by = "ModelNum.y")
names(group_stats) <- c("Group", "Mean", "SD", "N")

# Calculate pooled standard deviation and Cohen's d for each pairwise comparison
pairwise_comparisons <- tukey_result$`ModelNum.y`
comparison_results <- data.frame(Comparison = rownames(pairwise_comparisons), 
                                 MeanDiff = pairwise_comparisons[, "diff"], 
                                 pValue = pairwise_comparisons[, "p adj"])

# Function to calculate Cohen's d
calculate_cohen_d <- function(mean1, mean2, sd1, sd2, n1, n2) {
  sd_pooled <- sqrt(((n1 - 1) * sd1^2 + (n2 - 1) * sd2^2) / (n1 + n2 - 2))
  return((mean1 - mean2) / sd_pooled)
}

# Calculate Cohen's d for each pairwise comparison
for (i in 1:nrow(comparison_results)) {
  groups <- strsplit(comparison_results$Comparison[i], "-")[[1]]
  group1 <- groups[1]
  group2 <- groups[2]
  
  mean1 <- group_stats$Mean[group_stats$Group == group1]
  mean2 <- group_stats$Mean[group_stats$Group == group2]
  sd1 <- group_stats$SD[group_stats$Group == group1]
  sd2 <- group_stats$SD[group_stats$Group == group2]
  n1 <- group_stats$N[group_stats$Group == group1]
  n2 <- group_stats$N[group_stats$Group == group2]
  
  cohen_d <- calculate_cohen_d(mean1, mean2, sd1, sd2, n1, n2)
  comparison_results$Cohen_d[i] <- cohen_d
}

# Print Cohen's d results
print(comparison_results)
##                                        Comparison   MeanDiff       pValue
## ModelOne-ModelFourteen     ModelOne-ModelFourteen -2.5798319 2.375877e-14
## ModelSeven-ModelFourteen ModelSeven-ModelFourteen -0.9607843 8.060455e-05
## ModelSeven-ModelOne           ModelSeven-ModelOne  1.6190476 7.520051e-11
##                             Cohen_d
## ModelOne-ModelFourteen   -2.6774894
## ModelSeven-ModelFourteen -0.9217962
## ModelSeven-ModelOne       1.9355710
##################################
banner("ANOVA DV=brokenflows, IV=Model Number")
## 
## #################################################################
## ##            ANOVA DV=brokenflows, IV=Model Number            ##
## #################################################################
# Prepare data
anovabroken <- na.omit(select(finalset, `#brokenflows`, ModelNum.y)) %>%
  rename(brokenflows = `#brokenflows`)

# Convert columns to appropriate types
anovabroken$ModelNum.y <- as.factor(anovabroken$ModelNum.y)

anovabroken <- anovabroken %>%
  filter(!is.na(brokenflows) &
         !is.infinite(brokenflows) &
         !is.nan(brokenflows))

anovabroken$brokenflows <- as.numeric(anovabroken$brokenflows)

anovabroken <- na.omit(anovabroken)

# Run ANOVA
broken_anova_result <- aov(brokenflows ~ ModelNum.y, data = anovabroken)

# Print the summary of the ANOVA
summary(broken_anova_result)
##              Df Sum Sq Mean Sq F value Pr(>F)
## ModelNum.y    2    1.1  0.5517   0.561  0.572
## Residuals   108  106.2  0.9833
tukey_result <- TukeyHSD(broken_anova_result)
tukey_result
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = brokenflows ~ ModelNum.y, data = anovabroken)
## 
## $ModelNum.y
##                                 diff        lwr       upr     p adj
## ModelOne-ModelFourteen    0.25042017 -0.3170186 0.8178590 0.5479050
## ModelSeven-ModelFourteen  0.09803922 -0.4455998 0.6416783 0.9038173
## ModelSeven-ModelOne      -0.15238095 -0.6917111 0.3869491 0.7805570
# Extract means and standard deviations
group_means <- aggregate(brokenflows ~ ModelNum.y, data = anovabroken, FUN = mean)
group_sds <- aggregate(brokenflows ~ ModelNum.y, data = anovabroken, FUN = sd)
group_ns <- aggregate(brokenflows ~ ModelNum.y, data = anovabroken, FUN = length)

# Combine into a single data frame
group_stats <- merge(group_means, group_sds, by = "ModelNum.y")
group_stats <- merge(group_stats, group_ns, by = "ModelNum.y")
names(group_stats) <- c("Group", "Mean", "SD", "N")

# Calculate pooled standard deviation and Cohen's d for each pairwise comparison
pairwise_comparisons <- tukey_result$`ModelNum.y`
comparison_results <- data.frame(Comparison = rownames(pairwise_comparisons), 
                                 MeanDiff = pairwise_comparisons[, "diff"], 
                                 pValue = pairwise_comparisons[, "p adj"])

# Function to calculate Cohen's d
calculate_cohen_d <- function(mean1, mean2, sd1, sd2, n1, n2) {
  sd_pooled <- sqrt(((n1 - 1) * sd1^2 + (n2 - 1) * sd2^2) / (n1 + n2 - 2))
  return((mean1 - mean2) / sd_pooled)
}

# Calculate Cohen's d for each pairwise comparison
for (i in 1:nrow(comparison_results)) {
  groups <- strsplit(comparison_results$Comparison[i], "-")[[1]]
  group1 <- groups[1]
  group2 <- groups[2]
  
  mean1 <- group_stats$Mean[group_stats$Group == group1]
  mean2 <- group_stats$Mean[group_stats$Group == group2]
  sd1 <- group_stats$SD[group_stats$Group == group1]
  sd2 <- group_stats$SD[group_stats$Group == group2]
  n1 <- group_stats$N[group_stats$Group == group1]
  n2 <- group_stats$N[group_stats$Group == group2]
  
  cohen_d <- calculate_cohen_d(mean1, mean2, sd1, sd2, n1, n2)
  comparison_results$Cohen_d[i] <- cohen_d
}

# Print Cohen's d results
print(comparison_results)
##                                        Comparison    MeanDiff    pValue
## ModelOne-ModelFourteen     ModelOne-ModelFourteen  0.25042017 0.5479050
## ModelSeven-ModelFourteen ModelSeven-ModelFourteen  0.09803922 0.9038173
## ModelSeven-ModelOne           ModelSeven-ModelOne -0.15238095 0.7805570
##                             Cohen_d
## ModelOne-ModelFourteen    0.2932431
## ModelSeven-ModelFourteen  0.1011991
## ModelSeven-ModelOne      -0.1360572
## 
## 
## ANOVA results using Ration between links and nodes as the dependent variable
##  
## 
##    Predictor    SS  df    MS      F    p partial_eta2 CI_90_partial_eta2
##  (Intercept) 33.88   1 33.88 397.90 .000                                
##   ModelNum.y  1.92   2  0.96  11.28 .000          .17         [.07, .27]
##        Error  9.28 109  0.09                                            
## 
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared
## 
## 
## ANOVA results using nodes as the dependent variable
##  
## 
##    Predictor      SS  df      MS      F    p partial_eta2 CI_90_partial_eta2
##  (Intercept) 2256.74   1 2256.74 589.74 .000                                
##   ModelNum.y  152.39   2   76.19  19.91 .000          .27         [.15, .36]
##        Error  417.11 109    3.83                                            
## 
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared
## 
## 
## ANOVA results using links as the dependent variable
##  
## 
##    Predictor      SS  df      MS      F    p partial_eta2 CI_90_partial_eta2
##  (Intercept) 2507.76   1 2507.76 436.74 .000                                
##   ModelNum.y  318.11   2  159.06  27.70 .000          .34         [.21, .43]
##        Error  625.88 109    5.74                                            
## 
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared
## 
## 
## ANOVA results using keysum as the dependent variable
##  
## 
##    Predictor     SS  df     MS      F    p partial_eta2 CI_90_partial_eta2
##  (Intercept) 626.94   1 626.94 694.21 .000                                
##   ModelNum.y 117.94   2  58.97  65.30 .000          .55         [.44, .62]
##        Error  97.54 108   0.90                                            
## 
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared
## 
## 
## ANOVA results using brokenflows as the dependent variable
##  
## 
##    Predictor     SS  df   MS    F    p partial_eta2 CI_90_partial_eta2
##  (Intercept)   1.88   1 1.88 1.91 .169                                
##   ModelNum.y   1.10   2 0.55 0.56 .572          .01         [.00, .05]
##        Error 106.19 108 0.98                                          
## 
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared

# Model number and Feedback with the Friedman test

library(psych)
library(dplyr)
library(bannerCommenter)
library(tidyverse)
library(ggpubr)
library(rstatix)


# Convert to numeric and factor
finalset$Feedback <- as.numeric(finalset$Feedback)
finalset$ModelNum.y <- as.factor(finalset$ModelNum.y)

# Select and rename columns
fb <- finalset %>%
  select(ID, ModelNum.y, Feedback) %>%
  rename(Subject = ID)

# Filter complete cases
fb <- fb[complete.cases(fb), ]

# Keep only subjects with a Feedback score for all three models
required_models <- levels(fb$ModelNum.y)

# Group by Subject and filter for complete data
fb <- fb %>%
  group_by(Subject) %>%
  filter(all(required_models %in% ModelNum.y)) %>%
  ungroup()  # Ungroup to avoid grouping issues

# Run the Friedman test
res.fried <- fb %>%
  friedman_test(Feedback ~ ModelNum.y | Subject)

# Print the result of the Friedman test
print(res.fried)
## # A tibble: 1 × 6
##   .y.          n statistic    df        p method       
## * <chr>    <int>     <dbl> <dbl>    <dbl> <chr>        
## 1 Feedback    27      16.9     2 0.000213 Friedman test
# Calculate effect sizes
effsize <- fb %>%
  friedman_effsize(Feedback ~ ModelNum.y | Subject)
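
# friedman_effsize() above computes Kendall's W for this comparison; the value
# is stored in `effsize` but not printed in this output.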

# Perform post-hoc pairwise comparisons using Wilcoxon signed-rank test
pwc <- fb %>%
  wilcox_test(Feedback ~ ModelNum.y, paired = TRUE, p.adjust.method = "bonferroni")

pwc
## # A tibble: 3 × 9
##   .y.      group1        group2      n1    n2 statistic     p p.adj p.adj.signif
## * <chr>    <chr>         <chr>    <int> <int>     <dbl> <dbl> <dbl> <chr>       
## 1 Feedback ModelFourteen ModelOne    27    27      93.5 0.009 0.026 *           
## 2 Feedback ModelFourteen ModelSe…    27    27      73   0.006 0.017 *           
## 3 Feedback ModelOne      ModelSe…    27    27       2.5 0.424 1     ns
# Perform post-hoc pairwise comparisons using Sign test
pwc2 <- fb %>%
  sign_test(Feedback ~ ModelNum.y, p.adjust.method = "bonferroni")

pwc2
## # A tibble: 3 × 10
##   .y.      group1    group2    n1    n2 statistic    df     p p.adj p.adj.signif
## * <chr>    <chr>     <chr>  <int> <int>     <dbl> <dbl> <dbl> <dbl> <chr>       
## 1 Feedback ModelFou… Model…    27    27        13    14 0.002 0.005 **          
## 2 Feedback ModelFou… Model…    27    27        11    12 0.006 0.019 *           
## 3 Feedback ModelOne  Model…    27    27         1     4 0.625 1     ns
# Add xy positions for p-values for plotting
pwc <- pwc %>% add_xy_position(x = "ModelNum.y")

fb$ModelNum.y <- factor(fb$ModelNum.y, levels = c("ModelOne", "ModelSeven", "ModelFourteen"))

# Plot the results
ggboxplot(fb, x = "ModelNum.y", y = "Feedback", add = "point") +
  stat_pvalue_manual(pwc, hide.ns = TRUE) +
  labs(
    subtitle = get_test_label(res.fried, detailed = TRUE),
    caption = get_pwc_label(pwc)
  )

# Model number and Modelbehavior with the Friedman test

library(psych)
library(dplyr)
library(bannerCommenter)
library(tidyverse)
library(ggpubr)
library(rstatix)


# Convert to numeric and factor
finalset$Modelbehavior <- as.numeric(finalset$Modelbehavior)
finalset$ModelNum.y <- as.factor(finalset$ModelNum.y)

# Select and rename columns
fb <- finalset %>%
  select(ID, ModelNum.y, Modelbehavior) %>%
  rename(Subject = ID)

# Filter complete cases
fb <- fb[complete.cases(fb), ]

# Keep only subjects with a Modelbehavior score for all three models
required_models <- levels(fb$ModelNum.y)

# Group by Subject and filter for complete data
fb <- fb %>%
  group_by(Subject) %>%
  filter(all(required_models %in% ModelNum.y)) %>%
  ungroup()  # Ungroup to avoid grouping issues



# Run the Friedman test
res.fried <- fb %>%
  friedman_test(Modelbehavior ~ ModelNum.y | Subject)

# Print the result of the Friedman test
print(res.fried)
## # A tibble: 1 × 6
##   .y.               n statistic    df         p method       
## * <chr>         <int>     <dbl> <dbl>     <dbl> <chr>        
## 1 Modelbehavior    27      21.7     2 0.0000198 Friedman test
# Calculate effect sizes
effsize <- fb %>%
  friedman_effsize(Modelbehavior ~ ModelNum.y | Subject)

# Perform post-hoc pairwise comparisons using Wilcoxon signed-rank test
pwc <- fb %>%
  wilcox_test(Modelbehavior ~ ModelNum.y, paired = TRUE, p.adjust.method = "bonferroni")
pwc
## # A tibble: 3 × 9
##   .y.           group1   group2    n1    n2 statistic       p p.adj p.adj.signif
## * <chr>         <chr>    <chr>  <int> <int>     <dbl>   <dbl> <dbl> <chr>       
## 1 Modelbehavior ModelFo… Model…    27    27      166. 3.88e-4 0.001 **          
## 2 Modelbehavior ModelFo… Model…    27    27       96  6   e-3 0.017 *           
## 3 Modelbehavior ModelOne Model…    27    27        0  8   e-3 0.025 *
# Perform post-hoc pairwise comparisons using Sign test
pwc2 <- fb %>%
  sign_test(Modelbehavior ~ ModelNum.y, p.adjust.method = "bonferroni")
pwc2
## # A tibble: 3 × 10
##   .y.     group1 group2    n1    n2 statistic    df       p   p.adj p.adj.signif
## * <chr>   <chr>  <chr>  <int> <int>     <dbl> <dbl>   <dbl>   <dbl> <chr>       
## 1 Modelb… Model… Model…    27    27        17    18 1.45e-4 4.35e-4 ***         
## 2 Modelb… Model… Model…    27    27        12    14 1.3 e-2 3.9 e-2 *           
## 3 Modelb… Model… Model…    27    27         0     8 8   e-3 2.3 e-2 *
# Add xy positions for p-values for plotting
pwc <- pwc %>% add_xy_position(x = "ModelNum.y")

fb$ModelNum.y <- factor(fb$ModelNum.y, levels = c("ModelOne", "ModelSeven", "ModelFourteen"))

# Plot the results
ggboxplot(fb, x = "ModelNum.y", y = "Modelbehavior", add = "point") +
  stat_pvalue_manual(pwc, hide.ns = TRUE) +
  labs(
    subtitle = get_test_label(res.fried, detailed = TRUE),
    caption = get_pwc_label(pwc)
  )

# Model number and Boundariesofthesystem with the Friedman test

library(psych)
library(dplyr)
library(bannerCommenter)
library(tidyverse)
library(ggpubr)
library(rstatix)


# Convert to numeric and factor
finalset$Boundariesofthesystem <- as.numeric(finalset$Boundariesofthesystem)
finalset$ModelNum.y <- as.factor(finalset$ModelNum.y)

# Select and rename columns
fb <- finalset %>%
  select(ID, ModelNum.y, Boundariesofthesystem) %>%
  rename(Subject = ID)

# Filter complete cases
fb <- fb[complete.cases(fb), ]

# Keep only subjects with a Boundariesofthesystem score for all three models
required_models <- levels(fb$ModelNum.y)

# Group by Subject and filter for complete data
fb <- fb %>%
  group_by(Subject) %>%
  filter(all(required_models %in% ModelNum.y)) %>%
  ungroup()  # Ungroup to avoid grouping issues



# Run the Friedman test
res.fried <- fb %>%
  friedman_test(Boundariesofthesystem ~ ModelNum.y | Subject)

# Print the result of the Friedman test
print(res.fried)
## # A tibble: 1 × 6
##   .y.                       n statistic    df      p method       
## * <chr>                 <int>     <dbl> <dbl>  <dbl> <chr>        
## 1 Boundariesofthesystem    27      7.73     2 0.0209 Friedman test
# Calculate effect sizes
effsize <- fb %>%
  friedman_effsize(Boundariesofthesystem ~ ModelNum.y | Subject)

# Perform post-hoc pairwise comparisons using Wilcoxon signed-rank test
pwc <- fb %>%
  wilcox_test(Boundariesofthesystem ~ ModelNum.y, paired = TRUE, p.adjust.method = "bonferroni")
pwc
## # A tibble: 3 × 9
##   .y.               group1 group2    n1    n2 statistic     p p.adj p.adj.signif
## * <chr>             <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>       
## 1 Boundariesofthes… Model… Model…    27    27      122. 0.004 0.013 *           
## 2 Boundariesofthes… Model… Model…    27    27       69  0.09  0.269 ns          
## 3 Boundariesofthes… Model… Model…    27    27       36  0.051 0.152 ns
# Perform post-hoc pairwise comparisons using Sign test
pwc2 <- fb %>%
  sign_test(Boundariesofthesystem ~ ModelNum.y, p.adjust.method = "bonferroni")
pwc2
## # A tibble: 3 × 10
##   .y.         group1 group2    n1    n2 statistic    df     p p.adj p.adj.signif
## * <chr>       <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <dbl> <chr>       
## 1 Boundaries… Model… Model…    27    27        13    16 0.021 0.064 ns          
## 2 Boundaries… Model… Model…    27    27         9    13 0.267 0.801 ns          
## 3 Boundaries… Model… Model…    27    27         5    17 0.143 0.429 ns
# Add xy positions for p-values for plotting
pwc <- pwc %>% add_xy_position(x = "ModelNum.y")

fb$ModelNum.y <- factor(fb$ModelNum.y, levels = c("ModelOne", "ModelSeven", "ModelFourteen"))

# Plot the results
ggboxplot(fb, x = "ModelNum.y", y = "Boundariesofthesystem", add = "point") +
  stat_pvalue_manual(pwc, hide.ns = TRUE) +
  labs(
    subtitle = get_test_label(res.fried, detailed = TRUE),
    caption = get_pwc_label(pwc)
  )

# Model number and Feedback Structure with the Friedman test

library(psych)
library(dplyr)
library(bannerCommenter)
library(tidyverse)
library(ggpubr)
library(rstatix)

finalset <- finalset %>%
  rename(feedbackstructure = `feedback structure`)

# Convert to numeric and factor
finalset$feedbackstructure <- as.numeric(finalset$feedbackstructure)
finalset$ModelNum.y <- as.factor(finalset$ModelNum.y)

# Select and rename columns
fb <- finalset %>%
  select(ID, ModelNum.y, feedbackstructure) %>%
  rename(Subject = ID)

# Filter complete cases
fb <- fb[complete.cases(fb), ]

# Keep only subjects with a feedbackstructure score for all three models
required_models <- levels(fb$ModelNum.y)

# Group by Subject and filter for complete data
fb <- fb %>%
  group_by(Subject) %>%
  filter(all(required_models %in% ModelNum.y)) %>%
  ungroup()  # Ungroup to avoid grouping issues



# Run the Friedman test
res.fried <- fb %>%
  friedman_test(feedbackstructure ~ ModelNum.y | Subject)

# Print the result of the Friedman test
print(res.fried)
## # A tibble: 1 × 6
##   .y.                   n statistic    df           p method       
## * <chr>             <int>     <dbl> <dbl>       <dbl> <chr>        
## 1 feedbackstructure    27      30.5     2 0.000000242 Friedman test
# Calculate effect sizes
effsize <- fb %>%
  friedman_effsize(feedbackstructure ~ ModelNum.y | Subject)

# Perform post-hoc pairwise comparisons using Wilcoxon signed-rank test
pwc <- fb %>%
  wilcox_test(feedbackstructure ~ ModelNum.y, paired = TRUE, p.adjust.method = "bonferroni")
pwc
## # A tibble: 3 × 9
##   .y.           group1 group2    n1    n2 statistic       p   p.adj p.adj.signif
## * <chr>         <chr>  <chr>  <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
## 1 feedbackstru… Model… Model…    27    27       153 4.20e-5 1.26e-4 ***         
## 2 feedbackstru… Model… Model…    27    27       120 1.23e-4 3.69e-4 ***         
## 3 feedbackstru… Model… Model…    27    27         0 3.46e-1 1   e+0 ns
# Perform post-hoc pairwise comparisons using Sign test
pwc2 <- fb %>%
  sign_test(feedbackstructure ~ ModelNum.y, p.adjust.method = "bonferroni")
pwc2
## # A tibble: 3 × 10
##   .y.     group1 group2    n1    n2 statistic    df       p   p.adj p.adj.signif
## * <chr>   <chr>  <chr>  <int> <int>     <dbl> <dbl>   <dbl>   <dbl> <chr>       
## 1 feedba… Model… Model…    27    27        17    17 1.53e-5 4.59e-5 ****        
## 2 feedba… Model… Model…    27    27        15    15 6.1 e-5 1.83e-4 ***         
## 3 feedba… Model… Model…    27    27         0     2 5   e-1 1   e+0 ns
# Add xy positions for p-values for plotting
pwc <- pwc %>% add_xy_position(x = "ModelNum.y")

fb$ModelNum.y <- factor(fb$ModelNum.y, levels = c("ModelOne", "ModelSeven", "ModelFourteen"))

# Plot the results
ggboxplot(fb, x = "ModelNum.y", y = "feedbackstructure", add = "point") +
  stat_pvalue_manual(pwc, hide.ns = TRUE) +
  labs(
    subtitle = get_test_label(res.fried, detailed = TRUE),
    caption = get_pwc_label(pwc)
  )