Supervisor: Dr. Lorena Gianotti
Affiliation: Department of Social Neuroscience and Social Psychology, Institute of Psychology, University of Bern
This script performs the R analyses of the master’s thesis “Temporal Dynamics of Resting-State EEG Microstates and Prosociality: Gender-Specific Associations”. The data files required for this are: dataMS with the temporal microstate parameters calculated using the MICROSTATELAB toolbox in Matlab; dataBehavior, which contains the contributions of the participants in the behavioral economic games (PGG-1.2, PGG-2, PGG-3.2, and DG); dataCycle, which contains the cycle phases of female participants with a regular menstrual cycles and without hormonal contraception; EEG data that were processed using a Fast Fourier Transform (FFT) for the power analysis. All data sets contain a variable with the ID of the subjects for matching the data sets.
All necessary packages and the data are loaded. The FFT data used to create the plot depicting the power in the frequency bands will be loaded later.
# Load Packages
library(tidyverse)
library(xlsx)
library(psych)
library(car)
library(papaja)
library(apa7)
library(ggtext)
library(patchwork)
# Load data with microstate parameters
dataMS <- read_csv(file.path(params$pathMS, params$fileMS))
# Load behavioral data with the values from the economic games, as well as demographic information
dataBehavior <- read.xlsx(file.path(params$pathBehavior, params$fileBehavior), sheetIndex = 1)
# Load data with the cycle phases for women who do not use hormonal contraception and have a regular cycle
dataCycle <- read_csv(file.path(params$pathBehavior, params$fileCycle))
The data is prepared and then combined into one dataset that contains all variables.
# Prepare dataMS
dataMS <- dataMS %>%
# Change name of Dataset to ID so that it is the same as in dataBehavior
rename("ID" = "Dataset") %>%
# Delete "_rest" after the ID-number so that it is the same as in dataBehavior
mutate(ID = str_replace(ID, "_rest", "")) %>%
# Replace "->" with "_" in all column names
rename_with(~ gsub("->", "_", .x)) %>%
# Delete empty columns from the dataset
dplyr::select(-Subject, -Group, -Condition) %>%
# Rename duration- and occurrence-variables
rename_with(~ sub("^MeanDuration", "Duration", .x), .cols = starts_with("MeanDuration")) %>%
rename_with(~ sub("^MeanOccurrence", "Occurrence", .x), .cols = starts_with("MeanOccurrence")) %>%
# Transform the values of the duration-variables from seconds to milliseconds by multiplying them by 1000
mutate(across(c(Duration_A, Duration_B, Duration_C, Duration_D),
~ . * 1000)) %>%
# Transform the values of the explained variances to percentages by multiplying them by 100
mutate(across(c(TotalExpVar, IndExpVar_A, IndExpVar_B,
IndExpVar_C, IndExpVar_D),
~ . * 100))
# Change ID to a character so that it can be merged with the other datasets
dataBehavior <- dataBehavior %>%
mutate (ID = as.character(ID),
sex_1male = factor(sex_1male, levels = c("1", "2")))
# Change ID to a character so that it can be merged with the other datasets
dataCycle <- dataCycle %>%
mutate (ID = as.character(ID))
# Merge dataMS with dataBehavior by ID to one dataset
data <- dataBehavior %>%
full_join(dataMS, by = "ID")
# Merge dataCycle by ID to the other datasets so that it contains all variables
data <- data %>%
full_join(dataCycle, by = "ID")
The data is first z-transformed to prepare it for analysis. Then a parallel analysis is performed to determine the appropriate number of factors.
# Create a new dataset for the PFA
dataIndex <- data %>%
# Select only the needed variables for the index
dplyr::select(PGG1, PGG2, PGG3, DG) %>%
# z-transform the variables to prepare them for the factor analysis
mutate(across(everything(), ~as.numeric(scale(.))))
# Parallel analysis to determine the number of factors
fa.parallel(dataIndex, fm = "pa", plot = TRUE)
## Parallel analysis suggests that the number of factors = 1 and the number of components = 1
As the parallel analysis has determined one factor to be appropriate, the scores for this factor are calculated for each subject and then added to the dataset as a new variable “Index”.
# Principal Axis Factoring
faFinal <- fa(dataIndex, nfactors = 1, fm = "pa", rotate = "none")
# Calculate factor scores per subject and add it to data
factorScores <- faFinal$scores
data <- cbind(data, factorScores)
data <- data %>%
rename(Index = PA1)
Several model fit indices and factor loadings are calculated to ensure that the model has a good fit. In addition, the range of the index is calculated (other descriptive statistics for the index are calculated under “descriptive statistics”).
# RMSEA
print(paste("RMSEA:", apa_num(faFinal$RMSEA["RMSEA"], digits = 2)))
## [1] "RMSEA: 0.00"
# TLI
print(paste("TLI:", apa_num(faFinal$TLI, digits = 2)))
## [1] "TLI: 1.07"
# Proportion of explained variance by the factor
print(paste("Proportion of explained variance:", apa_num(faFinal$Vaccounted["Proportion Var", 1] * 100, digits = 1)))
## [1] "Proportion of explained variance: 38.1"
# Factor loadings
print(paste("Loading PGG1:", apa_num(faFinal$loadings[1, 1], digits = 2)))
## [1] "Loading PGG1: 0.49"
print(paste("Loading PGG2:", apa_num(faFinal$loadings[2, 1], digits = 2)))
## [1] "Loading PGG2: 0.91"
print(paste("Loading PGG3:", apa_num(faFinal$loadings[3, 1], digits = 2)))
## [1] "Loading PGG3: 0.60"
print(paste("Loading DG:", apa_num(faFinal$loadings[4, 1], digits = 2)))
## [1] "Loading DG: 0.31"
# Range of Index (other descriptive statistics are calculated and displayed in tbl-descriptive)
print(paste("Range:", apa_num(min(data$Index), digits = 2), " - ", apa_num(max(data$Index), digits = 2)))
## [1] "Range: -1.78 - 1.88"
The histogram of the index is created. Women and men are shown separately.
bw <- 0.25 # Histogram bin width
bar_frac <- 0.8 # Fraction of bin width occupied by each bar
# Histogram of 'Index' by gender
plotIndex <- ggplot(data = data, aes(x = Index, fill = sex_1male)) +
geom_histogram(
aes(width = after_stat(width) * bar_frac), # Adjust bar width
binwidth = bw,
color = "black",
boundary = 0,
position = position_dodge2(
width = bw * bar_frac, # Align with bar width
preserve = "single",
padding = 0
)
) +
scale_fill_manual( # Custom colors and labels
values = c("1" = "#88C999", "2" = "#E6AA80"),
labels = c("1" = "Men", "2" = "Women")
) +
scale_x_continuous( # X-axis limits and ticks
limits = c(-2, 2),
breaks = seq(-2, 2, by = 1),
minor_breaks = NULL
) +
labs(x = "Prosociality", y = "Frequency", fill = "Gender") +
ylim(c(0, 22)) + # Y-axis limit
theme_minimal() +
theme( # Aesthetic adjustments
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks.length = unit(5, "pt"),
axis.ticks = element_line(color = "black", size = 0.75),
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
axis.line.x = element_line(color = "black", size = 1),
axis.line.y = element_line(color = "black", size = 1),
axis.title.x = element_text(size = 14, face = "bold"),
axis.title.y = element_text(size = 14, face = "bold"),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
legend.text = element_text(size = 14),
legend.title = element_text(size = 14, face = "bold"),
legend.position = c(0.98, 0.98),
legend.justification = c("right", "top"),
legend.background = element_rect(fill = "white", color = "grey85"))
print(plotIndex)
Sample size and age statistics for the sample are calculated.
# Sample size
print(paste("N of the total sample:", apa_num(nrow(data), digits = 0)))
## [1] "N of the total sample: 133"
print(paste("N women:", apa_num(sum(data$sex_1male == 2), digits = 0)))
## [1] "N women: 102"
print(paste("N men:", apa_num(sum(data$sex_1male == 1), digits = 0)))
## [1] "N men: 31"
# Sample age
print(paste("Mean age:", apa_num(mean(data$age), digits = 1)))
## [1] "Mean age: 21.1"
print(paste("SD age:", apa_num(sd(data$age), digits = 1)))
## [1] "SD age: 3.0"
print(paste("Minimum age:", apa_num(min(data$age), digits = 0)))
## [1] "Minimum age: 18"
print(paste("Maximum age:", apa_num(max(data$age), digits = 0)))
## [1] "Maximum age: 46"
Descriptive statistical values for EEG data time available for microstate analysis and the total variance explained by the microstates are calculated here.
# Available EEG data time
print(paste("Mean of available EEG data time in s:", apa_num(mean(data$TotalTime), digits = 1)))
## [1] "Mean of available EEG data time in s: 153.9"
print(paste("SD of available EEG data time in s:", apa_num(sd(data$TotalTime), digits = 1)))
## [1] "SD of available EEG data time in s: 30.0"
# Total explained variance
print(paste("Mean of total explained variance in %:", apa_num(mean(data$TotalExpVar), digits = 1)))
## [1] "Mean of total explained variance in %: 71.8"
print(paste("SD of total explained variance in %:", apa_num(sd(data$TotalExpVar), digits = 1)))
## [1] "SD of total explained variance in %: 7.6"
print(paste("Minimum of total explained variance in %:", apa_num(min(data$TotalExpVar), digits = 1)))
## [1] "Minimum of total explained variance in %: 45.5"
print(paste("Maximum of total explained variance in %:", apa_num(max(data$TotalExpVar), digits = 1)))
## [1] "Maximum of total explained variance in %: 85.9"
A APA7-formatted table is calculated to display the descriptive statistics of all variables. Per variable, the mean and standard deviation are calculated, and the Shapiro-Wilk test for normal distribution is performed on the entire sample. In addition, the mean and standard deviation are calculated separately for women and men, and a Welch’s t-test is used to test whether there is a mean difference between the genders for this variable.
# Create a new dataset with the variables for the descriptive statistics
dataSummary <- data %>%
dplyr::select(sex_1male, Index,
Coverage_A, Coverage_B, Coverage_C, Coverage_D,
Duration_A, Duration_B, Duration_C, Duration_D,
Occurrence_A, Occurrence_B, Occurrence_C, Occurrence_D)
# Create a new dataframe for the summary table
summaryTable <- data.frame(Variable = character(),
Total_M = numeric(), Total_SD = numeric(), Shapiro_p = character(),
Men_M = numeric(), Men_SD = numeric(),
Women_M = numeric(), Women_SD = numeric(),
Genderdifference_p = numeric(),
stringsAsFactors = FALSE)
# Loop to calculate the descriptive statistics for all variables
for (var in setdiff(names(dataSummary), "sex_1male")) {
# Calculate mean and sd for each variable
meanValue <- round(mean(dataSummary[[var]], na.rm = TRUE), 1)
sdValue <- round(sd(dataSummary[[var]], na.rm = TRUE), 1)
# Shapiro-Wilk-Test to test for normal distribution
shapiro_p <- shapiro.test(na.omit(dataSummary[[var]]))$p.value
# Extract values for males and females
maleValues <- na.omit(dataSummary[[var]][dataSummary$sex_1male == 1])
femaleValues <- na.omit(dataSummary[[var]][dataSummary$sex_1male == 2])
# Calculate means for males and females separately
meanMale <- round(mean(maleValues, na.rm = TRUE), 1)
meanFemale <- round(mean(femaleValues, na.rm = TRUE), 1)
# Calculate standard deviations for males and females separately
sdMale <- round(sd(maleValues, na.rm = TRUE), 1)
sdFemale <- round(sd(femaleValues, na.rm = TRUE), 1)
# Welch-t-Test to test if variables differ between males and females
genderdifference_p <- t.test(maleValues, femaleValues, var.equal = FALSE)$p.value
# Append results to the summary table
summaryTable <- rbind(summaryTable, data.frame(
Variable = var, Total_M = meanValue, Total_SD = sdValue, Shapiro_p = shapiro_p,
Men_M = meanMale,
Men_SD = sdMale,
Women_M = meanFemale,
Women_SD = sdFemale,
Genderdifference_p = genderdifference_p,
stringsAsFactors = FALSE))
}
# Reformat and prepare descriptive statistics table for APA-style output
summaryTable_clean <- summaryTable %>%
# Rename test columns for clearer APA-style labels
rename(`Shapiro-Wilk test_p` = Shapiro_p,
`Welch's t-test_p` = Genderdifference_p) %>%
mutate(
# Replace underscores with spaces and capitalize the following letter
# e.g., "Duration_A" → "Duration A"
Variable = sub("^(.*)_([A-Za-z])$", "\\1 \\U\\2", Variable, perl = TRUE),
# Format p-values according to APA guidelines (e.g., p = .001 → "p < .001")
`Shapiro-Wilk test_p` = apa7::apa_p(`Shapiro-Wilk test_p`),
`Welch's t-test_p` = apa7::apa_p(`Welch's t-test_p`))
# Identify rows where p < .05 for bold highlighting in the table
rows_shapiro <- which(as.numeric(summaryTable[["Shapiro_p"]]) < 0.05)
rows_welch <- which(as.numeric(summaryTable[["Genderdifference_p"]]) < 0.05)
# Get column indices for the test columns (used for formatting later)
j_shapiro <- match("Shapiro-Wilk test_p", names(summaryTable_clean))
j_welch <- match("Welch's t-test_p", names(summaryTable_clean))
# Create APA-style flextable without automatic numeric or markdown formatting
tableSum <- apa_flextable(
data = summaryTable_clean,
auto_format_columns = FALSE,
markdown_body = FALSE)
# Apply bold font to rows with significant results (p < .05)
tableSum <- flextable::bold(tableSum, i = rows_shapiro, j = j_shapiro, part = "body")
tableSum <- flextable::bold(tableSum, i = rows_welch, j = j_welch, part = "body")
# Display final formatted APA-style table
tableSum
Variable | Total | Shapiro-Wilk test | Men | Women | Welch’s t-test | |||
|---|---|---|---|---|---|---|---|---|
M | SD | p | M | SD | M | SD | p | |
Index | 0.0 | 0.9 | .002 | 0.0 | 1.3 | 0.0 | 0.8 | .88 |
Coverage A | 22.2 | 5.1 | .68 | 21.1 | 4.8 | 22.5 | 5.2 | .17 |
Coverage B | 21.6 | 5.8 | .68 | 22.7 | 6.5 | 21.2 | 5.6 | .26 |
Coverage C | 28.3 | 6.8 | <.001 | 28.6 | 7.6 | 28.2 | 6.5 | .79 |
Coverage D | 27.9 | 6.6 | <.001 | 27.6 | 6.7 | 28.1 | 6.6 | .73 |
Duration A | 65.7 | 7.7 | <.001 | 65.6 | 7.6 | 65.7 | 7.8 | .95 |
Duration B | 64.9 | 8.6 | <.001 | 67.3 | 11.6 | 64.2 | 7.4 | .17 |
Duration C | 72.9 | 15.8 | <.001 | 74.2 | 16.5 | 72.5 | 15.6 | .61 |
Duration D | 71.7 | 15.2 | <.001 | 71.9 | 14.3 | 71.6 | 15.5 | .92 |
Occurrence A | 3.4 | 0.7 | .62 | 3.2 | 0.7 | 3.4 | 0.8 | .17 |
Occurrence B | 3.3 | 0.8 | .07 | 3.4 | 0.8 | 3.3 | 0.8 | .70 |
Occurrence C | 3.9 | 0.5 | .54 | 3.8 | 0.5 | 3.9 | 0.5 | .63 |
Occurrence D | 3.9 | 0.5 | .08 | 3.8 | 0.6 | 3.9 | 0.5 | .40 |
Only women who do not use hormonal contraception and have a regular cycle are included in the additional analyses on the influence of the cycle phase as a moderator. The sample size for these analyses is calculated and then the same descriptive statistics as before are calculated for this sample.
# Create a new dataset with the variables for the descriptive statistics
dataCycle <- data %>%
dplyr::select(Index, phase,
Coverage_A, Coverage_B, Coverage_C, Coverage_D,
Duration_A, Duration_B, Duration_C, Duration_D,
Occurrence_A, Occurrence_B, Occurrence_C, Occurrence_D) %>%
filter(!is.na(phase))
print(paste("N of the total sample:", apa_num(sum(!is.na(dataCycle$phase)), digits = 1)))
## [1] "N of the total sample: 39"
print(paste("N of the luteal group:", apa_num(sum(dataCycle$phase == "luteal", na.rm = TRUE), digits = 1)))
## [1] "N of the luteal group: 14"
print(paste("N of the follicular group:", apa_num(sum(dataCycle$phase == "follicular", na.rm = TRUE), digits = 1)))
## [1] "N of the follicular group: 25"
#Create a new dataframe for the summary table
cycleTable <- data.frame(Variable = character(),
Mean = numeric(), SD = numeric(),Shapiro_p = character(),
Mean_luteal = numeric(),Mean_follicular = numeric(),
Cycledifference_p = numeric(),
stringsAsFactors = FALSE)
# Loop to calculate the descriptive statistics for all variables
for (var in setdiff(names(dataCycle), "phase")) {
# Calculate mean and sd for each variable
meanValue <- round(mean(dataCycle[[var]], na.rm = TRUE), 3)
sdValue <- round(sd(dataCycle[[var]], na.rm = TRUE), 3)
# Shapiro-Wilk-Test to test for normal distribution
shapiro_p <- shapiro.test(na.omit(dataCycle[[var]]))$p.value
# Extract values for males and females
lutealValues <- na.omit(dataCycle[[var]][dataCycle$phase == "luteal"])
follicularValues <- na.omit(dataCycle[[var]][dataCycle$phase == "follicular"])
# Calculate means and sds for males and females separately
meanLuteal <- apa_num(mean(lutealValues, na.rm = TRUE), digits = 1)
sdLuteal <- apa_num(sd(lutealValues, na.rm = TRUE), digits = 1)
meanFollicular <- apa_num(mean(follicularValues, na.rm = TRUE), digits = 1)
sdFollicular <- apa_num(sd(follicularValues, na.rm = TRUE), digits = 1)
# Welch-t-Test to test if variables differ between males and females
cycledifference_p <- t.test(lutealValues, follicularValues, var.equal = FALSE)$p.value
# Append results to the summary table
cycleTable <- rbind(cycleTable, data.frame(
Variable = var,
Total_M = meanValue, Total_SD = sdValue,
Shapiro_p = shapiro_p,
Luteal_M = meanLuteal,
Luteal_SD = sdLuteal,
Follicular_M = meanFollicular,
Follicular_SD = sdFollicular,
Cycledifference_p = cycledifference_p,
stringsAsFactors = FALSE))
}
# Reformat and prepare descriptive statistics table for APA-style output
cycleTable_clean <- cycleTable %>%
# Rename test columns for clearer APA-style labels
rename(`Shapiro-Wilk test_p` = Shapiro_p,
`Welch's t-test_p` = Cycledifference_p) %>%
mutate(
# Replace underscores with spaces and capitalize the following letter
# e.g., "Duration_A" → "Duration A"
Variable = sub("^(.*)_([A-Za-z])$", "\\1 \\U\\2", Variable, perl = TRUE),
# Format p-values according to APA guidelines (e.g., p = .001 → "p < .001")
`Shapiro-Wilk test_p` = apa7::apa_p(`Shapiro-Wilk test_p`),
`Welch's t-test_p` = apa7::apa_p(`Welch's t-test_p`))
# Identify rows where p < .05 for bold highlighting in the table
rows_shapiro <- which(as.numeric(cycleTable[["Shapiro_p"]]) < 0.05)
rows_welch <- which(as.numeric(cycleTable[["Genderdifference_p"]]) < 0.05)
# Get column indices for the test columns (used for formatting later)
j_shapiro <- match("Shapiro-Wilk test_p", names(cycleTable_clean))
j_welch <- match("Welch's t-test_p", names(cycleTable_clean))
# Create APA-style flextable without automatic numeric or markdown formatting
tableCycle <- apa_flextable(
data = cycleTable_clean,
auto_format_columns = FALSE,
markdown_body = FALSE)
# Apply bold font to rows with significant results (p < .05)
tableCycle <- flextable::bold(tableCycle, i = rows_shapiro, j = j_shapiro, part = "body")
tableCycle <- flextable::bold(tableCycle, i = rows_welch, j = j_welch, part = "body")
# Display final formatted APA-style table
tableCycle
Variable | Total | Shapiro-Wilk test | Luteal | Follicular | Welch’s t-test | |||
|---|---|---|---|---|---|---|---|---|
M | SD | p | M | SD | M | SD | p | |
Index | 0.167 | 0.880 | .14 | 0.4 | 0.9 | 0.0 | 0.9 | .19 |
Coverage A | 21.093 | 6.285 | .99 | 21.8 | 6.8 | 20.7 | 6.1 | .62 |
Coverage B | 20.710 | 5.597 | .15 | 21.3 | 6.8 | 20.4 | 4.9 | .65 |
Coverage C | 28.679 | 7.205 | .07 | 29.1 | 7.5 | 28.5 | 7.2 | .81 |
Coverage D | 29.518 | 7.692 | .10 | 27.8 | 8.8 | 30.5 | 7.0 | .34 |
Duration A | 65.746 | 8.064 | <.001 | 66.0 | 6.3 | 65.6 | 9.0 | .87 |
Duration B | 64.652 | 6.825 | .44 | 64.1 | 7.6 | 64.9 | 6.5 | .74 |
Duration C | 74.281 | 17.063 | <.001 | 74.5 | 18.6 | 74.1 | 16.6 | .95 |
Duration D | 75.345 | 17.480 | <.001 | 72.9 | 21.5 | 76.7 | 15.1 | .57 |
Occurrence A | 3.177 | 0.805 | .52 | 3.3 | 0.9 | 3.1 | 0.8 | .60 |
Occurrence B | 3.184 | 0.735 | .87 | 3.3 | 0.9 | 3.1 | 0.7 | .56 |
Occurrence C | 3.861 | 0.453 | .04 | 3.9 | 0.4 | 3.8 | 0.5 | .60 |
Occurrence D | 3.915 | 0.471 | .33 | 3.8 | 0.5 | 4.0 | 0.4 | .30 |
Correlations between all temporal MS parameters and the index are calculated. Spearman’s rank correlations are calculated since the index is not normally distributed.
# Create a new dataset with the variables for the correlations
dataCorrelationIndex <- data %>%
dplyr::select(Index,
Coverage_A, Coverage_B, Coverage_C, Coverage_D,
Duration_A, Duration_B, Duration_C, Duration_D,
Occurrence_A, Occurrence_B, Occurrence_C, Occurrence_D)
# Create a new dataframe for the correlation table
correlationTableIndex <- tibble(
Variable = character(),
Correlation = numeric(),
p_value = numeric())
# Loop to calculate the correlations for all MS parameters with the Index
for (var in setdiff(names(dataCorrelationIndex), "Index")) {
# Calculate Spearman's rank correlations, since Index is not normally distributed
spearman_test <- cor.test(
dataCorrelationIndex[[var]],
dataCorrelationIndex$Index,
method = "spearman")
# Append results to the correlation table
new_row <- tibble(
Variable = var,
Correlation = unname(spearman_test$estimate),
p_value = spearman_test$p.value)
correlationTableIndex <- bind_rows(correlationTableIndex, new_row)
}
An APA7-formatted table (for the appendix) is created.
# Prepare values for APA7 reporting
correlationTableIndex <- correlationTableIndex %>%
mutate(
Correlation = papaja::apa_num(Correlation, digits = 2, gt1 = FALSE),
p_value = apa7::apa_p(p_value))
# Create APA7-formatted table and display it
correlationTableIndex %>%
mutate(Variable = Variable %>%
str_replace_all("_", " ")) %>%
transmute(`Microstate parameter` = Variable,
r = Correlation,
p = p_value) %>%
apa_flextable()
Microstate parameter | r | p |
|---|---|---|
Coverage A | .03 | .73 |
Coverage B | -.06 | .48 |
Coverage C | .00 | .99 |
Coverage D | -.05 | .60 |
Duration A | -.16 | .07 |
Duration B | -.14 | .12 |
Duration C | -.10 | .28 |
Duration D | -.08 | .35 |
Occurrence A | .06 | .47 |
Occurrence B | .04 | .67 |
Occurrence C | .12 | .18 |
Occurrence D | .09 | .31 |
Multiple linear regressions for all microstate parameters on the domain-general prosociality index with gender as moderator are calculated. Before that, all variables except for the dichotomous gender-variable “sex_1male” are z-standardized.
# Create a new dataset with the variables for the regressions
dataRegressionIndex <- data %>%
dplyr::select(Index, sex_1male,
Coverage_A, Coverage_B, Coverage_C, Coverage_D,
Duration_A, Duration_B, Duration_C, Duration_D,
Occurrence_A, Occurrence_B, Occurrence_C, Occurrence_D)
# Standardize continuous variables (except dichotomous variables sex_1male) for standardized coefficients
dataRegressionIndex <- dataRegressionIndex %>%
mutate(across(-c(sex_1male), scale))
# Create a new dataframe for the regression table
regressionTableIndex <- data.frame(Variable = character(),
Interaction_beta = numeric(),
Interaction_p = numeric(),
Men_beta = numeric(),
Men_p = numeric(),
Women_beta = numeric(),
Women_p = numeric(),
stringsAsFactors = FALSE)
# Create a new dataframe for the regression table appendix
regressionTableAppendix <- data.frame(Variable = character(),
Interaction_beta = numeric(),
Interaction_p = numeric(),
Men_beta = numeric(),
Men_p = numeric(),
Women_beta = numeric(),
Women_p = numeric(),
stringsAsFactors = FALSE)
# Create a list for the models for the plots
models_plot <- list()
# Loop to calculate linear regressions for all MS parameters on Index and gender as moderator
for (var in setdiff(names(dataRegressionIndex), c("Index", "sex_1male"))) {
# Calculate a linear regression model
form <- as.formula(paste("Index ~", var, "* sex_1male"))
modelIndex <- lm(form, data = dataRegressionIndex)
# Extract beta-coefficients from the model
coef_summary <- summary(modelIndex)$coefficients
beta1 <- coef_summary[2, 1]
beta3 <- coef_summary[4, 1]
# Assign coefficients for the effects of MS parameters on Index
effect_men <- beta1
effect_women <- beta1 + beta3
effect_interaction <- beta3
# Extract p-value
effect_men_p <- coef_summary[2, 4]
interaction_p <- coef_summary[4, 4]
# Calculate p-value for women
interaction_term <- paste0(var, ":", "sex_1male2")
hyp_test <- linearHypothesis(modelIndex, paste(var, "+", interaction_term, "= 0"))
effect_women_p <- hyp_test$`Pr(>F)`[2]
# Append results to the table for the appendix
regressionTableAppendix <- rbind(
regressionTableAppendix,
data.frame(
Variable = var,
Interaction_beta = apa_num(effect_interaction, digits = 2),
Interaction_p = interaction_p,
Men_beta = apa_num(effect_men, digits = 2),
Men_p = effect_men_p,
Women_beta = apa_num(effect_women, digits = 2),
Women_p = effect_women_p,
stringsAsFactors = FALSE))
# Append results to the table for the plots
regressionTableIndex <- rbind(
regressionTableIndex,
data.frame(
Variable = var,
Interaction_beta = apa_num(effect_interaction, digits = 2),
Interaction_p = apa_p(interaction_p),
Men_beta = apa_num(effect_men, digits = 2),
Men_p = apa_p(effect_men_p),
Women_beta = apa_num(effect_women, digits = 2),
Women_p = apa_p(effect_women_p),
stringsAsFactors = FALSE))
# Unstandardized model for the plot
modelPlot <- lm(form, data = data)
models_plot[[var]] <- modelPlot
}
An APA7-formatted table (for the appendix) is created.
# Create APA-style table for the appendix
tableReg <- regressionTableAppendix %>%
# Replace underscores in variable names with spaces
# e.g., "Duration_A" → "Duration A"
mutate(Variable = Variable %>% str_replace_all("_", " ")) %>%
# Rename 'Variable' column for clearer labeling in the table
rename(`Microstate parameter` = Variable)
# Identify rows with significant effects (p < .05)
rows_interaction <- which(tableReg[["Interaction_p"]] < 0.05)
rows_men <- which(tableReg[["Men_p"]] < 0.05)
rows_women <- which(tableReg[["Women_p"]] < 0.05)
# Convert the regression table to APA-style formatting
tableReg <- tableReg %>%
apa_flextable()
# Apply bold formatting to significant results in each model
tableReg <- flextable::bold(tableReg, i = rows_interaction, j = "Interaction_*p*", part = "body")
tableReg <- flextable::bold(tableReg, i = rows_men, j = "Men_*p*", part = "body")
tableReg <- flextable::bold(tableReg, i = rows_women, j = "Women_*p*", part = "body")
# Display final formatted regression table for the appendix
tableReg
Microstate parameter | Interaction | Men | Women | |||
|---|---|---|---|---|---|---|
β | p | β | p | β | p | |
Coverage A | -0.57 | .009 | 0.46 | .02 | -0.11 | .27 |
Coverage B | 0.32 | .10 | -0.24 | .14 | 0.07 | .48 |
Coverage C | -0.35 | .07 | 0.27 | .10 | -0.08 | .46 |
Coverage D | 0.49 | .02 | -0.39 | .03 | 0.10 | .32 |
Duration A | -0.25 | .23 | 0.11 | .57 | -0.15 | .14 |
Duration B | 0.28 | .11 | -0.31 | .02 | -0.02 | .83 |
Duration C | -0.23 | .25 | 0.16 | .36 | -0.07 | .47 |
Duration D | 0.50 | .02 | -0.43 | .03 | 0.07 | .49 |
Occurrence A | -0.48 | .03 | 0.43 | .03 | -0.05 | .59 |
Occurrence B | 0.15 | .45 | -0.07 | .70 | 0.09 | .40 |
Occurrence C | -0.33 | .10 | 0.33 | .06 | 0.00 | .99 |
Occurrence D | 0.19 | .32 | -0.12 | .48 | 0.08 | .46 |
Figures are created to illustrate the results for the significant effects.
# Helper to create labels
make_label <- function(title, beta, p, total_width = 11) {
title_fixed <- sprintf("%-*s", total_width, title)
paste0("<b>", title_fixed, "</b><br>",
"β = ", beta, "<br>",
"<i>p</i> = ", p)
}
# Function to create and save plot
plot_ms_variable <- function(var, x_label, data, regressionTableIndex, models_plot,
y_label = "Prosociality",
y_min = -2, y_max = 2) {
# X limits based on data
x_min <- min(data[[var]], na.rm = TRUE)
x_max <- max(data[[var]], na.rm = TRUE)
# Extract effect sizes & p-values from table
row_vals <- regressionTableIndex %>% filter(Variable == var) %>% slice(1)
label_interaction <- with(row_vals, make_label("Interaction", Interaction_beta, Interaction_p))
label_men <- with(row_vals, make_label("Men", Men_beta, Men_p))
label_women <- with(row_vals, make_label("Women", Women_beta, Women_p))
# Predictions from stored model
m_plot <- models_plot[[var]]
seq_var <- seq(x_min, x_max, length.out = 100)
newdat <- expand.grid(sex_1male = factor(c(1, 2), levels = c(1, 2)),
var = seq_var)
names(newdat)[names(newdat) == "var"] <- var
pred <- predict(m_plot, newdata = newdat, interval = "confidence")
dataPlotIndex <- cbind(newdat, pred)
# Main scatter + regression plot
p_main <- ggplot(data, aes(x = .data[[var]], y = Index)) +
geom_ribbon(
data = filter(dataPlotIndex, sex_1male == 1),
aes(x = .data[[var]], ymin = lwr, ymax = upr),
fill = "#88C999", alpha = 0.4, inherit.aes = FALSE
) +
geom_ribbon(
data = filter(dataPlotIndex, sex_1male == 2),
aes(x = .data[[var]], ymin = lwr, ymax = upr),
fill = "#E6AA80", alpha = 0.4, inherit.aes = FALSE
) +
geom_point(aes(color = as.factor(sex_1male)),
alpha = 0.6, size = 1, show.legend = FALSE
) +
geom_line(
data = filter(dataPlotIndex, sex_1male == 1),
aes(x = .data[[var]], y = fit),
color = "#0B6623", linewidth = 1
) +
geom_line(
data = filter(dataPlotIndex, sex_1male == 2),
aes(x = .data[[var]], y = fit),
color = "#8B3A00", linewidth = 1
) +
coord_cartesian(xlim = c(x_min, x_max), ylim = c(y_min, y_max)) +
scale_color_manual(values = c("1" = "#0B6623", "2" = "#8B3A00")) +
labs(x = x_label, y = y_label) +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks.length = unit(5, "pt"),
axis.ticks = element_line(color = "black", size = 0.75),
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
axis.line.x = element_line(color = "black", size = 1),
axis.line.y = element_line(color = "black", size = 1),
axis.title.x = element_text(size = 10, face = "bold", margin = margin(t = 3)),
axis.title.y = element_text(size = 10, face = "bold", margin = margin(r = 3)),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
legend.text = element_text(size = 10),
legend.title = element_text(size = 10, face = "bold")
)
# Effect boxes
text_plot <- ggplot() +
geom_textbox(aes(x = 0, y = 4.5, label = label_interaction),
fill = "white", box.color = "black",
size = 3, hjust = 0, width = unit(0.75, "inch"),
halign = 0, family = "Courier") +
geom_textbox(aes(x = 0, y = 2.5, label = label_men),
fill = "#88C999", box.color = "black",
size = 3, hjust = 0, width = unit(0.75, "inch"),
halign = 0, family = "Courier") +
geom_textbox(aes(x = 0, y = 0.5, label = label_women),
fill = "#E6AA80", box.color = "black",
size = 3, hjust = 0, width = unit(0.75, "inch"),
halign = 0, family = "Courier") +
xlim(0, 1) + ylim(0, 5) +
theme_void() + theme(plot.margin = margin(0,0,0,6))
# Combine plot with text
p_final <- (p_main | text_plot) + plot_layout(widths = c(2.5, 1))
return(p_final)
}
plot_CoverageA <- plot_ms_variable("Coverage_A",
x_label = "Coverage A (%)",
data = data,
regressionTableIndex = regressionTableIndex,
models_plot = models_plot)
plot_OccurrenceA <- plot_ms_variable("Occurrence_A",
x_label = "Occurrence A (appearances/s)",
data = data,
regressionTableIndex = regressionTableIndex,
models_plot = models_plot)
plot_CoverageD <- plot_ms_variable("Coverage_D",
x_label = "Coverage D (%)",
data = data,
regressionTableIndex = regressionTableIndex,
models_plot = models_plot)
plot_DurationD <- plot_ms_variable("Duration_D",
x_label = "Duration D (ms)",
data = data,
regressionTableIndex = regressionTableIndex,
models_plot = models_plot)
plot_CoverageA <- wrap_plots(plot_CoverageA, ncol = 1)
plot_OccurrenceA <- wrap_plots(plot_OccurrenceA, ncol = 1)
plot_CoverageD <- wrap_plots(plot_CoverageD, ncol = 1)
plot_DurationD <- wrap_plots(plot_DurationD, ncol = 1)
# Display the plots for coverage and occurrence A side by side
plot_A <- (plot_CoverageA | plot_OccurrenceA)
print(plot_A)
# Display the plots for coverage and duration D side by side
plot_D <- (plot_CoverageD | plot_DurationD)
print(plot_D)
A linear multiple regression with cycle phase as moderator is calculated in the group of women without hormonal contraception and a regular cycle to see whether the hormonal differences in the female menstrual cycle have an influence on the effect of the microstate parameters on the domain-general prosociality index.
# Change "phase" to a factor
dataCycle$phase <- as.factor(dataCycle$phase)
# Define reference-category
dataCycle$phase <- relevel(dataCycle$phase, ref = "luteal")
# Create a new dataframe for the regression table
regressionTableCycle <- data.frame(Variable = character(),
Interaction_beta = numeric(),
Interaction_p = numeric(),
Luteal_beta = numeric(),
Luteal_p = numeric(),
Follicular_beta = numeric(),
Follicular_p = numeric(),
stringsAsFactors = FALSE)
# Loop to calculate linear regressions for all MS parameters on Index and phase as moderator
for (var in setdiff(names(dataCycle), c("Index", "phase"))) {
# Calculate a linear regression model
formula <- as.formula(paste("Index ~", var, "* phase")) # Create formula for the model
modelCycle <- lm(formula, data = dataCycle) # Calculate model
# Extract beta-coefficients from the model
coef_summary <- summary(modelCycle)$coefficients
beta1 <- coef_summary[2, 1]
beta3 <- coef_summary[4, 1]
# Assign coefficients for the effects of MS parameters on Index
effect_luteal <- beta1 # only for luteal
effect_follicular <- beta1 + beta3 # only for follciular
effect_interaction <- beta3 # interaction effect with gender
# Extract p-value
effect_luteal_p <- coef_summary[2, 4] # for luteal
interaction_p <- coef_summary[4, 4] # for the interaction
# Calculate p-value for follicular
interaction_term <- paste0(var,":","phasefollicular") # create term for next step
# Perform a linear hypothesis test to test if the effect is sig. for follicular
hyp_test <- linearHypothesis(modelCycle, paste(var, "+", interaction_term, "= 0"))
effect_follicular_p <- hyp_test$`Pr(>F)`[2] # Extract p-value for follicular
# Append results to the table
regressionTableCycle <- rbind(regressionTableCycle, data.frame(
Variable = var,
Interaction_beta = apa_num(effect_interaction, digits = 2),
Interaction_p = apa_p(interaction_p),
Luteal_beta = apa_num(effect_luteal, digits = 2),
Luteal_p = apa_p(effect_luteal_p),
Follicular_beta = apa_num(effect_follicular, digits = 2),
Follicular_p = apa_p(effect_follicular_p),
stringsAsFactors = FALSE))
}
An APA7-formatted table is created.
# Create and display table for regressions with cycle phase as a moderator
regressionTableCycle %>%
mutate(Variable = Variable %>%
str_replace_all("_", " ")) %>%
rename(`Microstate parameter` = Variable) %>%
apa_flextable()
Microstate parameter | Interaction | Luteal | Follicular | |||
|---|---|---|---|---|---|---|
β | p | β | p | β | p | |
Coverage A | 0.01 | .80 | -0.05 | .16 | -0.04 | .18 |
Coverage B | -0.02 | .69 | 0.05 | .14 | 0.03 | .36 |
Coverage C | -0.01 | .84 | 0.00 | .92 | -0.01 | .65 |
Coverage D | 0.02 | .53 | 0.00 | .98 | 0.02 | .34 |
Duration A | 0.01 | .81 | -0.05 | .19 | -0.04 | .05 |
Duration B | -0.06 | .13 | 0.05 | .10 | -0.01 | .66 |
Duration C | -0.01 | .70 | 0.00 | .92 | -0.01 | .63 |
Duration D | 0.01 | .68 | 0.00 | .73 | 0.01 | .37 |
Occurrence A | 0.15 | .68 | -0.32 | .24 | -0.17 | .47 |
Occurrence B | 0.03 | .94 | 0.29 | .30 | 0.32 | .24 |
Occurrence C | 0.44 | .57 | -0.40 | .55 | 0.03 | .93 |
Occurrence D | 0.34 | .60 | -0.21 | .67 | 0.13 | .75 |
A plot is created showing that the EEG data used to calculate the microstates has more power in the frequencies with an inhibitory function.
# Extract IDs to match against file names
subject_ids <- as.character(data$ID)
# List all .dat files in the target folder
files_all <- list.files(params$pathFFT, pattern = "\\.dat$", full.names = TRUE)
# Extract the first 4 characters of each file name as the subject ID
file_ids <- substr(basename(files_all), 1, 4)
# Keep only files whose ID appears in the dataset
files <- files_all[file_ids %in% subject_ids]
# Define frequency vector (example: 500 bins → 0.5 Hz steps)
n_bins <- 500 # adjust to your data
freqs <- seq(0, by = 0.5, length.out = n_bins)
exclude_freqs <- (freqs < 2 | freqs > 20) # exclude frequencies outside 2–20 Hz
# Prepare containers for per subject matrices and channel labels
subject_list <- list()
channel_labels <- NULL
for (file in files) {
# Read first line as channel names
header_line <- readLines(file, n = 1)
channels <- unlist(strsplit(header_line, "\\s+"))
# Store channel labels once (from the first file)
if (is.null(channel_labels)) {
channel_labels <- channels
}
# Read the remaining lines as a numeric matrix (skip header line)
data <- as.matrix(read.table(file, skip = 1))
# Keep only rows (frequencies) within 2–20 Hz
data_filtered <- data[!exclude_freqs, ]
# Save per-subject filtered matrix
subject_list[[file]] <- data_filtered
}
# Stack all subjects into a 3D array: [freq x channel x subject]
subject_array <- array(
unlist(subject_list),
dim = c(sum(!exclude_freqs), length(channel_labels), length(subject_list))
)
# Mean across subjects → frequency × channel matrix
mean_power <- apply(subject_array, c(1, 2), mean)
# Label rows/columns by frequency and channel
filtered_freqs <- freqs[!exclude_freqs]
colnames(mean_power) <- channel_labels
rownames(mean_power) <- filtered_freqs
# Mean across channels → one spectrum (mean power per frequency)
power_medio <- rowMeans(mean_power)
# Build tidy data for plotting
df_power <- data.frame(
Frequency = filtered_freqs,
Power = power_medio
)
# Define shaded bands for visualization
df_inhib <- subset(df_power, Frequency >= 2 & Frequency <= 12); df_inhib$band <- "Inhibitory"
df_excit <- subset(df_power, Frequency >= 12 & Frequency <= 20); df_excit$band <- "Excitatory"
# Plot mean spectrum with shaded frequency bands
plot_power <- ggplot() +
# Shaded area: inhibitory band (2–12 Hz)
geom_area(data = df_inhib, aes(x = Frequency, y = Power, fill = band), alpha = 0.6) +
# Shaded area: excitatory band (12–20 Hz)
geom_area(data = df_excit, aes(x = Frequency, y = Power, fill = band), alpha = 0.3) +
# Mean spectrum line (across channels and subjects)
geom_line(data = df_power, aes(x = Frequency, y = Power), color = "black", size = 1) +
# Axis labels and legend title
labs(
x = "Frequency (Hz)",
y = "Power (\u00B5V\u00B2)",
fill = "Frequency Band"
) +
# Manual fill and legend order
scale_fill_manual(
values = c("Inhibitory" = "grey80", "Excitatory" = "grey95"),
breaks = c("Inhibitory", "Excitatory")
) +
guides(fill = guide_legend(override.aes = list(alpha = 0.5))) +
# X-axis breaks/limits
scale_x_continuous(breaks = seq(2, 20, by = 2), limits = c(2, 20)) +
# Minimal theme and APA-like cosmetic tweaks
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks.length = unit(5, "pt"),
axis.ticks = element_line(color = "black", size = 0.75),
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
axis.line.x = element_line(color = "black", size = 1),
axis.line.y = element_line(color = "black", size = 1),
axis.title.x = element_text(size = 14, face = "bold"),
axis.title.y = element_text(size = 14, face = "bold"),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
legend.text = element_text(size = 14),
legend.title = element_text(size = 14, face = "bold"),
legend.position = c(0.98, 0.98),
legend.justification = c("right", "top"),
legend.background = element_rect(fill = "white", color = "grey85"))
# Display the plot
print(plot_power)
The relative power in frequency bands with an inhibitory or excitatory function is calculated.
# Total power (sum across all frequencies)
total_power <- sum(df_power$Power, na.rm = TRUE)
# Power in inhibitory band (2–12 Hz)
inhib_power <- sum(subset(df_power, Frequency >= 2 & Frequency <= 12)$Power, na.rm = TRUE)
# Power in excitatory band (12–20 Hz)
excit_power <- sum(subset(df_power, Frequency > 12 & Frequency <= 20)$Power, na.rm = TRUE)
# Convert to percentages of total power
inhib_percent <- 100 * inhib_power / total_power
excit_percent <- 100 * excit_power / total_power
# Display results
cat("Inhibitory band (2–12 Hz):", apa_num(inhib_percent, digits = 1), "% of total power\n")
## Inhibitory band (2–12 Hz): 86.7 % of total power
cat("Excitatory band (12–20 Hz):", apa_num(excit_percent, digits = 1), "% of total power\n")
## Excitatory band (12–20 Hz): 13.3 % of total power