# LIBRARIES: not all of these are needed, but load them just to be sure

##Run this chunk to receive a file containing all trials # CHANGE SUBJECT, BLOCK NUMBER AND FILE NAME AT END OF CHUNK #FILE WILL BE SAVED IN WORKING DIRECTORY

# Set the working directory to the folder that holds this block's trial files.
# The combined output workbook (file_name) is written to this same folder.
setwd("C:/Users/johan/Documents/UTwente/M12_Thesis/DataAnalysis/Scripts/EEG_recordings/participant8/participant8_beta/block6/")

# Participant and block identifiers; change these for every run.
Participant <- 8

Block <- 6

# Name of the combined output workbook, built from the identifiers above.
file_name <- paste0("PB", Participant, "_", Block, ".xlsx")

# Collect the trial workbooks. The output of an earlier run (file_name) and
# Excel lock files ("~$...") also match the pattern, so drop them; otherwise a
# rerun would silently treat them as extra trials.
excel_files <- list.files(pattern = "\\.xlsx$")
is_trial_file <- excel_files != file_name & !startsWith(excel_files, "~$")
excel_files <- excel_files[is_trial_file]

# Fail early with a clear message instead of an obscure error further down.
if (length(excel_files) == 0) {
  stop("No trial .xlsx files found in ", getwd(), call. = FALSE)
}

# Import every workbook into a list of data frames, named by file name.
data_frames <- list()
for (file in excel_files) {
  data_frames[[file]] <- read_excel(file)
}

## Keep only the C3, C4 and Cz channel columns of a trial.
## df: data frame that contains columns named C3, C4 and Cz.
## Returns df restricted to those three columns, in that order.
select_columns <- function(df) {
  channels <- c("C3", "C4", "Cz")
  df[, channels]
}

# Reduce every imported trial to the C3, C4 and Cz columns.
data_frames <- lapply(X = data_frames, FUN = select_columns)


## Split a trial into its leading baseline segment and the remaining samples.
## The default of 101 rows is, per the original analysis note, the part of the
## recording before the 27 marker (200 ms).
##
## df: data frame with one row per sample.
## n:  number of leading rows that form the baseline segment (default 101).
##
## Returns a list with two data frames:
##   first_rows  - the first n rows (the baseline segment)
##   modified_df - every row after the first n
## If df has fewer than n rows, all rows go to first_rows and modified_df is
## empty; no NA-filled rows are created.
remove_first_rows <- function(df, n = 101) {
  is_baseline <- seq_len(nrow(df)) <= n
  list(
    first_rows = df[is_baseline, , drop = FALSE],
    modified_df = df[!is_baseline, , drop = FALSE]
  )
}

# Split every trial once, then pull the two parts into separate lists:
# `baseline` holds the leading baseline rows, `modified_data` the rest.
split_trials <- lapply(data_frames, remove_first_rows)
baseline <- lapply(split_trials, function(parts) parts$first_rows)
modified_data <- lapply(split_trials, function(parts) parts$modified_df)

# Drop the trailing rows of a trial. The default of 51 rows corresponds, per
# the original analysis note, to the last 100 ms of the recording.
#
# df: data frame with one row per sample.
# n:  number of trailing rows to remove (default 51).
#
# Returns df without its last n rows. If df has n rows or fewer, an empty data
# frame with the same columns is returned (seq_len avoids the 1:0 pitfall,
# which would otherwise keep row 1 or raise an error).
remove_last_rows <- function(df, n = 51) {
  n_keep <- max(nrow(df) - n, 0)
  df[seq_len(n_keep), , drop = FALSE]
}

# Trim the trailing rows from every trial in modified_data.
final_modified_data <- lapply(X = modified_data, FUN = remove_last_rows)

# Downsample a trial by averaging consecutive rows into bins.
#
# df:       data frame with numeric columns C3, C4 and Cz, one row per sample.
# num_bins: number of bins to split the rows into (default 300).
#
# Returns one row per non-empty bin with columns bin, mean_C3, mean_C4 and
# mean_Cz. When df has fewer rows than num_bins, only the first nrow(df) bins
# contain data, so fewer than num_bins rows are returned.
downsize_dataset <- function(df, num_bins = 300) {
  num_rows <- nrow(df)

  # Every bin gets the same base number of rows ...
  bin_width <- rep(num_rows %/% num_bins, num_bins)

  # ... and the leftover rows are handed out one each to the FIRST bins, so
  # that every row belongs to exactly one bin.
  excess_rows <- num_rows %% num_bins
  extra_bins <- seq_len(excess_rows)
  bin_width[extra_bins] <- bin_width[extra_bins] + 1

  # Bin number for each row, in row order.
  bin_indices <- rep(seq_len(num_bins), times = bin_width)

  # Mean of each channel within each bin.
  df %>%
    group_by(bin = bin_indices) %>%
    summarise(mean_C3 = mean(C3),
              mean_C4 = mean(C4),
              mean_Cz = mean(Cz), .groups = "drop")
}

# Bin every trimmed trial.
downsized_data <- lapply(X = final_modified_data, FUN = downsize_dataset)

## Average each column of a baseline segment, ignoring missing values.
## df: data frame (or matrix) of numeric columns.
## Returns a named numeric vector with one mean per column.
baseline_mean <- function(df) {
  channel_means <- colMeans(df, na.rm = TRUE)
  channel_means
}

# Per-trial column means of the baseline segments.
mean_baseline <- lapply(X = baseline, FUN = baseline_mean)

## Build the per-step dataset for one trial.
##
## df: binned trial with columns bin, mean_C3, mean_C4 and mean_Cz.
##
## Every run of 50 consecutive bins forms one step (step = ceiling(bin / 50)).
## Returns one row per step with the step number and the average of each
## channel over that step's bins, in columns C3, C4 and Cz.
calculate_averages <- function(df) {
  df %>%
    mutate(step = ceiling(bin / 50)) %>%
    group_by(step) %>%
    summarise(C3 = mean(mean_C3),
              C4 = mean(mean_C4),
              Cz = mean(mean_Cz), .groups = "drop")
}

# Collapse every binned trial in downsized_data into per-step averages.
averaged_trials <- lapply(X = downsized_data, FUN = calculate_averages)

##add baseline value to each step dataset (per trial)

# Mark a baseline record as step 0.
#
# df: a data frame, a list, or a named atomic vector (for example the output
#     of colMeans).
#
# Returns the input with an extra element/column `step` set to 0. Atomic
# vectors are converted to a list first: assigning `$step` directly on an
# atomic vector gives the same result but makes R warn
# "Coercing LHS to a list" on every call.
add_step_column <- function(df) {
  if (is.atomic(df)) {
    df <- as.list(df)
  }
  df$step <- 0
  df
}

# Turn each trial's baseline means into a step-0 record. The means are named
# numeric vectors, so convert them to lists before adding `step`; assigning
# `$step` on an atomic vector makes R warn "Coercing LHS to a list" once per
# trial.
mean_baseline <- lapply(mean_baseline, function(trial_means) {
  add_step_column(as.list(trial_means))
})

# Prepend the step-0 baseline record to the matching trial's step averages.
# Both lists are matched by position, so they must have the same length.
stopifnot(length(mean_baseline) == length(averaged_trials))
for (i in seq_along(mean_baseline)) {
  averaged_trials[[i]] <- rbind(mean_baseline[[i]], averaged_trials[[i]])
}

## Store the step column as text so that the ERD/S calculation, which only
## touches numeric columns, leaves it alone.
## df: data frame with a `step` column.
## Returns df with `step` converted to character.
convert_step_to_character <- function(df) {
  step_as_text <- as.character(df$step)
  df$step <- step_as_text
  df
}

# Convert the step column to text in every trial.
averaged_trials <- lapply(X = averaged_trials, FUN = convert_step_to_character)

# Keep a copy of the trials as they are at this point.
save_file <- averaged_trials


##apply ERD/S formula to the datasets

# Express every numeric column of one trial relative to its first row (the
# baseline record): (value - baseline) / baseline * 100.
# Non-numeric columns (such as the character `step` column) are left untouched.
# Wrapping this in a function keeps the intermediate objects local, so the
# global `baseline` list built earlier in the script is not overwritten.
to_erds <- function(trial) {
  is_numeric_col <- vapply(trial, is.numeric, logical(1))
  for (col_name in names(trial)[is_numeric_col]) {
    reference <- trial[[col_name]][1]
    trial[[col_name]] <- (trial[[col_name]] - reference) / reference * 100
  }
  trial
}

# Transform every trial.
averaged_trials <- lapply(averaged_trials, to_erds)

## Turn the step column back into numbers.
## df: data frame with a `step` column.
## Returns df with `step` converted to numeric.
convert_step_to_numeric <- function(df) {
  step_as_number <- as.numeric(df$step)
  df$step <- step_as_number
  df
}
# Convert the step column back to numeric in every trial.
averaged_trials <- lapply(X = averaged_trials, FUN = convert_step_to_numeric)


## Tag every trial with participant number, block number and trial number.
## Participant and Block come from the variables set earlier in the script;
## the trial number is the trial's position in the list.
for (trial_index in seq_along(averaged_trials)) {
  averaged_trials[[trial_index]]$subject <- Participant
  averaged_trials[[trial_index]]$block <- Block
  averaged_trials[[trial_index]]$trial <- trial_index
}

# Stack all trials, drop the baseline rows (step 0) and discard the row names.
df2 <- do.call(rbind, averaged_trials)
df2 <- df2 %>%
  filter(step != 0) %>%
  rownames_to_column() %>%
  select(-rowname)

# Write the combined trials to file_name in the working directory.
library(openxlsx)
write.xlsx(df2, file_name)

##RUN THIS CHUNK TO BIND ALL THE FILES ##ALL FILES HAVE TO BE IN ONE FOLDER ##SET NEW WD

setwd("C:/Users/Alex/Desktop/eeg data/files_to_bind")

# Name of the combined workbook written at the end of this chunk.
bound_file_name <- "eeg_data_1.xlsx"

# Collect the workbooks to bind. The combined workbook is written into this
# same folder, so exclude it (and Excel "~$" lock files); otherwise a rerun
# would read the previous result back in and duplicate every row.
excel_files <- list.files(pattern = "\\.xlsx$")
is_input_file <- excel_files != bound_file_name & !startsWith(excel_files, "~$")
excel_files <- excel_files[is_input_file]

# Read each workbook into a list of data frames, named by file name.
files_to_bind <- list()
for (file in excel_files) {
  files_to_bind[[file]] <- read_excel(file)
}

# Bind the data frames into one.
eeg_data_v.01 <- do.call(rbind, files_to_bind)

# Remove row names.
eeg_data_v.01 <- eeg_data_v.01 %>%
  rownames_to_column() %>%
  select(-rowname)

# Save the result as an Excel workbook.
write.xlsx(eeg_data_v.01, bound_file_name)

###THIS CHUNK IS FOR GRAND MEAN, IT GIVES ONE DATASET PER BLOCK

# Stack all trials of the block into one data frame.
combined_data <- do.call(rbind, averaged_trials)

# Mean of each channel per step across all trials.
mean_across_datasets <- combined_data %>%
  group_by(step) %>%
  summarise(C3 = mean(C3),
            C4 = mean(C4),
            Cz = mean(Cz), .groups = "drop")


# Plot the per-step means of all three channels.
ggplot(mean_across_datasets, aes(x = step)) +
  geom_line(aes(y = C3, color = "C3")) +
  geom_line(aes(y = C4, color = "C4")) +
  geom_line(aes(y = Cz, color = "Cz")) +
  labs(x = "Steps (inlcuding Baseline)", y = "Relative increase in Theta Frequency (in %)", color = "Electrode values") +
  scale_color_manual(values = c("C3" = "blue", "C4" = "red", "Cz" = "green")) +
  theme_minimal()

# Save the per-step means. The file name is derived from Participant and
# Block, like file_name, so it cannot go stale when those are changed for a
# new run (it used to be hard-coded as "P5_1_means.xlsx").
library(openxlsx)

means_file_name <- paste0("P", Participant, "_", Block, "_means.xlsx")
write.xlsx(mean_across_datasets, means_file_name)