library(readxl)
library(knitr)
library(dplyr)
library(ggplot2)
library(HKprocess)
library(parallel)
library(openxlsx)
setwd("C:/Users/Marcel/Desktop/Bachelor Thesis/Data Analysis/Relevant R Scripts/Movement Period Data (27 - Last-Step +65)/Hurst Exponents")
fractal_df <-read_excel("movementperiod_go_until_last_step+65_merged.xlsx")
fractal_df$Block <- factor(fractal_df$Block, levels = as.character(1:10))
fractal_df$Subject <- as.factor(fractal_df$Subject)
fractal_df$Trial <- factor(fractal_df$Trial, levels = as.character(1:48))
fractal_df$Sequence <- as.factor(fractal_df$Sequence)
str(fractal_df)
# Calculate the minimum trial length
trial_lengths <- fractal_df %>% group_by(Subject, Block, Trial) %>% summarise(Trial_Length = n())
min_length <- min(trial_lengths$Trial_Length)
# Filter each trial to the minimum length
fractal_df <- fractal_df %>% group_by(Subject, Block, Trial) %>% slice(1:min_length)
# Check trial lengths again
trial_lengths <- fractal_df %>% group_by(Subject, Block, Trial) %>% summarise(Trial_Length = n())
# Splitting the dataset into the learning blocks and the 9th and 10th block seperately to seperate the computation load into more manageable chunks
Sequence1 <- fractal_df %>%
filter(Sequence == 1)
Sequence2 <- fractal_df %>%
filter(Sequence == 2)
# Function to calculate Hurst exponent for each time series
calculate_hurst_exponent_hk <- function(time_series, sample_size = 50) {
set.seed(12345) # For reproducibility
hurst_samples <- inferH(time_series, n = sample_size)
hurst_estimate <- median(hurst_samples)
return(hurst_estimate)
}
# Define the columns you want to iterate over
columns_to_analyze <- c("COM_acceleration_X", "COM_acceleration_Y", "COM_acceleration_Z")
# Parallel Processing Setup
num_cores <- detectCores() - 1 # Use one less core than available
cl <- makeCluster(num_cores)
# Load Necessary Libraries on Worker Nodes
clusterEvalQ(cl, {
library(dplyr)
library(HKprocess)
})
# Export Necessary Variables and Functions to the Cluster
clusterExport(cl, varlist = c("Sequence1", "Sequence2", "calculate_hurst_exponent_hk", "columns_to_analyze"))
# Function to Calculate Hurst Exponents in Parallel for Each Group
parallel_hurst_calculation <- function(sequence, time_series_column) {
# Split the Data into Groups Manually
grouped_data <- sequence %>% group_by(Subject, Block, Trial) %>% group_split()
results <- parLapply(cl, grouped_data, function(group) {
hurst_value <- calculate_hurst_exponent_hk(group[[time_series_column]])
result <- data.frame(Subject = unique(group$Subject),
Block = unique(group$Block),
Trial = unique(group$Trial),
Hurst = hurst_value)
return(result)
})
return(do.call(rbind, results))
}
# Perform Parallel Computation
results_parallel <- parallel_hurst_calculation(Sequence2, columns_to_analyze[1])
write.xlsx(results_parallel, file = "Hurst_Exponents_Sequence2_COM_acceleration_X.xlsx")
results_parallel2 <- parallel_hurst_calculation(Sequence2, columns_to_analyze[2])
write.xlsx(results_parallel2, file = "Hurst_Exponents_Sequence2_COM_acceleration_Y.xlsx")
results_parallel3 <- parallel_hurst_calculation(Sequence2, columns_to_analyze[3])
write.xlsx(results_parallel3, file = "Hurst_Exponents_Sequence2_COM_acceleration_Z.xlsx")
results_parallel4 <- parallel_hurst_calculation(Sequence1, columns_to_analyze[4])
write.xlsx(results_parallel4, file = "Hurst_Exponents_Sequence1_COM_acceleration_X.xlsx")
results_parallel5 <- parallel_hurst_calculation(Sequence1, columns_to_analyze[5])
write.xlsx(results_parallel5, file = "Hurst_Exponents_Sequence1_COM_acceleration_Y.xlsx")
results_parallel6 <- parallel_hurst_calculation(Sequence1, columns_to_analyze[6])
write.xlsx(results_parallel6, file = "Hurst_Exponents_Sequence1_COM_acceleration_Z.xlsx")
# Stop the Cluster
stopCluster(cl)