Loading Required Libraries

# Load necessary libraries
library(tidyverse)
library(forecast)
library(reshape2)
library(viridis)
library(gridExtra)
library(parallel)
library(doParallel)

# Set up parallel processing for faster execution
num_cores <- min(detectCores() - 1, 2)  # Limit to 2 cores to avoid overloading
cl <- makeCluster(num_cores)
registerDoParallel(cl)

Creating a Folder for Results

dir.create("eeg_results", showWarnings = FALSE)

Loading Data

# Read EEG data
eeg_data <- read.csv("features_raw.csv", header = TRUE)

# Select a subset of channels for balanced performance
all_channels <- names(eeg_data)
# Select 10 channels or all if less than 10
sample_channels <- sample(all_channels, min(10, length(all_channels)))

# Display basic info about dataset
cat("Total number of channels:", ncol(eeg_data), "\n")
## Total number of channels: 33
cat("Number of time points:", nrow(eeg_data), "\n")
## Number of time points: 8064
cat("Selected channels for analysis:", length(sample_channels), "\n")
## Selected channels for analysis: 10
cat("Selected channels:", paste(sample_channels, collapse = ", "), "\n")
## Selected channels: Fp1, O1, F4, CP1, C3, Oz, FC2, Fp2, Pz, PO3

1. Creating Plots for Each Channel

# Function to create and save individual channel plots
plot_channel <- function(channel_name) {
  # Extract channel data
  channel_data <- eeg_data[[channel_name]]
  time_points <- 1:length(channel_data)
  
  # Calculate mean value
  mean_value <- mean(channel_data, na.rm = TRUE)
  
  # Create plot
  p <- ggplot() +
    geom_line(aes(x = time_points, y = channel_data), color = "blue") +
    geom_hline(yintercept = mean_value, color = "red", linetype = "dashed") +
    annotate("text", x = max(time_points) * 0.8, y = mean_value * 1.1,
             label = paste("Mean:", round(mean_value, 2)), color = "red") +
    theme_minimal() +
    labs(title = paste("EEG Signal of Channel", channel_name),
         x = "Time (points)",
         y = "Amplitude (μV)")
  
  # Save the plot
  ggsave(paste0("eeg_results/", channel_name, "_signal.png"), p, width = 10, height = 6)
  
  return(p)
}

# Create plots for each selected channel
channel_plots <- list()
for (channel in sample_channels) {
  cat("Plotting channel:", channel, "\n")
  p <- plot_channel(channel)
  channel_plots[[channel]] <- p
  print(p)
}
## Plotting channel: Fp1
## Plotting channel: O1

## Plotting channel: F4

## Plotting channel: CP1

## Plotting channel: C3

## Plotting channel: Oz

## Plotting channel: FC2

## Plotting channel: Fp2

## Plotting channel: Pz

## Plotting channel: PO3

Creating a Combined Plot with All Channels

# Convert data to long format for ggplot
eeg_long <- eeg_data[, sample_channels] %>%
  mutate(time_point = row_number()) %>%
  pivot_longer(cols = -time_point,
               names_to = "channel",
               values_to = "amplitude")

# Create combined plot
all_channels_plot <- ggplot(eeg_long, aes(x = time_point, y = amplitude, color = channel)) +
  geom_line(alpha = 0.7) +
  scale_color_viridis(discrete = TRUE) +
  theme_minimal() +
  theme(legend.position = "right") +
  labs(title = "EEG Signals of All Selected Channels",
       x = "Time (points)",
       y = "Amplitude (μV)")

# Display and save plot
print(all_channels_plot)

ggsave("eeg_results/all_channels.png", all_channels_plot, width = 12, height = 8)

Creating Correlation Matrix

# Calculate correlation matrix
cor_matrix <- cor(eeg_data[, sample_channels], use = "pairwise.complete.obs")
write.csv(cor_matrix, "eeg_results/correlation_matrix.csv")

# Create heatmap visualization
cor_melted <- melt(cor_matrix)
corr_heatmap <- ggplot(cor_melted, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        axis.text.y = element_text(size = 8)) +
  labs(title = "Correlation Heatmap Between EEG Channels",
       x = "", y = "", fill = "Correlation")

print(corr_heatmap)

ggsave("eeg_results/correlation_heatmap.png", corr_heatmap, width = 12, height = 10)

4. Time Series Decomposition

# Minimum points needed for proper decomposition
min_points_needed <- 16
num_points <- nrow(eeg_data)

# Function to create decomposition plots
decompose_and_plot <- function(channel_name, frequency = 8) {
  cat("Decomposing channel:", channel_name, "\n")
  
  if (num_points < min_points_needed) {
    # Classical decomposition for shorter series
    ts_data <- ts(eeg_data[[channel_name]], frequency = frequency)
    decomp <- decompose(ts_data, type = "additive")
    
    # Extract components
    observed <- as.numeric(decomp$x)
    trend <- as.numeric(decomp$trend)
    seasonal <- as.numeric(decomp$seasonal)
    remainder <- as.numeric(decomp$random)
    
    # Prepare data for plotting
    decomp_df <- data.frame(
      time = 1:length(observed),
      observed = observed,
      trend = trend,
      seasonal = seasonal,
      remainder = remainder,
      trend_seasonal = trend + seasonal
    )
    
    # Remove NA values
    decomp_df <- na.omit(decomp_df)
    
    # Format for plotting
    decomp_long <- pivot_longer(decomp_df,
                               cols = c(observed, trend, seasonal, remainder, trend_seasonal),
                               names_to = "component",
                               values_to = "value")
    
    # Create component breakdown plot
    p1 <- ggplot(filter(decomp_long, component %in% c("observed", "trend", "seasonal", "remainder")),
                aes(x = time, y = value)) +
      geom_line(aes(color = component)) +
      facet_wrap(~ component, scales = "free_y", ncol = 1) +
      theme_minimal() +
      labs(title = paste("Decomposition of EEG Signal for Channel", channel_name),
          x = "Time (points)", y = "Amplitude (μV)") +
      theme(legend.position = "none")
    
    # Create comparison plot
    p2 <- ggplot(filter(decomp_long, component %in% c("observed", "trend_seasonal")),
                aes(x = time, y = value, color = component)) +
      geom_line() +
      scale_color_manual(values = c("observed" = "blue", "trend_seasonal" = "red"),
                        labels = c("observed" = "Original Signal", "trend_seasonal" = "Trend + Seasonality")) +
      theme_minimal() +
      labs(title = paste("Signal Comparison for Channel", channel_name),
          x = "Time (points)", y = "Amplitude (μV)")
    
    # Combine plots
    combined_plot <- grid.arrange(p1, p2, ncol = 1, heights = c(3, 1))
    
    # Save plot
    ggsave(paste0("eeg_results/", channel_name, "_decomposition.png"), combined_plot, width = 10, height = 8)
    
    return(combined_plot)
  } else {
    # STL decomposition for longer series
    ts_data <- ts(eeg_data[[channel_name]], frequency = frequency)
    stl_result <- stl(ts_data, s.window = "periodic")
    
    # Prepare data for plotting
    decomp_df <- data.frame(
      time = 1:length(ts_data),
      observed = as.numeric(ts_data),
      trend = as.numeric(stl_result$time.series[, "trend"]),
      seasonal = as.numeric(stl_result$time.series[, "seasonal"]),
      remainder = as.numeric(stl_result$time.series[, "remainder"]),
      trend_seasonal = as.numeric(stl_result$time.series[, "trend"]) +
                      as.numeric(stl_result$time.series[, "seasonal"])
    )
    
    # Format for plotting
    decomp_long <- pivot_longer(decomp_df,
                               cols = c(observed, trend, seasonal, remainder, trend_seasonal),
                               names_to = "component",
                               values_to = "value")
    
    # Create component breakdown plot
    p1 <- ggplot(filter(decomp_long, component %in% c("observed", "trend", "seasonal", "remainder")),
                aes(x = time, y = value)) +
      geom_line(aes(color = component)) +
      facet_wrap(~ component, scales = "free_y", ncol = 1) +
      theme_minimal() +
      labs(title = paste("Decomposition of EEG Signal for Channel", channel_name),
          x = "Time (points)", y = "Amplitude (μV)") +
      theme(legend.position = "none")
    
    # Create comparison plot
    p2 <- ggplot(filter(decomp_long, component %in% c("observed", "trend_seasonal")),
                aes(x = time, y = value, color = component)) +
      geom_line() +
      scale_color_manual(values = c("observed" = "blue", "trend_seasonal" = "red"),
                        labels = c("observed" = "Original Signal", "trend_seasonal" = "Trend + Seasonality")) +
      theme_minimal() +
      labs(title = paste("Signal Comparison for Channel", channel_name),
          x = "Time (points)", y = "Amplitude (μV)")
    
    # Combine plots
    combined_plot <- grid.arrange(p1, p2, ncol = 1, heights = c(3, 1))
    
    # Save plot
    ggsave(paste0("eeg_results/", channel_name, "_decomposition.png"), combined_plot, width = 10, height = 8)
    
    return(combined_plot)
  }
}

# Process channels in parallel for better performance
decomposition_plots <- foreach(channel = sample_channels, .packages = c("tidyverse", "forecast", "gridExtra")) %dopar% {
  result <- try(decompose_and_plot(channel))
  if (!inherits(result, "try-error")) {
    result
  } else {
    NULL
  }
}

# Name the results list with channel names
names(decomposition_plots) <- sample_channels

# Display the plots
for (channel in sample_channels) {
  if (!is.null(decomposition_plots[[channel]])) {
    print(decomposition_plots[[channel]])
  }
}
## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]

Time Series Forecast

# Function to forecast time series
forecast_channel <- function(channel_name, frequency = 8) {
  cat("Forecasting channel:", channel_name, "\n")
  
  # Get data
  data <- eeg_data[[channel_name]]
  n <- length(data)
  
  # Calculate train-test split (80% train, 20% test)
  train_size <- floor(0.8 * n)
  test_size <- n - train_size
  
  # Split data
  train_data <- data[1:train_size]
  test_data <- data[(train_size+1):n]
  
  # Convert to time series
  train_ts <- ts(train_data, frequency = frequency)
  
  # Fit ARIMA model (with faster settings)
  arima_model <- auto.arima(train_ts, 
                         seasonal = TRUE,
                         stepwise = TRUE, 
                         approximation = TRUE,
                         max.p = 3,  # Limit model complexity
                         max.q = 3,
                         max.P = 1, 
                         max.Q = 1)
  
  # Generate forecast
  arima_forecast <- forecast(arima_model, h = test_size)
  
  # Create forecast plot
  forecast_plot <- autoplot(arima_forecast) +
    autolayer(ts(c(train_data, test_data), frequency = frequency), 
              series = "Actual", color = "black") +
    theme_minimal() +
    labs(title = paste("Forecast for Channel", channel_name),
         x = "Time", 
         y = "Amplitude (μV)")
  
  # Save plot
  ggsave(paste0("eeg_results/", channel_name, "_forecast.png"), forecast_plot, width = 12, height = 8)
  
  # Calculate accuracy
  forecast_accuracy <- accuracy(arima_forecast$mean, test_data)
  
  return(list(
    plot = forecast_plot,
    model = arima_model,
    accuracy = forecast_accuracy
  ))
}

# Process a subset of channels to save time (just 5 for forecasting)
forecast_channels <- sample(sample_channels, min(5, length(sample_channels)))

# Forecast each channel
forecast_results <- list()
forecast_accuracy <- data.frame()

for (channel in forecast_channels) {
  result <- try(forecast_channel(channel))
  if (!inherits(result, "try-error") && !is.null(result)) {
    forecast_results[[channel]] <- result
    print(result$plot)
    
    # Add accuracy to dataframe - check which metrics are available
    # Some metrics might not be available depending on the data
    accuracy_df <- as.data.frame(result$accuracy)
    channel_accuracy <- data.frame(
      Channel = channel,
      RMSE = accuracy_df$RMSE[1],
      MAE = accuracy_df$MAE[1]
    )
    
    # Add additional metrics if they exist
    if ("MAPE" %in% colnames(accuracy_df)) {
      channel_accuracy$MAPE <- accuracy_df$MAPE[1]
    }
    if ("MASE" %in% colnames(accuracy_df)) {
      channel_accuracy$MASE <- accuracy_df$MASE[1]
    }
    forecast_accuracy <- rbind(forecast_accuracy, channel_accuracy)
  }
}
## Forecasting channel: FC2
## Forecasting channel: Fp1

## Forecasting channel: O1

## Forecasting channel: CP1

## Forecasting channel: C3

# Save forecast accuracy metrics
write.csv(forecast_accuracy, "eeg_results/forecast_accuracy.csv", row.names = FALSE)

Summary of Analysis

# Display forecast accuracy table
knitr::kable(forecast_accuracy, 
             caption = "Forecast Accuracy Metrics by Channel",
             digits = 4,
             align = "lrrrr")
Forecast Accuracy Metrics by Channel
Channel RMSE MAE MAPE
FC2 48.8374 33.8903 103.5980
Fp1 4.2923 3.3779 146.9306
O1 6.6982 5.1091 100.9169
CP1 12.2207 8.4304 101.2658
C3 6.7068 4.9394 101.0683
# Get channel with best RMSE
best_channel <- forecast_accuracy$Channel[which.min(forecast_accuracy$RMSE)]
worst_channel <- forecast_accuracy$Channel[which.max(forecast_accuracy$RMSE)]

cat("## Summary of Analysis\n\n")
## ## Summary of Analysis
cat("Analyzed", length(sample_channels), "out of", ncol(eeg_data), "total channels\n\n")
## Analyzed 10 out of 33 total channels
cat("## Forecast Performance\n\n")
## ## Forecast Performance
cat("Best performing channel:", best_channel, "with RMSE =", min(forecast_accuracy$RMSE, na.rm = TRUE), "\n")
## Best performing channel: Fp1 with RMSE = 4.292303
cat("Worst performing channel:", worst_channel, "with RMSE =", max(forecast_accuracy$RMSE, na.rm = TRUE), "\n\n")
## Worst performing channel: FC2 with RMSE = 48.83742
cat("## Files Generated\n\n")
## ## Files Generated
cat("1. Individual channel plots: [channel_name]_signal.png\n")
## 1. Individual channel plots: [channel_name]_signal.png
cat("2. Combined channels plot: all_channels.png\n")
## 2. Combined channels plot: all_channels.png
cat("3. Correlation matrix: correlation_matrix.csv and correlation_heatmap.png\n")
## 3. Correlation matrix: correlation_matrix.csv and correlation_heatmap.png
cat("4. Time series decomposition: [channel_name]_decomposition.png\n")
## 4. Time series decomposition: [channel_name]_decomposition.png
cat("5. Forecast plots: [channel_name]_forecast.png\n")
## 5. Forecast plots: [channel_name]_forecast.png
cat("6. Forecast accuracy: forecast_accuracy.csv\n")
## 6. Forecast accuracy: forecast_accuracy.csv