Loading Required Libraries
# Load necessary libraries
library(tidyverse)
library(forecast)
library(reshape2)
library(viridis)
library(gridExtra)
library(parallel)
library(doParallel)
# Set up parallel processing for faster execution
num_cores <- min(detectCores() - 1, 2) # Limit to 2 cores to avoid overloading
cl <- makeCluster(num_cores)
registerDoParallel(cl)
Creating a Folder for Results
dir.create("eeg_results", showWarnings = FALSE)
Loading Data
# Read EEG data
eeg_data <- read.csv("features_raw.csv", header = TRUE)
# Select a subset of channels for balanced performance
all_channels <- names(eeg_data)
# Select 10 channels or all if less than 10
sample_channels <- sample(all_channels, min(10, length(all_channels)))
# Display basic info about dataset
cat("Total number of channels:", ncol(eeg_data), "\n")
## Total number of channels: 33
cat("Number of time points:", nrow(eeg_data), "\n")
## Number of time points: 8064
cat("Selected channels for analysis:", length(sample_channels), "\n")
## Selected channels for analysis: 10
cat("Selected channels:", paste(sample_channels, collapse = ", "), "\n")
## Selected channels: Fp1, O1, F4, CP1, C3, Oz, FC2, Fp2, Pz, PO3
1. Creating Plots for Each Channel
# Function to create and save individual channel plots
plot_channel <- function(channel_name) {
# Extract channel data
channel_data <- eeg_data[[channel_name]]
time_points <- 1:length(channel_data)
# Calculate mean value
mean_value <- mean(channel_data, na.rm = TRUE)
# Create plot
p <- ggplot() +
geom_line(aes(x = time_points, y = channel_data), color = "blue") +
geom_hline(yintercept = mean_value, color = "red", linetype = "dashed") +
annotate("text", x = max(time_points) * 0.8, y = mean_value * 1.1,
label = paste("Mean:", round(mean_value, 2)), color = "red") +
theme_minimal() +
labs(title = paste("EEG Signal of Channel", channel_name),
x = "Time (points)",
y = "Amplitude (μV)")
# Save the plot
ggsave(paste0("eeg_results/", channel_name, "_signal.png"), p, width = 10, height = 6)
return(p)
}
# Create plots for each selected channel
channel_plots <- list()
for (channel in sample_channels) {
cat("Plotting channel:", channel, "\n")
p <- plot_channel(channel)
channel_plots[[channel]] <- p
print(p)
}
## Plotting channel: Fp1
## Plotting channel: O1

## Plotting channel: F4

## Plotting channel: CP1

## Plotting channel: C3

## Plotting channel: Oz

## Plotting channel: FC2

## Plotting channel: Fp2

## Plotting channel: Pz

## Plotting channel: PO3


Creating a Combined Plot with All Channels
# Convert data to long format for ggplot
eeg_long <- eeg_data[, sample_channels] %>%
mutate(time_point = row_number()) %>%
pivot_longer(cols = -time_point,
names_to = "channel",
values_to = "amplitude")
# Create combined plot
all_channels_plot <- ggplot(eeg_long, aes(x = time_point, y = amplitude, color = channel)) +
geom_line(alpha = 0.7) +
scale_color_viridis(discrete = TRUE) +
theme_minimal() +
theme(legend.position = "right") +
labs(title = "EEG Signals of All Selected Channels",
x = "Time (points)",
y = "Amplitude (μV)")
# Display and save plot
print(all_channels_plot)

ggsave("eeg_results/all_channels.png", all_channels_plot, width = 12, height = 8)
Creating Correlation Matrix
# Calculate correlation matrix
cor_matrix <- cor(eeg_data[, sample_channels], use = "pairwise.complete.obs")
write.csv(cor_matrix, "eeg_results/correlation_matrix.csv")
# Create heatmap visualization
cor_melted <- melt(cor_matrix)
corr_heatmap <- ggplot(cor_melted, aes(x = Var1, y = Var2, fill = value)) +
geom_tile() +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
axis.text.y = element_text(size = 8)) +
labs(title = "Correlation Heatmap Between EEG Channels",
x = "", y = "", fill = "Correlation")
print(corr_heatmap)

ggsave("eeg_results/correlation_heatmap.png", corr_heatmap, width = 12, height = 10)
4. Time Series Decomposition
# Minimum points needed for proper decomposition
min_points_needed <- 16
num_points <- nrow(eeg_data)
# Function to create decomposition plots
decompose_and_plot <- function(channel_name, frequency = 8) {
cat("Decomposing channel:", channel_name, "\n")
if (num_points < min_points_needed) {
# Classical decomposition for shorter series
ts_data <- ts(eeg_data[[channel_name]], frequency = frequency)
decomp <- decompose(ts_data, type = "additive")
# Extract components
observed <- as.numeric(decomp$x)
trend <- as.numeric(decomp$trend)
seasonal <- as.numeric(decomp$seasonal)
remainder <- as.numeric(decomp$random)
# Prepare data for plotting
decomp_df <- data.frame(
time = 1:length(observed),
observed = observed,
trend = trend,
seasonal = seasonal,
remainder = remainder,
trend_seasonal = trend + seasonal
)
# Remove NA values
decomp_df <- na.omit(decomp_df)
# Format for plotting
decomp_long <- pivot_longer(decomp_df,
cols = c(observed, trend, seasonal, remainder, trend_seasonal),
names_to = "component",
values_to = "value")
# Create component breakdown plot
p1 <- ggplot(filter(decomp_long, component %in% c("observed", "trend", "seasonal", "remainder")),
aes(x = time, y = value)) +
geom_line(aes(color = component)) +
facet_wrap(~ component, scales = "free_y", ncol = 1) +
theme_minimal() +
labs(title = paste("Decomposition of EEG Signal for Channel", channel_name),
x = "Time (points)", y = "Amplitude (μV)") +
theme(legend.position = "none")
# Create comparison plot
p2 <- ggplot(filter(decomp_long, component %in% c("observed", "trend_seasonal")),
aes(x = time, y = value, color = component)) +
geom_line() +
scale_color_manual(values = c("observed" = "blue", "trend_seasonal" = "red"),
labels = c("observed" = "Original Signal", "trend_seasonal" = "Trend + Seasonality")) +
theme_minimal() +
labs(title = paste("Signal Comparison for Channel", channel_name),
x = "Time (points)", y = "Amplitude (μV)")
# Combine plots
combined_plot <- grid.arrange(p1, p2, ncol = 1, heights = c(3, 1))
# Save plot
ggsave(paste0("eeg_results/", channel_name, "_decomposition.png"), combined_plot, width = 10, height = 8)
return(combined_plot)
} else {
# STL decomposition for longer series
ts_data <- ts(eeg_data[[channel_name]], frequency = frequency)
stl_result <- stl(ts_data, s.window = "periodic")
# Prepare data for plotting
decomp_df <- data.frame(
time = 1:length(ts_data),
observed = as.numeric(ts_data),
trend = as.numeric(stl_result$time.series[, "trend"]),
seasonal = as.numeric(stl_result$time.series[, "seasonal"]),
remainder = as.numeric(stl_result$time.series[, "remainder"]),
trend_seasonal = as.numeric(stl_result$time.series[, "trend"]) +
as.numeric(stl_result$time.series[, "seasonal"])
)
# Format for plotting
decomp_long <- pivot_longer(decomp_df,
cols = c(observed, trend, seasonal, remainder, trend_seasonal),
names_to = "component",
values_to = "value")
# Create component breakdown plot
p1 <- ggplot(filter(decomp_long, component %in% c("observed", "trend", "seasonal", "remainder")),
aes(x = time, y = value)) +
geom_line(aes(color = component)) +
facet_wrap(~ component, scales = "free_y", ncol = 1) +
theme_minimal() +
labs(title = paste("Decomposition of EEG Signal for Channel", channel_name),
x = "Time (points)", y = "Amplitude (μV)") +
theme(legend.position = "none")
# Create comparison plot
p2 <- ggplot(filter(decomp_long, component %in% c("observed", "trend_seasonal")),
aes(x = time, y = value, color = component)) +
geom_line() +
scale_color_manual(values = c("observed" = "blue", "trend_seasonal" = "red"),
labels = c("observed" = "Original Signal", "trend_seasonal" = "Trend + Seasonality")) +
theme_minimal() +
labs(title = paste("Signal Comparison for Channel", channel_name),
x = "Time (points)", y = "Amplitude (μV)")
# Combine plots
combined_plot <- grid.arrange(p1, p2, ncol = 1, heights = c(3, 1))
# Save plot
ggsave(paste0("eeg_results/", channel_name, "_decomposition.png"), combined_plot, width = 10, height = 8)
return(combined_plot)
}
}
# Process channels in parallel for better performance
decomposition_plots <- foreach(channel = sample_channels, .packages = c("tidyverse", "forecast", "gridExtra")) %dopar% {
result <- try(decompose_and_plot(channel))
if (!inherits(result, "try-error")) {
result
} else {
NULL
}
}
# Name the results list with channel names
names(decomposition_plots) <- sample_channels
# Display the plots
for (channel in sample_channels) {
if (!is.null(decomposition_plots[[channel]])) {
print(decomposition_plots[[channel]])
}
}
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
Time Series Forecast
# Function to forecast time series
forecast_channel <- function(channel_name, frequency = 8) {
cat("Forecasting channel:", channel_name, "\n")
# Get data
data <- eeg_data[[channel_name]]
n <- length(data)
# Calculate train-test split (80% train, 20% test)
train_size <- floor(0.8 * n)
test_size <- n - train_size
# Split data
train_data <- data[1:train_size]
test_data <- data[(train_size+1):n]
# Convert to time series
train_ts <- ts(train_data, frequency = frequency)
# Fit ARIMA model (with faster settings)
arima_model <- auto.arima(train_ts,
seasonal = TRUE,
stepwise = TRUE,
approximation = TRUE,
max.p = 3, # Limit model complexity
max.q = 3,
max.P = 1,
max.Q = 1)
# Generate forecast
arima_forecast <- forecast(arima_model, h = test_size)
# Create forecast plot
forecast_plot <- autoplot(arima_forecast) +
autolayer(ts(c(train_data, test_data), frequency = frequency),
series = "Actual", color = "black") +
theme_minimal() +
labs(title = paste("Forecast for Channel", channel_name),
x = "Time",
y = "Amplitude (μV)")
# Save plot
ggsave(paste0("eeg_results/", channel_name, "_forecast.png"), forecast_plot, width = 12, height = 8)
# Calculate accuracy
forecast_accuracy <- accuracy(arima_forecast$mean, test_data)
return(list(
plot = forecast_plot,
model = arima_model,
accuracy = forecast_accuracy
))
}
# Process a subset of channels to save time (just 5 for forecasting)
forecast_channels <- sample(sample_channels, min(5, length(sample_channels)))
# Forecast each channel
forecast_results <- list()
forecast_accuracy <- data.frame()
for (channel in forecast_channels) {
result <- try(forecast_channel(channel))
if (!inherits(result, "try-error") && !is.null(result)) {
forecast_results[[channel]] <- result
print(result$plot)
# Add accuracy to dataframe - check which metrics are available
# Some metrics might not be available depending on the data
accuracy_df <- as.data.frame(result$accuracy)
channel_accuracy <- data.frame(
Channel = channel,
RMSE = accuracy_df$RMSE[1],
MAE = accuracy_df$MAE[1]
)
# Add additional metrics if they exist
if ("MAPE" %in% colnames(accuracy_df)) {
channel_accuracy$MAPE <- accuracy_df$MAPE[1]
}
if ("MASE" %in% colnames(accuracy_df)) {
channel_accuracy$MASE <- accuracy_df$MASE[1]
}
forecast_accuracy <- rbind(forecast_accuracy, channel_accuracy)
}
}
## Forecasting channel: FC2
## Forecasting channel: Fp1

## Forecasting channel: O1

## Forecasting channel: CP1

## Forecasting channel: C3


# Save forecast accuracy metrics
write.csv(forecast_accuracy, "eeg_results/forecast_accuracy.csv", row.names = FALSE)
Summary of Analysis
# Display forecast accuracy table
knitr::kable(forecast_accuracy,
caption = "Forecast Accuracy Metrics by Channel",
digits = 4,
align = "lrrrr")
Forecast Accuracy Metrics by Channel
| FC2 |
48.8374 |
33.8903 |
103.5980 |
| Fp1 |
4.2923 |
3.3779 |
146.9306 |
| O1 |
6.6982 |
5.1091 |
100.9169 |
| CP1 |
12.2207 |
8.4304 |
101.2658 |
| C3 |
6.7068 |
4.9394 |
101.0683 |
# Get channel with best RMSE
best_channel <- forecast_accuracy$Channel[which.min(forecast_accuracy$RMSE)]
worst_channel <- forecast_accuracy$Channel[which.max(forecast_accuracy$RMSE)]
cat("## Summary of Analysis\n\n")
## ## Summary of Analysis
cat("Analyzed", length(sample_channels), "out of", ncol(eeg_data), "total channels\n\n")
## Analyzed 10 out of 33 total channels
cat("## Forecast Performance\n\n")
## ## Forecast Performance
cat("Best performing channel:", best_channel, "with RMSE =", min(forecast_accuracy$RMSE, na.rm = TRUE), "\n")
## Best performing channel: Fp1 with RMSE = 4.292303
cat("Worst performing channel:", worst_channel, "with RMSE =", max(forecast_accuracy$RMSE, na.rm = TRUE), "\n\n")
## Worst performing channel: FC2 with RMSE = 48.83742
cat("## Files Generated\n\n")
## ## Files Generated
cat("1. Individual channel plots: [channel_name]_signal.png\n")
## 1. Individual channel plots: [channel_name]_signal.png
cat("2. Combined channels plot: all_channels.png\n")
## 2. Combined channels plot: all_channels.png
cat("3. Correlation matrix: correlation_matrix.csv and correlation_heatmap.png\n")
## 3. Correlation matrix: correlation_matrix.csv and correlation_heatmap.png
cat("4. Time series decomposition: [channel_name]_decomposition.png\n")
## 4. Time series decomposition: [channel_name]_decomposition.png
cat("5. Forecast plots: [channel_name]_forecast.png\n")
## 5. Forecast plots: [channel_name]_forecast.png
cat("6. Forecast accuracy: forecast_accuracy.csv\n")
## 6. Forecast accuracy: forecast_accuracy.csv