# Load packages
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(boot)
library(ggplot2)

# Read Data
ageDataTable <- read_csv("~/Desktop/psychology/PSYC3361/face_lab/group_data/ageDataTable.csv")
## New names:
## Rows: 68985 Columns: 12
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): ID, Gender, Task dbl (9): ...1, Age, Cohort, logAge, logAge2, Age2, Age3,
## Age4, Score
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Subset the data for relevant columns: Age, Task, and Score
subset_data <- ageDataTable[, c("Age", "Task", "Score")]

# Calculate average accuracy for each participant age and task combination
avg_accuracy <- aggregate(Score ~ Age + Task, data = subset_data, FUN = mean)

# Calculate count of participants in each age group
age_counts <- table(subset_data$Age)

# Merge average accuracy data with age counts data
merged_data <- merge(avg_accuracy, data.frame(Age = names(age_counts), Participant_Count = age_counts), by = "Age")

# Rename the columns for clarity
colnames(merged_data) <- c("Age", "Task", "Accuracy", "Participant_Count")

#change merge data to numeric 
merged_data$Participant_Count <- as.numeric(as.character(merged_data$Participant_Count))

# Separate the data for overall, memory task, and sorting task
overall_data <- subset(merged_data, Task == "Overall")
memory_data <- subset(merged_data, Task == "Memory Task")
sorting_data <- subset(merged_data, Task == "Sorting Task")

Quadratic Function and Bootstrapping

# Fit quadratic function to logarithm of age and test accuracy
model <- lm(log(Accuracy) ~ poly(log(Age), 2), data = merged_data)

# Extract coefficients from the fitted model
coefficients <- coef(model)

# Calculate estimated peak accuracy
peak_age <- -coefficients[2] / (2 * coefficients[3])


# Bootstrapping function
boot_func <- function(data, indices) {
  # Subset the data using the indices
  subset_data <- data[indices, ]
  
  # Fit a second-order polynomial to the logarithm of Age and Accuracy
  model <- lm(log(Accuracy) ~ poly(log(Age), 2), data = subset_data)
  
  # Extract the coefficients from the fitted model
  coefficients <- coef(model)
  
  # Calculate the optimal X value (Age) where Y (Accuracy) is maximized
  optimal_x <- -coefficients[2] / (2 * coefficients[3])
  
  # Convert the optimal X value from log(Age) to Age
  optimal_age <- exp(optimal_x)
  
  # Return the optimal Age
  return(optimal_age)
}

# Perform bootstrapping resampling procedure
bootstrap_results <- boot(merged_data, boot_func, R = 200)

# Calculate the mean optimal point value and convert it back to Age
mean_optimal_age <- mean(bootstrap_results$t)

# Print the mean optimal Age
mean_optimal_age
## [1] 0.952783

Plot the figure using ggplot2

# Overall Task
Overall <- ggplot(overall_data, aes(x = Age, y = Accuracy, size = Participant_Count, color = Participant_Count)) +
  geom_point() +
  scale_size(range = c(1, 10)) +
  scale_color_gradient(low = "blue", high = "red") +
  labs(title = "Average Accuracy for Overall Task",
       x = "Age",
       y = "Accuracy",
       size = "Participant Count",
       color = "Participant Count")

# Memory Task
Memory <- ggplot(memory_data, aes(x = Age, y = Accuracy, size = Participant_Count, color = Participant_Count)) +
  geom_point() +
  scale_size(range = c(1, 10)) +
  scale_color_gradient(low = "blue", high = "red") +
  labs(title = "Average Accuracy for Memory Task",
       x = "Age",
       y = "Accuracy",
       size = "Participant Count",
       color = "Participant Count")

# Sorting Task
Sort <- ggplot(sorting_data, aes(x = Age, y = Accuracy, size = Participant_Count, color = Participant_Count)) +
  geom_point() +
  scale_size(range = c(1, 10)) +
  scale_color_gradient(low = "blue", high = "red") +
  labs(title = "Average Accuracy for Sorting Task",
       x = "Age",
       y = "Accuracy",
       size = "Participant Count",
       color = "Participant Count")

print(Overall)

print(Memory)

print(Sort)