# Load packages
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(boot)
library(ggplot2)
# Read Data
ageDataTable <- read_csv("~/Desktop/psychology/PSYC3361/face_lab/group_data/ageDataTable.csv")
## New names:
## Rows: 68985 Columns: 12
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): ID, Gender, Task dbl (9): ...1, Age, Cohort, logAge, logAge2, Age2, Age3,
## Age4, Score
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Subset the data for relevant columns: Age, Task, and Score
subset_data <- ageDataTable[, c("Age", "Task", "Score")]
# Calculate average accuracy for each participant age and task combination
avg_accuracy <- aggregate(Score ~ Age + Task, data = subset_data, FUN = mean)
# Calculate count of participants in each age group
age_counts <- table(subset_data$Age)
# Merge average accuracy data with age counts data
merged_data <- merge(avg_accuracy, data.frame(Age = names(age_counts), Participant_Count = age_counts), by = "Age")
# Rename the columns for clarity
colnames(merged_data) <- c("Age", "Task", "Accuracy", "Participant_Count")
#change merge data to numeric
merged_data$Participant_Count <- as.numeric(as.character(merged_data$Participant_Count))
# Separate the data for overall, memory task, and sorting task
overall_data <- subset(merged_data, Task == "Overall")
memory_data <- subset(merged_data, Task == "Memory Task")
sorting_data <- subset(merged_data, Task == "Sorting Task")
Quadratic Function and Bootstrapping
# Fit quadratic function to logarithm of age and test accuracy
model <- lm(log(Accuracy) ~ poly(log(Age), 2), data = merged_data)
# Extract coefficients from the fitted model
coefficients <- coef(model)
# Calculate estimated peak accuracy
peak_age <- -coefficients[2] / (2 * coefficients[3])
# Bootstrapping function
boot_func <- function(data, indices) {
# Subset the data using the indices
subset_data <- data[indices, ]
# Fit a second-order polynomial to the logarithm of Age and Accuracy
model <- lm(log(Accuracy) ~ poly(log(Age), 2), data = subset_data)
# Extract the coefficients from the fitted model
coefficients <- coef(model)
# Calculate the optimal X value (Age) where Y (Accuracy) is maximized
optimal_x <- -coefficients[2] / (2 * coefficients[3])
# Convert the optimal X value from log(Age) to Age
optimal_age <- exp(optimal_x)
# Return the optimal Age
return(optimal_age)
}
# Perform bootstrapping resampling procedure
bootstrap_results <- boot(merged_data, boot_func, R = 200)
# Calculate the mean optimal point value and convert it back to Age
mean_optimal_age <- mean(bootstrap_results$t)
# Print the mean optimal Age
mean_optimal_age
## [1] 0.952783