Clean up data

# Keep only the three columns used downstream: Age, Task, and Score
subsetdata <- ageDataTable[, c("Age", "Task", "Score")]

# Store Task as a factor whose level order is fixed to
# "Overall", "Memory Task", "Sorting Task" (values outside this set become NA)
subsetdata$Task <- factor(
  subsetdata$Task,
  levels = c("Overall", "Memory Task", "Sorting Task")
)


# Mean score for every Age x Task combination (missing scores are ignored);
# the result is returned ungrouped
avg_accuracy <- ageDataTable %>%
  group_by(Age, Task) %>%
  summarise(
    Average_Score = mean(Score, na.rm = TRUE),
    .groups = "drop"
  )

# Number of rows observed at each age
# NOTE(review): n() counts rows of ageDataTable, so this equals the number of
# participants only if each participant contributes exactly one row per age;
# if there is one row per participant per Task the count is inflated - confirm.
age_counts <- ageDataTable %>%
  group_by(Age) %>%
  summarise(
    Participant_Count = n(),
    .groups = "drop"
  )

# Desired left-to-right order of the facets; also used as the factor levels below
facet_order <- c("Overall", "Memory Task", "Sorting Task")

# For each task, take its rows from avg_accuracy and attach the per-age
# counts by joining on the shared Age column
overall_data <- merge(
  subset(avg_accuracy, Task == "Overall"),
  age_counts,
  by = "Age"
)
memory_data <- merge(
  subset(avg_accuracy, Task == "Memory Task"),
  age_counts,
  by = "Age"
)
sorting_data <- merge(
  subset(avg_accuracy, Task == "Sorting Task"),
  age_counts,
  by = "Age"
)

# Stack the three per-task tables back into one long table for plotting
all_data <- rbind(overall_data, memory_data, sorting_data)

# Fix the level order of Task so the facets appear in facet_order
all_data$Task <- factor(all_data$Task, levels = facet_order)

Plot

# Palette generator interpolating from light blue to dark blue
blue_palette <- colorRampPalette(c("#6BAED6", "#2171B5"))

# Bubble plot of average score by age, one facet per task.
# Point size and fill both encode Participant_Count; one fill shade is
# generated per distinct count value.
# NOTE(review): the mapped `colour` aesthetic appears to have no visible effect,
# because geom_point() sets a constant color = "black" for the outline.
fig_8 <- ggplot(
  all_data,
  aes(
    x = Age,
    y = Average_Score,
    size = Participant_Count,
    colour = Participant_Count,
    fill = as.factor(Participant_Count)
  )
) +
  geom_point(shape = 21, color = "black", stroke = 0.5) +
  scale_size(range = c(2, 4), breaks = c(30, 100, 300)) +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.4 and raises a lifecycle warning
  scale_fill_manual(
    values = blue_palette(length(unique(all_data$Participant_Count))),
    guide = "none"
  ) +
  scale_color_manual(
    values = blue_palette(length(unique(all_data$Participant_Count))),
    guide = "none"
  ) +
  facet_wrap(~Task) +
  labs(
    x = "Participant Age",
    y = "Percent Correct",
    size = "Participant Count",
    colour = NULL,
    fill = NULL
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.line = element_line(color = "black"),
    # Remove the gray background behind the facet labels
    strip.background = element_blank(),
    # Draw the facet labels in black at size 14
    strip.text = element_text(color = "black", size = 14),
    legend.position = "right",
    plot.title = element_text(hjust = 0.5)
  )

fig_8