# Step 1: Keep only the Age, Task and Score columns of ageDataTable, then
#  recode "Task" as a factor with a fixed level order (Overall, Memory Task,
#  Sorting Task). Any Task value not listed in `levels` becomes NA.
fig8_subsetdata <- ageDataTable[, c("Age", "Task", "Score")]
fig8_subsetdata$Task <- factor(
  x = fig8_subsetdata$Task,
  levels = c("Overall", "Memory Task", "Sorting Task")
)

# Step 2: Compute the mean Score for every Age x Task combination.
#  - Average_Score skips missing scores (na.rm = TRUE).
#  - nAcc = n() is the number of rows in each Age x Task group. n() does not
#    remove anything: rows whose Score is NA are still counted.
#  - .groups = "drop" returns an ungrouped result.
# Step 3: Convert "Task" to a factor with a fixed level order (Overall,
#  Memory Task, Sorting Task). Task values outside these levels become NA.
fig8_avg_accuracy <- ageDataTable %>%
  group_by(Age, Task) %>%
  summarise(
    Average_Score = mean(Score, na.rm = TRUE),
    nAcc = n(),
    .groups = "drop"
  ) %>%
  mutate(
    Task = factor(Task, levels = c("Overall", "Memory Task", "Sorting Task"))
  )

# Step 4: Count the rows of ageDataTable for each Age and store the count in
#  column N. NOTE(review): this equals the number of participants per age only
#  if ageDataTable holds one row per participant -- confirm against the data.
fig8_age_counts <- ageDataTable %>%
  group_by(Age) %>%
  summarise(N = n(), .groups = "drop")

# Step 5: Split fig8_avg_accuracy into one data frame per task ("Overall",
#  "Memory Task" and "Sorting Task").
# Step 6: Join each per-task data frame to fig8_age_counts so that every row
#  also carries the per-Age count N. The join key is given explicitly
#  (by = "Age") instead of relying on merge()'s default of joining on every
#  column name the two tables happen to share.
fig8_Overall_data <- merge(
  subset(fig8_avg_accuracy, Task == "Overall"),
  fig8_age_counts,
  by = "Age"
)
fig8_memory_data <- merge(
  subset(fig8_avg_accuracy, Task == "Memory Task"),
  fig8_age_counts,
  by = "Age"
)
fig8_sorting_data <- merge(
  subset(fig8_avg_accuracy, Task == "Sorting Task"),
  fig8_age_counts,
  by = "Age"
)

# Stack the three per-task data frames back into a single data frame.
fig8_all_data <- rbind(fig8_Overall_data, fig8_memory_data, fig8_sorting_data)

# Step 7a: Specify the desired order of the facets/panels, i.e. the order in
#  which the per-task plots should appear.
fig8_facet_order <- c("Overall", "Memory Task", "Sorting Task")

# Step 7b: Convert the "Task" column of fig8_all_data to a factor whose levels
#  follow fig8_facet_order (reusing the vector rather than repeating it).
fig8_all_data$Task <- factor(fig8_all_data$Task, levels = fig8_facet_order)

# Step 8: Create the Figure 8 scatterplot with geom_point() from ggplot2, using
#  the per-Age, per-Task averages in fig8_avg_accuracy. Point size and fill both
#  encode nAcc. shape = 21 draws a filled circle with an outline;
#  color = "black" sets the outline colour and stroke = 0.5 its thickness.
#  The outline colour is a fixed constant, so no colour aesthetic is mapped
#  (a mapped colour would be overridden by the constant anyway).
fig8_plot <- ggplot(
  fig8_avg_accuracy,
  aes(x = Age, y = Average_Score, size = nAcc, fill = nAcc)
) +
  geom_point(shape = 21, color = "black", stroke = 0.5) +
  # Step 8.1: facet_grid(cols = vars(Task)) draws one panel per Task, laid out
  #  as columns. Panel order follows the factor levels of Task set in Step 3.
  facet_grid(cols = vars(Task)) +
  # Axis, legend, title and caption text. size and fill share the title "N".
  labs(
    x = "Participant Age",
    y = "Percent Correct",
    size = "N",
    fill = "N",
    title =
      "Fig 8. Average accuracy for each participant age on the UNSW Face Test",
    caption =
      "Note: Size and shade of each data point show the number of participants in that age group"
  ) +
  # Step 8.2: theme() strips the default panel background and grid lines, draws
  #  black axis lines, and styles the facet strips, legend, title, axis text
  #  and caption. plot.margin adds extra space below the plot.
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.line = element_line(color = "black"),
    strip.background = element_blank(),
    strip.text = element_text(color = "black", size = 14, face = "bold"),
    legend.position = "right",
    legend.key = element_rect(fill = "white"),
    legend.title = element_text(face = "bold", hjust = 0.5),
    plot.title = element_text(hjust = 0.5),
    plot.margin = margin(b = 50),
    axis.text = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.caption = element_text(hjust = 0)
  ) +
  # Step 9: Configure the scales.
  #  - Fill and size are both log10-transformed; range = c(1, 3) sets the
  #    smallest and largest point sizes.
  #  - The x axis is limited to 10-80 with breaks every 20. Rows whose Age falls
  #    outside these limits are not drawn (ggplot2 reports them in a warning).
  #  - The y axis only has its breaks set (56 to 70 in steps of 2).
  #  - guides() requests a legend-style guide for both fill and size.
  scale_fill_continuous(trans = "log10") +
  scale_size_continuous(trans = "log10", range = c(1, 3)) +
  scale_x_continuous(limits = c(10, 80), breaks = seq(10, 80, by = 20)) +
  scale_y_continuous(breaks = seq(56, 70, by = 2)) +
  guides(fill = guide_legend(), size = guide_legend())

# Print the finished figure.
fig8_plot