# Step 1: Keep only the Age, Task and Score columns of ageDataTable, then
# recode "Task" as a factor whose levels are listed in the order the panels
# should appear (Overall, Memory Task, Sorting Task).
fig8_subsetdata <- ageDataTable[, c("Age", "Task", "Score")]
fig8_subsetdata$Task <- factor(
  x = fig8_subsetdata$Task,
  levels = c("Overall", "Memory Task", "Sorting Task")
)
# Step 2: Compute the mean Score for every Age x Task combination.
# Missing scores are ignored when averaging (na.rm = TRUE). nAcc = n() does
# NOT remove anything: it records how many rows fall in each Age x Task group
# (rows with a missing Score are still counted). .groups = "drop" returns an
# ungrouped table.
fig8_avg_accuracy <- ageDataTable %>%
  group_by(Age, Task) %>%
  summarise(
    Average_Score = mean(Score, na.rm = TRUE),
    nAcc = n(),
    .groups = "drop"
  )
# Step 3: Convert "Task" to a factor with an explicit level order; this order
# is what determines the left-to-right order of the facets in the plot.
fig8_avg_accuracy$Task <- factor(
  fig8_avg_accuracy$Task,
  levels = c("Overall", "Memory Task", "Sorting Task")
)
# Step 4: Tally the rows of ageDataTable for every Age; the tally is stored
# in column N.
# NOTE(review): if ageDataTable holds one row per participant per task, N is a
# row count rather than a count of unique participants - confirm against the
# data layout.
fig8_age_counts <- ageDataTable %>%
  group_by(Age) %>%
  summarise(N = n(), .groups = "drop")
# Step 5: Split fig8_avg_accuracy into one data frame per task ("Overall",
# "Memory Task" and "Sorting Task").
# Step 6: Attach the per-age counts from fig8_age_counts to each split by
# merging on the shared "Age" column, then stack the three merged data frames
# with rbind().
fig8_Overall_data <- fig8_avg_accuracy %>%
  subset(Task == "Overall") %>%
  merge(fig8_age_counts, by = "Age")
fig8_memory_data <- fig8_avg_accuracy %>%
  subset(Task == "Memory Task") %>%
  merge(fig8_age_counts, by = "Age")
fig8_sorting_data <- fig8_avg_accuracy %>%
  subset(Task == "Sorting Task") %>%
  merge(fig8_age_counts, by = "Age")
fig8_all_data <- rbind(
  fig8_Overall_data,
  fig8_memory_data,
  fig8_sorting_data
)
# Step 7: Specify the desired order of the facets/panels, i.e. the
# left-to-right order of the plots.
fig8_facet_order <- c("Overall", "Memory Task", "Sorting Task")
# Step 7.1: Convert the "Task" column of fig8_all_data to a factor whose levels
# follow fig8_facet_order, so the panel order is defined in one place only.
# (factor() fixes the level order; it does not create an ordered factor.)
fig8_all_data$Task <- factor(fig8_all_data$Task, levels = fig8_facet_order)
# Step 8: Create the Figure 8 scatterplot with geom_point() from the ggplot2
# package, plotting Average_Score against Age from fig8_avg_accuracy.
# shape = 21 draws circles that have a separate outline and fill:
# color = "black" sets the outline colour and stroke = 0.5 the outline
# thickness. Because the outline colour is a fixed value, nAcc is mapped to
# size and fill only (a colour mapping would be overridden by the fixed value).
fig8_plot <- ggplot(
  fig8_avg_accuracy,
  aes(x = Age, y = Average_Score, size = nAcc, fill = nAcc)
) +
  geom_point(shape = 21, color = "black", stroke = 0.5) +
  # Step 8.1: Use facet_grid() to draw one panel per task. cols = vars(Task)
  # puts the Task variable in the columns; the panel order follows the factor
  # levels given to fig8_avg_accuracy$Task in Step 3.
  facet_grid(cols = vars(Task)) +
  # Size and fill share the legend title "N".
  labs(
    x = "Participant Age",
    y = "Percent Correct",
    size = "N",
    fill = "N",
    title =
      "Fig 8. Average accuracy for each participant age on the UNSW Face Test",
    caption =
      "Note: Size and shade of each data point show the number of participants in that age group"
  ) +
  # Step 8.2: Use theme() from ggplot2 to style the panels (blank background,
  # no grid, black axis lines), the facet strip text, the legend, the title,
  # the axis text/titles and the left-aligned caption.
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.line = element_line(color = "black"),
    strip.background = element_blank(),
    strip.text = element_text(color = "black", size = 14, face = "bold"),
    legend.position = "right",
    legend.key = element_rect(fill = "white"),
    legend.title = element_text(face = "bold", hjust = 0.5),
    plot.title = element_text(hjust = 0.5),
    plot.margin = margin(b = 50),
    axis.text = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.caption = element_text(hjust = 0)
  ) +
  # Step 9: Configure the scales. The fill colour and the point size both use
  # a log10 transformation of nAcc; range = c(1, 3) sets the smallest and
  # largest point sizes. The x axis runs from 10 to 80 with a break every 20
  # (ggplot2 drops points whose Age lies outside these limits, with a warning)
  # and the y axis has a break every 2 units from 56 to 70.
  scale_fill_continuous(trans = "log10") +
  scale_size_continuous(trans = "log10", range = c(1, 3)) +
  scale_x_continuous(limits = c(10, 80), breaks = seq(10, 80, by = 20)) +
  scale_y_continuous(breaks = seq(56, 70, by = 2)) +
  # Request a legend-type guide for both fill and size.
  guides(fill = guide_legend(), size = guide_legend())
# Display the finished plot.
fig8_plot
