library(tidyverse)
library(gt)

# Step 1: Restrict the data to participants aged 10-76 and derive the
#  "Age_Group" factor from "Age". The three oldest bands are given names here
#  ("Middle Age (50-59)", "Older Middle Age (60-69)", "Seniors (70-76)") that
#  do not appear in the original table, because a bare bracketed range could
#  not be used as a column value on its own.
#  "Task" is shortened to Overall / Memory / Sorting and stored as an ordered
#  factor so the rows keep that order in the final table.
ageDataTable <- ageDataTable %>%
  filter(Age >= 10, Age <= 76) %>%
  mutate(
    # Label each row with its age band; the bounds are inclusive on both ends.
    Age_Group = case_when(
      Age >= 10 & Age <= 17 ~ "Adolescence (10-17)",
      Age >= 18 & Age <= 35 ~ "Early Adult (18-35)",
      Age >= 36 & Age <= 49 ~ "Early Middle Age (36-49)",
      Age >= 50 & Age <= 59 ~ "Middle Age (50-59)",
      Age >= 60 & Age <= 69 ~ "Older Middle Age (60-69)",
      Age >= 70 & Age <= 76 ~ "Seniors (70-76)"
    ),
    # Fix the level order so the bands run from youngest to oldest.
    Age_Group = factor(
      Age_Group,
      levels = c(
        "Adolescence (10-17)",
        "Early Adult (18-35)",
        "Early Middle Age (36-49)",
        "Middle Age (50-59)",
        "Older Middle Age (60-69)",
        "Seniors (70-76)"
      )
    ),
    Task = recode(
      Task,
      "Overall" = "Overall",
      "Memory Task" = "Memory",
      "Sorting Task" = "Sorting"
    ),
    Task = factor(
      Task,
      levels = c("Overall", "Memory", "Sorting"),
      ordered = TRUE
    )
  )

# Step 2: Summarise "Score" for every combination of "Age_Group" and "Task":
#  the mean and standard deviation (each rounded to one decimal place) and the
#  number of rows in the group. Missing scores are ignored by mean() and sd()
#  via na.rm = TRUE. Note that N = n() counts every row in the group, including
#  rows whose Score is NA.
#  .groups = "drop" returns an ungrouped tibble, so no grouping leaks into the
#  later steps and summarise() does not emit its regrouping message.
age_group_summary <- ageDataTable %>%
  group_by(Age_Group, Task) %>%
  summarise(
    Mean = round(mean(Score, na.rm = TRUE), 1),
    SD = round(sd(Score, na.rm = TRUE), 1),
    N = n(),
    .groups = "drop"
  )

# Step 3: Reshape "age_group_summary" from long to wide with
#  tidyr::pivot_wider(): one row per Task, and one column per statistic and
#  age group, named "<statistic>_<age group>" (for example
#  "Mean_Adolescence (10-17)").
tab4_age_wider <- pivot_wider(
  age_group_summary,
  names_from = Age_Group,
  values_from = c(Mean, SD, N),
  names_sep = "_"
)

# Step 4: Render the wide summary as a gt table. Column names in the data must
#  be unique ("<statistic>_<age group>"), so each column is relabelled by hand
#  to show only its statistic (Mean, SD or N); the age group is shown by the
#  spanner above the columns instead.
tab4_age_wider %>%
  gt() %>%
  cols_label(
    `Mean_Adolescence (10-17)` = "Mean",
    `SD_Adolescence (10-17)` = "SD",
    `N_Adolescence (10-17)` = "N",
    `Mean_Early Adult (18-35)` = "Mean",
    `SD_Early Adult (18-35)` = "SD",
    `N_Early Adult (18-35)` = "N",
    `Mean_Early Middle Age (36-49)` = "Mean",
    `SD_Early Middle Age (36-49)` = "SD",
    `N_Early Middle Age (36-49)` = "N",
    `Mean_Middle Age (50-59)` = "Mean",
    `SD_Middle Age (50-59)` = "SD",
    `N_Middle Age (50-59)` = "N",
    `Mean_Older Middle Age (60-69)` = "Mean",
    `SD_Older Middle Age (60-69)` = "SD",
    `N_Older Middle Age (60-69)` = "N",
    `Mean_Seniors (70-76)` = "Mean",
    `SD_Seniors (70-76)` = "SD",
    `N_Seniors (70-76)` = "N"
  ) %>%
  # Step 4.1: Set the title and subtitle with tab_header(), then add one
  # spanner per age group with tab_spanner() so the Mean / SD / N columns of a
  # group sit under a shared multi-row header. Columns are selected by the
  # "_<age group>" suffix of their names rather than by position, so each
  # spanner stays attached to the right columns regardless of column order.
  tab_header(
    title = md(
      "Table 4. Mean accuracy (%) and standard deviation by participant age from online samples"
    ),
    subtitle = md("Age group (years)")
  ) %>%
  tab_spanner(
    label = md("**Adolescence (10-17)**"),
    columns = ends_with("_Adolescence (10-17)")
  ) %>%
  tab_spanner(
    label = md("**Early Adult <br> (18-35)**"),
    columns = ends_with("_Early Adult (18-35)")
  ) %>%
  tab_spanner(
    label = md("**Early Middle Age (36-49)**"),
    columns = ends_with("_Early Middle Age (36-49)")
  ) %>%
  tab_spanner(
    label = md("**Middle Age <br>(50-59)**"),
    columns = ends_with("_Middle Age (50-59)")
  ) %>%
  tab_spanner(
    label = md("**Older Middle Age (60-69)**"),
    columns = ends_with("_Older Middle Age (60-69)")
  ) %>%
  tab_spanner(
    label = md("**Seniors <br> (70-76)**"),
    columns = ends_with("_Seniors (70-76)")
  )
Table 4. Mean accuracy (%) and standard deviation by participant age from online samples
Age group (years)
Task Adolescence (10-17) Early Adult
(18-35)
Early Middle Age (36-49) Middle Age
(50-59)
Older Middle Age (60-69) Seniors
(70-76)
Mean SD N Mean SD N Mean SD N Mean SD N Mean SD N Mean SD N
Overall 59.1 5.8 1470 62.8 6.5 10163 62.5 6.4 6905 61.1 5.9 3149 59.8 5.8 1139 58.8 5.1 169
Memory 62.2 8.4 1470 65.6 8.7 10163 65.3 8.7 6905 62.6 8.3 3149 60.5 8.0 1139 59.5 7.3 169
Sorting 57.5 6.8 1470 61.4 7.6 10163 61.1 7.7 6905 60.4 7.3 3149 59.5 7.2 1139 58.4 6.5 169