###*** Read data- first administration of Stroop
  stroop_child <- readxl::read_xls("child_data\\child_stroop.xls")

# Convert vector to factor, pull out any levels wanted at end of the vector- 'other_option_vector', and alphabetize rest
  ## df %>% mutate(across(var1:varn, ~factor_level_sort(.x, other_option_vector))) # Apply as such below
  factor_level_sort <- function(var, other_option_vector){
    factor(var, levels = c(sort(unique(var)[! unique(var) %in% other_option_vector]), other_option_vector))
  }

## Function to recode main data- assign names to variable levels now coded as numbers, convert to factors, and sort
  
  stroop_df_clean <- function(df) {
    df %>%
      ## Variable naming convention is word1_word2
    rename(
      word_raw_score = WRScore,
      color_raw_score = CRScore,
      color_word_raw_score = CWRScore,
      interference_raw_score = IntRscore
    ) %>% 
    mutate(
      ## Age calculations- get as decimal, factor, and separate years
      birth_date = as.Date(paste(Byear, Bmonth, Bday, sep = "/"), origin = 1970-01-01),
      test_date = as.Date(paste(EVyear, EVmonth, EVday, sep = "/"), origin = 1970-01-01),
      EVyear = EVyear, # Needed later- would be dropped in above
      years = floor(age_calc(birth_date, enddate = test_date, units = "years")),
      months = floor(age_calc(birth_date, enddate = test_date, units = "months")%%12),
      decimal_age = years + months/12,
      years = factor(years), # as factor after decimal_age calculated
      gender = factor(dplyr::recode(Gender, '1' = "Male", '2' = "Female", .default = "Another Value"), levels = c("Male", "Female", "Another Value")),
      ethnicity = dplyr::recode(Ethnic, '1' = "Caucasian", '2' = "African-American", '3' = "Asian", '4' = "Hispanic", '5' = "Native-American", '6' = "Other", .default = "Another Value"),
      education_group = factor(dplyr::recode(SelEdGrp, '1' = "No HS Diploma", '2' = "HS Diploma", '3' = "Some College", '4' = "BA", '5' = "Post-grad", .default = "Another Value"), levels = c("No HS Diploma", "HS Diploma", "Some College", "BA", "Post-grad", "Another Value")),
      region = dplyr::recode(Region, '1' = "Northeast", '2' = "Midwest", '3' = "South", '4' = "West", .default = "Another Value"),
      community = dplyr::recode(Commun, '1' = "Rural", '2' = "Urban", .default = "Another Value"),
      primary_dx = dplyr::recode(PrimeDX, '0' = "No Dx", '2' = "LD", '4' = "Autism", '6' = "ID", '10' = "Dementia", '11' = "ADHD", '12' = "ESL", '14' = "TBI", .default = "Another Value"),
      drugs = dplyr::recode(`Drugs?`, '1' = "No", '2' = "Yes", .default = "Another Value"),
      color_blind = dplyr::recode(ColorBl, '1' = "Yes", '2' = "No", .default = "Another Value"),
      case_id = paste(ID, `Case #`, sep="_"),
        # Adds 'Other' levels at end of sort
      across(c(region, community, drugs, color_blind), ~factor_level_sort(.x, "Another Value")),
      across(ethnicity, ~factor_level_sort(.x, c("Other", "Another Value"))),
      across(primary_dx, ~factor_level_sort(.x, c("No Dx", "Another Value"))),
      .keep = "unused" # This removes variables used in calculations
    )
  }
  child <- stroop_df_clean(stroop_child)
####*** Check that all group sizes add up to total ###***

  count_col <- function(df, col){
    df %>% 
      group_by(!! sym(col)) %>% # variable to check
        summarize(n = n()) %>%  # number of groups
      mutate(total = sum(n)) # Total of all groups
  }
  
  sel_names <- child %>% dplyr::select(c(gender:color_blind, Cyear)) %>% names
  lapply(sel_names, function(col) count_col(child, col))

###*** Check for duplicates ***###
  
  ## Looks for anyone with same raw scores, birthdate-- unlikely that would randomly be same
  child_long <- child %>% 
    mutate(raw_scores = paste(word_raw_score, color_raw_score, color_word_raw_score, interference_raw_score, sep = "_"),
           birth_date = as.character(birth_date),
           row_number = row.names(.)) %>% 
    # Select any vars you wish to check for duplicates, include row_number, calculated above
    dplyr::select(case_id, birth_date, raw_scores, row_number) %>% 
    pivot_longer(., -row_number, names_to = "variable") %>% # We can then check if any same vars selected above
    group_by(variable) %>% 
    nest()
  ## Apply the date over ids, birth_date, raw_scores, desired vars to check for duplicates in this case
lapply(child_long$data, function(data) {
  bind_cols( # This will make a row with the first and subsequent occurrence of duplicates
    data %>% 
      filter(duplicated(value, fromLast = TRUE)) %>% 
      setNames(paste0(colnames(.), "_first_occurrence")),
    data %>% 
      filter(duplicated(value)) %>% 
      setNames(paste0(colnames(.), "_subsequent_occurrence"))
  )
})

###*** Check for invalid calculations***### 

  # interference_raw_score is only calculated var in this df
  miscalculation <- child %>% 
    mutate(row_number = row.names(.),
           calculated_t = color_word_raw_score - color_raw_score) %>% 
    filter(interference_raw_score != calculated_t) %>% 
    dplyr::select(row_number, color_word_raw_score, color_raw_score, calculated_t, interference_raw_score)

#Confirmed that there was one error value- change to calculated value
  child[row.names(child) == miscalculation$row_number,]$interference_raw_score<- miscalculation$calculated_t

# The T-scores for interference are negative - this will create new interference_t that will fix
  child <- child %>% 
    mutate(interference_t_score = (50 - IntTscore) + 50)

###*** Check for extreme values ***###
  
  ## Get range of each var
  range_col <- function(df, col){
    col <- sym(col)
    df %>% 
        summarize(min = min(!! col),
                  max = max(!! col)
        )
  }
  
  sel_names <- child %>% dplyr::select(word_raw_score:IntTscore) %>% names # Vars to check extreme values
  lapply(setNames(sel_names, sel_names), function(col) range_col(child, col))
  
###*** Check for T-scores above 85 ***###
  
  high_t_col <- function(df, col){
    col <- sym(col)
    df %>% 
      mutate(row_number = row.names(.)) %>% 
      dplyr::select(row_number, !! col) %>% 
      filter(!! col > 85)
  }
  
  sel_names <- child %>% dplyr::select(ends_with("tscore")) %>% names # All T-scores
  lapply(setNames(sel_names, sel_names), function(col) high_t_col(child, col))
  
  
###*** Look for values +/- 3SD from mean ###***
  
    extreme_col <- function(df, col){
    col <- sym(col)
    df %>% 
      mutate(row_number = row.names(.)) %>% 
      dplyr::select(row_number, !! col) %>% 
      mutate(standardized_score = (!! col - mean(!! col)) / sd(!! col)) %>% 
      filter(abs(standardized_score) >= 3)
  }
    
  sel_names <- child %>% dplyr::select(word_raw_score:IntTscore) %>% names
  lapply(setNames(sel_names, sel_names), function(col) extreme_col(child, col))
  
## Found one value ~>6 SD outside mean. Required visual inspection of source data-- confirmed as a violation, thus removed by hand
  child <- child %>% 
    filter(row.names(.) != 97)
# Read other datasets- other tests or retests
  retest <- readxl::read_xls("child_data\\retest_child_stroop.xls")
  wj <- readxl::read_xls("child_data\\wj3_child_stroop.xls")
  ctmt <- readxl::read_xls("child_data\\ctmt_child_stroop.xls")
  
# Recode convergent datasets- get id to match across sets and join with full set, with names "full-set_other-data"
  retest <- retest %>% 
    rename(CWRScore = CWRscore) %>% 
    stroop_df_clean(.) %>% 
    mutate(
      case_id = gsub("R", "", case_id) # Removes 'R' for retest, so can be joined
    )
  child_retest <- inner_join(child, retest, by = c("case_id"), suffix = c("_initial", "_comparison")) %>% 
    rowwise() %>% 
    mutate(
      retest_length = test_date_comparison - test_date_initial
    ) %>% 
    ungroup()
  
# Check if all cases in child_retest are present- no lost cases
  nrow(child_retest) == nrow(retest)
    
  wj <- wj %>%
    mutate(case_id = paste(ID, `Case #`, sep="_")) %>%
    rename(LW_raw_score = `WJ3-LW`, WA_raw_score = `WJ3-WA`, PC_raw_score = `WJ3- PC`)
  
  ctmt <- ctmt %>%
    mutate(case_id = paste(ID, `Case #`, sep="_")) %>%
    rename(total_raw_score = `T-Sc Sum`)

### Join all content validity df's
  child_content_df <- child %>% left_join(., wj, by = "case_id", suffix = c("_stroop", "_wj")) %>% left_join(., ctmt, by = "case_id", suffix = c("_stroop", "_ctmt"))
#### Enter following information based on scores to analyze
#Raw
  raw_scores <- child %>% dplyr::select(ends_with("raw_score") & ! starts_with("interference")) %>% names
  raw_scores_int <- child %>% dplyr::select(ends_with("raw_score")) %>% names
# Names of Scores
  raw_scores_names<- tools::toTitleCase(gsub("_", " ", raw_scores))
  raw_scores_names_int <- tools::toTitleCase(gsub("_", " ", raw_scores_int))
# Demographic factors
  demographic_factors <- c("years", "gender", "ethnicity", "region", "community", "primary_dx")
  demographic_factors_names <- c("Age", "Gender", "Ethnicity", "Region", "Community", "Diagnosis")
# Names of all scores in all different tests
  all_scores_list <- list(stroop = child %>% dplyr::select(ends_with("raw_score")) %>% names, wj = wj %>% dplyr::select(ends_with("raw_score")) %>% names, ctmt = ctmt %>% dplyr::select(ends_with("raw_score")) %>% names)
  all_scores_names_list <- lapply(1:length(all_scores_list), function(i) tools::toTitleCase(gsub("_", " ", all_scores_list[[i]])))
## Use to number tables and figures
table_counter_n <<- 0
table_counter_func <- function(table_title){
  table_counter_n <<- table_counter_n + 1
  paste0("*Table* ", table_counter_n, ": ", table_title)
}
figure_counter_n <<- 0
figure_counter_func <- function(figure_title){
  figure_counter_n <<- figure_counter_n + 1
  paste0("*Figure* ", figure_counter_n, ": ", figure_title)
}

getOutputFormat <- function() {
  output <- rmarkdown:::parse_yaml_front_matter(
    readLines(knitr::current_input())
  )$output
  if (is.list(output)){
    return(names(output)[1])
  } else {
    return(output[1])
  }
}

## Use to print tables in any format

table_output_func <- function(df, colnames, caption, row_names, ...){
  # with html- typically use table_output_func(df, colnames = c("col1", "col2"), caption = table_counter_func("caption")), df req

  if (getOutputFormat() == "html_document"){
    if (missing(colnames)){
      colnames <- names(df)
    }
    if (missing(caption)){
      caption <- NULL
    }
    kable(df, col.names = colnames, caption = caption, row.names = row_names, ...) %>%
      kable_options()
  } else {
    # with pdf or word use table_output_func(df, colnames = c("col1", "col2"), values = table_counter_func("caption")), df req
    names1 = names(df)
    names2 = if(missing(colnames)){
      names1
    } else {
      colnames
    }
    if (missing(caption)){
      caption <- NULL
    }
    ft <- flextable(df)
    ft <- set_header_df(x = ft, mapping = data.frame(keys = names1, values = names2, stringsAsFactors = FALSE),
                        key = "keys" ) %>%
      add_header_lines(top = TRUE, values = caption)
    theme_zebra(ft) # or theme_booktabs for plain tables
  }
}

###**** Add footnote in either kable or flextable
# For kableExtra note printed in APA format
      footnote_func <- function(df = ., footnote_caption = "", i = 1, j = 1) {
        if (isTRUE(getOption('knitr.in.progress'))) {
          if (opts_knit$get("rmarkdown.pandoc.to") == "html") { # If kable
            df %>% 
              kableExtra::footnote(general = footnote_caption, general_title = "Note. ",
                       footnote_as_chunk = T, title_format = c("italic"))
          } else {
            df %>%
              flextable::footnote(., i = i, j = j, value = as_paragraph(footnote_caption))
          }
        } else {
          df %>% 
            kableExtra::footnote(general = footnote_caption, general_title = "Note. ",
                     footnote_as_chunk = T, title_format = c("italic"))
        }
      }
      
###**** Add header to kable or flextable table
      # For kableExtra note printed in APA format
      header_func <- function(df = ., header_text, col_length) {
        if (isTRUE(getOption('knitr.in.progress'))) {
          if (opts_knit$get("rmarkdown.pandoc.to") == "html") { # If kable
            df %>% 
              kableExtra::add_header_above(., header = data.frame(names = header_text,
                                colspan = col_length)
                        )
          } else {
            df %>%
              flextable::add_header_row(., values = header_text, colwidths = col_length)
          }
        } else {
          df %>% 
              kableExtra::add_header_above(., header = data.frame(names = header_text,
                                colspan = col_length)
                        )
        }
      }

The standardization and analysis of psychometric properties of the Children’s Stroop Color and Word Test (“Stroop”) was undertaken with the goals of updating standardized score conversion tables and examining psychometric properties with current normative data using modern analytic techniques. To this end, data were collected from a sample of children taking the Stroop and comparative tests. These scores were examined, models to predict Stroop scores for individuals were constructed, scaled score tables were constructed from these models, and reliability and validity were examined.

Sample

Using a national standardization plan, the Stroop was administered and data were collected by trained field researchers over a period of time between 2013 to 2014 with 111 participants ages 4 to 14. The field researchers represent professionals who will typically be administering the Stroop (see Qualifications section of this Manual).

Data Cleaning

Data were examined for integrity of entry and for aberrations in values. The details of this process are presented in source code documentation for this manual.

Data were examined for aberrations in the following ways:

  1. Number of cases of each demographic variable equaled sample size
  2. Cases were examined for duplicate entry using combinations of fields unlikely to result in random duplication
  3. Raw score outliers were examined
  4. Differences between test and retest values (see Test-Retest section) were examined for extreme values

In all above procedures, if and when any cases were identified programmatically, those cases were consulted by visual inspection and consultation of raw data to determine if there was an incorrect value. There was one case with a value approximately six standard deviations from the mean when considered with normative and ipsative comparisons. It was assumed this value was erroneous. This case was removed from the dataset. Remaining cases and values were left intact.

Sample Description

The standardization plan was designed to approximately replicate the U.S. population along demographic characteristics typically accounted for in standardized test development. The following demographic criteria were considered, with variable name and corresponding levels:

# Function gets levels of all demographic variables, combines them as string, and returns vector
  demo_levels <- sapply(demographic_factors[-1], function(demo_var){ 
    child %>% dplyr::select(all_of(demo_var)) %>% pull() %>% levels %>% paste(., collapse = ", ")
  })
# Dataframe describing all demographic variables
  data.frame(
    description = c("Age", "Sex/Gender", "Race/Ethnicity", "Region of the U.S. Currently Residing", "Community Size", "Primary Mental Health Diagnosis"), # description of each variable
    demo_name = demographic_factors_names,
    levels = c("Continuous", demo_levels),
    row.names = NULL
    ) %>% table_print(., colnames = c("Description", "Variable", "Levels"), caption = table_counter_func("Demographic Criteria"))
Table 1: Demographic Criteria
Description Variable Levels
Age Age Continuous
Sex/Gender Gender Male, Female, Another Value
Race/Ethnicity Ethnicity African-American, Asian, Caucasian, Hispanic, Native-American, Other, Another Value
Region of the U.S. Currently Residing Region Northeast, South, West, Another Value
Community Size Community Rural, Urban, Another Value
Primary Mental Health Diagnosis Diagnosis ADHD, Dementia, ID, TBI, No Dx, Another Value

The following counts of participants were obtained for each variable:

# Function to count number of cases for each level for variables and frequency
  demographics_count_func <- function(df, col, col_names){
    col <- sym(col)
    df %>% 
      group_by(grouping_var = factor(!! col)) %>% # factor to convert age
        summarize(n = n()) %>%
      add_column(factor_name = tools::toTitleCase(gsub("_", " ", col_names)), .before = 1) %>% 
      mutate(
        freq = round(n/sum(n),4) * 100,
        factor_name = ifelse(duplicated(factor_name), "", factor_name) # Labels variable on first occurrence for style
      ) 
  }
    lapply(1:length(demographic_factors), function(i) demographics_count_func(child, demographic_factors[i], demographic_factors_names[i])) %>% 
    bind_rows() %>% 
    table_print(., colnames = c("Variable", "Levels", "n", "% of Sample"), caption = table_counter_func("Demographic Variables Count")) %>% 
      footnote_func(., footnote_caption = paste0("N = ", nrow(child)), j = 3)
Table 2: Demographic Variables Count
Variable Levels n % of Sample
Age 4 1 0.90
5 2 1.80
6 6 5.41
7 8 7.21
8 6 5.41
9 14 12.61
10 12 10.81
11 12 10.81
12 19 17.12
13 14 12.61
14 17 15.32
Gender Male 50 45.05
Female 61 54.95
Ethnicity African-American 2 1.80
Asian 12 10.81
Caucasian 77 69.37
Hispanic 13 11.71
Native-American 1 0.90
Other 6 5.41
Region Northeast 78 70.27
South 26 23.42
West 7 6.31
Community Rural 20 18.02
Urban 91 81.98
Diagnosis ADHD 1 0.90
Dementia 3 2.70
ID 7 6.31
TBI 5 4.50
No Dx 94 84.68
Another Value 1 0.90
Note. N = 111

There were some discrepancies between sample and population proportions, as described further in the Demographics section. Each of these variables is considered and controlled for or incorporated into the analysis of Stroop performance and in the models used to construct standardized scores, as described further.

Measures

Stroop Color and Word Test (Stroop)

The Children’s Stroop Color and Word Test (“Stroop”) is a brief measure of executive functions, described in more detail in this manual. The Stroop consists of three tasks that generate three raw scores. They are:

  1. Color Congruent (“Color”)
  2. Word Congruent (“Word”)
  3. Color-Word Incongruent (“Color-Word”)

A calculation using these scores (described in the Scoring section of this Manual) represents a fourth score:

  1. Interference (“Interference”)

Further interpretation and description of these scales is provided in the Interpretation section of this Manual.

Performance on these tasks is contextualized via standardized scores. Each task purports to represent a separate, though related, construct, so each score also represents a scale. The values are generally referred to as scores earlier in this chapter when providing descriptive statistics and constructing models for scaled scores. The content and relationship of these values as scales is discussed further below in this chapter.

Score Description

###*** Gives basic mean, sd, sem, range stats in table, grouped by each variable
  child %>% 
    summarize(
      n = n(),
      across(ends_with("raw_score"), # choose variables to extract, here they are same as raw_scores_names_int
             list( 
               mean = ~ paste0(round(mean(.x),2), "(", round(sd(.x),2), ")"),
               sem = ~ round(psych::describe(.x)$se, 2),
               min_max = ~ paste(min(.x), "-", max(.x))
             )
      )
    ) %>% 
    table_print(colnames = c("*N*", rep(c(paste("Mean", "(SD)"), "SEM", "Min-Max"), length(raw_scores_names_int))),  # Prints names of descriptive stats above each variable metric
                caption = table_counter_func("Overall Raw Score Descriptives")) %>%
  # Adds grouping header for each variable
    header_func(., header_text = c("",raw_scores_names_int),col_length = c(1, rep(3,length(raw_scores_names_int)))) %>% 
    {
      if (opts_knit$get("rmarkdown.pandoc.to") == "html"){
        # Border separating each variable
        column_spec(., seq(2, 3*length(raw_scores_names_int), 3),
                    border_left = "1px solid gray")
      } else{
        border_inner_v(., border = NULL, part = "header")
      }
    }
Table 3: Overall Raw Score Descriptives
Word Raw Score
Color Raw Score
Color Word Raw Score
Interference Raw Score
N Mean (SD) SEM Min-Max Mean (SD) SEM Min-Max Mean (SD) SEM Min-Max Mean (SD) SEM Min-Max
111 83.64(20.98) 1.99 18 - 124 58.06(13.8) 1.31 25 - 92 35.59(10.77) 1.02 15 - 65 -22.47(7.91) 0.75 -42 - -3
###*** hitogram_plot function from ggplot_helper_functions file
# histogram plotter to choose binwidths
  histogram_plot <- function(binwidth_option = "fd", x) {
    if(binwidth_option == "sturge"){
        geom_histogram(binwidth = function(x) (trunc((max(x) - min(x)) / (1 + 3.322 * log(length(x)))))) # Sturge's calculation
    } else if (binwidth_option == "fd") {   
        geom_histogram(binwidth = function(x) 2 * IQR(x) / (length(x)^(1/3))) # Freedman-Diaconis calculation
    } else{
        geom_histogram()
    }
  }

###*** Create a histogram of each raw score, with min and max values labeled, along with SD error bars
  child %>% 
  dplyr::select(all_of(raw_scores_int)) %>% 
  pivot_longer(everything()) %>% 
  group_by(name) %>% 
  mutate(min = min(value),
         max = max(value),
        avg = round(mean(value), 2), 
        sd = sd(value), # Below are some other options I was examining-- not used here
        se = psych::describe(value)$se,
        iqr = IQR(value),
        binwidth = 2 * IQR(value) / length(value)^(1/3),
        bins = trunc((max - min)/ binwidth),
        bin_num = trunc(1 + 3.322 * log(length(value)))) %>% 
  ungroup() %>% 
  mutate(name = factor(name, levels = raw_scores_int)) %>% # Puts in desired facet order with this
  ggplot(., aes(x = value)) +
  histogram_plot(binwidth_option = "sturge") + # From ggplot_helper_functions.R, auto sets bin widths
        theme_bw() +
# adds min and max labels
  geom_text(aes(x = (min + 5), y = 15, label = paste0("Min \n", min))) +
  geom_text(aes(x = max, y = 15, label = paste0("Max \n", max))) +
  geom_errorbar(aes(xmin=avg-sd, xmax=avg+sd, y = 10), width=2,
                position=position_dodge(.9)) +
  geom_vline(aes(xintercept=avg)) + # adds mean
        facet_wrap(~name,scales = "free_x", labeller = labeller(name = setNames(raw_scores_names_int, raw_scores_int))) +
        ggtitle(figure_counter_func("Raw Score Distributions")) + 
        labs(y = "Number of Participants", x = "Raw Score", caption = "Min, Max, Mean, +/- 1SD Errorbar")

The data appeared to assume fairly normal distributions, based on visual inspection of histograms. D’Agostino Pearson Test of Normality statistics, assessing skewness, were calculated to evaluate normality. This procedure was deemed appropriate for the sample sizes:

###*** Agostino test of normality for each score
child %>% 
  dplyr::select(!!! syms(raw_scores_int)) %>% # Select all our raw score vars
  pivot_longer(everything()) %>% 
  group_by(name) %>% 
  nest() -> raw_scores_long # This technique of pivot long for desired variable and then lapplying over function is repeated
  lapply(raw_scores_long$data, function(data){
    data %>% 
      pull() %>% 
      agostino.test() -> out
    round(out$p.value, 3)
  }) %>%
    unlist() %>% 
    cbind.data.frame(Scale = raw_scores_names_int, `Agostino p-value` = .) %>% 
    table_print(caption = table_counter_func("Normality Check"), colnames = colnames(.))
Table 4: Normality Check
Scale Agostino p-value
Word Raw Score 0.001
Color Raw Score 0.434
Color Word Raw Score 0.110
Interference Raw Score 0.878

The above table shows that Word Raw Score appeared to have issues with normality. Upon inspection, it appears that a long left tail may be responsible for these violations. Word Raw Score is a calculation of words read, which is especially susceptible to age influences. Assumptions of normality will be examined further when constructing models and performing further analyses, accounting for age.

Demographics

Sample Description

The following tables and plots are of Stroop scores by demographic variables. Numbers of participants within each group, the mean, SD, and SEM were identified for each Stroop score.

For each demographic variable, a Multiple Analysis of Variance (MANOVA) was conducted to identify score differences among demographic groups for any Stroop scale. Word, Color, Color-Word were only considered in the MANOVA, as Interference scores are dependent on Color and Color-Word scores and violate MANOVA assumptions.

ANOVA was also conducted for each Stroop score within each demographic variable. The results are shown in column headers. Though ANOVA results are shown for each score, it is only proper to consider ANOVA outcomes for a specific scale when the MANOVA, which accounts for multiple comparisons, is significant.

###*** In this section we are constructing summary statistics for each outcome variable for each demographic factor. For each demographic factor, there will be an overall MANOVA for all outcome var, then an ANOVA for each outcome var. Then below are summary stats for each outcome var, broken down by each level for each demographic var

# MANOVA test function for scores

  manova_func <- function(dataset, group_var, score_vars){
    group_var <- sym(group_var)
    score_vars <- syms(score_vars[-length(score_vars)])
    response_matrix <- as.matrix(dataset %>% dplyr::select(!!! score_vars))
    predictor_variable <- as.matrix(dataset %>% dplyr::select(!!group_var))
    res.man <- manova(response_matrix ~ predictor_variable)
    res.man
  }

# Extract p-value from MANOVA for overall results
  manova_overall_p_value_func <- function(dataset, group_var, score_vars){
    res.man <- manova_func(dataset, group_var, score_vars)
    p_val <- summary(res.man, tol = 0)$stats[1,6]
    p_val
  }

# Extract p-value from MANOVA for individual score results
  manova_scale_p_value_func <- function(dataset, group_var, score_vars){
    res.man <- manova_func(dataset, group_var, score_vars)
    lapply(summary.aov(res.man, tol = 0), function(x) x$`Pr(>F)`[1])
  }
# *** Alternative approach to construct MANOVA here.  Not used
# scales <- c("CRScore", "WRScore", "CWRscore")
# responses <- paste(scales, collapse = ",")
# myformula <- as.formula( paste0( "cbind(", responses , ")~ unit" ) )
# manova( myformula, data = plots )

# P-value interpretation of MANOVA- overall
 manova_p_value_results_func <- function(dataset, group_var, score_vars){
    p_val <- round(manova_overall_p_value_func(dataset, group_var, score_vars),4)
        if(p_val < .05 & p_val >= .01){
      # Can choose to print full table or summary
       # summary.aov(res.man)
      paste("*, p =", p_val)
       # data.frame(unclass(summary(res.man)), check.names = FALSE, stringsAsFactors = FALSE)
    } else if (p_val < .01 & p_val >= .0001) {
        paste("**, p =", p_val)
    }  else if (p_val < .0001) {
        paste("***, p < 0.001")
    }  else {
      paste("NS, p =", p_val)
    }
  }

# P-value interpretation of MANOVA- scales
 manova_scales_p_value_results_func <- function(dataset, group_var, score_vars){
   lapply(manova_scale_p_value_func(dataset, group_var, score_vars), function(p_val){
     if(p_val < .05 & p_val >= .01){
       # Can shoose to print full table or summary
       # summary.aov(res.man)
       paste("*, p =", round(p_val,4))
       # data.frame(unclass(summary(res.man)), check.names = FALSE, stringsAsFactors = FALSE)
     } else if (p_val < .01 & p_val >= .0001) {
       paste("**, p =", round(p_val,4))
     }  else if (p_val < .0001) {
       paste("***, p < 0.001")
     }  else {
       paste("NS, p =", round(p_val, 4))
     }
   }
   )
 }
 
# Function to generate summary tables- can choose data, grouping, summary variables, and whether to rename
  generate_summary_tbl <- function(dataset, group_var, score_vars, useColname = T) {
    group_var   <- sym(group_var)
    score_vars   <- sym(score_vars)
    # Uncomment Below if not passing list of quosures for summary variables
    # score_vars <- enquo(score_vars)
    # overall_p_value_result <- manova_p_value_results_func(dataset, !!group_var)
    dataset %>% 
      group_by(!!group_var) %>% 
      summarize(
        N = n(),
        mean = round(mean(!!score_vars),2),
        SD  = round(sd(!!score_vars),2),
        SEM = round(psych::describe(!!score_vars)$se, 2)
        # Other metrics that need to be generated frequently
      ) %>% 
      # Get proportion and consolidate some columns
      mutate( 
             # variable = quo_name(group_var),
             # variable = ifelse(duplicated(variable), "", variable),
             levels = !!group_var,
             prop = paste0(round(N/sum(N),4)*100),
             n_prop = paste0(N," (", prop, ")"),
             mean_sd = paste0(mean, " (", SD, ")")
             
      ) %>%
      dplyr::select(levels, n_prop, mean_sd, SEM) %>%
      # dplyr::select(variable, levels, n_prop, mean_sd, SEM) %>%
      ungroup -> smryDta
    
    # if (useColname) 
    #   smryDta <- smryDta %>%  
    #   rename_at(
    #     vars(-one_of(quo_name(score_vars))), 
    #     ~paste(quo_name(score_vars), .x, sep="_")
    #   ) 
    # smryDta <- smryDta %>%
    #   rename_at(1, ~paste(quo_name(group_var), overall_p_value_result))
    
    
    return(smryDta)
  }

# Following function combines all manova results and summary stats-- group_vars are demographic factors, score_vars are scores
  score_df <- function(dataset, group_vars, score_vars){
    # This constructs the header that is the result of the overall MANOVA test, then ANOVA for each outcome var
    myHeader <- c(setNames(manova_p_value_results_func(dataset, group_vars, raw_scores_int), 2), setNames(unlist(manova_scales_p_value_results_func(dataset, group_vars, raw_scores_int)), 2), setNames("No Test", 2))
  myHeader <- data.frame(names = myHeader, colspan = 2)
  myHeader$colspan <- as.numeric(myHeader$colspan)

# This constructs the table for each level of each demographic variable, generating summary stats
    cbind.data.frame(
      generate_summary_tbl(dataset, group_vars, score_vars[1]), 
      lapply(score_vars[-1], function(score_vars) generate_summary_tbl(dataset, group_vars, score_vars)[,3:4])
        ) %>%
          kable(col.names = c("Levels", "*N* (%)", rep(c("Mean (SD)", "SEM"),length(raw_scores_int))),  caption = table_counter_func(tools::toTitleCase(gsub("_", " ", group_vars))), booktabs = T) %>%
          kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, position="left", font_size = 10) %>%
          add_header_above(header = myHeader) %>%
          column_spec(c(3,5,7, 9), border_left = "1px solid gray") %>%
          kableExtra::footnote(general = "NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001")
  }
####*** Function to plot bar plots for each outcome var (score_vars) grouped by levels of each demographic variable (group_vars)
  demographics_function_plot <- function(dataset, score_vars, group_vars){
    dataset %>%
      dplyr::select(!!! syms(score_vars), !! sym(group_vars)) %>% 
      pivot_longer(.,-any_of(group_vars)) %>% # Create long df with score as factor
      mutate(name = factor(name, levels = score_vars)) %>% # Sorts by desired order
      group_by(name, !! sym(group_vars)) %>% # Groups by each score, then by levels within each score
      mutate(
             mean = mean(value),
             sd = sd(value)
      ) %>% 
      ungroup() %>% 
      mutate(name = dplyr::recode_factor(name, !!! setNames(raw_scores_names_int, raw_scores_int))) %>% # Gives pretty names
      ggplot(aes(name, value, fill = !! sym(group_vars))) + # Bar plot with each level as different bar, by each score
      geom_bar(stat = "summary", fun = "mean", position =  position_dodge(width=0.9)) +
      geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), position = position_dodge(width=0.9)) + # +/- 1SD Errors
      theme_bw()+
      scale_fill_grey(start = 0, end = .9) + # Black and white printing job
      labs(x = "Scale", y = "Score", fill = tools::toTitleCase(gsub("_", " ", group_vars)), caption = "Mean, +/- 1SD") +
      facet_wrap(~name,scales = "free_x", labeller = labeller(name = label_wrap_gen(width = 10))) # Wraps long text
  }
###*** Following puts together plot and demographic table for each demographic var- Row for each, side-by-side
# Output each table and plot
  demo_table <- lapply(demographic_factors, function(group_vars) score_df(child, group_vars, raw_scores_int))
  demo_plot <- lapply(demographic_factors, function(group_vars) demographics_function_plot(child, raw_scores_int, group_vars))
# This is the grand header at the top
manova_column_header <- setNames(data.frame(matrix(ncol = 5, nrow = 0)),paste0("col_name_", c(1:5))) %>%
        kable(col.names = c("MANOVA Test Results", paste(raw_scores_names, "ANOVA"), "Interference"),  caption = table_counter_func("Test of Differences for following columns"), booktabs = T) %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, position="left", font_size = 12) %>%
        add_header_above(header = c(" " = 1, "Results" = 4))
# Go through each variable, output a row with two columns
  cat("<div class = 'row h-100'>",
        "<div class = 'col-md-6'>",
            manova_column_header,
        "</div>",
      "</div>",
      "<hr>")
Table 11: Test of Differences for following columns
Results
MANOVA Test Results Word Raw Score ANOVA Color Raw Score ANOVA Color Word Raw Score ANOVA Interference

for (i in 1:length(demographic_factors)){
  cat(
      "<div class = 'row h-100'>",
        "<div class = 'col-sm-6'>")
          print(demo_table[[i]])
      cat(
        "</div>",
        "<div class = 'col-sm-6' style = 'margin-top: 150px'>"
        )
          print(demo_plot[[i]])
      cat(
        "</div>",
      "</div>",
      "<hr>"
      )
}
Table 5: Years
***, p < 0.001
***, p < 0.001
***, p < 0.001
***, p < 0.001
No Test
Levels N (%) Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM
4 1 (0.9) 40 (NA) NA 34 (NA) NA 20 (NA) NA -14 (NA) NA
5 2 (1.8) 42.5 (31.82) 22.50 43.5 (12.02) 8.50 24 (5.66) 4.00 -19.5 (17.68) 12.50
6 6 (5.41) 37 (15.39) 6.28 36.5 (12.47) 5.09 25.5 (12.99) 5.30 -11 (7.01) 2.86
7 8 (7.21) 64.38 (5.9) 2.09 46.88 (11.85) 4.19 31.75 (12.76) 4.51 -15.12 (4.88) 1.73
8 6 (5.41) 77 (10.53) 4.30 49.5 (5.82) 2.38 29.33 (5.47) 2.23 -20.17 (5.15) 2.10
9 14 (12.61) 76.79 (8.34) 2.23 50.36 (6.59) 1.76 29.14 (5.97) 1.60 -21.21 (7.59) 2.03
10 12 (10.81) 81.75 (10.19) 2.94 51.17 (6.7) 1.93 29.17 (5.37) 1.55 -22 (4.51) 1.30
11 12 (10.81) 88.42 (21.53) 6.21 61.58 (11.08) 3.20 35.5 (8.06) 2.33 -26.08 (7.98) 2.30
12 19 (17.12) 95.79 (12.83) 2.94 66.26 (8.5) 1.95 40.37 (7.2) 1.65 -25.89 (7.67) 1.76
13 14 (12.61) 94.86 (9.97) 2.66 66.79 (9.63) 2.57 41.21 (7.49) 2.00 -25.57 (5.21) 1.39
14 17 (15.32) 99.71 (12.59) 3.05 69.47 (12.8) 3.10 45.41 (12.19) 2.96 -24.06 (7.95) 1.93
Note:
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001

Table 6: Gender
NS, p = 0.5735
NS, p = 0.3657
NS, p = 0.3629
NS, p = 0.1811
No Test
Levels N (%) Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM
Male 50 (45.05) 81.64 (22.48) 3.18 56.74 (13.98) 1.98 34.08 (10.19) 1.44 -22.66 (8.46) 1.20
Female 61 (54.95) 85.28 (19.71) 2.52 59.15 (13.67) 1.75 36.84 (11.16) 1.43 -22.31 (7.49) 0.96
Note:
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001

Table 7: Ethnicity
**, p = 0.0048
NS, p = 0.1713
NS, p = 0.8539
NS, p = 0.5836
No Test
Levels N (%) Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM
African-American 2 (1.8) 44 (21.21) 15.00 59.5 (3.54) 2.50 48.5 (3.54) 2.50 -11 (7.07) 5.00
Asian 12 (10.81) 86.33 (12.52) 3.61 56.17 (10.64) 3.07 33.5 (9.36) 2.70 -22.67 (6.3) 1.82
Caucasian 77 (69.37) 84.48 (21.69) 2.47 58.13 (14.62) 1.67 35.31 (10.84) 1.24 -22.82 (8.18) 0.93
Hispanic 13 (11.71) 81.15 (18.14) 5.03 55.85 (13.58) 3.77 35.77 (12.67) 3.51 -20.08 (7.86) 2.18
Native-American 1 (0.9) 86 (NA) NA 65 (NA) NA 39 (NA) NA -26 (NA) NA
Other 6 (5.41) 85.67 (25.17) 10.28 64.17 (13.14) 5.36 38.17 (9.95) 4.06 -26 (5.18) 2.11
Note:
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001

Table 8: Region
NS, p = 0.1493
NS, p = 0.2826
NS, p = 0.5622
NS, p = 0.3827
No Test
Levels N (%) Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM
Northeast 78 (70.27) 85.71 (21.31) 2.41 58.09 (13.98) 1.58 35.4 (10.98) 1.24 -22.69 (7.91) 0.90
South 26 (23.42) 78.85 (19.05) 3.74 59.35 (12.65) 2.48 37.38 (10.14) 1.99 -21.96 (8.45) 1.66
West 7 (6.31) 78.43 (23.3) 8.81 53 (16.74) 6.33 31.14 (10.64) 4.02 -21.86 (6.59) 2.49
Note:
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001

Table 9: Community
**, p = 0.0066
*, p = 0.024
NS, p = 0.6662
NS, p = 0.5218
No Test
Levels N (%) Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM
Rural 20 (18.02) 74.1 (17.33) 3.88 56.85 (10.68) 2.39 37 (9.71) 2.17 -19.85 (8.22) 1.84
Urban 91 (81.98) 85.74 (21.21) 2.22 58.33 (14.43) 1.51 35.29 (11.02) 1.16 -23.04 (7.77) 0.81
Note:
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001

Table 10: Primary Dx
NS, p = 0.2713
NS, p = 0.8585
NS, p = 0.5182
NS, p = 0.1613
No Test
Levels N (%) Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM
ADHD 1 (0.9) 86 (NA) NA 49 (NA) NA 35 (NA) NA -14 (NA) NA
Dementia 3 (2.7) 92.33 (17.21) 9.94 61.67 (15.31) 8.84 46.33 (20.13) 11.62 -15.33 (9.07) 5.24
ID 7 (6.31) 85.57 (12.03) 4.55 57.43 (15.73) 5.94 30.57 (11.3) 4.27 -26.86 (9.08) 3.43
TBI 5 (4.5) 72.8 (10.13) 4.53 49.6 (7.57) 3.39 28.8 (6.3) 2.82 -20.8 (4.15) 1.85
No Dx 94 (84.68) 83.82 (22.16) 2.29 58.72 (13.9) 1.43 36.12 (10.43) 1.08 -22.61 (7.86) 0.81
Another Value 1 (0.9) 79 (NA) NA 41 (NA) NA 24 (NA) NA -17 (NA) NA
Note:
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001

The relationship between demographic factors (excluding Age, examined afterwards) and Stroop scales were explored through stepwise regression. Models with all demographic variables entered as predictors were constructed. At each step, the poorest fitting variable was removed and that model was compared to the fuller model until all remaining variables were significant predictors. There were modest relationships between predictors Community and Gender with Word Raw Score and Ethnicity with Color-Word and Interference.

# Following from helper_functions

###*** Stepwise function removes one variable at constructs regression model. Chooses model with lowest AIC or highest R-squared at each step, them compares that reduced model to full. If not significant, proceeds and repeats. Outputs final significant model and summarizes
# Constructs lm formula
  lm_formula_func <- function(outcome_var, pred_var, data){
    model_formula <- formula(paste0(outcome_var, "~ ", sub(" \\+ $.*", "", paste(sapply(pred_var, function(pred_var) paste(pred_var, "+ ")), collapse = ""))))
    lm(model_formula, data = data) -> mod1
  }
# Function that removes one var at each step
  lm_factor_stepwise_func <- function(outcome_var, pred_var, data, selection_method = "aic"){
    final_var <- pred_var # All the predictor variables
    mod_full <- lm_formula_func(outcome_var, pred_var, data) # Full regression model
    mod_full_sum <- summary(mod_full) # Get summary information
  if (length(final_var) > 1) { # Check to make sure we are removing a variable, if not construct null model
    # Generate models for pred_vars removing one at each iteration
    mod_one_removed<- lapply(1:length(pred_var), function(i) lm_formula_func(outcome_var, pred_var[-i], data))
  # Choose to select by AIC or r-squared; recommended to try both and then compare
    if(selection_method == "aic"){
      aic_vec <- sapply(1:length(pred_var), function(i) AIC(mod_one_removed[[i]])) # can use AIC here or adjusted r
      pred_var <- pred_var[-which(aic_vec == min(aic_vec, na.rm = T))] # Removes the variable with lowest AIC
    } else if(selection_method == "r_squared"){
      aic_vec <- sapply(1:length(pred_var), function(i) summary(mod_one_removed[[i]])$adj.r.squared) # Use adjusted r squared
      pred_var <- pred_var[-which(aic_vec == max(aic_vec, na.rm = T))] # Removes the variable with highest r-squared
    }
  
    mod_reduced <- lm_formula_func(outcome_var, pred_var, data = na.omit(data[ , all.vars(formula(mod_full))])) # reduced model with one predictor removed
    } else {
  mod_reduced <- lm_formula_func(outcome_var, "1", data = na.omit(data[ , all.vars(formula(mod_full))])) # null model if only one remaining predictors to remove
    }   
# Conduct the likelihood ratio test
  full_reduced_anova <- lrtest(mod_full, mod_reduced)
    if (full_reduced_anova$`Pr(>Chisq)`[-1] > .05){ # If no significant difference proceed (adapts parsimonious model)
      outcome_msg <- "proceed" 
    } else {
      outcome_msg <- "All significant predictors" # Don't proceed, significant difference if term dropped, so don't
    }
    while ((outcome_msg != "Final Model") & (outcome_msg != "All significnat predictors") & 
  (pred_var[1] != "1") & (length(final_var) > 1)) {
    # Repeats above
      final_var <- pred_var
      mod_full <- lm_formula_func(outcome_var, pred_var, data)
      mod_full_sum <- summary(mod_full)
      if (length(final_var) > 1) {
        mod_one_removed<- lapply(1:length(pred_var), function(i) lm_formula_func(outcome_var, pred_var[-i], data))
  # Choose to select by AIC or r-squared; recommended to try both and then compare
    if(selection_method == "aic"){
      aic_vec <- sapply(1:length(pred_var), function(i) AIC(mod_one_removed[[i]])) # can use AIC here or adjusted r
      pred_var <- pred_var[-which(aic_vec == min(aic_vec, na.rm = T))] # Removes the variable with lowest AIC
    } else if(selection_method == "r_squared"){
      aic_vec <- sapply(1:length(pred_var), function(i) summary(mod_one_removed[[i]])$adj.r.squared) # Use adjusted r squared
      pred_var <- pred_var[-which(aic_vec == max(aic_vec, na.rm = T))] # Removes the variable with highest r-squared
    }
    # Assigns null model if no predictors remaining
      pred_var <- if(identical(pred_var, character(0))) {
        "1"
      } else{
        pred_var
      }
      mod_reduced <- lm_formula_func(outcome_var, pred_var, data=na.omit(data[ , all.vars(formula(mod_full))]))
      } else {
        mod_reduced <- lm_formula_func(outcome_var, "1", data = na.omit(data[ , all.vars(formula(mod_full))]))
      }
      full_reduced_anova <- lrtest(mod_full, mod_reduced)
      if (isFALSE(getOption('knitr.in.progress'))) {
        print(full_reduced_anova)
      }
      if (full_reduced_anova$`Pr(>Chisq)`[-1] > .05){
        outcome_msg <- "proceed" 
      } else {
        outcome_msg <- "Final Model"
      }
    }
# This is text output explaining model
    paste0("Results of the linear regression indicated that there was", 
           ifelse(full_reduced_anova$`Pr(>Chisq)`[-1] > .05, " no significant effect ", " a significant effect "), 
           "between predictor variables ", paste(final_var, collapse = ", "), 
           " and the outcome variable ", outcome_var, " X2(", abs(full_reduced_anova$Df[-1]), "), ", 
           ifelse(full_reduced_anova$`Pr(>Chisq)`[-1] < .001, "p < .001", 
                  paste0("p = ", round(full_reduced_anova$`Pr(>Chisq)`[-1], 3)))) -> cap
# Gets all summary coefficients and probabilities  
    mod_full_sum$coefficients %>%
      as_tibble(rownames = "Coefficients") %>%
      mutate(across(where(is.double), round,3)) -> output_df

    output_df %>% 
      table_print(., colnames = colnames(.),
                  caption = table_counter_func(paste(outcome_var, cap)))
  }

# Apply stepwise function across scores with demographic factors as predictors

  lapply(raw_scores_int, function(outcome_var){
    print(lm_factor_stepwise_func(outcome_var, demographic_factors[1:6], child))
  }) -> suppress_print_msg
# Evidence of community, gender effects with word_raw_score and ethnicity with color_word_raw_score
# Test mixed models with those effects as random effects
  ## Word
    # Overfit model with all random effects with random intercepts and slopes
      # mod_word_slope_intercept <- lmer(word_raw_score ~ decimal_age + (1 + decimal_age|community) + (1 + decimal_age|gender), data = child)
    # After removing effects, found no significance for random slopes, and no significant random intercept for community
      mod_word_intercept <- lmer(word_raw_score ~ decimal_age + (1|community), data = child)
      mod_word_lrtest <- rand(mod_word_intercept) # No effect
  ## Color_word
    # mod_color_word_slope_intercept <- lmer(color_word_raw_score ~ decimal_age + (1 + decimal_age|ethnicity), data = child)
    mod_color_word_intercept <- lmer(color_word_raw_score ~ decimal_age + (1 |ethnicity), data = child)
    # After removing effects, found no significance for random slopes or intercept for ethnicity
      mod_color_word_lrtest <- rand(mod_color_word_intercept)
  ## Interference
    # No slope or intercept significance for ethnicity in interference
      mod_interference_intercept <- lmer(interference_raw_score ~ decimal_age + (1|ethnicity), data = child)
      mod_interference_lrtest <- rand(mod_interference_intercept)

The effect of these variables was examined further in models with Age as a continuous predictor. As Age is known to have a significant and strong relationship with Stroop performance, mixed effects models were constructed to examine demographic factors found significant in stepwise regression above. These demographic factors were entered as random effects, with both random slopes and intercepts, and with Age as a fixed effect. Random effects in Word (X²(1) = 3.57, p = 0.059), Color-Word (X²(1) = 2.11, p = 0.147), and Interference (X²(1) = 0, p = 1) were found to not be significant. Thus, it does not appear that these demographic variables have a significant relationship with Stroop scores in the presence of Age.

Age

The variable Age was grouped by years in the tables above. MANOVA showed significant differences among groups. Also, age is known from research with the Stroop and similar instruments to have a significant relationship with Stroop performance. Age for each participant was expressed as a decimal value using years and months, with days truncated (that is, months were rounded down in all cases). Stroop scores as outcome variables with respect to Age as a continuous predictor variable were examined. Regression models were constructed and plotted.

###*** Construct plots of Stroop scores across age, conduct gam regression with age as predictor on scores
# Long-form grouped by scores
  child %>% 
    dplyr::select(all_of(raw_scores_int), decimal_age) %>% 
    pivot_longer(., -decimal_age) %>% 
    group_by(name) -> long_age

  long_age_lm <- long_age %>% nest # nest for lm, not ggplot
  
# Uses Loess regression to get optimal span
    lapply(long_age_lm$data, function(data){
      data %$% 
        fANCOVA::loess.as(decimal_age, value, family = "gaussian") -> loess_out
        loess_out$pars$span # returns span
      })-> loess_span

# Conduct GAM for each score, conduct full and reduced likelihood test
  mapply(function(data, span_value){
  # GAM with loess regression  
    data %>%
      gam::gam(value ~ lo(decimal_age, span = span_value), data = .) -> mod_full
  
    na.omit(data[ , all.vars(formula(mod_full))[1:2]]) %>%
      gam::gam(value ~ 1, data = .) -> mod_null # Null model with no predictor
  
    lrtest(mod_full, mod_null) -> out # Compare GAM model and null and collect outputs
    
  # Put results in report format to apply as label to plot
    paste0("X2(", abs(out$Df[-1]), ") = ", round(out$Chisq[-1],2), ", ", p_value_func(out$`Pr(>Chisq)`[-1])) # P
  }, data = long_age_lm$data, span_value = loess_span) -> age_lm_results 

# Create dataframe of labels of lrtest output for each plot
  dat_text <- data.frame(
    label = age_lm_results %>% unlist,
    name = factor(raw_scores_names_int) # Preserves order
  )
  
# Plots of scores across age-- originally facet_wrap, but not arrange multiple individual plots because need to custom specify span for each
  lapply(1:length(long_age_lm$data), function(i){
    long_age_lm$data[[i]]  %>% 
      ggplot(data = ., aes(x= decimal_age, y =value)) + 
      geom_point(alpha = .2) +
      geom_smooth(se = F, span = loess_span[[i]]) + # Custom span from above
      geom_text(
        data    = dat_text[i,],
        mapping = aes(x = -Inf, y = -Inf, label = label),
        hjust   = -0.1, # Adjustments just done from visual inspection
        vjust   = -1) +
      labs(x = "Age", y = "Raw Score", title = raw_scores_names_int[i]) 
    })-> p

# Arrange plots in grid
  plot_list <- ggarrange(plotlist = p, nrow = 2, ncol = 2)
# Title plots
  annotate_figure(plot_list, top = text_grob(figure_counter_func("Raw Scores Across Age"), face = "bold", size = 14))

Plot labels represent general additive model (GAM) with Loess regression of Age on given raw score. GAM regression was initially used to explore the relationship between Age and Raw Scores. Linear models with simple and quadratic terms will be fitted in the next section to attempt to generate equations for predicting Scores from Age. Here, the fitted lines on plots were constructed with Loess regression, using the same optimized span as in the GAM, to explore the relationship between Age and raw score. Plots with linear regression fitted lines are presented in the next section. Visual inspection of these Loess regression plots and confirmation by regression outcomes labeled on the plots reveal improved performance on Stroop tasks across age (increases in Word, Color, and Color-Word and decreased Interference).

Models to Predict Raw Scores

Models were constructed to predict each Stroop score. The quadratic models allow for a nonlinear relationship. The models were then used to construct standardized score tables for each scale.

As was observed, Stroop scores were significantly predicted by Age. Other factors were found not to explain Stroop scores in the presence of Age as a predictor. Regression models were constructed for each of the Raw Scores with Age as a predictor with both a quadratic and linear term. Quadratic terms were retained where significant.

Full models with Age as a continuous variable (“decimal_age”), with and without quadratic terms, were compared to null models with maximum likelihood analyses. Maximum likelihood analysis has advantages in analysis compared to Wald-based methods of:

  • Being more robust to deviations from normalcy and tolerating unbalanced groups
  • Allowing for missing data (though there were none in this sample)
  • Incorporating full information from the models to allow for precise standard error estimation.

In addition, a Breusch-Pagan (BP) Test was conducted for each model to test for heteroskedasticity. All results were above alpha = 0.05, so all models were considered not to violate assumptions of homogeneity.

###*** Following constructs the regression equation for each model
# Long data with all scores
  child %>% 
    dplyr::select(all_of(raw_scores_int), decimal_age) %>% 
    pivot_longer(., -decimal_age) %>% 
    group_by(name) %>% 
    nest -> lm_data

# Apply each score as outcome, with decimal_age as predictor for full model with quadratic term
  lapply(lm_data$data, function(data) {
    data %>% 
      lm(value ~ poly(decimal_age,2), data = .)
  }) -> poly_lm_data
  
# Apply each score as outcome, with decimal_age as predictor for full model
  lapply(lm_data$data, function(data) {
    data %>% 
      lm(value ~ decimal_age, data = .)
  }) -> full_lm_data

  # If poly significant include quadratic term in full_lm
  lapply(1:length(full_lm_data), function(i){
    if(lrtest(poly_lm_data[[i]], full_lm_data[[i]])[2,5] < .05){
      poly_lm_data[[i]]
    } else{
      full_lm_data[[i]]
    }
  }) -> full_lm_data
      
  
# Apply each score as outcome, with intercept only for null model
  lapply(lm_data$data, function(data) {
    data %>% 
      lm(value ~ 1, data = .)
  }) -> null_lm_data

# Produces summary equation of regression model for each score
    reg_eq <- function(mod) {
      paste(
        c(round(summary(mod)$coefficients[1,1],2), 
          mapply(function(coefficient_names, coefficients) {
            # Pulls out each coefficient and intercept and pastes together
            paste("+", coefficients, " * ", coefficient_names) 
          },
            coefficient_names = summary(mod)$coefficients[,1][-1] %>% names, 
            coefficients = round(summary(mod)$coefficients[,1][-1], 2))
        ),collapse = " ")
      }
# Function compares the full and reduced model, constructs equation, gives p-value, and bp test of heterogeneity
  model_dataframe <- function(full_model, null_model){
    
    # Full-reduced likelihood test
    critical_comparison <- lrtest(full_model, null_model)
  
    # Best fitting model, test for heterskedasticity- get p-value
    bp_p.value <- lmtest::bptest(full_model)$p.value
    
    # Regression equation
    regression_equation <- reg_eq(full_model)
    
    model_stats_df <- data.frame(
      equation = regression_equation, 
      `model_significance_Pr(X2)` = p_value_func(critical_comparison$`Pr(>Chisq)`[-1]),
      bp_test_significance = p_value_func(bp_p.value)
      )
    
    row.names(model_stats_df) <- NULL
    return(model_stats_df)
  }

mapply(model_dataframe, full_lm_data, null_lm_data) %>% 
  t() %>% 
  data.frame() %>% 
  add_column(Scale = raw_scores_names_int, .before = 1) %>% 
  table_print(colnames = tools::toTitleCase(gsub("[_|.]", " ", colnames(.))), caption = table_counter_func("Raw Score Regression Model Equations and Diagnostics"))
Table 16: Raw Score Regression Model Equations and Diagnostics
Scale Equation Model Significance Pr X2 Bp Test Significance
Word Raw Score 83.64 + 166.79 * poly(decimal_age, 2)1 + -43.32 * poly(decimal_age, 2)2 p < .001 p = 0.43
Color Raw Score 14.97 + 3.89 * decimal_age p < .001 p = 0.453
Color Word Raw Score 7.89 + 2.5 * decimal_age p < .001 p = 0.857
Interference Raw Score -22.47 + -37.06 * poly(decimal_age, 2)1 + 16.1 * poly(decimal_age, 2)2 p < .001 p = 0.235

As can be seen, the quadratic term was significant for Word Raw Score and Interference Raw Score. As discussed in the Development section of this Manual, this effect is likely due to the sharp gains young children make in Word naming ability, relative to color. This affects Interference, as children are more impervious to the effects of the word distraction in the Color-Word naming and able to focus better on color. In both Word and Interference, these effects level off as children get older.

Plots of Predicted Scores and Residuals

The following plots represent the models above, with fitted equation lines and residuals included. Tables with tests of normality, including D’Agostino and Brusch-Pagan tests are also included.

###*** Plot of each scale fitted line and residuals
  lapply(1:length(raw_scores_names_int), function(i){
    lm_data$data[[i]] %>% # lm data for each scale- add predictions and residuals to each
      add_predictions(full_lm_data[[i]]) %>%
      add_residuals(full_lm_data[[i]]) %>%
      {
        ggplot(data = ., aes(x = decimal_age, y = pred)) +
        geom_smooth(formula = ifelse(length(coefficients(full_lm_data[[i]])) > 2, 'y ~ poly(x,2)', 'y ~ x'), method = "lm", aes(linetype = "")) + # Plots lm model fitted line, choosing either quadratic if in full model or simple
        geom_point(aes(x = decimal_age, y = resid, color = "")) + # Adds residuals
          scale_y_continuous(breaks = seq(round(min(.$pred, .$resid), -1) - 10, round(max(.$pred, .$resid), -1) + 10, 10)) + # Fits y-axis based on extremum of predictions and residuals
          scale_x_continuous(breaks = seq(round(min(.$decimal_age), 0) - 1, round(max(.$decimal_age), 0) + 1, 2)) + #Age scale
          labs(x = "Age", y = "Raw Score", color = "Residuals", linetype = "Predicted") +
      ggtitle(figure_counter_func(paste("Predicted", raw_scores_names_int[i], "\n and Residuals Age")))
      } 
    
  }) -> normality_plot_output
###*** Following conducts D'agostino test and BP Test and combines output into df
  lapply(1:length(raw_scores_names_int), function(i) {
    # Agostino Test- Tests skewness
     lm_data$data[[i]] %>% 
        # Add residuals from Full lm models
        add_residuals(full_lm_data[[i]]) %>% 
        dplyr::select(resid) %>% # Pull out residuals
        pull() %>% # Vector necessary for Agostino
        agostino.test() -> agostino_out
    # BP Test- Tests heteroskedasticity
      lmtest::bptest(full_lm_data[[i]]) -> bptest_out # BP Test on model
    # Gather into df and print
     # print(
       cbind.data.frame(
           scale = raw_scores_names_int[i], # Scale name
           data.frame(as.list(agostino_out$statistic)), # Agostino Stat
           p_value_func(agostino_out$p.value), # Agostino z-value
           data.frame(as.list(bptest_out$statistic)), # BP Stat
           p_value_func(bptest_out$p.value) #BP p-value
         ) %>% 
        mutate_if(is.numeric, round, digits = 3) %>%
        table_print(colnames = c("Scale", "Agostino Skewness Statistic", "Agostino Z-Value", "Agostino p-value", "Brush-Pagan Statistic", "Brush-Pagan p-value"), caption = "Tests of Normality") 
      # )
  }) -> normality_test_output
for (i in 1:length(raw_scores_int)){
    cat(
      "<div class = 'row h-100'>",
        "<div class = 'col-sm-6'>"
      )
        print(normality_plot_output[[i]])
    cat(
      "</div>",
      "<div class = 'col-sm-6'>"
      )
        print(normality_test_output[[i]])
    cat(
      "</div>",
    "</div>",
    "<hr>"
      )
}
Tests of Normality
Scale Agostino Skewness Statistic Agostino Z-Value Agostino p-value Brush-Pagan Statistic Brush-Pagan p-value
Word Raw Score -0.319 -1.423 p = 0.155 1.687 p = 0.43

Tests of Normality
Scale Agostino Skewness Statistic Agostino Z-Value Agostino p-value Brush-Pagan Statistic Brush-Pagan p-value
Color Raw Score -0.438 -1.924 p = 0.054 0.564 p = 0.453

Tests of Normality
Scale Agostino Skewness Statistic Agostino Z-Value Agostino p-value Brush-Pagan Statistic Brush-Pagan p-value
Color Word Raw Score 0.521 2.257 p = 0.024 0.033 p = 0.857

Tests of Normality
Scale Agostino Skewness Statistic Agostino Z-Value Agostino p-value Brush-Pagan Statistic Brush-Pagan p-value
Interference Raw Score -0.285 -1.277 p = 0.202 2.898 p = 0.235

Inspection of residuals and tests of normality reveal a lack of serious violations of normality.

Standardized Scores

The best-fitting models, identified above, were used to construct standardized score conversion tables (Appendix A). The models generated predicted scores for each scale for each age, in years. T-scores were assigned to each possible raw score to a maximum of 3.5 standard deviations from the mean. These scores represent a comparison of an individual’s performance on a given scale relative to same-aged peers. Methods for converting raw scores to standardized scores are presented in this Manual.

Scale Structure

The structure of the Stroop was examined. The raw scores derived from the Color, Word, and Color-Word tasks and calculated Interference score were considered separate scales. Their relationship independently and in combination were examined.

Scale Correlation

First, the correlation of the scales was examined. As Interference is a calculation from the other three scores, Interference is not independent and, thus, does not meet assumptions typically required for inclusion in some analyses. Nonetheless, its association with the other scales is of interest. Analyses were typically conducted with and without Interference included to examine its association with the other scales (with its inclusion) and to obtain outcome measures fulfilling stricter assumptions (without inclusion).

A correlation matrix was constructed, along with scatterplots of association between scales, and histograms of individual scales, with all combinations of two scales included.

###*** Correlation matrix with scatterplots
child %>% 
  dplyr::select(setNames(raw_scores_int, raw_scores_names_int)) %>% # Prettify scale names
  PerformanceAnalytics::chart.Correlation() # Function does all the work
  title(sub="NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001")
  title(main = figure_counter_func("Stroop Correlation Matrix"), line = 3)

Observing the chart above, we can see that Color, Word, and Color-Word have significant approximately moderate-to-strong correlations with each other.

Interference can be seen to have a moderate negative correlation with Word and Color scores. This is intuitive, as Interference is calculated by subtracting Color from Color-Word, so as Color increases, Interference decreases, holding Color-Word constant.

###*** Polynomial regression with Color-Word as predictor of Interference
# Quadratic model
  int_cw_quad_mod <- lm(interference_raw_score ~ poly(color_word_raw_score,2), data = child)
# Linear Model
  int_cw_linear_mod <- lm(interference_raw_score ~ color_word_raw_score, data = child)
# Compare quadratic and linear
  int_cw_quad_lrtest <- lrtest(int_cw_quad_mod, int_cw_linear_mod)

However, there appeared to be a nonexistent correlation between Interference and Color-Word, which is puzzling, as Color-Word is used to calculate Interference. Examining the scatterplot and fitted line, it appears that there is a curvlinear relationship between the two variables. This was confirmed by constructing a regression model predicting Interference from Color-Word score with a quadratic term for Color-Word, which resulted in significant findings (X2(1) = 6.07, p = 0.014).

# Plot of quadratic relationship between Interference and Color-Word
  ggplot(child, aes(color_word_raw_score, interference_raw_score)) + 
    geom_point() + 
    geom_smooth(method = "lm", formula = y ~ poly(x, 2)) +
    labs(x = "Color-Word Raw Score", y = "Interference") + 
    ggtitle(figure_counter_func("Curvlinear Relationship of Interference Across Color-Word"))

Examining the scatterplot and fitted quadratic line, it can be seen that Interference performance is greatest at low and high Color-Word Raw Score. Said another way, the Stroop effect (Interference) seems most pronounced for people of moderate abilities to inhibit incongruent stimuli.

Internal Consistency

Cronbach’s Alpha

###*** Cronbach alpha calculation and output to df
cbind.data.frame(
  scale = c("Without Interference"),
    # Stroop Cronbach without Interference
      cbind.data.frame(child %>% dplyr::select(all_of(raw_scores)) %>% cronbach())
  ) %>%
  mutate_if(is.numeric, round, digits = 3) %>%
  table_print(colnames = c("Scales Included", "N", "Number of Scales", "Alpha"), caption = table_counter_func("Cronbach's Alpha"))
Table 17: Cronbach’s Alpha
Scales Included N Number of Scales Alpha
Without Interference 111 3 0.846

Due to Interference’s dependent relationship with other variables and curvlinear relationship with Color-Word, Interference was not used in calculation of internal consistency. Stroop scales demonstrated moderate internal reliability, using traditional interpretation criteria of Cronbach alpha.

Bartlett Correlation Test

A Bartlett’s correlation test was conducted. Results of the test indicated the correlation of scales without Interference was significantly different than zero, χ 2(3)= 215.14 , p < .001. The determinant of the correlation matrix was 0.137, of sufficient value to conduct further analysis of the factor structure of the items.

The correlation of the variables was graphically represented, where items closer in proximity have more of an association than those farther apart.

Graphical Representation of Correlation using Principal Components Analysis- 2d
###*** mdspca- plot of correlation of scales

child %>% 
  dplyr::select(setNames(all_of(raw_scores_int), raw_scores_names_int)) %>% 
  mdspca(cx = 1)
title(main = figure_counter_func("PCA Stroop Correlation"), line = -1, sub = "Color and Word labels overlap")

Graphical Representation of Correlation using Principal Components Analysis- 3d
###*** sphpca- 3d plot of correlation of Stroop scales
child %>% 
  dplyr::select(setNames(all_of(raw_scores_int), raw_scores_names_int)) %>% 
  sphpca(cx = .75, nbsphere = "one")
title(main = figure_counter_func("PCA Stroop Correlation 3d"), line = -1, adj = .5)

###*** FPCA- assign variable to be dependent. Doesn't add anything here, so not included
fpca(interference_raw_score ~ color_raw_score + word_raw_score + color_word_raw_score, data = child)
title(main = figure_counter_func("\n Correlation Interference Dependent"), line = -2)

Principal Components Analysis

A Principal Components Analysis (PCA) with promax rotation, to allow for correlation of components, was conducted to further explore the relationship of Stroop scales. An increasing number of components were specified.

Unspecified Model
###*** PCA, unspecified number of components
  fit_unspecified <- child %>% 
    dplyr::select(setNames(all_of(raw_scores_int), raw_scores_names_int)) %>% 
    principal(., rotate="promax") # conducts pca

# Outputs in df
  data.frame(
    factors = seq(1:length(raw_scores_names_int)), # Number of components
    eigenvalues = round(fit_unspecified$values[1:length(raw_scores_names_int)],2) # Eigenvalues
    ) %>%
    table_print(caption = table_counter_func("PCA Components and Eigenvalues"), colnames = c("Number of Components", "Eigenvalue"))
Table 18: PCA Components and Eigenvalues
Number of Components Eigenvalue
1 2.75
2 0.93
3 0.31
4 0.00
One Component
###***  PCA Model One Component
data.frame(
  raw_scores_names_int, # Names of each scale
  matrix(round(as.numeric(fit_unspecified$loadings), 2), # Gets standardized loadings
         attributes(fit_unspecified$loadings)$dim, # Gets number of rows and columns- # of scales, one col
         dimnames=list(raw_scores_names_int)) # Name of Scales
           ) %>% 
  table_print(caption = table_counter_func("One Component PCA"), colnames = c("Scale", "Standardized Loading")) 
Table 19: One Component PCA
Scale Standardized Loading
Word Raw Score 0.88
Color Raw Score 0.98
Color Word Raw Score 0.79
Interference Raw Score -0.63

The model with one component yielded chi-square of 2254.52.

Two Components
fit_two_components <- child %>% 
  dplyr::select(setNames(all_of(raw_scores_int), raw_scores_names_int)) %>% 
  principal(., nfactors = 2, rotate="promax")
data.frame(
  raw_scores_names_int,
  matrix(round(as.numeric(fit_two_components$loadings), 2), 
         attributes(fit_two_components$loadings)$dim, 
         dimnames=list(raw_scores_names_int, c("Processing", "Interference")))) %>% 
  table_print(caption = table_counter_func("Stroop PCA Two Factor"), colnames = c("Scale", "Stroop Raw Scores", "Interference")) 
Table 20: Stroop PCA Two Factor
Scale Stroop Raw Scores Interference
Word Raw Score 0.68 -0.31
Color Raw Score 0.77 -0.33
Color Word Raw Score 1.11 0.35
Interference Raw Score 0.17 1.06

The model with two components yielded chi-square of 2013.12. Interference seemed to be unique and load into its own component, while other Stroop measures loaded strongly into one component.

A scree plot was constructed to explore the appropriate number of dimensions the scales assume.

child %>% 
  dplyr::select(setNames(all_of(raw_scores_int), raw_scores_names_int)) %>% 
  scree.plot(., simu = 20, use="P", title = figure_counter_func("Scree Plot of PCA"))

It can be seen that a model with two components has an eigenvalue slightly below 1, typically considered the threshold for the number of dimensions. The optimal number of dimensions for the Stroop was analyzed with various methods.

###*** Optimal number of components using various tests from parameters package
child %>% 
     dplyr::select(setNames(all_of(raw_scores_int), raw_scores_names_int)) %>% 
     n_components(
         .,
         type = "PCA",
         rotation = "promax",
         algorithm = "default",
         package = c("nFactors", "psych"),
         cor = NULL,
         safe = TRUE
     ) %>% capture.output() %>% # Gets text output to format without asterisks
  .[3] %>% # Summary is third
  cat()

The choice of 1 dimensions is supported by 7 (53.85%) methods out of 13 (Bentler, Optimal coordinates, Acceleration factor, Parallel analysis, Kaiser criterion, Scree (SE), Velicer’s MAP).

Taken together, the above analyses suggest that viewing all four scales as one dimension is optimal, but that there is support to also conceptualize Stroop scales as assuming two dimensions, one with Color, Word, and Color-Word and the other with Interference.

Structural Equation Modeling

Structural Equation Modeling was used to further explore the factor structure suggested by PCA above. Alpha, from Cronbach’s Alpha, and Omega, a measure of congeneric, or composite, reliability model statistics are presented. In general, composite reliability is seen to be a more accurate indicator of structure of scales for a number of reasons, including that it is more unbiased than alpha and seen to be more accurate (Peterson & Kim, 2013). Interference was not included, as it is calculated from the other scales and not independent.

One-Factor Model
###*** SEM One Factor and Composite Reliability
# Specify model
  one_factor.model_no_interference <-
 '                        total =~ word_raw_score + color_raw_score + color_word_raw_score'
  
  #  Fit the model
  fit_one_factor_no_Int <- lavaan::cfa(one_factor.model_no_interference, data = child)
  # Get Reliabilities and print
  reliability(fit_one_factor_no_Int, return.total = TRUE)[1:2,] %>% cbind.data.frame(names(.), .) %>%
    mutate_if(is.numeric,round,2) %>% 
    table_print(colnames = c("Reliability Measure", "Reliability"), caption = table_counter_func("Stroop One Factor Reliability"))
Table 21: Stroop One Factor Reliability
Reliability Measure Reliability
alpha 0.85
omega 0.87
###*** SEM with two factors. Not done here. Doesn't fit will
  # Two Factor Model with no Interference
#   two_factor.model_interference <-
# '
#                        color_word =~ a*word_raw_score + a*color_raw_score
#                        cw_int =~ b*color_word_raw_score 
#                        
#                         word_raw_score ~~ word_raw_score
#                         color_raw_score ~~ color_raw_score
#                         color_word_raw_score  ~~ color_word_raw_score 
# '
#   
#   #  Fit the model
#   fit_two_factor_int <- lavaan::cfa(two_factor.model_interference, data = child, std.lv=TRUE)
#   
#   # Factor Loadings
#     sl <- standardizedSolution(fit_two_factor_int)
#     sl <- sl$est.std[sl$op == "=~"]
# 
#   # Print SEM Paths
#   # semPaths(fit_two_factor_int,"std", edge.label.cex = 2, edge.label.color = "black", exoVar = FALSE,
#   #          exoCov = FALSE, nodeLabels = c("Color", "Word", "Color-Word", "Interference", "Processing \n Speed", "Stroop"), sizeMan = 17, sizeMan2 = 10, sizeInt = 10, sizeLat = 17, label.cex = 1.3)
#   # title(main = figure_counter_func("SEM Paths"), line = 2)
#   
#   node_labels <- c("Color", "Word", "Color-Word", "Processing \n Speed", "Stroop")
#   # Print SEM Paths
#   semPaths(fit_two_factor_int,"std", label.prop = .8, edge.label.cex = 1.5, edge.label.color = "black",  exoVar = FALSE, nodeLabels = node_labels, exoCov = FALSE, sizeMan = 8, sizeInt = 10, sizeLat = 13, label.cex = 1, layout = "tree", rotation = 2)
# 
# # Compute residual variance of each item
# re <- 1 - sl^2
# 
# # Compute composite reliability
# composite_reliability <- round(sum(sl)^2 / (sum(sl)^2 + sum(re)),4)
# as.data.frame(reliability(fit_two_factor_int, return.total = T)[1:2,]) %>%
#   round_df(., 3) %>%
#   table_print(colnames = c("Processing Speed", "Factors Combined"), caption = table_counter_func("Stroop Two-Factor SEM w/ Interference")) %>% 
#   column_spec(3, border_left = "1px solid gray") 

Reliability

Test-Retest

# Give maximum length of time allowed for test-retest
time_limit <- max(child_retest$retest_length)

To assess test-retest reliability, 24 participants were re-administered the Stroop up to 90 days between initial test (“Test”) and retest (“Retest”). Information about number of participants, length of time between test and retest, other information, and the impact of these factors on scores is presented below.

  child_retest %>%
    dplyr::select(contains("raw_score")) %>% #Get all scale scores
    pivot_longer(everything()) %>% 
    rowwise() %>% 
    mutate(
      test_occasion = factor(ifelse(grepl("_initial", name), "Test", "Retest"), levels = c("Test", "Retest")), # Group by test or retest if it is initial = 'test', comparison = 'retest'
      name = tools::toTitleCase(gsub("_", " ", sub("_[^_]+$", "", name))) # Prettify names, drop initial, comparison and underscore
    ) %>% 
    ungroup() %$% # magrittr for piping if no built in data object
    describeBy(value, list(test_occasion, name), mat = TRUE) %>% # Groups by scale and test occasion
    arrange(factor(group2, levels = raw_scores_names_int)) %>% # Arrange in custom order
  # Following mutate removes duplicate occurences
    mutate(group2 = ifelse(duplicated(group2), "", group2),
           n = ifelse(duplicated(n), "", n)) %>% 
    dplyr::select(group2, group1, n, mean, sd, se, median, min, max, skew, kurtosis) %>%
      mutate_if(is.numeric, round, 2) %>%
    rename('Test' = 'group1', 'Scale' = 'group2') %>% 
    table_print(caption = table_counter_func("Test-Retest Descriptive Statistics"), colnames = colnames(.))
Table 22: Test-Retest Descriptive Statistics
Scale Test n mean sd se median min max skew kurtosis
Word Raw Score Test 24 77.38 23.53 4.80 79.0 18 106 -0.92 0.19
Retest 79.92 23.65 4.83 81.5 25 115 -0.67 -0.17
Color Raw Score Test 53.33 15.74 3.21 54.5 25 83 -0.07 -0.83
Retest 57.17 16.59 3.39 54.0 27 88 0.28 -0.76
Color Word Raw Score Test 33.58 11.93 2.43 31.0 19 63 0.70 -0.49
Retest 36.12 13.18 2.69 33.5 19 68 0.83 -0.23
Interference Raw Score Test -19.75 7.80 1.59 -20.0 -35 -3 0.15 -0.41
Retest -21.04 6.89 1.41 -21.0 -35 -5 0.07 -0.04
###*** Compute ICC measures
## Compute with irr
# Note that, the two-way mixed-effects model and the absolute agreement are recommended for both test-retest and intra-rater reliability studies (Koo et al., 206).

# Create df with test-retest for each scale

# Create list of each initial test score and retest df's
  icc_list <- list(cbind.data.frame(child_retest$word_raw_score_initial, child_retest$word_raw_score_comparison), cbind.data.frame(child_retest$color_raw_score_initial, child_retest$color_raw_score_comparison), cbind.data.frame(child_retest$color_word_raw_score_initial, child_retest$color_word_raw_score_comparison), cbind.data.frame(child_retest$interference_raw_score_initial, child_retest$interference_raw_score_comparison)) 

# Apply the list of test-retest df's to calculate icc for each
  icc_output_list <- lapply(icc_list, function(df_list){
    irr::icc(
      df_list, model = "twoway", 
      type = "consistency", unit = "single"
      )
  })
  names(icc_output_list) <- raw_scores_names_int

Intraclass correlation coefficients (ICC) are presented as measures of test-retest reliability. Two-way mixed-effects model and the absolute agreement to derive intraclass correlation coefficients were used. The following table presents ICC statistics for each scale.

###*** Prints ICC list created above
# Create lists with icc statistic, 95% CI, F-value, p-value
  lapply(icc_output_list,function(out_list) {data.frame(
      val = paste0(round(out_list$value,2)," (", round(out_list$lbound,2), " - ", round(out_list$ubound,2), ")"),
      f_val = round(out_list$Fvalue, 2),
      p_val = p_value_func(out_list$p.value)
    )
  }) %>%
    bind_rows() %>%
    add_column(scales = raw_scores_names_int, .before = T) %>% # Scale names
    table_print(colnames = c("Scale", "ICC Value (95% CI)", "F", "P"), caption = table_counter_func("ICC Test-Retest Statistics"))
Table 23: ICC Test-Retest Statistics
Scale ICC Value (95% CI) F P
Word Raw Score 0.94 (0.87 - 0.97) 33.64 p < .001
Color Raw Score 0.95 (0.88 - 0.98) 37.72 p < .001
Color Word Raw Score 0.93 (0.85 - 0.97) 28.18 p < .001
Interference Raw Score 0.78 (0.55 - 0.9) 7.98 p < .001

Concordance Correlation Coefficients were also produced as a measure of test-retest reliability.

Concordance Correlation

###*** Concordance correlation statistics

  lapply(raw_scores_int, function(scale) {
    child_retest %>% 
    # Select all test-retest scores from df with similar column beginning names for scores
      dplyr::select(starts_with(scale)) %>% 
      data.matrix() %>% # Matrix needed for ccc
      agree.ccc() %>% # Calculates ccc from each test_retest scores
      data.frame() %>% 
      mutate(across(where(is.numeric),round,2)) 
  }) %>% 
    bind_rows() %>% 
    cbind.data.frame(raw_scores_names_int, .) %>% # Add Scale names
    table_print(colnames = c("Scale", "Concordance Correlation", "Lower Bound", "Upper Bound"), caption = table_counter_func("Concordance Correlation")) %>%
  header_func(., header_text = c(" ", "95% CI"), col_length = c(2,2))
Table 24: Concordance Correlation
95% CI
Scale Concordance Correlation Lower Bound Upper Bound
Word Raw Score 0.94 0.86 0.98
Color Raw Score 0.92 0.85 0.96
Color Word Raw Score 0.91 0.78 0.97
Interference Raw Score 0.78 0.52 0.90

Bland-Altman and Means and Differences plots were produced to assess test-retest values. Bland-Altman plots show individual test-retest differences and a confidence interval band around the mean. Plots with a number of values falling outside this interval can suggest problems with test-retest reliability.

Means and Differences plots are plots with test and retest values as coordinates. A fitted line is compared to a unity line (a line representing perfect test to retest correspondence) to compare agreement of test-retest values.

###*** Print Bland-Altman and Means Differences plots
# Go through each scale and generate list of df's for each scale
  lapply(raw_scores_int, function(scale){
    child_retest %>% 
      dplyr::select(starts_with(scale)) %>% 
      mutate(across(everything(), as.numeric))
  }) -> test_retest_list

# Generate two columns to print each scale plots side-by-side with html
  # Generate two column header for all plots
  cat("<div class = 'row h-100'>",
        "<div class = 'col-md-6'>",
          "<h3>Bland-Altman Plot</h3>",
        "</div>",
        "<div class = 'col-md-6'>",
          "<h3>Means and Differences Scatterplot</h3>",
        "</div>",
      "</div>")

Bland-Altman Plot

Means and Differences Scatterplot

  # Print two column rows for each scale
    lapply(1:length(test_retest_list), function(i) {
        cat("<div class = 'row h-100'>",
            paste("<h4>", raw_scores_names_int[i], "</h4>"), # Each scale name
            "<div class = 'col-md-6'>")
          # Bland Altman plot with test, retest as df
            print(bland.altman.plot(
                as.numeric(test_retest_list[[i]][,1] %>% pull()), 
                as.numeric(test_retest_list[[i]][,2] %>% pull()), 
              graph.sys = "ggplot2")) 
          cat("</div>",
          "<div class = 'col-md-6'>")
        # Print Means Differences plot
          print(
            test_retest_list[[i]] %>%
              as.data.frame() %>%
              ggplot(., aes(x = .[,1], y = .[,2])) + # Set up scatterplot
              geom_point() + # All test-retest points
              geom_smooth(aes(linetype = "Fitted Line"), method = "lm",se = F) +
              geom_abline(aes(linetype = "Unity Line" , intercept = 0, slope = 1)) +
              labs(x = "Test", y = "Retest")
          ) 
          cat("</div>",
            "</div>")
    })-> suppress_print_msg

Word Raw Score

Color Raw Score

Color Word Raw Score

Interference Raw Score

Each of the scales had significant ICC statistics, with moderate to strong correlation. However, it was also desired to ascertain the effect the length of time had on test and retest scores, which can impact the correlation, as well as have practical ramifications about practice effects and recommended length of time between retesting.

To examine the effect of time on test-retest results:

  1. Plots were constructed that show the change in score from Test to Retest across the time between tests (“Score Change Over Time Between Tests Plot”),
  2. Linear models were constructed with length of time between test as predictor and test-retest difference as outcome. These models were compared to null models. Results of likelihood ratio tests are presented.
  3. For those scales where time was a significant predictor of score change, the time until no difference between test and retest was calculated. The 95% Confidence Interval of a prediction of score change being 0 was provided.
###*** For each scale, 1) Plot Difference between test-retest over time, 2) Linear Model for Difference predicted by Time, 3) In case significant, predicts time until no difference
# Two column titles
  cat("<div class = 'row h-100'>",
        "<div class = 'col-md-6'>",
            "<h4>Score Change Over Time Between Tests Plot</h4>",
        "</div>",
        "<div class = 'col-md-6'>",
            "<h4>Score Change Over Time Significance Test and Prediction of Time Until No Difference in Test-Retest</h4>",
        "</div>",
      "</div>")

Score Change Over Time Between Tests Plot

Score Change Over Time Significance Test and Prediction of Time Until No Difference in Test-Retest

## Function builds plot, sets up lm, and predicts when 0 for significant lm for each scale
  lapply(raw_scores_int, function(scale){
    child_retest %>% 
      mutate(
        retest_time = as.numeric(age_calc(test_date_initial, enddate = test_date_comparison, units = "days")), # Date -> numeric for time from test to retest
      # Calulate change from test-retest for each scale
        !! paste0(scale, "_change") := !! sym(paste0(scale, "_comparison")) - !! sym(paste0(scale, "_initial"))
        ) %>%
        {
          { . -> tmp } %>% # Stores this df in tmp to be used in lm below
        
        ## ggplot of difference in test-retest score over time  
          ggplot(., aes(x = retest_time, y = !! sym(paste0(scale, "_change")))) + 
            geom_point() +
            geom_smooth(method = "lm", formula = y ~ x) + # No nonlinear effects found
            xlab("Time Between Test and Retest") +
            ylab("Score Change") +
            ggtitle(figure_counter_func(paste("Change in", tools::toTitleCase(gsub("_", " ", scale)), "\n Over Time From Test to Retest"))) -> retest_time_plot
          
        ## LM for retest_time as predictor of score difference compared to null for each scale
          # Outcome variable
          outcome <- sym(paste0(scale, "_change"))
          outcome <- ensym(outcome)
          # Predictor
          predictor <- sym("retest_time")
          predictor <- ensym(predictor)
          # Models
          full_expr <- expr(!! outcome ~ as.numeric(!! predictor))
          null_expr <- expr(!! outcome ~ 1)
          
          full_model <- lm(full_expr, data = tmp) # data from tmp df that was stored from above
          null_model <- lm(null_expr, data = tmp)
          # Compare full and null model with lrtest
          lrtest(full_model, null_model) %$% # Passes test results
            {
            ## Predict when test-retest will be 0, if lm significant
              {.$`Pr(>Chisq)`[-1] -> p_value} # lrtest p-value
              # Conditional to calculate time to 0 test-retest difference
                if(p_value < .05){
                # This is intercept divided by time coefficient beta to calculate value when time is 0--from lm terms
                  retest_length <- -full_model$coefficients[1] / full_model$coefficients[2] 
                  # We want to get 95% confidence interval for prediction of 0 difference
                  predict(full_model, newdata = data.frame(retest_time = retest_length), interval = 'confidence') %>% # Use confidence interval b/c prediction within range of data
                    data.frame() %>% 
                    dplyr::select(-1) %>% # Drop predicted value, we know it's 0
                    add_column(retest_length, .before = 1) %>%  # This is the time until 0
                    mutate(across(where(is.numeric),round,2)) %>% 
                    table_print(
                      caption = table_counter_func("Days Until Expected Test-Retest Difference is Zero"), 
                      colnames = c("Days", "95% Lower", "95% Upper")) -> predicted_df
              } else {
                  predicted_df <- "No prediction" # If lm nonsignificant
              }
              
          # DF of lm results    
            cbind.data.frame(
              chi = round(Chisq[-1],2), 
              df = abs(Df[-1]), 
              p = p_value_func(`Pr(>Chisq)`[-1])
              ) %>% 
            table_print(
              colnames = c("Chisq", "Df", "Pr(>Chisq)"), 
              caption = table_counter_func("Main Effects of Difference between Test and Retest")) -> retest_time_lm
     
          # Prints each of above in two columns, right has two rows
            cat(
              "<div class = 'row h-100'>")
                cat("#####", tools::toTitleCase(gsub("_", " ", scale)), "\n")
                cat("<div class = 'col-md-6'>")
                  print(retest_time_plot)
              cat(
                "</div>",
                "<div class = 'col-md-6'>"
                )
                  cat(knit_print(retest_time_lm))
                  cat(knit_print(predicted_df))
                cat(
                "</div>",
              "</div>",
              "<hr>"
              )
            }
          }
  }) -> suppress_print_msg
Word Raw Score
Table 26: Main Effects of Difference between Test and Retest
Chisq Df Pr(>Chisq)
7.28 1 p = 0.007
Table 25: Days Until Expected Test-Retest Difference is Zero
Days 95% Lower 95% Upper
48.88 -3.53 3.53

Color Raw Score
Table 28: Main Effects of Difference between Test and Retest
Chisq Df Pr(>Chisq)
8.14 1 p = 0.004
Table 27: Days Until Expected Test-Retest Difference is Zero
Days 95% Lower 95% Upper
67.78 -3.27 3.27

Color Word Raw Score
Table 29: Main Effects of Difference between Test and Retest
Chisq Df Pr(>Chisq)
0.23 1 p = 0.635
[1] “No prediction” No prediction

Interference Raw Score
Table 31: Main Effects of Difference between Test and Retest
Chisq Df Pr(>Chisq)
6.16 1 p = 0.013
Table 30: Days Until Expected Test-Retest Difference is Zero
Days 95% Lower 95% Upper
47.2 -2.15 2.15

As can be seen in the above results, there were significant improvements in scores from test to retest that lessened over time for Color, Word, and Interference scales. There were expected to be no differences in scores within approximately two months (maximum 67 days for Color) for all scales from test to retest. This topic should continue to be explored to examine mediating variables and with a larger sample.

Validity

A number of participants were administered multiple instruments to assess the concurrent validity of the Stroop with other measures, reviewed below. In analyses involving multiple comparisons of instruments, pairwise deletion was used for missing data.

Content Validity with Previous Edition

None of the current Stroop stimulus content or administration procedures were changed from the previous edition. As such, there was no attempt to compare the content of the previous and current revised edition.

Construct Validity

Measures

Woodcock Johnson- Third Edition (WJ)

The Woodcock Johnson- Third Edition (WJ-3; Woodcock et al., 2003) is an academic and cognitive battery. The following subtests were used:

  1. Letter-Word Identification (LW)- This subtest assesses ability to recognize words and letters in isolation
  2. Word Attak (WA)- This subtest requires phonetic decoding
  3. Punctuation and Capitalization (PC)- Subtest of English language mechanics

Taken together, these tasks are considered useful measures of an individual’s ability to interpret and communicate visual language and are considered important skill components of reading (Mather & Gregg, 2001). Raw scores from all subtests were used.

Comprehensive Trail-Making Test (CTMT)

Comprehensive Trail-Making Test (CTMT; Reynolds, 2002) involves connecting multiple stimuli with various distractions under timed conditions. It is considered a useful assessment of attention, resistance to distractions, and processing speed (Gray, 2006).

  1. The T-scores of individual tasks of the CTMT were summed to yield a Total T-Score (“Trails Total”).

Concurrent Validity

Descriptive statistics and histograms of scores from the sample for each measure were calculated. Interference score was not used in further analyses as is not independent from other Stroop scores:

###*** Following produces summary table and histogram panel for each group of scores from content validity assessments
  lapply(2:length(all_scores_list), function(i){ # all_scores_list is names list of scales from each assessment
  ## This is summary table
    child_content_df %>%
      dplyr::select(all_scores_list[[i]]) %>%
      filter(if_any(everything(), ~ !is.na(.x))) %>%  # Goes across all columns in each row and keeps those that aren't all NA
        summarize(
          n = n(),
          across(-n, # Don't summarize across n
                 list(
                   mean = ~ paste0(round(mean(.x, na.rm = T),2), "(", round(sd(.x, na.rm = T),2), ")"), # Mean(SD)
                   sem = ~ round(psych::describe(.x)$se, 2), # SEM
                   min_max = ~ paste(min(.x, na.rm = T), "-", max(.x, na.rm = T)) # Minimum - Maximum Score
                 )
          )
        ) %>%
    # Use kable, instead of table_print because need to customize
      kable(col.names = c("*N*", rep(c(paste("Mean", "(SD)"), "SEM", "Min-Max"), length(all_scores_list[[i]]))), # Headers for each group of scores
            caption = table_counter_func(paste("Overall", toupper(names(all_scores_list)[i]), "Descriptives")), # Scale name
            format = "html", table.attr = "style='width:30%;'", # Use this with font_size below to reduce width of table
            ) %>% 
      kable_styling(font_size = 10) %>% # By experimentation, fits plot by side--make style-width as small as you want above-- it won't go smaller than font-size
      # Adds grouping header for each variable
        add_header_above(header = data.frame(names = c("",all_scores_names_list[[i]]),
                                             colspan = c(1, rep(3,length(all_scores_names_list[[i]])))) # score name above each group of summary stats
        ) %>%
      # Border separating each variable
        column_spec(seq(2, 3*length(all_scores_names_list[[i]]), 3),
                    border_left = "1px solid gray") -> content_validity_table
  
  ## Histogram panel for scores from each assessment
    child_content_df %>%
        dplyr::select(all_scores_list[[i]]) %>% # All scales from each assessment
        pivot_longer(everything()) %>%
        ggplot(., aes(x = value)) +
          histogram_plot(binwidth_option = "sturge") + # Function from helpers to control binwidth
          theme_bw() +
          facet_wrap(~name,scales = "free_x", 
                    # Prettifies names of scales
                     labeller = labeller(name = setNames(all_scores_names_list[[i]], all_scores_list[[i]]))) +
        # Caps assessment name
          ggtitle(figure_counter_func(paste(toupper(names(all_scores_list)[i]), "Histogram"))) +
          labs(y = "Number of Participants", x = "Raw Score") -> content_validity_histogram
    
    # Puts table and plot in row of two columns side-by-side
      cat(
        paste("<h4>", toupper(names(all_scores_list)[i]), "</h4>"),
          "<div class = 'row h-100'>",
            "<div class = 'col-md-6'>"
        )
              print(content_validity_table)
          cat(
            "</div>",
            "<div class = 'col-md-6'>"
            )
              print(content_validity_histogram)
            cat(
            "</div>",
          "</div>")
  }) -> suppress_print_msg

WJ

Table 32: Overall WJ Descriptives
LW Raw Score
PC Raw Score
WA Raw Score
N Mean (SD) SEM Min-Max Mean (SD) SEM Min-Max Mean (SD) SEM Min-Max
77 56.1(13.21) 1.5 14 - 74 29.58(7.6) 0.87 5 - 44 22.26(6.98) 0.8 2 - 34

CTMT

Table 33: Overall CTMT Descriptives
Total Raw Score
N Mean (SD) SEM Min-Max
37 239.89(47.21) 7.76 132 - 373

Correlation with Other Measures

The following correlation matrix presents the correlation of all concurrent measures.

###*** Generate correlation matrix of all scales from convergent assessments

  # correlation_matrix function from: https://paulvanderlaken.com/2020/07/28/publication-ready-correlation-matrix-significance-r/

# Method saves original df to tmp to get scale names, then applies to correlation matrix 
  convergent_cor_mat <- child_content_df %>% # DF with all convergent assessment scores
    dplyr::select(ends_with("raw_score"), -interference_raw_score) %>% # Interference is not independent
    {
      {. -> tmp} %>% # Store df with raw scores in tmp to get colnames
      correlation_matrix(digits = 2, replace_diagonal = T, use = 'lower') -> col_mat # This is function from helper
      dimnames(col_mat) <- (rep(list(tools::toTitleCase(gsub("_", " ", colnames(tmp)))), 2)) # Adds pretty colnames from original df
      col_mat %>% 
        data.frame() %>% 
        setNames(., tools::toTitleCase(gsub("_", " ", colnames(tmp))))
    }
# Print 
  convergent_cor_mat %>% 
    table_print(colnames = colnames(.), 
                caption = "NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001",
                row_names = T)
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001
Word Raw Score Color Raw Score Color Word Raw Score LW Raw Score PC Raw Score WA Raw Score Total Raw Score
Word Raw Score
Color Raw Score 0.76***
Color Word Raw Score 0.62*** 0.82***
LW Raw Score 0.82*** 0.73*** 0.63***
PC Raw Score 0.81*** 0.69*** 0.60*** 0.91***
WA Raw Score 0.78*** 0.66*** 0.63*** 0.89*** 0.83***
Total Raw Score 0.27 0.52*** 0.41* 0.05 0.18 0.13

The following is a graphical representation of correlation using Principal Components Analysis of all measures. Points closer in space represent closer correlations.

###*** mdspca with other measures
child_content_df %>% 
  dplyr::select(ends_with("raw_score"), -interference_raw_score) %>% # Get all concurrent measures
      setNames(tools::toTitleCase(gsub("_", " ", colnames(.)))) %>% # Prettify names
      mdspca()
      title(main = figure_counter_func("PCA All Scales"), line = -1) # Bring title down

Convergent and Discriminant Validity

As was reviewed, Stroop scales were found to correlate with other measures. The extent to which Stroop scores together formed an integral construct and were distinct from other measures was examined. Scales were assigned to factors representing the measures from which they came.

Confirmatory factor analysis (CFA) was conducted to assess the convergent and discriminant validity of the Stroop with concurrent measures.

###*** CFA with discriminant validity
# Set up model, assigning scales to chosen factors- explored different relationships
  # two_factor_cfa_stroop_all <- "
  # fluency =~ word_raw_score + color_raw_score + LW_raw_score + PC_raw_score + WA_raw_score 
  # stroop =~ color_word_raw_score + total_raw_score 
  # stroop ~~ fluency
  # "
# Stroop and all other measures as own scales, with contrasts
  two_factor_cfa_stroop_all <- "
  stroop =~ word_raw_score + color_raw_score + color_word_raw_score
  wj =~ LW_raw_score + PC_raw_score + WA_raw_score
  ctmt =~ total_raw_score 
  stroop ~~ wj
  stroop ~~ ctmt
  "

# Conduct CFA
  fit_two_factor_cfa_stroop_all<- cfa(two_factor_cfa_stroop_all, data = child_content_df, missing = "ml")
  # summary(fit_two_factor_cfa_stroop_all, standardized = TRUE, fit.measures =T) # Explore results

# Get fit measures to assess cfa
  fitMeasures(fit_two_factor_cfa_stroop_all, # CFA Model
            # Choose measures
              c("chisq", "df", "pvalue", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper", "rmsea.pvalue", "srmr")) %>%
    data.frame() %>% 
    mutate_if(is.numeric, round, 3) %>%
    t() %>%
    data.frame() %>% 
    mutate(
      rmsea = paste0(rmsea, " (", rmsea.ci.lower, "-", rmsea.ci.upper, ")"), # RMSEA 95% CI
      pvalue = p_value_func(pvalue), 
      rmsea.pvalue = p_value_func(rmsea.pvalue), 
        .keep = "unused"
      ) %>% 
    table_print(caption = table_counter_func("Two-Factor Analysis for Stroop and Concurrent Measures"), 
                colnames = c("Chi-Square", "Df", "Chi-square p-value", "RMSEA (95% CI)", "RMSEA p-value", "SRMR"))
Table 34: Two-Factor Analysis for Stroop and Concurrent Measures
Chi-Square Df Chi-square p-value RMSEA (95% CI) RMSEA p-value SRMR
40.009 12 p < .001 0.145 (0.097-0.196) p = 0.001 0.121
# Construct table with Latent Variable Factor Loading
  parameterEstimates(fit_two_factor_cfa_stroop_all)[1:(fitMeasures(fit_two_factor_cfa_stroop_all, "df")-1),] %>% # Choose report stats to display
      mutate(across(where(is.numeric),round,3)) %>% 
    data.frame() %>% 
    filter(op == "=~") %>% # Only keeps rows that have scale within factor
    mutate(
      lhs = tools::toTitleCase(ifelse(duplicated(lhs), "", lhs)), # Grouping value
      rhs = tools::toTitleCase(gsub("_", " ", rhs)), # Prettify Scale name
      est = paste0(est, " (", ci.lower, "-", ci.upper, ")"), # 95% CI
      pvalue = ifelse(pvalue < .001, "< .001", pvalue), # Round p-values
        .keep = "unused"
    ) %>% 
    dplyr::select(-op) %>% 
      table_print(caption = table_counter_func("Latent Variable Factor Loading"), 
                  colnames = c("Factor", "Scale", "Estimate (95% CI)", "SE", "Z", "p"))
Table 35: Latent Variable Factor Loading
Factor Scale Estimate (95% CI) SE Z p
Stroop Word Raw Score 1 (1-1) 0.000 NA NA
Color Raw Score 0.801 (0.664-0.939) 0.070 11.417 < .001
Color Word Raw Score 0.544 (0.438-0.651) 0.054 10.035 < .001
Wj LW Raw Score 1 (1-1) 0.000 NA NA
PC Raw Score 0.538 (0.479-0.598) 0.030 17.803 < .001
WA Raw Score 0.483 (0.425-0.542) 0.030 16.151 < .001
Ctmt Total Raw Score 1 (1-1) 0.000 NA NA

Scales significantly loaded into the factors they were assigned.

# Reliability of factors
  reliability(fit_two_factor_cfa_stroop_all)%>% 
    as.data.frame() %>%  
      mutate(across(where(is.numeric),round,3)) %>% 
      slice(1:2) %>% # Just want alpha and omega in this case
      table_print(caption = "Reliability", 
                  colnames = tools::toTitleCase(colnames(.)),
                  row_names = T)
Reliability
Stroop Wj
alpha 0.846 0.907
omega 0.878 0.965

There appeared to be significant discriminant validity between factors.

# Discriminant validity
  discriminantValidity(fit_two_factor_cfa_stroop_all, merge = T) %>% 
      mutate(
        across(where(is.numeric),round,3),
        est = paste0(est, " (", ci.lower, "-", ci.upper, ")"), # 95% CI
        pvalue = ifelse(`Pr(>Chisq)` < .001, "< .001", `Pr(>Chisq)`), # Round p-values
          .keep = "unused"
      ) %>% 
      dplyr::select(est, Chisq, Df, pvalue) %>% 
        table_print(caption = table_counter_func("Discriminant Validity"), 
                    colnames = c("Estimate (95% CI)", "Chisq", "Df", "p"))
Table 36: Discriminant Validity
Estimate (95% CI) Chisq Df p
0.769 (0.667-0.87) 108.180 14 < .001
0.547 (0.131-0.962) 108.224 14 < .001
0.277 (-0.315-0.868) 110.177 14 < .001

Grand Summary

Standardization

The Children’s Stroop Color and Word Test Normative Update standardization was conducted with 111 participants, ages 4 to 14, generally representative of the U.S. population. Stratification variables were reviewed and assessed during analyses.

Models were constructed, scaled score tables were constructed from these models, and reliability and validity were assessed. The administration methods and stimului of this version of the Stroop were identical to the previous version.

Structure

The structure of the Stroop was examined. The scales demonstrated significant correlation and moderately strong internal consistency (Cronbach’s alpha = 0.846, without Interference).

Principal Components Analysis suggested that the Stroop was best represented as one component, but also had two-dimension properties, with Color and Word as one dimension, representing fluid processing, and Color-Word, related to Interference, representing processing with effortful inhibition.

Reliability

The Stroop demonstrated moderate-to-strong test-retest reliability (approximately 0.78 to 0.95 for intraclass correlation coefficients and concordance correlation statistics). Analysis of retest scores found that there would be no expected difference from test to retest performance across scales after about two months, suggesting this is an advisable period of time to avoid practice effects.

Validity

The content of this normative update of the Stroop remained unchanged from the immediately previous version. The test’s construct validity was examined by administration of purportedly similar measures of brief processing efficiency. Signifiant moderate-to-strong correlations were found between Stroop scales and these other measures and convergent and discriminant validity with these measures.

Appendices

Appendix A: T-Score Tables for All Scales

These tables are incomplete and only for illustration. Full tables may be obtained in official Stroop Children’s Normative Update Manual.

###*** FOLLOWING PROCEDURES CREATE T-SCORE TABLES FOR EACH SCALE BY OBTAINING PREDICTED MEAN SCORE FOR EACH AGE AND SD FOR EACH SCALE, THEN GENERATING T-SCORES FOR EACH RAW SCORE, THEN COMBINING ALL AGES INTO EACH SCALE TABLE###***

###*** For each scale, function calculates predicted score and se for each age, sd for overall scale
  lapply(1:length(raw_scores_int), function(i){
    # Gets Standard error for each age- Isn't used here, as too small-- SD for each scale is more appropriate
      se_list <- lapply(
        seq(floor(min(child$decimal_age)) + 1, ceiling(max(child$decimal_age)), 1), # Goes through all ages 5-14
        function(age) {
          # For each lm model, generates predicted scores and SE
          predicted_score_se <- round(
            predict(
              full_lm_data[[i]], 
              data.frame(decimal_age = age), se.fit=TRUE)$se.fit,2) # Predict with each age
          data.frame(age, predicted_score_se) # DF of each age and SE
          })
    # Gets Mean Predicted score for each age
    fit_list <- lapply(seq(floor(min(child$decimal_age)), ceiling(max(child$decimal_age)), 1), # Goes through all ages 5-14
                       function(age) {
                         # For each lm model, generates predicted scores
        predicted_score<- round(
          predict(
            full_lm_data[[i]], 
            data.frame(decimal_age = age), se.fit=TRUE)$fit, 2) # Predict with each age
        data.frame(age, predicted_score) # DF of each age and SE
        })
    # Put together predictions, se, and sd for each scale
      inner_join(bind_rows(fit_list), 
                 bind_rows(se_list), by = "age") %>% # joins two prediction functions from above
        cbind.data.frame(., 
                         sd = child %>% dplyr::summarize(sd = sd(!! sym(raw_scores_int[i])))) # Gets SD for whole scale
    }) -> predicted_avg_score_by_age_list

lapply(1:length(raw_scores_int), function(list_number){
  # Function goes through each age and outputs mean value and sd
    lapply(5:14, function(i){
     predicted_avg_score_by_age_list[[list_number]] %>% # Go through each scale
       filter(age == i) %>% # Go through each score within each scale
       dplyr::select(predicted_score, sd) -> predicted_df # Get mean and sd
   
   data.frame(
     raw_score = -75:150, # Extend of possible raw scores
     # Function to convert between raw score and t-score
       t_score = sapply(-75:150, function(raw_score){
        round(((raw_score - predicted_df$predicted_score)/predicted_df$sd)*10 + 50, 0)
      }),
      age = i
   ) %>%
     mutate(t_score = ifelse((t_score < 15) | (t_score > 85), NA, t_score)) # Limit to +/- 3SD from mean
  }) %>% 
    bind_rows() %>% 
    # Creates wide df with column names as t_score_age, with the t-scores as values, give NA to columns that don't have values because t-scores are outside {15,85}
    pivot_wider(names_from = age, names_glue = "{.value}_{age}", values_from = t_score, values_fill = NA) %>% 
    filter(if_any(starts_with("t_score"), ~ !is.na(.x)), # Goes across all columns in each row and keeps those that aren't all NA
           (raw_score >= 0 | list_number == 4)) %>% # Interference (#4) can have negative raw, others can't
    slice_head(n = 10) %>% # Print head for shared version of this doc. Comment out for published
    table_print(
      colnames = c("Raw Score", gsub(".*_", "", colnames(.)[-1])), 
      caption = paste(raw_scores_names_int[list_number], "T-Scores")) %>% 
    header_func(., header_text = c(" ", "Ages"), col_length = c(1, 10)) %>% # 10 is length of age span 5-14 
    {
      if (opts_knit$get("rmarkdown.pandoc.to") == "html"){
        # Border separating each variable
        column_spec(., column = 2, border_left = "1px solid gray")
      } else{
        vline(., j = 1)
      }
    } -> score_table 
    cat(knit_print(score_table))
}) -> suppress_print_msg
Word Raw Score T-Scores
Ages
Raw Score 5 6 7 8 9 10 11 12 13 14
0 36 29 24 19 15 NA NA NA NA NA
1 36 30 25 20 16 NA NA NA NA NA
2 36 30 25 20 16 NA NA NA NA NA
3 37 31 25 21 16 NA NA NA NA NA
4 37 31 26 21 17 NA NA NA NA NA
5 38 32 26 22 17 NA NA NA NA NA
6 38 32 27 22 18 NA NA NA NA NA
7 39 33 27 23 18 15 NA NA NA NA
8 39 33 28 23 19 15 NA NA NA NA
9 40 34 28 24 19 16 NA NA NA NA
Color Raw Score T-Scores
Ages
Raw Score 5 6 7 8 9 10 11 12 13 14
0 25 22 19 17 NA NA NA NA NA NA
1 26 23 20 17 NA NA NA NA NA NA
2 27 24 21 18 15 NA NA NA NA NA
3 27 24 22 19 16 NA NA NA NA NA
4 28 25 22 19 17 NA NA NA NA NA
5 29 26 23 20 17 15 NA NA NA NA
6 29 27 24 21 18 15 NA NA NA NA
7 30 27 24 22 19 16 NA NA NA NA
8 31 28 25 22 20 17 NA NA NA NA
9 32 29 26 23 20 17 15 NA NA NA
Color Word Raw Score T-Scores
Ages
Raw Score 5 6 7 8 9 10 11 12 13 14
0 31 29 26 24 22 19 17 15 NA NA
1 32 30 27 25 23 20 18 16 NA NA
2 33 31 28 26 24 21 19 17 NA NA
3 34 32 29 27 25 22 20 18 15 NA
4 35 32 30 28 25 23 21 19 16 NA
5 36 33 31 29 26 24 22 19 17 15
6 37 34 32 30 27 25 23 20 18 16
7 38 35 33 31 28 26 24 21 19 17
8 38 36 34 32 29 27 25 22 20 18
9 39 37 35 32 30 28 25 23 21 19
Interference Raw Score T-Scores
Ages
Raw Score 5 6 7 8 9 10 11 12 13 14
-53 NA NA NA NA NA NA NA NA 15 15
-52 NA NA NA NA NA NA NA 16 16 16
-51 NA NA NA NA NA NA 16 17 18 18
-50 NA NA NA NA NA 15 17 18 19 19
-49 NA NA NA NA NA 16 18 19 20 20
-48 NA NA NA NA 15 18 20 21 21 21
-47 NA NA NA NA 17 19 21 22 23 23
-46 NA NA NA 15 18 20 22 23 24 24
-45 NA NA NA 16 19 22 23 24 25 25
-44 NA NA NA 17 20 23 25 26 26 26

References

###*** Generates references, sorts alphabetically and prints
# sessionInfo()$otherPkgs - gives a list of all packages in current session
# format function pulls the text message only (not bibtex) from ref
  lapply(c('base', names(sessionInfo()$otherPkgs)), function(pkg) format(citation(pkg), style = 'text')) %>% 
    unlist() %>% 
    sort() %>% 
    data.frame() %>% 
    table_print(., caption = "R References", colnames = "References")
R References
References
Bache S, Wickham H (2020). magrittr: A Forward-Pipe Operator for R. R package version 2.0.1, <URL: https://CRAN.R-project.org/package=magrittr>;.
Bates D, Mächler M, Bolker B, Walker S (2015). “Fitting Linear Mixed-Effects Models Using lme4.” Journal of Statistical Software, 67(1), 1-48. doi: 10.18637/jss.v067.i01 (URL: https://doi.org/10.18637/jss.v067.i01).
Bates D, Maechler M (2021). Matrix: Sparse and Dense Matrix Classes and Methods. R package version 1.3-3, <URL: https://CRAN.R-project.org/package=Matrix>;.
Berkelaar M, others (2020). lpSolve: Interface to ‘Lp_solve’ v. 5.5 to Solve Linear/Integer Programs. R package version 5.6.15, <URL: https://CRAN.R-project.org/package=lpSolve>;.
Eddelbuettel D (2013). Seamless R and C++ Integration with Rcpp. Springer, New York. doi: 10.1007/978-1-4614-6868-4 (URL: https://doi.org/10.1007/978-1-4614-6868-4), ISBN 978-1-4614-6867-7.
Eddelbuettel D, Balamuta JJ (2018). “Extending extitR with extitC++: A Brief Introduction to extitRcpp.” The American Statistician, 72(1), 28-36. doi: 10.1080/00031305.2017.1375990 (URL: https://doi.org/10.1080/00031305.2017.1375990), <URL: https://doi.org/10.1080/00031305.2017.1375990>;.
Eddelbuettel D, François R (2011). “Rcpp: Seamless R and C++ Integration.” Journal of Statistical Software, 40(8), 1-18. doi: 10.18637/jss.v040.i08 (URL: https://doi.org/10.18637/jss.v040.i08), <URL: https://www.jstatsoft.org/v40/i08/>;.
Epskamp S (2019). semPlot: Path Diagrams and Visual Analysis of Various SEM Packages’ Output. R package version 1.1.2, <URL: https://CRAN.R-project.org/package=semPlot>;.
Falissard B (2012). psy: Various procedures used in psychometry. R package version 1.1, <URL: https://CRAN.R-project.org/package=psy>;.
Feng D (2020). agRee: Various Methods for Measuring Agreement. R package version 0.5-3, <URL: https://CRAN.R-project.org/package=agRee>;.
Feng D (2020). miscF: Miscellaneous Functions. R package version 0.1-5, <URL: https://CRAN.R-project.org/package=miscF>;.
Gamer M, Lemon J, <; IFPS (2019). irr: Various Coefficients of Interrater Reliability and Agreement. R package version 0.84.1, <URL: https://CRAN.R-project.org/package=irr>;.
Gohel D (2021). flextable: Functions for Tabular Reporting. R package version 0.6.10, <URL: https://CRAN.R-project.org/package=flextable>;.
Gordon M (2021). Gmisc: Descriptive Statistics, Transition Plots, and More. R package version 2.1.0, <URL: https://CRAN.R-project.org/package=Gmisc>;.
Gordon M, Gragg S, Konings P (2021). htmlTable: Advanced Tables for Markdown/HTML. R package version 2.3.0, <URL: https://CRAN.R-project.org/package=htmlTable>;.
Harrell Jr F (2021). Hmisc: Harrell Miscellaneous. R package version 4.6-0, <URL: https://CRAN.R-project.org/package=Hmisc>;.
Hastie T (2020). gam: Generalized Additive Models. R package version 1.20, <URL: https://CRAN.R-project.org/package=gam>;.
Henry L, Wickham H (2020). purrr: Functional Programming Tools. R package version 0.3.4, <URL: https://CRAN.R-project.org/package=purrr>;.
Jorgensen TD, Pornprasertmanit S, Schoemann AM, Rosseel Y (2021). : Useful tools for structural equation modeling. R package version 0.5-5, <URL: https://CRAN.R-project.org/package=semTools>;.
Kassambara A (2020). ggpubr: ‘ggplot2’ Based Publication Ready Plots. R package version 0.4.0, <URL: https://CRAN.R-project.org/package=ggpubr>;.
Kassambara A (2021). rstatix: Pipe-Friendly Framework for Basic Statistical Tests. R package version 0.7.0, <URL: https://CRAN.R-project.org/package=rstatix>;.
Knowles JE (2020). eeptools: Convenience Functions for Education Data. R package version 1.2.4, <URL: https://CRAN.R-project.org/package=eeptools>;.
Komsta L, Novomestky F (2015). moments: Moments, cumulants, skewness, kurtosis and related tests. R package version 0.14, <URL: https://CRAN.R-project.org/package=moments>;.
Kuznetsova A, Brockhoff PB, Christensen RHB (2017). “lmerTest Package: Tests in Linear Mixed Effects Models.” Journal of Statistical Software, 82(13), 1-26. doi: 10.18637/jss.v082.i13 (URL: https://doi.org/10.18637/jss.v082.i13).
Lehnert B (2015). BlandAltmanLeh: Plots (Slightly Extended) Bland-Altman Plots. R package version 0.3.1, <URL: https://CRAN.R-project.org/package=BlandAltmanLeh>;.
Lüdecke D, Ben-Shachar M, Patil I, Makowski D (2020). “Extracting, Computing and Exploring the Parameters of Statistical Models using R.” Journal of Open Source Software, 5(53), 2445. doi: 10.21105/joss.02445 (URL: https://doi.org/10.21105/joss.02445).
Microsoft, Weston S (2020). foreach: Provides Foreach Looping Construct. R package version 1.5.1, <URL: https://CRAN.R-project.org/package=foreach>;.
Muggeo VM (2003). “Estimating regression models with unknown break-points.” Statistics in Medicine, 22, 3055-3071.
Muggeo VM (2008). “segmented: an R Package to Fit Regression Models with Broken-Line Relationships.” R News, 8(1), 20-25. <URL: https://cran.r-project.org/doc/Rnews/>;.
Muggeo VM (2016). “Testing with a nuisance parameter present only under the alternative: a score-based approach with application to segmented modelling.” J of Statistical Computation and Simulation, 86, 3059-3067.
Muggeo VM (2017). “Interval estimation for the breakpoint in segmented regression: a smoothed score-based approach.” Australian & New Zealand Journal of Statistics, 59, 311-322.
Müller K, Wickham H (2021). tibble: Simple Data Frames. R package version 3.1.5, <URL: https://CRAN.R-project.org/package=tibble>;.
Peterson BG, Carl P (2020). PerformanceAnalytics: Econometric Tools for Performance and Risk Analysis. R package version 2.0.4, <URL: https://CRAN.R-project.org/package=PerformanceAnalytics>;.
Plummer M (2021). rjags: Bayesian Graphical Models using MCMC. R package version 4-12, <URL: https://CRAN.R-project.org/package=rjags>;.
Plummer M, Best N, Cowles K, Vines K (2006). “CODA: Convergence Diagnosis and Output Analysis for MCMC.” R News, 6(1), 7-11. <URL: https://journal.r-project.org/archive/>;.
R Core Team (2021). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. <URL: https://www.R-project.org/>;.
Raiche G, Magis D (2020). nFactors: Parallel Analysis and Other Non Graphical Solutions to the Cattell Scree Test. R package version 2.4.1, <URL: https://CRAN.R-project.org/package=nFactors>;.
Revelle W (2021). psych: Procedures for Psychological, Psychometric, and Personality Research. Northwestern University, Evanston, Illinois. R package version 2.1.9, <URL: https://CRAN.R-project.org/package=psych>;.
Rosseel Y (2012). “lavaan: An R Package for Structural Equation Modeling.” Journal of Statistical Software, 48(2), 1-36. <URL: https://www.jstatsoft.org/v48/i02/>;.
Ryan JA, Ulrich JM (2020). xts: eXtensible Time Series. R package version 0.12.1, <URL: https://CRAN.R-project.org/package=xts>;.
Sarkar D (2008). Lattice: Multivariate Data Visualization with R. Springer, New York. ISBN 978-0-387-75968-5, <URL: http://lmdvr.r-forge.r-project.org>;.
Su Y, Yajima, M (2021). R2jags: Using R to Run ‘JAGS’. R package version 0.7-1, <URL: https://CRAN.R-project.org/package=R2jags>;.
Terry M. Therneau, Patricia M. Grambsch (2000). Modeling Survival Data: Extending the Cox Model. Springer, New York. ISBN 0-387-98784-3.
Therneau T (2021). A Package for Survival Analysis in R. R package version 3.2-11, <URL: https://CRAN.R-project.org/package=survival>;.
Venables WN, Ripley BD (2002). Modern Applied Statistics with S, Fourth edition. Springer, New York. ISBN 0-387-95457-0, <URL: https://www.stats.ox.ac.uk/pub/MASS4/>;.
Wickham H (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. ISBN 978-3-319-24277-4, <URL: https://ggplot2.tidyverse.org>;.
Wickham H (2019). stringr: Simple, Consistent Wrappers for Common String Operations. R package version 1.4.0, <URL: https://CRAN.R-project.org/package=stringr>;.
Wickham H (2020). modelr: Modelling Functions that Work with the Pipe. R package version 0.1.8, <URL: https://CRAN.R-project.org/package=modelr>;.
Wickham H (2021). forcats: Tools for Working with Categorical Variables (Factors). R package version 0.5.1, <URL: https://CRAN.R-project.org/package=forcats>;.
Wickham H (2021). tidyr: Tidy Messy Data. R package version 1.1.4, <URL: https://CRAN.R-project.org/package=tidyr>;.
Wickham H, Averick M, Bryan J, Chang W, McGowan LD, François R, Grolemund G, Hayes A, Henry L, Hester J, Kuhn M, Pedersen TL, Miller E, Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V, Takahashi K, Vaughan D, Wilke C, Woo K, Yutani H (2019). “Welcome to the tidyverse.” Journal of Open Source Software, 4(43), 1686. doi: 10.21105/joss.01686 (URL: https://doi.org/10.21105/joss.01686).
Wickham H, Bryan J (2019). readxl: Read Excel Files. R package version 1.3.1, <URL: https://CRAN.R-project.org/package=readxl>;.
Wickham H, François R, Henry L, Müller K (2021). dplyr: A Grammar of Data Manipulation. R package version 1.0.7, <URL: https://CRAN.R-project.org/package=dplyr>;.
Wickham H, Hester J (2021). readr: Read Rectangular Text Data. R package version 2.0.2, <URL: https://CRAN.R-project.org/package=readr>;.
Xie Y (2014). “knitr: A Comprehensive Tool for Reproducible Research in R.” In Stodden V, Leisch F, Peng RD (eds.), Implementing Reproducible Computational Research. Chapman and Hall/CRC. ISBN 978-1466561595, <URL: http://www.crcpress.com/product/isbn/9781466561595>;.
Xie Y (2015). Dynamic Documents with R and knitr, 2nd edition. Chapman and Hall/CRC, Boca Raton, Florida. ISBN 978-1498716963, <URL: https://yihui.org/knitr/>;.
Xie Y (2021). knitr: A General-Purpose Package for Dynamic Report Generation in R. R package version 1.36, <URL: https://yihui.org/knitr/>;.
Zeileis A, Croissant Y (2010). “Extended Model Formulas in R: Multiple Parts and Multiple Responses.” Journal of Statistical Software, 34(1), 1-13. doi: 10.18637/jss.v034.i01 (URL: https://doi.org/10.18637/jss.v034.i01).
Zeileis A, Grothendieck G (2005). “zoo: S3 Infrastructure for Regular and Irregular Time Series.” Journal of Statistical Software, 14(6), 1-27. doi: 10.18637/jss.v014.i06 (URL: https://doi.org/10.18637/jss.v014.i06).
Zeileis A, Hothorn T (2002). “Diagnostic Checking in Regression Relationships.” R News, 2(3), 7-10. <URL: https://CRAN.R-project.org/doc/Rnews/>;.
Zhu H (2021). kableExtra: Construct Complex Table with ‘kable’ and Pipe Syntax. R package version 1.3.4, <URL: https://CRAN.R-project.org/package=kableExtra>;.