# Read data
stroop_adult <- readxl::read_xls("Stroop Adult Data.xls")
retest <- readxl::read_xls("Stroop Adult Data RETEST.xls")
wj <- readxl::read_xls("WJ3 Adult Data.xls")
ctmt <- readxl::read_xls("CTMT Adult Data.xls")
nscst <- readxl::read_xls("NSCST Data.xls")
cc_ratio <- read.csv("cc_ratio_scale.csv")
ci_ratio <- read.csv("ci_ratio_scale.csv")
stroop_effect <- read.csv("stroop_effect_scale.csv")

# Manipulate data
## Age as decimal from birth to exam date
birth_date <- as.Date(paste(stroop_adult$Byear, stroop_adult$Bmonth, stroop_adult$Bday, sep = "/"), origin = 1970-01-01)
test_date <- as.Date(paste(stroop_adult$EVyear, stroop_adult$EVmonth, stroop_adult$EVday, sep = "/"), origin = 1970-01-01)
years<- floor(age_calc(birth_date, enddate = test_date, units = "years"))
months<- floor(age_calc(birth_date, enddate = test_date, units = "months")%%12)
stroop_adult$decimal_age <- years + months/12
## Recode main data
adult <- stroop_adult %>%
  #add_labels(Gender, labels = c("Male" = "1", "Female" = "2"))
  mutate(
    Gender = dplyr::recode(as.factor(Gender), '1' = "Male", '2' = "Female", .default = "Another Value"),
    Ethnic = dplyr::recode(as.factor(Ethnic), '1' = "Caucasian", '2' = "African-American", '3' = "Asian", '4' = "Hispanic", '5' = "Native-American", '6' = "Other", .default = "Another Value"),
    SelEdGrp = dplyr::recode(as.factor(SelEdGrp), '1' = "No HS Diploma", '2' = "HS Diploma", '3' = "Some College", '4' = "BA", '5' = "Post-grad", .default = "Another Value"),
    Region = dplyr::recode(as.factor(Region), '1' = "Northeast", '2' = "Midwest", '3' = "South", '4' = "West", .default = "Another Value"),
    Commun = dplyr::recode(as.factor(Commun), '1' = "Rural", '2' = "Urban", .default = "Another Value"),
    PrimeDX = dplyr::recode(as.factor(PrimeDX), '0' = "No Dx", '2' = "LD", '4' = "Autism", '6' = "ID", '10' = "Dementia", '11' = "ADHD",
                            '12' = "ESL", '14' = "TBI", .default = "Another Value"),
    Drugs = dplyr::recode(as.factor(`Drugs?`), '1' = "No", '2' = "Yes", .default = "Another Value"),
    ColorBl = dplyr::recode(as.factor(ColorBl), '1' = "No", .default = "Another Value"),
    age = as.numeric(str_trunc(decimal_age, 1, ellipsis = "")),
         age = recode(age, '1' = "15-19", '2' = "20-29", '3' = "30-39", '4' = "40-49", '5' = "50-59", '6' = "60-69", '7' = "70-79", '8' = "80-89"),
    case_id = paste(ID, `Case #`, sep="_")
  )

duplicate_case <- subset(adult, ID=="LA145" & `Case #` == "1570")

adult <- anti_join(adult, duplicate_case, by = "case_id")

# Recode convergent datasets- get id to match across sets and join with full set, with names "full-set_other-data"
retest <- retest %>%
  mutate(case_id = paste(gsub("R","", ID), `Case #`, sep="_"))
adult_retest <- inner_join(adult, retest, by = c("case_id"))

wj <- wj %>%
  mutate(case_id = paste(ID, `Case #`, sep="_")) %>%
  rename(LW = `WJ3-LW`, WA = `WJ3-WA`)
adult_wj <- inner_join(adult, wj, by = c("case_id"))

ctmt <- ctmt %>%
  mutate(case_id = paste(ID, `Case #`, sep="_")) %>%
  rename(total = `T-Sc Sum`)
adult_ctmt <- inner_join(adult, ctmt, by = c("case_id"))

nscst <- nscst %>%
  mutate(case_id = paste(ID, `Case #`, sep="_"),
         Stroop = round(CIRatio - CCRatio,2)) 

adult_nscst <- inner_join(adult, nscst, by = c("case_id"))
# Prepare Nonverbal Stroop data for looking up
nscst_df <- list(stroop_effect)

  scale_score_func <- function(dataset, age, score) {
                            dataset %>%
 pivot_longer(., everything()[-1]) %>%
  mutate(endpoint = stringr::str_extract(name, "[A-z]$"),
         upper_age = as.numeric(gsub("[A-z]", "", name))
        ) %>%
  dplyr::select(., -"name") %>%
    pivot_wider(names_from = endpoint, values_from = value) %>%
    mutate(lower_age = as.numeric(rep(c(3,unique(upper_age)[-length(unique(upper_age))]), length(unique(.[[1]]))))) %>%
    filter(lower_age <= age & age < upper_age )%>%
    summarize(score = ifelse(score < L[1], 20, ifelse(score > H[nrow(.)], 90,
                                                    .[[1]][L <= score & score <= H ])))
  }
  
  adult_nscst <- adult_nscst %>%
  mutate(
    cc_ratio_t_rev = unlist(mapply(function(x,y) scale_score_func(cc_ratio, x, y), adult_nscst$decimal_age, adult_nscst$CCRatio)),
    ci_ratio_t_rev = unlist(mapply(function(x,y) scale_score_func(ci_ratio, x, y), adult_nscst$decimal_age, adult_nscst$CIRatio)),
    nscst_stroop_t_rev = unlist(mapply(function(x,y) scale_score_func(stroop_effect, x, y), adult_nscst$decimal_age, adult_nscst$Stroop)),
    cc_ratio_t = 50 - (cc_ratio_t_rev - 50),
    ci_ratio_t = 50 - (ci_ratio_t_rev - 50),
    nscst_stroop_t = 50 - (nscst_stroop_t_rev - 50)
  )
### Join al df's
  adult_all <- adult %>% inner_join(., adult_nscst, by = "case_id") %>% inner_join(., wj, by = "case_id") %>% inner_join(., ctmt, by = "case_id")
#### Enter following information based on scores to analyze

# Enter score names for descriptive stats
raw_score_quos <- quos(CRScore, WRScore, CWRscore)
raw_score_column_names <- c("CRScore", "WRScore", "CWRscore")
raw_score_quos_int <- quos(CRScore, WRScore, CWRscore, Interference)
raw_score_column_names_int <- c("CRScore", "WRScore", "CWRscore", "Interference")
# Names of Scores
scale_list <- list("Color Raw Score", "Word Raw Score", "Color-Word Raw Score")
score_names<- c("Color Raw Score", "Word Raw Score", "Color-Word Raw Score")
scale_score <- c(quo(CRScore), quo(WRScore), quo(CWRscore))
scale_list_int <- list("Color Raw Score", "Word Raw Score", "Color-Word Raw Score", "Interference")
score_names_int <- c("Color Raw Score", "Word Raw Score", "Color-Word Raw Score", "Interference")
scale_score_int <- c(quo(CRScore), quo(WRScore), quo(CWRscore), quo(Interference))
# Demographic vars
group_column <- c(quo(age), quo(Gender), quo(Ethnic), quo(SelEdGrp), quo(Region), quo(Commun), quo(PrimeDX))
group_column_title <- list("Age", "Gender", "Ethnicity", "Education", "Region", "Community", "Diagnosis")
# Names of all scores in all different tests
all_names <- c("Color", "Word", "Color-Word", "Interference", "Letter-Word", "Word Attack", "CC Ratio", "CI Ratio", "Nonverbal Stroop", "Trails Total")
# master dataset name
adult <- adult

Introduction

Data were collected to analyze the performance of the Stroop Color and Word Test (“Stroop”) with a sample reflective of the current population in the United States.

Using a national standardization plan, the Stroop was administered and data were collected by trained field researchers over a period of time between 2013 to 2014 with 373 participants. The field researchers represent professionals targeted to administer the Stroop (see Qualifications section of this Manual). The field researchers were trained Give more information about data collection.

In this data analysis, Stroop scores were examined, models to predict Stroop scores were constructed, scaled score tables were constructed from these models, and reliability and validity were examined.

Data Cleaning

Data were examined for integrity of entry and for aberrations in values. The details of this process are listed in Appendix Appendix # - place below to ‘Standardization’ in Appendix.

Data were cleaned using the following procedures:

  1. Cases were examined for duplicate entry using combinations of fields unlikely to result in random duplication.
  2. Raw score outliers (see below for procedure to identify outliers) were examined for error.
  3. Differences between test and retest values (see Test-Retest section) were examined for extreme values.

In all above procedures, if and when any cases were identified programmatically, those cases were consulted by visual inspection and consultation of raw data to determine if there was an incorrect value. If data were determined not to be incorrect they remained in the dataset.

Stroop Extreme Values

Scores were examined to identify outliers. Scores that were +/- 3 Standard Deviations (SD) from the mean were identified in the following table.

bind_rows(
    bind_rows(lapply(scale_score_int, function(score){
    adult %>%
      filter(!!score < (mean(!!score) - 3*(sd(!!score)))) %>%
      dplyr::select(!!!scale_score_int, !!!group_column) %>%
      mutate( #ID = str_extract(ID, "[A-z]{1,2}"),
             extreme_value = paste(quo_name(score), "Low"))
  })),
  bind_rows(lapply(scale_score_int, function(score){
    adult %>%
      filter(!!score > (mean(!!score) + 3*(sd(!!score)))) %>%
      dplyr::select(!!!scale_score_int, !!!group_column) %>%
      mutate(# ID = str_extract(ID, "[A-z]{1,2}"),
             extreme_value = paste(quo_name(score), "High"))
  })) 
) %>% 
  arrange(extreme_value) -> dat_df
ft <- table_output_func(dat_df, colnames = c(score_names_int, unlist(group_column_title), "Extreme Score"), caption =  table_counter_func("Extreme Score"))
Warning in readLines(knitr::current_input()): incomplete final line found on
'stroop_adult_analysis.Rmd'
Warning: namespace 'highr' is not available and has been replaced
by .GlobalEnv when processing object '<unknown>'
ft
Table 1: Extreme Score
Color Raw Score Word Raw Score Color-Word Raw Score Interference Age Gender Ethnicity Education Region Community Diagnosis Extreme Score
120 97 70 16 40-49 Male Caucasian BA South Rural No Dx CRScore High
31 60 12 -8 20-29 Female Caucasian HS Diploma Northeast Urban No Dx CRScore Low
102 134 76 18 30-39 Female Asian BA Northeast Urban No Dx CWRscore High
70 101 94 53 40-49 Male Other Post-grad South Urban No Dx CWRscore High
67 94 43 54 60-69 Female Other BA South Urban No Dx Interference High
70 101 94 53 40-49 Male Other Post-grad South Urban No Dx Interference High
90 125 20 -32 40-49 Female Caucasian HS Diploma Northeast Urban No Dx Interference Low
57 50 23 -4 30-39 Female African-American Some College South Urban No Dx WRScore Low
51 48 29 4 40-49 Male Hispanic No HS Diploma South Rural No Dx WRScore Low
57 52 37 10 40-49 Male Hispanic No HS Diploma South Rural No Dx WRScore Low
# %>%
  # kable(col.names = c(score_names_int, unlist(group_column_title), table_counter_func("Extreme Score"))) %>%
  # kable_options()

As can be seen in the above table, there were a relatively balanced number of extreme values above and below three standard deviations from the mean. The small number of cases did not permit statistical analysis. Upon visual inspection there did not appear to be any pattern with respect to demographic characteristics or by field researcher. As extreme values appear in naturally occurring data, and because these values did not appear to follow a pattern, suggestive of bias, the values were left in the dataset. Differences among groups represented by the above characteristics will be explored further.

# Following was performed to identify duplicate cases- 1 identified and removed
# bind_rows(subset(adult, duplicated(ID)),
# subset(adult, duplicated(ID, fromLast = T)))
# 
# bind_rows(subset(adult, duplicated(`Case #`)),
# subset(adult, duplicated(`Case #`, fromLast = T))) %>%
#   arrange(`Case #`)

# Check for abberrant values from test to retest- use below
ad_wide <- inner_join(adult, retest, by = "case_id",suffix = c(".test", ".retest"),)
 vars <- paste0("^(", paste(raw_score_column_names_int, collapse="|"), "|case_id)")
check_score_differences_df <- rbind.data.frame(
  ad_wide[abs(ad_wide$CRScore.test - ad_wide$CRScore.retest)> 30,],
  ad_wide[abs(ad_wide$WRScore.test - ad_wide$WRScore.retest)> 30,],
  ad_wide[abs(ad_wide$CWRscore.test - ad_wide$CWRscore.retest)> 30,],
  ad_wide[abs(ad_wide$Interference.test - ad_wide$Interference.retest)> 30,]) %>%
  dplyr::select(., matches(vars))

1 case was identified as a duplicate entry and removed. One incorrect value was entered for a raw score. The original raw data were consulted to correct the error.

End Appendix section

Standardization

Sample

The standardization plan was designed to approximately replicate the U.S. population along demographic characteristics typically accounted for in standardized test development. The following demographic criteria were considered, with variable name to be referred to in this analysis and corresponding levels below:

demo_levels <- function(demo_vars){
  demo_vars = enquo(demo_vars)
  adult %>% dplyr::select(!!demo_vars) %>% sapply(., levels)
}

lev <- lapply(unlist(lapply(group_column[2:7], quo_name)),demo_levels) 
lev <- sapply(lev, function(x) paste(x, collapse = ", "))

data.frame(
  demo = c("Age", "Sex/Gender", "Race/Ethnicity", "Highest Completed Education Group", "Region of the U.S. Currently Residing", "Community Size", "Primary Mental Health Diagnosis"),
  demo_name = c("Age", unlist(group_column_title)[2:7]),
  levels = c("Continuous", lev)
  ) -> dat_df
  ft <- table_output_func(dat_df, colnames = c("Description", "Variable", "Levels"), caption = table_counter_func("Demographic Criteria"))
Warning in readLines(knitr::current_input()): incomplete final line found on
'stroop_adult_analysis.Rmd'
  ft
Table 2: Demographic Criteria
Description Variable Levels
Age Age Continuous
Sex/Gender Gender Male, Female
Race/Ethnicity Ethnicity Caucasian, African-American, Asian, Hispanic, Native-American, Other
Highest Completed Education Group Education No HS Diploma, HS Diploma, Some College, BA, Post-grad, Another Value
Region of the U.S. Currently Residing Region Northeast, Midwest, South, West
Community Size Community Rural, Urban
Primary Mental Health Diagnosis Diagnosis No Dx, LD, Autism, ID, Dementia, ADHD, ESL, TBI, Another Value
  # kable(col.names = c("Description", "Variable", "Levels"), caption = table_counter_func("Demographic Criteria")) %>%
  # kable_options()

There was some variation in the proportion of levels within each variable between the sample and population, as described further in the Demographics section. Each of these variables is considered and controlled for or incorporated into the analysis of Stroop performance and in the models used to construct scaled scores, as described below.

Measures

Stroop Color and Word Test (Stroop)

The Stroop Color and Word Test- Golden & Freshwater version ref (Stroop) is a brief measure of executive functions and described in more detail in this manual. The Stroop consists of three tasks that generate three raw scores. They are:

  1. Color Congruent (“Color”)
  2. Word Congruent (“Word”)
  3. Color-Word Incongruent (“Color-Word”)

These three scores will be referred to collectively as “Direct Measure” scores.

A calculation using these scores (described in the Scoring section of this Manual) represents a fourth score:

  1. Interference (“Interference”)

This score will be referred to as a “Calculated” score to contrast with the Direct Measure scores.

Further interpretation and description of these scales is provided in the Interpretation section of this Manual.

Performance on these tasks is contextualized via scaled scores. Each score purports to represent a separate, though related, construct, so each score also represents a scale. The values are generally referred to as scores earlier in this chapter when providing descriptive statistics and constructing models for scaled scores. The content and relationship of these values as scales is discussed further below in this chapter.

Woodcock Johnson- Third Edition (WJ)

The Woodcock Johnson- Third Edition ref is an academic and cognitive battery. The following subtests were used:

  1. Letter-Word Identification (LW)- This subtest assesses ability to recognize words and letters in isolation
  2. Word Attak (WA)- This subtest requires phonetic decoding

Taken together, these tasks are considered useful measures of an individual’s ability to interpret and communicate visual language and are considered important skill components of reading ref- Mather & Gregg. Raw scores from both subtests were used.

Nonverbal Stroop Card Sorting Test (NSCST)

The Nonverbal Stroop Card Sorting Test (NSCST) ref is a short assessment of executive functions designed to elicit a Stroop effect, but administered and performed nonverbally. There are three scores obtained from the NSCST:

  1. Color-Congruent Ratio (“CC Ratio”)- this is a ratio of the time alloted to the number of items correct in a condition with congruent stimuli. It is similar to the Color and Word tasks on the Stroop Test.
  2. Color-Incongruent Ratio (“CI Ratio”)- this is a ratio of the time alloted to the number of items correct in a condition with incongruent stimuli. It is similar to the Color-Word task on the Stroop Test.

The above measures are calculated directly from participant performance. The following metric uses those scores in a calculation:

  1. Stroop Effect (“Nonverbal Stroop”)- This is the CC Ratio subtracted from the CI Ratio, so it is a comparison of a person’s ability to manage incongruent stimuli compared to management of congruent stimuli.

Scores of the above were converted to T-scores, using NSCST procedures. T-scores were reversed so that a higher score was indicative of more capable performance (e.g., a score above 50 indicated a person was better than average at processing stimuli effectively in a particular condition).

Comprehensive Trail-Making Test (CTMT)

Comprehensive Trail-Making Test (CTMT) ref involves connecting multiple stimuli with various distractions under timed conditions. It is considered a useful assessment of attention, resistance to distractions, and processing speed ref.

  1. The T-scores of individual tasks of the CTMT were summed to yield a Total T-Score (“Trails Total”).

Score Descriptions

      histogram_panel_function <- function(dataset, scores, recode_options, reorder_order){
        scores = enquo(scores)
        # recode_options = enexpr(recode_options)
        dataset %>%
        dplyr::select(!!scores) %>%
        gather("Scale", "Raw Score", convert = T) %>%
        mutate(Scale = dplyr::recode(Scale, !!!recode_options)) %>%
        arrange(match(Scale, reorder_order)) %>%
        mutate(Scales = factor(Scale, levels = reorder_order)) %>%
        ggplot(aes(`Raw Score`)) + #ggplot(., aes(x = Scales))+
        geom_bar(stat = "bin") +
        theme_bw() +
        facet_wrap(~Scales,scales = "free_x") +
        ggtitle(figure_counter_func("Raw Score Distributions")) + 
        ylab("Number of Participants")
      }

test_score_names <- list(raw_score_column_names_int, c("LW", "WA"), c("cc_ratio_t", "ci_ratio_t", "nscst_stroop_t"), "total")
recode_option <- list(list('CRScore' = "Color Raw Score", 'WRScore' = "Word Raw Score", 'CWRscore' = "Color-Word Raw Score", 'Interference' = "Interference"), list('LW' = "LW Scale", 'WA' = "WA Scale"), list('cc_ratio_t' = "CC Ratio", 'ci_ratio_t' = "CI Ratio", 'nscst_stroop_t' = "Nonverbal Stroop"), list('total' = "Total"))
test_scale_list <- list(scale_list_int, list("LW Scale", "WA Scale"), list("CC Ratio", "CI Ratio", "Nonverbal Stroop"), list("Total"))
data_set <- list(adult, wj, adult_nscst, ctmt)

# lapply(1:length(data_set), function(i) {
#   histogram_panel_function (data_set[[i]], test_score_names[[i]], recode_option[[i]], test_scale_list[[i]])
# })

Overall

The following are an overview of the scores for each measure and the characteristics of the sample overall for those scores:

adult_sum_df <- adult %>%
  summarize(n = n(),
         c_mean = paste0(round(mean(CRScore),2), '(', round(sd(CRScore),2), ')'),
         w_mean = paste0(round(mean(WRScore),2), '(', round(sd(WRScore),2), ')'),
         cw_mean = paste0(round(mean(CWRscore),2), '(', round(sd(CWRscore),2), ')'),
         int_mean = paste0(round(mean(Interference),2), '(', round(sd(Interference),2), ')'),
         c_SEM = round(psych::describe(CRScore)$se, 2),
         w_SEM = round(psych::describe(WRScore)$se, 2),
         cw_SEM = round(psych::describe(CWRscore)$se, 2),
         int_SEM = round(psych::describe(Interference)$se, 2),
         c_range = paste0(min(CRScore), "-", max(CRScore)),
         w_range = paste0(min(WRScore), "-", max(WRScore)),
         cw_range = paste0(min(CWRscore), "-", max(CWRscore)),
         int_range = paste0(min(Interference), "-", max(Interference))) %>%
  dplyr::select(n, c_mean, c_SEM, c_range, w_mean, w_SEM, w_range,  cw_mean, cw_SEM, cw_range, int_mean, int_SEM, int_range) 

wj_sum_df <- wj %>%
    summarize(n = n(),
         lw_mean = paste0(round(mean(LW),2), '(', round(sd(LW),2), ')'),
         wa_mean = paste0(round(mean(WA),2), '(', round(sd(WA),2), ')'),
         lw_SEM = round(psych::describe(LW)$se, 2),
         wa_SEM = round(psych::describe(WA)$se, 2),
         lw_range = paste0(min(LW), "-", max(LW)),
         wa_range = paste0(min(WA), "-", max(WA))) %>%
  dplyr::select(n, lw_mean, lw_SEM, lw_range, wa_mean, wa_SEM, wa_range)

nscst_sum_df <- adult_nscst %>%
    summarize(n = n(), 
         cc_ratio_mean = paste0(round(mean(cc_ratio_t),2), '(', round(sd(cc_ratio_t),2), ')'),
         ci_ratio_mean = paste0(round(mean(ci_ratio_t),2), '(', round(sd(ci_ratio_t),2), ')'),
         nscst_stroop_mean = paste0(round(mean(nscst_stroop_t),2), '(', round(sd(nscst_stroop_t),2), ')'),
         cc_ratio_SEM = round(psych::describe(cc_ratio_t)$se, 2),
         ci_ratio_SEM = round(psych::describe(ci_ratio_t)$se, 2),
         nscst_stroop_SEM = round(psych::describe(nscst_stroop_t)$se, 2),
         cc_ratio_range = paste0(min(cc_ratio_t), "-", max(cc_ratio_t)),
         ci_ratio_range = paste0(min(ci_ratio_t), "-", max(ci_ratio_t)),
         nscst_stroop_range = paste0(min(nscst_stroop_t), "-", max(nscst_stroop_t))) %>%
  dplyr::select(n, cc_ratio_mean, cc_ratio_SEM, cc_ratio_range, ci_ratio_mean, ci_ratio_SEM, ci_ratio_range, nscst_stroop_mean, nscst_stroop_SEM, nscst_stroop_range) 

ctmt_sum_df <- ctmt %>%
    summarize(n = n(),
         tot_mean = paste0(round(mean(total),2), '(', round(sd(total),2), ')'),
         tot_SEM = round(psych::describe(total)$se, 2),
         tot_range = paste0(min(total), "-", max(total))) %>%
  dplyr::select(n, tot_mean, tot_SEM, tot_range) 

data_sum_set <- list(adult_sum_df, wj_sum_df, nscst_sum_df, ctmt_sum_df)
data_sum_kable <- function(data, names){
          kable(data, col.names = c("*N*", rep(c(paste("Mean", "(SD)"), "SEM", "Min-Max"), length(names))),  caption = table_counter_func("Overall Raw Score Descriptives"), booktabs = T)  %>%
          kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, position="left") %>%
          add_header_above(header = data.frame(names = c("",names), colspan = c(1, rep(3,length(names))))) %>%
          column_spec(seq(2, 3*length(names), 3), border_left = "1px solid gray")
}

data_set_names <- list("Stroop", "WJ-3", "NSCST", "CTMT")
lapply(1:length(data_set), function(i) {
  cat(paste("<h4>", data_set_names[[i]], "</h4>"))
  cat("<div class = 'row h-100'>")
    cat("<div class = 'col-md-6'>")
      invisible(print(histogram_panel_function (data_set[[i]], test_score_names[[i]], recode_option[[i]], test_scale_list[[i]])))
    cat("</div>")
    cat("<div class = 'col-md-6'>")
      invisible(print(data_sum_kable(data_sum_set[[i]], test_score_names[[i]])))
    cat("</div>")
  cat("</div>")
})

Stroop

Table 3: Overall Raw Score Descriptives
CRScore
WRScore
CWRscore
Interference
N Mean (SD) SEM Min-Max Mean (SD) SEM Min-Max Mean (SD) SEM Min-Max Mean (SD) SEM Min-Max
373 73.31(13.62) 0.71 31-120 102.3(16.31) 0.84 48-147 42.38(11.02) 0.57 12-94 0.12(9.38) 0.49 -32-54

WJ-3

Table 4: Overall Raw Score Descriptives
LW
WA
N Mean (SD) SEM Min-Max Mean (SD) SEM Min-Max
322 68.81(5.23) 0.29 52-78 26.3(3.94) 0.22 6-32

NSCST

Table 5: Overall Raw Score Descriptives
cc_ratio_t
ci_ratio_t
nscst_stroop_t
N Mean (SD) SEM Min-Max Mean (SD) SEM Min-Max Mean (SD) SEM Min-Max
189 48.41(12.14) 0.88 10-60 51.45(7.15) 0.52 10-60 52.85(10.56) 0.77 10-80

CTMT

Table 6: Overall Raw Score Descriptives
total
N Mean (SD) SEM Min-Max
169 235.92(44.06) 3.39 109-336

[[1]] NULL

[[2]] NULL

[[3]] NULL

[[4]] NULL

Normality

The data appeared to assume normal distributions, based on visual inspection of histograms. D’Agostino Pearson Test of Normality statistics, assessing skewness, were calculated to evaluate normality. This procedure was deemed appropriate for the sample sizes:

do.call(rbind, lapply(1:length(data_set), function(i){
  do.call(rbind, lapply(1:length(test_score_names[[i]]), function(j){
    ag_res <- agostino.test(as.data.frame(data_set[[i]] %>% dplyr::select(!!as.name(test_score_names[[i]][j])))[,1])
    #shapiro_test(!!as.name(test_score_names[[i]][j]))
    c(ag_res$statistic, p = ag_res$p.value)
  }))
})
) %>% cbind.data.frame(unlist(test_scale_list), .) %>%
 mutate_if(is.numeric, round, digits = 3) %>%
  kable(col.names = c("Scale", "Skewness Score", "Z", "P"), caption = table_counter_func("D'Agostino Pearson Test of Normality of Scores")) %>%
  kable_options()
Table 7: D’Agostino Pearson Test of Normality of Scores
Scale Skewness Score Z P
Color Raw Score 0.089 0.712 0.477
Word Raw Score -0.087 -0.701 0.483
Color-Word Raw Score 0.335 2.627 0.009
Interference 0.737 5.346 0.000
LW Scale -0.788 -5.267 0.000
WA Scale -2.051 -10.247 0.000
CC Ratio -2.043 -8.021 0.000
CI Ratio -2.571 -9.103 0.000
Nonverbal Stroop -0.529 -2.914 0.004
Total -0.153 -0.841 0.400

The above table shows that some variables may have issues with normality, even with corrections for multiple comparisons. Assumptions of normality will be examined when constructing models and performing further analyses.

Demographics

Demographic Tables and Plots

The following tables and plots are of Stroop scores by demographic variable groups. Numbers of participants within each group, the mean, SD, and SEM were also identified.

For each demographic variable, a Multiple Analysis of Variance (MANOVA) was conducted to identify differences among groups within any Stroop score. ANOVA was also conducted for each variable. The results of each are shown in column headers. Though ANOVA results are shown for each variable, it is only proper to consider ANOVA outcomes for a specific score when the MANOVA, which accounts for multiple comparisons, is significant.

The results presented in these tables should be interpreted with caution as the sample was not stratified within each variable, so differences among groups within a score may be more attributable to differences in the group due to a more salient variable. Furthermore, these demographic factors are explored further, accounted for, and controlled for in models constructed and in analyses below.

# MANOVA test function for raw scores

manova_func <- function(dataset, group_column){
  group_column <- enquo(group_column)
  response_matrix <- as.matrix(dataset %>% dplyr::select(!!!raw_score_quos_int))
  predictor_variable <- as.matrix(dplyr::select(adult, !!group_column))
  res.man <- manova(response_matrix ~ predictor_variable)
  res.man
}

manova_overall_p_value_func <- function(dataset, group_column){
  group_column <- enquo(group_column)
  res.man <- manova_func(dataset, !!group_column)
  p_val <- summary(res.man)$stats[1,6]
  p_val
}

manova_scale_p_value_func <- function(dataset, group_column){
  group_column <- enquo(group_column)
  res.man <- manova_func(dataset, !!group_column)
  lapply(summary.aov(res.man), function(x) x$`Pr(>F)`[1])
}
# *** Alternative approach to construct MANOVA here.  Not used
# scales <- c("CRScore", "WRScore", "CWRscore")
# responses <- paste(scales, collapse = ",")
# myformula <- as.formula( paste0( "cbind(", responses , ")~ unit" ) )
# manova( myformula, data = plots )

 manova_p_value_results_func <- function(dataset, group_column){
   group_column <- enquo(group_column)
    p_val <- round(manova_overall_p_value_func(dataset, !!group_column),4)
        if(p_val < .05 & p_val >= .01){
      # Can choose to print full table or summary
       # summary.aov(res.man)
      paste("*, p =", p_val)
       # data.frame(unclass(summary(res.man)), check.names = FALSE, stringsAsFactors = FALSE)
    } else if (p_val < .01 & p_val >= .0001) {
        paste("**, p =", p_val)
    }  else if (p_val < .0001) {
        paste("***, p < 0.001")
    }  else {
      paste("NS, p =", p_val)
    }
  }

 manova_scales_p_value_results_func <- function(dataset, group_column){
   group_column <- enquo(group_column)
   lapply(manova_scale_p_value_func(dataset, !!group_column), function(p_val){
     if(p_val < .05 & p_val >= .01){
       # Can shoose to print full table or summary
       # summary.aov(res.man)
       paste("*, p =", round(p_val,4))
       # data.frame(unclass(summary(res.man)), check.names = FALSE, stringsAsFactors = FALSE)
     } else if (p_val < .01 & p_val >= .0001) {
       paste("**, p =", round(p_val,4))
     }  else if (p_val < .0001) {
       paste("***, p < 0.001")
     }  else {
       paste("NS, p =", round(p_val, 4))
     }
   }
   )
 }
 
 # Function to generate summary tables- can choose data, grouping, summary variables, and whether to rename
generate_summary_tbl <- function(dataset, group_column, summary_column, useColname = T) {
  group_column   <- enquo(group_column)
  # Uncomment Below if not passing list of quosures for summary variables
  # summary_column <- enquo(summary_column)
  overall_p_value_result <- manova_p_value_results_func(dataset, !!group_column)
  dataset %>% 
    group_by(!!group_column) %>% 
    summarise(
      N = n(),
      mean = round(mean(!!summary_column),2),
      SD  = round(sd(!!summary_column),2),
      SEM = round(psych::describe(!!summary_column)$se, 2)
      # Other metrics that need to be generated frequently
    ) %>% 
    # Get proportion and consolidate some columns
    mutate(grouping_column = !!group_column,
           prop = paste0(round(N/sum(N),4)*100, "%"),
           n_prop = paste0(N," (", prop, ")"),
           mean_sd = paste0(mean, " (", SD, ")"),
    ) %>%
    dplyr::select(grouping_column, n_prop, mean_sd, SEM) %>%
    ungroup -> smryDta
  
  if (useColname) 
    smryDta <- smryDta %>%  
    rename_at(
      vars(-one_of(quo_name(summary_column))), 
      ~paste(quo_name(summary_column), .x, sep="_")
    ) 
  smryDta <- smryDta %>%
    rename_at(1, ~paste(quo_name(group_column), overall_p_value_result))
  
  
  return(smryDta)
}

# Linear means test function for raw scores
  # lm_demo_test <- function(scales, demo_var){
  #   mod_diff <- lm(!!scales ~ !!demo_var, data = adult)
  #   mod_same <- lm(!!scales ~ 1, data = adult)
  #   p_val <- round(anova(mod_diff, mod_same)$`Pr(>F)`, 4)
  #   # summary(res.man)
  #   if(p_val < .05 & p_val >= .01){
  #     # Can shoose to print full table or summary
  #      # summary.aov(res.man)
  #     paste("*, p =", p_val)
  #      # data.frame(unclass(summary(res.man)), check.names = FALSE, stringsAsFactors = FALSE)
  #   } else if (p_val < .01 & p_val >= .0001) {
  #       paste("**, p =", p_val)
  #   }  else if (p_val < .0001) {
  #       paste("***, p < 0.001")
  #   }  else {
  #     paste("No significant difference, p =", round(summary(res.man)$stats[1,6], 2))
  #   }
  # }
  # 
  # sapply(scale_score, function(scales) lm_test(demo_variable, scales))

# myHeader <- list(c(" " = 1, "Color Raw Score" = 3),c(" " = 1, "Word Raw Score" = 3),c(" " = 1, "Color-Word Raw Score" = 3))

interpretation_headers <- function(scale_name, interpretation, group_column_title) {
  paste(scale_name, interpretation)
}

table_maker <- function(dataset, group_column, group_column_title){
  group_column   <- enquo(group_column)
  myHeader <-mapply(interpretation_headers, scale_list_int, manova_scales_p_value_results_func(dataset, !!group_column))
  myHeader <- data.frame(names = myHeader, colspan = 2)
  myHeader <- rbind(c(" ", 2), myHeader)
  myHeader$colspan <- as.numeric(myHeader$colspan)
  
  # Summary of Age Breakdown
  summary_df_list <- lapply(1:length(scale_score_int), {function(i)
      dataset %>% generate_summary_tbl(!!group_column, scale_score_int[[i]])
  }
  )
  cbind.data.frame(summary_df_list[[1]], lapply(2:length(scale_score_int), function(i) summary_df_list[[i]][,3:4])) %>%
        kable(col.names = c(names(summary_df_list[[1]])[1], "*N* (%)", rep(c("Mean (SD)", "SEM"),length(scale_score_int))),  caption = table_counter_func(group_column_title), booktabs = T) %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, position="left", font_size = 10) %>%
        add_header_above(header = myHeader) %>%
        column_spec(c(3,5,7, 9), border_left = "1px solid gray") %>%
        footnote(general = "NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001")
}
# Function to plot bar plots for each Raw Score by Demographic Variables
  demographics_function_plot <- function(demo_var){
    adult %>%
      dplyr::select(UQ(demo_var), CRScore, WRScore, CWRscore, Interference) %>%
      gather("Scale", "Raw Score", -UQ(demo_var), convert = T) %>%
      mutate(Scale = dplyr::recode(Scale, 'CRScore' = "Color Raw Score", 'WRScore' = "Word Raw Score", 'CWRscore' = "Color-Word Raw Score", 'Interference' = "Interference")) %>%
      arrange(match(Scale, c("Word Raw Score", "Color Raw Score", "Color-Word Raw Score", "Interference"))) %>%
      mutate(Scales = factor(Scale, levels =c("Color Raw Score", "Word Raw Score","Color-Word Raw Score", "Interference"))) %>%
      ggplot(aes(Scales, `Raw Score`, fill = UQ(demo_var)))+
      geom_col(position = "dodge") +
      theme_bw()+
      facet_wrap(~Scales,scales = "free_x")
    # One graph only
      # group_by(Ethnic) %>%
      # ggplot(stroop_adult, aes(x = PrimeDX, y = IntTscore)) +
      # geom_bar(aes(x = PrimeDX, y = IntTscore, fill = as.factor(PrimeDX)), stat = "summary", fun.y = "mean")
  }
demo_table <- lapply(1:length(group_column), function(i){
    table_maker(adult, !!group_column[[i]], group_column_title[i])
  }
)
demo_plot <- lapply(group_column, function(demo_var){
  demographics_function_plot(demo_var)
}
)
manova_column_header <- setNames(data.frame(matrix(ncol = 5, nrow = 0)),paste0("col_name_", c(1:5))) %>%
        kable(col.names = c("MANOVA Test Results", paste(score_names_int, "ANOVA")),  caption = table_counter_func("Test of Differences for following columns"), booktabs = T) %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, position="left", font_size = 12) %>%
        add_header_above(header = c(" " = 1, "Results" = 4))
  cat("<div class = 'row h-100'>")
    cat("<div class = 'col-md-6'>")
      print(manova_column_header)
Table 15: Test of Differences for following columns
Results
MANOVA Test Results Color Raw Score ANOVA Word Raw Score ANOVA Color-Word Raw Score ANOVA Interference ANOVA
    cat("</div>")
  cat("</div>")
  cat("<hr>")

for (i in 1:length(group_column)){
  cat("<div class = 'row h-100'>")
    cat("<div class = 'col-sm-6'>")
      print(demo_table[[i]])
    cat("</div>")
    cat("<div class = 'col-sm-6' style = 'margin-top: 150px'>")
      print(demo_plot[[i]])
    cat("</div>")
  cat("</div>")
  cat("<hr>")
}
Table 8: Age
Color Raw Score ***, p < 0.001
Word Raw Score *, p = 0.0196
Color-Word Raw Score ***, p < 0.001
Interference **, p = 0.0062
age ***, p < 0.001 N (%) Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM
15-19 46 (12.33%) 73.59 (12.86) 1.90 98.63 (15.47) 2.28 44.98 (10.42) 1.54 3.09 (7.69) 1.13
20-29 98 (26.27%) 74.8 (13.31) 1.34 103.97 (14.96) 1.51 45.17 (8.59) 0.87 1.83 (6.22) 0.63
30-39 68 (18.23%) 76.69 (14.43) 1.75 107.35 (16.76) 2.03 43.94 (11.71) 1.42 -0.46 (8.56) 1.04
40-49 78 (20.91%) 74.18 (13.95) 1.58 100.83 (17.57) 1.99 41.79 (13.04) 1.48 -0.55 (12.15) 1.38
50-59 48 (12.87%) 71.19 (12.27) 1.77 101.94 (17.12) 2.47 39.79 (8.61) 1.24 -2.1 (8.79) 1.27
60-69 24 (6.43%) 66.29 (8.6) 1.76 98.46 (14.89) 3.04 35.71 (8.2) 1.67 -0.67 (13.71) 2.80
70-79 7 (1.88%) 58.71 (7.85) 2.97 93 (8.49) 3.21 28.14 (8.15) 3.08 -7.86 (7.69) 2.91
80-89 4 (1.07%) 52.5 (14.39) 7.19 89.75 (7.68) 3.84 24.75 (6.9) 3.45 -8 (3.74) 1.87
Note:
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001

Table 9: Gender
Color Raw Score NS, p = 0.2732
Word Raw Score NS, p = 0.3811
Color-Word Raw Score NS, p = 0.4947
Interference NS, p = 0.7944
Gender NS, p = 0.7459 N (%) Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM
Male 149 (39.95%) 72.36 (13.37) 1.10 101.39 (15.72) 1.29 41.9 (10.79) 0.88 -0.04 (9.03) 0.74
Female 224 (60.05%) 73.94 (13.78) 0.92 102.9 (16.7) 1.12 42.7 (11.19) 0.75 0.22 (9.63) 0.64
Note:
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001

Table 10: Ethnicity
Color Raw Score NS, p = 0.5943
Word Raw Score NS, p = 0.3279
Color-Word Raw Score NS, p = 0.3575
Interference NS, p = 0.2484
Ethnic NS, p = 0.1326 N (%) Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM
Caucasian 249 (66.76%) 73.85 (13.66) 0.87 102.69 (16.06) 1.02 42.85 (10.83) 0.69 0.25 (8.77) 0.56
African-American 35 (9.38%) 73.94 (14.2) 2.40 100.11 (17.29) 2.92 39.14 (11.17) 1.89 -3.23 (8.06) 1.36
Asian 15 (4.02%) 72.2 (15.6) 4.03 104.33 (22.95) 5.93 44.13 (13.85) 3.57 1.8 (9.17) 2.37
Hispanic 54 (14.48%) 70.26 (12.22) 1.66 99.2 (16.28) 2.22 41.04 (9.44) 1.28 0.3 (7.64) 1.04
Native-American 4 (1.07%) 71.5 (17.67) 8.84 104 (6.38) 3.19 42.5 (7.55) 3.77 0.25 (3.86) 1.93
Other 16 (4.29%) 75.38 (14.05) 3.51 109.12 (10.79) 2.70 44.94 (15.52) 3.88 3.12 (20.9) 5.23
Note:
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001

Table 11: Education
Color Raw Score **, p = 0.0006
Word Raw Score ***, p < 0.001
Color-Word Raw Score **, p = 0.0035
Interference NS, p = 0.0778
SelEdGrp ***, p < 0.001 N (%) Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM
No HS Diploma 45 (12.06%) 68.78 (14) 2.09 93.98 (16.1) 2.40 42.76 (10.35) 1.54 3.33 (7.16) 1.07
HS Diploma 73 (19.57%) 70.6 (13.4) 1.57 96.14 (15.25) 1.79 38.19 (9.67) 1.13 -2.14 (8.26) 0.97
Some College 93 (24.93%) 72.26 (12.55) 1.30 103.01 (15.46) 1.60 41.96 (10.54) 1.09 -0.2 (8.38) 0.87
BA 111 (29.76%) 77.48 (13.54) 1.29 105.86 (13.45) 1.28 44.27 (11.25) 1.07 0.27 (10.61) 1.01
Post-grad 49 (13.14%) 74.63 (13.57) 1.94 110.2 (19.48) 2.78 45.02 (12.45) 1.78 0.76 (11.01) 1.57
Another Value 2 (0.54%) 59.5 (2.12) 1.50 90 (2.83) 2.00 36.5 (7.78) 5.50 0.5 (6.36) 4.50
Note:
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001

Table 12: Region
Color Raw Score NS, p = 0.9414
Word Raw Score **, p = 0.0007
Color-Word Raw Score NS, p = 0.1347
Interference **, p = 0.0005
Region **, p = 0.0001 N (%) Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM
Northeast 197 (52.82%) 73.03 (13.51) 0.96 105.27 (16.45) 1.17 41.29 (11.24) 0.80 -1.53 (8.81) 0.63
Midwest 5 (1.34%) 74.8 (4.66) 2.08 101.6 (10.92) 4.88 46.8 (2.77) 1.24 3.8 (3.03) 1.36
South 126 (33.78%) 73.33 (13.84) 1.23 97.63 (16.5) 1.47 44.02 (11.44) 1.02 2.82 (10.77) 0.96
West 45 (12.06%) 74.33 (14.39) 2.15 102.44 (12.54) 1.87 42.02 (8.79) 1.31 -0.67 (5.72) 0.85
Note:
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001

Table 13: Community
Color Raw Score NS, p = 0.8988
Word Raw Score ***, p < 0.001
Color-Word Raw Score NS, p = 0.1167
Interference **, p = 0.0011
Commun ***, p < 0.001 N (%) Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM
Rural 54 (14.48%) 73.09 (15.25) 2.07 93.3 (15.48) 2.11 44.56 (10.64) 1.45 3.94 (9.39) 1.28
Urban 319 (85.52%) 73.35 (13.35) 0.75 103.82 (15.97) 0.89 42.01 (11.06) 0.62 -0.53 (9.24) 0.52
Note:
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001

Table 14: Diagnosis
Color Raw Score NS, p = 0.0615
Word Raw Score NS, p = 0.2278
Color-Word Raw Score NS, p = 0.5123
Interference NS, p = 0.979
PrimeDX NS, p = 0.84 N (%) Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM Mean (SD) SEM
No Dx 329 (88.2%) 74.15 (13.71) 0.76 103 (16.7) 0.92 42.84 (11.15) 0.61 0.2 (9.54) 0.53
LD 1 (0.27%) 61 (NA) NA 89 (NA) NA 41 (NA) NA 5 (NA) NA
Autism 1 (0.27%) 74 (NA) NA 110 (NA) NA 38 (NA) NA -6 (NA) NA
ID 21 (5.63%) 67.95 (12.44) 2.71 97.1 (11.91) 2.60 38.9 (9.59) 2.09 -0.86 (9.31) 2.03
Dementia 4 (1.07%) 62.25 (10.14) 5.07 103.5 (10.6) 5.30 37.25 (11.24) 5.62 -1.25 (7.85) 3.92
ADHD 1 (0.27%) 62 (NA) NA 94 (NA) NA 35 (NA) NA -2 (NA) NA
ESL 2 (0.54%) 54 (9.9) 7.00 75 (4.24) 3.00 30 (1.41) 1.00 -1 (1.41) 1.00
TBI 10 (2.68%) 71.2 (9.99) 3.16 97.1 (9.45) 2.99 39.8 (11.83) 3.74 -1.2 (9.1) 2.88
Another Value 4 (1.07%) 64 (9.27) 4.64 101 (15.17) 7.58 43.25 (3.86) 1.93 4.25 (3.3) 1.65
Note:
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001

Raw Scores Across Age

The variable Age was grouped into groups with ranges of 10 years for convenience in the tables above. MANOVA showed significant differences among groups. Also, age is known from research with the Stroop and similar instruments to have a significant relationship with Stroop performance. Age for each participant was expressed as a decimal value using years and months, with days truncated (that is, months were rounded down in all cases). As Age is a continuous variable, Stroop scores with respect to Age were examined (note: the directly measured Stroop Scores- Color, Word, and Color-Word were examined here, while the calculated score Interference was examined separately below).

## Following function to construct the regression equation for each model

    reg_eq <- function(mod) {paste(c(round(summary(mod)$coefficients[1,1],4), mapply(function(coefficient_names, coefficients) paste("+", coefficient_names, "*", round(coefficients,4)), coefficient_names = names(summary(mod)$coefficients[2:length(summary(mod)$coefficients[,1]),1]), coefficients = summary(mod)$coefficients[2:length(summary(mod)$coefficients[,1]),1])), collapse = " ")}

## Following constructs dataframe with important values for any model
# Need following terms
 # final_random_effects_model <- lmer model, like: lmer(CRScore ~ decimal_age + I(decimal_age^2) + SelEdGrp + (1|PrimeDX), data = adult)
  
  # Choose best lm model on RHS
  # chosen_lm_model <- mod2
  
  # Model vs. less restrictive
  # critical_comparison <- anova(mod1, mod2)

model_dataframe <- function(final_random_effects_model, chosen_lm_model, critical_comparison){
  
  # test for random effects
  random_effects_ranova <- lmerTest::ranova(final_random_effects_model)

  # Identifies included random effects and p-values and collapses
  random_effects_terms <- paste(attributes(random_effects_ranova)$row.names[-1], sep = ",")
  random_effects_terms_p_value <- paste(round(random_effects_ranova[,6][-1],4), sep = ",")
  
  # Best fitting model, test for heterskedasticity- get p-value
  bp_p.value <- lmtest::bptest(chosen_lm_model)$p.value
  
  # Regression equation
  regression_equation <- reg_eq(chosen_lm_model)
  
  model_stats_df <- data.frame(equation = regression_equation, model_significance = round(critical_comparison[,6][2],4),
                                  random_effects = random_effects_terms, random_effects_p = random_effects_terms_p_value, 
                                  bp_test_p.value = round(bp_p.value,4))
  row.names(model_stats_df) <- NULL
  return(model_stats_df)
}

Models to Predict Raw Scores

Models were constructed to predict each Stroop score. The models were then used to construct scaled scores for each individual raw score, beginning with the Direct Measure scores.

Summary Table of Models and Diagnostic Measures

As can be seen in the plots, Stroop performance across Raw scores seemed to remain steady, or slightly increase, through early and into mid-adulthood before declining beginning in mid- to late-adulthood. The relationship between Age and Scores may be nonlinear, so in models constructed below, quadratic and cubic effects were considered.

In addition to Age, Stroop performance was seen to vary significantly by Education level, which was used in previous editions to contextualize performance.

Mixed effects models were constructed for each of the Raw Scores, with Age and Education as predictors and other demographic variables entered as random effects. Full models with interactions between Age and Education and all demographic variables were constructed, along with Age with polynomial effects.

Terms were removed from the models and the fuller and reduced models were compared with maximum likelihood analyses. Maximum likelihood analysis has advantages in analysis compared to Wald-based methods of:

  • Being more robust to deviations from normalcy and tolerating unbalanced groups
  • Allowing for missing data (though there were none in this sample)
  • Incorporating full information from the models to allow for precise standard error estimation.

At each step of model construction full models with significant differences compared to reduced models were seen to contain important variables and adopted. When models were not significantly different the more parsimonious model was used. The following table contains the final regression equations, significance compared to fuller model, and random effects found to be significant. Because six random effects and each effects’ interaction with Age was considered, a more conservative alpha level was set using a Bonferonni correction of .05/number of copmarisons(12)*number of Scores(3) = 0.0014.

Random effects that had associated p-values below 0.05 were identified. It can be seen that none had p-values below the set alpha level, so all random effects were dropped from models.

In addition, a Breusch-Pagan (BP) Test was conducted for each model to test for heteroskedasticity. All results were above alpha = 0.05, so all models were considered not to violate assumptions of homogeneity.

cbind.data.frame(score_names, bind_rows(color_model_dataframe, word_model_dataframe, cw_model_dataframe)) %>%
        kable(col.names = c("Scale", "Regression Equation", "Significance Pr(>F)", "Included Random Effects", "Random Effects P-Value", "BP-Test P-Value"),  caption = table_counter_func("Regression Model Diagnostics for Stroop Raw Scores"), booktabs = T) %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Table 16: Regression Model Diagnostics for Stroop Raw Scores
Scale Regression Equation Significance Pr(>F) Included Random Effects Random Effects P-Value BP-Test P-Value
Color Raw Score 67.4738 + decimal_age * 0.235 + I(decimal_age^2) * -0.0057 + SelEdGrpHS Diploma * 4.4776 + SelEdGrpSome College * 4.6917 + SelEdGrpBA * 10.7539 + SelEdGrpPost-grad * 8.887 + SelEdGrpAnother Value * -5.6689 0.0273 (1 | PrimeDX) 0.0084 0.4498
Word Raw Score 98.9497 + decimal_age * -0.2043 + SelEdGrpHS Diploma * 5.3024 + SelEdGrpSome College * 11.2381 + SelEdGrpBA * 15.1596 + SelEdGrpPost-grad * 20.3274 + SelEdGrpAnother Value * 0.9523 0.0000 (1 | Region) 0.0007 0.1699
Color-Word Raw Score 49.2216 + decimal_age * -0.2658 + SelEdGrpHS Diploma * -0.476 + SelEdGrpSome College * 2.0692 + SelEdGrpBA * 5.7824 + SelEdGrpPost-grad * 7.5984 + SelEdGrpAnother Value * 0.1562 0.0000 (1 | Commun) 0.0197 0.6097

Plots of Predicted Scores and Residuals

The following plots represent the models above, with fitted Raw Score values plotted across age, allowing for differences by Education group. As was identified in the table above, Age was found to have a significant quadratic term as a predictor of Color Raw score, which explains the nonlinear lines in that plot.

In addition, diagnostic plots are provided, in which residuals are plotted against fitted values and age. It does not appear that there is any heteroskedasticity, as identified by those plots.

Scaled Scores

The models were used to construct predicted Raw Score value tables (Appendix Appendix #). Ideally the above specified regression models would be used to convert given Raw scores to exact Predicted Scores. However, as this edition of the Stroop test is intended to be published in paper format, it is necessary to generate lookup tables usable by clinicians. As such, it is necessary to round the predicted values to the nearest integer. As both Age and Education were found to significantly predict Raw Scores, both were used to construct tables. For each Raw Score, a separate table was constructed for each Education level. Bands of Age values scores were then assigned to corresponding integer predicted Raw score values in the range of scores obtained from the sample. The Age values associated with a Raw score did not have regular intervals. This is a departure from the previous edition of the Stroop, where Age values were consistently grouped with ranges of five years (e.g., “21-25”, “26-30”). It is anticipated that this will allow more precise esitmation of predicted scores. This assertion will be evaluated, as follows.

Color

color_se_list <- lapply(levels(adult$SelEdGrp), function(degree) {
  lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
    predicted_score_se <- round(predict(c_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$se.fit,2)
    data.frame(age, predicted_score_se, degree)
    }, degree = degree
    )
}
  )
color_fit_list <- lapply(levels(adult$SelEdGrp), function(degree) {
  lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
    predicted_score<- round(predict(c_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$fit, 2)
    cbind(age, predicted_score, degree)
    }, degree = degree
    )
}
  )

color_fit_df_list <- lapply(color_fit_list, function(x) (do.call(rbind,x)))
color_fit_df <- do.call(rbind.data.frame, c(color_fit_df_list, make.row.names = F))
color_fit_df$age <- as.numeric(color_fit_df$age)
color_fit_df$predicted_score <- as.numeric(color_fit_df$predicted_score)
color_fit_df$degree <- factor(color_fit_df$degree, levels = levels(adult$SelEdGrp))

color_age_plot <- ggplot() +
  geom_point(data = adult, aes(x = decimal_age, y = CRScore, color = SelEdGrp)) +
         geom_smooth(data = color_fit_df, aes(x = age, y = predicted_score, color = degree)) +
  scale_x_continuous(breaks = seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 5)) +
  scale_y_continuous(breaks = seq(min(adult$CRScore) - min(adult$CRScore) %%5, max(adult$CRScore), 5)) +
  labs(color = "Education Level") +
  xlab("Age") +
  ylab("Color Raw Score") +
  ggtitle(figure_counter_func("Predicted Color Scores \n by Education Across Ages"))

res <- residuals(c_mod)
yhat <- fitted(c_mod)

  cat("<div class = 'row h-100'>")
    cat("<div class = 'col-md-6' style = 'margin-top: 150px'>")
      color_age_plot

    cat("</div>")
    cat("<div class = 'col-md-6'>")
      plot(yhat,res, xlab="fitted values", ylab="residuals")

      plot(adult$decimal_age,res, xlab="age", ylab="residuals")

    cat("</div>")
  cat("</div>")

Word

word_se_list <- lapply(levels(adult$SelEdGrp), function(degree) {
  lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
    predicted_score_se <- round(predict(w_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$se.fit,2)
    data.frame(age, predicted_score_se, degree)
    }, degree = degree
    )
}
  )
word_fit_list <- lapply(levels(adult$SelEdGrp), function(degree) {
  lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
    predicted_score<- round(predict(w_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$fit, 2)
    cbind(age, predicted_score, degree)
    }, degree = degree
    )
}
  )

word_fit_df_list <- lapply(word_fit_list, function(x) (do.call(rbind,x)))
word_fit_df <- do.call(rbind.data.frame, c(word_fit_df_list, make.row.names = F))
word_fit_df$age <- as.numeric(word_fit_df$age)
word_fit_df$predicted_score <- as.numeric(word_fit_df$predicted_score)
word_fit_df$degree <- factor(word_fit_df$degree, levels = levels(adult$SelEdGrp))

word_age_plot <- ggplot() +
  geom_point(data = adult, aes(x = decimal_age, y = WRScore, color = SelEdGrp)) +
         geom_smooth(data = word_fit_df, aes(x = age, y = predicted_score, color = degree)) +
  scale_x_continuous(breaks = seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 5)) +
  scale_y_continuous(breaks = seq(min(adult$WRScore) - min(adult$WRScore) %%5, max(adult$WRScore), 5)) +
  labs(color = "Education Level") +
  xlab("Age") +
  ylab("Word Raw Score") +
  ggtitle(figure_counter_func("Predicted Word Scores \n by Education Across Ages"))

res <- residuals(w_mod)
yhat <- fitted(w_mod)

  cat("<div class = 'row h-100'>")
    cat("<div class = 'col-md-6' style = 'margin-top: 150px'>")
      word_age_plot

    cat("</div>")
    cat("<div class = 'col-md-6'>")
      plot(yhat,res, xlab="fitted values", ylab="residuals")

      plot(adult$decimal_age,res, xlab="age", ylab="residuals")

    cat("</div>")
  cat("</div>")

Color-Word

color_word_se_list <- lapply(levels(adult$SelEdGrp), function(degree) {
  lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
    predicted_score_se <- round(predict(cw_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$se.fit,2)
    data.frame(age, predicted_score_se, degree)
    }, degree = degree
    )
}
  )
color_word_fit_list <- lapply(levels(adult$SelEdGrp), function(degree) {
  lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
    predicted_score<- round(predict(cw_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$fit, 2)
    cbind(age, predicted_score, degree)
    }, degree = degree
    )
}
  )

color_word_fit_df_list <- lapply(color_word_fit_list, function(x) (do.call(rbind,x)))
color_word_fit_df <- do.call(rbind.data.frame, c(color_word_fit_df_list, make.row.names = F))
color_word_fit_df$age <- as.numeric(color_word_fit_df$age)
color_word_fit_df$predicted_score <- as.numeric(color_word_fit_df$predicted_score)
color_word_fit_df$degree <- factor(color_word_fit_df$degree, levels = levels(adult$SelEdGrp))

color_word_age_plot <- ggplot() +
  geom_point(data = adult, aes(x = decimal_age, y = CWRscore, color = SelEdGrp)) +
         geom_smooth(data = color_word_fit_df, aes(x = age, y = predicted_score, color = degree)) +
  scale_x_continuous(breaks = seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 5)) +
  scale_y_continuous(breaks = seq(min(adult$CWRscore) - min(adult$CWRscore) %%5, max(adult$CWRscore), 5)) +
  labs(color = "Education Level") +
  xlab("Age") +
  ylab("Color-Word Raw Score") +
  ggtitle(figure_counter_func("Predicted Color-Word Scores \n by Education Across Ages"))

res <- residuals(cw_mod)
yhat <- fitted(cw_mod)

  cat("<div class = 'row h-100'>")
    cat("<div class = 'col-md-6' style = 'margin-top: 150px'>")
      color_word_age_plot

    cat("</div>")
    cat("<div class = 'col-md-6'>")
      plot(yhat,res, xlab="fitted values", ylab="residuals")

      plot(adult$decimal_age,res, xlab="age", ylab="residuals")

    cat("</div>")
  cat("</div>")
# # Test for age and education interaction- don't model Interference Predicted Scores
#   mod1 <- lm(IntPrescore ~ decimal_age + SelEdGrp, data = adult)
#   mod3 <- lm(IntPrescore ~ decimal_age*SelEdGrp, data = adult)
#   anova(mod1, mod3)
# 
#   # Test for quadratic age
#   mod2 <- lm(IntPrescore ~ decimal_age + I(decimal_age^2) + SelEdGrp, data = adult)
#   anova(mod1, mod2)
# 
#   # Test for Cubic Effect
#   mod3 <- lm(IntPrescore ~ decimal_age + I(decimal_age^2) + I(decimal_age^3) + SelEdGrp, data = adult)
#   anova(mod2, mod3)
#   anova(mod1, mod3)
#   # Test Interaction age and education
# 
#   mod_all_rand <- lmer(IntPrescore ~ decimal_age + I(decimal_age^2) + SelEdGrp + (1|Gender:decimal_age) + (1|Gender) + (1|Drugs:decimal_age) + (1|Drugs) + (1|PrimeDX:decimal_age) + (1|PrimeDX) + (1|Commun:decimal_age) + (1|Commun) + (1|Region:decimal_age) + (1|Region) + (1|Ethnic:decimal_age) + (1|Ethnic), data = adult)
#   lmerTest::ranova(mod_all_rand)
#   int_mod_rand <- lmer(IntPrescore ~ decimal_age + SelEdGrp + (1|PrimeDX), data = adult)
#   lmerTest::ranova(int_mod_rand)
#   int_mod <- mod1
# lmtest::bptest(int_mod)

Compare New Prediction Models to Previous

These new models’ accuracy in predicting Stroop scores were compared to algorithms used in the previous version of the Stroop Test. The absolute value of the prediction from actual score for each participant in the sample were calculated as the distance of the actual value from predicted value. The mean of these distances were then calculated for each score for previous and new models and compared.

predicted_score_fit_mean<- function(model, raw_score) {
  round(mean(abs(round(predict(model, data.frame(decimal_age = adult$decimal_age, SelEdGrp = adult$SelEdGrp), se.fit=TRUE)$fit, 0) -
    adult[[raw_score]])), 4)
}
raw_score_mods <- list(c_mod, w_mod, cw_mod)

new_model_deviation <- mapply(predicted_score_fit_mean, raw_score_mods, raw_score_column_names)

old_model_deviation <- round(c(mean(abs(adult$CRESscore)), mean(abs(adult$WRESscore)), mean(abs(adult$CWRESscore))),4)

difference <- old_model_deviation - new_model_deviation

deviation_df <- rbind.data.frame(new_model_deviation, old_model_deviation, difference)
rownames(deviation_df) = c("New Stroop Models", "Old Stroop Models", "Difference Between Old and New Models Average Residuals")
deviation_df %>%
  kable(col.names = c("Color", "Word", "Color-Word"), caption = table_counter_func("Residuals of New and Old Stroop Predicted Score Models")) %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Table 17: Residuals of New and Old Stroop Predicted Score Models
Color Word Color-Word
New Stroop Models 9.7480 11.6300 7.5657
Old Stroop Models 10.8606 12.0295 8.2118
Difference Between Old and New Models Average Residuals 1.1126 0.3995 0.6461

As can be seen in the above table the new Stroop models all produced predictions that were on average closer to actual scores than the previous version of the Stroop. These predicted scores are in Appendix:

Following is in Appendix

Predicted Score Tables for Color, Word, Color-Word

#   prediction_list_function <- function(model){
#     predicted <- lapply(levels(adult$SelEdGrp), function(degree){
#         out_df <- sapply(seq(15, 90, 1), function(age) {
#         round(predict(model, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$fit, 0)
#         }
#         )
#          names(out_df) <- seq(15, 90, 1)
#         # row.names(out_df) <- seq(20, 130, 5)
#         return(out_df)
#     }
#     )
#     names(predicted) <- levels(adult$SelEdGrp)
#     return(predicted)
#   }
# mod_list <- list(c_mod, w_mod, cw_mod)
# predicted_score_list <- lapply(1:3, function(i){
#   # model <- as.formula(model)
#   predict_list <- prediction_list_function(mod_list[[i]])
#   
#   lapply(predict_list, function(predicted_list){
#       not_duplicated_score_names <- as.numeric(names(predicted_list[!duplicated(predicted_list)]))
#       upper_range <- not_duplicated_score_names -1
#       names <- c(upper_range[-1], ">")
#       not_duplicated_score_names <- paste(not_duplicated_score_names, names, sep = "-")
#       not_duplicated_scores <- predicted_list[!duplicated(predicted_list)]
#       not_duplicated_df <- cbind.data.frame(not_duplicated_score_names, not_duplicated_scores)
#     }
#     )
# }
# )
#   names(predicted_score_list) <- score_names
# 
#   cat("<div class = 'row h-100'>")
#     cat("<div class = 'col-md-4'>")
#       cat("<h3>Color</h3>")
#     cat("</div>")
#     cat("<div class = 'col-md-4'>")
#       cat("<h3>Word</h3>")
#     cat("</div>")
#     cat("<div class = 'col-md-4'>")
#       cat("<h3>Color-Word</h3>")
#     cat("</div>")
#   cat("</div>")
#   
# lapply(1:length(levels(adult$SelEdGrp)), function(i){
#   cat("<div class = 'row h-100'>")
#   cat(paste0("<h4>",levels(adult$SelEdGrp)[i], "</h4>"))
#   lapply(1:3, function(j){
#     cat("<div class = 'col-md-4'>")
#       print(predicted_score_list[[j]][i] %>%
#             kable(row.names = F, col.names = c("Age", "Predicted Score")) %>%
#             kable_options())
#       cat("\n")
#     cat("</div>")
#   })
#     cat("\n")
#   cat("</div>")
#   cat("<hr>")
# })

Stop Appendix

Residual T-Scores

Next, T-score tables were constructed for residuals, that is, the predicted score for an individual subtracted from the actual score that individual obtained for each scale. Scores were examined for normality and found to be sufficiently normal. Tables can be found in Appendix # appendix

word_predicted_scores <- predict(w_mod)
# Compare to current predicted scores
word_deviation <- adult$WRScore - word_predicted_scores
  t_score_func <- function(raw_score, scale) {
    # scale   <- enquo(scale)
    round(((raw_score - mean(scale))/sd(scale))*10 + 50, 0)
  }
 word_t_scores <- sapply(seq(-50, 50, 1), t_score_func, scale = word_deviation)
color_predicted_scores <- predict(c_mod)
# Compare to current predicted scores
color_deviation <- adult$CRScore - color_predicted_scores
  color_t_scores <- sapply(seq(-50, 50, 1), t_score_func, scale = color_deviation)
color_word_predicted_scores <- predict(cw_mod)
# Compare to current predicted scores
color_word_deviation <- adult$CWRscore - color_word_predicted_scores
color_word_t_scores <- sapply(seq(-50, 50, 1), t_score_func, scale = color_word_deviation)
scale_deviation_list <- list(color_deviation, word_deviation, color_word_deviation)
 cbind.data.frame(score_names, do.call(bind_rows, lapply(1:length(scale_deviation_list), function(i) #shapiro_test(scale_deviation_list[[i]]))) %>%
   c(agostino.test(scale_deviation_list[[i]])$statistic, p = agostino.test(scale_deviation_list[[i]])$p.value)))) %>%
 mutate_if(is.numeric, round, digits = 3)%>%
  kable(col.names = c("Scale", "Skewness", "Z-Value", "P")) %>%
  kable_options()
Scale Skewness Z-Value P
Color Raw Score 0.017 0.136 0.892
Word Raw Score -0.203 -1.616 0.106
Color-Word Raw Score 0.394 3.062 0.002
cbind.data.frame(mapply(function(scale, scale_name) {
  set_names(as.data.frame(scale), scale_name)
  }, scale_deviation_list, score_names)
) %>%
  pivot_longer(., everything(), "Scale") %>%
  arrange(match(Scale, score_names)) %>%
  mutate(Scales = factor(Scale, levels = score_names)) %>%
  ggplot(aes(value)) + #ggplot(., aes(x = Scales))+
        geom_bar(stat = "bin") +
        theme_bw() +
        facet_wrap(~Scales,scales = "free_x") +
        ggtitle(figure_counter_func("Residual Distributions")) + 
        ylab("Number of Participants")

Insert to Appendix #

T-Scores for Word, Color, Color-Word

# data.frame(raw_deviation_score = seq(-50, 50, 1), word_t = word_t_scores, color_t = color_t_scores, color_word_t = color_word_t_scores) %>%
#   kable(col.names = c("Raw Deviation", "Word", "Color", "Color-Word")) %>%
#   kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
#   add_header_above(c(" ", "T-Scores" = 3))

Stop Appendix

Interference Scores

Interference scores were then examined to determine the best model for predicting scores.

# Plot with intercept
ggplot(adult, aes(x = decimal_age, y = Interference)) +
  geom_smooth() +
  geom_hline(aes(yintercept=mean(Interference), color= "Overall Mean"),
            linetype="dashed") +
  ggtitle(figure_counter_func("Interference Scores Across Age")) +
  xlab("Age") +
  labs(color = "Statistic") +
  scale_x_continuous(breaks = seq(15,90,5))

The model with Age term as a predictor of Interference is the best fitting model (P < 0.0001), but a continuous age term cannot be used in paper tables. Nonetheless, using this model can help us identify if it is appropriate to create a segmented model with age as a predictor of Interference scores, and identify that break-point, which was found to be 68.

A segmented model was constructed and was compared to one without the break and was found to be significantly better (p = 0.0081).

group_mean <- adult %>%
  group_by(Xbar) %>%
  summarize(sd = round(sd(Interference),2),
            Interference = mean(Interference),
            decimal_age = mean(decimal_age)) %>%
  ungroup() %>%
  mutate(age_group = dplyr::recode(Xbar, '0' = paste("Under", age_break), '1' = paste("Over", age_break)))

ggplot(my.model, aes(x = age, y = Interference)) + 
  geom_line() + 
  geom_label(data = group_mean, aes(x = decimal_age, y = Interference, label = paste0(round(Interference,2), "(", sd,")"), color = age_group)) +
   ggtitle(figure_counter_func("Interference Scores Across \n Age Segmented at Break Point")) +
  xlab("Age") +
  labs(color = "Age Group Mean Score (SD)") +
  scale_x_continuous(breaks = seq(15,90,10))

# interference_se_list <- lapply(levels(adult$SelEdGrp), function(degree) {
#   lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
#     predicted_score_se <- round(predict(int_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$se.fit,2)
#     data.frame(age, predicted_score_se, degree)
#     }, degree = degree
#     )
# }
#   )
# interference_fit_list <- lapply(levels(adult$SelEdGrp), function(degree) {
#   lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
#     predicted_score<- round(predict(int_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$fit, 2)
#     cbind(age, predicted_score, degree)
#     }, degree = degree
#     )
# }
#   )
# 
# interference_fit_df_list <- lapply(interference_fit_list, function(x) (do.call(rbind,x)))
# interference_fit_df <- do.call(rbind.data.frame, c(interference_fit_df_list, make.row.names = F))
# interference_fit_df$age <- as.numeric(interference_fit_df$age)
# interference_fit_df$predicted_score <- as.numeric(interference_fit_df$predicted_score)
# interference_fit_df$degree <- factor(interference_fit_df$degree, levels = levels(adult$SelEdGrp))
# 
# ggplot() +
#   geom_point(data = adult, aes(x = decimal_age, y = Interference, color = SelEdGrp)) +
#          geom_smooth(data = interference_fit_df, aes(x = age, y = predicted_score, color = degree)) +
#   scale_x_continuous(breaks = seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 5)) +
#   scale_y_continuous(breaks = seq(min(adult$Interference) - min(adult$Interference) %%5, max(adult$Interference), 5)) +
#   labs(color = "Education Level") +
#   xlab("Age") +
#   ylab("Interference Raw Score") +
#   ggtitle("Predicted Interference Scores by Education Across Ages")
adult %>%
  mutate(Xbar = recode(Xbar, '0' = paste('Below', age_break), '1' = paste('At or above', age_break))) %>%
ggplot(., aes(x = Interference, color = Xbar)) +
  geom_bar(stat = "bin", binwidth = 5) +
  ggtitle(figure_counter_func("Histogram of Interference Scores \n by Age Above and Below Breakpoint")) +
  xlab("Interference Raw Score") +
  ylab("Number of Participants") +
  labs(color = "Group")

Data were divided into those individuals at or above the breakpoint and those below. The groups were found to resemble a normal distribution and differences in variances were not significantly different (F = 1.83 p = 0.26). Thus the entire sample standard deviation was used, as constant variance was assumed.

T-Score tables for Interference are presented in Appendix Appendix #

Interference T-Scores

# Interference T-scores
  # int_t_score_func <- function(raw_score, expr) {
  #   sub_adult <- adult %>%
  #   dplyr::filter(., !!rlang::enexpr(expr))
  #   # Convert to t-score
  #   round(((raw_score - mean(sub_adult$Interference))/sd(adult$Interference))*10 + 50, 0)
  # }
  # 
  # young_int_t_score <- data.frame(raw_scores = seq(min(adult$Interference), max(adult$Interference), 1), t_scores = sapply(seq(min(adult$Interference), max(adult$Interference), 1), int_t_score_func, expr = decimal_age <= age_break)) %>%
  #   filter(t_scores >= 20 & t_scores <= 80) %>%
  #   mutate(raw_scores = ifelse(t_scores == 20, paste("<=", raw_scores), ifelse(t_scores == 80, paste(">=", raw_scores), raw_scores)))
  # 
  # old_int_t_score <- data.frame(raw_scores = seq(min(adult$Interference), max(adult$Interference), 1), t_scores = sapply(seq(min(adult$Interference), max(adult$Interference), 1), int_t_score_func, expr = decimal_age > age_break)) %>%
  #   filter(t_scores >= 20 & t_scores <= 80) %>%
  #   mutate(raw_scores = ifelse(t_scores == 20, paste("<=", raw_scores), ifelse(t_scores == 80, paste(">=", raw_scores), raw_scores)))
  # 
  # overall_int_t_score <- data.frame(raw_scores = seq(min(adult$Interference), max(adult$Interference), 1), t_scores = sapply(seq(min(adult$Interference), max(adult$Interference), 1), int_t_score_func, expr = (TRUE)))  %>%
  #   filter(t_scores >= 20 & t_scores <= 80) %>%
  #   mutate(raw_scores = ifelse(t_scores == 20, paste("<=", raw_scores), ifelse(t_scores == 80, paste(">=", raw_scores), raw_scores)))


  
  ## Continuous Norming- not done here b/c quadratic and not enough cases
# library(cNORM)
#   norm_adult <- adult %>% data.frame(personID = row.names(.), raw = .$Interference, group = as.numeric(.$Xbar)) %>% dplyr::select (personID, raw, group)
#   normData <- rankByGroup(norm_adult, group = "group")
#   normData <- computePowers(normData, k = 4, norm = "normValue", age = "group") 
#   model <- bestModel(normData)
#   getNormCurve(50, model, minAge = 15, maxAge = 90, step = 2, minRaw = min(adult$Interference), maxRaw = max(adult$Interference))
#     
#   normData2 <- adult %>% data.frame(personID = row.names(.), raw = .$Interference, group = as.numeric(.$Cyear)) %>% dplyr::select (personID, raw, group) %>% 
#     rankBySlidingWindow(data = ., age = "group", raw = "raw", width = 5)
#   normData2 <- computePowers(normData2, k = 4, norm = "normValue", age = "group")
#     model2 <- bestModel(normData2)
#   getNormCurve(50, model2, minAge = 15, maxAge = 90, step = 2, minRaw = min(adult$Interference), maxRaw = max(adult$Interference))
#   normTable(70, model2, minRaw = -50, maxRaw = 50, 
#                minNorm=20, maxNorm=80, step = 1)
# cat("<div class = 'row h-100'>")
#   cat("<div class = 'col-md-6'>")
#     cat(paste("<h3>Age <=", age_break, "</h3>"))
#     print(young_int_t_score %>%
#             kable(col.names = c("Raw Scores", "T-Scores")) %>%
#             kable_options()) 
#   cat("</div>")
#   cat("<div class = 'col-md-6'>")
#     cat(paste("<h3>Age >", age_break, "</h3>"))
#     print(old_int_t_score %>%
#             kable(col.names = c("Raw Scores", "T-Scores")) %>%
#             kable_options())
#   cat("</div>")
# cat("</div>")

Stop Appendix

Scale Structure

The structure of the Stroop was examined. The raw scores derived from the Color, Word, and Color-Word tasks and those scores calculations to obtain an Interference score were considered separate scales. Their association and the combination of these scales as factors were examined.

Scale Correlation

First the correlation of the scales was examined. As Interference is a calculation from the other three scores, Interference is not independent and, thus, does not meet assumptions typically required for inclusion in some analyses. Nonetheless, its association with the other scales is of interest. Analyses were typically conducted with and without Interference included to examine its association with the other scales (with its inclusion) and to obtain outcome measures fulfilling stricter assumptions (without inclusion).

Correlation Matrix Scatterplot Chart

A correlation matrix was constructed, along with scatterplots of association between scales, and histograms of individual scales, with all combinations of two scales included.

adult %>% 
  dplyr::select(CRScore, WRScore, CWRscore, Interference) %>%
  rename(Color = CRScore, Word = WRScore, Color_Word = CWRscore) %>%
  chart.Correlation()
  title(sub="NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001")
  title(main = figure_counter_func("Stroop Correlation Matrix"), line = 3)

Observing the chart above, we can see that Color, Word, and Color-Word have significant approximately moderate correlations with each other. Interference has a strong and significant correlation with Color-Word, but has only very weak correlations with Color and Word individually. That Interference and Color-Word have a strong relationship is intuitive, as Interference is the predicted Color-Word score subtracted from the actual Color-Word score, thus Color-Word score is used in the calculation of Interference. However, the predicted Color-Word score is calculated from Color and Word scores, so there would seemingly be a relationship between Interference and Color or Word. The relationship between individual scales is explored further in the Special Cases section below.

Cronbach Alpha

# Stroop Correlation Matrix- given above, so not used here
# adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% cor(use = "pair") %>% round(2)

cbind.data.frame(scale = c("With Interference", "Without Interference"),
  rbind.data.frame(
    # Stroop Cronbach with Interference
    cbind.data.frame(adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% cronbach()),
  # Stroop Cronbach without Interference
    cbind.data.frame(adult%>% dplyr::select(CRScore, WRScore, CWRscore) %>% cronbach())
  )
) %>%
  mutate_if(is.numeric, round, digits = 3) %>%
  kable(col.names = c("Scales Included", "N", "Number of Scales", "Alpha")) %>%
  kable_options()
Scales Included N Number of Scales Alpha
With Interference 373 4 0.698
Without Interference 373 3 0.768

Both with and without inclusion of Interference, Stroop scales demonstrated acceptable to moderate association, using traditional interpretation criteria of Cronbach alpha.

Bartlett Correlation Test

A Bartlett’s correlation test was conducted. Results of the test indicated the correlation of individual items was significantly different than zero, χ 2(6)= , p= 0. The determinant of the correlation matrix was 0.04, of sufficient value to conduct further analysis of the factor structure of the items.

The correlation of the variables was graphically represented with:

Graphical Representation of Correlation using Principal Components Analysis- 2d
# mdspca
adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% mdspca()
title(main = figure_counter_func("PCA Stroop Correlation"))

Graphical Representation of Correlation using Principal Components Analysis- 3d
# sphpca
adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% sphpca()
title(main = figure_counter_func("\n PCA Stroop \n Correlation 3d"), line = 3, adj = .2)

It can be seen that the Color scale and Word scale seem to have a have a relationship and a triangular relationship with Color-Word. Interference is associated with Color-Word. This is explored further through graphical representation with Interference as a dependent variable and the other three scales as independent.

Graphical Representation of Correlation with Interference as Dependent Variable
# FPCA
fpca(Interference ~ CRScore + WRScore + CWRscore, data = adult)
title(main = figure_counter_func("\n Correlation Interference \n Dependent"))

A Principal Components Analysis (PCA), or Exploratory Factor Analysis, was conducted to further explore the relationship.

Principal Components Analysis

Stroop Unspecified Model
# stroop pca
fit_unspecified <- adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% principal(., rotate="promax")
data.frame(factors = seq(1:length(raw_score_column_names_int)), variance = round(fit_unspecified$values[1:length(raw_score_column_names_int)],2)) %>%
  kable(caption = table_counter_func("Variance of Contrasts of Models with Unspecified Number of Factors"), row.names = F, col.names = c("Number of Factors in Model", "Variance")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Table 18: Variance of Contrasts of Models with Unspecified Number of Factors
Number of Factors in Model Variance
1 2.23
2 1.37
3 0.37
4 0.04
Stroop One Factor Model
data.frame(matrix(round(as.numeric(fit_unspecified$loadings), 3), attributes(fit_unspecified$loadings)$dim, dimnames=list(all_names[c(1:4)], c("Stroop Test")))) %>% kable(caption = table_counter_func("Stroop PCA One Factor")) %>% kable_options()
Table 19: Stroop PCA One Factor
Stroop.Test
Color 0.806
Word 0.655
Color-Word 0.936
Interference 0.525
cat (paste0('\n', "Examining the variance of the contrasts of models with increasing factors, it can be seen that the greatest amount of variance is in the model with ", which(fit_unspecified$values == max(fit_unspecified$values)), " factor. This is the model with all scores contributing.  However, further models should be evaluated for further information about how scales interact.", '\n' ), sep = " ")

Examining the variance of the contrasts of models with increasing factors, it can be seen that the greatest amount of variance is in the model with 1 factor. This is the model with all scores contributing. However, further models should be evaluated for further information about how scales interact.

Stroop Two Factor Model
fit_two_factor <- adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% principal(., nfactors = 2, rotate="promax")
data.frame(matrix(round(as.numeric(fit_two_factor$loadings), 3), attributes(fit_two_factor$loadings)$dim, dimnames=list(all_names[c(1:4)], c("Processing Speed", "Interference")))) %>% kable(caption = table_counter_func("Stroop PCA Two Factor")) %>% kable_options()
Table 20: Stroop PCA Two Factor
Processing.Speed Interference
Color 0.879 0.087
Word 0.935 -0.185
Color-Word 0.419 0.799
Interference -0.260 1.021

The model with two factors yielded chi-square of 168.429.

Stroop Three Factor Model
fit_three_factor <- adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% principal(., nfactors = 3, rotate="promax")
data.frame(matrix(round(as.numeric(fit_three_factor$loadings), 3), attributes(fit_three_factor$loadings)$dim, dimnames=list(all_names[c(1:4)], c("Interference", "Color Processing Speed", "Word Processing Speed")))) %>% kable(caption = table_counter_func("Stroop PCA Three Factor")) %>% kable_options()
Table 21: Stroop PCA Three Factor
Interference Color.Processing.Speed Word.Processing.Speed
Color -0.077 1.054 -0.048
Word -0.047 -0.046 1.035
Color-Word 0.780 0.296 0.158
Interference 1.040 -0.184 -0.119

The model with three factors yielded chi-square of 209.954.

It is interesting to see how scales separate. When forcing a two-factor model, it appears Color and Word form one factor, while Interference and Color-Word form another. With a three-factor model, Color and Word separate.

A scree plot was constructed to explore the appropriate number of factors the scales assume.

adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% scree.plot(., simu = 20, use="P", title = figure_counter_func("Scree Plot"))

It can be seen that a model with two factors maintains an eigenvalue above 1, typically considered the threshold for the number of factors. Taken together, the above analyses suggest that viewing all four scales as one construct is optimal, but that there is support to also conceptualize Stroop scales as assuming two factors, one with Color and Word, seemingly as a measure of raw processing speed, referred to as “Processing Speed” and the other with Color-Word and Interference, measures of the Stroop Effect, referred to as “Stroop.”

Structural Equation Modeling

Structural Equation Modeling was used to further explore the factor structure suggested by PCA above. Alpha, from Cronbach’s Alpha, and Omega, a measure of congeneric, or composite, reliability model statistics are presented. In general, composite reliability is seen to be a more accurate indicator of structure of scales for a number of reasons, including that it is more unbiased than alpha and seen to be more accurate citation

One-Factor Model
  round_df <- function(df, digits) {
  nums <- vapply(df, is.numeric, FUN.VALUE = logical(1))

  df[,nums] <- round(df[,nums], digits = digits)

  (df)
  }
  # Two Factor Model with no Interference
   one_factor.model_interference <-
 '                        total =~ CRScore + WRScore + CWRscore + Interference'
  
  #  Fit the model
  fit_one_factor_Int <- lavaan::cfa(one_factor.model_interference, data = adult)

  one_factor.model_no_interference <-
 '                        total =~ CRScore + WRScore + CWRscore'
  
  #  Fit the model
  fit_one_factor_no_Int <- lavaan::cfa(one_factor.model_no_interference, data = adult)
  cbind.data.frame(c("Interference", "No Interference"), rbind.data.frame(reliability(fit_one_factor_Int, return.total = TRUE)[1:2,], reliability(fit_one_factor_no_Int, return.total = TRUE)[1:2,])) %>%
    round_df(., 3) %>%
    kable(col.names = c("Interference Included", "Alpha", "Omega"), caption = table_counter_func("Stroop One Factor SEM")) %>%
    kable_options()
Table 22: Stroop One Factor SEM
Interference Included Alpha Omega
Interference 0.698 0.872
No Interference 0.768 0.791
Two-Factor Model with Interference
  # Two Factor Model with no Interference
  two_factor.model_interference <-
'
                       color_word =~ a*CRScore + a*WRScore
                       cw_int =~ b*CWRscore + b*Interference

                        CRScore ~~ CRScore
                        WRScore ~~ WRScore
                        CWRscore ~~ CWRscore
                        Interference ~~ Interference
'
  
  #  Fit the model
  fit_two_factor_int <- lavaan::cfa(two_factor.model_interference, data = adult)
  # summary <- summary(fit, fit.measures=FALSE)$PE
  
  # Parameter Estimates- Not used here with additional factor in above equations or to output df
  # pe <- parameterEstimates(fit_two_factor_int, ci = TRUE, level = 0.95)[1:4,]
  
  # Factor Loadings
    sl <- standardizedSolution(fit_two_factor_int)
    sl <- sl$est.std[sl$op == "=~"]
 # names(sl) <- names(raw_df)

  # # Output DF of Parameter Estimates
  # pe %>%
  #   cbind.data.frame(., sl)[,-4] %>%
  #   round_df(.,2) %>%
  # kable(caption = "Parameter Estimates", row.names = F, col.names = c("Factor", "Contains", "Variable", "Estimate", "SE", "Z", "P-Value", "CI Low", "CI Upper", "Factor Loadings")) %>%
  # kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
  # 
  # # For interpretive report below
  # pe_no_na <- pe[complete.cases(pe),]
  # fit_measures <- fitMeasures(fit)
  
  # Print SEM Paths
  semPaths(fit_two_factor_int,"std", edge.label.cex = 1, edge.label.color = "black", exoVar = FALSE,
           exoCov = FALSE, nodeLabels = c(score_names_int, "Processing Speed", "Stroop"), sizeMan = 15, sizeInt = 10, sizeLat = 17, label.cex = 1.3)
  title(main = figure_counter_func("SEM Paths"), line = 2)

# Compute residual variance of each item
re <- 1 - sl^2

# Compute composite reliability
composite_reliability <- round(sum(sl)^2 / (sum(sl)^2 + sum(re)),4)
as.data.frame(reliability(fit_two_factor_int, return.total = T)[1:2,]) %>%
  round_df(., 3) %>%
  kable(col.names = c("Processing Speed", "Stroop", "Factors Combined"), caption = table_counter_func("Stroop Two-Factor SEM w/ Interference")) %>%
  add_header_above(c(" " = 1, "Factor" = 2, " " = 1)) %>%
  column_spec(4, border_left = "1px solid gray") %>%
  kable_options()
Table 23: Stroop Two-Factor SEM w/ Interference
Factor
Processing Speed Stroop Factors Combined
alpha 0.75 0.849 0.698
omega 0.76 0.903 0.885
# <!-- The model was found to be of significantly good fit (χ2(`r round(fit_measures[names(fit_measures) == "df"], 0)`)=`r paste0(round(fit_measures[names(fit_measures) == "chisq"],2), ", p= ", round(fit_measures[names(fit_measures) == "pvalue"], 4))`).    -->
# 
# <!-- The parameter estimates for each variable's fit (rhs column) within the assigned factor (lhs) are given in the table.  `r ifelse(nrow(pe_no_na[pe_no_na$pvalue >= .05,]) > 0, print(paste("The following variables were found to not fit within the assigned factor- ", pe_no_na$lhs[pe_no_na$pvalue >= .05])), print("No variables were found to not fit factors they were assigned.")) ` -->
#   # Two Factor Model with no Interference
#   two_factor.model_no_interference <- '
#                        color_word =~ CRScore + WRScore
#                        cw =~ CWRscore
# 
#                         CRScore ~~ CRScore
#                         WRScore ~~ WRScore
#                         CWRscore ~~ 0*CWRscore
# '
#  
#   #  Fit the model
#   fit_two_factor_no_int <- lavaan::cfa(two_factor.model_no_interference, data = adult)
#   summary <- summary(fit_two_factor_no_int, fit.measures=FALSE)$PE
#   
#   # Parameter Estimates- Not used here with additional factor in above equations or to output df
#   pe <- parameterEstimates(fit_two_factor_no_int, ci = TRUE, level = 0.95)[1:3,]
#   
#   # Factor Loadings
#     sl <- standardizedSolution(fit_two_factor_no_int)
#     sl <- sl$est.std[sl$op == "=~"]
#   names(sl) <- score_names
# 
#   # Output DF of Parameter Estimates
#   pe %>%
#     cbind.data.frame(., sl) %>%
#     round_df(.,2) %>%
#   kable(caption = "Parameter Estimates", row.names = F, col.names = c("Factor", "Contains", "Variable", "Estimate", "SE", "Z", "P-Value", "CI Low", "CI Upper", "Factor Loadings")) %>%
#   kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
# 
#   # For interpretive report below
#   pe_no_na <- pe[complete.cases(pe),]
#   fit_measures <- fitMeasures(fit_two_factor_no_int)
#   
#   # Print SEM Paths
#   semPaths(fit_two_factor_no_int,"std", edge.label.cex = 0.5, exoVar = FALSE,
#            exoCov = FALSE)
# 
# # Compute residual variance of each item
# re <- 1 - sl^2
# 
# # Compute composite reliability
# composite_reliability <- round(sum(sl)^2 / (sum(sl)^2 + sum(re)),4)
# 
# # Compute composite reliability
# composite_reliability <- round(sum(sl)^2 / (sum(sl)^2 + sum(re)),4)
# as.data.frame(reliability(fit_two_factor_no_int, return.total = T)[1:2,]) %>%
#   round_df(., 4) %>%
#   kable(col.names = c("Color and Word", "Total")) %>%
#    add_header_above(c(" " = 1, "Factor" = 1, " " = 1)) %>%
#   column_spec(3, border_left = "1px solid gray") %>%
#   kable_options()

Reliability

Test-Retest

# Give maximum length of time allowed for test-retest
time_limit <- 50

To assess test-retest reliability a number of participants were re-administered the Stroop up to 50 days between initial test (“Test”) and retest (“Retest”). Information about number of participants, length of time between test and retest, other information, and the impact of these factors on scores is presented below.

# ICC
## Compute with irr
# Note that, the two-way mixed-effects model and the absolute agreement are recommended for both test-retest and intra-rater reliability studies (Koo et al., 206).

# Create df with test-retest for each scale
library("irr")

icc_list <- list(cbind.data.frame(adult_retest$CRScore.x, adult_retest$CRScore.y),cbind.data.frame(adult_retest$WRScore.x, adult_retest$WRScore.y), cbind.data.frame(adult_retest$CWRscore.x, adult_retest$CWRscore.y), cbind.data.frame(adult_retest$Interference.x, adult_retest$Interference.y)) 

icc_output_list <- lapply(icc_list, function(df_list){
  icc(
    df_list, model = "twoway", 
    type = "consistency", unit = "single"
    )
})
names(icc_output_list) <- scale_list_int

# # Using ICC in psych- not used here
# icc_psych_output_list <- lapply(icc_list, function(df_list){
#   ICC(
#     df_list
#     )
# })
# names(icc_psych_output_list) <- scale_list_int

# Using test-retest in psych
adult$time <- rep(1, nrow(adult))
retest$time <- rep(2, nrow(retest))

adult_long <- adult %>%
  subset(case_id %in% retest$case_id) %>%
  dplyr::select(case_id, time, CRScore, WRScore, CWRscore, Interference) #%>%
  # pivot_longer(
  #   cols = ends_with("core"),
  #   names_to = "raw_scores",
  #   values_to = "values"
  # )

retest_long <- retest %>%
  dplyr::select(case_id, time, CRScore, WRScore, CWRscore, Interference)

adult_retest <- bind_rows(adult_long, retest_long)
over_time_descriptives <- describeBy(adult_retest,group="time", mat = T)
over_time_descriptives %>% 
  subset(., vars > 2) %>%
  mutate(time = recode(group1, '1' = "Test", '2' = "Retest"),
         scale = rep(score_names_int, each = 2)) %>%
  mutate(scale = ifelse(duplicated(scale), "", scale),
         n = ifelse(duplicated(n), "", n)) %>% #,
         # time = recode(time, '1' = "Test", '2' = "Retest")) %>%
  dplyr::select(scale, time, n, mean, sd, se, median, min, max, skew, kurtosis) %>%
    mutate_if(is.numeric, round, 3) %>%
  kable(caption = table_counter_func("Test-Retest Statistics")) %>%
  kable_options()
Table 24: Test-Retest Statistics
scale time n mean sd se median min max skew kurtosis
CRScore1 Color Raw Score Test 48 70.729 13.137 1.896 70.5 31 98 -0.435 0.445
CRScore2 Retest 72.083 13.388 1.932 70.5 46 100 0.257 -0.519
WRScore1 Word Raw Score Test 102.812 16.283 2.350 100.5 60 139 0.058 -0.026
WRScore2 Retest 103.875 15.050 2.172 100.5 80 140 0.595 -0.358
CWRscore1 Color-Word Raw Score Test 39.271 10.790 1.557 39.0 12 61 -0.155 -0.405
CWRscore2 Retest 41.396 10.582 1.527 41.0 20 63 0.207 -0.539
Interference1 Interference Test -2.396 7.371 1.064 -2.0 -20 10 -0.357 -0.485
Interference2 Retest -0.917 9.055 1.307 0.0 -28 19 -0.366 0.639
# test_retest_ml <- mlr(adult_retest,grp="case_id",Time="time",items=3:5) 

Intraclass correlation coefficients (ICC) are presented as measures of test-retest reliability. Two-way mixed-effects model and the absolute agreement to derive intraclass correlation coefficients were used. The following table presents ICC statistics for each scale.

lapply(icc_output_list,function(out_list) {data.frame(
    val = paste0(round(out_list$value,2)," (", round(out_list$lbound,2), " - ", round(out_list$ubound,2), ")"),
    f_val = round(out_list$Fvalue, 2),
    p_val = round(out_list$p.value, 2)
  )
}) %>%
  bind_rows() %>%
  mutate(scales = score_names_int) %>%
  dplyr::select(scales, everything()) %>%
  kable(col.names = c("Scale", "ICC Value (95% CI)", "F", "P"), caption = table_counter_func("ICC Table Test-Retest")) %>%
  kable_options()
Table 25: ICC Table Test-Retest
Scale ICC Value (95% CI) F P
Color Raw Score 0.82 (0.7 - 0.9) 10.12 0
Word Raw Score 0.9 (0.83 - 0.94) 19.50 0
Color-Word Raw Score 0.68 (0.49 - 0.81) 5.25 0
Interference 0.5 (0.26 - 0.69) 3.02 0

Concordance Correlation Coefficients were also produced as a similar measure of test-retest reliability.

Concordance Correlation

library(rjags)
library(agRee)
library(BlandAltmanLeh)

cc_matrix_list <- lapply(raw_score_column_names_int, function(scale){
    scale <- enquo(scale)
    adult_retest %>%
    dplyr::select(case_id, time, !!scale) %>%
    pivot_wider(names_from = time, values_from = !!scale) %>%
    rename(test = `1`, retest = `2`) %>%
    dplyr::select(test, retest) %>%
     filter(abs(test - retest) < 50) %>%
    data.matrix()
})
  # agree.ccc()
  cc_output <- lapply(cc_matrix_list, function(mat) agree.ccc(mat))
  do.call(bind_rows, lapply(cc_output, unlist)) %>% 
    round(., 3) %>%
    cbind.data.frame(score_names_int, .) %>%
    kable(col.names = c("Scale", "Concordance Correlation", "Lower Bound", "Upper Bound"), caption = table_counter_func("Concordance Correlation")) %>%
    add_header_above(c(" " = 2, "95% CI" = 2)) %>%
    kable_options()
Table 26: Concordance Correlation
95% CI
Scale Concordance Correlation Lower Bound Upper Bound
Color Raw Score 0.810 0.635 0.906
Word Raw Score 0.890 0.696 0.963
Color-Word Raw Score 0.668 0.464 0.805
Interference 0.492 0.235 0.685

Bland-Altman and Means and Differences plots were produced to assess test-retest values. Bland-Altman plots show individual test-retest differences and a confidence interval band around the mean. Plots with a number of values falling outside this interval can suggest problems with test-retest reliability.

Means and Differences plots are a plot with test and retest values as coordinates. A fitted line is compared to a unity line (a line representing perfect test to retest correspondence) to compare agreement of test-retest values.

  # agree.plot(cc_matrix_list[[1]])
cat("<div class = 'row h-100'>")
  cat("<div class = 'col-md-6'>")
    cat("<h3>Bland-Altman Plot</h3>")

Bland-Altman Plot

  cat("</div>")
  cat("<div class = 'col-md-6'>")
    cat("<h3>Means and Differences Scatterplot</h3>")

Means and Differences Scatterplot

  cat("</div>")
cat("</div>")
  lapply(1:length(cc_matrix_list), function(i) {
      cat("<div class = 'row h-100'>")
        cat(paste("<h4>", score_names_int[i], "</h4>"))
        cat("<div class = 'col-md-6'>")
          print(bland.altman.plot(cc_matrix_list[[i]][,1], cc_matrix_list[[i]][,2], graph.sys="ggplot2"))
        cat("</div>")
        cat("<div class = 'col-md-6'>")
        print(
          cc_matrix_list[[i]] %>%
          as.data.frame() %>%
          ggplot(., aes(x = test, y = retest)) +
          geom_point() + 
          geom_smooth(aes(linetype = "Fitted Line"), method = "lm",se = F) +
          geom_abline(aes(linetype = "Unity Line" , intercept = 0, slope = 1))
        )
        cat("</div>")
      cat("</div>")
  })

Color Raw Score

Word Raw Score

Color-Word Raw Score

Interference

[[1]] NULL

[[2]] NULL

[[3]] NULL

[[4]] NULL

Each of the scales had significant ICC statistics, with moderate to strong correlation. However, it was also desired to ascertain the effect the length of time had on test and retest scores, which can impact the correlation, as well as have practical ramifications about practice effects and length of time between retesting.

To examine the effect of time on test-retest results:

  1. Plots were constructed that show the change in score from Test to Retest across the time between tests (“Plot of Change in Score Over Time Between Tests”),
  2. Mixed-effects models were constructed with Stroop scores as outcome variables and test type (test or retest) as predictor variable, with length of time between tests and within participant effects included in models and controlled for. Main effects of these models were examined, that is, whether there was a difference between test and retest scores controlling for those variables (“Main Effects of Differences Between Test and Retest”), and
  3. The interaction between participant and length of time between tests were examined for significance (“Random Effects Likelihood Ratio Test”).
# Interference retest wide
  cat("<div class = 'row h-100'>")
     cat("<div class = 'col-md-6'>")
      cat(figure_counter_func("Plot of Score Change Over Time Between Tests"))

Figure 19: Plot of Score Change Over Time Between Tests

     cat("</div>")
     cat("<div class = 'col-md-6'>")
      cat(table_counter_func("Score Change Over Time Test Results"))

Table 27: Score Change Over Time Test Results

     cat("</div>")
    cat("</div>")
mapply(function(scale, scale_name){
  re <- retest %>%
   dplyr::mutate(exam_date_retest= as.Date(paste(EVyear, EVmonth, EVday, sep = "/")),
          retest_score = !!scale
   ) %>%
    dplyr::select(case_id, exam_date_retest, retest_score)
  ad <- adult %>%
   dplyr::mutate(exam_date_test= as.Date(paste(EVyear, EVmonth, EVday, sep = "/")),
          test_score = !!scale
   ) %>%
    dplyr::select(case_id, exam_date_test, test_score)
  
  re_ad_wide <- inner_join(re, ad, by = "case_id") %>%
    dplyr::mutate(retest_time = (age_calc(exam_date_test, enddate = exam_date_retest, units = "days")),
           test_change = retest_score - test_score)
   cat(paste0("<h4>",scale_name, "</h4>"))
   cat("<div class = 'row h-100'>")
     cat("<div class = 'col-md-6'>")
    
     print(re_ad_wide %>%
      dplyr::filter(retest_time <= time_limit) %>%
        ggplot(., aes(retest_time, test_change)) +
      geom_point() +
      geom_smooth() + 
        xlab("Time Between Test and Retest") +
        ylab("Score Change Between Test and Retest") +
        ggtitle("Plot of Change in Score \n Over Time Between Tests")-> p)
    cat("</div>")
   cat("<div class = 'col-md-6'>")
   # Interference retest long
re <- retest %>%
 dplyr::mutate(exam_date= as.Date(paste(EVyear, EVmonth, EVday, sep = "/")),
        test_status = rep("retest", nrow(.)),
                          result = !!scale) %>%
  dplyr::select(case_id, exam_date, result, test_status)
ad <- adult %>%
 dplyr::mutate(exam_date = as.Date(paste(EVyear, EVmonth, EVday, sep = "/")),
        test_status = rep("test", nrow(.)),
                          result = !!scale) %>%
  dplyr::select(case_id, exam_date, result, test_status)

re_ad_long <- bind_rows(re,ad)

    lmer(result ~ test_status +  (1|exam_date) + (1|case_id) + (1|exam_date:case_id), data = re_ad_long, REML = F) -> mod_full
    lmer(result ~ 1 +  (1|exam_date) + (1|case_id) + (1|exam_date:case_id), data = re_ad_long, REML = F) -> mod_reduced
    anova_result <- anova(mod_full, mod_reduced)
    # reduced model is the right choice for all
    rand_eff <- lmerTest::ranova(mod_reduced)
    dat_df <- cbind.data.frame(chi = round(anova_result$Chisq[2],2), df = anova_result$Chisq[-1], p = round(anova_result$`Pr(>Chisq)`[2], 2))
    ft <- table_output_func(dat_df, colnames = c("Chisq", "Df", "Pr(>Chisq)"), caption = "Main Effects of Difference between Test and Retest")
    cat(knit_print(ft))
    
    ### Checks difference in score over time
    
    re_ad_wide <- inner_join(ad, re, by = "case_id", suffix = c("_test", "_retest"))
re_ad_wide <- re_ad_wide %>%
  rowwise() %>%
  mutate(test_difference = result_retest - result_test,
         date_difference = exam_date_retest - exam_date_test)
re_ad_wide %>%
  ggplot(., aes(x = date_difference, y = test_difference)) +
  geom_smooth()

mod1 <- lm(test_difference ~ date_difference, data = re_ad_wide)
mod2 <- lm(test_difference ~ 1, data = re_ad_wide)
lrtest(mod1, mod2)
      # kable(col.names = c("Chisq", "Df", "Pr(>Chisq)"), caption = "Main Effects of Difference between Test and Retest") %>%
      # kable_options())
  #   print(cbind.data.frame(names(attributes(rand_eff)$formulae), round(rand_eff$LRT[-1],3), rand_eff$Df[-1], round(rand_eff$`Pr(>Chisq)`[-1],3))[3,] %>%
  #           flextable()%>% 
  # theme_zebra() %>% 
  # autofit())
            #kable(col.names = c("Random Effect", "Likelihood Ratio Test", "Df", "Pr(>Chisq)"), caption = "Random Effects Likelihood Ratio Test", row.names = F) %>%
            #kable_options())
    dat_df <- cbind.data.frame(names(attributes(rand_eff)$formulae), round(rand_eff$LRT[-1],3), rand_eff$Df[-1], round(rand_eff$`Pr(>Chisq)`[-1],3))[3,] 
    ft <- table_output_func(dat_df, colnames = c("Random Effect", "Likelihood Ratio Test", "Df", "Pr(>Chisq)"))
    cat(knit_print(ft))
    # names1 <- names(dat_df)
    # names2 <- c("Random Effect", "Likelihood Ratio Test", "Df", "Pr(>Chisq)")
    # ft = flextable(dat_df)
    # ft <- set_header_df(x = ft, mapping = data.frame(keys = names1, values = names2, stringsAsFactors = FALSE),
    #                  key = "keys" )
    # print(theme_booktabs(ft)) 
   cat("</div>")
 cat("</div>")
}, scale_score_int, score_names_int)

Color Raw Score

Main Effects of Difference between Test and Retest
Chisq Df Pr(>Chisq)
0.22 0.2155431 0.64
Random Effect Likelihood Ratio Test Df Pr(>Chisq)
(1 | exam_date:case_id) 0.422 1 0.516

Word Raw Score

Main Effects of Difference between Test and Retest
Chisq Df Pr(>Chisq)
2.07 2.066951 0.15
Random Effect Likelihood Ratio Test Df Pr(>Chisq)
(1 | exam_date:case_id) 2.881 1 0.09

Color-Word Raw Score

Main Effects of Difference between Test and Retest
Chisq Df Pr(>Chisq)
0.11 0.1091937 0.74
Random Effect Likelihood Ratio Test Df Pr(>Chisq)
(1 | exam_date:case_id) 0 1 1

Interference

Main Effects of Difference between Test and Retest
Chisq Df Pr(>Chisq)
0.03 0.0342477 0.85
Random Effect Likelihood Ratio Test Df Pr(>Chisq)
(1 | exam_date:case_id) 0 1 1

[[1]] NULL

[[2]] NULL

[[3]] NULL

[[4]] NULL

As can be seen in the above results, there were no significant differences between Test and Retest values for any Stroop scores, when controlling for time, participant factors, and that interaction. There were also not significant differences in score outcomes when testing for the interaction between length of time between test and retest and participant for any scores. The implications of both of these results is that Stroop scores seem to hold constant over time within individuals, and there do not seem to be practice effects, or changes in scores at retest points, at least within about 2 months time (50 days).

Validity

A number of participants were administered multiple instruments to assess the concurrent validity of the Stroop with other measures. The measures and specific scales used were reviewed in the “Measures” section above.

Previous Edition

None of the current Stroop stimulus content or administration procedures were changed from the previous edition. As such, there was no attempt to evaluate the previous and current revised edition.

All Measures

All Measures Correlation Matrix

The following correlation matrix presents the correlation of all measures used in these analyses.

ad_al <- adult_all %>% dplyr::select(CRScore.x, WRScore.x, CWRscore.x, Interference.x, LW , WA, cc_ratio_t, ci_ratio_t, Stroop, total) %>%
  correlation_matrix(digits = 2, replace_diagonal = T, use = 'lower')
dimnames(ad_al)<- rep(list(all_names), 2) 
ad_al %>% kable(caption = table_counter_func("NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001")) %>% kable_options()
Table 28: NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001
Color Word Color-Word Interference Letter-Word Word Attack CC Ratio CI Ratio Nonverbal Stroop Trails Total
Color
Word 0.57***
Color-Word 0.62*** 0.47***
Interference -0.05 -0.11 0.71***
Letter-Word 0.22* 0.36** 0.33** 0.15
Word Attack 0.17 0.30** 0.22* 0.07 0.57***
CC Ratio 0.17 0.18 0.23* 0.11 -0.02 -0.00
CI Ratio 0.38*** 0.31** 0.43*** 0.20 0.07 0.11 0.38***
Nonverbal Stroop -0.26* -0.16 -0.26* -0.12 -0.09 -0.12 0.45*** -0.64***
Trails Total 0.42*** 0.29** 0.32** 0.02 0.23* 0.15 0.14 0.33** -0.22*

All Measures Graphical Representation of Correlation using Princiapal Components Analysis- 2d

The following is a graphical representation of correlation using Principal Components Analysis of all measures. Points closer in space represent closer correlations.

# mdspca
adult_all %>% dplyr::select(CRScore.x, WRScore.x, CWRscore.x, Interference.x, LW , WA, cc_ratio_t, ci_ratio_t, Stroop, total)%>% setNames(all_names) %>% mdspca(namesupsubj = all_names)
title(figure_counter_func("PCA All Scales"))

All Measures Three Factor PCA Model

A Prinicpal Components Analysis was conducted to examine how measures related and formed separate factors. A three factor model was chosen after analyses indicated the Stroop tended to form at least two factors, given the presence of other assessments. The results below suggest the presence of three factors that tended to group together with characteristics represented by factors names below. Further analyses of the Stroop with individual measures explores the association of these measures, as follows.

fit_three_factor <- adult_all %>% dplyr::select(CRScore.x, WRScore.x, CWRscore.x, Interference.x, LW , WA, cc_ratio_t, ci_ratio_t, Stroop, total) %>% principal(., nfactors = 3, rotate="promax")
data.frame(matrix(round(as.numeric(fit_three_factor$loadings), 3), attributes(fit_three_factor$loadings)$dim, dimnames=list(all_names, c("Stroop", "Fluency", "Nonverbal Stroop")))) %>% kable(caption = table_counter_func("All Measures Three Factor PCA Model Loadings")) %>% kable_options()
Table 29: All Measures Three Factor PCA Model Loadings
Stroop Fluency Nonverbal.Stroop
Color 0.438 0.477 -0.009
Word 0.188 0.687 0.066
Color-Word 0.806 0.196 0.090
Interference 0.689 -0.310 0.089
Letter-Word -0.105 0.792 0.012
Word Attack -0.170 0.771 -0.032
CC Ratio 0.383 0.002 0.796
CI Ratio 0.806 -0.048 -0.240
Nonverbal Stroop -0.477 0.032 0.884
Trails Total 0.338 0.356 -0.067

WJ

Stroop scores were compared with those obtained from Letter-Word (LW) and Word-Attack (WA) andThere were 18 cases that were identified in the WJ dataset that didn’t have a matching case in the Stroop dataset. The number of cases that matched were 304. There were no incomplete cases.

WJ-Stroop Correlation

The following analyses explore the correlation of the WJ subtests with Stroop scales. Review of analyses will be discussed as follows:

WJ-Stroop Correlation Matrix
# Stroop WJ Correlation Matrix
wj_mat <- adult_wj %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, LW , WA) %>%
  correlation_matrix(digits = 2, replace_diagonal = T, use = 'lower')
dimnames(wj_mat)<- list(all_names[1:6], all_names[1:6]) 
wj_mat %>% kable(caption = table_counter_func("NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001")) %>% kable_options()
Table 30: NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001
Color Word Color-Word Interference Letter-Word Word Attack
Color
Word 0.64***
Color-Word 0.60*** 0.47***
Interference 0.03 -0.06 0.74***
Letter-Word 0.27*** 0.37*** 0.30*** 0.15*
Word Attack 0.29*** 0.37*** 0.32*** 0.15** 0.63***
WJ-Stroop Graphical Representation of Correlation using Princiapal Component Analysis- 2d
# mdspca
adult_wj %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, LW , WA) %>% setNames(all_names[1:6]) %>% mdspca()
title(figure_counter_func("PCA Stroop WJ"))

WJ-Stroop Graphical Representation of Correlation using Princiapal Component Analysis- 2d
# sphpca
adult_wj %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, LW , WA) %>% setNames(all_names[1:6]) %>% sphpca()
title(figure_counter_func("\n PCA Stroop WJ 3d"), line = 2)

WJ-Stroop Principal Components Analysis (PCA)

WJ-Stroop Scree Plot
adult_wj_short <- adult_wj %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, LW , WA) %>% as.data.frame()
scree.plot(adult_wj_short,simu=20,use="P", title = figure_counter_func("\n Stroop WJ Scree Plot"))
title(figure_counter_func("\n Stroop WJ Scree Plot"), line = 2)

WJ-Stroop Two Factor PCA Model
fit_two_factor <- principal(adult_wj_short, nfactors = 2, rotate="promax")
data.frame(matrix(round(as.numeric(fit_two_factor$loadings), 3), attributes(fit_two_factor$loadings)$dim, dimnames=list(all_names[1:6], c("Processing Speed", "Stroop")))) %>% kable() %>% kable_options()
Processing.Speed Stroop
Color 0.760 0.036
Word 0.871 -0.150
Color-Word 0.362 0.785
Interference -0.225 1.030
Letter-Word 0.701 0.000
Word Attack 0.701 0.018
WJ-Stroop Three Factor PCA Model
fit_three_factor <- principal(adult_wj_short, nfactors = 3, rotate="promax")
data.frame(matrix(round(as.numeric(fit_three_factor$loadings), 3), attributes(fit_three_factor$loadings)$dim, dimnames=list(all_names[1:6], c("Processing Speed", "Stroop", "Reading Fluency")))) %>% kable() %>% kable_options()
Processing.Speed Stroop Reading.Fluency
Color 0.948 0.021 -0.090
Word 0.882 -0.162 0.118
Color-Word 0.450 0.770 -0.010
Interference -0.243 1.024 0.027
Letter-Word -0.006 0.006 0.904
Word Attack 0.006 0.024 0.890

As can be seen in the above analyses, the Stroop and NSCST subtests had significant correlations. The scales tended to form two or three groups. There was a group consisting of Stroop, Interference, and Nonverbal Stroop, termed “Stroop” and the Color, Word, CC Ratio, and CI Ratio tasks that formed a group, called “Processing Speed.” These Processing Speed tasks require efficient processing of written language and color. When forced to have three factors, the CI Ratio and Nonverbal Stroop scores formed a separate factor that seemed to suggest nonverbal Stroop processes are different than verbal ones. The scree plot supported a factor structure of two or three factors.

WJ-Stroop Multi-trait Multimethod (MTMM)

# MTMM Stroop with Lavaan- not used here, use psy pack instead

# mtmm_basic <- "
# T1 =~ Interference
# T2 =~ CRScore + WRScore + CWRscore
# M1 =~ 1*Interference + 1*CRScore
# M2 =~ 1*Interference + 1*WRScore
# M3 =~ 1*Interference + 1*CWRscore
# 
# T1~~1*T1
# T2~~1*T2
# 
# T1~~T2
# "
# fit_mtmm_basic <- lavaan(mtmm_basic, data = adult, missing = "ml",
# auto.fix.first = FALSE, auto.var = TRUE)
# summary(fit_mtmm_basic, standardized = TRUE)


# MTMM with Interference, two scales
par(mfrow=c(2,1))
mtmm(adult_wj_short,list(c("CRScore", "WRScore", "CWRscore", "Interference"), c("LW", "WA")), graphItem = T, itemTot = F) %>%
  mutate_if(is.numeric, round, 3) %>%
  kable(caption = table_counter_func("WJ-Stroop MTMM Two Factors")) %>%
  kable_options()
Table 31: WJ-Stroop MTMM Two Factors
Item ScaleI Scale 1 Scale 2
1 CRScore 1 0.618 0.308
2 WRScore 1 0.487 0.405
3 CWRscore 1 0.803 0.344
4 Interference 1 0.224 0.164
9 LW 2 0.379 0.626
10 WA 2 0.391 0.626

The above Multitrait Multimethod analysis, with Strop and WJ subtests each assigned to two factors respectively, shows that Interference does not fit great with either factor, showing it has properties different from either.

WJ-Stroop Confirmatory Factor Analysis (CFA)

Confirmatory Factor Analysis was conducted with Stroop and WJ scales. As above analyses suggested two and three factor models were appropriate, CFA models were fit with those number of factors. It was found that a three factor model with Interference in its own factor was the best fit.

# CFA Stroop

# CFA Stroop WJ
two_factor_cfa_stroop_wj <- "
stroop_scales =~ 1* CRScore + WRScore + CWRscore + Interference
wj_scales =~ 1*LW + WA
"
# NA's from below model
fit_two_factor_cfa_stroop_wj<- cfa(two_factor_cfa_stroop_wj, data = adult_wj, missing = "ml")
# summary(fit_two_factor_cfa_stroop_wj, standardized = TRUE)


three_factor_stroop_wj <- "
stroop_color_word =~ 1* CRScore + WRScore
stroop_incongruent_interference =~ 1* CWRscore + Interference
wj_scales =~ 1*LW + WA
"
fit_three_factor_stroop_wj<- cfa(three_factor_stroop_wj, data = adult_wj, missing = "ml")
# summary(fit_three_factor_stroop_wj, standardized = TRUE)

# Interference didn't fit well

three_factor_stroop_wj_interference_alone <- "
stroop_color_word_incongruent =~ 1* CWRscore + CRScore + WRScore
stroop__interference =~ 1* Interference
wj_scales =~ 1*LW + WA
"
fit_three_factor_stroop_wj_interference_alone<- cfa(three_factor_stroop_wj_interference_alone, data = adult_wj, missing = "ml")
# summary(fit_three_factor_stroop_wj_interference_alone, standardized = TRUE)

fitMeasures(fit_three_factor_stroop_wj_interference_alone, c("chisq", "df", "pvalue")) %>% data.frame() %>% t() %>% 
  kable(caption = table_counter_func("WJ-Stroop Three Factor CFA Fit"), col.names = c("Chisq", "Df", "P")) %>%
  kable_options()
Table 32: WJ-Stroop Three Factor CFA Fit
Chisq Df P
. 217.0154 7 0
parameterEstimates(fit_three_factor_stroop_wj_interference_alone)[1:(fitMeasures(fit_three_factor_stroop_wj_interference_alone, "df")-1),] %>%
  mutate_if(is.numeric, round, 3) %>%
  kable(caption = table_counter_func("WJ-Stroop Three Factor CFA Estimates")) %>%
  kable_options()
Table 33: WJ-Stroop Three Factor CFA Estimates
lhs op rhs est se z pvalue ci.lower ci.upper
stroop_color_word_incongruent =~ CWRscore 1.000 0.000 NA NA 1.000 1.000
stroop_color_word_incongruent =~ CRScore 0.199 0.063 3.164 0.002 0.076 0.323
stroop_color_word_incongruent =~ WRScore 0.153 0.053 2.884 0.004 0.049 0.257
stroop__interference =~ Interference 1.000 0.000 NA NA 1.000 1.000
wj_scales =~ LW 1.000 0.000 NA NA 1.000 1.000
wj_scales =~ WA 0.788 0.282 2.790 0.005 0.234 1.341

Nonverbal Stroop Card Sorting Test (NSCST)

The NSCST was used for convergent validity. There were 86 cases that were identified in the NSCST dataset that didn’t have a matching case in the Stroop dataset. The number of cases that matched were 189. There were no incomplete cases.

NSCST-Stroop Correlation

The following analyses explore the correlation of the NSCST subtests with Stroop scales. Review of analyses will be discussed as follows:

NSCST-Stroop Correlation Matrix
# Stroop NSCST Correlation Matrix
nscst_mat <- adult_nscst %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, cc_ratio_t, ci_ratio_t, nscst_stroop_t) %>%
  correlation_matrix(digits = 2, replace_diagonal = T, use = 'lower')
dimnames(nscst_mat)<- list(all_names[c(1:4, 7:9)], all_names[c(1:4, 7:9)]) 
nscst_mat %>% kable(caption = "NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001") %>% kable_options()
NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001
Color Word Color-Word Interference CC Ratio CI Ratio Nonverbal Stroop
Color
Word 0.57***
Color-Word 0.64*** 0.34***
Interference 0.06 -0.20** 0.71***
CC Ratio 0.15* 0.50*** -0.06 -0.28***
CI Ratio 0.31*** 0.38*** 0.29*** 0.09 0.40***
Nonverbal Stroop 0.10 -0.18* 0.29*** 0.35*** -0.64*** 0.42***
Graphical Representation of Correlation using Princiapal Component Analysis- 2d
# mdspca
adult_nscst %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, cc_ratio_t, ci_ratio_t, nscst_stroop_t) %>% setNames(all_names[c(1:4, 7:9)]) %>% mdspca()
title(figure_counter_func("Nonverbal Stroop Correlation"))

Graphical Representation of Correlation using Princiapal Component Analysis- 3d
# sphpca
adult_nscst %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, cc_ratio_t, ci_ratio_t, nscst_stroop_t) %>% setNames(all_names[c(1:4, 7:9)]) %>% sphpca()
title(figure_counter_func("\n Nonverbal Stroop \n Correlation 3d"), line = 1)

NSCST-Stroop Principal Components Analysis (PCA)

NSCST-Stroop Scree Plot
# Exploratory
adult_nscst_short <- adult_nscst %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, cc_ratio_t, ci_ratio_t, nscst_stroop_t) %>% as.data.frame()
fit <- principal(adult_nscst_short, rotate="promax")
scree.plot(adult_nscst_short,simu=20,use="P", title = figure_counter_func("Nonverbal Stroop Scree Plot"))

Two Factor PCA Model
fit_two_factor <- principal(adult_nscst_short, nfactors = 2, rotate="promax")
data.frame(matrix(round(as.numeric(fit_two_factor$loadings), 3), attributes(fit_two_factor$loadings)$dim, dimnames=list(all_names[c(1:4, 7:9)], c("Processing Speed", "Stroop")))) %>% kable() %>% kable_options()
Processing.Speed Stroop
Color 0.800 0.076
Word 0.802 -0.387
Color-Word 0.694 0.548
Interference 0.165 0.760
CC Ratio 0.499 -0.757
CI Ratio 0.661 0.074
Nonverbal Stroop 0.048 0.805
Three Factor PCA Model
fit_three_factor <- principal(adult_nscst_short, nfactors = 3, rotate="promax")
data.frame(matrix(round(as.numeric(fit_three_factor$loadings), 3), attributes(fit_three_factor$loadings)$dim, dimnames=list(all_names[c(1:4, 7:9)], c("Processing Speed", "Stroop", "Nonverbal Stroop")))) %>% kable() %>% kable_options()
Processing.Speed Stroop Nonverbal.Stroop
Color 0.527 0.590 0.076
Word 0.827 0.194 0.095
Color-Word 0.151 0.976 -0.019
Interference -0.365 0.820 -0.092
CC Ratio 0.860 -0.194 -0.088
CI Ratio 0.368 -0.129 0.883
Nonverbal Stroop -0.547 0.074 0.823

NSCST-Stroop Multi-trait Multimethod (MTMM)

# MTMM Stroop with Lavaan- not used here, use psy pack instead

# mtmm_basic <- "
# T1 =~ Interference
# T2 =~ CRScore + WRScore + CWRscore
# M1 =~ 1*Interference + 1*CRScore
# M2 =~ 1*Interference + 1*WRScore
# M3 =~ 1*Interference + 1*CWRscore
# 
# T1~~1*T1
# T2~~1*T2
# 
# T1~~T2
# "
# fit_mtmm_basic <- lavaan(mtmm_basic, data = adult, missing = "ml",
# auto.fix.first = FALSE, auto.var = TRUE)
# summary(fit_mtmm_basic, standardized = TRUE)


# MTMM with Interference, two scales
par(mfrow=c(1,2))
mtmm(adult_nscst_short,list(c("CRScore", "WRScore", "CWRscore", "cc_ratio_t", "ci_ratio_t"), c("Interference", "nscst_stroop_t")), graphItem = T, itemTot = T) %>%
  mutate_if(is.numeric, round, 3) %>%
  kable(caption = table_counter_func("Stroop-NSCST MTMM Two Scales")) %>%
  kable_options()
Table 34: Stroop-NSCST MTMM Two Scales
Item ScaleI Scale 1 Scale 2
1 CRScore 1 0.616 0.098
2 WRScore 1 0.673 -0.235
3 CWRscore 1 0.427 0.598
4 cc_ratio_t 1 0.336 -0.565
5 ci_ratio_t 1 0.470 0.322
11 Interference 2 0.057 0.349
12 nscst_stroop_t 2 -0.076 0.349
# # MTMM with Interference, three scales
# par(mfrow=c(1,3))
# mtmm(adult_nscst_short,list(c("WRScore", "cc_ratio_t"), c("CRScore", "CWRscore", "Interference"), c("ci_ratio_t", "nscst_stroop_t")), graphItem = T, itemTot = T) %>%
#   mutate_if(is.numeric, round, 3) %>%
#   kable(caption = table_counter_func("WJ-Stroop MTMM Three Scales")) %>%
#   kable_options()

The processing speed and Stroop scales were grouped into separate factors. Color-Word seemed to be have correspondence with both factors.

Stroop-NSCST Confirmatory Factor Analysis (CFA)

Using above information, CFA was conducted. Three scales were fit, Verbal Speed, Nonverbal Speed, and Interference. Interference represented Stroop and Nonverbal Stroop scores. The other scales from Stroop made up Verbal Speed and Nonverbal Stroop in Nonverbal Speed.

# CFA Stroop

# CFA Stroop NSCST
two_factor_cfa_stroop_nscst <- "
speed_scales =~ 1* CRScore + WRScore + CWRscore + cc_ratio_t + ci_ratio_t
interference_scales =~ 1*Interference + nscst_stroop_t
"
fit_two_factor_cfa_stroop_nscst<- cfa(two_factor_cfa_stroop_nscst, data = adult_nscst, missing = "ml")
# summary(fit_two_factor_cfa_stroop_nscst, standardized = TRUE)

# CFA Stroop NSCST Speed Broken into verbal and non
three_factor_cfa_stroop_nscst <- "
verbal_speed_scales =~ 1* CRScore + WRScore + CWRscore
nonverbal_speed_scales =~ 1*cc_ratio_t + ci_ratio_t
interference_scales =~ 1*Interference + nscst_stroop_t
"
fit_three_factor_cfa_stroop_nscst<- cfa(three_factor_cfa_stroop_nscst, data = adult_nscst, missing = "ml")

fitMeasures(fit_three_factor_cfa_stroop_nscst, c("chisq", "df", "pvalue")) %>% data.frame() %>% t() %>%
  kable(caption = table_counter_func("NSCST-Stroop Three Factor CFA Fit"), col.names = c("Chisq", "Df", "P")) %>%
  kable_options()
Table 35: NSCST-Stroop Three Factor CFA Fit
Chisq Df P
. 402.975 11 0
parameterEstimates(fit_three_factor_cfa_stroop_nscst)[1:(fitMeasures(fit_three_factor_cfa_stroop_nscst, "df")-1),] %>%
  mutate_if(is.numeric, round, 3) %>%
  kable(caption = table_counter_func("NSCST-Stroop Three Factor CFA Estimates")) %>%
  kable_options()
Table 36: NSCST-Stroop Three Factor CFA Estimates
lhs op rhs est se z pvalue ci.lower ci.upper
verbal_speed_scales =~ CRScore 1.000 0.000 NA NA 1.000 1.000
verbal_speed_scales =~ WRScore 0.682 0.113 6.058 0.000 0.461 0.903
verbal_speed_scales =~ CWRscore 0.489 0.080 6.145 0.000 0.333 0.645
nonverbal_speed_scales =~ cc_ratio_t 1.000 0.000 NA NA 1.000 1.000
nonverbal_speed_scales =~ ci_ratio_t -0.394 0.095 -4.165 0.000 -0.580 -0.209
interference_scales =~ Interference 1.000 0.000 NA NA 1.000 1.000
interference_scales =~ nscst_stroop_t 2.962 0.577 5.132 0.000 1.831 4.093
CRScore ~~ CRScore -6.517 26.695 -0.244 0.807 -58.838 45.804
WRScore ~~ WRScore 207.807 24.088 8.627 0.000 160.595 255.020
CWRscore ~~ CWRscore 75.769 10.310 7.349 0.000 55.563 95.976

CTMT

The CTMT was used to assess convergent validity. There were 17 cases that were identified in the CTMT dataset that didn’t have a matching case in the Stroop dataset. The number of cases that matched were 152. There were no incomplete cases.

CTMT-Stroop Correlation

CTMT-Stroop Correlation Matrix
# Stroop CTMT Correlation Matrix
ctmt_mat <- adult_ctmt %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, total) %>% correlation_matrix(digits = 2, replace_diagonal = T, use = 'lower')
dimnames(ctmt_mat)<- list(all_names[c(1:4, 10)], all_names[c(1:4, 10)]) 
ctmt_mat %>% kable(caption = table_counter_func("NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001")) %>% kable_options()
Table 37: NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001
Color Word Color-Word Interference Trails Total
Color
Word 0.67***
Color-Word 0.70*** 0.60***
Interference 0.13 0.07 0.77***
Trails Total 0.43*** 0.34*** 0.37*** 0.13
Graphical Representation of Correlation using Princiapal Component Analysis- 2d
# mdspca
adult_ctmt %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, total) %>% setNames(all_names[c(1:4, 10)]) %>% mdspca()
title(figure_counter_func("PCA Stroop CTMT"))

Graphical Representation of Correlation using Princiapal Component Analysis- 3d
# sphpca
adult_ctmt %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, total) %>% setNames(all_names[c(1:4, 10)]) %>% sphpca()
title(figure_counter_func("\n PCA Stroop \n CTMT 3d"), line = 1)

CTMT-Stroop Principal Components Analysis (PCA)

CTMT-Stroop Scree Plot
# Exploratory
adult_ctmt_short <- adult_ctmt %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, total) %>% as.data.frame()
fit <- principal(adult_ctmt_short, rotate="promax")
scree.plot(adult_ctmt_short,simu=20,use="P", title = figure_counter_func("\n Stroop CTMT Scree Plot"))

Two Factor PCA Model
fit_two_factor <- principal(adult_ctmt_short, nfactors = 2, rotate="promax")
data.frame(matrix(round(as.numeric(fit_two_factor$loadings), 3), attributes(fit_two_factor$loadings)$dim, dimnames=list(all_names[c(1:4, 10)], c("Processing Speed", "Stroop")))) %>% kable() %>% kable_options()
Processing.Speed Stroop
Color 0.901 -0.005
Word 0.891 -0.095
Color-Word 0.507 0.696
Interference -0.170 1.040
Trails Total 0.673 -0.064
Three Factor PCA Model
fit_three_factor <- principal(adult_ctmt_short, nfactors = 3, rotate="promax")
data.frame(matrix(round(as.numeric(fit_three_factor$loadings), 3), attributes(fit_three_factor$loadings)$dim, dimnames=list(all_names[c(1:4, 10)], c("Processing Speed", "Stroop", "Trails")))) %>% kable() %>% kable_options()
Processing.Speed Stroop Trails
Color 0.884 -0.030 0.088
Word 0.996 -0.133 -0.079
Color-Word 0.505 0.688 0.009
Interference -0.201 1.058 -0.013
Trails Total -0.010 -0.012 1.005

CTMT-Stroop Multi-trait Multimethod (MTMM)

# MTMM Stroop with Lavaan- not used here, use psy pack instead

# mtmm_basic <- "
# T1 =~ Interference
# T2 =~ CRScore + WRScore + CWRscore
# M1 =~ 1*Interference + 1*CRScore
# M2 =~ 1*Interference + 1*WRScore
# M3 =~ 1*Interference + 1*CWRscore
# 
# T1~~1*T1
# T2~~1*T2
# 
# T1~~T2
# "
# fit_mtmm_basic <- lavaan(mtmm_basic, data = adult, missing = "ml",
# auto.fix.first = FALSE, auto.var = TRUE)
# summary(fit_mtmm_basic, standardized = TRUE)


# MTMM with Interference, two scales
par(mfrow=c(1,2))
mtmm(adult_ctmt_short,list(c("CRScore", "WRScore", "total"), c("Interference", "CWRscore")), graphItem = T, itemTot = T)%>%
  mutate_if(is.numeric, round, 3) %>%
  kable(caption = table_counter_func("Stroop-CTMT MTMM")) %>%
  kable_options()
Table 38: Stroop-CTMT MTMM
Item ScaleI Scale 1 Scale 2
1 CRScore 1 0.579 0.493
2 WRScore 1 0.475 0.409
3 total 1 0.420 0.288
7 Interference 2 0.143 0.767
8 CWRscore 2 0.588 0.767

Stroop-CTMT Confirmatory Factor Analysis (CFA)

# CFA Stroop

# CFA Stroop CTMT
two_factor_cfa_stroop_ctmt <- "
stroop_scales =~ 1* CRScore + WRScore + CWRscore + total
ctmt_scales =~ 1*Interference
"
fit_two_factor_cfa_stroop_ctmt<- cfa(two_factor_cfa_stroop_ctmt, data = adult_ctmt, missing = "ml")
# summary(fit_two_factor_cfa_stroop_ctmt, standardized = TRUE)

# CFA CTMT speed and interference
two_factor_cfa_speed_interference <- "
speed =~ 1* CRScore + WRScore + total
interference =~ 1*CWRscore + Interference
"
fit_two_factor_cfa_speed_interference<- cfa(two_factor_cfa_speed_interference, data = adult_ctmt, missing = "ml")
# summary(fit_two_factor_cfa_speed_interference, standardized = TRUE)

fitMeasures(fit_two_factor_cfa_stroop_ctmt, c("chisq", "df", "pvalue")) %>% data.frame() %>% t() %>%
  kable(caption = table_counter_func("CTMT-Stroop Two Factor CFA Fit"), col.names = c("Chisq", "Df", "P")) %>%
  kable_options()
Table 39: CTMT-Stroop Two Factor CFA Fit
Chisq Df P
. 109.6116 5 0
parameterEstimates(fit_two_factor_cfa_stroop_ctmt)[1:(fitMeasures(fit_two_factor_cfa_stroop_ctmt, "df")-1),] %>%
  mutate_if(is.numeric, round, 3) %>%
  kable(caption = table_counter_func("CTMT-Stroop Two Factor CFA Estimates")) %>%
  kable_options()
Table 40: CTMT-Stroop Two Factor CFA Estimates
lhs op rhs est se z pvalue ci.lower ci.upper
stroop_scales =~ CRScore 1.000 0.000 NA NA 1.000 1.000
stroop_scales =~ WRScore 0.843 0.125 6.740 0.000 0.598 1.088
stroop_scales =~ CWRscore 2.532 0.499 5.075 0.000 1.554 3.510
stroop_scales =~ total 0.047 0.076 0.623 0.533 -0.101 0.196

Exploratory analysis above suggested that Trails Total may be associated with multiple Stroop scores. However, in CFA model construction Trails Total did not fit well. As such the optimum model was a two factor model with all Stroop scores as a factor and Trails Total as a separate factor.

Special Cases

Reading Ability and Style

# Select Relevant Variables, create composite reading score, create high and low reading group as those above vs. below 2 SD from mean
reading_dat <- adult_wj %>%
 dplyr::select(decimal_age, SelEdGrp, CRScore, WRScore, CWRscore, Interference, LW , WA) %>% 
  mutate(reading = LW + WA,
         level = ifelse(reading < mean(reading) - 2*sd(reading), "Low", "Normal"),
         color_word_ratio = (CRScore/WRScore),
         color_word_level = ifelse(color_word_ratio > (mean(color_word_ratio) + 2*SD(color_word_ratio)), "Color Faster", "Word Faster"))
# Calculate those with Color word ratio > 1
reading_dat %>%
  group_by(color_word_ratio >1) %>%
  dplyr::summarize(n = n()) %>%
  mutate(freq = n/sum(n)) %>%
  dplyr::filter(`color_word_ratio > 1` == T) %>%
  dplyr::select(freq) %>%
  mutate(freq = freq*100) %>%
  round(2) %>%
  paste0(., "%") -> color_above_1

As discussed in the Introduction chapter, the Stroop Effect has been observed in individuals with intact reading skills, hypothesized to be due to automatic word reading skills being faster than color reading skills for these individuals. Research has been mixed about the exact relationship between reading skills and Stroop performance, with some findings that individuals with lower reading abilities had less Stroop Interference than individuals with average or above abilities and other studies finding greater amounts of Interference with individuals with lower reading abilities.

We sought to investigate the relationship between reading and Stroop performance to understand how reading ability affects performance. An additional goal is to produce information the Stroop examiner can use to make actionable decisions about the appropriateness of Stroop administration with individuals and to aid in interpretation. Regarding this goal, analyses that may be simpler than other analyses, but provide succinct and usable information, without necessitating use of a computer in analysis, were considered to help provide actionable information. For full transparency and for maximum information, the more ideal analyses and simpler analyses are presented in conjuction.

The subset participants who were administered both the Stroop Test and Woodcock-Johnson (WJ) Test, who were used in the above analyses of Stroop and WJ concurrent validity was used (N = 304). Stroop Color, Word, Color-Word and Interference scores, along with WJ Letter-Word (LW) and Word-Attack (WA) scores, estimated to be good representations of reading skills, were examined. Raw scores from all these tasks were used, with factors Age and Education controlled for. These variables are used to determine and derive standardized scores on these tests, so controlling for these variables essentially standardizes the scores within this sample.

The WJ scores were summed together to create a “Reading” score, as an overall measure of reading skills (see Measures section above for description of these scales and how they comprise reading abilities). Reading scores were standardized (mean = 0 and SD = 1) in some analyses below to aid in generalizability of interpretation to other reading assessments. In addition, a ratio of an individual’s Color Raw Score to Word Raw Score was calculated (“Color to Word Ratio”), as a comparison of color naming proficiency to word naming proficiency, with a score of “1” indicating equivalency, and scores higher indicating better color naming than word naming. The Color to Word Ratio scores were used to identify and control for Stroop performance being a reflection of being able to identify colors better than numbers, as may occur with individuals with reading deficits.

Furthermore, individuals were assigned a reading level (“Reading Level”) of “Normal” or “Low” if they were above or below 2 SD’s below the mean, respectively, and a Color to Word Ratio level (“Color to Word Level”) of “Word Faster” or “Color Faster” if they were below or above 2 SD’s above the mean, respectively (not all individuals with “Color Faster” Color to Word levels had faster Color Raw Scores than Word Raw Scores [ratios above 1]. There was only 1 (0.33%) individual meeting this criteria, which limited useful information, thus leading to the use of > 2 SD’s as being reflective of discrepant color to word naming ability). These classifications were constructed to conveniently present summary information. Analyses were conducted with Reading and Color to Word Ratio as continuous variables.

Stroop and WJ Score Correlation

The correlation between Stroop scores and WJ scores are presented again here (previously presented with raw scores alone in Concurrent Validity section) with the additional calculated scores described above.

reading_dat %>%
dplyr::select(CRScore, WRScore, CWRscore, Interference, color_word_ratio, reading) %>%
  rename(Color = CRScore, Word = WRScore, Color_Word = CWRscore) %>%
  chart.Correlation()
  title(sub="NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001")

Related to the findings discussed above in Concurrent Validity, there are informative observations with implications for Stroop performance understanding that can be observed in the Correlation chart above. Some of these are:

  1. Interference is only weakly correlated with other scores, with the exception of Color-Word, suggesting there is something unique about Interference, that it involves skills not strongly associated with the skills involved in these other tasks, which include reading ability, raw processing speed, and perception.
  2. Interference is strongly correlated with Color-Word. Interference is calculated from Color-Word.
  3. Color-Word is moderately related to Color and Word.
  4. Interference is not correlated with Color or Word individually. It may seem logical (transitive property) that Interference should be related to Color and Word based on above, but that is not occurring.
  5. Reading and Color to Word Ratio have moderate correlations with Stroop scores.

The plots above are expanded to see the relationship between Reading and Color to Word Ratio with other variables, using Loess regression methods:

Reading Scatterplot
reading_dat %>%
  pivot_longer(., c(CRScore, WRScore, CWRscore, Interference, color_word_ratio), names_to = "raw_scores") %>%
  mutate(raw_scores = dplyr::recode(raw_scores, 'CRScore' = 'Color', 'WRScore' = 'Word', 'CWRscore' = 'Color-Word', 'color_word_ratio' = 'Color to Word Ratio')) %>%
  ggplot(data = ., aes(x = reading, y = value)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~raw_scores,scales = "free_y") +
  ggtitle("Reading Across Scores") + 
  xlab("Reading") +
  ylab("Score")

Visual inspection of the plots reveals approximately linear positive relationships of Reading with scores. The exception is Color to Word Ratio, which seems to decrease as Reading scores increase. As Reading seems to be both positively correlated with Color and Word, it seems that Word performance must increase quicker than Color increases at higher levels of Reading.

Color to Word Ratio Scatterplot
reading_dat %>%
  pivot_longer(., c(CRScore, WRScore, CWRscore, Interference, reading), names_to = "raw_scores") %>%
  mutate(raw_scores = dplyr::recode(raw_scores, 'CRScore' = 'Color', 'WRScore' = 'Word', 'CWRscore' = 'Color-Word', 'reading' = 'Reading')) %>%
  ggplot(data = ., aes(x = color_word_ratio, y = value)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~raw_scores,scales = "free_y") +
  ggtitle("Color to Word Ratio Across Scores") + 
  xlab("Color to Word Ratio") +
  ylab("Score")

Fitted lines in the Color to Word Ratio plots seem to have a parabolic shape, suggesting a quadratic relationship of Color to Word Ratio with other scores. The fitted line of its relationship with Interference seems generally flat.

# Plot Reading by Interference
p <- reading_dat %>%
  # dplyr::filter(level == "normal") %>%
 ggplot(data = ., aes(x = color_word_ratio, y = CWRscore, color = "Overall")) +
  geom_smooth(method = lm, formula = y ~ poly(x, 2), se = FALSE) +
  ggtitle("Interference Scores Across Reading Scores") +
  xlab("Reading") + 
  scale_colour_discrete(name = "Color to Word Ratio Level")
#p + geom_smooth(data = reading_dat, aes(x = reading, y = Interference, color = color_word_level), method = "lm", se = F)

Score Description by Reading and Color to Word Ratio Level

  reading_summary <- function(...){
  reading_dat %>%
    group_by(!!!quos(...)) %>%
    summarize_if(is.numeric, list(mean, sd)) %>%
    mutate_if(is.numeric, round, 2) %>%
    as.data.frame()
   }
   
  reading_sum <- reading_summary(level, color_word_level) %>%
    mutate(level = ifelse(duplicated(level), "", level))

    cols <- unique(gsub("_[^_]+$", "", colnames(reading_sum[,-(1:2)])))
    cbind.data.frame(reading_sum[,1:2], sapply(cols, function(cols_present) cbind(paste0(do.call(paste, c(reading_sum[grepl(cols_present, colnames(reading_sum))], sep="(")), ")")))) %>%
      kable(col.names = c("Reading Level", "Color to Word Level", "Age", score_names_int, "Letter-Word", "Word-Attack", "Reading", "Color to Word Ratio"), caption = table_counter_func("Mean(SD)")) %>%
      kable_options() %>%
      column_spec(4:7, background = "yellow")
Table 41: Mean(SD)
Reading Level Color to Word Level Age Color Raw Score Word Raw Score Color-Word Raw Score Interference Letter-Word Word-Attack Reading Color to Word Ratio
Low Color Faster 28.79(10.31) 70(18.38) 68(25.46) 38.5(21.92) 4.5(12.02) 55(2.83) 15.5(3.54) 70.5(6.36) 1.05(0.12)
Word Faster 34.72(11.85) 59.27(8.79) 86.18(10.72) 27.55(8.26) -7.27(6.66) 57(3.61) 14.73(5.39) 71.73(6.05) 0.7(0.13)
Normal Color Faster 25.87(12.19) 77.86(21.86) 82(23.67) 48.71(12.32) 9.29(9.95) 69.14(4.3) 26.29(4.75) 95.43(7.89) 0.95(0.03)
Word Faster 38.3(15.86) 73.93(12.71) 105.71(14.63) 42.5(10.6) -0.55(9.19) 69.5(4.46) 26.91(2.77) 96.42(6.32) 0.7(0.09)

Individuals with Low Reading levels generally have lower scores across all Stroop and WJ scores, including Interference scores. When those groups are broken into groups with Color Faster or Word Faster Color to Word Ratio levels, those in the Color Faster groups in either High or Low Reading levels had higher Interference scores than those in the Word Faster group in either High or Low Reading levels. Furthermore, for the score on the task where the Stroop Effect is evoked, the Color-Word Raw Score (which in comparison to the Color and Word Scores determines the Interference score) is higher overall for those with higher reading abilities, but is also higher within reading groups for those with Color Faster Color to Word Ratio levels. So, taken together the initial impressions from these descriptive statistics is that:

  1. Interference Score is driven by Color to Word Ratio
  2. Color-Word Score is driven by Reading ability
  3. Word Raw Score is faster than Color Raw Score for Word Faster group than Color Raw Score is faster than Word Raw Score for Color Faster group.
  4. As Interference is a comparison of Color-Word Incongruent task performance compared to Color and Word Congruent performance Word Faster individuals may be establishing higher scores in those tasks compared to Interference, thus leading to lower comparative Interference scores

To more formally assess how Interference scores vary by reading ability and color and word performance, mixed-effects models were constructed with Reading scores and Color to Word Ratio scores as predictors of Stroop scores. Age and Education Group were random factors to be controlled for, as were each Reading scores and Color to Word Ratio scores in models when the other variable was entered as a main effect. The “Significance” column presents the p-value of the model compared to the next less restrictive model.

  lmfun <- function(data, formula_choice, y, x, z, i, random, polynom) {
    y <- sym(y[i])
    x <- enquo(x)
    z <- enquo(z)
  
    model_formula <- formula(paste0(quo_name(y), "~ poly(",quo_name(x), ",", polynom, ") +", quo_name(z), random))
    null_model <- formula(paste0(quo_name(y), "~ 1 +", quo_name(z), random))
    
    if(formula_choice == "lmer"){
      lmer(model_formula, data = data)
    } else if (formula_choice == "lm") {
      lm(model_formula, data = data)
    } else if (formula_choice == "lm null") {
      lm(null_model, data = data)
    } else if (formula_choice == "lmer null") {
      lmer(null_model, data = data)
    }
  }
  
lm_applier <- function(i, formula_choice, random, polynomial){lmfun(reading_dat, formula_choice, raw_score_column_names_int, "color_word_ratio", reading, i, random, polynomial)}

lm_null_applier <- function(i, formula_choice, random, polynomial){lmfun(reading_dat, formula_choice, raw_score_column_names_int, "color_word_ratio", 0, i, random, polynomial)}

# Apply for lmer and poly and null mods
lmer_mod_quadratic <- lapply(1:4, lm_applier, formula_choice = "lmer", random = "+ (1|decimal_age) + (1|SelEdGrp)", polynomial = 2)

lmer_mod_linear <- lapply(1:4, lm_applier, formula_choice = "lmer", random = "+ (1|decimal_age) + (1|SelEdGrp)", polynomial = 1)

lmer_mod_reading_only <- lapply(1:4, lm_applier, formula_choice = "lmer null", random = "+ (1|decimal_age) + (1|SelEdGrp)", polynomial = 0)

lmer_mod_null <- lapply(1:4, lm_null_applier, formula_choice = "lmer null", random = "+ (1|decimal_age) + (1|SelEdGrp)", polynomial = 0)

lm_mod_quadratic <- lapply(1:4, lm_applier, formula_choice = "lm", random = " ", polynomial = 2)

lm_mod_linear <- lapply(1:4, lm_applier, formula_choice = "lm", random = " ", polynomial = 1)

lmer_mod_reading_only <- lapply(1:4, lm_applier, formula_choice = "lm null", random = " ", polynomial = 0)

reg_p_vals <- list(
  color_chi <- round(anova(lmer_mod_quadratic[[1]], lmer_mod_linear[[1]])$`Pr(>Chisq)`[-1],3),
  word_chi <- round(anova(lmer_mod_quadratic[[2]], lmer_mod_linear[[2]])$`Pr(>Chisq)`[-1],3),
  color_word_chi <- round(anova(lmer_mod_linear[[3]], lmer_mod_null[[3]])$`Pr(>Chisq)`[-1],3),
  interference_chi <- round(anova(lmer_mod_linear[[4]], lmer_mod_null[[4]])$`Pr(>Chisq)`[-1],3)
)

reg_table_func <- function(mod){
  list_equations <- lapply(1:4, function(i, mod) cbind.data.frame(Name = scale_list_int[[i]], Significance = reg_p_vals[[i]], Terms = row.names(coef(summary(mod[[i]]))), round(coef(summary(mod[[i]])),3)) %>%
                             mutate(Significance = ifelse(duplicated(Significance), "", Significance)), mod = mod)
  
  do.call(rbind, list_equations) %>%
    mutate(Name = ifelse(duplicated(Name), "", Name),
    Terms = dplyr::recode(Terms, "(Intercept)" = "Intercept", "poly(color_word_ratio, 2)1" = "Color Word Ratio", "poly(color_word_ratio, 2)2" = "Color Word Ratio Squared", "reading" = "Reading", "poly(color_word_ratio, 1)" = "Color Word Ratio"))
  }

print_reg <- function(mod1, mod2){
  # name <- deparse(substitute(mod1))
  rbind.data.frame(reg_table_func(mod1) %>%
    slice(1:8),
    reg_table_func(mod2) %>%
      slice(7:12)) %>%
      kable(caption = "Regression Equations for Predicting Stroop Scores from Reading and Color to Word Ratio") %>%
      # kable(caption = table_counter_func(name)) %>% # Use this way to print by model name
      kable_options()
}
print_reg(lmer_mod_quadratic, lmer_mod_linear)
Regression Equations for Predicting Stroop Scores from Reading and Color to Word Ratio
Name Significance Terms Estimate Std. Error df t value Pr(>|t|)
(Intercept) Color Raw Score 0 Intercept 26.883 6.798 299.888 3.955 0.000
poly(color_word_ratio, 2)1 Color Word Ratio 126.983 9.755 297.684 13.018 0.000
poly(color_word_ratio, 2)2 Color Word Ratio Squared -51.470 9.966 294.234 -5.165 0.000
reading Reading 0.489 0.071 299.749 6.875 0.000
(Intercept)1 Word Raw Score 0.002 Intercept 39.738 9.641 82.448 4.122 0.000
poly(color_word_ratio, 2)11 Color Word Ratio -73.990 13.744 292.496 -5.383 0.000
poly(color_word_ratio, 2)21 Color Word Ratio Squared -43.989 14.024 279.640 -3.137 0.002
reading1 Reading 0.676 0.101 79.552 6.709 0.000
(Intercept)2 Color-Word Raw Score 0 Intercept -3.741 7.089 172.717 -0.528 0.598
poly(color_word_ratio, 1)2 Color Word Ratio 48.638 9.913 295.702 4.906 0.000
reading2 Reading 0.481 0.074 187.697 6.502 0.000
(Intercept)3 Interference 0.005 Intercept -19.478 6.496 150.529 -2.999 0.003
poly(color_word_ratio, 1)3 Color Word Ratio 19.116 9.138 297.017 2.092 0.037
reading3 Reading 0.200 0.068 161.457 2.952 0.004

The above table presents the regression equations for prediction of Stroop scores from Color Word Ratio and Reading Scores. We can see that Color Word Ratio has a quadratic relationship with the scores Color and Word and a linear relationship with Color-Word and Interference. To summarize, increased reading ability seems to lead to higher Stroop scores, while increased Color to Word performance leads to increased scores to a point in Color and Word and monotonically with Color-Word and Interference.

Critical Scores

Color to Word Ratio

Color to Word Ratio score had a slight quadratic relationship with Color-Word. We can find the optimal Color to Word Ratio score for Color-Word performance:

lmfun <- function(data, y, x, i) {
    y <- sym(y[i])
    x <- enquo(x)
    lm_model_formula <- formula(paste0(quo_name(y), "~ poly(",quo_name(x), ",2)"))
    lm(lm_model_formula, data = data)
  }
lm_applier <- function(i){lmfun(reading_dat, raw_score_column_names_int, "color_word_ratio", i)}

lm_mod <- lapply(1:4, lm_applier)

list_equations <- lapply(1:4, function(i) cbind.data.frame(Name = scale_list_int[[i]], Terms = row.names(coef(summary(lm_mod[[i]]))), round(coef(summary(lm_mod[[i]])),3)))

lapply(3, function(i){
  list_equations[[i]] %>%
    dplyr::filter(grepl("poly", Terms)) %>%
    dplyr::select(Estimate) %>%
    slice(1) -> first
  list_equations[[i]] %>%
    filter(grepl("poly", Terms)) %>%
    dplyr::select(Estimate) %>%
    slice(2) -> second
    -first / (2*second)
})[[1]] -> maximum_color_word

result <- function(a,b,c){
  if(delta(a,b,c) > 0){ # first case D>0
        x_1 = (-b+sqrt(delta(a,b,c)))/(2*a)
        x_2 = (-b-sqrt(delta(a,b,c)))/(2*a)
        result = c(x_1,x_2)
  }
  else if(delta(a,b,c) == 0){ # second case D=0
        x = -b/(2*a)
  }
  else {"There are no real roots."} # third case D<0
}

# Constructing delta
delta<-function(a,b,c){
      b^2-4*a*c
}

extreme_min <- mean(reading_dat$CWRscore) - 3*sd(reading_dat$CWRscore)

extreme_color_word_ratio <- result(list_equations[[3]]$Estimate[1] - extreme_min, list_equations[[3]]$Estimate[2], list_equations[[3]]$Estimate[3])
extreme_color_word_ratio <- extreme_color_word_ratio[extreme_color_word_ratio > 0]

The maximum Color to Word ratio score for Color-Word performance is 73.92%, which means about 35.27% as many words are read as colors.

To identify Color to Word Ratios that may lead to extremely low scores, defined here as less than 3 SD’s from the mean, the quadratic regression equation was solved for the positive root, and resulted in: 49.26%. At this ratio, the individual is reading about twice as many words as colors. At the other end, as noted above only 1(0.33%) individual had a Color to Word ratio above 1, suggesting that is a relatively abnormal relative score. However, there was not an extreme decrease in Color-Word score approaching or above that.

It is important to note that the test did not perform unexpetedly for individuals with those scores reviewed above. Rather these are some of the conditions that led, and could lead, to extreme scores.

We can also try to identify Reading scores that could lead to extreme performance:

Nonlinear and spline models were constructed to consider nonlinear effects with Reading as a fixed main effect.

Reading

# summary(cubic_reading_mod) 
lmer(Interference ~ splines::bs(reading, 3) + (1|color_word_ratio) + (1|decimal_age) + (1|SelEdGrp), data = reading_dat, REML = F) -> spl_read_int
lmer(Interference ~ 1+ (1|color_word_ratio) + (1|decimal_age) + (1|SelEdGrp), data = reading_dat, REML = F) -> no_intercept_mod_int
# summary(spl_read)
spl_lm_results_int <- anova(no_intercept_mod_int, spl_read_int)
simple_spl_int <- lm(Interference ~ splines::bs(reading, 3), data = reading_dat)
# summary(cubic_reading_mod) 
lmer(CWRscore ~ splines::bs(reading, 3) + (1|color_word_ratio) + (1|decimal_age) + (1|SelEdGrp), data = reading_dat, REML = F) -> spl_read_cw
# summary(spl_read)
lmer(CWRscore ~ 1+ (1|color_word_ratio) + (1|decimal_age) + (1|SelEdGrp), data = reading_dat, REML = F) -> no_intercept_mod_cw
spl_lm_results_cw <- anova(no_intercept_mod_cw, spl_read_cw)
simple_spl <- lm(CWRscore ~ splines::bs(reading, 3), data = reading_dat)
simple_lm <- lm(CWRscore ~ reading, data = reading_dat)

Both nonlinear and spline models are superior to the null model, without Reading as a predictor, and Interference (X2(3) p < 0.004) and Color-Word Score as outcomes(X2(3) p < 0), using the spline model compared to the null model). These models are not superior to simple linear effects for Reading and quadratic effects for Color to Word Ratio, as reviewed in the regression equation model selection procedures above. Models with Color and Word as outcomes with anything beyond the terms described in the models above performaed poorly and are not explored further.

When considering the cubic model compared with the spline model with both Interference and Color-Word as outcomes, the cubic term for the cubic model was not significant, while the three-knot term for the spline model was. As we desire to know Reading Score values that could prove problematic, using the spline model with the significant three-knot term was determined to be the best approach. As we desire to identify reading scores that could be problematic for a range of participants in terms of Interference and Color-Word performance, a simple linear spline model was used with standardized Reading scores.

Interference
# Fit overall Reading mod- get high know
scaled_reading <- data.frame(Interference_stan = scale(reading_dat$Interference), reading_stan = scale(reading_dat$reading))
reading_lm_mod <- lm(Interference_stan ~ reading_stan, data = scaled_reading)
fit_seg <- segmented(reading_lm_mod, seg.Z = ~reading_stan, psi = list(reading_stan=c(-2.5,0)),control = seg.control(n.boot=0))
# fit_seg <- segmented(reading_lm_mod, seg.Z = ~reading, psi = list(reading=c(75, 80))) # use with unstandardized

# reading_age_break_low <- ceiling(fit_seg$psi[1, 2])
pr <- seq(min(scaled_reading$reading_stan), max(scaled_reading$reading_stan), .1)
predicted_values <- cbind.data.frame(reading_stan = pr, predicted_interference = predict(fit_seg, data.frame(reading_stan = pr)))

predicted_break_value = predict(fit_seg, data.frame(reading_stan = fit_seg$psi[,2]))

predicted_break_values <- cbind.data.frame(break_value = fit_seg$psi[,2], predicted_break_value = predicted_break_value, se = fit_seg$psi[,3], location = c("Low", "High"))

p <- ggplot(predicted_values, aes(x = reading_stan, y = predicted_interference)) +
  geom_line() 
 p + geom_point(data = predicted_break_values, aes(x = break_value, y = predicted_break_value)) +
   geom_label(data = predicted_break_values, aes(x = break_value, y = predicted_break_value, label = paste0(round(break_value,2), "/", round(predicted_break_value,2), " (",round(se,2), ")"), color = location), nudge_y = .1) +
   ggtitle("Interference Scores Across \n Standardized Reading Scores Segmented at Break Point") +
  xlab("Standardized Reading Scores") +
   ylab("Predicted Interference") +
  labs(color = "Reading Score/ \n Predicted Interference(SE)") +
  scale_x_continuous(breaks = seq(-5,2,1)) +
   coord_cartesian(ylim = c(-.6, .3))

 medium_lm <- scaled_reading %>%
   filter(reading_stan > fit_seg$psi[1,2] & reading_stan < fit_seg$psi[2,2]) %>%
   lm(Interference_stan ~ reading_stan, data = .)
  medium_lm_null <- scaled_reading %>%
   filter(reading_stan > fit_seg$psi[1,2] & reading_stan < fit_seg$psi[2,2]) %>%
   lm(Interference_stan ~ 1, data = .)
  medium_anova <- anova(medium_lm, medium_lm_null)

It can be seen in the plot that two breaks occur at standardized Reading score values -3.44 and 0.33. For scores in between these breakpoints, there is not a relationship between reading and Interference X2(1, p = 0.76). In other words, it seems like having a very low reading level has an effect on Interference. This is seen at more than 3 SD’s below the mean, so it is only individuals with quite low levels. Also, as Reading scores increase above the mean, there is also a positive relationship with Interference.

Color-Word
# Fit overall Reading mod- get high know
scaled_reading <- data.frame(CWRscore = reading_dat$CWRscore, reading_stan = scale(reading_dat$reading))
reading_lm_mod <- lm(CWRscore ~ reading_stan, data = scaled_reading)
fit_seg <- segmented(reading_lm_mod, seg.Z = ~reading_stan, psi = list(reading_stan=c(-2.5, 1)),control = seg.control(n.boot=0))
# fit_seg <- segmented(reading_lm_mod, seg.Z = ~reading, psi = list(reading=c(75, 80))) # use with unstandardized

# reading_age_break_low <- ceiling(fit_seg$psi[1, 2])
pr <- seq(min(scaled_reading$reading_stan), max(scaled_reading$reading_stan), .1)

predicted_values <- cbind.data.frame(reading_stan = pr, predicted_CWRscore = predict(fit_seg, data.frame(reading_stan = pr)))

predicted_break_value = predict(fit_seg, data.frame(reading_stan = fit_seg$psi[,2]))

predicted_break_values <- cbind.data.frame(break_value = fit_seg$psi[,2], predicted_break_value = predicted_break_value, se = fit_seg$psi[,3], location = c("Low", "High"))

p <- ggplot(predicted_values, aes(x = reading_stan, y = predicted_CWRscore)) +
  geom_line() 
 p + geom_point(data = predicted_break_values, aes(x = break_value, y = predicted_break_value)) +
   geom_label(data = predicted_break_values, aes(x = break_value, y = predicted_break_value, label = paste0(round(break_value,2), "/", round(predicted_break_value,2), " (",round(se,2), ")"), color = location), nudge_y = .1) +
   ggtitle("Color-Word Scores Across \n Standardized Reading Scores Segmented at Break Point") +
  xlab("Standardized Reading Scores") +
   ylab("Predicted CWRscore") +
  labs(color = "Reading Score/ \n Predicted CWRscore(SE)") +
  scale_x_continuous(breaks = seq(-5,2,1))

 medium_lm <- scaled_reading %>%
   filter(reading_stan > fit_seg$psi[1,2] & reading_stan < fit_seg$psi[2,2]) %>%
   lm(CWRscore ~ reading_stan, data = .)
  medium_lm_null <- scaled_reading %>%
   filter(reading_stan > fit_seg$psi[1,2] & reading_stan < fit_seg$psi[2,2]) %>%
   lm(CWRscore ~ 1, data = .)
  medium_anova <- anova(medium_lm, medium_lm_null)

It can be seen in the plot that two breaks occur at standardized Reading score values -3.07 and 1.1. For scores in between these breakpoints, there continues to be a relationship between Reading and Color-Word X2(1, p = 0), but the relationship is different below the breakpoint with increases in Reading leading to greater increases in Color-Word score in this interval. This is seen at more than 3 SD’s below the mean, so it is only individuals with quite low levels.

Special Cases Summary

In this section the relationships between reading and speed of processing constituent parts of the Stroop test were examined. The goal was to help understand how these abilities are related to abilities measured by the Stroop and to give examiners information to help make administration decisions and for interpretation.

The best fitting models showed that Stroop scores increased as reading ability increased. The ideal proficiency of processing words alone compared to colors alone seemed to be about 35% faster. It seemed that ratios of twice as many words as colors or a 1:1 ratio of words to colors were unusual and could lead to unusual scores.

In terms of extreme scores, reading abilities less than 3 SD’s below the mean (lowest 1st percentile), may be related to poor incongruent Stroop performance at a more accelerated rate than higher scores. Increases in reading abilities were associated with increases in Stroop Color-Word task performance and may have accelerated at scores significantly above the mean (>1 SD).

The above analyses shows the Stroop seems robust across individuals with a range of reading abilities and styles of processing colors and sight words. There were indications of increases in performance with reading and optimal color to word processing, but Interference, which controls for increased performance in incongruent processing by comparisons to congruent processing was not or only weakly correlated to these abilities. Reading ability is related to other abilities, so it is not suprising to see a relationship with that ability and Stroop performance.

This analysis involves individuals broadly representative of the general population of individuals proficient enough to complete the WJ LW and WA tasks and the Stroop tasks. Further research should include extreme cases- those with very low and high reading abilities and processing styles to consider the test performance with those groups.

References

Protopapas, A., Archonti, A., & Skaloumbakas, C. (2006). Reading ability is negatively related to stroop interference. Cognitive Psychology, 54(3), 251-282.