# Read data
stroop_adult <- readxl::read_xls("Stroop Adult Data.xls")
retest <- readxl::read_xls("Stroop Adult Data RETEST.xls")
wj <- readxl::read_xls("WJ3 Adult Data.xls")
ctmt <- readxl::read_xls("CTMT Adult Data.xls")
nscst <- readxl::read_xls("NSCST Data.xls")
cc_ratio <- read.csv("cc_ratio_scale.csv")
ci_ratio <- read.csv("ci_ratio_scale.csv")
stroop_effect <- read.csv("stroop_effect_scale.csv")

# Manipulate data
## Age as decimal from birth to exam date
birth_date <- as.Date(paste(stroop_adult$Byear, stroop_adult$Bmonth, stroop_adult$Bday, sep = "/"), origin = 1970-01-01)
test_date <- as.Date(paste(stroop_adult$EVyear, stroop_adult$EVmonth, stroop_adult$EVday, sep = "/"), origin = 1970-01-01)
years<- floor(age_calc(birth_date, enddate = test_date, units = "years"))
months<- floor(age_calc(birth_date, enddate = test_date, units = "months")%%12)
stroop_adult$decimal_age <- years + months/12
## Recode main data
adult <- stroop_adult %>%
  #add_labels(Gender, labels = c("Male" = "1", "Female" = "2"))
  mutate(
    Gender = dplyr::recode(as.factor(Gender), '1' = "Male", '2' = "Female", .default = "Another Value"),
    Ethnic = dplyr::recode(as.factor(Ethnic), '1' = "Caucasian", '2' = "African-American", '3' = "Asian", '4' = "Hispanic", '5' = "Native-American", '6' = "Other", .default = "Another Value"),
    SelEdGrp = dplyr::recode(as.factor(SelEdGrp), '1' = "No HS Diploma", '2' = "HS Diploma", '3' = "Some College", '4' = "BA", '5' = "Post-grad", .default = "Another Value"),
    Region = dplyr::recode(as.factor(Region), '1' = "Northeast", '2' = "Midwest", '3' = "South", '4' = "West", .default = "Another Value"),
    Commun = dplyr::recode(as.factor(Commun), '1' = "Rural", '2' = "Urban", .default = "Another Value"),
    PrimeDX = dplyr::recode(as.factor(PrimeDX), '0' = "No Dx", '2' = "LD", '4' = "Autism", '6' = "ID", '10' = "Dementia", '11' = "ADHD",
                            '12' = "ESL", '14' = "TBI", .default = "Another Value"),
    Drugs = dplyr::recode(as.factor(`Drugs?`), '1' = "No", '2' = "Yes", .default = "Another Value"),
    ColorBl = dplyr::recode(as.factor(ColorBl), '1' = "No", .default = "Another Value"),
    age = as.numeric(str_trunc(decimal_age, 1, ellipsis = "")),
         age = recode(age, '1' = "15-19", '2' = "20-29", '3' = "30-39", '4' = "40-49", '5' = "50-59", '6' = "60-69", '7' = "70-79", '8' = "80-89"),
    case_id = paste(ID, `Case #`, sep="_")
  )

duplicate_case <- subset(adult, ID=="LA145" & `Case #` == "1570")

adult <- anti_join(adult, duplicate_case, by = "case_id")

# Recode convergent datasets- get id to match across sets and join with full set, with names "full-set_other-data"
retest <- retest %>%
  mutate(case_id = paste(gsub("R","", ID), `Case #`, sep="_"))
adult_retest <- inner_join(adult, retest, by = c("case_id"))

wj <- wj %>%
  mutate(case_id = paste(ID, `Case #`, sep="_")) %>%
  rename(LW = `WJ3-LW`, WA = `WJ3-WA`)
adult_wj <- inner_join(adult, wj, by = c("case_id"))

ctmt <- ctmt %>%
  mutate(case_id = paste(ID, `Case #`, sep="_")) %>%
  rename(total = `T-Sc Sum`)
adult_ctmt <- inner_join(adult, ctmt, by = c("case_id"))

nscst <- nscst %>%
  mutate(case_id = paste(ID, `Case #`, sep="_"),
         Stroop = round(CIRatio - CCRatio,2)) 

adult_nscst <- inner_join(adult, nscst, by = c("case_id"))

# Prepare Nonverbal Stroop data for looking up
nscst_df <- list(stroop_effect)

  scale_score_func <- function(dataset, age, score) {
                            dataset %>%
 pivot_longer(., everything()[-1]) %>%
  mutate(endpoint = stringr::str_extract(name, "[A-z]$"),
         upper_age = as.numeric(gsub("[A-z]", "", name))
        ) %>%
  dplyr::select(., -"name") %>%
    pivot_wider(names_from = endpoint, values_from = value) %>%
    mutate(lower_age = as.numeric(rep(c(3,unique(upper_age)[-length(unique(upper_age))]), length(unique(.[[1]]))))) %>%
    filter(lower_age <= age & age < upper_age )%>%
    summarize(score = ifelse(score < L[1], 20, ifelse(score > H[nrow(.)], 90,
                                                    .[[1]][L <= score & score <= H ])))
  }
  
  adult_nscst <- adult_nscst %>%
  mutate(
    cc_ratio_t_rev = unlist(mapply(function(x,y) scale_score_func(cc_ratio, x, y), adult_nscst$decimal_age, adult_nscst$CCRatio)),
    ci_ratio_t_rev = unlist(mapply(function(x,y) scale_score_func(ci_ratio, x, y), adult_nscst$decimal_age, adult_nscst$CIRatio)),
    nscst_stroop_t_rev = unlist(mapply(function(x,y) scale_score_func(stroop_effect, x, y), adult_nscst$decimal_age, adult_nscst$Stroop)),
    cc_ratio_t = 50 - (cc_ratio_t_rev - 50),
    ci_ratio_t = 50 - (ci_ratio_t_rev - 50),
    nscst_stroop_t = 50 - (nscst_stroop_t_rev - 50)
  )
### Join al df's
  adult_all <- adult %>% inner_join(., adult_nscst, by = "case_id") %>% inner_join(., wj, by = "case_id") %>% inner_join(., ctmt, by = "case_id")

#### Enter following information based on scores to analyze

# Enter score names for descriptive stats
raw_score_quos <- quos(CRScore, WRScore, CWRscore)
raw_score_column_names <- c("CRScore", "WRScore", "CWRscore")
raw_score_quos_int <- quos(CRScore, WRScore, CWRscore, Interference)
raw_score_column_names_int <- c("CRScore", "WRScore", "CWRscore", "Interference")
# Names of Scores
scale_list <- list("Color Raw Score", "Word Raw Score", "Color-Word Raw Score")
score_names<- c("Color Raw Score", "Word Raw Score", "Color-Word Raw Score")
scale_score <- c(quo(CRScore), quo(WRScore), quo(CWRscore))
scale_list_int <- list("Color Raw Score", "Word Raw Score", "Color-Word Raw Score", "Interference")
score_names_int <- c("Color Raw Score", "Word Raw Score", "Color-Word Raw Score", "Interference")
scale_score_int <- c(quo(CRScore), quo(WRScore), quo(CWRscore), quo(Interference))
# Demographic vars
group_column <- c(quo(age), quo(Gender), quo(Ethnic), quo(SelEdGrp), quo(Region), quo(Commun), quo(PrimeDX))
group_column_title <- list("Age", "Gender", "Ethnicity", "Education", "Region", "Community", "Diagnosis")
# Names of all scores in all different tests
all_names <- c("Color", "Word", "Color-Word", "Interference", "Letter-Word", "Word Attack", "CC Ratio", "CI Ratio", "Nonverbal Stroop", "Trails Total")
# master dataset name
adult <- adult

Introduction

Data were collected to analyze the performance of the Stroop Color and Word Test (“Stroop”) with a sample reflective of the current population in the United States.

Using a national standardization plan, the Stroop was administered and data were collected by trained field researchers over a period of time between 2013 to 2014 with 373 participants. The field researchers represent professionals targeted to administer the Stroop (see Qualifications section of this Manual). The field researchers were trained Give more information about data collection.

In this data analysis, Stroop scores were examined, models to predict Stroop scores were constructed, scaled score tables were constructed from these models, and reliability and validity were examined.

Data Cleaning

Data were examined for integrity of entry and for aberrations in values. The details of this process are listed in Appendix Appendix # - place below to ‘Standardization’ in Appendix.

Data were cleaned using the following procedures:

Cases were examined for duplicate entry using combinations of fields unlikely to result in random duplication.
Raw score outliers (see below for procedure to identify outliers) were examined for error.
Differences between test and retest values (see Test-Retest section) were examined for extreme values.

In all above procedures, if and when any cases were identified programmatically, those cases were consulted by visual inspection and consultation of raw data to determine if there was an incorrect value. If data were determined not to be incorrect they remained in the dataset.

Stroop Extreme Values

Scores were examined to identify outliers. Scores that were +/- 3 Standard Deviations (SD) from the mean were identified in the following table.

bind_rows(
    bind_rows(lapply(scale_score_int, function(score){
    adult %>%
      filter(!!score < (mean(!!score) - 3*(sd(!!score)))) %>%
      dplyr::select(!!!scale_score_int, !!!group_column) %>%
      mutate( #ID = str_extract(ID, "[A-z]{1,2}"),
             extreme_value = paste(quo_name(score), "Low"))
  })),
  bind_rows(lapply(scale_score_int, function(score){
    adult %>%
      filter(!!score > (mean(!!score) + 3*(sd(!!score)))) %>%
      dplyr::select(!!!scale_score_int, !!!group_column) %>%
      mutate(# ID = str_extract(ID, "[A-z]{1,2}"),
             extreme_value = paste(quo_name(score), "High"))
  })) 
) %>% 
  arrange(extreme_value) -> dat_df
ft <- table_output_func(dat_df, colnames = c(score_names_int, unlist(group_column_title), "Extreme Score"), caption =  table_counter_func("Extreme Score"))

Warning in readLines(knitr::current_input()): incomplete final line found on
'stroop_adult_analysis.Rmd'

Warning: namespace 'highr' is not available and has been replaced
by .GlobalEnv when processing object '<unknown>'

ft

*Table* 1: Extreme Score
Color Raw Score	Word Raw Score	Color-Word Raw Score	Interference	Age	Gender	Ethnicity	Education	Region	Community	Diagnosis	Extreme Score
120	97	70	16	40-49	Male	Caucasian	BA	South	Rural	No Dx	CRScore High
31	60	12	-8	20-29	Female	Caucasian	HS Diploma	Northeast	Urban	No Dx	CRScore Low
102	134	76	18	30-39	Female	Asian	BA	Northeast	Urban	No Dx	CWRscore High
70	101	94	53	40-49	Male	Other	Post-grad	South	Urban	No Dx	CWRscore High
67	94	43	54	60-69	Female	Other	BA	South	Urban	No Dx	Interference High
70	101	94	53	40-49	Male	Other	Post-grad	South	Urban	No Dx	Interference High
90	125	20	-32	40-49	Female	Caucasian	HS Diploma	Northeast	Urban	No Dx	Interference Low
57	50	23	-4	30-39	Female	African-American	Some College	South	Urban	No Dx	WRScore Low
51	48	29	4	40-49	Male	Hispanic	No HS Diploma	South	Rural	No Dx	WRScore Low
57	52	37	10	40-49	Male	Hispanic	No HS Diploma	South	Rural	No Dx	WRScore Low

# %>%
  # kable(col.names = c(score_names_int, unlist(group_column_title), table_counter_func("Extreme Score"))) %>%
  # kable_options()

As can be seen in the above table, there were a relatively balanced number of extreme values above and below three standard deviations from the mean. The small number of cases did not permit statistical analysis. Upon visual inspection there did not appear to be any pattern with respect to demographic characteristics or by field researcher. As extreme values appear in naturally occurring data, and because these values did not appear to follow a pattern, suggestive of bias, the values were left in the dataset. Differences among groups represented by the above characteristics will be explored further.

# Following was performed to identify duplicate cases- 1 identified and removed
# bind_rows(subset(adult, duplicated(ID)),
# subset(adult, duplicated(ID, fromLast = T)))
# 
# bind_rows(subset(adult, duplicated(`Case #`)),
# subset(adult, duplicated(`Case #`, fromLast = T))) %>%
#   arrange(`Case #`)

# Check for abberrant values from test to retest- use below
ad_wide <- inner_join(adult, retest, by = "case_id",suffix = c(".test", ".retest"),)
 vars <- paste0("^(", paste(raw_score_column_names_int, collapse="|"), "|case_id)")
check_score_differences_df <- rbind.data.frame(
  ad_wide[abs(ad_wide$CRScore.test - ad_wide$CRScore.retest)> 30,],
  ad_wide[abs(ad_wide$WRScore.test - ad_wide$WRScore.retest)> 30,],
  ad_wide[abs(ad_wide$CWRscore.test - ad_wide$CWRscore.retest)> 30,],
  ad_wide[abs(ad_wide$Interference.test - ad_wide$Interference.retest)> 30,]) %>%
  dplyr::select(., matches(vars))

1 case was identified as a duplicate entry and removed. One incorrect value was entered for a raw score. The original raw data were consulted to correct the error.

End Appendix section

Standardization

Sample

The standardization plan was designed to approximately replicate the U.S. population along demographic characteristics typically accounted for in standardized test development. The following demographic criteria were considered, with variable name to be referred to in this analysis and corresponding levels below:

demo_levels <- function(demo_vars){
  demo_vars = enquo(demo_vars)
  adult %>% dplyr::select(!!demo_vars) %>% sapply(., levels)
}

lev <- lapply(unlist(lapply(group_column[2:7], quo_name)),demo_levels) 
lev <- sapply(lev, function(x) paste(x, collapse = ", "))

data.frame(
  demo = c("Age", "Sex/Gender", "Race/Ethnicity", "Highest Completed Education Group", "Region of the U.S. Currently Residing", "Community Size", "Primary Mental Health Diagnosis"),
  demo_name = c("Age", unlist(group_column_title)[2:7]),
  levels = c("Continuous", lev)
  ) -> dat_df
  ft <- table_output_func(dat_df, colnames = c("Description", "Variable", "Levels"), caption = table_counter_func("Demographic Criteria"))

Warning in readLines(knitr::current_input()): incomplete final line found on
'stroop_adult_analysis.Rmd'

ft

*Table* 2: Demographic Criteria
Description	Variable	Levels
Age	Age	Continuous
Sex/Gender	Gender	Male, Female
Race/Ethnicity	Ethnicity	Caucasian, African-American, Asian, Hispanic, Native-American, Other
Highest Completed Education Group	Education	No HS Diploma, HS Diploma, Some College, BA, Post-grad, Another Value
Region of the U.S. Currently Residing	Region	Northeast, Midwest, South, West
Community Size	Community	Rural, Urban
Primary Mental Health Diagnosis	Diagnosis	No Dx, LD, Autism, ID, Dementia, ADHD, ESL, TBI, Another Value

  # kable(col.names = c("Description", "Variable", "Levels"), caption = table_counter_func("Demographic Criteria")) %>%
  # kable_options()

There was some variation in the proportion of levels within each variable between the sample and population, as described further in the Demographics section. Each of these variables is considered and controlled for or incorporated into the analysis of Stroop performance and in the models used to construct scaled scores, as described below.

Measures

Stroop Color and Word Test (Stroop)

The Stroop Color and Word Test- Golden & Freshwater version ref (Stroop) is a brief measure of executive functions and described in more detail in this manual. The Stroop consists of three tasks that generate three raw scores. They are:

Color Congruent (“Color”)
Word Congruent (“Word”)
Color-Word Incongruent (“Color-Word”)

These three scores will be referred to collectively as “Direct Measure” scores.

A calculation using these scores (described in the Scoring section of this Manual) represents a fourth score:

Interference (“Interference”)

This score will be referred to as a “Calculated” score to contrast with the Direct Measure scores.

Further interpretation and description of these scales is provided in the Interpretation section of this Manual.

Performance on these tasks is contextualized via scaled scores. Each score purports to represent a separate, though related, construct, so each score also represents a scale. The values are generally referred to as scores earlier in this chapter when providing descriptive statistics and constructing models for scaled scores. The content and relationship of these values as scales is discussed further below in this chapter.

Woodcock Johnson- Third Edition (WJ)

The Woodcock Johnson- Third Edition ref is an academic and cognitive battery. The following subtests were used:

Letter-Word Identification (LW)- This subtest assesses ability to recognize words and letters in isolation
Word Attak (WA)- This subtest requires phonetic decoding

Taken together, these tasks are considered useful measures of an individual’s ability to interpret and communicate visual language and are considered important skill components of reading ref- Mather & Gregg. Raw scores from both subtests were used.

Nonverbal Stroop Card Sorting Test (NSCST)

The Nonverbal Stroop Card Sorting Test (NSCST) ref is a short assessment of executive functions designed to elicit a Stroop effect, but administered and performed nonverbally. There are three scores obtained from the NSCST:

Color-Congruent Ratio (“CC Ratio”)- this is a ratio of the time alloted to the number of items correct in a condition with congruent stimuli. It is similar to the Color and Word tasks on the Stroop Test.
Color-Incongruent Ratio (“CI Ratio”)- this is a ratio of the time alloted to the number of items correct in a condition with incongruent stimuli. It is similar to the Color-Word task on the Stroop Test.

The above measures are calculated directly from participant performance. The following metric uses those scores in a calculation:

Stroop Effect (“Nonverbal Stroop”)- This is the CC Ratio subtracted from the CI Ratio, so it is a comparison of a person’s ability to manage incongruent stimuli compared to management of congruent stimuli.

Scores of the above were converted to T-scores, using NSCST procedures. T-scores were reversed so that a higher score was indicative of more capable performance (e.g., a score above 50 indicated a person was better than average at processing stimuli effectively in a particular condition).

Comprehensive Trail-Making Test (CTMT)

Comprehensive Trail-Making Test (CTMT) ref involves connecting multiple stimuli with various distractions under timed conditions. It is considered a useful assessment of attention, resistance to distractions, and processing speed ref.

The T-scores of individual tasks of the CTMT were summed to yield a Total T-Score (“Trails Total”).

Score Descriptions

      histogram_panel_function <- function(dataset, scores, recode_options, reorder_order){
        scores = enquo(scores)
        # recode_options = enexpr(recode_options)
        dataset %>%
        dplyr::select(!!scores) %>%
        gather("Scale", "Raw Score", convert = T) %>%
        mutate(Scale = dplyr::recode(Scale, !!!recode_options)) %>%
        arrange(match(Scale, reorder_order)) %>%
        mutate(Scales = factor(Scale, levels = reorder_order)) %>%
        ggplot(aes(`Raw Score`)) + #ggplot(., aes(x = Scales))+
        geom_bar(stat = "bin") +
        theme_bw() +
        facet_wrap(~Scales,scales = "free_x") +
        ggtitle(figure_counter_func("Raw Score Distributions")) + 
        ylab("Number of Participants")
      }

test_score_names <- list(raw_score_column_names_int, c("LW", "WA"), c("cc_ratio_t", "ci_ratio_t", "nscst_stroop_t"), "total")
recode_option <- list(list('CRScore' = "Color Raw Score", 'WRScore' = "Word Raw Score", 'CWRscore' = "Color-Word Raw Score", 'Interference' = "Interference"), list('LW' = "LW Scale", 'WA' = "WA Scale"), list('cc_ratio_t' = "CC Ratio", 'ci_ratio_t' = "CI Ratio", 'nscst_stroop_t' = "Nonverbal Stroop"), list('total' = "Total"))
test_scale_list <- list(scale_list_int, list("LW Scale", "WA Scale"), list("CC Ratio", "CI Ratio", "Nonverbal Stroop"), list("Total"))
data_set <- list(adult, wj, adult_nscst, ctmt)

# lapply(1:length(data_set), function(i) {
#   histogram_panel_function (data_set[[i]], test_score_names[[i]], recode_option[[i]], test_scale_list[[i]])
# })

Overall

The following are an overview of the scores for each measure and the characteristics of the sample overall for those scores:

adult_sum_df <- adult %>%
  summarize(n = n(),
         c_mean = paste0(round(mean(CRScore),2), '(', round(sd(CRScore),2), ')'),
         w_mean = paste0(round(mean(WRScore),2), '(', round(sd(WRScore),2), ')'),
         cw_mean = paste0(round(mean(CWRscore),2), '(', round(sd(CWRscore),2), ')'),
         int_mean = paste0(round(mean(Interference),2), '(', round(sd(Interference),2), ')'),
         c_SEM = round(psych::describe(CRScore)$se, 2),
         w_SEM = round(psych::describe(WRScore)$se, 2),
         cw_SEM = round(psych::describe(CWRscore)$se, 2),
         int_SEM = round(psych::describe(Interference)$se, 2),
         c_range = paste0(min(CRScore), "-", max(CRScore)),
         w_range = paste0(min(WRScore), "-", max(WRScore)),
         cw_range = paste0(min(CWRscore), "-", max(CWRscore)),
         int_range = paste0(min(Interference), "-", max(Interference))) %>%
  dplyr::select(n, c_mean, c_SEM, c_range, w_mean, w_SEM, w_range,  cw_mean, cw_SEM, cw_range, int_mean, int_SEM, int_range) 

wj_sum_df <- wj %>%
    summarize(n = n(),
         lw_mean = paste0(round(mean(LW),2), '(', round(sd(LW),2), ')'),
         wa_mean = paste0(round(mean(WA),2), '(', round(sd(WA),2), ')'),
         lw_SEM = round(psych::describe(LW)$se, 2),
         wa_SEM = round(psych::describe(WA)$se, 2),
         lw_range = paste0(min(LW), "-", max(LW)),
         wa_range = paste0(min(WA), "-", max(WA))) %>%
  dplyr::select(n, lw_mean, lw_SEM, lw_range, wa_mean, wa_SEM, wa_range)

nscst_sum_df <- adult_nscst %>%
    summarize(n = n(), 
         cc_ratio_mean = paste0(round(mean(cc_ratio_t),2), '(', round(sd(cc_ratio_t),2), ')'),
         ci_ratio_mean = paste0(round(mean(ci_ratio_t),2), '(', round(sd(ci_ratio_t),2), ')'),
         nscst_stroop_mean = paste0(round(mean(nscst_stroop_t),2), '(', round(sd(nscst_stroop_t),2), ')'),
         cc_ratio_SEM = round(psych::describe(cc_ratio_t)$se, 2),
         ci_ratio_SEM = round(psych::describe(ci_ratio_t)$se, 2),
         nscst_stroop_SEM = round(psych::describe(nscst_stroop_t)$se, 2),
         cc_ratio_range = paste0(min(cc_ratio_t), "-", max(cc_ratio_t)),
         ci_ratio_range = paste0(min(ci_ratio_t), "-", max(ci_ratio_t)),
         nscst_stroop_range = paste0(min(nscst_stroop_t), "-", max(nscst_stroop_t))) %>%
  dplyr::select(n, cc_ratio_mean, cc_ratio_SEM, cc_ratio_range, ci_ratio_mean, ci_ratio_SEM, ci_ratio_range, nscst_stroop_mean, nscst_stroop_SEM, nscst_stroop_range) 

ctmt_sum_df <- ctmt %>%
    summarize(n = n(),
         tot_mean = paste0(round(mean(total),2), '(', round(sd(total),2), ')'),
         tot_SEM = round(psych::describe(total)$se, 2),
         tot_range = paste0(min(total), "-", max(total))) %>%
  dplyr::select(n, tot_mean, tot_SEM, tot_range) 

data_sum_set <- list(adult_sum_df, wj_sum_df, nscst_sum_df, ctmt_sum_df)
data_sum_kable <- function(data, names){
          kable(data, col.names = c("*N*", rep(c(paste("Mean", "(SD)"), "SEM", "Min-Max"), length(names))),  caption = table_counter_func("Overall Raw Score Descriptives"), booktabs = T)  %>%
          kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, position="left") %>%
          add_header_above(header = data.frame(names = c("",names), colspan = c(1, rep(3,length(names))))) %>%
          column_spec(seq(2, 3*length(names), 3), border_left = "1px solid gray")
}

data_set_names <- list("Stroop", "WJ-3", "NSCST", "CTMT")
lapply(1:length(data_set), function(i) {
  cat(paste("<h4>", data_set_names[[i]], "</h4>"))
  cat("<div class = 'row h-100'>")
    cat("<div class = 'col-md-6'>")
      invisible(print(histogram_panel_function (data_set[[i]], test_score_names[[i]], recode_option[[i]], test_scale_list[[i]])))
    cat("</div>")
    cat("<div class = 'col-md-6'>")
      invisible(print(data_sum_kable(data_sum_set[[i]], test_score_names[[i]])))
    cat("</div>")
  cat("</div>")
})

Stroop

*Table* 3: Overall Raw Score Descriptives
	CRScore			WRScore			CWRscore			Interference
N	Mean (SD)	SEM	Min-Max	Mean (SD)	SEM	Min-Max	Mean (SD)	SEM	Min-Max	Mean (SD)	SEM	Min-Max
373	73.31(13.62)	0.71	31-120	102.3(16.31)	0.84	48-147	42.38(11.02)	0.57	12-94	0.12(9.38)	0.49	-32-54

WJ-3

*Table* 4: Overall Raw Score Descriptives
	LW			WA
N	Mean (SD)	SEM	Min-Max	Mean (SD)	SEM	Min-Max
322	68.81(5.23)	0.29	52-78	26.3(3.94)	0.22	6-32

NSCST

*Table* 5: Overall Raw Score Descriptives
	cc_ratio_t			ci_ratio_t			nscst_stroop_t
N	Mean (SD)	SEM	Min-Max	Mean (SD)	SEM	Min-Max	Mean (SD)	SEM	Min-Max
189	48.41(12.14)	0.88	10-60	51.45(7.15)	0.52	10-60	52.85(10.56)	0.77	10-80

CTMT

*Table* 6: Overall Raw Score Descriptives
	total
N	Mean (SD)	SEM	Min-Max
169	235.92(44.06)	3.39	109-336

[[1]] NULL

[[2]] NULL

[[3]] NULL

[[4]] NULL

Normality

The data appeared to assume normal distributions, based on visual inspection of histograms. D’Agostino Pearson Test of Normality statistics, assessing skewness, were calculated to evaluate normality. This procedure was deemed appropriate for the sample sizes:

do.call(rbind, lapply(1:length(data_set), function(i){
  do.call(rbind, lapply(1:length(test_score_names[[i]]), function(j){
    ag_res <- agostino.test(as.data.frame(data_set[[i]] %>% dplyr::select(!!as.name(test_score_names[[i]][j])))[,1])
    #shapiro_test(!!as.name(test_score_names[[i]][j]))
    c(ag_res$statistic, p = ag_res$p.value)
  }))
})
) %>% cbind.data.frame(unlist(test_scale_list), .) %>%
 mutate_if(is.numeric, round, digits = 3) %>%
  kable(col.names = c("Scale", "Skewness Score", "Z", "P"), caption = table_counter_func("D'Agostino Pearson Test of Normality of Scores")) %>%
  kable_options()

*Table* 7: D’Agostino Pearson Test of Normality of Scores
Scale	Skewness Score	Z	P
Color Raw Score	0.089	0.712	0.477
Word Raw Score	-0.087	-0.701	0.483
Color-Word Raw Score	0.335	2.627	0.009
Interference	0.737	5.346	0.000
LW Scale	-0.788	-5.267	0.000
WA Scale	-2.051	-10.247	0.000
CC Ratio	-2.043	-8.021	0.000
CI Ratio	-2.571	-9.103	0.000
Nonverbal Stroop	-0.529	-2.914	0.004
Total	-0.153	-0.841	0.400

The above table shows that some variables may have issues with normality, even with corrections for multiple comparisons. Assumptions of normality will be examined when constructing models and performing further analyses.

Demographics

Demographic Tables and Plots

The following tables and plots are of Stroop scores by demographic variable groups. Numbers of participants within each group, the mean, SD, and SEM were also identified.

For each demographic variable, a Multiple Analysis of Variance (MANOVA) was conducted to identify differences among groups within any Stroop score. ANOVA was also conducted for each variable. The results of each are shown in column headers. Though ANOVA results are shown for each variable, it is only proper to consider ANOVA outcomes for a specific score when the MANOVA, which accounts for multiple comparisons, is significant.

The results presented in these tables should be interpreted with caution as the sample was not stratified within each variable, so differences among groups within a score may be more attributable to differences in the group due to a more salient variable. Furthermore, these demographic factors are explored further, accounted for, and controlled for in models constructed and in analyses below.

# MANOVA test function for raw scores

manova_func <- function(dataset, group_column){
  group_column <- enquo(group_column)
  response_matrix <- as.matrix(dataset %>% dplyr::select(!!!raw_score_quos_int))
  predictor_variable <- as.matrix(dplyr::select(adult, !!group_column))
  res.man <- manova(response_matrix ~ predictor_variable)
  res.man
}

manova_overall_p_value_func <- function(dataset, group_column){
  group_column <- enquo(group_column)
  res.man <- manova_func(dataset, !!group_column)
  p_val <- summary(res.man)$stats[1,6]
  p_val
}

manova_scale_p_value_func <- function(dataset, group_column){
  group_column <- enquo(group_column)
  res.man <- manova_func(dataset, !!group_column)
  lapply(summary.aov(res.man), function(x) x$`Pr(>F)`[1])
}
# *** Alternative approach to construct MANOVA here.  Not used
# scales <- c("CRScore", "WRScore", "CWRscore")
# responses <- paste(scales, collapse = ",")
# myformula <- as.formula( paste0( "cbind(", responses , ")~ unit" ) )
# manova( myformula, data = plots )

 manova_p_value_results_func <- function(dataset, group_column){
   group_column <- enquo(group_column)
    p_val <- round(manova_overall_p_value_func(dataset, !!group_column),4)
        if(p_val < .05 & p_val >= .01){
      # Can choose to print full table or summary
       # summary.aov(res.man)
      paste("*, p =", p_val)
       # data.frame(unclass(summary(res.man)), check.names = FALSE, stringsAsFactors = FALSE)
    } else if (p_val < .01 & p_val >= .0001) {
        paste("**, p =", p_val)
    }  else if (p_val < .0001) {
        paste("***, p < 0.001")
    }  else {
      paste("NS, p =", p_val)
    }
  }

 manova_scales_p_value_results_func <- function(dataset, group_column){
   group_column <- enquo(group_column)
   lapply(manova_scale_p_value_func(dataset, !!group_column), function(p_val){
     if(p_val < .05 & p_val >= .01){
       # Can shoose to print full table or summary
       # summary.aov(res.man)
       paste("*, p =", round(p_val,4))
       # data.frame(unclass(summary(res.man)), check.names = FALSE, stringsAsFactors = FALSE)
     } else if (p_val < .01 & p_val >= .0001) {
       paste("**, p =", round(p_val,4))
     }  else if (p_val < .0001) {
       paste("***, p < 0.001")
     }  else {
       paste("NS, p =", round(p_val, 4))
     }
   }
   )
 }
 
 # Function to generate summary tables- can choose data, grouping, summary variables, and whether to rename
generate_summary_tbl <- function(dataset, group_column, summary_column, useColname = T) {
  group_column   <- enquo(group_column)
  # Uncomment Below if not passing list of quosures for summary variables
  # summary_column <- enquo(summary_column)
  overall_p_value_result <- manova_p_value_results_func(dataset, !!group_column)
  dataset %>% 
    group_by(!!group_column) %>% 
    summarise(
      N = n(),
      mean = round(mean(!!summary_column),2),
      SD  = round(sd(!!summary_column),2),
      SEM = round(psych::describe(!!summary_column)$se, 2)
      # Other metrics that need to be generated frequently
    ) %>% 
    # Get proportion and consolidate some columns
    mutate(grouping_column = !!group_column,
           prop = paste0(round(N/sum(N),4)*100, "%"),
           n_prop = paste0(N," (", prop, ")"),
           mean_sd = paste0(mean, " (", SD, ")"),
    ) %>%
    dplyr::select(grouping_column, n_prop, mean_sd, SEM) %>%
    ungroup -> smryDta
  
  if (useColname) 
    smryDta <- smryDta %>%  
    rename_at(
      vars(-one_of(quo_name(summary_column))), 
      ~paste(quo_name(summary_column), .x, sep="_")
    ) 
  smryDta <- smryDta %>%
    rename_at(1, ~paste(quo_name(group_column), overall_p_value_result))
  
  
  return(smryDta)
}

# Linear means test function for raw scores
  # lm_demo_test <- function(scales, demo_var){
  #   mod_diff <- lm(!!scales ~ !!demo_var, data = adult)
  #   mod_same <- lm(!!scales ~ 1, data = adult)
  #   p_val <- round(anova(mod_diff, mod_same)$`Pr(>F)`, 4)
  #   # summary(res.man)
  #   if(p_val < .05 & p_val >= .01){
  #     # Can shoose to print full table or summary
  #      # summary.aov(res.man)
  #     paste("*, p =", p_val)
  #      # data.frame(unclass(summary(res.man)), check.names = FALSE, stringsAsFactors = FALSE)
  #   } else if (p_val < .01 & p_val >= .0001) {
  #       paste("**, p =", p_val)
  #   }  else if (p_val < .0001) {
  #       paste("***, p < 0.001")
  #   }  else {
  #     paste("No significant difference, p =", round(summary(res.man)$stats[1,6], 2))
  #   }
  # }
  # 
  # sapply(scale_score, function(scales) lm_test(demo_variable, scales))

# myHeader <- list(c(" " = 1, "Color Raw Score" = 3),c(" " = 1, "Word Raw Score" = 3),c(" " = 1, "Color-Word Raw Score" = 3))

interpretation_headers <- function(scale_name, interpretation, group_column_title) {
  paste(scale_name, interpretation)
}

table_maker <- function(dataset, group_column, group_column_title){
  group_column   <- enquo(group_column)
  myHeader <-mapply(interpretation_headers, scale_list_int, manova_scales_p_value_results_func(dataset, !!group_column))
  myHeader <- data.frame(names = myHeader, colspan = 2)
  myHeader <- rbind(c(" ", 2), myHeader)
  myHeader$colspan <- as.numeric(myHeader$colspan)
  
  # Summary of Age Breakdown
  summary_df_list <- lapply(1:length(scale_score_int), {function(i)
      dataset %>% generate_summary_tbl(!!group_column, scale_score_int[[i]])
  }
  )
  cbind.data.frame(summary_df_list[[1]], lapply(2:length(scale_score_int), function(i) summary_df_list[[i]][,3:4])) %>%
        kable(col.names = c(names(summary_df_list[[1]])[1], "*N* (%)", rep(c("Mean (SD)", "SEM"),length(scale_score_int))),  caption = table_counter_func(group_column_title), booktabs = T) %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, position="left", font_size = 10) %>%
        add_header_above(header = myHeader) %>%
        column_spec(c(3,5,7, 9), border_left = "1px solid gray") %>%
        footnote(general = "NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001")
}

# Function to plot bar plots for each Raw Score by Demographic Variables
  demographics_function_plot <- function(demo_var){
    adult %>%
      dplyr::select(UQ(demo_var), CRScore, WRScore, CWRscore, Interference) %>%
      gather("Scale", "Raw Score", -UQ(demo_var), convert = T) %>%
      mutate(Scale = dplyr::recode(Scale, 'CRScore' = "Color Raw Score", 'WRScore' = "Word Raw Score", 'CWRscore' = "Color-Word Raw Score", 'Interference' = "Interference")) %>%
      arrange(match(Scale, c("Word Raw Score", "Color Raw Score", "Color-Word Raw Score", "Interference"))) %>%
      mutate(Scales = factor(Scale, levels =c("Color Raw Score", "Word Raw Score","Color-Word Raw Score", "Interference"))) %>%
      ggplot(aes(Scales, `Raw Score`, fill = UQ(demo_var)))+
      geom_col(position = "dodge") +
      theme_bw()+
      facet_wrap(~Scales,scales = "free_x")
    # One graph only
      # group_by(Ethnic) %>%
      # ggplot(stroop_adult, aes(x = PrimeDX, y = IntTscore)) +
      # geom_bar(aes(x = PrimeDX, y = IntTscore, fill = as.factor(PrimeDX)), stat = "summary", fun.y = "mean")
  }

demo_table <- lapply(1:length(group_column), function(i){
    table_maker(adult, !!group_column[[i]], group_column_title[i])
  }
)
demo_plot <- lapply(group_column, function(demo_var){
  demographics_function_plot(demo_var)
}
)
manova_column_header <- setNames(data.frame(matrix(ncol = 5, nrow = 0)),paste0("col_name_", c(1:5))) %>%
        kable(col.names = c("MANOVA Test Results", paste(score_names_int, "ANOVA")),  caption = table_counter_func("Test of Differences for following columns"), booktabs = T) %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, position="left", font_size = 12) %>%
        add_header_above(header = c(" " = 1, "Results" = 4))
  cat("<div class = 'row h-100'>")

    cat("<div class = 'col-md-6'>")

      print(manova_column_header)

*Table* 15: Test of Differences for following columns
	Results
MANOVA Test Results	Color Raw Score ANOVA	Word Raw Score ANOVA	Color-Word Raw Score ANOVA	Interference ANOVA

    cat("</div>")

  cat("</div>")

  cat("<hr>")

for (i in 1:length(group_column)){
  cat("<div class = 'row h-100'>")
    cat("<div class = 'col-sm-6'>")
      print(demo_table[[i]])
    cat("</div>")
    cat("<div class = 'col-sm-6' style = 'margin-top: 150px'>")
      print(demo_plot[[i]])
    cat("</div>")
  cat("</div>")
  cat("<hr>")
}

*Table* 8: Age
		Color Raw Score ***, p < 0.001		Word Raw Score *, p = 0.0196		Color-Word Raw Score ***, p < 0.001		Interference **, p = 0.0062
age ***, p < 0.001	N (%)	Mean (SD)	SEM	Mean (SD)	SEM	Mean (SD)	SEM	Mean (SD)	SEM
15-19	46 (12.33%)	73.59 (12.86)	1.90	98.63 (15.47)	2.28	44.98 (10.42)	1.54	3.09 (7.69)	1.13
20-29	98 (26.27%)	74.8 (13.31)	1.34	103.97 (14.96)	1.51	45.17 (8.59)	0.87	1.83 (6.22)	0.63
30-39	68 (18.23%)	76.69 (14.43)	1.75	107.35 (16.76)	2.03	43.94 (11.71)	1.42	-0.46 (8.56)	1.04
40-49	78 (20.91%)	74.18 (13.95)	1.58	100.83 (17.57)	1.99	41.79 (13.04)	1.48	-0.55 (12.15)	1.38
50-59	48 (12.87%)	71.19 (12.27)	1.77	101.94 (17.12)	2.47	39.79 (8.61)	1.24	-2.1 (8.79)	1.27
60-69	24 (6.43%)	66.29 (8.6)	1.76	98.46 (14.89)	3.04	35.71 (8.2)	1.67	-0.67 (13.71)	2.80
70-79	7 (1.88%)	58.71 (7.85)	2.97	93 (8.49)	3.21	28.14 (8.15)	3.08	-7.86 (7.69)	2.91
80-89	4 (1.07%)	52.5 (14.39)	7.19	89.75 (7.68)	3.84	24.75 (6.9)	3.45	-8 (3.74)	1.87
Note:
NS = P > 0.05, * = P <= .05, = P <= .01, * = P < 0.001

*Table* 9: Gender
		Color Raw Score NS, p = 0.2732		Word Raw Score NS, p = 0.3811		Color-Word Raw Score NS, p = 0.4947		Interference NS, p = 0.7944
Gender NS, p = 0.7459	N (%)	Mean (SD)	SEM	Mean (SD)	SEM	Mean (SD)	SEM	Mean (SD)	SEM
Male	149 (39.95%)	72.36 (13.37)	1.10	101.39 (15.72)	1.29	41.9 (10.79)	0.88	-0.04 (9.03)	0.74
Female	224 (60.05%)	73.94 (13.78)	0.92	102.9 (16.7)	1.12	42.7 (11.19)	0.75	0.22 (9.63)	0.64
Note:
NS = P > 0.05, * = P <= .05, = P <= .01, * = P < 0.001

*Table* 10: Ethnicity
		Color Raw Score NS, p = 0.5943		Word Raw Score NS, p = 0.3279		Color-Word Raw Score NS, p = 0.3575		Interference NS, p = 0.2484
Ethnic NS, p = 0.1326	N (%)	Mean (SD)	SEM	Mean (SD)	SEM	Mean (SD)	SEM	Mean (SD)	SEM
Caucasian	249 (66.76%)	73.85 (13.66)	0.87	102.69 (16.06)	1.02	42.85 (10.83)	0.69	0.25 (8.77)	0.56
African-American	35 (9.38%)	73.94 (14.2)	2.40	100.11 (17.29)	2.92	39.14 (11.17)	1.89	-3.23 (8.06)	1.36
Asian	15 (4.02%)	72.2 (15.6)	4.03	104.33 (22.95)	5.93	44.13 (13.85)	3.57	1.8 (9.17)	2.37
Hispanic	54 (14.48%)	70.26 (12.22)	1.66	99.2 (16.28)	2.22	41.04 (9.44)	1.28	0.3 (7.64)	1.04
Native-American	4 (1.07%)	71.5 (17.67)	8.84	104 (6.38)	3.19	42.5 (7.55)	3.77	0.25 (3.86)	1.93
Other	16 (4.29%)	75.38 (14.05)	3.51	109.12 (10.79)	2.70	44.94 (15.52)	3.88	3.12 (20.9)	5.23
Note:
NS = P > 0.05, * = P <= .05, = P <= .01, * = P < 0.001

*Table* 11: Education
		Color Raw Score **, p = 0.0006		Word Raw Score ***, p < 0.001		Color-Word Raw Score **, p = 0.0035		Interference NS, p = 0.0778
SelEdGrp ***, p < 0.001	N (%)	Mean (SD)	SEM	Mean (SD)	SEM	Mean (SD)	SEM	Mean (SD)	SEM
No HS Diploma	45 (12.06%)	68.78 (14)	2.09	93.98 (16.1)	2.40	42.76 (10.35)	1.54	3.33 (7.16)	1.07
HS Diploma	73 (19.57%)	70.6 (13.4)	1.57	96.14 (15.25)	1.79	38.19 (9.67)	1.13	-2.14 (8.26)	0.97
Some College	93 (24.93%)	72.26 (12.55)	1.30	103.01 (15.46)	1.60	41.96 (10.54)	1.09	-0.2 (8.38)	0.87
BA	111 (29.76%)	77.48 (13.54)	1.29	105.86 (13.45)	1.28	44.27 (11.25)	1.07	0.27 (10.61)	1.01
Post-grad	49 (13.14%)	74.63 (13.57)	1.94	110.2 (19.48)	2.78	45.02 (12.45)	1.78	0.76 (11.01)	1.57
Another Value	2 (0.54%)	59.5 (2.12)	1.50	90 (2.83)	2.00	36.5 (7.78)	5.50	0.5 (6.36)	4.50
Note:
NS = P > 0.05, * = P <= .05, = P <= .01, * = P < 0.001

*Table* 12: Region
		Color Raw Score NS, p = 0.9414		Word Raw Score **, p = 0.0007		Color-Word Raw Score NS, p = 0.1347		Interference **, p = 0.0005
Region **, p = 0.0001	N (%)	Mean (SD)	SEM	Mean (SD)	SEM	Mean (SD)	SEM	Mean (SD)	SEM
Northeast	197 (52.82%)	73.03 (13.51)	0.96	105.27 (16.45)	1.17	41.29 (11.24)	0.80	-1.53 (8.81)	0.63
Midwest	5 (1.34%)	74.8 (4.66)	2.08	101.6 (10.92)	4.88	46.8 (2.77)	1.24	3.8 (3.03)	1.36
South	126 (33.78%)	73.33 (13.84)	1.23	97.63 (16.5)	1.47	44.02 (11.44)	1.02	2.82 (10.77)	0.96
West	45 (12.06%)	74.33 (14.39)	2.15	102.44 (12.54)	1.87	42.02 (8.79)	1.31	-0.67 (5.72)	0.85
Note:
NS = P > 0.05, * = P <= .05, = P <= .01, * = P < 0.001

*Table* 13: Community
		Color Raw Score NS, p = 0.8988		Word Raw Score ***, p < 0.001		Color-Word Raw Score NS, p = 0.1167		Interference **, p = 0.0011
Commun ***, p < 0.001	N (%)	Mean (SD)	SEM	Mean (SD)	SEM	Mean (SD)	SEM	Mean (SD)	SEM
Rural	54 (14.48%)	73.09 (15.25)	2.07	93.3 (15.48)	2.11	44.56 (10.64)	1.45	3.94 (9.39)	1.28
Urban	319 (85.52%)	73.35 (13.35)	0.75	103.82 (15.97)	0.89	42.01 (11.06)	0.62	-0.53 (9.24)	0.52
Note:
NS = P > 0.05, * = P <= .05, = P <= .01, * = P < 0.001

*Table* 14: Diagnosis
		Color Raw Score NS, p = 0.0615		Word Raw Score NS, p = 0.2278		Color-Word Raw Score NS, p = 0.5123		Interference NS, p = 0.979
PrimeDX NS, p = 0.84	N (%)	Mean (SD)	SEM	Mean (SD)	SEM	Mean (SD)	SEM	Mean (SD)	SEM
No Dx	329 (88.2%)	74.15 (13.71)	0.76	103 (16.7)	0.92	42.84 (11.15)	0.61	0.2 (9.54)	0.53
LD	1 (0.27%)	61 (NA)	NA	89 (NA)	NA	41 (NA)	NA	5 (NA)	NA
Autism	1 (0.27%)	74 (NA)	NA	110 (NA)	NA	38 (NA)	NA	-6 (NA)	NA
ID	21 (5.63%)	67.95 (12.44)	2.71	97.1 (11.91)	2.60	38.9 (9.59)	2.09	-0.86 (9.31)	2.03
Dementia	4 (1.07%)	62.25 (10.14)	5.07	103.5 (10.6)	5.30	37.25 (11.24)	5.62	-1.25 (7.85)	3.92
ADHD	1 (0.27%)	62 (NA)	NA	94 (NA)	NA	35 (NA)	NA	-2 (NA)	NA
ESL	2 (0.54%)	54 (9.9)	7.00	75 (4.24)	3.00	30 (1.41)	1.00	-1 (1.41)	1.00
TBI	10 (2.68%)	71.2 (9.99)	3.16	97.1 (9.45)	2.99	39.8 (11.83)	3.74	-1.2 (9.1)	2.88
Another Value	4 (1.07%)	64 (9.27)	4.64	101 (15.17)	7.58	43.25 (3.86)	1.93	4.25 (3.3)	1.65
Note:
NS = P > 0.05, * = P <= .05, = P <= .01, * = P < 0.001

Raw Scores Across Age

The variable Age was grouped into groups with ranges of 10 years for convenience in the tables above. MANOVA showed significant differences among groups. Also, age is known from research with the Stroop and similar instruments to have a significant relationship with Stroop performance. Age for each participant was expressed as a decimal value using years and months, with days truncated (that is, months were rounded down in all cases). As Age is a continuous variable, Stroop scores with respect to Age were examined (note: the directly measured Stroop Scores- Color, Word, and Color-Word were examined here, while the calculated score Interference was examined separately below).

## Following function to construct the regression equation for each model

    reg_eq <- function(mod) {paste(c(round(summary(mod)$coefficients[1,1],4), mapply(function(coefficient_names, coefficients) paste("+", coefficient_names, "*", round(coefficients,4)), coefficient_names = names(summary(mod)$coefficients[2:length(summary(mod)$coefficients[,1]),1]), coefficients = summary(mod)$coefficients[2:length(summary(mod)$coefficients[,1]),1])), collapse = " ")}

## Following constructs dataframe with important values for any model
# Need following terms
 # final_random_effects_model <- lmer model, like: lmer(CRScore ~ decimal_age + I(decimal_age^2) + SelEdGrp + (1|PrimeDX), data = adult)
  
  # Choose best lm model on RHS
  # chosen_lm_model <- mod2
  
  # Model vs. less restrictive
  # critical_comparison <- anova(mod1, mod2)

model_dataframe <- function(final_random_effects_model, chosen_lm_model, critical_comparison){
  
  # test for random effects
  random_effects_ranova <- lmerTest::ranova(final_random_effects_model)

  # Identifies included random effects and p-values and collapses
  random_effects_terms <- paste(attributes(random_effects_ranova)$row.names[-1], sep = ",")
  random_effects_terms_p_value <- paste(round(random_effects_ranova[,6][-1],4), sep = ",")
  
  # Best fitting model, test for heterskedasticity- get p-value
  bp_p.value <- lmtest::bptest(chosen_lm_model)$p.value
  
  # Regression equation
  regression_equation <- reg_eq(chosen_lm_model)
  
  model_stats_df <- data.frame(equation = regression_equation, model_significance = round(critical_comparison[,6][2],4),
                                  random_effects = random_effects_terms, random_effects_p = random_effects_terms_p_value, 
                                  bp_test_p.value = round(bp_p.value,4))
  row.names(model_stats_df) <- NULL
  return(model_stats_df)
}

Models to Predict Raw Scores

Models were constructed to predict each Stroop score. The models were then used to construct scaled scores for each individual raw score, beginning with the Direct Measure scores.

Summary Table of Models and Diagnostic Measures

As can be seen in the plots, Stroop performance across Raw scores seemed to remain steady, or slightly increase, through early and into mid-adulthood before declining beginning in mid- to late-adulthood. The relationship between Age and Scores may be nonlinear, so in models constructed below, quadratic and cubic effects were considered.

In addition to Age, Stroop performance was seen to vary significantly by Education level, which was used in previous editions to contextualize performance.

Mixed effects models were constructed for each of the Raw Scores, with Age and Education as predictors and other demographic variables entered as random effects. Full models with interactions between Age and Education and all demographic variables were constructed, along with Age with polynomial effects.

Terms were removed from the models and the fuller and reduced models were compared with maximum likelihood analyses. Maximum likelihood analysis has advantages in analysis compared to Wald-based methods of:

Being more robust to deviations from normalcy and tolerating unbalanced groups
Allowing for missing data (though there were none in this sample)
Incorporating full information from the models to allow for precise standard error estimation.

At each step of model construction full models with significant differences compared to reduced models were seen to contain important variables and adopted. When models were not significantly different the more parsimonious model was used. The following table contains the final regression equations, significance compared to fuller model, and random effects found to be significant. Because six random effects and each effects’ interaction with Age was considered, a more conservative alpha level was set using a Bonferonni correction of .05/number of copmarisons(12)*number of Scores(3) = 0.0014.

Random effects that had associated p-values below 0.05 were identified. It can be seen that none had p-values below the set alpha level, so all random effects were dropped from models.

In addition, a Breusch-Pagan (BP) Test was conducted for each model to test for heteroskedasticity. All results were above alpha = 0.05, so all models were considered not to violate assumptions of homogeneity.

cbind.data.frame(score_names, bind_rows(color_model_dataframe, word_model_dataframe, cw_model_dataframe)) %>%
        kable(col.names = c("Scale", "Regression Equation", "Significance Pr(>F)", "Included Random Effects", "Random Effects P-Value", "BP-Test P-Value"),  caption = table_counter_func("Regression Model Diagnostics for Stroop Raw Scores"), booktabs = T) %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

*Table* 16: Regression Model Diagnostics for Stroop Raw Scores
Scale	Regression Equation	Significance Pr(>F)	Included Random Effects	Random Effects P-Value	BP-Test P-Value
Color Raw Score	67.4738 + decimal_age * 0.235 + I(decimal_age^2) * -0.0057 + SelEdGrpHS Diploma * 4.4776 + SelEdGrpSome College * 4.6917 + SelEdGrpBA * 10.7539 + SelEdGrpPost-grad * 8.887 + SelEdGrpAnother Value * -5.6689	0.0273	(1 \| PrimeDX)	0.0084	0.4498
Word Raw Score	98.9497 + decimal_age * -0.2043 + SelEdGrpHS Diploma * 5.3024 + SelEdGrpSome College * 11.2381 + SelEdGrpBA * 15.1596 + SelEdGrpPost-grad * 20.3274 + SelEdGrpAnother Value * 0.9523	0.0000	(1 \| Region)	0.0007	0.1699
Color-Word Raw Score	49.2216 + decimal_age * -0.2658 + SelEdGrpHS Diploma * -0.476 + SelEdGrpSome College * 2.0692 + SelEdGrpBA * 5.7824 + SelEdGrpPost-grad * 7.5984 + SelEdGrpAnother Value * 0.1562	0.0000	(1 \| Commun)	0.0197	0.6097

Plots of Predicted Scores and Residuals

The following plots represent the models above, with fitted Raw Score values plotted across age, allowing for differences by Education group. As was identified in the table above, Age was found to have a significant quadratic term as a predictor of Color Raw score, which explains the nonlinear lines in that plot.

In addition, diagnostic plots are provided, in which residuals are plotted against fitted values and age. It does not appear that there is any heteroskedasticity, as identified by those plots.

Scaled Scores

The models were used to construct predicted Raw Score value tables (Appendix Appendix #). Ideally the above specified regression models would be used to convert given Raw scores to exact Predicted Scores. However, as this edition of the Stroop test is intended to be published in paper format, it is necessary to generate lookup tables usable by clinicians. As such, it is necessary to round the predicted values to the nearest integer. As both Age and Education were found to significantly predict Raw Scores, both were used to construct tables. For each Raw Score, a separate table was constructed for each Education level. Bands of Age values scores were then assigned to corresponding integer predicted Raw score values in the range of scores obtained from the sample. The Age values associated with a Raw score did not have regular intervals. This is a departure from the previous edition of the Stroop, where Age values were consistently grouped with ranges of five years (e.g., “21-25”, “26-30”). It is anticipated that this will allow more precise esitmation of predicted scores. This assertion will be evaluated, as follows.

Color

color_se_list <- lapply(levels(adult$SelEdGrp), function(degree) {
  lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
    predicted_score_se <- round(predict(c_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$se.fit,2)
    data.frame(age, predicted_score_se, degree)
    }, degree = degree
    )
}
  )
color_fit_list <- lapply(levels(adult$SelEdGrp), function(degree) {
  lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
    predicted_score<- round(predict(c_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$fit, 2)
    cbind(age, predicted_score, degree)
    }, degree = degree
    )
}
  )

color_fit_df_list <- lapply(color_fit_list, function(x) (do.call(rbind,x)))
color_fit_df <- do.call(rbind.data.frame, c(color_fit_df_list, make.row.names = F))
color_fit_df$age <- as.numeric(color_fit_df$age)
color_fit_df$predicted_score <- as.numeric(color_fit_df$predicted_score)
color_fit_df$degree <- factor(color_fit_df$degree, levels = levels(adult$SelEdGrp))

color_age_plot <- ggplot() +
  geom_point(data = adult, aes(x = decimal_age, y = CRScore, color = SelEdGrp)) +
         geom_smooth(data = color_fit_df, aes(x = age, y = predicted_score, color = degree)) +
  scale_x_continuous(breaks = seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 5)) +
  scale_y_continuous(breaks = seq(min(adult$CRScore) - min(adult$CRScore) %%5, max(adult$CRScore), 5)) +
  labs(color = "Education Level") +
  xlab("Age") +
  ylab("Color Raw Score") +
  ggtitle(figure_counter_func("Predicted Color Scores \n by Education Across Ages"))

res <- residuals(c_mod)
yhat <- fitted(c_mod)

  cat("<div class = 'row h-100'>")

    cat("<div class = 'col-md-6' style = 'margin-top: 150px'>")

      color_age_plot

    cat("</div>")

    cat("<div class = 'col-md-6'>")

      plot(yhat,res, xlab="fitted values", ylab="residuals")

      plot(adult$decimal_age,res, xlab="age", ylab="residuals")

    cat("</div>")

  cat("</div>")

Word

word_se_list <- lapply(levels(adult$SelEdGrp), function(degree) {
  lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
    predicted_score_se <- round(predict(w_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$se.fit,2)
    data.frame(age, predicted_score_se, degree)
    }, degree = degree
    )
}
  )
word_fit_list <- lapply(levels(adult$SelEdGrp), function(degree) {
  lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
    predicted_score<- round(predict(w_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$fit, 2)
    cbind(age, predicted_score, degree)
    }, degree = degree
    )
}
  )

word_fit_df_list <- lapply(word_fit_list, function(x) (do.call(rbind,x)))
word_fit_df <- do.call(rbind.data.frame, c(word_fit_df_list, make.row.names = F))
word_fit_df$age <- as.numeric(word_fit_df$age)
word_fit_df$predicted_score <- as.numeric(word_fit_df$predicted_score)
word_fit_df$degree <- factor(word_fit_df$degree, levels = levels(adult$SelEdGrp))

word_age_plot <- ggplot() +
  geom_point(data = adult, aes(x = decimal_age, y = WRScore, color = SelEdGrp)) +
         geom_smooth(data = word_fit_df, aes(x = age, y = predicted_score, color = degree)) +
  scale_x_continuous(breaks = seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 5)) +
  scale_y_continuous(breaks = seq(min(adult$WRScore) - min(adult$WRScore) %%5, max(adult$WRScore), 5)) +
  labs(color = "Education Level") +
  xlab("Age") +
  ylab("Word Raw Score") +
  ggtitle(figure_counter_func("Predicted Word Scores \n by Education Across Ages"))

res <- residuals(w_mod)
yhat <- fitted(w_mod)

  cat("<div class = 'row h-100'>")

    cat("<div class = 'col-md-6' style = 'margin-top: 150px'>")

      word_age_plot

    cat("</div>")

    cat("<div class = 'col-md-6'>")

      plot(yhat,res, xlab="fitted values", ylab="residuals")

      plot(adult$decimal_age,res, xlab="age", ylab="residuals")

    cat("</div>")

  cat("</div>")

Color-Word

color_word_se_list <- lapply(levels(adult$SelEdGrp), function(degree) {
  lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
    predicted_score_se <- round(predict(cw_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$se.fit,2)
    data.frame(age, predicted_score_se, degree)
    }, degree = degree
    )
}
  )
color_word_fit_list <- lapply(levels(adult$SelEdGrp), function(degree) {
  lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
    predicted_score<- round(predict(cw_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$fit, 2)
    cbind(age, predicted_score, degree)
    }, degree = degree
    )
}
  )

color_word_fit_df_list <- lapply(color_word_fit_list, function(x) (do.call(rbind,x)))
color_word_fit_df <- do.call(rbind.data.frame, c(color_word_fit_df_list, make.row.names = F))
color_word_fit_df$age <- as.numeric(color_word_fit_df$age)
color_word_fit_df$predicted_score <- as.numeric(color_word_fit_df$predicted_score)
color_word_fit_df$degree <- factor(color_word_fit_df$degree, levels = levels(adult$SelEdGrp))

color_word_age_plot <- ggplot() +
  geom_point(data = adult, aes(x = decimal_age, y = CWRscore, color = SelEdGrp)) +
         geom_smooth(data = color_word_fit_df, aes(x = age, y = predicted_score, color = degree)) +
  scale_x_continuous(breaks = seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 5)) +
  scale_y_continuous(breaks = seq(min(adult$CWRscore) - min(adult$CWRscore) %%5, max(adult$CWRscore), 5)) +
  labs(color = "Education Level") +
  xlab("Age") +
  ylab("Color-Word Raw Score") +
  ggtitle(figure_counter_func("Predicted Color-Word Scores \n by Education Across Ages"))

res <- residuals(cw_mod)
yhat <- fitted(cw_mod)

  cat("<div class = 'row h-100'>")

    cat("<div class = 'col-md-6' style = 'margin-top: 150px'>")

      color_word_age_plot

    cat("</div>")

    cat("<div class = 'col-md-6'>")

      plot(yhat,res, xlab="fitted values", ylab="residuals")

      plot(adult$decimal_age,res, xlab="age", ylab="residuals")

    cat("</div>")

  cat("</div>")

# # Test for age and education interaction- don't model Interference Predicted Scores
#   mod1 <- lm(IntPrescore ~ decimal_age + SelEdGrp, data = adult)
#   mod3 <- lm(IntPrescore ~ decimal_age*SelEdGrp, data = adult)
#   anova(mod1, mod3)
# 
#   # Test for quadratic age
#   mod2 <- lm(IntPrescore ~ decimal_age + I(decimal_age^2) + SelEdGrp, data = adult)
#   anova(mod1, mod2)
# 
#   # Test for Cubic Effect
#   mod3 <- lm(IntPrescore ~ decimal_age + I(decimal_age^2) + I(decimal_age^3) + SelEdGrp, data = adult)
#   anova(mod2, mod3)
#   anova(mod1, mod3)
#   # Test Interaction age and education
# 
#   mod_all_rand <- lmer(IntPrescore ~ decimal_age + I(decimal_age^2) + SelEdGrp + (1|Gender:decimal_age) + (1|Gender) + (1|Drugs:decimal_age) + (1|Drugs) + (1|PrimeDX:decimal_age) + (1|PrimeDX) + (1|Commun:decimal_age) + (1|Commun) + (1|Region:decimal_age) + (1|Region) + (1|Ethnic:decimal_age) + (1|Ethnic), data = adult)
#   lmerTest::ranova(mod_all_rand)
#   int_mod_rand <- lmer(IntPrescore ~ decimal_age + SelEdGrp + (1|PrimeDX), data = adult)
#   lmerTest::ranova(int_mod_rand)
#   int_mod <- mod1
# lmtest::bptest(int_mod)

Compare New Prediction Models to Previous

These new models’ accuracy in predicting Stroop scores were compared to algorithms used in the previous version of the Stroop Test. The absolute value of the prediction from actual score for each participant in the sample were calculated as the distance of the actual value from predicted value. The mean of these distances were then calculated for each score for previous and new models and compared.

predicted_score_fit_mean<- function(model, raw_score) {
  round(mean(abs(round(predict(model, data.frame(decimal_age = adult$decimal_age, SelEdGrp = adult$SelEdGrp), se.fit=TRUE)$fit, 0) -
    adult[[raw_score]])), 4)
}
raw_score_mods <- list(c_mod, w_mod, cw_mod)

new_model_deviation <- mapply(predicted_score_fit_mean, raw_score_mods, raw_score_column_names)

old_model_deviation <- round(c(mean(abs(adult$CRESscore)), mean(abs(adult$WRESscore)), mean(abs(adult$CWRESscore))),4)

difference <- old_model_deviation - new_model_deviation

deviation_df <- rbind.data.frame(new_model_deviation, old_model_deviation, difference)
rownames(deviation_df) = c("New Stroop Models", "Old Stroop Models", "Difference Between Old and New Models Average Residuals")
deviation_df %>%
  kable(col.names = c("Color", "Word", "Color-Word"), caption = table_counter_func("Residuals of New and Old Stroop Predicted Score Models")) %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

*Table* 17: Residuals of New and Old Stroop Predicted Score Models
	Color	Word	Color-Word
New Stroop Models	9.7480	11.6300	7.5657
Old Stroop Models	10.8606	12.0295	8.2118
Difference Between Old and New Models Average Residuals	1.1126	0.3995	0.6461

As can be seen in the above table the new Stroop models all produced predictions that were on average closer to actual scores than the previous version of the Stroop. These predicted scores are in Appendix:

Following is in Appendix

Predicted Score Tables for Color, Word, Color-Word

#   prediction_list_function <- function(model){
#     predicted <- lapply(levels(adult$SelEdGrp), function(degree){
#         out_df <- sapply(seq(15, 90, 1), function(age) {
#         round(predict(model, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$fit, 0)
#         }
#         )
#          names(out_df) <- seq(15, 90, 1)
#         # row.names(out_df) <- seq(20, 130, 5)
#         return(out_df)
#     }
#     )
#     names(predicted) <- levels(adult$SelEdGrp)
#     return(predicted)
#   }
# mod_list <- list(c_mod, w_mod, cw_mod)
# predicted_score_list <- lapply(1:3, function(i){
#   # model <- as.formula(model)
#   predict_list <- prediction_list_function(mod_list[[i]])
#   
#   lapply(predict_list, function(predicted_list){
#       not_duplicated_score_names <- as.numeric(names(predicted_list[!duplicated(predicted_list)]))
#       upper_range <- not_duplicated_score_names -1
#       names <- c(upper_range[-1], ">")
#       not_duplicated_score_names <- paste(not_duplicated_score_names, names, sep = "-")
#       not_duplicated_scores <- predicted_list[!duplicated(predicted_list)]
#       not_duplicated_df <- cbind.data.frame(not_duplicated_score_names, not_duplicated_scores)
#     }
#     )
# }
# )
#   names(predicted_score_list) <- score_names
# 
#   cat("<div class = 'row h-100'>")
#     cat("<div class = 'col-md-4'>")
#       cat("<h3>Color</h3>")
#     cat("</div>")
#     cat("<div class = 'col-md-4'>")
#       cat("<h3>Word</h3>")
#     cat("</div>")
#     cat("<div class = 'col-md-4'>")
#       cat("<h3>Color-Word</h3>")
#     cat("</div>")
#   cat("</div>")
#   
# lapply(1:length(levels(adult$SelEdGrp)), function(i){
#   cat("<div class = 'row h-100'>")
#   cat(paste0("<h4>",levels(adult$SelEdGrp)[i], "</h4>"))
#   lapply(1:3, function(j){
#     cat("<div class = 'col-md-4'>")
#       print(predicted_score_list[[j]][i] %>%
#             kable(row.names = F, col.names = c("Age", "Predicted Score")) %>%
#             kable_options())
#       cat("\n")
#     cat("</div>")
#   })
#     cat("\n")
#   cat("</div>")
#   cat("<hr>")
# })

Stop Appendix

Residual T-Scores

Next, T-score tables were constructed for residuals, that is, the predicted score for an individual subtracted from the actual score that individual obtained for each scale. Scores were examined for normality and found to be sufficiently normal. Tables can be found in Appendix # appendix

word_predicted_scores <- predict(w_mod)
# Compare to current predicted scores
word_deviation <- adult$WRScore - word_predicted_scores
  t_score_func <- function(raw_score, scale) {
    # scale   <- enquo(scale)
    round(((raw_score - mean(scale))/sd(scale))*10 + 50, 0)
  }
 word_t_scores <- sapply(seq(-50, 50, 1), t_score_func, scale = word_deviation)

color_predicted_scores <- predict(c_mod)
# Compare to current predicted scores
color_deviation <- adult$CRScore - color_predicted_scores
  color_t_scores <- sapply(seq(-50, 50, 1), t_score_func, scale = color_deviation)

color_word_predicted_scores <- predict(cw_mod)
# Compare to current predicted scores
color_word_deviation <- adult$CWRscore - color_word_predicted_scores
color_word_t_scores <- sapply(seq(-50, 50, 1), t_score_func, scale = color_word_deviation)

scale_deviation_list <- list(color_deviation, word_deviation, color_word_deviation)
 cbind.data.frame(score_names, do.call(bind_rows, lapply(1:length(scale_deviation_list), function(i) #shapiro_test(scale_deviation_list[[i]]))) %>%
   c(agostino.test(scale_deviation_list[[i]])$statistic, p = agostino.test(scale_deviation_list[[i]])$p.value)))) %>%
 mutate_if(is.numeric, round, digits = 3)%>%
  kable(col.names = c("Scale", "Skewness", "Z-Value", "P")) %>%
  kable_options()

Scale	Skewness	Z-Value	P
Color Raw Score	0.017	0.136	0.892
Word Raw Score	-0.203	-1.616	0.106
Color-Word Raw Score	0.394	3.062	0.002

cbind.data.frame(mapply(function(scale, scale_name) {
  set_names(as.data.frame(scale), scale_name)
  }, scale_deviation_list, score_names)
) %>%
  pivot_longer(., everything(), "Scale") %>%
  arrange(match(Scale, score_names)) %>%
  mutate(Scales = factor(Scale, levels = score_names)) %>%
  ggplot(aes(value)) + #ggplot(., aes(x = Scales))+
        geom_bar(stat = "bin") +
        theme_bw() +
        facet_wrap(~Scales,scales = "free_x") +
        ggtitle(figure_counter_func("Residual Distributions")) + 
        ylab("Number of Participants")

Insert to Appendix #

T-Scores for Word, Color, Color-Word

# data.frame(raw_deviation_score = seq(-50, 50, 1), word_t = word_t_scores, color_t = color_t_scores, color_word_t = color_word_t_scores) %>%
#   kable(col.names = c("Raw Deviation", "Word", "Color", "Color-Word")) %>%
#   kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
#   add_header_above(c(" ", "T-Scores" = 3))

Stop Appendix

Interference Scores

Interference scores were then examined to determine the best model for predicting scores.

# Plot with intercept
ggplot(adult, aes(x = decimal_age, y = Interference)) +
  geom_smooth() +
  geom_hline(aes(yintercept=mean(Interference), color= "Overall Mean"),
            linetype="dashed") +
  ggtitle(figure_counter_func("Interference Scores Across Age")) +
  xlab("Age") +
  labs(color = "Statistic") +
  scale_x_continuous(breaks = seq(15,90,5))

The model with Age term as a predictor of Interference is the best fitting model (P < 0.0001), but a continuous age term cannot be used in paper tables. Nonetheless, using this model can help us identify if it is appropriate to create a segmented model with age as a predictor of Interference scores, and identify that break-point, which was found to be 68.

A segmented model was constructed and was compared to one without the break and was found to be significantly better (p = 0.0081).

group_mean <- adult %>%
  group_by(Xbar) %>%
  summarize(sd = round(sd(Interference),2),
            Interference = mean(Interference),
            decimal_age = mean(decimal_age)) %>%
  ungroup() %>%
  mutate(age_group = dplyr::recode(Xbar, '0' = paste("Under", age_break), '1' = paste("Over", age_break)))

ggplot(my.model, aes(x = age, y = Interference)) + 
  geom_line() + 
  geom_label(data = group_mean, aes(x = decimal_age, y = Interference, label = paste0(round(Interference,2), "(", sd,")"), color = age_group)) +
   ggtitle(figure_counter_func("Interference Scores Across \n Age Segmented at Break Point")) +
  xlab("Age") +
  labs(color = "Age Group Mean Score (SD)") +
  scale_x_continuous(breaks = seq(15,90,10))

# interference_se_list <- lapply(levels(adult$SelEdGrp), function(degree) {
#   lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
#     predicted_score_se <- round(predict(int_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$se.fit,2)
#     data.frame(age, predicted_score_se, degree)
#     }, degree = degree
#     )
# }
#   )
# interference_fit_list <- lapply(levels(adult$SelEdGrp), function(degree) {
#   lapply(seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 1), function(age, degree) {
#     predicted_score<- round(predict(int_mod, data.frame(decimal_age = age, SelEdGrp = degree), se.fit=TRUE)$fit, 2)
#     cbind(age, predicted_score, degree)
#     }, degree = degree
#     )
# }
#   )
# 
# interference_fit_df_list <- lapply(interference_fit_list, function(x) (do.call(rbind,x)))
# interference_fit_df <- do.call(rbind.data.frame, c(interference_fit_df_list, make.row.names = F))
# interference_fit_df$age <- as.numeric(interference_fit_df$age)
# interference_fit_df$predicted_score <- as.numeric(interference_fit_df$predicted_score)
# interference_fit_df$degree <- factor(interference_fit_df$degree, levels = levels(adult$SelEdGrp))
# 
# ggplot() +
#   geom_point(data = adult, aes(x = decimal_age, y = Interference, color = SelEdGrp)) +
#          geom_smooth(data = interference_fit_df, aes(x = age, y = predicted_score, color = degree)) +
#   scale_x_continuous(breaks = seq(floor(min(adult$decimal_age)), ceiling(max(adult$decimal_age)), 5)) +
#   scale_y_continuous(breaks = seq(min(adult$Interference) - min(adult$Interference) %%5, max(adult$Interference), 5)) +
#   labs(color = "Education Level") +
#   xlab("Age") +
#   ylab("Interference Raw Score") +
#   ggtitle("Predicted Interference Scores by Education Across Ages")

adult %>%
  mutate(Xbar = recode(Xbar, '0' = paste('Below', age_break), '1' = paste('At or above', age_break))) %>%
ggplot(., aes(x = Interference, color = Xbar)) +
  geom_bar(stat = "bin", binwidth = 5) +
  ggtitle(figure_counter_func("Histogram of Interference Scores \n by Age Above and Below Breakpoint")) +
  xlab("Interference Raw Score") +
  ylab("Number of Participants") +
  labs(color = "Group")

Data were divided into those individuals at or above the breakpoint and those below. The groups were found to resemble a normal distribution and differences in variances were not significantly different (F = 1.83 p = 0.26). Thus the entire sample standard deviation was used, as constant variance was assumed.

T-Score tables for Interference are presented in Appendix Appendix #

Interference T-Scores

# Interference T-scores
  # int_t_score_func <- function(raw_score, expr) {
  #   sub_adult <- adult %>%
  #   dplyr::filter(., !!rlang::enexpr(expr))
  #   # Convert to t-score
  #   round(((raw_score - mean(sub_adult$Interference))/sd(adult$Interference))*10 + 50, 0)
  # }
  # 
  # young_int_t_score <- data.frame(raw_scores = seq(min(adult$Interference), max(adult$Interference), 1), t_scores = sapply(seq(min(adult$Interference), max(adult$Interference), 1), int_t_score_func, expr = decimal_age <= age_break)) %>%
  #   filter(t_scores >= 20 & t_scores <= 80) %>%
  #   mutate(raw_scores = ifelse(t_scores == 20, paste("<=", raw_scores), ifelse(t_scores == 80, paste(">=", raw_scores), raw_scores)))
  # 
  # old_int_t_score <- data.frame(raw_scores = seq(min(adult$Interference), max(adult$Interference), 1), t_scores = sapply(seq(min(adult$Interference), max(adult$Interference), 1), int_t_score_func, expr = decimal_age > age_break)) %>%
  #   filter(t_scores >= 20 & t_scores <= 80) %>%
  #   mutate(raw_scores = ifelse(t_scores == 20, paste("<=", raw_scores), ifelse(t_scores == 80, paste(">=", raw_scores), raw_scores)))
  # 
  # overall_int_t_score <- data.frame(raw_scores = seq(min(adult$Interference), max(adult$Interference), 1), t_scores = sapply(seq(min(adult$Interference), max(adult$Interference), 1), int_t_score_func, expr = (TRUE)))  %>%
  #   filter(t_scores >= 20 & t_scores <= 80) %>%
  #   mutate(raw_scores = ifelse(t_scores == 20, paste("<=", raw_scores), ifelse(t_scores == 80, paste(">=", raw_scores), raw_scores)))


  
  ## Continuous Norming- not done here b/c quadratic and not enough cases
# library(cNORM)
#   norm_adult <- adult %>% data.frame(personID = row.names(.), raw = .$Interference, group = as.numeric(.$Xbar)) %>% dplyr::select (personID, raw, group)
#   normData <- rankByGroup(norm_adult, group = "group")
#   normData <- computePowers(normData, k = 4, norm = "normValue", age = "group") 
#   model <- bestModel(normData)
#   getNormCurve(50, model, minAge = 15, maxAge = 90, step = 2, minRaw = min(adult$Interference), maxRaw = max(adult$Interference))
#     
#   normData2 <- adult %>% data.frame(personID = row.names(.), raw = .$Interference, group = as.numeric(.$Cyear)) %>% dplyr::select (personID, raw, group) %>% 
#     rankBySlidingWindow(data = ., age = "group", raw = "raw", width = 5)
#   normData2 <- computePowers(normData2, k = 4, norm = "normValue", age = "group")
#     model2 <- bestModel(normData2)
#   getNormCurve(50, model2, minAge = 15, maxAge = 90, step = 2, minRaw = min(adult$Interference), maxRaw = max(adult$Interference))
#   normTable(70, model2, minRaw = -50, maxRaw = 50, 
#                minNorm=20, maxNorm=80, step = 1)

# cat("<div class = 'row h-100'>")
#   cat("<div class = 'col-md-6'>")
#     cat(paste("<h3>Age <=", age_break, "</h3>"))
#     print(young_int_t_score %>%
#             kable(col.names = c("Raw Scores", "T-Scores")) %>%
#             kable_options()) 
#   cat("</div>")
#   cat("<div class = 'col-md-6'>")
#     cat(paste("<h3>Age >", age_break, "</h3>"))
#     print(old_int_t_score %>%
#             kable(col.names = c("Raw Scores", "T-Scores")) %>%
#             kable_options())
#   cat("</div>")
# cat("</div>")

Stop Appendix

Scale Structure

The structure of the Stroop was examined. The raw scores derived from the Color, Word, and Color-Word tasks and those scores calculations to obtain an Interference score were considered separate scales. Their association and the combination of these scales as factors were examined.

Scale Correlation

First the correlation of the scales was examined. As Interference is a calculation from the other three scores, Interference is not independent and, thus, does not meet assumptions typically required for inclusion in some analyses. Nonetheless, its association with the other scales is of interest. Analyses were typically conducted with and without Interference included to examine its association with the other scales (with its inclusion) and to obtain outcome measures fulfilling stricter assumptions (without inclusion).

Correlation Matrix Scatterplot Chart

A correlation matrix was constructed, along with scatterplots of association between scales, and histograms of individual scales, with all combinations of two scales included.

adult %>% 
  dplyr::select(CRScore, WRScore, CWRscore, Interference) %>%
  rename(Color = CRScore, Word = WRScore, Color_Word = CWRscore) %>%
  chart.Correlation()
  title(sub="NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001")
  title(main = figure_counter_func("Stroop Correlation Matrix"), line = 3)

Observing the chart above, we can see that Color, Word, and Color-Word have significant approximately moderate correlations with each other. Interference has a strong and significant correlation with Color-Word, but has only very weak correlations with Color and Word individually. That Interference and Color-Word have a strong relationship is intuitive, as Interference is the predicted Color-Word score subtracted from the actual Color-Word score, thus Color-Word score is used in the calculation of Interference. However, the predicted Color-Word score is calculated from Color and Word scores, so there would seemingly be a relationship between Interference and Color or Word. The relationship between individual scales is explored further in the Special Cases section below.

Cronbach Alpha

# Stroop Correlation Matrix- given above, so not used here
# adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% cor(use = "pair") %>% round(2)

cbind.data.frame(scale = c("With Interference", "Without Interference"),
  rbind.data.frame(
    # Stroop Cronbach with Interference
    cbind.data.frame(adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% cronbach()),
  # Stroop Cronbach without Interference
    cbind.data.frame(adult%>% dplyr::select(CRScore, WRScore, CWRscore) %>% cronbach())
  )
) %>%
  mutate_if(is.numeric, round, digits = 3) %>%
  kable(col.names = c("Scales Included", "N", "Number of Scales", "Alpha")) %>%
  kable_options()

Scales Included	N	Number of Scales	Alpha
With Interference	373	4	0.698
Without Interference	373	3	0.768

Both with and without inclusion of Interference, Stroop scales demonstrated acceptable to moderate association, using traditional interpretation criteria of Cronbach alpha.

Bartlett Correlation Test

A Bartlett’s correlation test was conducted. Results of the test indicated the correlation of individual items was significantly different than zero, χ 2(6)= , p= 0. The determinant of the correlation matrix was 0.04, of sufficient value to conduct further analysis of the factor structure of the items.

The correlation of the variables was graphically represented with:

Graphical Representation of Correlation using Principal Components Analysis- 2d

# mdspca
adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% mdspca()
title(main = figure_counter_func("PCA Stroop Correlation"))

Graphical Representation of Correlation using Principal Components Analysis- 3d

# sphpca
adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% sphpca()
title(main = figure_counter_func("\n PCA Stroop \n Correlation 3d"), line = 3, adj = .2)

It can be seen that the Color scale and Word scale seem to have a have a relationship and a triangular relationship with Color-Word. Interference is associated with Color-Word. This is explored further through graphical representation with Interference as a dependent variable and the other three scales as independent.

Graphical Representation of Correlation with Interference as Dependent Variable

# FPCA
fpca(Interference ~ CRScore + WRScore + CWRscore, data = adult)
title(main = figure_counter_func("\n Correlation Interference \n Dependent"))

A Principal Components Analysis (PCA), or Exploratory Factor Analysis, was conducted to further explore the relationship.

Principal Components Analysis

Stroop Unspecified Model

# stroop pca
fit_unspecified <- adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% principal(., rotate="promax")
data.frame(factors = seq(1:length(raw_score_column_names_int)), variance = round(fit_unspecified$values[1:length(raw_score_column_names_int)],2)) %>%
  kable(caption = table_counter_func("Variance of Contrasts of Models with Unspecified Number of Factors"), row.names = F, col.names = c("Number of Factors in Model", "Variance")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

*Table* 18: Variance of Contrasts of Models with Unspecified Number of Factors
Number of Factors in Model	Variance
1	2.23
2	1.37
3	0.37
4	0.04

Stroop One Factor Model

data.frame(matrix(round(as.numeric(fit_unspecified$loadings), 3), attributes(fit_unspecified$loadings)$dim, dimnames=list(all_names[c(1:4)], c("Stroop Test")))) %>% kable(caption = table_counter_func("Stroop PCA One Factor")) %>% kable_options()

*Table* 19: Stroop PCA One Factor
	Stroop.Test
Color	0.806
Word	0.655
Color-Word	0.936
Interference	0.525

cat (paste0('\n', "Examining the variance of the contrasts of models with increasing factors, it can be seen that the greatest amount of variance is in the model with ", which(fit_unspecified$values == max(fit_unspecified$values)), " factor. This is the model with all scores contributing.  However, further models should be evaluated for further information about how scales interact.", '\n' ), sep = " ")

Examining the variance of the contrasts of models with increasing factors, it can be seen that the greatest amount of variance is in the model with 1 factor. This is the model with all scores contributing. However, further models should be evaluated for further information about how scales interact.

Stroop Two Factor Model

fit_two_factor <- adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% principal(., nfactors = 2, rotate="promax")
data.frame(matrix(round(as.numeric(fit_two_factor$loadings), 3), attributes(fit_two_factor$loadings)$dim, dimnames=list(all_names[c(1:4)], c("Processing Speed", "Interference")))) %>% kable(caption = table_counter_func("Stroop PCA Two Factor")) %>% kable_options()

*Table* 20: Stroop PCA Two Factor
	Processing.Speed	Interference
Color	0.879	0.087
Word	0.935	-0.185
Color-Word	0.419	0.799
Interference	-0.260	1.021

The model with two factors yielded chi-square of 168.429.

Stroop Three Factor Model

fit_three_factor <- adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% principal(., nfactors = 3, rotate="promax")
data.frame(matrix(round(as.numeric(fit_three_factor$loadings), 3), attributes(fit_three_factor$loadings)$dim, dimnames=list(all_names[c(1:4)], c("Interference", "Color Processing Speed", "Word Processing Speed")))) %>% kable(caption = table_counter_func("Stroop PCA Three Factor")) %>% kable_options()

*Table* 21: Stroop PCA Three Factor
	Interference	Color.Processing.Speed	Word.Processing.Speed
Color	-0.077	1.054	-0.048
Word	-0.047	-0.046	1.035
Color-Word	0.780	0.296	0.158
Interference	1.040	-0.184	-0.119

The model with three factors yielded chi-square of 209.954.

It is interesting to see how scales separate. When forcing a two-factor model, it appears Color and Word form one factor, while Interference and Color-Word form another. With a three-factor model, Color and Word separate.

A scree plot was constructed to explore the appropriate number of factors the scales assume.

adult%>% dplyr::select(CRScore, WRScore, CWRscore, Interference) %>% scree.plot(., simu = 20, use="P", title = figure_counter_func("Scree Plot"))

It can be seen that a model with two factors maintains an eigenvalue above 1, typically considered the threshold for the number of factors. Taken together, the above analyses suggest that viewing all four scales as one construct is optimal, but that there is support to also conceptualize Stroop scales as assuming two factors, one with Color and Word, seemingly as a measure of raw processing speed, referred to as “Processing Speed” and the other with Color-Word and Interference, measures of the Stroop Effect, referred to as “Stroop.”

Structural Equation Modeling

Structural Equation Modeling was used to further explore the factor structure suggested by PCA above. Alpha, from Cronbach’s Alpha, and Omega, a measure of congeneric, or composite, reliability model statistics are presented. In general, composite reliability is seen to be a more accurate indicator of structure of scales for a number of reasons, including that it is more unbiased than alpha and seen to be more accurate citation

One-Factor Model

  round_df <- function(df, digits) {
  nums <- vapply(df, is.numeric, FUN.VALUE = logical(1))

  df[,nums] <- round(df[,nums], digits = digits)

  (df)
  }
  # Two Factor Model with no Interference
   one_factor.model_interference <-
 '                        total =~ CRScore + WRScore + CWRscore + Interference'
  
  #  Fit the model
  fit_one_factor_Int <- lavaan::cfa(one_factor.model_interference, data = adult)

  one_factor.model_no_interference <-
 '                        total =~ CRScore + WRScore + CWRscore'
  
  #  Fit the model
  fit_one_factor_no_Int <- lavaan::cfa(one_factor.model_no_interference, data = adult)
  cbind.data.frame(c("Interference", "No Interference"), rbind.data.frame(reliability(fit_one_factor_Int, return.total = TRUE)[1:2,], reliability(fit_one_factor_no_Int, return.total = TRUE)[1:2,])) %>%
    round_df(., 3) %>%
    kable(col.names = c("Interference Included", "Alpha", "Omega"), caption = table_counter_func("Stroop One Factor SEM")) %>%
    kable_options()

*Table* 22: Stroop One Factor SEM
Interference Included	Alpha	Omega
Interference	0.698	0.872
No Interference	0.768	0.791

Two-Factor Model with Interference

  # Two Factor Model with no Interference
  two_factor.model_interference <-
'
                       color_word =~ a*CRScore + a*WRScore
                       cw_int =~ b*CWRscore + b*Interference

                        CRScore ~~ CRScore
                        WRScore ~~ WRScore
                        CWRscore ~~ CWRscore
                        Interference ~~ Interference
'
  
  #  Fit the model
  fit_two_factor_int <- lavaan::cfa(two_factor.model_interference, data = adult)
  # summary <- summary(fit, fit.measures=FALSE)$PE
  
  # Parameter Estimates- Not used here with additional factor in above equations or to output df
  # pe <- parameterEstimates(fit_two_factor_int, ci = TRUE, level = 0.95)[1:4,]
  
  # Factor Loadings
    sl <- standardizedSolution(fit_two_factor_int)
    sl <- sl$est.std[sl$op == "=~"]
 # names(sl) <- names(raw_df)

  # # Output DF of Parameter Estimates
  # pe %>%
  #   cbind.data.frame(., sl)[,-4] %>%
  #   round_df(.,2) %>%
  # kable(caption = "Parameter Estimates", row.names = F, col.names = c("Factor", "Contains", "Variable", "Estimate", "SE", "Z", "P-Value", "CI Low", "CI Upper", "Factor Loadings")) %>%
  # kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
  # 
  # # For interpretive report below
  # pe_no_na <- pe[complete.cases(pe),]
  # fit_measures <- fitMeasures(fit)
  
  # Print SEM Paths
  semPaths(fit_two_factor_int,"std", edge.label.cex = 1, edge.label.color = "black", exoVar = FALSE,
           exoCov = FALSE, nodeLabels = c(score_names_int, "Processing Speed", "Stroop"), sizeMan = 15, sizeInt = 10, sizeLat = 17, label.cex = 1.3)
  title(main = figure_counter_func("SEM Paths"), line = 2)

# Compute residual variance of each item
re <- 1 - sl^2

# Compute composite reliability
composite_reliability <- round(sum(sl)^2 / (sum(sl)^2 + sum(re)),4)
as.data.frame(reliability(fit_two_factor_int, return.total = T)[1:2,]) %>%
  round_df(., 3) %>%
  kable(col.names = c("Processing Speed", "Stroop", "Factors Combined"), caption = table_counter_func("Stroop Two-Factor SEM w/ Interference")) %>%
  add_header_above(c(" " = 1, "Factor" = 2, " " = 1)) %>%
  column_spec(4, border_left = "1px solid gray") %>%
  kable_options()

*Table* 23: Stroop Two-Factor SEM w/ Interference
	Factor
	Processing Speed	Stroop	Factors Combined
alpha	0.75	0.849	0.698
omega	0.76	0.903	0.885

# <!-- The model was found to be of significantly good fit (χ2(`r round(fit_measures[names(fit_measures) == "df"], 0)`)=`r paste0(round(fit_measures[names(fit_measures) == "chisq"],2), ", p= ", round(fit_measures[names(fit_measures) == "pvalue"], 4))`).    -->
# 
# <!-- The parameter estimates for each variable's fit (rhs column) within the assigned factor (lhs) are given in the table.  `r ifelse(nrow(pe_no_na[pe_no_na$pvalue >= .05,]) > 0, print(paste("The following variables were found to not fit within the assigned factor- ", pe_no_na$lhs[pe_no_na$pvalue >= .05])), print("No variables were found to not fit factors they were assigned.")) ` -->

#   # Two Factor Model with no Interference
#   two_factor.model_no_interference <- '
#                        color_word =~ CRScore + WRScore
#                        cw =~ CWRscore
# 
#                         CRScore ~~ CRScore
#                         WRScore ~~ WRScore
#                         CWRscore ~~ 0*CWRscore
# '
#  
#   #  Fit the model
#   fit_two_factor_no_int <- lavaan::cfa(two_factor.model_no_interference, data = adult)
#   summary <- summary(fit_two_factor_no_int, fit.measures=FALSE)$PE
#   
#   # Parameter Estimates- Not used here with additional factor in above equations or to output df
#   pe <- parameterEstimates(fit_two_factor_no_int, ci = TRUE, level = 0.95)[1:3,]
#   
#   # Factor Loadings
#     sl <- standardizedSolution(fit_two_factor_no_int)
#     sl <- sl$est.std[sl$op == "=~"]
#   names(sl) <- score_names
# 
#   # Output DF of Parameter Estimates
#   pe %>%
#     cbind.data.frame(., sl) %>%
#     round_df(.,2) %>%
#   kable(caption = "Parameter Estimates", row.names = F, col.names = c("Factor", "Contains", "Variable", "Estimate", "SE", "Z", "P-Value", "CI Low", "CI Upper", "Factor Loadings")) %>%
#   kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
# 
#   # For interpretive report below
#   pe_no_na <- pe[complete.cases(pe),]
#   fit_measures <- fitMeasures(fit_two_factor_no_int)
#   
#   # Print SEM Paths
#   semPaths(fit_two_factor_no_int,"std", edge.label.cex = 0.5, exoVar = FALSE,
#            exoCov = FALSE)
# 
# # Compute residual variance of each item
# re <- 1 - sl^2
# 
# # Compute composite reliability
# composite_reliability <- round(sum(sl)^2 / (sum(sl)^2 + sum(re)),4)
# 
# # Compute composite reliability
# composite_reliability <- round(sum(sl)^2 / (sum(sl)^2 + sum(re)),4)
# as.data.frame(reliability(fit_two_factor_no_int, return.total = T)[1:2,]) %>%
#   round_df(., 4) %>%
#   kable(col.names = c("Color and Word", "Total")) %>%
#    add_header_above(c(" " = 1, "Factor" = 1, " " = 1)) %>%
#   column_spec(3, border_left = "1px solid gray") %>%
#   kable_options()

Reliability

Test-Retest

# Give maximum length of time allowed for test-retest
time_limit <- 50

To assess test-retest reliability a number of participants were re-administered the Stroop up to 50 days between initial test (“Test”) and retest (“Retest”). Information about number of participants, length of time between test and retest, other information, and the impact of these factors on scores is presented below.

# ICC
## Compute with irr
# Note that, the two-way mixed-effects model and the absolute agreement are recommended for both test-retest and intra-rater reliability studies (Koo et al., 206).

# Create df with test-retest for each scale
library("irr")

icc_list <- list(cbind.data.frame(adult_retest$CRScore.x, adult_retest$CRScore.y),cbind.data.frame(adult_retest$WRScore.x, adult_retest$WRScore.y), cbind.data.frame(adult_retest$CWRscore.x, adult_retest$CWRscore.y), cbind.data.frame(adult_retest$Interference.x, adult_retest$Interference.y)) 

icc_output_list <- lapply(icc_list, function(df_list){
  icc(
    df_list, model = "twoway", 
    type = "consistency", unit = "single"
    )
})
names(icc_output_list) <- scale_list_int

# # Using ICC in psych- not used here
# icc_psych_output_list <- lapply(icc_list, function(df_list){
#   ICC(
#     df_list
#     )
# })
# names(icc_psych_output_list) <- scale_list_int

# Using test-retest in psych
adult$time <- rep(1, nrow(adult))
retest$time <- rep(2, nrow(retest))

adult_long <- adult %>%
  subset(case_id %in% retest$case_id) %>%
  dplyr::select(case_id, time, CRScore, WRScore, CWRscore, Interference) #%>%
  # pivot_longer(
  #   cols = ends_with("core"),
  #   names_to = "raw_scores",
  #   values_to = "values"
  # )

retest_long <- retest %>%
  dplyr::select(case_id, time, CRScore, WRScore, CWRscore, Interference)

adult_retest <- bind_rows(adult_long, retest_long)
over_time_descriptives <- describeBy(adult_retest,group="time", mat = T)
over_time_descriptives %>% 
  subset(., vars > 2) %>%
  mutate(time = recode(group1, '1' = "Test", '2' = "Retest"),
         scale = rep(score_names_int, each = 2)) %>%
  mutate(scale = ifelse(duplicated(scale), "", scale),
         n = ifelse(duplicated(n), "", n)) %>% #,
         # time = recode(time, '1' = "Test", '2' = "Retest")) %>%
  dplyr::select(scale, time, n, mean, sd, se, median, min, max, skew, kurtosis) %>%
    mutate_if(is.numeric, round, 3) %>%
  kable(caption = table_counter_func("Test-Retest Statistics")) %>%
  kable_options()

*Table* 24: Test-Retest Statistics
	scale	time	n	mean	sd	se	median	min	max	skew	kurtosis
CRScore1	Color Raw Score	Test	48	70.729	13.137	1.896	70.5	31	98	-0.435	0.445
CRScore2		Retest		72.083	13.388	1.932	70.5	46	100	0.257	-0.519
WRScore1	Word Raw Score	Test		102.812	16.283	2.350	100.5	60	139	0.058	-0.026
WRScore2		Retest		103.875	15.050	2.172	100.5	80	140	0.595	-0.358
CWRscore1	Color-Word Raw Score	Test		39.271	10.790	1.557	39.0	12	61	-0.155	-0.405
CWRscore2		Retest		41.396	10.582	1.527	41.0	20	63	0.207	-0.539
Interference1	Interference	Test		-2.396	7.371	1.064	-2.0	-20	10	-0.357	-0.485
Interference2		Retest		-0.917	9.055	1.307	0.0	-28	19	-0.366	0.639

# test_retest_ml <- mlr(adult_retest,grp="case_id",Time="time",items=3:5)

Intraclass correlation coefficients (ICC) are presented as measures of test-retest reliability. Two-way mixed-effects model and the absolute agreement to derive intraclass correlation coefficients were used. The following table presents ICC statistics for each scale.

lapply(icc_output_list,function(out_list) {data.frame(
    val = paste0(round(out_list$value,2)," (", round(out_list$lbound,2), " - ", round(out_list$ubound,2), ")"),
    f_val = round(out_list$Fvalue, 2),
    p_val = round(out_list$p.value, 2)
  )
}) %>%
  bind_rows() %>%
  mutate(scales = score_names_int) %>%
  dplyr::select(scales, everything()) %>%
  kable(col.names = c("Scale", "ICC Value (95% CI)", "F", "P"), caption = table_counter_func("ICC Table Test-Retest")) %>%
  kable_options()

*Table* 25: ICC Table Test-Retest
Scale	ICC Value (95% CI)	F
Color Raw Score	0.82 (0.7 - 0.9)	10.12
Word Raw Score	0.9 (0.83 - 0.94)	19.50
Color-Word Raw Score	0.68 (0.49 - 0.81)	5.25
Interference	0.5 (0.26 - 0.69)	3.02

Concordance Correlation Coefficients were also produced as a similar measure of test-retest reliability.

Concordance Correlation

library(rjags)
library(agRee)
library(BlandAltmanLeh)

cc_matrix_list <- lapply(raw_score_column_names_int, function(scale){
    scale <- enquo(scale)
    adult_retest %>%
    dplyr::select(case_id, time, !!scale) %>%
    pivot_wider(names_from = time, values_from = !!scale) %>%
    rename(test = `1`, retest = `2`) %>%
    dplyr::select(test, retest) %>%
     filter(abs(test - retest) < 50) %>%
    data.matrix()
})
  # agree.ccc()
  cc_output <- lapply(cc_matrix_list, function(mat) agree.ccc(mat))
  do.call(bind_rows, lapply(cc_output, unlist)) %>% 
    round(., 3) %>%
    cbind.data.frame(score_names_int, .) %>%
    kable(col.names = c("Scale", "Concordance Correlation", "Lower Bound", "Upper Bound"), caption = table_counter_func("Concordance Correlation")) %>%
    add_header_above(c(" " = 2, "95% CI" = 2)) %>%
    kable_options()

*Table* 26: Concordance Correlation
		95% CI
Scale	Concordance Correlation	Lower Bound	Upper Bound
Color Raw Score	0.810	0.635	0.906
Word Raw Score	0.890	0.696	0.963
Color-Word Raw Score	0.668	0.464	0.805
Interference	0.492	0.235	0.685

Bland-Altman and Means and Differences plots were produced to assess test-retest values. Bland-Altman plots show individual test-retest differences and a confidence interval band around the mean. Plots with a number of values falling outside this interval can suggest problems with test-retest reliability.

Means and Differences plots are a plot with test and retest values as coordinates. A fitted line is compared to a unity line (a line representing perfect test to retest correspondence) to compare agreement of test-retest values.

  # agree.plot(cc_matrix_list[[1]])
cat("<div class = 'row h-100'>")

  cat("<div class = 'col-md-6'>")

    cat("<h3>Bland-Altman Plot</h3>")

Bland-Altman Plot

  cat("</div>")

  cat("<div class = 'col-md-6'>")

    cat("<h3>Means and Differences Scatterplot</h3>")

Means and Differences Scatterplot

  cat("</div>")

cat("</div>")

  lapply(1:length(cc_matrix_list), function(i) {
      cat("<div class = 'row h-100'>")
        cat(paste("<h4>", score_names_int[i], "</h4>"))
        cat("<div class = 'col-md-6'>")
          print(bland.altman.plot(cc_matrix_list[[i]][,1], cc_matrix_list[[i]][,2], graph.sys="ggplot2"))
        cat("</div>")
        cat("<div class = 'col-md-6'>")
        print(
          cc_matrix_list[[i]] %>%
          as.data.frame() %>%
          ggplot(., aes(x = test, y = retest)) +
          geom_point() + 
          geom_smooth(aes(linetype = "Fitted Line"), method = "lm",se = F) +
          geom_abline(aes(linetype = "Unity Line" , intercept = 0, slope = 1))
        )
        cat("</div>")
      cat("</div>")
  })

Color Raw Score

Word Raw Score

Color-Word Raw Score

Interference

[[1]] NULL

[[2]] NULL

[[3]] NULL

[[4]] NULL

Each of the scales had significant ICC statistics, with moderate to strong correlation. However, it was also desired to ascertain the effect the length of time had on test and retest scores, which can impact the correlation, as well as have practical ramifications about practice effects and length of time between retesting.

To examine the effect of time on test-retest results:

Plots were constructed that show the change in score from Test to Retest across the time between tests (“Plot of Change in Score Over Time Between Tests”),
Mixed-effects models were constructed with Stroop scores as outcome variables and test type (test or retest) as predictor variable, with length of time between tests and within participant effects included in models and controlled for. Main effects of these models were examined, that is, whether there was a difference between test and retest scores controlling for those variables (“Main Effects of Differences Between Test and Retest”), and
The interaction between participant and length of time between tests were examined for significance (“Random Effects Likelihood Ratio Test”).

# Interference retest wide
  cat("<div class = 'row h-100'>")

     cat("<div class = 'col-md-6'>")

      cat(figure_counter_func("Plot of Score Change Over Time Between Tests"))

Figure 19: Plot of Score Change Over Time Between Tests

     cat("</div>")

     cat("<div class = 'col-md-6'>")

      cat(table_counter_func("Score Change Over Time Test Results"))

Table 27: Score Change Over Time Test Results

     cat("</div>")

    cat("</div>")

mapply(function(scale, scale_name){
  re <- retest %>%
   dplyr::mutate(exam_date_retest= as.Date(paste(EVyear, EVmonth, EVday, sep = "/")),
          retest_score = !!scale
   ) %>%
    dplyr::select(case_id, exam_date_retest, retest_score)
  ad <- adult %>%
   dplyr::mutate(exam_date_test= as.Date(paste(EVyear, EVmonth, EVday, sep = "/")),
          test_score = !!scale
   ) %>%
    dplyr::select(case_id, exam_date_test, test_score)
  
  re_ad_wide <- inner_join(re, ad, by = "case_id") %>%
    dplyr::mutate(retest_time = (age_calc(exam_date_test, enddate = exam_date_retest, units = "days")),
           test_change = retest_score - test_score)
   cat(paste0("<h4>",scale_name, "</h4>"))
   cat("<div class = 'row h-100'>")
     cat("<div class = 'col-md-6'>")
    
     print(re_ad_wide %>%
      dplyr::filter(retest_time <= time_limit) %>%
        ggplot(., aes(retest_time, test_change)) +
      geom_point() +
      geom_smooth() + 
        xlab("Time Between Test and Retest") +
        ylab("Score Change Between Test and Retest") +
        ggtitle("Plot of Change in Score \n Over Time Between Tests")-> p)
    cat("</div>")
   cat("<div class = 'col-md-6'>")
   # Interference retest long
re <- retest %>%
 dplyr::mutate(exam_date= as.Date(paste(EVyear, EVmonth, EVday, sep = "/")),
        test_status = rep("retest", nrow(.)),
                          result = !!scale) %>%
  dplyr::select(case_id, exam_date, result, test_status)
ad <- adult %>%
 dplyr::mutate(exam_date = as.Date(paste(EVyear, EVmonth, EVday, sep = "/")),
        test_status = rep("test", nrow(.)),
                          result = !!scale) %>%
  dplyr::select(case_id, exam_date, result, test_status)

re_ad_long <- bind_rows(re,ad)

    lmer(result ~ test_status +  (1|exam_date) + (1|case_id) + (1|exam_date:case_id), data = re_ad_long, REML = F) -> mod_full
    lmer(result ~ 1 +  (1|exam_date) + (1|case_id) + (1|exam_date:case_id), data = re_ad_long, REML = F) -> mod_reduced
    anova_result <- anova(mod_full, mod_reduced)
    # reduced model is the right choice for all
    rand_eff <- lmerTest::ranova(mod_reduced)
    dat_df <- cbind.data.frame(chi = round(anova_result$Chisq[2],2), df = anova_result$Chisq[-1], p = round(anova_result$`Pr(>Chisq)`[2], 2))
    ft <- table_output_func(dat_df, colnames = c("Chisq", "Df", "Pr(>Chisq)"), caption = "Main Effects of Difference between Test and Retest")
    cat(knit_print(ft))
    
    ### Checks difference in score over time
    
    re_ad_wide <- inner_join(ad, re, by = "case_id", suffix = c("_test", "_retest"))
re_ad_wide <- re_ad_wide %>%
  rowwise() %>%
  mutate(test_difference = result_retest - result_test,
         date_difference = exam_date_retest - exam_date_test)
re_ad_wide %>%
  ggplot(., aes(x = date_difference, y = test_difference)) +
  geom_smooth()

mod1 <- lm(test_difference ~ date_difference, data = re_ad_wide)
mod2 <- lm(test_difference ~ 1, data = re_ad_wide)
lrtest(mod1, mod2)
      # kable(col.names = c("Chisq", "Df", "Pr(>Chisq)"), caption = "Main Effects of Difference between Test and Retest") %>%
      # kable_options())
  #   print(cbind.data.frame(names(attributes(rand_eff)$formulae), round(rand_eff$LRT[-1],3), rand_eff$Df[-1], round(rand_eff$`Pr(>Chisq)`[-1],3))[3,] %>%
  #           flextable()%>% 
  # theme_zebra() %>% 
  # autofit())
            #kable(col.names = c("Random Effect", "Likelihood Ratio Test", "Df", "Pr(>Chisq)"), caption = "Random Effects Likelihood Ratio Test", row.names = F) %>%
            #kable_options())
    dat_df <- cbind.data.frame(names(attributes(rand_eff)$formulae), round(rand_eff$LRT[-1],3), rand_eff$Df[-1], round(rand_eff$`Pr(>Chisq)`[-1],3))[3,] 
    ft <- table_output_func(dat_df, colnames = c("Random Effect", "Likelihood Ratio Test", "Df", "Pr(>Chisq)"))
    cat(knit_print(ft))
    # names1 <- names(dat_df)
    # names2 <- c("Random Effect", "Likelihood Ratio Test", "Df", "Pr(>Chisq)")
    # ft = flextable(dat_df)
    # ft <- set_header_df(x = ft, mapping = data.frame(keys = names1, values = names2, stringsAsFactors = FALSE),
    #                  key = "keys" )
    # print(theme_booktabs(ft)) 
   cat("</div>")
 cat("</div>")
}, scale_score_int, score_names_int)

Color Raw Score

Main Effects of Difference between Test and Retest
Chisq	Df	Pr(>Chisq)
0.22	0.2155431	0.64

Random Effect	Likelihood Ratio Test	Df	Pr(>Chisq)
(1 \| exam_date:case_id)	0.422	1	0.516

Word Raw Score

Main Effects of Difference between Test and Retest
Chisq	Df	Pr(>Chisq)
2.07	2.066951	0.15

Random Effect	Likelihood Ratio Test	Df	Pr(>Chisq)
(1 \| exam_date:case_id)	2.881	1	0.09

Color-Word Raw Score

Main Effects of Difference between Test and Retest
Chisq	Df	Pr(>Chisq)
0.11	0.1091937	0.74

Random Effect	Likelihood Ratio Test	Df	Pr(>Chisq)
(1 \| exam_date:case_id)	0	1	1

Interference

Main Effects of Difference between Test and Retest
Chisq	Df	Pr(>Chisq)
0.03	0.0342477	0.85

Random Effect	Likelihood Ratio Test	Df	Pr(>Chisq)
(1 \| exam_date:case_id)	0	1	1

[[1]] NULL

[[2]] NULL

[[3]] NULL

[[4]] NULL

As can be seen in the above results, there were no significant differences between Test and Retest values for any Stroop scores, when controlling for time, participant factors, and that interaction. There were also not significant differences in score outcomes when testing for the interaction between length of time between test and retest and participant for any scores. The implications of both of these results is that Stroop scores seem to hold constant over time within individuals, and there do not seem to be practice effects, or changes in scores at retest points, at least within about 2 months time (50 days).

Validity

A number of participants were administered multiple instruments to assess the concurrent validity of the Stroop with other measures. The measures and specific scales used were reviewed in the “Measures” section above.

Previous Edition

None of the current Stroop stimulus content or administration procedures were changed from the previous edition. As such, there was no attempt to evaluate the previous and current revised edition.

All Measures

All Measures Correlation Matrix

The following correlation matrix presents the correlation of all measures used in these analyses.

ad_al <- adult_all %>% dplyr::select(CRScore.x, WRScore.x, CWRscore.x, Interference.x, LW , WA, cc_ratio_t, ci_ratio_t, Stroop, total) %>%
  correlation_matrix(digits = 2, replace_diagonal = T, use = 'lower')
dimnames(ad_al)<- rep(list(all_names), 2) 
ad_al %>% kable(caption = table_counter_func("NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001")) %>% kable_options()

*Table* 28: NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001
	Color	Word	Color-Word	Interference	Letter-Word	Word Attack	CC Ratio	CI Ratio	Nonverbal Stroop
Color
Word	0.57***
Color-Word	0.62***	0.47***
Interference	-0.05	-0.11	0.71***
Letter-Word	0.22*	0.36**	0.33**	0.15
Word Attack	0.17	0.30**	0.22*	0.07	0.57***
CC Ratio	0.17	0.18	0.23*	0.11	-0.02	-0.00
CI Ratio	0.38***	0.31**	0.43***	0.20	0.07	0.11	0.38***
Nonverbal Stroop	-0.26*	-0.16	-0.26*	-0.12	-0.09	-0.12	0.45***	-0.64***
Trails Total	0.42***	0.29**	0.32**	0.02	0.23*	0.15	0.14	0.33**	-0.22*

All Measures Graphical Representation of Correlation using Princiapal Components Analysis- 2d

The following is a graphical representation of correlation using Principal Components Analysis of all measures. Points closer in space represent closer correlations.

# mdspca
adult_all %>% dplyr::select(CRScore.x, WRScore.x, CWRscore.x, Interference.x, LW , WA, cc_ratio_t, ci_ratio_t, Stroop, total)%>% setNames(all_names) %>% mdspca(namesupsubj = all_names)
title(figure_counter_func("PCA All Scales"))

All Measures Three Factor PCA Model

A Prinicpal Components Analysis was conducted to examine how measures related and formed separate factors. A three factor model was chosen after analyses indicated the Stroop tended to form at least two factors, given the presence of other assessments. The results below suggest the presence of three factors that tended to group together with characteristics represented by factors names below. Further analyses of the Stroop with individual measures explores the association of these measures, as follows.

fit_three_factor <- adult_all %>% dplyr::select(CRScore.x, WRScore.x, CWRscore.x, Interference.x, LW , WA, cc_ratio_t, ci_ratio_t, Stroop, total) %>% principal(., nfactors = 3, rotate="promax")
data.frame(matrix(round(as.numeric(fit_three_factor$loadings), 3), attributes(fit_three_factor$loadings)$dim, dimnames=list(all_names, c("Stroop", "Fluency", "Nonverbal Stroop")))) %>% kable(caption = table_counter_func("All Measures Three Factor PCA Model Loadings")) %>% kable_options()

*Table* 29: All Measures Three Factor PCA Model Loadings
	Stroop	Fluency	Nonverbal.Stroop
Color	0.438	0.477	-0.009
Word	0.188	0.687	0.066
Color-Word	0.806	0.196	0.090
Interference	0.689	-0.310	0.089
Letter-Word	-0.105	0.792	0.012
Word Attack	-0.170	0.771	-0.032
CC Ratio	0.383	0.002	0.796
CI Ratio	0.806	-0.048	-0.240
Nonverbal Stroop	-0.477	0.032	0.884
Trails Total	0.338	0.356	-0.067

WJ

Stroop scores were compared with those obtained from Letter-Word (LW) and Word-Attack (WA) andThere were 18 cases that were identified in the WJ dataset that didn’t have a matching case in the Stroop dataset. The number of cases that matched were 304. There were no incomplete cases.

WJ-Stroop Correlation

The following analyses explore the correlation of the WJ subtests with Stroop scales. Review of analyses will be discussed as follows:

WJ-Stroop Correlation Matrix

# Stroop WJ Correlation Matrix
wj_mat <- adult_wj %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, LW , WA) %>%
  correlation_matrix(digits = 2, replace_diagonal = T, use = 'lower')
dimnames(wj_mat)<- list(all_names[1:6], all_names[1:6]) 
wj_mat %>% kable(caption = table_counter_func("NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001")) %>% kable_options()

*Table* 30: NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001
	Color	Word	Color-Word	Interference	Letter-Word
Color
Word	0.64***
Color-Word	0.60***	0.47***
Interference	0.03	-0.06	0.74***
Letter-Word	0.27***	0.37***	0.30***	0.15*
Word Attack	0.29***	0.37***	0.32***	0.15**	0.63***

WJ-Stroop Graphical Representation of Correlation using Princiapal Component Analysis- 2d

# mdspca
adult_wj %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, LW , WA) %>% setNames(all_names[1:6]) %>% mdspca()
title(figure_counter_func("PCA Stroop WJ"))

WJ-Stroop Graphical Representation of Correlation using Princiapal Component Analysis- 2d

# sphpca
adult_wj %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, LW , WA) %>% setNames(all_names[1:6]) %>% sphpca()
title(figure_counter_func("\n PCA Stroop WJ 3d"), line = 2)

WJ-Stroop Principal Components Analysis (PCA)

WJ-Stroop Scree Plot

adult_wj_short <- adult_wj %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, LW , WA) %>% as.data.frame()
scree.plot(adult_wj_short,simu=20,use="P", title = figure_counter_func("\n Stroop WJ Scree Plot"))
title(figure_counter_func("\n Stroop WJ Scree Plot"), line = 2)

WJ-Stroop Two Factor PCA Model

fit_two_factor <- principal(adult_wj_short, nfactors = 2, rotate="promax")
data.frame(matrix(round(as.numeric(fit_two_factor$loadings), 3), attributes(fit_two_factor$loadings)$dim, dimnames=list(all_names[1:6], c("Processing Speed", "Stroop")))) %>% kable() %>% kable_options()

	Processing.Speed	Stroop
Color	0.760	0.036
Word	0.871	-0.150
Color-Word	0.362	0.785
Interference	-0.225	1.030
Letter-Word	0.701	0.000
Word Attack	0.701	0.018

WJ-Stroop Three Factor PCA Model

fit_three_factor <- principal(adult_wj_short, nfactors = 3, rotate="promax")
data.frame(matrix(round(as.numeric(fit_three_factor$loadings), 3), attributes(fit_three_factor$loadings)$dim, dimnames=list(all_names[1:6], c("Processing Speed", "Stroop", "Reading Fluency")))) %>% kable() %>% kable_options()

	Processing.Speed	Stroop	Reading.Fluency
Color	0.948	0.021	-0.090
Word	0.882	-0.162	0.118
Color-Word	0.450	0.770	-0.010
Interference	-0.243	1.024	0.027
Letter-Word	-0.006	0.006	0.904
Word Attack	0.006	0.024	0.890

As can be seen in the above analyses, the Stroop and NSCST subtests had significant correlations. The scales tended to form two or three groups. There was a group consisting of Stroop, Interference, and Nonverbal Stroop, termed “Stroop” and the Color, Word, CC Ratio, and CI Ratio tasks that formed a group, called “Processing Speed.” These Processing Speed tasks require efficient processing of written language and color. When forced to have three factors, the CI Ratio and Nonverbal Stroop scores formed a separate factor that seemed to suggest nonverbal Stroop processes are different than verbal ones. The scree plot supported a factor structure of two or three factors.

WJ-Stroop Multi-trait Multimethod (MTMM)

# MTMM Stroop with Lavaan- not used here, use psy pack instead

# mtmm_basic <- "
# T1 =~ Interference
# T2 =~ CRScore + WRScore + CWRscore
# M1 =~ 1*Interference + 1*CRScore
# M2 =~ 1*Interference + 1*WRScore
# M3 =~ 1*Interference + 1*CWRscore
# 
# T1~~1*T1
# T2~~1*T2
# 
# T1~~T2
# "
# fit_mtmm_basic <- lavaan(mtmm_basic, data = adult, missing = "ml",
# auto.fix.first = FALSE, auto.var = TRUE)
# summary(fit_mtmm_basic, standardized = TRUE)


# MTMM with Interference, two scales
par(mfrow=c(2,1))
mtmm(adult_wj_short,list(c("CRScore", "WRScore", "CWRscore", "Interference"), c("LW", "WA")), graphItem = T, itemTot = F) %>%
  mutate_if(is.numeric, round, 3) %>%
  kable(caption = table_counter_func("WJ-Stroop MTMM Two Factors")) %>%
  kable_options()

*Table* 31: WJ-Stroop MTMM Two Factors
	Item	ScaleI	Scale 1	Scale 2
1	CRScore	1	0.618	0.308
2	WRScore	1	0.487	0.405
3	CWRscore	1	0.803	0.344
4	Interference	1	0.224	0.164
9	LW	2	0.379	0.626
10	WA	2	0.391	0.626

The above Multitrait Multimethod analysis, with Strop and WJ subtests each assigned to two factors respectively, shows that Interference does not fit great with either factor, showing it has properties different from either.

WJ-Stroop Confirmatory Factor Analysis (CFA)

Confirmatory Factor Analysis was conducted with Stroop and WJ scales. As above analyses suggested two and three factor models were appropriate, CFA models were fit with those number of factors. It was found that a three factor model with Interference in its own factor was the best fit.

# CFA Stroop

# CFA Stroop WJ
two_factor_cfa_stroop_wj <- "
stroop_scales =~ 1* CRScore + WRScore + CWRscore + Interference
wj_scales =~ 1*LW + WA
"
# NA's from below model
fit_two_factor_cfa_stroop_wj<- cfa(two_factor_cfa_stroop_wj, data = adult_wj, missing = "ml")
# summary(fit_two_factor_cfa_stroop_wj, standardized = TRUE)


three_factor_stroop_wj <- "
stroop_color_word =~ 1* CRScore + WRScore
stroop_incongruent_interference =~ 1* CWRscore + Interference
wj_scales =~ 1*LW + WA
"
fit_three_factor_stroop_wj<- cfa(three_factor_stroop_wj, data = adult_wj, missing = "ml")
# summary(fit_three_factor_stroop_wj, standardized = TRUE)

# Interference didn't fit well

three_factor_stroop_wj_interference_alone <- "
stroop_color_word_incongruent =~ 1* CWRscore + CRScore + WRScore
stroop__interference =~ 1* Interference
wj_scales =~ 1*LW + WA
"
fit_three_factor_stroop_wj_interference_alone<- cfa(three_factor_stroop_wj_interference_alone, data = adult_wj, missing = "ml")
# summary(fit_three_factor_stroop_wj_interference_alone, standardized = TRUE)

fitMeasures(fit_three_factor_stroop_wj_interference_alone, c("chisq", "df", "pvalue")) %>% data.frame() %>% t() %>% 
  kable(caption = table_counter_func("WJ-Stroop Three Factor CFA Fit"), col.names = c("Chisq", "Df", "P")) %>%
  kable_options()

*Table* 32: WJ-Stroop Three Factor CFA Fit
	Chisq	Df	P
.	217.0154	7	0

parameterEstimates(fit_three_factor_stroop_wj_interference_alone)[1:(fitMeasures(fit_three_factor_stroop_wj_interference_alone, "df")-1),] %>%
  mutate_if(is.numeric, round, 3) %>%
  kable(caption = table_counter_func("WJ-Stroop Three Factor CFA Estimates")) %>%
  kable_options()

*Table* 33: WJ-Stroop Three Factor CFA Estimates
lhs	op	rhs	est	se	z	pvalue	ci.lower	ci.upper
stroop_color_word_incongruent	=~	CWRscore	1.000	0.000	NA	NA	1.000	1.000
stroop_color_word_incongruent	=~	CRScore	0.199	0.063	3.164	0.002	0.076	0.323
stroop_color_word_incongruent	=~	WRScore	0.153	0.053	2.884	0.004	0.049	0.257
stroop__interference	=~	Interference	1.000	0.000	NA	NA	1.000	1.000
wj_scales	=~	LW	1.000	0.000	NA	NA	1.000	1.000
wj_scales	=~	WA	0.788	0.282	2.790	0.005	0.234	1.341

Nonverbal Stroop Card Sorting Test (NSCST)

The NSCST was used for convergent validity. There were 86 cases that were identified in the NSCST dataset that didn’t have a matching case in the Stroop dataset. The number of cases that matched were 189. There were no incomplete cases.

NSCST-Stroop Correlation

The following analyses explore the correlation of the NSCST subtests with Stroop scales. Review of analyses will be discussed as follows:

NSCST-Stroop Correlation Matrix

# Stroop NSCST Correlation Matrix
nscst_mat <- adult_nscst %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, cc_ratio_t, ci_ratio_t, nscst_stroop_t) %>%
  correlation_matrix(digits = 2, replace_diagonal = T, use = 'lower')
dimnames(nscst_mat)<- list(all_names[c(1:4, 7:9)], all_names[c(1:4, 7:9)]) 
nscst_mat %>% kable(caption = "NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001") %>% kable_options()

NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001
	Color	Word	Color-Word	Interference	CC Ratio	CI Ratio
Color
Word	0.57***
Color-Word	0.64***	0.34***
Interference	0.06	-0.20**	0.71***
CC Ratio	0.15*	0.50***	-0.06	-0.28***
CI Ratio	0.31***	0.38***	0.29***	0.09	0.40***
Nonverbal Stroop	0.10	-0.18*	0.29***	0.35***	-0.64***	0.42***

Graphical Representation of Correlation using Princiapal Component Analysis- 2d

# mdspca
adult_nscst %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, cc_ratio_t, ci_ratio_t, nscst_stroop_t) %>% setNames(all_names[c(1:4, 7:9)]) %>% mdspca()
title(figure_counter_func("Nonverbal Stroop Correlation"))

Graphical Representation of Correlation using Princiapal Component Analysis- 3d

# sphpca
adult_nscst %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, cc_ratio_t, ci_ratio_t, nscst_stroop_t) %>% setNames(all_names[c(1:4, 7:9)]) %>% sphpca()
title(figure_counter_func("\n Nonverbal Stroop \n Correlation 3d"), line = 1)

NSCST-Stroop Principal Components Analysis (PCA)

NSCST-Stroop Scree Plot

# Exploratory
adult_nscst_short <- adult_nscst %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, cc_ratio_t, ci_ratio_t, nscst_stroop_t) %>% as.data.frame()
fit <- principal(adult_nscst_short, rotate="promax")
scree.plot(adult_nscst_short,simu=20,use="P", title = figure_counter_func("Nonverbal Stroop Scree Plot"))

Two Factor PCA Model

fit_two_factor <- principal(adult_nscst_short, nfactors = 2, rotate="promax")
data.frame(matrix(round(as.numeric(fit_two_factor$loadings), 3), attributes(fit_two_factor$loadings)$dim, dimnames=list(all_names[c(1:4, 7:9)], c("Processing Speed", "Stroop")))) %>% kable() %>% kable_options()

	Processing.Speed	Stroop
Color	0.800	0.076
Word	0.802	-0.387
Color-Word	0.694	0.548
Interference	0.165	0.760
CC Ratio	0.499	-0.757
CI Ratio	0.661	0.074
Nonverbal Stroop	0.048	0.805

Three Factor PCA Model

fit_three_factor <- principal(adult_nscst_short, nfactors = 3, rotate="promax")
data.frame(matrix(round(as.numeric(fit_three_factor$loadings), 3), attributes(fit_three_factor$loadings)$dim, dimnames=list(all_names[c(1:4, 7:9)], c("Processing Speed", "Stroop", "Nonverbal Stroop")))) %>% kable() %>% kable_options()

	Processing.Speed	Stroop	Nonverbal.Stroop
Color	0.527	0.590	0.076
Word	0.827	0.194	0.095
Color-Word	0.151	0.976	-0.019
Interference	-0.365	0.820	-0.092
CC Ratio	0.860	-0.194	-0.088
CI Ratio	0.368	-0.129	0.883
Nonverbal Stroop	-0.547	0.074	0.823

NSCST-Stroop Multi-trait Multimethod (MTMM)

# MTMM Stroop with Lavaan- not used here, use psy pack instead

# mtmm_basic <- "
# T1 =~ Interference
# T2 =~ CRScore + WRScore + CWRscore
# M1 =~ 1*Interference + 1*CRScore
# M2 =~ 1*Interference + 1*WRScore
# M3 =~ 1*Interference + 1*CWRscore
# 
# T1~~1*T1
# T2~~1*T2
# 
# T1~~T2
# "
# fit_mtmm_basic <- lavaan(mtmm_basic, data = adult, missing = "ml",
# auto.fix.first = FALSE, auto.var = TRUE)
# summary(fit_mtmm_basic, standardized = TRUE)


# MTMM with Interference, two scales
par(mfrow=c(1,2))
mtmm(adult_nscst_short,list(c("CRScore", "WRScore", "CWRscore", "cc_ratio_t", "ci_ratio_t"), c("Interference", "nscst_stroop_t")), graphItem = T, itemTot = T) %>%
  mutate_if(is.numeric, round, 3) %>%
  kable(caption = table_counter_func("Stroop-NSCST MTMM Two Scales")) %>%
  kable_options()

*Table* 34: Stroop-NSCST MTMM Two Scales
	Item	ScaleI	Scale 1	Scale 2
1	CRScore	1	0.616	0.098
2	WRScore	1	0.673	-0.235
3	CWRscore	1	0.427	0.598
4	cc_ratio_t	1	0.336	-0.565
5	ci_ratio_t	1	0.470	0.322
11	Interference	2	0.057	0.349
12	nscst_stroop_t	2	-0.076	0.349

# # MTMM with Interference, three scales
# par(mfrow=c(1,3))
# mtmm(adult_nscst_short,list(c("WRScore", "cc_ratio_t"), c("CRScore", "CWRscore", "Interference"), c("ci_ratio_t", "nscst_stroop_t")), graphItem = T, itemTot = T) %>%
#   mutate_if(is.numeric, round, 3) %>%
#   kable(caption = table_counter_func("WJ-Stroop MTMM Three Scales")) %>%
#   kable_options()

The processing speed and Stroop scales were grouped into separate factors. Color-Word seemed to be have correspondence with both factors.

Stroop-NSCST Confirmatory Factor Analysis (CFA)

Using above information, CFA was conducted. Three scales were fit, Verbal Speed, Nonverbal Speed, and Interference. Interference represented Stroop and Nonverbal Stroop scores. The other scales from Stroop made up Verbal Speed and Nonverbal Stroop in Nonverbal Speed.

# CFA Stroop

# CFA Stroop NSCST
two_factor_cfa_stroop_nscst <- "
speed_scales =~ 1* CRScore + WRScore + CWRscore + cc_ratio_t + ci_ratio_t
interference_scales =~ 1*Interference + nscst_stroop_t
"
fit_two_factor_cfa_stroop_nscst<- cfa(two_factor_cfa_stroop_nscst, data = adult_nscst, missing = "ml")
# summary(fit_two_factor_cfa_stroop_nscst, standardized = TRUE)

# CFA Stroop NSCST Speed Broken into verbal and non
three_factor_cfa_stroop_nscst <- "
verbal_speed_scales =~ 1* CRScore + WRScore + CWRscore
nonverbal_speed_scales =~ 1*cc_ratio_t + ci_ratio_t
interference_scales =~ 1*Interference + nscst_stroop_t
"
fit_three_factor_cfa_stroop_nscst<- cfa(three_factor_cfa_stroop_nscst, data = adult_nscst, missing = "ml")

fitMeasures(fit_three_factor_cfa_stroop_nscst, c("chisq", "df", "pvalue")) %>% data.frame() %>% t() %>%
  kable(caption = table_counter_func("NSCST-Stroop Three Factor CFA Fit"), col.names = c("Chisq", "Df", "P")) %>%
  kable_options()

*Table* 35: NSCST-Stroop Three Factor CFA Fit
	Chisq	Df	P
.	402.975	11	0

parameterEstimates(fit_three_factor_cfa_stroop_nscst)[1:(fitMeasures(fit_three_factor_cfa_stroop_nscst, "df")-1),] %>%
  mutate_if(is.numeric, round, 3) %>%
  kable(caption = table_counter_func("NSCST-Stroop Three Factor CFA Estimates")) %>%
  kable_options()

*Table* 36: NSCST-Stroop Three Factor CFA Estimates
lhs	op	rhs	est	se	z	pvalue	ci.lower	ci.upper
verbal_speed_scales	=~	CRScore	1.000	0.000	NA	NA	1.000	1.000
verbal_speed_scales	=~	WRScore	0.682	0.113	6.058	0.000	0.461	0.903
verbal_speed_scales	=~	CWRscore	0.489	0.080	6.145	0.000	0.333	0.645
nonverbal_speed_scales	=~	cc_ratio_t	1.000	0.000	NA	NA	1.000	1.000
nonverbal_speed_scales	=~	ci_ratio_t	-0.394	0.095	-4.165	0.000	-0.580	-0.209
interference_scales	=~	Interference	1.000	0.000	NA	NA	1.000	1.000
interference_scales	=~	nscst_stroop_t	2.962	0.577	5.132	0.000	1.831	4.093
CRScore	~~	CRScore	-6.517	26.695	-0.244	0.807	-58.838	45.804
WRScore	~~	WRScore	207.807	24.088	8.627	0.000	160.595	255.020
CWRscore	~~	CWRscore	75.769	10.310	7.349	0.000	55.563	95.976

CTMT

The CTMT was used to assess convergent validity. There were 17 cases that were identified in the CTMT dataset that didn’t have a matching case in the Stroop dataset. The number of cases that matched were 152. There were no incomplete cases.

CTMT-Stroop Correlation

CTMT-Stroop Correlation Matrix

# Stroop CTMT Correlation Matrix
ctmt_mat <- adult_ctmt %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, total) %>% correlation_matrix(digits = 2, replace_diagonal = T, use = 'lower')
dimnames(ctmt_mat)<- list(all_names[c(1:4, 10)], all_names[c(1:4, 10)]) 
ctmt_mat %>% kable(caption = table_counter_func("NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001")) %>% kable_options()

*Table* 37: NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001
	Color	Word	Color-Word	Interference
Color
Word	0.67***
Color-Word	0.70***	0.60***
Interference	0.13	0.07	0.77***
Trails Total	0.43***	0.34***	0.37***	0.13

Graphical Representation of Correlation using Princiapal Component Analysis- 2d

# mdspca
adult_ctmt %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, total) %>% setNames(all_names[c(1:4, 10)]) %>% mdspca()
title(figure_counter_func("PCA Stroop CTMT"))

Graphical Representation of Correlation using Princiapal Component Analysis- 3d

# sphpca
adult_ctmt %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, total) %>% setNames(all_names[c(1:4, 10)]) %>% sphpca()
title(figure_counter_func("\n PCA Stroop \n CTMT 3d"), line = 1)

CTMT-Stroop Principal Components Analysis (PCA)

CTMT-Stroop Scree Plot

# Exploratory
adult_ctmt_short <- adult_ctmt %>% dplyr::select(CRScore, WRScore, CWRscore, Interference, total) %>% as.data.frame()
fit <- principal(adult_ctmt_short, rotate="promax")
scree.plot(adult_ctmt_short,simu=20,use="P", title = figure_counter_func("\n Stroop CTMT Scree Plot"))

Two Factor PCA Model

fit_two_factor <- principal(adult_ctmt_short, nfactors = 2, rotate="promax")
data.frame(matrix(round(as.numeric(fit_two_factor$loadings), 3), attributes(fit_two_factor$loadings)$dim, dimnames=list(all_names[c(1:4, 10)], c("Processing Speed", "Stroop")))) %>% kable() %>% kable_options()

	Processing.Speed	Stroop
Color	0.901	-0.005
Word	0.891	-0.095
Color-Word	0.507	0.696
Interference	-0.170	1.040
Trails Total	0.673	-0.064

Three Factor PCA Model

fit_three_factor <- principal(adult_ctmt_short, nfactors = 3, rotate="promax")
data.frame(matrix(round(as.numeric(fit_three_factor$loadings), 3), attributes(fit_three_factor$loadings)$dim, dimnames=list(all_names[c(1:4, 10)], c("Processing Speed", "Stroop", "Trails")))) %>% kable() %>% kable_options()

	Processing.Speed	Stroop	Trails
Color	0.884	-0.030	0.088
Word	0.996	-0.133	-0.079
Color-Word	0.505	0.688	0.009
Interference	-0.201	1.058	-0.013
Trails Total	-0.010	-0.012	1.005

CTMT-Stroop Multi-trait Multimethod (MTMM)

# MTMM Stroop with Lavaan- not used here, use psy pack instead

# mtmm_basic <- "
# T1 =~ Interference
# T2 =~ CRScore + WRScore + CWRscore
# M1 =~ 1*Interference + 1*CRScore
# M2 =~ 1*Interference + 1*WRScore
# M3 =~ 1*Interference + 1*CWRscore
# 
# T1~~1*T1
# T2~~1*T2
# 
# T1~~T2
# "
# fit_mtmm_basic <- lavaan(mtmm_basic, data = adult, missing = "ml",
# auto.fix.first = FALSE, auto.var = TRUE)
# summary(fit_mtmm_basic, standardized = TRUE)


# MTMM with Interference, two scales
par(mfrow=c(1,2))
mtmm(adult_ctmt_short,list(c("CRScore", "WRScore", "total"), c("Interference", "CWRscore")), graphItem = T, itemTot = T)%>%
  mutate_if(is.numeric, round, 3) %>%
  kable(caption = table_counter_func("Stroop-CTMT MTMM")) %>%
  kable_options()

*Table* 38: Stroop-CTMT MTMM
	Item	ScaleI	Scale 1	Scale 2
1	CRScore	1	0.579	0.493
2	WRScore	1	0.475	0.409
3	total	1	0.420	0.288
7	Interference	2	0.143	0.767
8	CWRscore	2	0.588	0.767

Stroop-CTMT Confirmatory Factor Analysis (CFA)

# CFA Stroop

# CFA Stroop CTMT
two_factor_cfa_stroop_ctmt <- "
stroop_scales =~ 1* CRScore + WRScore + CWRscore + total
ctmt_scales =~ 1*Interference
"
fit_two_factor_cfa_stroop_ctmt<- cfa(two_factor_cfa_stroop_ctmt, data = adult_ctmt, missing = "ml")
# summary(fit_two_factor_cfa_stroop_ctmt, standardized = TRUE)

# CFA CTMT speed and interference
two_factor_cfa_speed_interference <- "
speed =~ 1* CRScore + WRScore + total
interference =~ 1*CWRscore + Interference
"
fit_two_factor_cfa_speed_interference<- cfa(two_factor_cfa_speed_interference, data = adult_ctmt, missing = "ml")
# summary(fit_two_factor_cfa_speed_interference, standardized = TRUE)

fitMeasures(fit_two_factor_cfa_stroop_ctmt, c("chisq", "df", "pvalue")) %>% data.frame() %>% t() %>%
  kable(caption = table_counter_func("CTMT-Stroop Two Factor CFA Fit"), col.names = c("Chisq", "Df", "P")) %>%
  kable_options()

*Table* 39: CTMT-Stroop Two Factor CFA Fit
	Chisq	Df	P
.	109.6116	5	0

parameterEstimates(fit_two_factor_cfa_stroop_ctmt)[1:(fitMeasures(fit_two_factor_cfa_stroop_ctmt, "df")-1),] %>%
  mutate_if(is.numeric, round, 3) %>%
  kable(caption = table_counter_func("CTMT-Stroop Two Factor CFA Estimates")) %>%
  kable_options()

*Table* 40: CTMT-Stroop Two Factor CFA Estimates
lhs	op	rhs	est	se	z	pvalue	ci.lower	ci.upper
stroop_scales	=~	CRScore	1.000	0.000	NA	NA	1.000	1.000
stroop_scales	=~	WRScore	0.843	0.125	6.740	0.000	0.598	1.088
stroop_scales	=~	CWRscore	2.532	0.499	5.075	0.000	1.554	3.510
stroop_scales	=~	total	0.047	0.076	0.623	0.533	-0.101	0.196

Exploratory analysis above suggested that Trails Total may be associated with multiple Stroop scores. However, in CFA model construction Trails Total did not fit well. As such the optimum model was a two factor model with all Stroop scores as a factor and Trails Total as a separate factor.

Special Cases

Reading Ability and Style

# Select Relevant Variables, create composite reading score, create high and low reading group as those above vs. below 2 SD from mean
reading_dat <- adult_wj %>%
 dplyr::select(decimal_age, SelEdGrp, CRScore, WRScore, CWRscore, Interference, LW , WA) %>% 
  mutate(reading = LW + WA,
         level = ifelse(reading < mean(reading) - 2*sd(reading), "Low", "Normal"),
         color_word_ratio = (CRScore/WRScore),
         color_word_level = ifelse(color_word_ratio > (mean(color_word_ratio) + 2*SD(color_word_ratio)), "Color Faster", "Word Faster"))
# Calculate those with Color word ratio > 1
reading_dat %>%
  group_by(color_word_ratio >1) %>%
  dplyr::summarize(n = n()) %>%
  mutate(freq = n/sum(n)) %>%
  dplyr::filter(`color_word_ratio > 1` == T) %>%
  dplyr::select(freq) %>%
  mutate(freq = freq*100) %>%
  round(2) %>%
  paste0(., "%") -> color_above_1

As discussed in the Introduction chapter, the Stroop Effect has been observed in individuals with intact reading skills, hypothesized to be due to automatic word reading skills being faster than color reading skills for these individuals. Research has been mixed about the exact relationship between reading skills and Stroop performance, with some findings that individuals with lower reading abilities had less Stroop Interference than individuals with average or above abilities and other studies finding greater amounts of Interference with individuals with lower reading abilities.

We sought to investigate the relationship between reading and Stroop performance to understand how reading ability affects performance. An additional goal is to produce information the Stroop examiner can use to make actionable decisions about the appropriateness of Stroop administration with individuals and to aid in interpretation. Regarding this goal, analyses that may be simpler than other analyses, but provide succinct and usable information, without necessitating use of a computer in analysis, were considered to help provide actionable information. For full transparency and for maximum information, the more ideal analyses and simpler analyses are presented in conjuction.

The subset participants who were administered both the Stroop Test and Woodcock-Johnson (WJ) Test, who were used in the above analyses of Stroop and WJ concurrent validity was used (N = 304). Stroop Color, Word, Color-Word and Interference scores, along with WJ Letter-Word (LW) and Word-Attack (WA) scores, estimated to be good representations of reading skills, were examined. Raw scores from all these tasks were used, with factors Age and Education controlled for. These variables are used to determine and derive standardized scores on these tests, so controlling for these variables essentially standardizes the scores within this sample.

The WJ scores were summed together to create a “Reading” score, as an overall measure of reading skills (see Measures section above for description of these scales and how they comprise reading abilities). Reading scores were standardized (mean = 0 and SD = 1) in some analyses below to aid in generalizability of interpretation to other reading assessments. In addition, a ratio of an individual’s Color Raw Score to Word Raw Score was calculated (“Color to Word Ratio”), as a comparison of color naming proficiency to word naming proficiency, with a score of “1” indicating equivalency, and scores higher indicating better color naming than word naming. The Color to Word Ratio scores were used to identify and control for Stroop performance being a reflection of being able to identify colors better than numbers, as may occur with individuals with reading deficits.

Furthermore, individuals were assigned a reading level (“Reading Level”) of “Normal” or “Low” if they were above or below 2 SD’s below the mean, respectively, and a Color to Word Ratio level (“Color to Word Level”) of “Word Faster” or “Color Faster” if they were below or above 2 SD’s above the mean, respectively (not all individuals with “Color Faster” Color to Word levels had faster Color Raw Scores than Word Raw Scores [ratios above 1]. There was only 1 (0.33%) individual meeting this criteria, which limited useful information, thus leading to the use of > 2 SD’s as being reflective of discrepant color to word naming ability). These classifications were constructed to conveniently present summary information. Analyses were conducted with Reading and Color to Word Ratio as continuous variables.

Stroop and WJ Score Correlation

The correlation between Stroop scores and WJ scores are presented again here (previously presented with raw scores alone in Concurrent Validity section) with the additional calculated scores described above.

reading_dat %>%
dplyr::select(CRScore, WRScore, CWRscore, Interference, color_word_ratio, reading) %>%
  rename(Color = CRScore, Word = WRScore, Color_Word = CWRscore) %>%
  chart.Correlation()
  title(sub="NS = P > 0.05, * = P <= .05, ** = P <= .01, *** = P < 0.001")

Related to the findings discussed above in Concurrent Validity, there are informative observations with implications for Stroop performance understanding that can be observed in the Correlation chart above. Some of these are:

Interference is only weakly correlated with other scores, with the exception of Color-Word, suggesting there is something unique about Interference, that it involves skills not strongly associated with the skills involved in these other tasks, which include reading ability, raw processing speed, and perception.
Interference is strongly correlated with Color-Word. Interference is calculated from Color-Word.
Color-Word is moderately related to Color and Word.
Interference is not correlated with Color or Word individually. It may seem logical (transitive property) that Interference should be related to Color and Word based on above, but that is not occurring.
Reading and Color to Word Ratio have moderate correlations with Stroop scores.

The plots above are expanded to see the relationship between Reading and Color to Word Ratio with other variables, using Loess regression methods:

Reading Scatterplot

reading_dat %>%
  pivot_longer(., c(CRScore, WRScore, CWRscore, Interference, color_word_ratio), names_to = "raw_scores") %>%
  mutate(raw_scores = dplyr::recode(raw_scores, 'CRScore' = 'Color', 'WRScore' = 'Word', 'CWRscore' = 'Color-Word', 'color_word_ratio' = 'Color to Word Ratio')) %>%
  ggplot(data = ., aes(x = reading, y = value)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~raw_scores,scales = "free_y") +
  ggtitle("Reading Across Scores") + 
  xlab("Reading") +
  ylab("Score")

Visual inspection of the plots reveals approximately linear positive relationships of Reading with scores. The exception is Color to Word Ratio, which seems to decrease as Reading scores increase. As Reading seems to be both positively correlated with Color and Word, it seems that Word performance must increase quicker than Color increases at higher levels of Reading.

Color to Word Ratio Scatterplot

reading_dat %>%
  pivot_longer(., c(CRScore, WRScore, CWRscore, Interference, reading), names_to = "raw_scores") %>%
  mutate(raw_scores = dplyr::recode(raw_scores, 'CRScore' = 'Color', 'WRScore' = 'Word', 'CWRscore' = 'Color-Word', 'reading' = 'Reading')) %>%
  ggplot(data = ., aes(x = color_word_ratio, y = value)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~raw_scores,scales = "free_y") +
  ggtitle("Color to Word Ratio Across Scores") + 
  xlab("Color to Word Ratio") +
  ylab("Score")

Fitted lines in the Color to Word Ratio plots seem to have a parabolic shape, suggesting a quadratic relationship of Color to Word Ratio with other scores. The fitted line of its relationship with Interference seems generally flat.

# Plot Reading by Interference
p <- reading_dat %>%
  # dplyr::filter(level == "normal") %>%
 ggplot(data = ., aes(x = color_word_ratio, y = CWRscore, color = "Overall")) +
  geom_smooth(method = lm, formula = y ~ poly(x, 2), se = FALSE) +
  ggtitle("Interference Scores Across Reading Scores") +
  xlab("Reading") + 
  scale_colour_discrete(name = "Color to Word Ratio Level")
#p + geom_smooth(data = reading_dat, aes(x = reading, y = Interference, color = color_word_level), method = "lm", se = F)

Score Description by Reading and Color to Word Ratio Level

  reading_summary <- function(...){
  reading_dat %>%
    group_by(!!!quos(...)) %>%
    summarize_if(is.numeric, list(mean, sd)) %>%
    mutate_if(is.numeric, round, 2) %>%
    as.data.frame()
   }
   
  reading_sum <- reading_summary(level, color_word_level) %>%
    mutate(level = ifelse(duplicated(level), "", level))

    cols <- unique(gsub("_[^_]+$", "", colnames(reading_sum[,-(1:2)])))
    cbind.data.frame(reading_sum[,1:2], sapply(cols, function(cols_present) cbind(paste0(do.call(paste, c(reading_sum[grepl(cols_present, colnames(reading_sum))], sep="(")), ")")))) %>%
      kable(col.names = c("Reading Level", "Color to Word Level", "Age", score_names_int, "Letter-Word", "Word-Attack", "Reading", "Color to Word Ratio"), caption = table_counter_func("Mean(SD)")) %>%
      kable_options() %>%
      column_spec(4:7, background = "yellow")

*Table* 41: Mean(SD)
Reading Level	Color to Word Level	Age	Color Raw Score	Word Raw Score	Color-Word Raw Score	Interference	Letter-Word	Word-Attack	Reading	Color to Word Ratio
Low	Color Faster	28.79(10.31)	70(18.38)	68(25.46)	38.5(21.92)	4.5(12.02)	55(2.83)	15.5(3.54)	70.5(6.36)	1.05(0.12)
	Word Faster	34.72(11.85)	59.27(8.79)	86.18(10.72)	27.55(8.26)	-7.27(6.66)	57(3.61)	14.73(5.39)	71.73(6.05)	0.7(0.13)
Normal	Color Faster	25.87(12.19)	77.86(21.86)	82(23.67)	48.71(12.32)	9.29(9.95)	69.14(4.3)	26.29(4.75)	95.43(7.89)	0.95(0.03)
	Word Faster	38.3(15.86)	73.93(12.71)	105.71(14.63)	42.5(10.6)	-0.55(9.19)	69.5(4.46)	26.91(2.77)	96.42(6.32)	0.7(0.09)

Individuals with Low Reading levels generally have lower scores across all Stroop and WJ scores, including Interference scores. When those groups are broken into groups with Color Faster or Word Faster Color to Word Ratio levels, those in the Color Faster groups in either High or Low Reading levels had higher Interference scores than those in the Word Faster group in either High or Low Reading levels. Furthermore, for the score on the task where the Stroop Effect is evoked, the Color-Word Raw Score (which in comparison to the Color and Word Scores determines the Interference score) is higher overall for those with higher reading abilities, but is also higher within reading groups for those with Color Faster Color to Word Ratio levels. So, taken together the initial impressions from these descriptive statistics is that:

Interference Score is driven by Color to Word Ratio
Color-Word Score is driven by Reading ability
Word Raw Score is faster than Color Raw Score for Word Faster group than Color Raw Score is faster than Word Raw Score for Color Faster group.
As Interference is a comparison of Color-Word Incongruent task performance compared to Color and Word Congruent performance Word Faster individuals may be establishing higher scores in those tasks compared to Interference, thus leading to lower comparative Interference scores

To more formally assess how Interference scores vary by reading ability and color and word performance, mixed-effects models were constructed with Reading scores and Color to Word Ratio scores as predictors of Stroop scores. Age and Education Group were random factors to be controlled for, as were each Reading scores and Color to Word Ratio scores in models when the other variable was entered as a main effect. The “Significance” column presents the p-value of the model compared to the next less restrictive model.

  lmfun <- function(data, formula_choice, y, x, z, i, random, polynom) {
    y <- sym(y[i])
    x <- enquo(x)
    z <- enquo(z)
  
    model_formula <- formula(paste0(quo_name(y), "~ poly(",quo_name(x), ",", polynom, ") +", quo_name(z), random))
    null_model <- formula(paste0(quo_name(y), "~ 1 +", quo_name(z), random))
    
    if(formula_choice == "lmer"){
      lmer(model_formula, data = data)
    } else if (formula_choice == "lm") {
      lm(model_formula, data = data)
    } else if (formula_choice == "lm null") {
      lm(null_model, data = data)
    } else if (formula_choice == "lmer null") {
      lmer(null_model, data = data)
    }
  }
  
lm_applier <- function(i, formula_choice, random, polynomial){lmfun(reading_dat, formula_choice, raw_score_column_names_int, "color_word_ratio", reading, i, random, polynomial)}

lm_null_applier <- function(i, formula_choice, random, polynomial){lmfun(reading_dat, formula_choice, raw_score_column_names_int, "color_word_ratio", 0, i, random, polynomial)}

# Apply for lmer and poly and null mods
lmer_mod_quadratic <- lapply(1:4, lm_applier, formula_choice = "lmer", random = "+ (1|decimal_age) + (1|SelEdGrp)", polynomial = 2)

lmer_mod_linear <- lapply(1:4, lm_applier, formula_choice = "lmer", random = "+ (1|decimal_age) + (1|SelEdGrp)", polynomial = 1)

lmer_mod_reading_only <- lapply(1:4, lm_applier, formula_choice = "lmer null", random = "+ (1|decimal_age) + (1|SelEdGrp)", polynomial = 0)

lmer_mod_null <- lapply(1:4, lm_null_applier, formula_choice = "lmer null", random = "+ (1|decimal_age) + (1|SelEdGrp)", polynomial = 0)

lm_mod_quadratic <- lapply(1:4, lm_applier, formula_choice = "lm", random = " ", polynomial = 2)

lm_mod_linear <- lapply(1:4, lm_applier, formula_choice = "lm", random = " ", polynomial = 1)

lmer_mod_reading_only <- lapply(1:4, lm_applier, formula_choice = "lm null", random = " ", polynomial = 0)

reg_p_vals <- list(
  color_chi <- round(anova(lmer_mod_quadratic[[1]], lmer_mod_linear[[1]])$`Pr(>Chisq)`[-1],3),
  word_chi <- round(anova(lmer_mod_quadratic[[2]], lmer_mod_linear[[2]])$`Pr(>Chisq)`[-1],3),
  color_word_chi <- round(anova(lmer_mod_linear[[3]], lmer_mod_null[[3]])$`Pr(>Chisq)`[-1],3),
  interference_chi <- round(anova(lmer_mod_linear[[4]], lmer_mod_null[[4]])$`Pr(>Chisq)`[-1],3)
)

reg_table_func <- function(mod){
  list_equations <- lapply(1:4, function(i, mod) cbind.data.frame(Name = scale_list_int[[i]], Significance = reg_p_vals[[i]], Terms = row.names(coef(summary(mod[[i]]))), round(coef(summary(mod[[i]])),3)) %>%
                             mutate(Significance = ifelse(duplicated(Significance), "", Significance)), mod = mod)
  
  do.call(rbind, list_equations) %>%
    mutate(Name = ifelse(duplicated(Name), "", Name),
    Terms = dplyr::recode(Terms, "(Intercept)" = "Intercept", "poly(color_word_ratio, 2)1" = "Color Word Ratio", "poly(color_word_ratio, 2)2" = "Color Word Ratio Squared", "reading" = "Reading", "poly(color_word_ratio, 1)" = "Color Word Ratio"))
  }

print_reg <- function(mod1, mod2){
  # name <- deparse(substitute(mod1))
  rbind.data.frame(reg_table_func(mod1) %>%
    slice(1:8),
    reg_table_func(mod2) %>%
      slice(7:12)) %>%
      kable(caption = "Regression Equations for Predicting Stroop Scores from Reading and Color to Word Ratio") %>%
      # kable(caption = table_counter_func(name)) %>% # Use this way to print by model name
      kable_options()
}
print_reg(lmer_mod_quadratic, lmer_mod_linear)

Regression Equations for Predicting Stroop Scores from Reading and Color to Word Ratio
	Name	Significance	Terms	Estimate	Std. Error	df	t value	Pr(>\|t\|)
(Intercept)	Color Raw Score	0	Intercept	26.883	6.798	299.888	3.955	0.000
poly(color_word_ratio, 2)1			Color Word Ratio	126.983	9.755	297.684	13.018	0.000
poly(color_word_ratio, 2)2			Color Word Ratio Squared	-51.470	9.966	294.234	-5.165	0.000
reading			Reading	0.489	0.071	299.749	6.875	0.000
(Intercept)1	Word Raw Score	0.002	Intercept	39.738	9.641	82.448	4.122	0.000
poly(color_word_ratio, 2)11			Color Word Ratio	-73.990	13.744	292.496	-5.383	0.000
poly(color_word_ratio, 2)21			Color Word Ratio Squared	-43.989	14.024	279.640	-3.137	0.002
reading1			Reading	0.676	0.101	79.552	6.709	0.000
(Intercept)2	Color-Word Raw Score	0	Intercept	-3.741	7.089	172.717	-0.528	0.598
poly(color_word_ratio, 1)2			Color Word Ratio	48.638	9.913	295.702	4.906	0.000
reading2			Reading	0.481	0.074	187.697	6.502	0.000
(Intercept)3	Interference	0.005	Intercept	-19.478	6.496	150.529	-2.999	0.003
poly(color_word_ratio, 1)3			Color Word Ratio	19.116	9.138	297.017	2.092	0.037
reading3			Reading	0.200	0.068	161.457	2.952	0.004

The above table presents the regression equations for prediction of Stroop scores from Color Word Ratio and Reading Scores. We can see that Color Word Ratio has a quadratic relationship with the scores Color and Word and a linear relationship with Color-Word and Interference. To summarize, increased reading ability seems to lead to higher Stroop scores, while increased Color to Word performance leads to increased scores to a point in Color and Word and monotonically with Color-Word and Interference.

Critical Scores

Color to Word Ratio

Color to Word Ratio score had a slight quadratic relationship with Color-Word. We can find the optimal Color to Word Ratio score for Color-Word performance:

lmfun <- function(data, y, x, i) {
    y <- sym(y[i])
    x <- enquo(x)
    lm_model_formula <- formula(paste0(quo_name(y), "~ poly(",quo_name(x), ",2)"))
    lm(lm_model_formula, data = data)
  }
lm_applier <- function(i){lmfun(reading_dat, raw_score_column_names_int, "color_word_ratio", i)}

lm_mod <- lapply(1:4, lm_applier)

list_equations <- lapply(1:4, function(i) cbind.data.frame(Name = scale_list_int[[i]], Terms = row.names(coef(summary(lm_mod[[i]]))), round(coef(summary(lm_mod[[i]])),3)))

lapply(3, function(i){
  list_equations[[i]] %>%
    dplyr::filter(grepl("poly", Terms)) %>%
    dplyr::select(Estimate) %>%
    slice(1) -> first
  list_equations[[i]] %>%
    filter(grepl("poly", Terms)) %>%
    dplyr::select(Estimate) %>%
    slice(2) -> second
    -first / (2*second)
})[[1]] -> maximum_color_word

result <- function(a,b,c){
  if(delta(a,b,c) > 0){ # first case D>0
        x_1 = (-b+sqrt(delta(a,b,c)))/(2*a)
        x_2 = (-b-sqrt(delta(a,b,c)))/(2*a)
        result = c(x_1,x_2)
  }
  else if(delta(a,b,c) == 0){ # second case D=0
        x = -b/(2*a)
  }
  else {"There are no real roots."} # third case D<0
}

# Constructing delta
delta<-function(a,b,c){
      b^2-4*a*c
}

extreme_min <- mean(reading_dat$CWRscore) - 3*sd(reading_dat$CWRscore)

extreme_color_word_ratio <- result(list_equations[[3]]$Estimate[1] - extreme_min, list_equations[[3]]$Estimate[2], list_equations[[3]]$Estimate[3])
extreme_color_word_ratio <- extreme_color_word_ratio[extreme_color_word_ratio > 0]

The maximum Color to Word ratio score for Color-Word performance is 73.92%, which means about 35.27% as many words are read as colors.

To identify Color to Word Ratios that may lead to extremely low scores, defined here as less than 3 SD’s from the mean, the quadratic regression equation was solved for the positive root, and resulted in: 49.26%. At this ratio, the individual is reading about twice as many words as colors. At the other end, as noted above only 1(0.33%) individual had a Color to Word ratio above 1, suggesting that is a relatively abnormal relative score. However, there was not an extreme decrease in Color-Word score approaching or above that.

It is important to note that the test did not perform unexpetedly for individuals with those scores reviewed above. Rather these are some of the conditions that led, and could lead, to extreme scores.

We can also try to identify Reading scores that could lead to extreme performance:

Nonlinear and spline models were constructed to consider nonlinear effects with Reading as a fixed main effect.

Reading

# summary(cubic_reading_mod) 
lmer(Interference ~ splines::bs(reading, 3) + (1|color_word_ratio) + (1|decimal_age) + (1|SelEdGrp), data = reading_dat, REML = F) -> spl_read_int
lmer(Interference ~ 1+ (1|color_word_ratio) + (1|decimal_age) + (1|SelEdGrp), data = reading_dat, REML = F) -> no_intercept_mod_int
# summary(spl_read)
spl_lm_results_int <- anova(no_intercept_mod_int, spl_read_int)
simple_spl_int <- lm(Interference ~ splines::bs(reading, 3), data = reading_dat)

# summary(cubic_reading_mod) 
lmer(CWRscore ~ splines::bs(reading, 3) + (1|color_word_ratio) + (1|decimal_age) + (1|SelEdGrp), data = reading_dat, REML = F) -> spl_read_cw
# summary(spl_read)
lmer(CWRscore ~ 1+ (1|color_word_ratio) + (1|decimal_age) + (1|SelEdGrp), data = reading_dat, REML = F) -> no_intercept_mod_cw
spl_lm_results_cw <- anova(no_intercept_mod_cw, spl_read_cw)
simple_spl <- lm(CWRscore ~ splines::bs(reading, 3), data = reading_dat)
simple_lm <- lm(CWRscore ~ reading, data = reading_dat)

Both nonlinear and spline models are superior to the null model, without Reading as a predictor, and Interference (X2(3) p < 0.004) and Color-Word Score as outcomes(X2(3) p < 0), using the spline model compared to the null model). These models are not superior to simple linear effects for Reading and quadratic effects for Color to Word Ratio, as reviewed in the regression equation model selection procedures above. Models with Color and Word as outcomes with anything beyond the terms described in the models above performaed poorly and are not explored further.

When considering the cubic model compared with the spline model with both Interference and Color-Word as outcomes, the cubic term for the cubic model was not significant, while the three-knot term for the spline model was. As we desire to know Reading Score values that could prove problematic, using the spline model with the significant three-knot term was determined to be the best approach. As we desire to identify reading scores that could be problematic for a range of participants in terms of Interference and Color-Word performance, a simple linear spline model was used with standardized Reading scores.

Interference

# Fit overall Reading mod- get high know
scaled_reading <- data.frame(Interference_stan = scale(reading_dat$Interference), reading_stan = scale(reading_dat$reading))
reading_lm_mod <- lm(Interference_stan ~ reading_stan, data = scaled_reading)
fit_seg <- segmented(reading_lm_mod, seg.Z = ~reading_stan, psi = list(reading_stan=c(-2.5,0)),control = seg.control(n.boot=0))
# fit_seg <- segmented(reading_lm_mod, seg.Z = ~reading, psi = list(reading=c(75, 80))) # use with unstandardized

# reading_age_break_low <- ceiling(fit_seg$psi[1, 2])
pr <- seq(min(scaled_reading$reading_stan), max(scaled_reading$reading_stan), .1)
predicted_values <- cbind.data.frame(reading_stan = pr, predicted_interference = predict(fit_seg, data.frame(reading_stan = pr)))

predicted_break_value = predict(fit_seg, data.frame(reading_stan = fit_seg$psi[,2]))

predicted_break_values <- cbind.data.frame(break_value = fit_seg$psi[,2], predicted_break_value = predicted_break_value, se = fit_seg$psi[,3], location = c("Low", "High"))

p <- ggplot(predicted_values, aes(x = reading_stan, y = predicted_interference)) +
  geom_line() 
 p + geom_point(data = predicted_break_values, aes(x = break_value, y = predicted_break_value)) +
   geom_label(data = predicted_break_values, aes(x = break_value, y = predicted_break_value, label = paste0(round(break_value,2), "/", round(predicted_break_value,2), " (",round(se,2), ")"), color = location), nudge_y = .1) +
   ggtitle("Interference Scores Across \n Standardized Reading Scores Segmented at Break Point") +
  xlab("Standardized Reading Scores") +
   ylab("Predicted Interference") +
  labs(color = "Reading Score/ \n Predicted Interference(SE)") +
  scale_x_continuous(breaks = seq(-5,2,1)) +
   coord_cartesian(ylim = c(-.6, .3))

 medium_lm <- scaled_reading %>%
   filter(reading_stan > fit_seg$psi[1,2] & reading_stan < fit_seg$psi[2,2]) %>%
   lm(Interference_stan ~ reading_stan, data = .)
  medium_lm_null <- scaled_reading %>%
   filter(reading_stan > fit_seg$psi[1,2] & reading_stan < fit_seg$psi[2,2]) %>%
   lm(Interference_stan ~ 1, data = .)
  medium_anova <- anova(medium_lm, medium_lm_null)

It can be seen in the plot that two breaks occur at standardized Reading score values -3.44 and 0.33. For scores in between these breakpoints, there is not a relationship between reading and Interference X2(1, p = 0.76). In other words, it seems like having a very low reading level has an effect on Interference. This is seen at more than 3 SD’s below the mean, so it is only individuals with quite low levels. Also, as Reading scores increase above the mean, there is also a positive relationship with Interference.

Color-Word

# Fit overall Reading mod- get high know
scaled_reading <- data.frame(CWRscore = reading_dat$CWRscore, reading_stan = scale(reading_dat$reading))
reading_lm_mod <- lm(CWRscore ~ reading_stan, data = scaled_reading)
fit_seg <- segmented(reading_lm_mod, seg.Z = ~reading_stan, psi = list(reading_stan=c(-2.5, 1)),control = seg.control(n.boot=0))
# fit_seg <- segmented(reading_lm_mod, seg.Z = ~reading, psi = list(reading=c(75, 80))) # use with unstandardized

# reading_age_break_low <- ceiling(fit_seg$psi[1, 2])
pr <- seq(min(scaled_reading$reading_stan), max(scaled_reading$reading_stan), .1)

predicted_values <- cbind.data.frame(reading_stan = pr, predicted_CWRscore = predict(fit_seg, data.frame(reading_stan = pr)))

predicted_break_value = predict(fit_seg, data.frame(reading_stan = fit_seg$psi[,2]))

predicted_break_values <- cbind.data.frame(break_value = fit_seg$psi[,2], predicted_break_value = predicted_break_value, se = fit_seg$psi[,3], location = c("Low", "High"))

p <- ggplot(predicted_values, aes(x = reading_stan, y = predicted_CWRscore)) +
  geom_line() 
 p + geom_point(data = predicted_break_values, aes(x = break_value, y = predicted_break_value)) +
   geom_label(data = predicted_break_values, aes(x = break_value, y = predicted_break_value, label = paste0(round(break_value,2), "/", round(predicted_break_value,2), " (",round(se,2), ")"), color = location), nudge_y = .1) +
   ggtitle("Color-Word Scores Across \n Standardized Reading Scores Segmented at Break Point") +
  xlab("Standardized Reading Scores") +
   ylab("Predicted CWRscore") +
  labs(color = "Reading Score/ \n Predicted CWRscore(SE)") +
  scale_x_continuous(breaks = seq(-5,2,1))

 medium_lm <- scaled_reading %>%
   filter(reading_stan > fit_seg$psi[1,2] & reading_stan < fit_seg$psi[2,2]) %>%
   lm(CWRscore ~ reading_stan, data = .)
  medium_lm_null <- scaled_reading %>%
   filter(reading_stan > fit_seg$psi[1,2] & reading_stan < fit_seg$psi[2,2]) %>%
   lm(CWRscore ~ 1, data = .)
  medium_anova <- anova(medium_lm, medium_lm_null)

It can be seen in the plot that two breaks occur at standardized Reading score values -3.07 and 1.1. For scores in between these breakpoints, there continues to be a relationship between Reading and Color-Word X2(1, p = 0), but the relationship is different below the breakpoint with increases in Reading leading to greater increases in Color-Word score in this interval. This is seen at more than 3 SD’s below the mean, so it is only individuals with quite low levels.

Special Cases Summary

In this section the relationships between reading and speed of processing constituent parts of the Stroop test were examined. The goal was to help understand how these abilities are related to abilities measured by the Stroop and to give examiners information to help make administration decisions and for interpretation.

The best fitting models showed that Stroop scores increased as reading ability increased. The ideal proficiency of processing words alone compared to colors alone seemed to be about 35% faster. It seemed that ratios of twice as many words as colors or a 1:1 ratio of words to colors were unusual and could lead to unusual scores.

In terms of extreme scores, reading abilities less than 3 SD’s below the mean (lowest 1st percentile), may be related to poor incongruent Stroop performance at a more accelerated rate than higher scores. Increases in reading abilities were associated with increases in Stroop Color-Word task performance and may have accelerated at scores significantly above the mean (>1 SD).

The above analyses shows the Stroop seems robust across individuals with a range of reading abilities and styles of processing colors and sight words. There were indications of increases in performance with reading and optimal color to word processing, but Interference, which controls for increased performance in incongruent processing by comparisons to congruent processing was not or only weakly correlated to these abilities. Reading ability is related to other abilities, so it is not suprising to see a relationship with that ability and Stroop performance.

This analysis involves individuals broadly representative of the general population of individuals proficient enough to complete the WJ LW and WA tasks and the Stroop tasks. Further research should include extreme cases- those with very low and high reading abilities and processing styles to consider the test performance with those groups.

References

Protopapas, A., Archonti, A., & Skaloumbakas, C. (2006). Reading ability is negatively related to stroop interference. Cognitive Psychology, 54(3), 251-282.

Adult Stroop Data Analysis

Brian Syzdek

Report Date: 2021-07-20

Introduction

Data Cleaning

Stroop Extreme Values

Standardization

Sample

Measures

Stroop Color and Word Test (Stroop)

Woodcock Johnson- Third Edition (WJ)

Nonverbal Stroop Card Sorting Test (NSCST)

Comprehensive Trail-Making Test (CTMT)

Score Descriptions

Overall

Stroop

WJ-3

NSCST

CTMT

Normality

Demographics

Demographic Tables and Plots

Raw Scores Across Age

Models to Predict Raw Scores

Summary Table of Models and Diagnostic Measures

Plots of Predicted Scores and Residuals

Scaled Scores

Color

Word

Color-Word

Compare New Prediction Models to Previous

Predicted Score Tables for Color, Word, Color-Word

Residual T-Scores

T-Scores for Word, Color, Color-Word

Interference Scores

Interference T-Scores

Scale Structure

Scale Correlation

Correlation Matrix Scatterplot Chart

Cronbach Alpha

Bartlett Correlation Test

Graphical Representation of Correlation using Principal Components Analysis- 2d

Graphical Representation of Correlation using Principal Components Analysis- 3d

Graphical Representation of Correlation with Interference as Dependent Variable

Principal Components Analysis

Stroop Unspecified Model

Stroop One Factor Model

Stroop Two Factor Model

Stroop Three Factor Model

Structural Equation Modeling

One-Factor Model

Two-Factor Model with Interference

Reliability

Test-Retest

Concordance Correlation

Bland-Altman Plot

Means and Differences Scatterplot

Color Raw Score

Word Raw Score

Color-Word Raw Score

Interference

Color Raw Score

Word Raw Score

Color-Word Raw Score

Interference

Validity

Previous Edition

All Measures

All Measures Correlation Matrix

All Measures Graphical Representation of Correlation using Princiapal Components Analysis- 2d

All Measures Three Factor PCA Model

WJ

WJ-Stroop Correlation

WJ-Stroop Correlation Matrix

WJ-Stroop Graphical Representation of Correlation using Princiapal Component Analysis- 2d

WJ-Stroop Graphical Representation of Correlation using Princiapal Component Analysis- 2d

WJ-Stroop Principal Components Analysis (PCA)

WJ-Stroop Scree Plot

WJ-Stroop Two Factor PCA Model

WJ-Stroop Three Factor PCA Model