###*** Read all data
## Recode data received- data previously checked for accuracy
### Variable names -> variable_names (lower case, underscore separate)
### To factors with levels specified, TD = "Typically Developing," as first level
### Reconfiguring some groups with low counts
# Read new dataset
sand_all <- readxl::read_xlsx("Full SAND Database (ages 2-12) 3.1.21_FInal with corrections_7.21.21_final for manual.xlsx", sheet = 1) %>%
rename(ASDdiagnosis = ASD, IntellectualDisability = ID, ID = `S ID`)
# Read old dataset, with some overlapping cases
sand_old <- readxl::read_xlsx("Full SAND Database (ages 2-12) 3.1.21_FInal with corrections.xlsx", sheet = 1)
# Identify unique cases in old data and add to new data
sand_all <- bind_rows(sand_all, anti_join(sand_old, sand_all, by = "ID"))
###*** Process data
# Function to clean data
df_clean <- function(df) {
df %>%
# Change missing values to NA
mutate_all(~ifelse(. %in% c(999, 888, "UNK"), NA, .)) %>%
# Not used, but can drop missing cases this way
# dplyr::filter(across(c(SSP_Total_Score, SSP_Tactile_Score, SSP_Auditory_Score, SSP_Visual_Score, ASDdiagnosis), ~ !is.na(.x))) %>%
mutate(
# recode variable names to conventional style
asd_diagnosis = ifelse(is.na(ASDdiagnosis), 1, ASDdiagnosis),
asd_diagnosis = factor(ifelse(asd_diagnosis == 1, "ASD", "TD"), levels = c("TD", "ASD")),
ndd = factor(ifelse(NDD == 1, "NDD", "TD"), levels = c("TD", "NDD")),
ses = factor(SES, levels = c("$0-$24,999", "$25,000-$49,000", "$50,000-$74,999", "$75,000-$99,999", "$100,000-$149,999", "$150,000-$199,999", "$200,000+")),
dx = dplyr::recode(Dx, '1' = "TD", '2' = "iASD", '3' = "PMS", '4' = "Fragile_X", '5' = "ADNP", '6' = "FOXP1", '7' = "DDX3X"),
# Put TD as first factor
dx = factor(dx, levels = c("TD", sort(unique(dx))[sort(unique(dx)) != "TD"])),
sex = factor(dplyr::recode(SEX, '0' = "Female", '1' = "Male")),
age = round(as.numeric(AGE),2),
# Combine races with multiple input into biracial
race = ifelse(grepl(",", Race), "Biracial", Race),
race = ifelse(Ethnicity == "Hispanic or Latino", "Hispanic/Latino", race),
race = dplyr::recode(race, "WHite" = "White"), # miscode
race = (replace_na(race, "Unknown")),
race = factor(race, levels = c(sort(unique(race))[sort(unique(race)) != "Unknown"], "Unknown")), # Unknown is last option
intellectual_d = as.character(IntellectualDisability),
IntellectualDisability = replace_na(IntellectualDisability, "Unknown"),
intellectual_disability = factor(dplyr::recode(IntellectualDisability, '11' = "Yes", '1' = "Yes", '0' = "No"), levels = c("No", "Yes", "Unknown")),
# Create new ssp classification if any critical score definite or probable
ssp_definite = factor(replace_na(ifelse(SSP_Total_Score <= 141 | SSP_Tactile_Score <= 26 | SSP_Auditory_Score <= 19 | SSP_Visual_Score <= 15 , "Sensory Difference", "None"), "None"), levels = c("None", "Sensory Difference")),
ssp_probable = factor(replace_na(ifelse(SSP_Total_Score <= 154 | SSP_Tactile_Score <= 29 | SSP_Auditory_Score <= 22 | SSP_Visual_Score <= 18, "Sensory Difference", "None"), "None"), levels = c("None", "Sensory Difference")),
# Include na cases, for modelling
ssp_probable_w_na = factor(ifelse(SSP_Total_Score <= 154 | SSP_Tactile_Score <= 29 | SSP_Auditory_Score <= 22 | SSP_Visual_Score <= 18, "Sensory Difference", "None"), levels = c("None", "Sensory Difference")),
# Following is if SSP or NDD
ssp_definite_ndd = factor(ifelse(ssp_definite == "Sensory Difference" | ndd == "NDD", "Sensory Difference", "None"), levels = c("None", "Sensory Difference")),
ssp_probable_ndd = factor(ifelse(ssp_probable == "Sensory Difference" | ndd == "NDD", "Sensory Difference", "None"), levels = c("None", "Sensory Difference")),
ssp_probable_ndd_w_na = factor(ifelse(ssp_probable_w_na == "Sensory Difference" | ndd == "NDD", "Sensory Difference", "None"), levels = c("None", "Sensory Difference")),
# We'll see this is same as ssp_definite/probable_ndd
sensory_difference_definite = factor(ifelse(ssp_definite_ndd == "Sensory Difference" | intellectual_disability == "Yes", "Sensory Difference", "None"), levels = c("None", "Sensory Difference")),
sensory_difference_probable = factor(ifelse(ssp_probable_ndd == "Sensory Difference" | intellectual_disability == "Yes", "Sensory Difference", "None"), levels = c("None", "Sensory Difference")),
sensory_difference_probable_w_na = factor(ifelse(ssp_probable_ndd_w_na == "Sensory Difference" | intellectual_disability == "Yes", "Sensory Difference", "None"), levels = c("None", "Sensory Difference"))
) %>%
# Following standardizes IQ test for comparison across tests
mutate(`IQ Test` = ifelse(`IQ Test` == "WASI", 5, `IQ Test`),
`IQ Test` == as.numeric(`IQ Test`),
standard_iq =
ifelse(`IQ Test` == 1,
(`FS IQ` - 50)/10,
ifelse(`IQ Test` %in% c(0, 2:6),
(`FS IQ` -100)/15, NA
)),
viq =
ifelse(`IQ Test` == 1,
(VIQ - 50)/10,
ifelse(`IQ Test` %in% c(0, 2:6),
(VIQ -100)/15, NA
)),
nviq =
ifelse(`IQ Test` == 1,
(NVIQ - 50)/10,
ifelse(`IQ Test` %in% c(0, 2:6),
(NVIQ -100)/15, NA
))
) %>%
# Rename all ssp scores to conventional variable names
rename(ssp_total_score = SSP_Total_Score, ssp_visual_score = SSP_Visual_Score, ssp_auditory_score = SSP_Auditory_Score, ssp_tactile_score = SSP_Tactile_Score, ssp_taste_score = SSP_Taste_Score, ssp_movement_score = SSP_Movement_Score,
ssp_sensation_score = SSP_Sensation_Score, ssp_energy_score = SSP_Energy_Score)
}
# Apply above data cleaning
sand_all <- df_clean(sand_all)
# Define vectors of individual scale names we will be using
domain_scale_names <- sand_all %>% dplyr::select(Hyperreactivity:Seeking) %>% names()
modality_scale_names <- sand_all %>% dplyr::select(TotalVisual, TotalTactile, TotalAuditory) %>% names()
subscale_names <- sand_all %>% dplyr::select(V_Hyper:A_Seek) %>% names()
# Name of scales
name_scales <- c("Domain", "Modality", "Subscales")
nam <- c("Modality", "Domain", "Subscales")
# Processes p-values if rounds to 0, rounding to 3
p_value_func <- function(p_value){
if(round(p_value,3) < .001) {
"p < .001"
} else {
paste0("p = ", round(p_value,3))
}
}
###*** Functions are sourced from helper_functions.R, but shown here
### Colorize function from 'helper_functions.R" to color given text
colorize <- function(x, color) {
if (knitr::is_latex_output()) {
sprintf("\\textcolor{%s}{%s}", color, x)
} else if (knitr::is_html_output()) {
sprintf("<span style='color: %s;'>%s</span>", color,
x)
} else x
}
### PRINT TABLES
## Use to number tables and figures
### Following gives running figure or table number
table_counter_n <<- 0
table_counter_func <- function(table_title){
table_counter_n <<- table_counter_n + 1
paste0("*Table* ", table_counter_n, ": ", table_title)
}
figure_counter_n <<- 0
figure_counter_func <- function(figure_title){
figure_counter_n <<- figure_counter_n + 1
paste0("*Figure* ", figure_counter_n, ": ", figure_title)
}
## Use to print tables in any format
### Get output format to print kable for html or console, flextable for others
getOutputFormat <- function() {
output <- rmarkdown:::parse_yaml_front_matter(
readLines(knitr::current_input())
)$output
if (is.list(output)){
return(names(output)[1])
} else {
return(output[1])
}
}
### Kable style preferences
kable_options <- function(.) {kable_styling(.,bootstrap_options = c("striped", "hover", "condensed", "responsive"))}
table_output_func <- function(df, colnames, caption, ...){
# with html- typically use table_output_func(df, colnames = c("col1", "col2"), caption = table_counter_func("caption")), df req
if (getOutputFormat() == "html_document"){
if (missing(colnames)){
colnames <- names(df)
}
if (missing(caption)){
caption <- NULL
}
kable(df, row.names = F, col.names = colnames, caption = caption, ...) %>%
kable_options()
} else {
# with pdf or word use table_output_func(df, colnames = c("col1", "col2"), values = table_counter_func("caption")), df req
names1 = names(df)
names2 = if(missing(colnames)){
names1
} else {
colnames
}
if (missing(caption)){
caption <- NULL
}
ft <- flextable(df)
ft <- set_header_df(x = ft, mapping = data.frame(keys = names1, values = names2, stringsAsFactors = FALSE),
key = "keys" ) %>%
add_header_lines(top = TRUE, values = caption)
theme_zebra(ft) # or theme_booktabs for plain tables
}
}
table_print <- function(df, colnames, caption, ...){
if (isTRUE(getOption('knitr.in.progress'))) {
table_output_func(df, colnames, caption, ...)
} else {
df %>%
kable(caption = caption, col.names = colnames) %>%
kable_options()
}
}
The following technical chapters present research on the SAND conducted in its development to review its reliability and validity.
Development of relevant items was done by taking into account DSM-5 criteria for ASD and existing sensory assessments. Face validity of items was established through discussions and consultations with clinical and developmental psychologists, child and adolescent psychiatrists, and occupational therapists treating children with NDDs.
Research evaluating the psychometric properties and validity of the SAND began in January 2014 and continued to February 2020. Data was collected at the Seaver Autism Center for Research and Treatment at the Icahn School of Medicine at Mount Sinai. Data was collected in the context of studies approved by Mount Sinai’s Program for the Protection of Human Subjects. Informed written consent was obtained from all parents or legal guardians and assent was obtained when appropriate.
Participants were recruited through the Seaver Autism Center’s Assessment Core, and ongoing studies at the center. Patients presenting to the Center between 2014 and February of 2020 received the SAND as part of a standard phenotyping battery which included ASD diagnostic testing, cognitive testing, and a measure of adaptive behavior (Vineland-II [Sparrow et al., 2008]). Typically developing controls were recruited through flyers, email blasts, and newsletters distributed within the Mount Sinai Hospital system (e.g., General Pediatrics) as well as local schools and childcare facilities. Children with known uncorrected vision or hearing problems (e.g., blindness or deafness) were not eligible for this study. Cortical visual impairment (CVI) and mild vision or hearing problems were allowed and did not impact the Examinee’s ability to engage in the SAND administration.
The Short Sensory Profile (SSP; Dunn, 1999) was used to establish convergent validity and as one method of defining sensory differences. It is a widely-used caregiver report questionnaire that purports to assess sensory reactivity domains, and was previously validated for use in children with ASD. Caregivers completed the SSP following the SAND administration. The SSP consists of 38 items, assessing sensory processing across the following domains:
Caregivers rate how often the Examinee shows a particular behavior, using a five-point Likert scale that ranges from Always (1) to Never (5). Higher scores reflect more typical behavior. The SSP has high internal reliability (0.90– 0.95 [Dunn, 1999]). Sensory reactivity differences were reported in up to 90% of children and adults with ASD, compared to typically developing controls (Crane et al., 2009; Dunn et al., 2002; Kern, et al., 2007; Kientz & Dunn, 1997; Leekam et al., 2007; Tomchek & Dunn, 2007; Watling et al., 2001; Wiggins et al., 2009). SSP scale raw scores are also summed to generate a Total Score, representative of overall sensory reactivity. SSP scores can be compared against cutoff criteria, indicating whether a score meets “Probable” or “Definite” criteria, reflecting approximately 1 and 2 Standard Deviations (SD) from the mean.
Full scale (FSIQ), verbal (VIQ), and nonverbal (NVIQ) Intelligence Quotients (IQ) were obtained using one of the following assessments, administered based on age and language ability:
Developmental quotients (DQs) were calculated for participants out of age range on the Mullen who were unable to complete standardized IQ testing. Both IQ and DQ scores were included in the FSIQ, VIQ, and NVIQ variables. As different IQ tests used different scales for standardizing scores (e.g., standard scores, t-scores, etc.), all IQ scores were standardized to z-scores (mean = 0, SD = 1) to allow for comparisons across tests.
The Social Responsiveness Scale, Second Edition (SRS-2; Constantino & Gruber, 2012) was used to screen children in the control group.
The SAND was administered according to the procedures explained in this Manual.
All SANDs were administered by a Ph.D. level psychologist. Raters used for inter-rater reliability included medical and graduate students.
306 children between the ages of 2 years and 12 years, 10 months participated in this study. The following demographic variables were collected:
In addition, we sought to create a criterion variable, called “Sensory Difference Present,” that captured whether an individual had sensory symptoms, using theoretical and analytical operationalization. Individuals with an NDD were included as having a Sensory Difference based on caregiver report of sensory symptoms during a comprehensive autismfocused psychiatric evaluation and/or those meeting clinical criteria, on any of the following SSP scales:
The above scales, along with cutoff scores, were used based on the following reasoning:
Participants who did not complete all measures involving SSP criteria were classified as ‘Unknown’ for those variables. If they were positive for other Sensory Difference criteria, they were considered positive for Sensory Difference . However, if they were not positive for other Sensory Difference criteria, these individuals were excluded from analyses involving this variable, though their SAND scores were included for other analyses.
# Create a confusion matrix to compare overlap of sensory difference and ssp classification
## Build df that is used
us <- sand_all %>%
# Remove NA
dplyr::filter(across(c(ssp_probable_w_na, ndd), ~ !is.na(.x))) %>%
mutate(
# Rename both ssp and ndd to same names- sensory difference or none
ssp = factor(ssp_probable_w_na, levels = c("Sensory Difference", "None")),
ndd = factor(ifelse(ndd == "NDD", "Sensory Difference", "None"), levels = c("Sensory Difference", "None"))
)%>%
dplyr:: select(ssp, ndd)
con <- confusionMatrix(us$ssp, us$ndd, dnn = c("SSP Classification", "NDD"))
as.data.frame.matrix(con$table) %>% bind_cols(row.names(.), .) %>%
regulartable() %>%
# Sets new headers above columns and rows
delete_part(., part = "header") %>%
add_header_row(., values = c("", "NDD ", "TD"), colwidths = c(1,1,1)) %>%
add_header_row(., values = c("SSP Classification ", "NDD"), colwidths = c(1,2)) %>%
add_header_lines(table_counter_func("SSP Probable/NDD Confusion Matrix")) %>%
theme_zebra() %>%
autofit() %>%
fit_to_width(9.5) %>%
set_caption(., "_Note_. There was a high degree of overlap between those with
an NDD and those meeting “Probable” criteria on the indicated
SSP scales.")
*Table* 1: SSP Probable/NDD Confusion Matrix | ||
SSP Classification | NDD | |
NDD | TD | |
Sensory Difference | 161 | 10 |
None | 24 | 47 |
###*** Descriptive table (big) for continuous demographic variables
std_border <- fp_border(color="gray", style="solid", width=1)
# Describes levels of vars
demographic_definition <- c("No Sensory Difference Present", "Sensory Difference Present", "No SSP Collected", "Typically Developing", "Autism Spectrum Disorder", "Typically Developing (No Rare Genetic Issue)", "NDD", "No Intellectual Disability", "Intellectual Disability", "", "", "", "Asian/Asian-American", "Biracial/Multiracial of any race", "Black/African-American", "Hispanic/Latino of any race", "White/Caucasian", rep("", 9))
# Order of variables in table and make user friendly
ordering_key <- sand_all %>% dplyr::select(ssp_probable_ndd_w_na, asd_diagnosis, ndd, intellectual_disability, sex, race, ses) %>%
# Print friendly names
dplyr::rename("Sensory Difference Probable" = ssp_probable_ndd_w_na, "ASD Diagnosis" = asd_diagnosis, "NDD" = ndd, "Race" = race, "ID" = intellectual_disability, "SES" = ses, "Sex" = sex) %>%
# change any NA to Unknown
mutate(across(everything(), ~fct_explicit_na(.x, "Unknown"))) %>%
sapply(., levels) %>%
Map(paste, names(.), ., sep = '_') %>%
unlist
# Apply data to table
sand_all %>%
dplyr::select(ssp_probable_ndd_w_na, asd_diagnosis, ndd, sex, race, ses, intellectual_disability, standard_iq, viq, nviq, age) %>%
# ASD defined as either ASD or TD with no other NDD
mutate(
asd_diagnosis = ifelse(asd_diagnosis == "ASD", "ASD", ifelse(ndd == "TD", "TD", "none"))
) %>%
# Friendly names
dplyr::rename("Sensory Difference Present" = ssp_probable_ndd_w_na, "ASD Diagnosis" = asd_diagnosis, "NDD" = ndd, "Race" = race, "ID" = intellectual_disability, "SES" = ses, "Sex" = sex) %>%
# Put in wide form so can go iterate through factors for below continous vars
pivot_longer(-c(standard_iq, viq, nviq, age)) %>%
group_by(name, value) %>%
mutate(value = replace_na(value, "Unknown")) %>%
# Drops ASD values who are ASD but NDD
filter(
!(name == "ASD Diagnosis" & value == "none")
) %>%
dplyr::summarize(count = n(),
## Get mean for each continuous
avg_iq = paste0(round(mean(standard_iq, na.rm = T),2), "(",round(sd(standard_iq, na.rm = T),2),")"),
avg_viq = paste0(round(mean(viq, na.rm = T),2), "(",round(sd(viq, na.rm = T),2),")"),
avg_nviq = paste0(round(mean(nviq, na.rm = T),2), "(",round(sd(nviq, na.rm = T),2),")"),
avg_age = paste0(round(mean(age, na.rm = T),2), "(",round(sd(age, na.rm = T),2),")")
) %>%
# Get n and percent of total for each level
mutate(freq = round((count/sum(count)*100),2),
`n(%)` = paste0(count, " (", freq, ")"),
ordering_value = paste(name, value, sep = "_")
) %>%
ungroup() %>%
# Put vars in order as specified above
arrange(match(ordering_value, ordering_key)) %>%
dplyr::select(name, value, `n(%)`, avg_age, avg_iq, avg_viq, avg_nviq) %>%
mutate(name = ifelse(duplicated(name), "", name)) %>%
# Add level descriptors
add_column(demographic_definition, .after = 2) %>%
# Adds a Grand TOTAL Row at bottom
bind_rows(., sand_all %>%
dplyr::summarize(
name = "TOTALS",
value = "",
demographic_definition = "",
`n(%)` = paste0(n(), "(100%)"),
avg_age = paste0(round(mean(age, na.rm = T),2), "(",round(sd(age, na.rm = T),2),")"),
# Creates mean(SD) for each continuous
avg_iq = paste0(round(mean(standard_iq, na.rm = T),2), "(",round(sd(standard_iq, na.rm = T),2),")"),
avg_viq = paste0(round(mean(viq, na.rm = T),2), "(",round(sd(viq, na.rm = T),2),")"),
avg_nviq = paste0(round(mean(nviq, na.rm = T),2), "(",round(sd(nviq, na.rm = T),2),")")
)
)-> continuous_demo_df
# Print with flextable for nice modification capability
regulartable(continuous_demo_df) %>%
set_header_df(data.frame(col_keys = colnames(continuous_demo_df),
what = c("Variable", "Levels", "Description", "N(%)", "Age", "FSIQ", "VIQ", "NVIQ"))) %>%
add_header_row(., values = c(" ", "Standardized Mean(SD)"), colwidths = c(4,4)) %>%
add_header_lines(table_counter_func("Continuous Demographic Variables")) %>%
theme_zebra() %>%
# Separate groups of vars
vline(., i = NULL, j = 4, border = std_border, part = "body") %>%
hline(., i = (nrow(continuous_demo_df) - 1), border = std_border) %>%
autofit() %>%
fit_to_width(9.5)
*Table* 2: Continuous Demographic Variables | |||||||
| Standardized Mean(SD) | ||||||
Variable | Levels | Description | N(%) | Age | FSIQ | VIQ | NVIQ |
ASD Diagnosis | TD | No Sensory Difference Present | 80 (30.53) | 6.03(2.73) | 0.94(0.95) | 0.92(0.91) | 0.88(1.23) |
ASD | Sensory Difference Present | 182 (69.47) | 6.42(2.67) | -1.45(1.9) | -1.77(1.88) | -1.01(1.98) | |
NDD | TD | No SSP Collected | 80 (26.14) | 6.03(2.73) | 0.94(0.95) | 0.92(0.91) | 0.88(1.23) |
NDD | Typically Developing | 226 (73.86) | 6.48(2.72) | -1.55(1.83) | -1.8(1.83) | -1.17(1.89) | |
ID | No | Autism Spectrum Disorder | 155 (50.65) | 5.92(2.57) | 0.54(1.25) | 0.37(1.39) | 0.66(1.35) |
Yes | Typically Developing (No Rare Genetic Issue) | 148 (48.37) | 6.81(2.83) | -2.32(1.44) | -2.59(1.35) | -1.91(1.58) | |
Unknown | NDD | 3 (0.98) | 6.86(2.36) | NaN(NA) | NaN(NA) | NaN(NA) | |
Sex | Female | No Intellectual Disability | 125 (40.85) | 6.66(2.79) | -1.09(2.04) | -1.26(2.05) | -0.94(1.98) |
Male | Intellectual Disability | 181 (59.15) | 6.15(2.66) | -0.99(1.92) | -1.29(1.98) | -0.66(1.94) | |
Race | Asian | 29 (9.48) | 5.04(2.32) | -0.45(1.65) | -0.81(1.89) | -0.01(1.54) | |
Biracial | 12 (3.92) | 7.16(3.14) | -1.12(1.97) | -1.18(2.14) | -1.37(1.97) | ||
Black | 22 (7.19) | 6.69(2.45) | -0.68(1.81) | -0.94(1.64) | -0.32(2.08) | ||
Hispanic/Latino | Asian/Asian-American | 25 (8.17) | 5.96(2.54) | -1.04(1.93) | -1.32(1.93) | -0.59(2.04) | |
White | Biracial/Multiracial of any race | 183 (59.8) | 6.68(2.8) | -1.22(2.01) | -1.47(2.07) | -1.03(1.96) | |
Unknown | Black/African-American | 35 (11.44) | 5.59(2.36) | -0.04(2.05) | -0.46(1.87) | 0.2(1.8) | |
SES | $0-$24,999 | Hispanic/Latino of any race | 4 (1.31) | 7.12(1.17) | -0.16(2.76) | -0.47(2.44) | -0.02(2.7) |
$25,000-$49,000 | White/Caucasian | 18 (5.88) | 7.26(3.36) | -1.15(1.79) | -1.51(1.75) | -0.86(1.9) | |
$50,000-$74,999 | 12 (3.92) | 6.47(3.21) | -1.55(2.17) | -2.08(1.95) | -1.48(2.3) | ||
$75,000-$99,999 | 23 (7.52) | 6.65(2.66) | -1.05(1.7) | -1.78(1.58) | -1.13(1.56) | ||
$100,000-$149,999 | 28 (9.15) | 6.4(2.82) | -1.5(1.72) | -1.98(1.76) | -1.13(1.61) | ||
$150,000-$199,999 | 32 (10.46) | 6.04(2.68) | -1.07(2.05) | -1.61(1.82) | -0.93(2.18) | ||
$200,000+ | 48 (15.69) | 6.84(2.73) | -1.34(1.78) | -1.37(1.86) | -1.09(1.76) | ||
Unknown | 141 (46.08) | 6.07(2.62) | -0.68(2.12) | -0.79(2.2) | -0.37(2.06) | ||
Sensory Difference Present | None | 47 (15.36) | 6.54(2.8) | 0.76(0.99) | 0.85(0.99) | 0.81(1.27) | |
Sensory Difference | 236 (77.12) | 6.43(2.72) | -1.48(1.87) | -1.7(1.89) | -1.1(1.9) | ||
Unknown | 23 (7.52) | 5.31(2.48) | 1.25(0.81) | 1(0.79) | 1.31(1.24) | ||
TOTALS | 306(100%) | 6.36(2.72) | -1.03(1.97) | -1.28(2.01) | -0.77(1.96) | ||
Note. The above table describes group variables, the levels of those variables, operational definition of those levels, count of participants meeting that criteria, and continuous descriptive variables.
# Does same as above, except continuous variables as SAND vars now
sand_all %>%
dplyr::select(ssp_probable_ndd_w_na, asd_diagnosis, ndd, sex, race, ses, intellectual_disability, Total, TotalObserved, Totalreported, Hyperreactivity, Hyporeactivity, Seeking, TotalVisual, TotalTactile, TotalAuditory) %>%
mutate(
asd_diagnosis = ifelse(asd_diagnosis == "ASD", "ASD", ifelse(ndd == "TD", "TD", "none"))
) %>%
dplyr::rename("Sensory Difference Present" = ssp_probable_ndd_w_na, "ASD Diagnosis" = asd_diagnosis, "NDD" = ndd, "Race" = race, "ID" = intellectual_disability, "SES" = ses, "Sex" = sex) %>%
pivot_longer(-c(Total, TotalObserved, Totalreported, Hyperreactivity, Hyporeactivity, Seeking, TotalVisual, TotalTactile, TotalAuditory)) %>%
group_by(name, value) %>%
mutate(value = replace_na(value, "Unknown")) %>%
filter(
!(name == "ASD Diagnosis" & value == "none")
) %>%
dplyr::summarize(count = n(),
avg_total = paste0(round(mean(Total, na.rm = T),2), "(",round(sd(Total, na.rm = T),2),")"),
avg_observed = paste0(round(mean(TotalObserved, na.rm = T),2), "(",round(sd(TotalObserved, na.rm = T),2),")"),
avg_reported = paste0(round(mean(Totalreported, na.rm = T),2), "(",round(sd(Totalreported, na.rm = T),2),")"),
avg_hyper = paste0(round(mean(Hyperreactivity, na.rm = T),2), "(",round(sd(Hyperreactivity, na.rm = T),2),")"),
avg_hypo = paste0(round(mean(Hyporeactivity, na.rm = T),2), "(",round(sd(Hyporeactivity, na.rm = T),2),")"),
avg_seek = paste0(round(mean(Seeking, na.rm = T),2), "(",round(sd(Seeking, na.rm = T),2),")"),
avg_vis = paste0(round(mean(TotalVisual, na.rm = T),2), "(",round(sd(TotalVisual, na.rm = T),2),")"),
avg_tact = paste0(round(mean(TotalTactile, na.rm = T),2), "(",round(sd(TotalTactile, na.rm = T),2),")"),
avg_aud = paste0(round(mean(TotalAuditory, na.rm = T),2), "(",round(sd(TotalAuditory, na.rm = T),2),")")
) %>%
mutate(freq = round((count/sum(count)*100),2),
`n(%)` = paste0(count, " (", freq, ")"),
ordering_value = paste(name, value, sep = "_")
) %>%
ungroup() %>%
arrange(match(ordering_value, ordering_key)) %>%
dplyr::select(name, value, `n(%)`, avg_total, avg_observed, avg_reported, avg_hyper, avg_hypo, avg_seek, avg_vis, avg_tact, avg_aud) %>%
mutate(name = ifelse(duplicated(name), "", name)) %>%
bind_rows(., sand_all %>%
dplyr::summarize(
name = "TOTALS",
value = "",
`n(%)` = paste0(n(), "(100%)"),
avg_total = paste0(round(mean(Total, na.rm = T),2), "(",round(sd(Total, na.rm = T),2),")"),
avg_observed = paste0(round(mean(TotalObserved, na.rm = T),2), "(",round(sd(TotalObserved, na.rm = T),2),")"),
avg_reported = paste0(round(mean(Totalreported, na.rm = T),2), "(",round(sd(Totalreported, na.rm = T),2),")"),
avg_hyper = paste0(round(mean(Hyperreactivity, na.rm = T),2), "(",round(sd(Hyperreactivity, na.rm = T),2),")"),
avg_hypo = paste0(round(mean(Hyporeactivity, na.rm = T),2), "(",round(sd(Hyporeactivity, na.rm = T),2),")"),
avg_seek = paste0(round(mean(Seeking, na.rm = T),2), "(",round(sd(Seeking, na.rm = T),2),")"),
avg_vis = paste0(round(mean(TotalVisual, na.rm = T),2), "(",round(sd(TotalVisual, na.rm = T),2),")"),
avg_tact = paste0(round(mean(TotalTactile, na.rm = T),2), "(",round(sd(TotalTactile, na.rm = T),2),")"),
avg_aud = paste0(round(mean(TotalAuditory, na.rm = T),2), "(",round(sd(TotalAuditory, na.rm = T),2),")")
)
) -> sand_score_demo_df
# Prints df created for SAND var table
regulartable(sand_score_demo_df) %>%
set_header_df(data.frame(col_keys = colnames(sand_score_demo_df),
what = c("Variable", "Levels", "N(%)","Total", "Observation", "Interview", "Hyperreactivity", "Hyporeactivity", "Seeking", "Visual", "Tactile", "Auditory"))) %>%
add_header_row(., values = c(" ", "Source", "Domain", "Modality"), colwidths = c(4,2,3,3)) %>%
add_header_row(., values = c(" ", "Mean(SD)"), colwidths = c(3,9)) %>%
add_header_lines(table_counter_func("SAND Scores by Demographics")) %>%
theme_zebra() %>%
vline(., i = NULL, j = c(3,4,6,9), border = std_border, part = "body") %>%
hline(., i = (nrow(sand_score_demo_df) - 1), border = std_border) %>%
autofit() %>%
fit_to_width(9.5)
*Table* 3: SAND Scores by Demographics | |||||||||||
| Mean(SD) | ||||||||||
| Source | Domain | Modality | ||||||||
Variable | Levels | N(%) | Total | Observation | Interview | Hyperreactivity | Hyporeactivity | Seeking | Visual | Tactile | Auditory |
ASD Diagnosis | TD | 80 (30.53) | 8.01(3.95) | 5.36(3.03) | 2.65(2.36) | 2.61(2.06) | 1.5(1.97) | 3.9(3.2) | 1.99(1.7) | 3.34(2.61) | 2.69(1.98) |
ASD | 182 (69.47) | 31.3(9.8) | 13.01(4.38) | 18.29(7.51) | 6.61(4.48) | 10(6.7) | 14.69(6.41) | 9.62(4.08) | 11.36(3.96) | 10.32(4.4) | |
NDD | TD | 80 (26.14) | 8.01(3.95) | 5.36(3.03) | 2.65(2.36) | 2.61(2.06) | 1.5(1.97) | 3.9(3.2) | 1.99(1.7) | 3.34(2.61) | 2.69(1.98) |
NDD | 226 (73.86) | 30.38(9.85) | 12.7(4.32) | 17.69(7.51) | 6.46(4.37) | 9.59(6.47) | 14.33(6.53) | 9.09(4.23) | 11.4(4.08) | 9.89(4.28) | |
ID | No | 155 (50.65) | 16.72(10.97) | 7.99(4.25) | 8.74(8.22) | 5.06(4.44) | 3.52(3.57) | 8.14(6.15) | 5.05(4.36) | 6.29(4.35) | 5.39(3.96) |
Yes | 148 (48.37) | 32.59(9.89) | 13.67(4.34) | 18.93(7.36) | 5.8(4.02) | 11.57(6.68) | 15.23(6.95) | 9.51(4.31) | 12.41(3.99) | 10.68(4.34) | |
Unknown | 3 (0.98) | 30.67(15.95) | 12.67(7.37) | 18(10.44) | 9.33(3.21) | 9.67(8.62) | 11.67(7.77) | 8.33(6.03) | 10.67(3.79) | 11.67(8.5) | |
Sex | Female | 125 (40.85) | 21.35(13.63) | 9.9(5.56) | 11.46(9.17) | 4.35(3.66) | 7.14(7.35) | 9.86(7.45) | 5.82(4.56) | 8.56(5.57) | 6.97(5.11) |
Male | 181 (59.15) | 26.73(12.34) | 11.39(4.78) | 15.34(9.11) | 6.22(4.47) | 7.7(6.18) | 12.81(7.2) | 8.21(4.85) | 9.8(4.81) | 8.73(4.73) | |
Race | Asian | 29 (9.48) | 27.34(12.38) | 12.76(5.27) | 14.59(8.51) | 4.21(3.33) | 10.14(6.57) | 13(7.71) | 8.07(4.28) | 10.38(4.87) | 8.9(5.18) |
Biracial | 12 (3.92) | 27.33(11.46) | 13.25(5.56) | 14.08(7.14) | 5.67(5.3) | 7.33(5.8) | 14.33(6.6) | 9.42(4.42) | 10.25(4.03) | 7.67(4.74) | |
Black | 22 (7.19) | 27.95(10.44) | 11.64(3.77) | 16.32(8.3) | 6.55(5.06) | 8.05(5.46) | 13.36(6.05) | 9.05(3.59) | 9.55(4.46) | 9.36(4.09) | |
Hispanic/Latino | 25 (8.17) | 27.96(12.46) | 11.64(4.95) | 16.32(8.52) | 7.08(4.45) | 6.72(6.41) | 14.16(7.79) | 8.64(5.07) | 9.8(4.73) | 9.52(5) | |
White | 183 (59.8) | 24.22(13.43) | 10.53(5.17) | 13.69(9.64) | 5.3(4.06) | 7.62(7.08) | 11.31(7.47) | 7.02(5) | 9.44(5.42) | 7.76(4.89) | |
Unknown | 35 (11.44) | 18.31(13.05) | 8.46(4.94) | 9.86(9.28) | 5.43(4.67) | 4.74(4.78) | 8.14(6.69) | 4.77(4.35) | 6.77(4.56) | 6.77(5.42) | |
SES | $0-$24,999 | 4 (1.31) | 30(22.08) | 10.5(8.35) | 19.5(13.89) | 7.25(4.27) | 6.75(8.62) | 16(11.58) | 8.75(7.85) | 8.5(7.85) | 12.75(6.7) |
$25,000-$49,000 | 18 (5.88) | 33(10.01) | 12.56(4.63) | 20.44(7.96) | 7.67(3.74) | 10.06(7.56) | 15.28(5.88) | 10.72(4.34) | 11.28(3.59) | 11(4.01) | |
$50,000-$74,999 | 12 (3.92) | 31.58(10.63) | 12.92(3.32) | 18.67(8.16) | 7.25(5.17) | 7.42(6.07) | 16.92(6.49) | 8.75(4.45) | 12.58(4.36) | 10.25(4.81) | |
$75,000-$99,999 | 23 (7.52) | 24.35(11.93) | 11.7(3.9) | 12.65(9.25) | 5.78(3.78) | 6.35(5.51) | 12.22(5.89) | 7(4.34) | 9.52(5.01) | 7.83(5.11) | |
$100,000-$149,999 | 28 (9.15) | 27(12.53) | 12(5.44) | 15(8.37) | 6.21(5.01) | 8.29(6.51) | 12.5(6.86) | 8.07(5.04) | 10.32(4.79) | 8.61(4.79) | |
$150,000-$199,999 | 32 (10.46) | 28.34(15.08) | 12.62(5.99) | 15.72(10.64) | 5.84(4.07) | 7.94(6.74) | 14.56(9.1) | 7.84(5.51) | 11(5.88) | 9.5(5.7) | |
$200,000+ | 48 (15.69) | 25.33(12.93) | 11.62(5.23) | 13.71(9.26) | 4.85(4.45) | 8.17(6.32) | 12.31(7.29) | 7.44(5.48) | 10.23(5.35) | 7.67(4.44) | |
Unknown | 141 (46.08) | 21.11(12.43) | 9.28(4.82) | 11.82(8.8) | 4.89(4.02) | 6.85(6.89) | 9.37(6.87) | 6.28(4.34) | 7.83(4.82) | 6.99(4.75) | |
Sensory Difference Present | None | 47 (15.36) | 7.57(3.88) | 5.38(3.22) | 2.19(2.27) | 2.43(1.77) | 1.36(1.57) | 3.79(3.17) | 1.79(1.67) | 2.96(2.27) | 2.83(2.18) |
Sensory Difference | 236 (77.12) | 29.5(10.54) | 12.4(4.49) | 17.1(7.89) | 6.32(4.36) | 9.29(6.49) | 13.89(6.79) | 8.8(4.37) | 11.1(4.27) | 9.6(4.41) | |
Unknown | 23 (7.52) | 8.26(4.26) | 5.17(2.81) | 3.09(2.17) | 2.83(2.48) | 1.3(2.67) | 4.13(2.74) | 2.3(1.94) | 3.7(3.14) | 2.26(1.86) | |
TOTALS | 306(100%) | 24.54(13.13) | 10.78(5.16) | 13.75(9.32) | 5.46(4.25) | 7.47(6.68) | 11.6(7.43) | 7.24(4.87) | 9.29(5.16) | 8.01(4.96) | |
The SAND yields the following composite scores that are described thoroughly in earlier chapters:
Total score is based on the sum of all items across the Observation and Interview, including Severity Items. Total scores range from 0 to 90.
Scores can be combined into Domain (Hyperreactivity, Hyporeactivity, and Seeking) and Modality (Visual, Tactile, and Auditory) scales (each ranging from 0 to 30).
The intersection of individual Domain and Modality scales produce the nine Subscales. Subscale scores range from 0 to 10.
The relationship between SAND scores and participant characteristics were examined for how SAND scores might vary based on individual differences. A description of the maximum likelihood data analysis techniques used are provided in Appendix G.
When examining the relationship between participant factors and SAND scores, SAND Total Score was used as the primary outcome variable. Total Score is most associated with diagnosis and incorporates the most information related to sensory symptoms, which the SAND purports to measure.
Linear regression was conducted with demographic variables as predictors and SAND Total Score as the outcome to determine which variables predicted Total Score. Stepwise regression procedures were used to identify the poorest fitting variable at each step, which was removed to create a reduced model that was compared with a likelihood ratio test to the full model (explained further in Appendix G). This iterative process was repeated until significant predictors were identified.
# From helper functions- following stepwise regression, using maximum likelihood to idenify significant predictors- removes worst fit at each step and tests reduced model against full- then outputs results
### Function to build lm models
lm_formula_func <- function(outcome_var, pred_var, data){
model_formula <- formula(paste0(outcome_var, "~ ", sub(" \\+ $.*", "", paste(sapply(pred_var, function(pred_var) paste(pred_var, "+ ")), collapse = ""))))
lm(model_formula, data = data) -> mod1
}
### Applies above to all vars that are factors
lm_factor_stepwise_func <- function(outcome_var, pred_var, data){
# Assign predicted variable to final, because pred_var changes
final_var <- pred_var
mod_full <- lm_formula_func(outcome_var, pred_var, data)
mod_full_sum <- summary(mod_full)
# Generate models for pred_vars removing one at each iteration
mod_one_removed<- lapply(1:length(pred_var), function(i) lm_formula_func(outcome_var, pred_var[-i], data))
aic_vec <- sapply(1:length(pred_var), function(i) summary(mod_one_removed[[i]])$adj.r.squared)
pred_var <- pred_var[-which(aic_vec == max(aic_vec, na.rm = T))]
mod_reduced <- lm_formula_func(outcome_var, pred_var, data = na.omit(data[ , all.vars(formula(mod_full))]))
full_reduced_anova <- lrtest(mod_full, mod_reduced)
if (full_reduced_anova$`Pr(>Chisq)`[-1] > .05){
outcome_msg <- "proceed"
} else {
outcome_msg <- "All significant predictors"
}
while (outcome_msg != "Final Model" & outcome_msg != "All significnat predictors" & pred_var[1] != "1") {
final_var <- pred_var
mod_full <- lm_formula_func(outcome_var, pred_var, data)
mod_full_sum <- summary(mod_full)
mod_one_removed<- lapply(1:length(pred_var), function(i) lm_formula_func(outcome_var, pred_var[-i], data))
# aic_vec <- sapply(1:length(pred_var), function(i) AIC(mod_one_removed[[i]]))
aic_vec <- sapply(1:length(pred_var), function(i) summary(mod_one_removed[[i]])$adj.r.squared)
pred_var <- pred_var[-which(aic_vec == max(aic_vec))]
pred_var <- if(identical(pred_var, character(0))) {
"1"
} else{
pred_var
}
mod_reduced <- lm_formula_func(outcome_var, pred_var, data=na.omit(data[ , all.vars(formula(mod_full))]))
full_reduced_anova <- lrtest(mod_full, mod_reduced)
if (isFALSE(getOption('knitr.in.progress'))) {
print(full_reduced_anova)
}
if (full_reduced_anova$`Pr(>Chisq)`[-1] > .05){
outcome_msg <- "proceed"
} else {
outcome_msg <- "Final Model"
}
}
paste0("Results of the linear regression indicated that there was",
ifelse(full_reduced_anova$`Pr(>Chisq)`[-1] > .05, " no significant effect ", " a significant effect "),
"between predictor variables ", paste(final_var, collapse = ", "),
" and the outcome variable ", outcome_var, " X2(", abs(full_reduced_anova$Df[-1]), "), ",
ifelse(full_reduced_anova$`Pr(>Chisq)`[-1] < .001, "p < .001",
paste0("p = ", round(full_reduced_anova$`Pr(>Chisq)`[-1], 3)))) -> cap
mod_full_sum$coefficients %>%
as_tibble(rownames = "Coefficients") %>%
mutate(across(where(is.double), round,3)) -> output_df
if (isTRUE(getOption('knitr.in.progress'))) {
output_df %>%
table_output_func(., colnames = colnames(.), caption = table_counter_func(paste(outcome_var, cap)))
} else {
output_df %>%
kable(caption = table_counter_func(paste(outcome_var, cap))) %>%
kable_options()
}
}
# Get predictor variables
pred_var <- sand_all %>%
dplyr::select(ssp_probable_ndd_w_na, intellectual_disability, sex, race, ses, age, standard_iq, viq, nviq) %>% # input var names to select) %>%
colnames()
# Get outcome variables
outcome_var <- sand_all %>%
dplyr::select(Total) %>% # input var names to select) %>%
colnames()
# subset data so that it contains only predictor and outcome vars
lm_data <- sand_all %>%
dplyr::select(all_of(pred_var), all_of(outcome_var)) %>%
mutate(intellectual_disability = na_if(intellectual_disability, "Unknown"))
lm_factor_stepwise_func(outcome_var, pred_var, lm_data)
| Coefficients | Estimate | Std. Error | t value | Pr(>|t|) |
|---|---|---|---|---|
| (Intercept) | 6.866 | 1.587 | 4.325 | 0.000 |
| ssp_probable_ndd_w_naSensory Difference | 16.547 | 1.918 | 8.629 | 0.000 |
| intellectual_disabilityYes | 7.795 | 1.750 | 4.455 | 0.000 |
| viq | -1.453 | 0.608 | -2.391 | 0.018 |
| nviq | 1.268 | 0.556 | 2.280 | 0.023 |
The results of linear model show Sensory Difference (Probable and Definite), Intellectual Disability, Verbal IQ, and Nonverbal IQ as all being significant predictors of Total score. As SAND purports to measure sensory reactivity and variable Sensory Difference is expected to be highly associated with that construct, Sensory Difference was entered in a mixed-effects regression model as a random effect to determine if other predictor variables remained significant after Sensory Difference was controlled for.
### This is model of interest at most basic- examine diagnostics
mod1 <- lm(Total ~ ssp_probable_ndd_w_na, data = lm_data)
mod2 <- lm(Total ~ 1, data=na.omit(lm_data[ , all.vars(formula(mod1))]))
lr_summary <- lrtest(mod1, mod2)
### Can also do above, controlling for random factors- lmer stepwise function not shown, but sourced
pred_var <- sand_all %>%
dplyr::select(ssp_definite_ndd, intellectual_disability, viq, nviq) %>% # input var names to select) %>%
colnames()
# Get outcome variables
outcome_var <- sand_all %>%
dplyr::select(Total) %>% # input var names to select) %>%
colnames()
# subset data so that it contains only predictor and outcome vars
lmer_data <- sand_all %>%
dplyr::select(all_of(pred_var), all_of(outcome_var))
lmer_factor_stepwise_func(outcome_var, pred_var[-(pred_var == "ssp_definite_ndd")], "ssp_definite_ndd", lmer_data)
| Coefficients | Estimate | Std. Error | df | t value | Pr(>|t|) |
|---|---|---|---|---|---|
| (Intercept) | 15.944 | 6.327 | 2.006 | 2.520 | 0.128 |
| intellectual_disabilityYes | 8.496 | 1.593 | 243.072 | 5.334 | 0.000 |
| nviq | 0.370 | 0.376 | 242.050 | 0.984 | 0.326 |
Sensory Difference Present and variables related to cognitive ability were found to be significant predictors of Total Score. However, upon examining model diagnostics, it was found that there was evidence of multicollinearity between cognitive test scores and Sensory Difference Present. In particular, all individuals with Intellectual Disability were also found to be positive for Sensory Difference Present. As such, cognitive scores and Intellectual Disability did not add information beyond Sensory Difference Present, which was a strong predictor of Total Score X2(1) = 150.71, p < .001.
As an example of the relationship between demographic variables and SAND score, the plot below shows Total Score plotted against Age, grouped by Sensory Difference Present. It can be seen that there are differences in Total Scores between groups, but that scores are relatively stable within them.
# Plot Total Scores Across Ages By Dx to show relatively flat across age
sand_all %>%
filter(!is.na(ssp_probable_ndd_w_na)) %>%
ggplot(data = ., aes(x=age, y = Total)) +
geom_smooth(aes(color = ssp_probable_ndd_w_na)) +
ggtitle(figure_counter_func("Total Scores By Sensory Difference Across Age")) +
labs(x = "Age", y = "Total SAND Score", caption = "Scores Flat Across Age Within Sensory Group", color = "Sensory Difference Present")
To examine the variable Sensory Difference Present, models using SSP criteria only versus using SSP criteria, along with NDD status, were constructed to compare models. As can be seen from the plot, using SSP and NDD status had more discriminative power to predict Total SAND Score, supporting the use of SSP and NDD as criteria to define Sensory Difference. It should be noted that the SAND identifies sensory issues associated with NDD, not NDD itself.
###*** Create logistic regression models that will be used later to generate probabilities, examining criterion variable- sensory difference with ssp and ndd?
mod_sensory_only_probable <- glm(ssp_probable_w_na ~ Total, data = sand_all, family = "binomial")
mod_sensory_asd_probable <- glm(ssp_probable_ndd_w_na ~ Total, data = sand_all, family = "binomial")
mod_sensory_td_id_probable <- glm(sensory_difference_probable_w_na ~ Total, data = sand_all, family = "binomial")
###--- This function is applied to different logistic models to get predicted values for each predictor value
## Gets predicted value
predict_func <- function(model_name, caption_name){
predicted_fit = sapply(min(sand_all$Total):max(sand_all$Total), function(x) predict(model_name, data.frame(Total = x), type='link', se.fit=TRUE)$fit)
## Gets standard errors
predicted_fit_se = sapply(min(sand_all$Total):max(sand_all$Total), function(x) predict(model_name, data.frame(Total = x), type='link', se.fit=TRUE)$se.fit)
# Gets probabilities
inverse_logit = function(x){
exp(x)/(1+exp(x))
}
## Gets standard errors for CI
se_low = round(inverse_logit(predicted_fit - (predicted_fit_se*1.96)),4)
se_high = round(inverse_logit(predicted_fit + (predicted_fit_se*1.96)), 4)
### Combines all above into df
glm_probs <- cbind.data.frame(min(sand_all$Total):max(sand_all$Total), sapply(predicted_fit, function(x) round(inverse_logit(x),4)), se_low, se_high)
colnames(glm_probs) <- c("Inputs", "Probabilities", "CI_Low", "CI_High")
glm_probs
}
# This gets odds ratio and at what input value that occurs for comparing logistic curves
coef_func <- function(p, mod_name) {
data.frame(
midpoint_odds = round(exp(summary(mod_name)$coefficients[2,1]),3),
midpoint_value = (log(p/(1-p)) - coef(mod_name)[1])/coef(mod_name)[2]
)}
### This applies predictor function and generates output table with values restricted where probabilities approach 0 and 100
preddat1 <- function(mod_name, caption_name){
glm_probs <- predict_func(mod_name, caption_name)
predict(mod_name,
type = "link",
newdata=data.frame(Total = seq(min(glm_probs$Inputs[glm_probs$Probabilities > .01 & glm_probs$Probabilities <=.995]), max(glm_probs$Inputs[glm_probs$Probabilities > .01 & glm_probs$Probabilities <=.995]), 1)),
se.fit=TRUE)%>%
as.data.frame() %>%
mutate(Total = seq(min(glm_probs$Inputs[glm_probs$Probabilities > .01 & glm_probs$Probabilities <=.995]), max(glm_probs$Inputs[glm_probs$Probabilities > .01 & glm_probs$Probabilities <=.995]), 1),
# model object mod1 has a component called linkinv that
# is a function that inverts the link function of the GLM:
lower = mod_name$family$linkinv(fit - 1.96*se.fit),
point.estimate = mod_name$family$linkinv(fit),
upper = mod_name$family$linkinv(fit + 1.96*se.fit))
}
### Combines the probability df's for the different criterion inputs
all_df <- bind_rows(cbind.data.frame(preddat1(mod_sensory_only_probable), model = "SSP Probable Only"), cbind.data.frame(preddat1(mod_sensory_asd_probable), model = "SSP Probable or NDD")) # cbind.data.frame(preddat1(mod_sensory_td_id_probable), model = "SSP Probable, ASD, ID or Non-TD Dx"))
###*** Create plot of logistic curves for each criterion to compare
all_df %>%
ggplot(aes(x = Total,
y = point.estimate, colour = model)) +
geom_line() +
annotate("text", x = (coef_func(.5, mod_sensory_td_id_probable)$midpoint_value - 2), y = .5, label = paste("", coef_func(.5, mod_sensory_td_id_probable)$midpoint_odds)) +
annotate("text", x = (coef_func(.5, mod_sensory_only_probable)$midpoint_value + 2), y = .5, label = paste("", coef_func(.5, mod_sensory_only_probable)$midpoint_odds)) +
labs(y = "Probability of Sensory Difference", caption = "Midpoint odds values included") +
ggtitle("Logistic curves \n predicting sensory difference \n from Total SAND")
A team of two PhD scientists (authors) and one medical student established inter-rater reliability through live observations completed at the same time. Inter-rater reliability was measured using a two-way, random effects, single measure intraclass correlation coefficients (ICCs) with absolute agreement between independent raters. ICCs were obtained from 54 participants. High ICCs were found between rater 1 and rater 2 (ICC = 0.87, n = 19) and between rater 1 and rater 3 (ICC = 0.91, n = 26).
Test-retest reliability was collected on ten participants in the NDD group within two weeks of the initial SAND administration and on an additional 47 participants within 12 weeks of the initial SAND administration. A two-way, mixed effects, single measure, ICC with absolute agreement was used to measure test-retest reliability. Over the two-week period, ICC for Total Scores was 0.97 (p < 0.001), for Total Observation Scores (ICC = 0.82, p < 0.001) and for Total Interview Scores (ICC = 0.97, p < 0.001). Stability over 12 weeks was also strong.
### This is from outside data gathered
data.frame(
`SAND Composite Score` = c("Total Score", domain_scale_names, gsub("Total", "", modality_scale_names)),
ICC = c(.857, .751, .832, .913, .738, .776, .858),
ci = c(".746-.920", ".555-.860", ".700-.906", ".945-.951", ".531-.853", ".601-.874", ".747-.921"),
Sig = rep("<.001", 7)
) %>%
table_print(caption = table_counter_func("SAND 12 Week Retest Reliability"), colnames = c("SAND Composite Score", "ICC", "95% CI Range", "Sig"))
| SAND Composite Score | ICC | 95% CI Range | Sig |
|---|---|---|---|
| Total Score | 0.857 | .746-.920 | <.001 |
| Hyperreactivity | 0.751 | .555-.860 | <.001 |
| Hyporeactivity | 0.832 | .700-.906 | <.001 |
| Seeking | 0.913 | .945-.951 | <.001 |
| Visual | 0.738 | .531-.853 | <.001 |
| Tactile | 0.776 | .601-.874 | <.001 |
| Auditory | 0.858 | .747-.921 | <.001 |
### This creates data frame of all items in observation and interview by subscale
## Gets index to subset
col_numb <- function(col_name) {which(colnames(sand_all) == col_name)}
item_data_df <- cbind.data.frame(
sand_all[,col_numb("V1.1O"): col_numb("V1.3R")],
sand_all[,col_numb("V2.1O"): col_numb("V2.3R")],
sand_all[,col_numb("V3.1O"): col_numb("V3.3R")],
sand_all[,col_numb("T1.1O"):col_numb("T1.3R")],
sand_all[,col_numb("T2.1O"):col_numb("T2.3R")],
sand_all[,col_numb("T3.1O"):col_numb("T3.3R")],
sand_all[,col_numb("A1.1O"):col_numb("A1.3R")],
sand_all[,col_numb("A2.1O"):col_numb("A2.3R")],
sand_all[,col_numb("A3.1O"):col_numb("A3.3R")]
)
### Cronbach's alpha
## For scales- defined above
cron_alph_item_data_df <- psych::alpha(item_data_df, warnings = F)
### For all individual items- not shown
all_items_df <- sand_all[,col_numb("V1.1O"): col_numb("A3ReportedSeverity")]
cron_alph_all_items <- psych::alpha(all_items_df, warnings = F)
Cronbach’s alpha was calculated to measure the internal consistency of the entire SAND. As there is some ambiguity about the assumption of independence among variables used to calculate alpha (Cronbach & Shavelson, 2004) and as SAND Severity Items were dependent on Behavior Item responses, two alphas were calculated, one with the Severity Items included (all 72 items) and one without (54 items). The 72 SAND items demonstrated high internal consistency Cronbach’s a = 0.89. The Behavior items alone also demonstrated high internal consistency (Cronbach’s a = 0.83 ). Both alpha values are considered good to excellent, using typical interpretive criteria for this metric. Importantly, the SAND is always interpreted using all Behavior and Severity items.
Cronbach’s alpha is considered the lower-bound estimate of reliability. Further analyses (Structural Equation Modeling section, below) will investigate the assumption of unidimensionality of the test and the consistency within scales to produce estimates of composite reliability, generally considered more accurate and representative measures of reliability (Peterson & Kim, 2013).
The correspondence between scores on the Observation and Interview was assessed, using Total Observation Scores (Observation) and Total Interview Scores (Interview).
# Long data for reported observed comparison
reported_observed_long <- sand_all %>%
# Gives id for each participant for internal comparison
mutate(participant = seq(1,nrow(sand_all), 1)) %>%
pivot_longer(names_to = "method", values_to = "score", cols = c(TotalObserved, Totalreported)) %>%
dplyr::select(method, score, participant, sensory_difference_probable, sensory_difference_probable_w_na)
#HLM TEst
reported_observed_lm <- lmer(score ~ method + (1|sensory_difference_probable_w_na/participant), data = reported_observed_long, REML = F)
reported_observed_reduced <- lmer(score ~ 1 + (1|sensory_difference_probable_w_na/participant), data = reported_observed_long, REML = F)
reported_observed_anova <- anova(reported_observed_lm, reported_observed_reduced)
## Summary stats of different forms- observed, interview
reported_observed_long %>%
dplyr::select(method, score) %>%
mutate(Method = dplyr::recode(method, "TotalObserved" = "Observed", "Totalreported" = "Reported")) %>%
group_by(Method) %>%
dplyr::summarize(N = n(), Mean = round(mean(score),2), SD = round(sd(score),2), SEM = round(psych::describe(score)$se, 2)) %>%
table_print(., colnames = colnames(.), caption = table_counter_func("Average Observation and Interview Scores"))
| Method | N | Mean | SD | SEM |
|---|---|---|---|---|
| Observed | 306 | 10.78 | 5.16 | 0.29 |
| Reported | 306 | 13.75 | 9.32 | 0.53 |
Observation and Interview scores were compared with mixed-effects models with participant included as random factor. Analysis revealed significant differences between Observation and Interview scores within participants (χ2(1)= 52.83, p < .001), with greater overall Interview scores on average. However, as can be seen in the plot, the Reported scores are not uniformly greater. Interview scores were higher than Observation scores for those with sensory differences present, although Interview scores were lower than Observation Scores for those without sensory differences.
sand_all %>%
filter(!is.na(sensory_difference_probable_w_na)) %>%
dplyr::select(sensory_difference_probable_w_na, TotalObserved, Totalreported) %>%
pivot_longer(names_to = "method", values_to = "score", cols = -sensory_difference_probable_w_na) %>%
mutate(Method = dplyr::recode(method, "TotalObserved" = "Observation", "Totalreported" = "Interview")) %>%
group_by(Method, sensory_difference_probable_w_na) %>%
dplyr::summarize(
avg = round(mean(score), 2),
avg_sd = paste0(avg, "(", round(sd(score), 2), ")"),
se = psych::describe(score)$se
) %>%
ggplot(., aes(x = Method, y = avg, fill = sensory_difference_probable_w_na)) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(x = Method, ymin = avg - se, ymax = avg + se), position = position_dodge2(padding = 0.5), color = "gray35", size = 1.5) +
geom_label(aes(x = Method, y = avg/2, label = avg_sd), position = position_dodge2(0.9), fill = "white") +
ggtitle(figure_counter_func("Average Total Score By Source and Sensory Difference")) +
labs(x = "Source", y = "Total Score", fill = "Sensory Difference", caption = "Mean(SD), Errorbars = +/- SE") +
scale_y_continuous(breaks = seq(2,20,2)) + scale_fill_grey() + theme_bw()
# Calculate interrater reliability on each item between caregivers and clinicians
# create vectors with all reported and observed scores
reported <- c(sand_all$V1.1R, sand_all$V1.2R, sand_all$V1.3R, sand_all$V2.1R, sand_all$V2.2R, sand_all$V2.3R, sand_all$V3.1R, sand_all$V3.2R, sand_all$V3.3R,
sand_all$T1.1R, sand_all$T1.2R, sand_all$T1.3R, sand_all$T2.1R, sand_all$T2.2R, sand_all$T2.3R, sand_all$T3.1R, sand_all$T3.2R, sand_all$T3.3R,
sand_all$A1.1R, sand_all$A1.2R, sand_all$A1.3R, sand_all$A2.1R, sand_all$A2.2R, sand_all$A2.3R, sand_all$A3.1R, sand_all$A3.2R, sand_all$A3.3R)
observed <- c(sand_all$V1.1O, sand_all$V1.2O, sand_all$V1.3O, sand_all$V2.1O, sand_all$V2.2O, sand_all$V2.3O, sand_all$V3.1O, sand_all$V3.2O, sand_all$V3.3O,
sand_all$T1.1O, sand_all$T1.2O, sand_all$T1.3O, sand_all$T2.1O, sand_all$T2.2O, sand_all$T2.3O, sand_all$T3.1O, sand_all$T3.2O, sand_all$T3.3O,
sand_all$A1.1O, sand_all$A1.2O, sand_all$A1.3O, sand_all$A2.1O, sand_all$A2.2O, sand_all$A2.3O, sand_all$A3.1O, sand_all$A3.2O, sand_all$A3.3O)
reported_observed_all_items_df <- cbind.data.frame(reported, observed)
reported_df <- sand_all %>%
dplyr::select(V1.1R, V1.2R, V1.3R, V2.1R, V2.2R, V2.3R, V3.1R, V3.2R, V3.3R,
T1.1R, T1.2R, T1.3R, T2.1R, T2.2R, T2.3R, T3.1R, T3.2R, T3.3R,
A1.1R, A1.2R, A1.3R, A2.1R, A2.2R, A2.3R, A3.1R, A3.2R, A3.3R)
observed_df <- sand_all %>%
dplyr::select(V1.1O, V1.2O, V1.3O, V2.1O, V2.2O, V2.3O, V3.1O, V3.2O, V3.3O,
T1.1O, T1.2O, T1.3O, T2.1O, T2.2O, T2.3O, T3.1O, T3.2O, T3.3O,
A1.1O, A1.2O, A1.3O, A2.1O, A2.2O, A2.3O, A3.1O, A3.2O, A3.3O)
# Calculate Cohen's Kappa for Reported and Observed Scores at item level
interrater <- kappa2(cbind(reported, observed))
# Get CI
kap_ci <- cohen.kappa(cbind(reported, observed), alpha = .05)
# Look for interaction effects between report forma and sensory difference
reported_observed_dx_interaction_lm <- lmer(score ~ method*sensory_difference_probable_w_na + (1|participant), data = reported_observed_long, REML = F)
reported_observed_dx_no_interaction_reduced <- lmer(score ~ method + sensory_difference_probable_w_na+ (1|participant), data = reported_observed_long, REML = F)
reported_observed_dx_anova <- anova(reported_observed_dx_interaction_lm, reported_observed_dx_no_interaction_reduced)
Inclusion of an effect for the interaction between Diagnosis and Source (Observed or Interview) produced a strong effect (χ2(1)= 47.42, p < .001). As it applies to interpretation of the SAND, Examiner’s may expect Caregivers to report more sensory symptoms on the Interview than seen during the Observation. This is expected, as the Interview captures the presence of sensory symptoms in everyday life, across multiple environments. In the case of typically developing children, it is expected that Examiners will observe certain sensory symptoms that may not be as apparent to a Caregiver.
# Correlation of Total Observed and Reported Scores
# Scatterplot of observed and reported total scores
sand_all %>%
filter(!is.na(sensory_difference_probable_w_na)) %>%
ggplot(., aes(x = TotalObserved, y = Totalreported, color = sensory_difference_probable_w_na)) +
geom_point() +
geom_jitter() + # Sprads out dots
scale_y_continuous(breaks = seq(0, max(sand_all$Totalreported) + 5, 5), labels = seq(0, max(sand_all$Totalreported) + 5, 5)) +
scale_x_continuous(breaks = seq(0, max(sand_all$Totalreported) + 5, 5), labels = seq(0, max(sand_all$Totalreported) + 5, 5)) +
coord_cartesian(xlim = c(0, max(sand_all$Totalreported))) +
labs(x = "Observation", y = "Interview", color = "Sensory Difference") +
ggtitle(figure_counter_func("Observation and Interview Total Scores Scatterplot"))
Item-level agreement between raters was assessed using Cohen’s Kappa. To fulfill assumptions of independent variables, the Behavioral items and not dependent Severity items were included. A Cohen’s Kappa statistic was calculated to measure agreement between Observation and Interview scores for each participant, resulting in 5400 total pairs of scores. Kappa = 0.25 (p < .001), 95% CI (0.22, 0.27) , suggesting a significant, though low, correlation on item-level reliability between raters.
The agreement between Observation and Interview Total Scores was examined. The relationship between scores was monotonic, and appeared related, based on examination of the scatterplot.
The relationship between Observation and Interview Total scores was calculated using a Spearman correlation coefficient, appropriate for assessing inter-rater agreement with ordinal data.
# Correlation of different report methods
overall_cor <- cor.test(sand_all$TotalObserved, sand_all$Totalreported, method = "spearman")
cat(paste0('\n', "There was a moderate and significant correlation between the two variables: rs= ", round(overall_cor$estimate,2), ", ", p_value_func(overall_cor$p.value), ", N= ", nrow(sand_all), "."))
There was a moderate and significant correlation between the two variables: rs= 0.62, p < .001, N= 306.
This section examines the inter-relationship of SAND items and the factor structure.
Observation and Interview scores were summed to create an overall score for each item (e.g., Visual Hyperreactivity Observed + Visual Hyperreactivity Reported). As discussed above, the Observation and Interview Total Scores were moderately-to-strongly correlated, though correlations between individual items varied. Thus, combined scores capture maximum information for each item.
# Create df of raw data combining Observed and Reported- should have used programatic methods
raw_df <- data.frame(
V1.1 = sand_all$V1.1O + sand_all$V1.1R,
V1.2 = sand_all$V1.2O + sand_all$V1.2R,
V1.3 = sand_all$V1.3O + sand_all$V1.3R,
V2.1 = sand_all$V2.1O + sand_all$V2.1R,
V2.2 = sand_all$V2.2O + sand_all$V2.2R,
V2.3 = sand_all$V2.3O + sand_all$V2.3R,
V3.1 = sand_all$V3.1O + sand_all$V3.1R,
V3.2 = sand_all$V3.2O + sand_all$V3.2R,
V3.3 = sand_all$V3.3O + sand_all$V3.3R,
A1.1 = sand_all$A1.1O + sand_all$A1.1R,
A1.2 = sand_all$A1.2O + sand_all$A1.2R,
A1.3 = sand_all$A1.3O + sand_all$A1.3R,
A2.1 = sand_all$A2.1O + sand_all$A2.1R,
A2.2 = sand_all$A2.2O + sand_all$A2.2R,
A2.3 = sand_all$A2.3O + sand_all$A2.3R,
A3.1 = sand_all$A3.1O + sand_all$A3.1R,
A3.2 = sand_all$A3.2O + sand_all$A3.2R,
A3.3 = sand_all$A3.3O + sand_all$A3.3R,
T1.1 = sand_all$T1.1O + sand_all$T1.1R,
T1.2 = sand_all$T1.2O + sand_all$T1.2R,
T1.3 = sand_all$T1.3O + sand_all$T1.3R,
T2.1 = sand_all$T2.1O + sand_all$T2.1R,
T2.2 = sand_all$T2.2O + sand_all$T2.2R,
T2.3 = sand_all$T2.3O + sand_all$T2.3R,
T3.1 = sand_all$T3.1O + sand_all$T3.1R,
T3.2 = sand_all$T3.2O + sand_all$T3.2R,
T3.3 = sand_all$T3.3O + sand_all$T3.3R
)
A Bartlett’s correlation test of individual SAND items was conducted, and significant correlation among items (χ 2(351)= 2060.52 , p < .001). The determinant of the correlation matrix was 0.0009, of sufficient value to conduct further analysis of the factor structure of the items.
## Get correlations among all SAND subscales
sand_all %>%
dplyr::select(V_Hyper:A_Seek) %>%
correlation_matrix(digits = 2, use = "lower") %>%
as.data.frame() %>%
table_print(colnames = colnames(.), caption = "Note: V = Visual, T = Tactile, A = Auditory, Hyper = Hyperreactivity, Hypo = Hyporeactivity, Seek = Seeking; *p < .05. **p < .01. ***p < .001")
| V_Hyper | V_Hypo | V_Seek | T_Hyper | T_Hypo | T_Seek | A_Hyper | A_Hypo | A_Seek |
|---|---|---|---|---|---|---|---|---|
| 1.00 | ||||||||
| 0.16** | 1.00 | |||||||
| 0.19** | 0.19*** | 1.00 | ||||||
| 0.25*** | 0.11 | 0.21*** | 1.00 | |||||
| 0.11 | 0.59*** | 0.18** | -0.02 | 1.00 | ||||
| 0.12* | 0.34*** | 0.51*** | 0.11 | 0.40*** | 1.00 | |||
| 0.30*** | 0.05 | 0.34*** | 0.27*** | 0.08 | 0.24*** | 1.00 | ||
| 0.03 | 0.62*** | 0.01 | -0.06 | 0.57*** | 0.21*** | -0.16** | 1.00 | |
| 0.18** | 0.28*** | 0.52*** | 0.13* | 0.33*** | 0.59*** | 0.21*** | 0.13* | 1.00 |
# Create a df of observed and reported items sensory behavior x domain scales
individual_df <- sand_all[,which(colnames(sand_all) == "V_Hyper"):which(colnames(sand_all) == "A_Seek")]
The relationship between scales was examined. Results of the Bartlett’s correlation indicated the correlation of Subscales was significantly different than zero, (χ2(36)= 773.44, p < .001). The determinant of the correlation matrix was 0.0767, of sufficient value to conduct further analysis of the factor structure of the items.
First, a Principal Components Analysis (PCA) was conducted with the Scales as factors and individual items as components to gather information about items and the constructs they represent. In models utilizing item-level data, only Behavior Items, and not Severity Items, were used to avoid issues of dependency among items. SAND items were analyzed using models with a varied number of factors to represent the SAND scales. An oblique “promax” rotation was used for all analyses, which is often recommended for use when factors are correlated.
# PCA without restrictions- exploratory
fit <- principal(raw_df, rotate="promax")
data.frame(factors = seq(1:9), variance = round(fit$values[1:9],2)) %>%
kable(caption = paste("Variance of Contrasts of Models with Given Factors"), row.names = F, col.names = c("Number of Factors in Model", "Variance")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| Number of Factors in Model | Variance |
|---|---|
| 1 | 4.95 |
| 2 | 3.41 |
| 3 | 1.98 |
| 4 | 1.24 |
| 5 | 1.16 |
| 6 | 1.13 |
| 7 | 0.98 |
| 8 | 0.92 |
| 9 | 0.90 |
cat (paste0('\n', "Examining the variance of the contrasts of models with increasing factors, it can be seen that the greatest amount of variance is in the model with ", which(fit$values == max(fit$values)), " factor. This is the model represented by Total Score. However, additional models should be evaluated because SAND items are intended to comprise multiple Scales.", '\n' ), sep = " ")
Examining the variance of the contrasts of models with increasing factors, it can be seen that the greatest amount of variance is in the model with 1 factor. This is the model represented by Total Score. However, additional models should be evaluated because SAND items are intended to comprise multiple Scales.
In the first model to assume a given number of components, a PCA was conducted on individual items with nine components to represent the Subscales.
A matrix of the items and their loading into the above components indicated that certain items load strongly onto more than one component. This could be due to the fact that each item is associated with more than one Scale. Items are associated with a Domain and a Modality so certain items may group more strongly by either Domain or Modality with other items in that shared Subscale. To further explore what each component represents, the three items that loaded most strongly onto each component were examined.
In the following table items are represented by a unique four-character label of the following scheme: “Subscale (Modality x Domain). Item Number,” where the following abbreviations represent the following Scales:
Modality
V = Visual, T = Tactile, A = Auditory
Domain
1 = Hyperreactivity, 2 = Hyporeactivity, 3 = Seeking
For example, an item numbered “A1.1” would be the first item from the Auditory Hyperreactivity Subscale. In PCA tables, “RC” stands for common factor portion. Components are labeled this, along with an arbitrary number.
## Individual item loading onto 9 Subscales
fit <- principal(raw_df, nfactors=9, rotate="promax")
as.data.frame(apply(fit$loadings, 2, function(fit_df) sapply(1:3, function(top_three) names(fit_df[fit_df == sort(fit_df, decreasing = T)[top_three]])))) %>%
kable(caption = paste("Items that most strongly load for each component."), row.names = T) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| RC2 | RC1 | RC6 | RC5 | RC7 | RC9 | RC3 | RC4 | RC8 | |
|---|---|---|---|---|---|---|---|---|---|
| 1 | A2.2 | A3.3 | T3.1 | A1.3 | V3.2 | V1.3 | V1.1 | T1.2 | T1.1 |
| 2 | A2.1 | T3.2 | T3.3 | A1.2 | V3.1 | A1.1 | V1.2 | T1.3 | V1.3 |
| 3 | A2.3 | A3.1 | V2.3 | A1.1 | V3.3 | A3.1 | A1.2 | A2.1 | V3.2 |
This overview shows that items tended to load within a Subscale, such as the component “RC2” that contained all Auditory Hyporeactivity items ("A2.*"). However, there was some components in which items seemed to load by either Modality or Domain (i.e., components with items with similar Sensory Modality or Behavior values). To consider how the Subscales were related, a Principal Components Analysis was conducted with those Subscales, assuming a three-component model.
# See how scales fit on a three component model
fit <- principal(individual_df, nfactors=3, rotate="promax")
round(fit$loadings[,1:(dim(fit$loadings)[2])], 4) %>%
data.frame() %>%
table_print(caption = table_counter_func("Subscale Loading Three-Component Subscale Model"), colnames = colnames(.))
| RC1 | RC2 | RC3 |
|---|---|---|
| -0.1320 | 0.1704 | 0.7892 |
| 0.0519 | 0.8341 | 0.1667 |
| 0.8101 | -0.1328 | 0.0843 |
| -0.0846 | 0.0011 | 0.7557 |
| 0.2030 | 0.7633 | -0.0079 |
| 0.8276 | 0.1504 | -0.1055 |
| 0.2917 | -0.1935 | 0.5656 |
| -0.1151 | 0.9042 | -0.0563 |
| 0.8409 | 0.0471 | -0.0670 |
round(fit$Vaccounted, 4) %>%
cbind.data.frame(statistic = rownames(.), .) %>%
table_print(caption = table_counter_func("Variance Accounted in Three-Component Subscale Model"), colnames = colnames(.))
| statistic | RC1 | RC2 | RC3 |
|---|---|---|---|
| SS loadings | 2.2112 | 2.2221 | 1.5448 |
| Proportion Var | 0.2457 | 0.2469 | 0.1716 |
| Cumulative Var | 0.2457 | 0.4926 | 0.6642 |
| Proportion Explained | 0.3699 | 0.3717 | 0.2584 |
| Cumulative Proportion | 0.3699 | 0.7416 | 1.0000 |
as.data.frame(apply(fit$loadings, 2, function(fit_df) sapply(1:3, function(top_three) names(fit_df[fit_df == sort(fit_df, decreasing = T)[top_three]])))) %>%
table_print(caption = table_counter_func("Subscales that Most Strongly Load for Each Component"), colnames = colnames(.))
| RC1 | RC2 | RC3 |
|---|---|---|
| A_Seek | A_Hypo | V_Hyper |
| T_Seek | V_Hypo | T_Hyper |
| V_Seek | T_Hypo | A_Hyper |
When the model is forced to have three components, Subscale scores grouped by Domain scales. To get a better sense of how individual items grouped, a PCA was conducted with individual behavior items and a three-factor model.
fit <- principal(raw_df, nfactors=3, rotate="promax")
round(fit$Vaccounted, 4) %>%
data.frame() %>%
table_print(caption = table_counter_func("Variance Accounted in Behavior Items Three-Component Model"), colnames = colnames(.))
| RC2 | RC1 | RC3 |
|---|---|---|
| 4.0321 | 3.7352 | 2.5691 |
| 0.1493 | 0.1383 | 0.0952 |
| 0.1493 | 0.2877 | 0.3828 |
| 0.3901 | 0.3614 | 0.2485 |
| 0.3901 | 0.7515 | 1.0000 |
as.data.frame(apply(fit$loadings, 2, function(fit_df) sapply(1:9, function(top_three) names(fit_df[fit_df == sort(fit_df, decreasing = T)[top_three]])))) %>%
kable(caption = paste("Behavior Items that Most Strongly Load for Each Component"), row.names = T) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| RC2 | RC1 | RC3 | |
|---|---|---|---|
| 1 | A2.3 | A3.3 | A1.2 |
| 2 | A2.1 | A3.1 | T1.3 |
| 3 | A2.2 | T3.3 | V1.3 |
| 4 | V2.2 | T3.1 | V1.1 |
| 5 | V2.3 | V3.3 | V1.2 |
| 6 | T2.1 | T3.2 | A1.3 |
| 7 | T2.3 | A3.2 | A1.1 |
| 8 | V2.1 | V3.1 | T1.1 |
| 9 | T2.2 | V3.2 | T1.2 |
resid <- factor.model(fit$loadings)
hist(resid)
In this model, the first component seems to be loaded by items related to Seeking, the second by Hypoactivity, and the third by Hyperreactivity. Histograms of residuals of this model indicated residuals were approximately normally distributed.
Additional models with varying numbers of components were considered. To determine the optimal number of components, a scree plot was constructed.
# Scree plot to determine optimal number of factors
library(nFactors)
ev <- eigen(cor(individual_df)) # get eigenvalues
ap <- parallel(subject=nrow(individual_df),var=ncol(individual_df),
rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS, legend = F)
Examination of the scree plot reveals that the “bend” in the line, the number of components of eigenvalues above 1 and above the mean, number of factors in parallel analysis, and number of optimal coordinates (identified in plot legend) occurs where there are three components, suggesting that a three-component model is optimal for conceptualizing the items.
In the PCA analysis, items were allowed to load onto any component, which allowed for multiple possible factor structures. However, due to items loading onto more than one factor, Structural Equation Modeling (SEM), or Confirmatory Factor Analysis, was used to model specific factor structures. Specifically, individual items (again, only the independent Behavior items) were assigned to factors representing different SAND Scales:
The fits for each of these models is reported, as is the composite reliability. Composite reliability is a more accurate and representative measure of reliability than Cronbach’s alpha because it accounts for items’ reliability with other items within the factor in which they load (Peterson & Kim, 2013). In this case, we expect that an item that measures hyperactivity, for example, should go along with other items that measure Hyperactivity and not with items that measure Hyporeactivity.
The first model with individual SAND items assigned to their respective Subscale was constructed.
# Individual Sensory/Senses Scales SEM
individual_sensory_senses.model <- ' visual_hyper =~ V1.1 + V1.2 + V1.3
visual_hypo =~ V2.1 + V2.2 + V2.3
visual_seek =~ V3.1 + V3.2 + V3.3
auditory_hyper =~ A1.1 + A1.2 + A1.3
auditory_hypo =~ A2.1 + A2.2 + A2.3
auditory_seek =~ A3.1 + A3.2 + A3.3
tactile_hyper =~ T1.1 + T1.2 + T1.3
tactile_hypo =~ T2.1 + T2.2 + T2.3
tactile_seek =~ T3.1 + T3.2 + T3.3 '
# Fit the model
fit <- cfa(individual_sensory_senses.model, data = raw_df)
# summary <- summary(fit, fit.measures=FALSE)$PE
# Parameter Estimates
pe <- parameterEstimates(fit, ci = TRUE, level = 0.95)[1:27,]
# Factor Loadings
sl <- standardizedSolution(fit)
sl <- sl$est.std[sl$op == "=~"]
# Round function
round_df <- function(df, digits) {
nums <- vapply(df, is.numeric, FUN.VALUE = logical(1))
df[,nums] <- round(df[,nums], digits = digits)
(df)
}
# Output DF of Parameter Estimates
pe %>%
cbind.data.frame(., sl) %>%
round_df(.,2) %>%
kable(caption = "Parameter Estimates", row.names = F, col.names = c("Factor", "Contains", "Variable", "Estimate", "SE", "Z", "P-Value", "CI Low", "CI Upper", "Factor Loadings")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| Factor | Contains | Variable | Estimate | SE | Z | P-Value | CI Low | CI Upper | Factor Loadings |
|---|---|---|---|---|---|---|---|---|---|
| visual_hyper | =~ | V1.1 | 1.00 | 0.00 | NA | NA | 1.00 | 1.00 | 0.54 |
| visual_hyper | =~ | V1.2 | 0.81 | 0.14 | 5.68 | 0 | 0.53 | 1.08 | 0.61 |
| visual_hyper | =~ | V1.3 | 1.01 | 0.20 | 5.03 | 0 | 0.61 | 1.40 | 0.45 |
| visual_hypo | =~ | V2.1 | 1.00 | 0.00 | NA | NA | 1.00 | 1.00 | 0.49 |
| visual_hypo | =~ | V2.2 | 1.28 | 0.17 | 7.48 | 0 | 0.94 | 1.61 | 0.64 |
| visual_hypo | =~ | V2.3 | 1.34 | 0.18 | 7.46 | 0 | 0.99 | 1.69 | 0.64 |
| visual_seek | =~ | V3.1 | 1.00 | 0.00 | NA | NA | 1.00 | 1.00 | 0.64 |
| visual_seek | =~ | V3.2 | 0.49 | 0.08 | 6.35 | 0 | 0.34 | 0.64 | 0.45 |
| visual_seek | =~ | V3.3 | 0.88 | 0.10 | 8.63 | 0 | 0.68 | 1.08 | 0.67 |
| auditory_hyper | =~ | A1.1 | 1.00 | 0.00 | NA | NA | 1.00 | 1.00 | 0.45 |
| auditory_hyper | =~ | A1.2 | 1.32 | 0.24 | 5.58 | 0 | 0.85 | 1.78 | 0.62 |
| auditory_hyper | =~ | A1.3 | 1.35 | 0.25 | 5.35 | 0 | 0.86 | 1.85 | 0.54 |
| auditory_hypo | =~ | A2.1 | 1.00 | 0.00 | NA | NA | 1.00 | 1.00 | 0.73 |
| auditory_hypo | =~ | A2.2 | 0.97 | 0.09 | 10.63 | 0 | 0.79 | 1.14 | 0.67 |
| auditory_hypo | =~ | A2.3 | 1.41 | 0.12 | 12.05 | 0 | 1.18 | 1.64 | 0.77 |
| auditory_seek | =~ | A3.1 | 1.00 | 0.00 | NA | NA | 1.00 | 1.00 | 0.56 |
| auditory_seek | =~ | A3.2 | 1.29 | 0.16 | 8.02 | 0 | 0.97 | 1.60 | 0.66 |
| auditory_seek | =~ | A3.3 | 0.90 | 0.12 | 7.25 | 0 | 0.65 | 1.14 | 0.56 |
| tactile_hyper | =~ | T1.1 | 1.00 | 0.00 | NA | NA | 1.00 | 1.00 | 0.38 |
| tactile_hyper | =~ | T1.2 | 1.08 | 0.28 | 3.80 | 0 | 0.52 | 1.64 | 0.45 |
| tactile_hyper | =~ | T1.3 | 1.24 | 0.32 | 3.91 | 0 | 0.62 | 1.86 | 0.50 |
| tactile_hypo | =~ | T2.1 | 1.00 | 0.00 | NA | NA | 1.00 | 1.00 | 0.52 |
| tactile_hypo | =~ | T2.2 | 1.50 | 0.19 | 7.71 | 0 | 1.12 | 1.88 | 0.55 |
| tactile_hypo | =~ | T2.3 | 0.42 | 0.07 | 5.76 | 0 | 0.28 | 0.56 | 0.37 |
| tactile_seek | =~ | T3.1 | 1.00 | 0.00 | NA | NA | 1.00 | 1.00 | 0.54 |
| tactile_seek | =~ | T3.2 | 1.33 | 0.17 | 7.85 | 0 | 1.00 | 1.66 | 0.66 |
| tactile_seek | =~ | T3.3 | 1.15 | 0.16 | 7.31 | 0 | 0.84 | 1.46 | 0.58 |
# For interpretive report below
pe_no_na <- pe[complete.cases(pe),]
fit_measures <- fitMeasures(fit)
# Compute residual variance of each item
re <- 1 - sl^2
# Compute composite reliability
composite_reliability <- round(sum(sl)^2 / (sum(sl)^2 + sum(re)),4)
The model was found to be of significantly good fit (χ2(288)=470.54, p < .001).
The parameter estimates for each variable’s fit within the assigned factor are provided in the table. No variables were found to not fit factors they were assigned.
Composite reliability of this model was 0.9277.
An SEM with the Subscale scores as variables and Domain scores as factors was constructed and summarized graphically below.
# Individual Sensory Model
individual_sensory.model <- ' hyper =~ V_Hyper + A_Hyper + T_Hyper
hypo =~ V_Hypo + A_Hypo + T_Hypo
seek =~ V_Seek + A_Seek + T_Seek '
# Fit the model
fit <- cfa(individual_sensory.model, data = individual_df)
# summary <- summary(fit, fit.measures=FALSE)$PE
# Parameter Estimates
pe <- parameterEstimates(fit, ci = TRUE, level = 0.95)[1:27,]
# Factor Loadings
sl <- standardizedSolution(fit)
sl <- sl$est.std[sl$op == "=~"]
# Round function
round_df <- function(df, digits) {
nums <- vapply(df, is.numeric, FUN.VALUE = logical(1))
df[,nums] <- round(df[,nums], digits = digits)
(df)
}
# For interpretive report below
pe_no_na <- pe[complete.cases(pe),]
fit_measures <- fitMeasures(fit)
# Print SEM Paths
label_df <- data.frame(labels = expand_grid( c("Hypr", "Hypo", "Seek"), c("Vis", "Aud", "Tact")) %>% unite("labels", c(2,1), remove = T, sep = "\n")) %>%
bind_rows(., data.frame(labels = (c("Hypr", "Hypo", "Seek"))))
# Print SEM Paths
semPaths(fit,"std", label.prop = .8, edge.label.cex = 1.5, edge.label.color = "black", exoVar = FALSE, nodeLabels = label_df$labels,
exoCov = FALSE, sizeMan = 8, sizeInt = 10, sizeLat = 13, label.cex = 1, layout = "tree", rotation = 2)
# Compute residual variance of each item
re <- 1 - sl^2
# Compute composite reliability
composite_reliability <- round(sum(sl)^2 / (sum(sl)^2 + sum(re)),4)
The model was found to be of significantly good fit (χ2(24)=84.82, p < .001).
Composite reliability of this model was 0.8855.
Overall, individual items on the SAND converge around constructs represented in the Subscales and these Scales significantly load into Domains and Total Scores. The most salient way to interpret sensory symptoms on the SAND is by Domain (Hyperreactivity, Hyporeactivity, Seeking).
The validity of the SAND was assessed by examining content and items, constructing models to assess the relationship between scores and the construct of having a sensory difference, and examining the accuracy and predictive validity of the test. These findings were used to construct interpretive guidelines for raw scores.
Face validity of items was established through discussions and consultations with clinical and developmental psychologists, child and adolescent psychiatrists, and occupational therapists evaluating and treating children with NDDs.
## Correlation between SAND and SSP
correlation_report_func <- function(cor_var_1, cor_var_2){
cor_mod <- cor.test(cor_var_1, cor_var_2, use="pairwise.complete.obs")
paste0("r = ", round(cor_mod$estimate, 3), ", ", p_value_func(cor_mod$p.value), ", 95% CI[", round(cor_mod$conf.int[1],3), ",", round(cor_mod$conf.int[2], 3), "]")
}
Correlations between the SAND and the previously validated SSP were used to examine convergent validity. As stated previously, higher SAND scores and lower SSP scores are indicative of more prominent sensory symptoms. The SSP Total Score was significantly correlated with the SAND Total Score across groups r = -0.72, p < .001, 95% CI[-0.776,-0.654]. The Total Observation Score r = -0.422, p < .001, 95% CI[-0.521,-0.313] and Total Interview Score r = -0.773, p < .001, 95% CI[-0.819,-0.716] were both significantly correlated with SSP Total Score.
Using the data collected and used for the above reliability analyses, we sought to develop a sample of scores representative of the general population, from which to construct normative scoring data. As the raw data were found to assume a normal distribution within diagnoses, scores can be found that correspond to extreme values and inform choice of cut-off scores, specifically at 1 and 2 standard deviations above the mean.
The sample from which the data were obtained was not stratified to be representative of the general population in characteristics that are typically sources of variation in assessment outcomes. Therefore, those characteristics and their impact on generalizability of data were considered.
As was presented in the Standardization Chapter on participant factors, Age, Sex, Race/Ethnicity, and SES were found to not have a relationship with SAND outcomes. Cognitive measures had issues with multicollinearity and overlap with the outcome Sensory Difference Present, a significant predictor. Factors that were significantly associated with SAND scores were NDD and ASD status.
Being positive for neurodevelopmental diagnosis (including those with a rare genetic syndrome and ASD) was strongly associated with SAND scores. The sample included 226 individuals with an NDD and 80 controls. According to the latest estimates, as of 2020, (Maenner et al., 2020), the prevalence of ASD among the general population, within the United States, was found to be 1 in 54. The prevalence of rare diseases associated with ASD seen in this sample is a negligible amount in the general population and the majority presented with ASD. Intellectual Disability was shown to have a relationship with SAND scores. Thus, the sample was dichotomized to be those with NDD (positive for NDD, including individuals with ASD and/ or ID associated with a known genetic syndrome) or TD.
To replicate the general population with the SAND sample, scores of individuals with and without NDD were weighted to reflect that proportion in the general population. Rather than sampling scores of individuals with NDD from the sample, which could result in extreme cases unduly influencing outcomes due to sampling error from a relatively small pool of cases, scores were weighted in proportion to those populations. Thus, for Total Score and each SAND Scale, the control mean and SD were weighted by 53/54 and the NDD mean and SD were weighted by 1/54 to construct standardized scores (Z-scores) and percentiles. For each score, raw mean and SD for groups and weighted mean and SD are presented in tables that follow.
# Weighted sample to mimic population
sample <- sand_all %>%
group_by(ndd) %>%
dplyr::summarize(tot = mean(Total),
stand = sd(Total)) %>%
ungroup() %>%
dplyr::summarize(weighted_mean = wtd.mean(tot, c(53/54, 1/54)),
weighted_sd = wtd.mean(stand, c(53/54, 1/54)))
The following plot shows a histogram of the full dataset, a histogram of the modified dataset sampled according to weights presented above, and, from the full dataset, the mean and values one and two SD above the mean. Data assumed normal distributions when weighted to mimic population characteristics (for all scales, though only Total is pictured).
###*** Histogram shows slight bimodality based on sensory difference
sample_central_df <- data.frame(name = c("Mean", "Mean + 1 SD", "Mean + 2 SD"), stat = c(sample$weighted_mean, sample$weighted_mean + sample$weighted_sd, sample$weighted_mean + 2*sample$weighted_sd))
### Histogram with lines for each mean + SD SD
set.seed(12345)
sand_all %>%
filter(!is.na(ndd)) %>%
nest(data = !ndd) %>%
mutate(n = c(1/54, 53/54)) %>%
mutate(samp = map2(data, n, sample_frac)) %>%
dplyr::select(-data) %>%
unnest(samp) %>%
mutate(dataset = "Weighted") %>%
bind_rows(., sand_all %>% mutate(dataset = "Full")) %>%
ggplot(., aes(x = Total, color = dataset)) +
geom_histogram(aes(y=..density..), fill="white", position="dodge") +
geom_vline(data = sample_central_df, aes(xintercept=stat, linetype = name)) +
stat_function(fun=dnorm,
color="red",
args=list(mean=sample$weighted_mean,
sd=sample$weighted_sd)) +
labs(color = "Data", y = "Density", linetype = "Statistics")
As can be seen in the table below, for Total, a score one SD above the mean corresponded to a score of 12.487 and two SD above the mean corresponded to a score of 16.546. The weighted summary statistics were used to construct Z-score and percentile tables, presented in Appendix F.
A similar procedure to construct Z-score and percentile tables and identify cutoff scores, based on weighted sample data, was undertaken with Domain, Modality, and Subscales (Appendix F). Tables are presented with summary statistics by NDD classification. As will be shown later in the section on classification with SAND scale scores, there was no difference between individual scales in predicting sensory symptoms. Each participant’s maximum score was used within each Scale.
### Function to weight each scale, pull maximum score from each row
weight_sample_func <- function(scale_names){
sand_all %>%
dplyr::select("NDD" = ndd, !!!syms(scale_names)) %>%
mutate(max_col_value = apply(.[-1], 1, max)) %>%
group_by(NDD) %>%
dplyr::summarize(tot = mean(max_col_value),
stand = sd(max_col_value)) %>%
ungroup() %>%
dplyr::summarize(weighted_mean = wtd.mean(tot, c(53/54, 1/54)),
weighted_sd = wtd.mean(stand, c(53/54, 1/54)))
}
scale_names <- list(sand_all %>% dplyr::select(Hyperreactivity:Seeking) %>% colnames, sand_all %>% dplyr::select(TotalVisual:TotalAuditory) %>% colnames, sand_all %>% dplyr::select(V_Hyper:A_Seek) %>% colnames)
scale_range <- list(0:30, 0:30, 0:10)
scale_type <- c("Domain", "Modality", "Subscale")
scale_names_all <- c(list("Total") %>% setNames("Total"), scale_names)
scale_type_all <- c("Total", scale_type)
scale_summary_table_func <- function(scale_names){
sand_all %>%
dplyr::select("NDD" = ndd, !!!syms(scale_names)) %>%
pivot_longer(!c(NDD), names_to = "Domain", values_to = "Score") %>%
group_by(Domain, NDD) %>%
dplyr::summarize(n = n(), Mean = round(mean(Score),2), SD = round(sd(Score),2), SEM = round(psych::describe(Score)$se, 2)) %>%
mutate(Domain = ifelse(duplicated(Domain), "", Domain))
}
### Create tables for each scale with weighted stats
out <- lapply(1:length(scale_names_all), function(i) cat(knit_print(
scale_summary_table_func(scale_names_all[[i]]) %>%
bind_rows(.,
cbind("TOTAL",nrow(sand_all), weight_sample_func(scale_names_all[[i]]),NA) %>%
# mutate_all(round, 3) %>%
setNames(c("NDD", "n", "Mean", "SD", "SEM")) %>%
mutate(across(where(is.numeric), round, 3))
) %>%
table_print(caption = scale_type_all[i], colnames = colnames(.))),
knit_print(cbind.data.frame(" ", "M+1SD", round(weight_sample_func(scale_names_all[[i]])$weighted_mean + weight_sample_func(scale_names_all[[i]])$weighted_sd, 2), "M+2SD", round(weight_sample_func(scale_names_all[[i]])$weighted_mean + (2*weight_sample_func(scale_names_all[[i]])$weighted_sd),2)) %>%
setNames(c( "NDD", "n", "Mean", "SD", "SEM")) %>%
mutate(across(where(is.numeric), round, 2))%>%
table_print(caption = scale_type[i], colnames = rep("", 5))), "\n\n")
)
| Domain | NDD | n | Mean | SD | SEM |
|---|---|---|---|---|---|
| Total | TD | 80 | 8.010 | 3.95 | 0.44 |
| NDD | 226 | 30.380 | 9.85 | 0.65 | |
| NA | TOTAL | 306 | 8.427 | 4.06 | NA |
| M+1SD | 12.49 | M+2SD | 16.55 |
| Domain | NDD | n | Mean | SD | SEM |
|---|---|---|---|---|---|
| Hyperreactivity | TD | 80 | 2.610 | 2.060 | 0.23 |
| NDD | 226 | 6.460 | 4.370 | 0.29 | |
| Hyporeactivity | TD | 80 | 1.500 | 1.970 | 0.22 |
| NDD | 226 | 9.590 | 6.470 | 0.43 | |
| Seeking | TD | 80 | 3.900 | 3.200 | 0.36 |
| NDD | 226 | 14.330 | 6.530 | 0.43 | |
| NA | TOTAL | 306 | 5.363 | 2.666 | NA |
| M+1SD | 8.03 | M+2SD | 10.69 |
| Domain | NDD | n | Mean | SD | SEM |
|---|---|---|---|---|---|
| TotalAuditory | TD | 80 | 2.690 | 1.98 | 0.22 |
| NDD | 226 | 9.890 | 4.28 | 0.28 | |
| TotalTactile | TD | 80 | 3.340 | 2.61 | 0.29 |
| NDD | 226 | 11.400 | 4.08 | 0.27 | |
| TotalVisual | TD | 80 | 1.990 | 1.70 | 0.19 |
| NDD | 226 | 9.090 | 4.23 | 0.28 | |
| NA | TOTAL | 306 | 4.682 | 2.11 | NA |
| M+1SD | 6.79 | M+2SD | 8.9 |
| Domain | NDD | n | Mean | SD | SEM |
|---|---|---|---|---|---|
| A_Hyper | TD | 80 | 1.150 | 1.42 | 0.16 |
| NDD | 226 | 2.890 | 2.35 | 0.16 | |
| A_Hypo | TD | 80 | 0.550 | 0.95 | 0.11 |
| NDD | 226 | 2.590 | 2.93 | 0.20 | |
| A_Seek | TD | 80 | 0.990 | 1.32 | 0.15 |
| NDD | 226 | 4.410 | 2.93 | 0.19 | |
| T_Hyper | TD | 80 | 1.200 | 1.34 | 0.15 |
| NDD | 226 | 2.420 | 2.12 | 0.14 | |
| T_Hypo | TD | 80 | 0.380 | 0.85 | 0.09 |
| NDD | 226 | 3.340 | 2.34 | 0.16 | |
| T_Seek | TD | 80 | 1.760 | 2.01 | 0.22 |
| NDD | 226 | 5.640 | 2.74 | 0.18 | |
| V_Hyper | TD | 80 | 0.260 | 0.67 | 0.07 |
| NDD | 226 | 1.150 | 1.77 | 0.12 | |
| V_Hypo | TD | 80 | 0.580 | 1.02 | 0.11 |
| NDD | 226 | 3.650 | 2.60 | 0.17 | |
| V_Seek | TD | 80 | 1.150 | 1.42 | 0.16 |
| NDD | 226 | 4.280 | 2.77 | 0.18 | |
| NA | TOTAL | 306 | 3.304 | 1.47 | NA |
| M+1SD | 4.77 | M+2SD | 6.24 |
###*** Generate z score tables
convert_z_score <- function(raw_score, scale_mean, scale_sd){
(raw_score - scale_mean) / scale_sd
}
z_score_table_func <- function(scale_names, scale_range){
z_score_vec <- as.numeric(mapply(convert_z_score, scale_range, weight_sample_func(scale_names)[1], weight_sample_func(scale_names)[2]))
z_percentiles <- sapply(z_score_vec, pnorm)
# print(
cbind.data.frame("Score" = scale_range, "Z-Score" = round(z_score_vec,2), "Percentiles" = round(z_percentiles*100,2)) %>%
filter(Percentiles < 100)
}
A main purpose of the SAND is to identify individual sensory reactivity preferences and to quantify sensory reactivity symptoms. One way clinically meaningful cut-off scores were derived was using normative methods, described above, in which less frequently occurring scores were identified, in this case those at 1 and 2 SD above the mean. Another method for identifying scores that are indicative of clinically meaningful sensory reactivity symptoms is to compare raw scores of those with sensory differences with those without. This section describes the methods and analyses used to develop models to predict sensory differences based on SAND scores, to assess the accuracy of classifying individuals as having a sensory difference based on SAND scores, and predictive validity of using SAND scores to classify sensory reactivity of a sample. All SAND scales were used.
The approach is described as follows:
As described in the Standardization section, “Sensory Difference Present” was a variable created to identify individuals with clinically meaningful sensory symptoms.
## Logistic regression for total score
glm_sensory_difference_probable_total <- glm(sensory_difference_probable_w_na ~ Total, data = sand_all, family = binomial)
glm_sensory_difference_probable_no_term <- glm(sensory_difference_probable_w_na ~ 1, data = sand_all, family = binomial)
total_lrtest <- lrtest(glm_sensory_difference_probable_total, glm_sensory_difference_probable_no_term)
# Hoslem Test
sand_all %>%
filter(!is.na(sensory_difference_probable_w_na)) %>%
glm(sensory_difference_probable_w_na ~ Total, data = ., family = binomial) -> binary_mod
hoslem.test(as.numeric(sand_all$sensory_difference_probable_w_na[!is.na(sand_all$sensory_difference_probable_w_na)])-1, fitted(binary_mod)) -> hoslem_out
Models were constructed to evaluate SAND Scale scores as predictors of the presence of a sensory difference. Best fitting models were then used to construct tables with probabilities of the presence of a sensory difference for given SAND scores. The process is described in detail for Total Score. The same process was replicated for other scales, with only results presented.
A logistic regression model was created with Sensory Difference Present as the outcome variable and Total Score as the predictor. Participant-level variables were not included, as they were shown to not predict SAND scores in the presence of Sensory Difference Present. A likelihood ratio test confirmed that the presence of a sensory difference was significantly predicted by Total Score with a very strong effect size χ2(1)=175.44, p < .001.
An analysis of goodness of fit using the Hosmer and Lemeshow goodness of fit (GOF) test was calculated with 10 groups. Results indicated no violations of goodness of fit assumptions χ2(8)=2.03, p = 0.98. Thus, the model was well-calibrated as the actual outcome (Sensory Difference Present) matched predicted outcomes (predicted Sensory Difference Present) well.
# Model Plot
sand_all %>%
filter(!is.na(sensory_difference_probable_w_na)) %>%
ggplot(aes(x = Total, y = as.numeric(sensory_difference_probable_w_na) - 1, color = "Actual")) +
geom_line(aes(x = Total, y = fitted(binary_mod), color = "Predicted")) +
geom_point(stat = "identity") +
labs(color = "Predicted \n Actual Scores", y = "Probability of Sensory Difference Prosent", x = "Total Score") +
scale_y_continuous(breaks = seq(0, 1, .25), labels = c("No Difference", ".25", ".50", ".75", "Difference")) +
scale_x_continuous(breaks = seq(min(sand_all$Total), max(sand_all$Total), 5)) +
ggtitle("Probability of Sensory Difference-\n Actual and Predicted")
log_model_sum <- summary(glm_sensory_difference_probable_total)
cbind.data.frame(
log_model_sum$coefficients,
"Odds Ratio" = exp(log_model_sum$coefficients[,1]),
"95% CI" = paste0("(",round(exp(log_model_sum$coefficients[,1] - 2*log_model_sum$coefficients[,2]),3), "-", round(exp(log_model_sum$coefficients[,1] + 2*log_model_sum$coefficients[,2]),3),")")
) %>%
mutate_if(is.numeric, round, 3) %>%
table_print(caption = table_counter_func("Logistic Regression Summary Total as Predictor of Sensory Difference Present"), colnames = colnames(.))
| Estimate | Std. Error | z value | Pr(>|z|) | Odds Ratio | 95% CI |
|---|---|---|---|---|---|
| -4.768 | 0.880 | -5.419 | 0 | 0.008 | (0.001-0.049) |
| 0.435 | 0.075 | 5.830 | 0 | 1.546 | (1.331-1.795) |
# Function for probability prediction of glmer and generate predictions for ASD- not used here because no mixed model (glmer)
probability_pred_glmer <- function(x, model_name){
1/(1+exp(-(fixef(model_name)[1] + fixef(model_name)[2]*x)))
}
# Function for probability prediction of glm and generate predictions for ASD- Use to generate one prediction
probability_pred_glm <- function(x, model_name){
1/(1+exp(-(coef(model_name)[1] + coef(model_name)[2]*x)))
}
logit_func = function(x){
exp(x)/(1+exp(x))
}
prediction_function <- function(df, score, model){
df %>%
dplyr::summarize(
inputs = min(!!sym(score)):max(!!sym(score)),
predicted_fit = sapply(inputs, function(x) predict(model, setNames(data.frame(x), quo_name(score)), type='link', se.fit=TRUE)$fit),
probabilities = round(logit_func(predicted_fit),4),
predicted_fit_se = sapply(inputs, function(x) predict(model, setNames(data.frame(x), quo_name(score)), type='link', se.fit=TRUE)$se.fit),
se_low = round(logit_func(predicted_fit - (predicted_fit_se*1.96)),4),
se_high = round(logit_func(predicted_fit + (predicted_fit_se*1.96)), 4)
) %>%
dplyr::filter(se_low > .01 & se_low <= .995) %>%
dplyr::select(inputs, probabilities, se_low, se_high)
}
modify_table_func <- function(model){
model %>%
mutate(
Probabilities = probabilities * 100,
se_low = se_low*100,
se_high = se_high*100
) %>%
dplyr::select(
"Score" = inputs,
"Probabilities" = Probabilities,
"CI Low" = se_low,
"CI High" = se_high)
}
plot_logistic_func <- function(model, name_var){
model %>%
{
ggplot(., aes(x = inputs, y = probabilities)) +
geom_line(colour = "blue") +
geom_ribbon(aes(ymin = se_low,
ymax = se_high),
alpha = 0.5) +
scale_y_continuous(name = "Probability of Sensory Difference Present", limits = c(0,1)) +
scale_x_continuous(name = name_var, breaks = seq(min(.$inputs), max(.$inputs), 2))
}
}
A table was constructed with possible Total Scores ranging from 4 to 29, those scores within 0.01-99.99%, and the associated predicted probabilities of having a sensory difference, along with confidence intervals. This table, along with the probability tables for SAND Scales is in Appendix E.
## Gives logistic plot
total_probability_table <- prediction_function(sand_all, "Total", glm_sensory_difference_probable_total)
plot_logistic_func(total_probability_table, "Total Score")
The accuracy of the SAND in classifying individuals as having a sensory difference was evaluated by calculating sensitivity (cases correctly identified as having a sensory difference among all those with a sensory difference), specificity (cases correctly identified as not having a sensory difference among all those without a sensory difference), and total accuracy (the number of cases classified correctly out of all cases) for all cases in the standardization sample. The following table reports those metrics with the table restricted to the rows with sensitivity and specificity less than 99.99%. This was done to weight both those metrics equally, as using total accuracy may be skewed based on unequal numbers of cases of Sensory Difference Present at different score levels.
# Get accuracy- sensitivity, specificity
accuracy_func <- function(df, scale, score){
df %>%
filter(!is.na(sensory_difference_probable_w_na)) %>%
mutate(critical = ifelse(!!sym(scale) >= score, "Yes", "No")) %>%
dplyr::summarize(tp = sum(critical == "Yes" & sensory_difference_probable_w_na == "Sensory Difference", na.rm = T),
tt = sum(sensory_difference_probable_w_na == "Sensory Difference", na.rm = T),
sensitivity = tp/tt,
tn = sum(critical == "No" & sensory_difference_probable_w_na == "None", na.rm = T),
tf = sum(sensory_difference_probable_w_na == "None", na.rm = T),
specificity = tn/tf,
accuracy = (tp + tn)/ (tt + tf))
}
apply_accuracy_func <- function(df, scale_name){
df %>%
dplyr::summarize(max_score = max(!!sym(scale_name))) %>%
pull(max_score) -> max_score
lapply(1:max_score, function(score) accuracy_func(df, quo_name(scale_name), score)) %>%
bind_rows() %>%
cbind(Score = 1:max_score, .) %>%
dplyr::select(Score, "Sensitivity" = sensitivity, "Specificity" = specificity, "Total Accuracy" = accuracy) %>%
dplyr::filter(Sensitivity < .999 & Specificity < .999) %>%
mutate(across(2:4, ~ round(., 4)*100))
}
accuracy_df <- apply_accuracy_func(sand_all, "Total")
maximum_accuracy_score <- accuracy_df %>%
slice_max(`Total Accuracy`) %>%
dplyr::select(Score)
most_equitable_score <- accuracy_df %>%
rowwise() %>%
mutate(score_difference = abs(Sensitivity - Specificity)) %>%
ungroup() %>%
slice_min(score_difference) %>%
dplyr::select(Score)
youden_score <- accuracy_df %>%
rowwise() %>%
mutate(score_sum = Sensitivity + Specificity) %>%
ungroup() %>%
slice_max(score_sum) %>%
dplyr::select(Score)
accuracy_df %>%
table_print(caption = table_counter_func("Total Score Accuracy Measures"), colnames = colnames(.))
| Score | Sensitivity | Specificity | Total Accuracy |
|---|---|---|---|
| 5 | 99.58 | 23.40 | 86.93 |
| 6 | 99.58 | 27.66 | 87.63 |
| 7 | 99.58 | 42.55 | 90.11 |
| 8 | 98.73 | 44.68 | 89.75 |
| 9 | 97.46 | 65.96 | 92.23 |
| 10 | 97.46 | 70.21 | 92.93 |
| 11 | 97.03 | 82.98 | 94.70 |
| 12 | 97.03 | 87.23 | 95.41 |
| 13 | 94.92 | 91.49 | 94.35 |
| 14 | 92.37 | 93.62 | 92.58 |
| 15 | 90.68 | 95.74 | 91.52 |
| 16 | 88.98 | 97.87 | 90.46 |
| 17 | 86.86 | 97.87 | 88.69 |
| 18 | 86.44 | 97.87 | 88.34 |
| 19 | 83.05 | 97.87 | 85.51 |
| 20 | 79.66 | 97.87 | 82.69 |
The SAND has high accuracy across a range of Total Scores. A score of 12 provided the greatest accuracy. A score of 14 provided the most equitable balance of sensitivity and specificity and a score of 16 provided the highest sum of the two measures (modified Youden Index). The above metrics will inform cutoff score selection, described below.
While a score of 16 reflects optimal summation of sensitivity and specificity, users may define their own cutoff scores based on their needs. A more thorough description of the process of adjusting cutoff scores is described in Appendix G.
The ROC plot illustrates the sensitivity and specificity correspondence. The ROC plot is zoomed in where accuracy is highest and includes labeled scores.
# AUC/ROC
library(pROC)
pred_bin <- predict(glm_sensory_difference_probable_total, type = c("response"))
# roccurve <- roc(binary_dx ~ pred_bin)
roccurve <- roc(sand_all$sensory_difference_probable_w_na[!is.na(sand_all$sensory_difference_probable_w_na)] ~ pred_bin)
plot(roccurve)
auc_mod <- auc(roccurve)
The plot is zoomed in to values given in the above table with the scores labeled.
apply_accuracy_func(sand_all, "Total") %>%
ggplot(., aes(x = round(100 - Specificity,2), y = round(Sensitivity,2), label = Score)) +
geom_point() +
geom_path() +
geom_text(aes(label=Score),hjust=0, vjust=-.5, position = position_jitter(seed = .1)) +
labs(x = "False Positive Rate", y = "True Positive Rate", caption = "True Positive and False Positive Rates \n for Scores within 100% Sensitivity and Specificity") +
ggtitle(figure_counter_func("ROC Plot Restricted to High Accuracy Scores"))
The Area Under the Curve (AUC), or concordance-statistic (c-statistic) was calculated and found to be 0.9773, which is very high, a reflection of very high Sensitivity and Specificity.
The predictive power of the SAND was also calculated. As it applies to the SAND, the positive predictive power was defined as, for a given score, the number of cases with a Sensory Difference Present, divided by the number of cases meeting or exceeding that score, expressed formulaically as:
True Positives / (True Positives + False Positives)
Negative predictive power for a given score is the number of cases accurately identified as not having a sensory difference, divided by the number of cases below that score, expressed formulaically as:
True Negatives / (True Negatives + False Negatives)
# Calculate percent of people correctly classified with different SAND scores- predictive power
predictive_power_func <- function(df, scale, score){
df %>%
filter(!is.na(sensory_difference_probable_w_na)) %>%
mutate(critical = ifelse(!!sym(scale) >= score, "Yes", "No")) %>%
dplyr::summarize(tp = sum(critical == "Yes" & sensory_difference_probable_w_na == "Sensory Difference", na.rm = T),
t_score = sum(critical == "Yes", na.rm = T),
positive_predictive_power = round(tp/t_score, 4),
tn = sum(critical == "No" & sensory_difference_probable_w_na == "None", na.rm = T),
f_score = sum(critical == "No", na.rm = T),
negative_predictive_power = round(tn/f_score, 4)
)
}
apply_predictive_power_func <- function(df, scale_name){
df %>%
dplyr::summarize(max_score = max(!!sym(scale_name))) %>%
pull(max_score) -> max_score
lapply(1:max_score, function(score) predictive_power_func(df, quo_name(scale_name), score)) %>%
bind_rows() %>%
cbind(Score = 1:max_score, .) %>%
dplyr::select(Score, "Positive Predictive Power" = positive_predictive_power, "Negative Predictive Power" = negative_predictive_power,) %>%
dplyr::filter(`Positive Predictive Power` < .999 & `Negative Predictive Power` < .999) %>%
mutate(across(2:3, ~ .*100 %>% round(2)))
}
apply_predictive_power_func(sand_all, "Total") %>%
table_print(caption = table_counter_func("Total Score Predictive Power"), colnames = colnames(.))
| Score | Positive Predictive Power | Negative Predictive Power |
|---|---|---|
| 5 | 86.72 | 91.67 |
| 6 | 87.36 | 92.86 |
| 7 | 89.69 | 95.24 |
| 8 | 89.96 | 87.50 |
| 9 | 93.50 | 83.78 |
| 10 | 94.26 | 84.62 |
| 11 | 96.62 | 84.78 |
| 12 | 97.45 | 85.42 |
| 13 | 98.25 | 78.18 |
| 14 | 98.64 | 70.97 |
| 15 | 99.07 | 67.16 |
| 16 | 99.53 | 63.89 |
| 17 | 99.51 | 59.74 |
| 18 | 99.51 | 58.97 |
| 19 | 99.49 | 53.49 |
| 20 | 99.47 | 48.94 |
The relationship between the Sensory Difference Present variable and SAND Scales (Domain, Modality, and Subscales) was examined. Models were constructed using the maximum score within each scale for each participant as a predictor of Sensory Difference Present. The maximum score for each scale was the score that was of greatest value among each of the scales. As an example, an individual had the following Domain scores:
# Made up example
data.frame(
Domain = domain_scale_names,
`Raw Score` = c(15, 2, 3)
) %>%
table_print(caption = "", colnames = colnames(.))
| Domain | Raw.Score |
|---|---|
| Hyperreactivity | 15 |
| Hyporeactivity | 2 |
| Seeking | 3 |
The maximum Domain score for this individual is 15 in Hyperreactivity. In the case of ties, all of the scale names were noted.
The reason the maximum score was used was to identify salient scales. As in the above example, an individual may have a high level of Hyperreactivity symptoms, but not Hyporeactivity or Seeking, so only Hyperreactivity would be used. Including all Scale scores from each participant might not allow differences among those with sensory differences and those without to surface. Said another way, if individuals tend to have one elevated Domain score, including other scales would dampen importance of the variance of this scale from typical results seen in other scales and from other participants.
## Generates tables for max score
max_scale_all_func <- function(i){
sand_all %>%
filter(!is.na(sensory_difference_probable_w_na)) %>%
mutate(max_score = dplyr::select(., !!!syms(scale_names[[i]])) %>% apply(., 1, max),
max_score_name = dplyr::select(., !!!syms(scale_names[[i]])) %>% apply(., 1, function(x) x[which(x == max(x))] %>% names())) %>%
cbind(., data.frame(stri_list2matrix(.$max_score_name, byrow = TRUE)) %>% setNames(paste0("max_score_name_", seq(1, length(scale_names[[i]]))))
) %>%
dplyr::select(ID, sensory_difference_probable_w_na, max_score, paste0("max_score_name_", seq(1, length(scale_names[[i]])))) %>%
pivot_longer(-c(ID, sensory_difference_probable_w_na, max_score), values_to = "max_score_name") %>%
filter(!is.na(max_score_name))
}
# Output to table
out <- lapply(1:length(scale_names), function(i){
cat(knit_print(
max_scale_all_func(i) %>%
group_by(sensory_difference_probable_w_na, max_score_name) %>%
dplyr::summarize(
n = n(),
Mean = mean(max_score),
SD = sd(max_score),
SEM = psych::describe(max_score)$se) %>%
mutate(across(where(is.numeric), round, 3))%>%
table_print(caption = scale_type[i], colnames = c("Sensory Difference", "Scale", "n", "Mean", "SD", "SEM"))
), "\n\n")
cat('\n')
p<- max_scale_all_func(i) %>%
group_by(max_score_name, sensory_difference_probable_w_na) %>%
dplyr::summarize(mean = mean(max_score),
sd = sd(max_score)
) %>%
{
ggplot(., aes(x = max_score_name, y = mean, fill = sensory_difference_probable_w_na )) +
geom_bar(stat = "identity", position = "dodge") + #= "summary", fun.y = "mean", position = "dodge") +
labs(x = "Scale", y = "Score", fill = "Sensory Difference", caption = "Error Bars +/- 1 SD") +
scale_y_continuous(breaks = seq(0, ceiling(max(.$mean, na.rm = T) + max(.$sd, na.rm = T)), 2)) +
# scale_x_discrete(labels = c(domain_scale_names)) +
geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.2,
position=position_dodge(.9)) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
}
print(p)
cat('\n')
})
| Sensory Difference | Scale | n | Mean | SD | SEM |
|---|---|---|---|---|---|
| None | Hyperreactivity | 20 | 3.100 | 1.774 | 0.397 |
| None | Hyporeactivity | 12 | 3.000 | 1.809 | 0.522 |
| None | Seeking | 29 | 5.310 | 2.917 | 0.542 |
| Sensory Difference | Hyperreactivity | 29 | 11.207 | 4.039 | 0.750 |
| Sensory Difference | Hyporeactivity | 64 | 15.797 | 6.103 | 0.763 |
| Sensory Difference | Seeking | 154 | 16.890 | 5.536 | 0.446 |
| Sensory Difference | Scale | n | Mean | SD | SEM |
|---|---|---|---|---|---|
| None | TotalAuditory | 24 | 4.042 | 2.156 | 0.440 |
| None | TotalTactile | 27 | 4.037 | 2.047 | 0.394 |
| None | TotalVisual | 14 | 3.143 | 1.703 | 0.455 |
| Sensory Difference | TotalAuditory | 70 | 12.929 | 4.486 | 0.536 |
| Sensory Difference | TotalTactile | 137 | 12.496 | 3.829 | 0.327 |
| Sensory Difference | TotalVisual | 60 | 12.067 | 3.677 | 0.475 |
| Sensory Difference | Scale | n | Mean | SD | SEM |
|---|---|---|---|---|---|
| None | A_Hyper | 17 | 2.235 | 1.348 | 0.327 |
| None | A_Hypo | 12 | 1.667 | 0.778 | 0.225 |
| None | A_Seek | 16 | 2.188 | 1.276 | 0.319 |
| None | T_Hyper | 19 | 2.105 | 0.994 | 0.228 |
| None | T_Hypo | 9 | 1.556 | 0.882 | 0.294 |
| None | T_Seek | 21 | 2.619 | 1.658 | 0.362 |
| None | V_Hyper | 5 | 1.200 | 1.095 | 0.490 |
| None | V_Hypo | 7 | 1.429 | 0.976 | 0.369 |
| None | V_Seek | 16 | 2.375 | 1.408 | 0.352 |
| Sensory Difference | A_Hyper | 20 | 6.400 | 1.903 | 0.426 |
| Sensory Difference | A_Hypo | 27 | 7.074 | 2.286 | 0.440 |
| Sensory Difference | A_Seek | 51 | 7.549 | 1.712 | 0.240 |
| Sensory Difference | T_Hyper | 18 | 5.333 | 1.815 | 0.428 |
| Sensory Difference | T_Hypo | 21 | 6.429 | 1.207 | 0.263 |
| Sensory Difference | T_Seek | 101 | 7.614 | 1.816 | 0.181 |
| Sensory Difference | V_Hyper | 6 | 5.333 | 1.966 | 0.803 |
| Sensory Difference | V_Hypo | 28 | 6.393 | 2.114 | 0.400 |
| Sensory Difference | V_Seek | 49 | 7.265 | 1.591 | 0.227 |
As can be seen in the above plots and descriptive tables, individuals with Sensory Difference Present had higher maximum scores across all individual scales than those without Sensory Difference Present. Logistic regression was used to determine if maximum scale scores and the scale from which they came were significant predictors of Sensory Difference Present. First, the interaction between the terms was examined for significance:
scale_names <- list(sand_all %>% dplyr::select(Hyperreactivity:Seeking) %>% colnames, sand_all %>% dplyr::select(TotalVisual:TotalAuditory) %>% colnames, sand_all %>% dplyr::select(V_Hyper:A_Seek) %>% colnames)
scale_range <- list(0:30, 0:30, 0:10)
scale_type <- c("Domain", "Modality", "Subscale")
## Function gets maximum score per participant in each scale- randomly selects max for tie
max_scale_func <- function(i){
sand_all %>% data.frame() %>%
filter(!is.na(sensory_difference_probable_w_na)) %>%
mutate(max_score = dplyr::select(., !!!syms(scale_names[[i]])) %>% apply(., 1, max),
max_score_name = dplyr::select(., !!!syms(scale_names[[i]])) %>% apply(., 1, function(x) x[which(x == max(x))] %>% names() %>% sample(., 1))
)
}
## Function gets maximum score per participant in each scale- selects all for tie
max_scale_all_func <- function(i){
sand_all %>%
filter(!is.na(sensory_difference_probable_w_na)) %>%
mutate(max_score = dplyr::select(., !!!syms(scale_names[[i]])) %>% apply(., 1, max),
max_score_name = dplyr::select(., !!!syms(scale_names[[i]])) %>% apply(., 1, function(x) x[which(x == max(x))] %>% names())) %>%
cbind(., data.frame(stri_list2matrix(.$max_score_name, byrow = TRUE)) %>% setNames(paste0("max_score_name_", seq(1, length(scale_names[[i]]))))
) %>%
dplyr::select(ID, sensory_difference_probable_w_na, max_score, paste0("max_score_name_", seq(1, length(scale_names[[i]])))) %>%
pivot_longer(-c(ID, sensory_difference_probable_w_na, max_score), values_to = "max_score_name") %>%
filter(!is.na(max_score_name))
}
# Applies the function that gets all max scales in case of tie- conducts glm and outputs results in table for all scales
name_scales <- c("Domain", "Modality", "Subscales")
lapply(1:length(scale_names), function(i){
glm(sensory_difference_probable_w_na ~ max_score*max_score_name, data = max_scale_all_func(i), family = binomial) -> full
glm(sensory_difference_probable_w_na ~ max_score + max_score_name, data = max_scale_all_func(i), family = binomial) -> reduced
lrtest(full, reduced) -> out
data.frame(Scale = name_scales[i], DF = abs(out$Df[-1]), X2 = out$Chisq[-1], p = out$`Pr(>Chisq)`[-1])
}) %>%
bind_rows() %>%
mutate(across(where(is.numeric),round,3)) %>%
table_print(caption = table_counter_func("Logistic Regression Test for Interaction between Maximum
Scale Score and Scale Name"), colnames = colnames(.))
| Scale | DF | X2 | p |
|---|---|---|---|
| Domain | 2 | 4.857 | 0.088 |
| Modality | 2 | 0.867 | 0.648 |
| Subscales | 8 | 6.928 | 0.544 |
## Logistic for max scores for each scale
lapply(1:length(scale_names), function(i){
glm(sensory_difference_probable_w_na ~ max_score + max_score_name, data = max_scale_all_func(i), family = binomial) -> full
glm(sensory_difference_probable_w_na ~ max_score_name, data = max_scale_all_func(i), family = binomial) -> reduced
lrtest(full, reduced) -> out
data.frame(Scale = name_scales[i], DF = abs(out$Df[-1]), X2 = out$Chisq[-1], p = gsub("p ", "", p_value_func(out$`Pr(>Chisq)`[-1])))
}) %>%
bind_rows() %>%
mutate(across(where(is.numeric),round,3)) %>%
table_print(caption = table_counter_func("Logistic Regression Test of Maximum Score"), colnames = colnames(.))
| Scale | DF | X2 | p |
|---|---|---|---|
| Domain | 1 | 191.487 | < .001 |
| Modality | 1 | 214.001 | < .001 |
| Subscales | 1 | 343.683 | < .001 |
None of the interaction effects were significant. The main effects for maximum score were significant, with strong effects. In terms of practical application, this suggests that scores within Scales are significant predictors of Sensory Difference Present. Nonsignificant interaction effects suggest that one cutoff score can be estimated per scale, rather than separate cutoff scores for each individual Scale.
Scores were also examined to determine if different diagnoses were associated with different patterns of scores (yielding maximum scores from certain scales) by examining for interaction effects between maximum Scale name and Sensory Difference Present for Domain, Modality, and Subscales, with nonsignificant findings for each.
# Look for maximum score with a certain subtype of scale
max_scale_dx_func <- function(i){
sand_all %>%
filter(!is.na(sensory_difference_probable_w_na)) %>%
mutate(max_score = dplyr::select(., !!!syms(scale_names[[i]])) %>% apply(., 1, max),
max_score_name = dplyr::select(., !!!syms(scale_names[[i]])) %>% apply(., 1, function(x) x[which(x == max(x))] %>% names())) %>%
cbind(., data.frame(stri_list2matrix(.$max_score_name, byrow = TRUE)) %>% setNames(paste0("max_score_name_", seq(1, length(scale_names[[i]]))))
) %>%
dplyr::select(ID, sensory_difference_probable_w_na, dx, max_score, paste0("max_score_name_", seq(1, length(scale_names[[i]])))) %>%
pivot_longer(-c(ID, sensory_difference_probable_w_na, dx, max_score), values_to = "max_score_name") %>%
filter(!is.na(max_score_name))
}
# Check for domain differences by dx
max_scale_dx_func(3) -> max_data
max_data %>%
dplyr::filter(dx == "PMS") %>%
lm(max_score ~ max_score_name, data = .) -> mod_check_pms_max_score
lapply(1:length(scale_names), function(i){
max_scale_dx_func(i) -> max_data
max_data %>%
dplyr::filter(dx != "TD") %>%
lm(max_score ~ max_score_name*dx, data = .) -> mod_check_dx_max_score
max_data %>%
dplyr::filter(dx != "TD") %>%
lm(max_score ~ max_score_name + dx, data = .) -> mod_check_main_dx_max_score
lrtest(mod_check_dx_max_score, mod_check_main_dx_max_score)-> out
data.frame(Scale = name_scales[i], DF = abs(out$Df[-1]), X2 = out$Chisq[-1], p = out$`Pr(>Chisq)`[-1])
}) %>%
bind_rows() %>%
mutate(across(where(is.numeric),round,3)) %>%
table_print(caption = "", colnames = colnames(.))
| Scale | DF | X2 | p |
|---|---|---|---|
| Domain | 8 | 11.487 | 0.176 |
| Modality | 9 | 15.410 | 0.080 |
| Subscales | 25 | 26.585 | 0.377 |
Logistic models were used to generate tables with the probability of having a sensory difference at given scores and are presented in Appendix E. Tables of accuracies of classifying Sensory Difference Present at given scores for SAND Domains, Modalities, and Subscales were generated and follow.
Cutoff Scores The above information was used to create cutoff scores for Scales. There were several sources of information upon which to base cutoff scores:
Above criteria were mostly consistent. It was decided to create the following two scoring levels (except for Subscales, with only one cutoff) with the following characteristics: 1. Clinically Significant a. Highest balance of sensitivity and specificity b. Approximately two SD from the mean c. Strong likelihood of presence of a sensory symptom 2. Probable a. Overlapping CI with Clinically Significant b. Approximately one SD from the mean c. More likely than not presence of sensory symptoms
The following table identifies raw score cutoff scores for the different Scales. Individuals meeting or exceeding the highest cutoff score in that Scale should be interpreted as follows:
classification_data <- readxl::read_xlsx("classification_data.xlsx", sheet = 1, skip = 1) %>%
dplyr::filter(!is.na(`MQ ppt number`) & Total != 0 & !is.na(Age)) %>%
dplyr::rename(age = Age) %>%
dplyr::mutate(sex = factor(dplyr::recode(`Response (1 = male, 2 = female)`, "1" = "male", "2" = "female"), levels = c("male", "female"))) %>%
mutate(across(c(SAND_V1.1O:Total, age), as.numeric))
classification_data %>%
dplyr::select(Total, age, `A. White`: `Any other, please specify`) %>%
mutate(across(everything()[-(1:2)], as.character)) %>%
pivot_longer(., cols = everything()[-(1:2)], names_to = "race") %>%
filter(!is.na(value)) %>%
mutate(race =
case_when(grepl("mixed", tolower(race)) ~ "Biracial",
grepl("other", race) ~ "Other Race",
grepl("&", race) ~ "Biracial",
race == "A. White" ~ "American White",
race == "British" ~ "British White",
grepl("Pakistani|Indian|Asian", race) ~ "Asian",
TRUE ~ race) #%>%
# gsub("&", "Biracial", tolower(race))
) %>%
group_by(Race = race) %>%
dplyr::summarize(
n = n(),
`Total Mean(SD)` = paste0(round(mean(Total),2), "(", round(sd(Total),2), ")"),
`Age Mean(SD)` = paste0(round(mean(age),2), "(", round(sd(age),2), ")")) %>%
table_print(caption = table_counter_func("Race/Ethnicity Demographics"), colnames = colnames(.))
| Race | n | Total Mean(SD) | Age Mean(SD) |
|---|---|---|---|
| American White | 25 | 32.48(9.05) | 3.72(0.79) |
| Asian | 15 | 41.6(7.15) | 4.2(0.68) |
| Biracial | 7 | 34.43(5.86) | 4.29(0.76) |
| British White | 34 | 33.53(9.32) | 3.82(0.76) |
| Other Race | 3 | 33(11.14) | 4(0) |
classification_data %>%
group_by(Sex = sex) %>%
dplyr::summarize(n = n(),
`Total Mean(SD)` = paste0(round(mean(Total),2), "(", round(sd(Total),2), ")"),
`Age Mean(SD)` = paste0(round(mean(age),2), "(", round(sd(age),2), ")")) %>%
bind_rows(., classification_data %>%
dplyr::summarize(
Sex = "All",
n = n(),
`Total Mean(SD)` = paste0(round(mean(Total),2), "(", round(sd(Total),2), ")"),
`Age Mean(SD)` = paste0(round(mean(age),2), "(", round(sd(age),2), ")")
)
) %>%
table_print(caption = table_counter_func("Sex Demographics"), colnames = colnames(.))
| Sex | n | Total Mean(SD) | Age Mean(SD) |
|---|---|---|---|
| male | 46 | 35.26(9.74) | 4.02(0.75) |
| female | 13 | 34.77(7.04) | 3.92(0.76) |
| All | 59 | 35.15(9.16) | 4(0.74) |
The accuracies of the above cutoff scores in appropriately classifying individuals as having a clinically meaningful sensory difference are described in this section.
A unique dataset of 59 children with autism were recruited for the current study in the United Kingdom, by SAND co-author Dr. Teresa Tavassoli.
Participants were administered the SAND. Total and Scale scores were calculated. The number and percent of participants meeting clinical cutoff scores were calculated.
“Clinically Significant” cutoff scores were first used (with the exception of Subscales, which have only one cutoff).
subset_condition <- merge(c("SAND_V", "SAND_T", "SAND_A"), c("1", "2", "3"), all = T) %>% unite(., subscales, c(x,y), sep = "") %>% pull()
lapply(subset_condition, function(subset_condition) classification_data %>% dplyr::select(., matches(subset_condition)) %>% rowSums()) %>% cbind.data.frame() %>% setNames(subscale_names) -> subscale_df
classification_data %>%
filter(!is.na(Total)) %>%
dplyr::summarize(clinical = sum(Total >= 16),
total = n(),
percent_identified = (clinical/total)*100) %>%
t() %>%
cbind.data.frame(.,
mapply(function(df, scale_names, score){
df %>% data.frame() %>%
dplyr::select(!!!syms(scale_names)) %>%
filter(!is.na(.)) %>%
mutate(max_score = apply(., 1, max),
max_score_name = apply(., 1, function(x) x[which(x == max(x))] %>% names() %>% sample(., 1))
) %>%
dplyr::summarize(clinical = sum(max_score >= score),
total = n(),
percent_identified = (clinical/total) * 100) %>% data.frame() #table_print(caption = "", colnames = colnames(.))
}, list(classification_data, classification_data, subscale_df), scale_names, list(8,8,5)
)
) %>% rownames_to_column(var = "rownames") %>%
mutate(rownames = c("Sensory Difference", "Meets Clinical Criteria", "Percent")) %>%
table_print(colnames = c("", "Total", "Domain", "Modality", "Subscale"), caption = table_counter_func("Participants Meeting Clinically Significant Criteria (n=59) for SAND Scales"))
| Total | Domain | Modality | Subscale | |
|---|---|---|---|---|
| Sensory Difference | 59 | 59 | 59 | 59 |
| Meets Clinical Criteria | 59 | 59 | 59 | 59 |
| Percent | 100 | 100 | 100 | 100 |
All individuals were at, or above the Clinically Significant cutoff criteria on all SAND scales. Thus, all individuals would also fall above “Elevated” cutoff scores.
One hundred percent (100%) of individuals with autism spectrum disorder met Clinically Significant SAND classification criteria. The SAND seems to show great sensitivity in identifying individuals with ASD, a disorder for which sensory differences are one defining criterion.
Only partial table shown. Consult Manual for full table.
z_score_table_func("Total", 0:90) %>%
head() %>% # included for public display
table_print(caption = "Total", colnames = colnames(.))
| Score | Z-Score | Percentiles |
|---|---|---|
| 0 | -2.08 | 1.90 |
| 1 | -1.83 | 3.37 |
| 2 | -1.58 | 5.67 |
| 3 | -1.34 | 9.07 |
| 4 | -1.09 | 13.78 |
| 5 | -0.84 | 19.93 |
out <- lapply(1:length(scale_names), function(i) cat(knit_print(z_score_table_func(scale_names[[i]], scale_range[[i]]) %>%
head() %>% # included for public display
table_print(caption= scale_type[i], colnames = colnames(.))), "\n\n"))
| Score | Z-Score | Percentiles |
|---|---|---|
| 0 | -2.01 | 2.21 |
| 1 | -1.64 | 5.08 |
| 2 | -1.26 | 10.35 |
| 3 | -0.89 | 18.77 |
| 4 | -0.51 | 30.45 |
| 5 | -0.14 | 44.58 |
| Score | Z-Score | Percentiles |
|---|---|---|
| 0 | -2.22 | 1.32 |
| 1 | -1.75 | 4.05 |
| 2 | -1.27 | 10.18 |
| 3 | -0.80 | 21.26 |
| 4 | -0.32 | 37.32 |
| 5 | 0.15 | 55.99 |
| Score | Z-Score | Percentiles |
|---|---|---|
| 0 | -2.25 | 1.23 |
| 1 | -1.57 | 5.85 |
| 2 | -0.89 | 18.75 |
| 3 | -0.21 | 41.82 |
| 4 | 0.47 | 68.22 |
| 5 | 1.15 | 87.58 |
Only partial table shown. Consult Manual for full table.
total_probability_table <- prediction_function(sand_all, "Total", glm_sensory_difference_probable_total)
modify_table_func(total_probability_table) %>%
head() %>% # included for public display
table_print(caption = "Total", colnames = colnames(.))
| Score | Probabilities | CI Low | CI High |
|---|---|---|---|
| 4 | 4.63 | 1.46 | 13.70 |
| 5 | 6.97 | 2.53 | 17.77 |
| 6 | 10.38 | 4.34 | 22.84 |
| 7 | 15.19 | 7.28 | 29.01 |
| 8 | 21.68 | 11.83 | 36.35 |
| 9 | 29.96 | 18.38 | 44.83 |
out <- lapply(1:length(scale_names), function(i) {
glm_scale_max <- glm(sensory_difference_probable_w_na ~ max_score, data = max_scale_all_func(i), family = binomial)
total_probability_table <- prediction_function(max_scale_all_func(i), "max_score", glm_scale_max)
cat(knit_print(
modify_table_func(total_probability_table) %>%
head() %>% # included for public display
table_print(caption = name_scales[i], colnames = colnames(.))
), "\n\n")
})
| Score | Probabilities | CI Low | CI High |
|---|---|---|---|
| 1 | 3.26 | 1.09 | 9.35 |
| 2 | 6.11 | 2.45 | 14.40 |
| 3 | 11.16 | 5.38 | 21.72 |
| 4 | 19.51 | 11.22 | 31.74 |
| 5 | 31.88 | 21.41 | 44.56 |
| 6 | 47.46 | 35.81 | 59.40 |
| Score | Probabilities | CI Low | CI High |
|---|---|---|---|
| 2 | 4.90 | 1.98 | 11.60 |
| 3 | 10.64 | 5.31 | 20.20 |
| 4 | 21.58 | 13.13 | 33.39 |
| 5 | 38.88 | 27.92 | 51.10 |
| 6 | 59.52 | 47.67 | 70.35 |
| 7 | 77.26 | 66.11 | 85.55 |
| Score | Probabilities | CI Low | CI High |
|---|---|---|---|
| 1 | 3.28 | 1.55 | 6.81 |
| 2 | 11.46 | 6.86 | 18.53 |
| 3 | 33.11 | 24.46 | 43.07 |
| 4 | 65.42 | 55.36 | 74.27 |
| 5 | 87.85 | 80.53 | 92.67 |
| 6 | 96.51 | 92.80 | 98.34 |
max_scale_all_func <- function(i){
sand_all %>%
filter(!is.na(sensory_difference_probable_w_na)) %>%
mutate(max_score = dplyr::select(., !!!syms(scale_names[[i]])) %>% apply(., 1, max),
max_score_name = dplyr::select(., !!!syms(scale_names[[i]])) %>% apply(., 1, function(x) x[which(x == max(x))] %>% names())) %>%
cbind(., data.frame(stri_list2matrix(.$max_score_name, byrow = TRUE)) %>% setNames(paste0("max_score_name_", seq(1, length(scale_names[[i]]))))
) %>%
dplyr::select(ID, sensory_difference_probable_w_na, max_score, paste0("max_score_name_", seq(1, length(scale_names[[i]])))) %>%
pivot_longer(-c(ID, sensory_difference_probable_w_na, max_score), values_to = "max_score_name") %>%
filter(!is.na(max_score_name))
}
out <- lapply(1:length(scale_names), function(i) {
cat(knit_print(
apply_accuracy_func(max_scale_all_func(i), "max_score") %>%
table_print(caption = name_scales[i], colnames = colnames(.))
))
})
| Score | Sensitivity | Specificity | Total Accuracy |
|---|---|---|---|
| 5 | 97.57 | 70.49 | 92.21 |
| 6 | 97.17 | 70.49 | 91.88 |
| 7 | 93.93 | 83.61 | 91.88 |
| 8 | 91.50 | 90.16 | 91.23 |
| 9 | 87.04 | 91.80 | 87.99 |
| 10 | 84.21 | 96.72 | 86.69 |
| 11 | 80.97 | 98.36 | 84.42 |
| 12 | 76.52 | 98.36 | 80.84 |
| Score | Sensitivity | Specificity | Total Accuracy |
|---|---|---|---|
| 3 | 99.25 | 23.08 | 84.34 |
| 4 | 98.88 | 29.23 | 85.24 |
| 5 | 97.00 | 76.92 | 93.07 |
| 6 | 95.88 | 84.62 | 93.67 |
| 7 | 92.51 | 90.77 | 92.17 |
| 8 | 89.51 | 93.85 | 90.36 |
| 9 | 86.89 | 98.46 | 89.16 |
| 10 | 78.28 | 98.46 | 82.23 |
| Score | Sensitivity | Specificity | Total Accuracy |
|---|---|---|---|
| 3 | 96.88 | 81.97 | 92.78 |
| 4 | 95.02 | 86.07 | 92.55 |
| 5 | 90.34 | 93.44 | 91.20 |
| 6 | 81.62 | 98.36 | 86.23 |