# Libraries
pacman::p_load(knitr, # rendering of .Rmd file
               lme4, # model specification/estimation
               afex, # anova and deriving p-values via LRTs from lmer
               broom.mixed, # extracting data from model fits
               faux, # generate correlated values
               tidyverse, # data wrangling and visualization 
               ggridges, # visualization
               viridis, # color schemes for visualization
               kableExtra, # helps with knitting the .Rmd file
               cowplot, # visualization tool to include multiple plots
               ggrain, # visualization tool for raincloud plots
               dplyr, # data manipulation tools
               here, # controlling directories/reproducibility
               tidyr, # formatting package
               readr, # data importation
               Hmisc, # visualization
               emmeans, # estimation of marginal means and contrasts
               sjstats, # estimation of marginal means and contrasts
               lmerTest, # deriving p-values from lmer
               MuMIn, # ?
               sjPlot, # APA formatting of numerical output
               car, # data wrangling and visualization,
               magrittr, # to use %>%
               ggpubr, # ? (Rose had it in her script)
               broom, # ? (Rose had it in her script)
               Matrix, # model specification/estimation,
               multcomp, # multiple comparison adjustments
               forcats, # organizing visualizations
               gtools, # data manipulation/wrangling
               fuzzySim, # FDR correction,
               ez, # anova
               apaTables, # visualization of anova output
               #renv # package versioning/reproducibility
               )
## 
## Your package installed
## Warning in pacman::p_load(knitr, lme4, afex, broom.mixed, faux, tidyverse, : Failed to install/load:
# Global theme set
set.seed(1234) # set random seed for reproducibility

knitr::opts_chunk$set(echo = TRUE, collapse = TRUE)
knitr::opts_chunk$set(tidy.opts=list(width.cutoff=60),tidy=TRUE, fig.width=10, fig.height=8, fig.fullwidth=TRUE)

plot_theme <- 
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5, size = 18), 
        axis.text.x = element_text(size = 13), 
        axis.title.x = element_text(size = 13), 
        axis.text.y = element_text(size = 12), 
        axis.title.y = element_text(size = 13),
        strip.background = element_rect(color="white", fill="white", linewidth = 1.5, linetype="solid"))

options(scipen=999)

Data Manipulations

# Load data
data <- read.csv(here("2025 S3 Data.csv"), stringsAsFactors=TRUE) 

# Format data
data %<>%
  mutate(Participant.ID = factor(Participant.ID),
         lang = factor(Language),
         lang.c = as.numeric(Language)-1.5,
         social.percep = as.numeric(Social.Perception),
         celf.c = as.numeric(scale(CELF.ENG, center = TRUE, scale = FALSE)),
         lang.dev = factor(Lang.Dev.Status),
         lang.dev.c = as.numeric(Lang.Dev.Status) - 1.5,
         age.c = as.numeric(scale(Age, center = TRUE, scale = FALSE)),
         child.ethnicity = factor(Ethnicity),
         child.ethnicity.c = as.numeric(Ethnicity) - 1.5,
         s.w = factor(Strength.Weakness),
         #child.race = factor (child.race),
         #free.red.lunch.c = as.numeric(free.or.reduced.lunch) - 1.5,
         gender.c = as.numeric(Gender) - 1.5)
str(data)
## 'data.frame':    19 obs. of  24 variables:
##  $ Participant.ID   : Factor w/ 19 levels "030422_1M_11",..: 1 2 3 5 4 6 8 13 14 16 ...
##  $ Gender           : Factor w/ 2 levels "Female","Male ": 2 1 1 1 1 2 2 1 1 1 ...
##  $ DOB              : Factor w/ 19 levels "1/20/14","10/21/11",..: 6 13 17 12 9 3 15 18 16 5 ...
##  $ DOT1             : Factor w/ 18 levels "10/3/23","3/10/22",..: 6 6 2 4 3 5 8 17 13 15 ...
##  $ Age              : num  11.34 9.71 10.59 10.74 11.84 ...
##  $ Language         : Factor w/ 2 levels "ENG","ENG/SPAN": 1 1 1 1 1 1 2 1 2 1 ...
##  $ Social.Perception: int  1 1 0 1 1 0 0 0 0 0 ...
##  $ Strength.Weakness: Factor w/ 2 levels "Strength","Weakness": 1 1 2 1 1 2 2 2 2 2 ...
##  $ Statement        : Factor w/ 19 levels "Because is hard to ask people if I could be their friend.",..: 16 19 18 14 9 15 12 6 5 7 ...
##  $ CELF.ENG         : int  108 121 123 123 111 115 132 111 99 123 ...
##  $ CELF.SPAN        : int  NA NA NA NA NA NA 102 NA 75 NA ...
##  $ Lang.Dev.Status  : Factor w/ 2 levels "DLD","TD": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Ethnicity        : Factor w/ 3 levels "","Hispanic/Latino/Spanish",..: 3 3 3 1 3 3 2 3 2 2 ...
##  $ lang             : Factor w/ 2 levels "ENG","ENG/SPAN": 1 1 1 1 1 1 2 1 2 1 ...
##  $ lang.c           : num  -0.5 -0.5 -0.5 -0.5 -0.5 -0.5 0.5 -0.5 0.5 -0.5 ...
##  $ social.percep    : num  1 1 0 1 1 0 0 0 0 0 ...
##  $ celf.c           : num  8.11 21.11 23.11 23.11 11.11 ...
##  $ lang.dev         : Factor w/ 2 levels "DLD","TD": 2 2 2 2 2 2 2 2 2 2 ...
##  $ lang.dev.c       : num  0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
##  $ age.c            : num  0.5844 -1.043 -0.1608 -0.0128 1.0913 ...
##  $ child.ethnicity  : Factor w/ 3 levels "","Hispanic/Latino/Spanish",..: 3 3 3 1 3 3 2 3 2 2 ...
##  $ child.ethnicity.c: num  1.5 1.5 1.5 -0.5 1.5 1.5 0.5 1.5 0.5 0.5 ...
##  $ s.w              : Factor w/ 2 levels "Strength","Weakness": 1 1 2 1 1 2 2 2 2 2 ...
##  $ gender.c         : num  0.5 -0.5 -0.5 -0.5 -0.5 0.5 0.5 -0.5 -0.5 -0.5 ...

# Notes:
# For gender, female = -0.5 (after centering), male = 0.5 (after centering)
# For language, English = -0.5 (after centering), English/Spanish = 0.5 (after centering)
# For language development status, DLD = -0.5 (after centering), TD = 0.5 (after centering)
# For ethnicity, Hispanic/Latino/Spanish = -0.5 (after centering), Not Hispanic/Latino/Spanish = 0.5 (after centering)
# For social perception, weakness = 0, strength = 1

Participants

Demographics

# Age
data %>%
  distinct(Participant.ID, Age)%>%
  summarise_at('Age', 
               list(~mean(., na.rm=T), 
                    ~sd(., na.rm=T),
                    ~median(., na.rm=T),
                    ~min(., na.rm=T),
                    ~max(., na.rm=T),
                    ~sum(!is.na(.))))%>%
  dplyr::rename("n" = "sum")%>%
  dplyr::select(n, mean, sd, median, min, max)%>%
  mutate(total.n = sum(n))%>%
  knitr::kable()
n mean sd median min max total.n
19 10.75256 1.025912 10.7863 9.120548 12.7863 19

# Gender
data %>%
  distinct(Participant.ID, Gender)%>%
  group_by(Gender)%>%
  summarise(n = n())%>%
  mutate(total.Gender.n = sum(n))%>%
  knitr::kable()
Gender n total.Gender.n
Female 10 19
Male 9 19

# Language
data %>%
  distinct(Participant.ID, lang)%>%
  group_by(lang)%>%
  summarise(n = n())%>%
  mutate(lang.n = sum(n))%>%
  knitr::kable()
lang n lang.n
ENG 15 19
ENG/SPAN 4 19

# Language Development Status
data %>%
  distinct(Participant.ID, Lang.Dev.Status)%>%
  group_by(Lang.Dev.Status)%>%
  summarise(n = n())%>%
  mutate(Lang.Dev.Status.n = sum(n))%>%
  knitr::kable()
Lang.Dev.Status n Lang.Dev.Status.n
DLD 4 19
TD 15 19

# # Race
# data %>%
#   distinct(Participant.ID, child.race)%>%
#   group_by(child.race)%>%
#   summarise(n = n())%>%
#   mutate(total.child.race.n = sum(n))%>%
#   knitr::kable()

# Ethnicity
data %>%
  distinct(Participant.ID, child.ethnicity)%>%
  group_by(child.ethnicity)%>%
  summarise(n = n())%>%
  mutate(total.child.ethnicity.n = sum(n))%>%
  knitr::kable()
child.ethnicity n total.child.ethnicity.n
1 19
Hispanic/Latino/Spanish 7 19
Not Hispanic/Latino/Spanish 11 19

Descriptives

# CELF-4 standard scores
data %>%
  distinct(Participant.ID, CELF.ENG)%>%
  summarise_at('CELF.ENG', 
               list(~mean(., na.rm=T), 
                    ~sd(., na.rm=T),
                    ~median(., na.rm=T),
                    ~min(., na.rm=T),
                    ~max(., na.rm=T),
                    ~sum(!is.na(.))))%>%
  dplyr::rename("n" = "sum")%>%
  dplyr::select(n, mean, sd, median, min, max)%>%
  mutate(total.n = sum(n))%>%
  knitr::kable()
n mean sd median min max total.n
19 99.89474 24.50373 108 46 132 19

Visualizations

ggplot(data, aes(x = factor(s.w), y = CELF.ENG)) +
  geom_boxplot(color = "black", outlier.shape = NA, alpha = 0.3) +  # Single black boxplot per category
  geom_jitter(aes(color = factor(s.w), shape = factor(lang)), width = 0.2, alpha = 0.7) +  # Keep color/shape mapping for points
  scale_color_manual(values = c("Weakness" = "red", "Strength" = "blue")) +  # Red for weakness, blue for strength
  geom_hline(yintercept = 80, linetype = "dashed", color = "gray") +  # Dashed gray line at y = 80
  scale_shape_manual(values = c("ENG" = 16, "ENG/SPAN" = 17), 
                     labels = c("ENG" = "Monolingual", "ENG/SPAN" = "Bilingual")) +  # Shape and labels for language
    scale_y_continuous(breaks = seq(floor(min(data$CELF.ENG)-6), ceiling(max(data$CELF.ENG)+6), by = 10)) +  # Ensure labels at every gridline
labs(x = "Self-Perception of Ability to Make Friends", y = "CELF-4 Standard Score", 
       color = "Self-Perception", shape = "Language Group") +
  theme_minimal()+
  theme(
    axis.title.x = element_text(size = 16, face = "bold", margin = margin(t = 10)),  # Add space between x-axis title and labels
    axis.title.y = element_text(size = 16, face = "bold"),  # Make y-axis title bold and larger
    axis.text = element_text(size = 14),  # Increase font size of axis values
    legend.text = element_text(size = 14),  # Increase font size of legend text
    legend.title = element_text(size = 16, face = "bold")  # Make legend title bold and larger
  )


#legend <- cowplot::get_legend(plot)

#grid.newpage()
#grid.draw(legend)

Analyses

data.m1 <- glm(social.percep ~ celf.c + gender.c + age.c + lang.c + lang.dev.c + child.ethnicity.c, data=data) #converges and all that jazz
tab_model(data.m1, show.se = TRUE,show.ci = FALSE, transform = "exp") #,show.stat = TRUE, show.df = TRUE)
  social.percep
Predictors Estimates std. Error p
(Intercept) 1.69 0.41 0.033
celf c 0.98 0.01 0.067
gender c 1.22 0.29 0.404
age c 0.89 0.10 0.302
lang c 0.61 0.19 0.119
lang dev c 2.55 1.43 0.095
child ethnicity c 0.75 0.16 0.178
Observations 19
R2 0.407

# data.m2 <- lm(celf.c ~ s.w + gender.c + age.c + lang.c + lang.dev.c + child.ethnicity.c, data=data) #converges and all that jazz
# tab_model(data.m2, show.se = TRUE,show.ci = FALSE, transform = "exp") #,show.stat = TRUE, show.df = TRUE)

# Notes:
# For gender, female = -0.5 (after centering), male = 0.5 (after centering)
# For language development status, DLD = -0.5 (after centering), TD = 0.5 (after centering)
# For ethnicity, Hispanic/Latino/Spanish = -0.5 (after centering), Not Hispanic/Latino/Spanish = 0.5 (after centering)
# For social perception, weakness = 0, strength = 1
Summary of the results:

There are no within-subject or within-item measures (only a single summary data point from each participant for each measure), so a LMER is not appropriate; a simple (logistic) linear regression is. When controlling for the other variables (i.e., gender, age, language spoken, language development status, ethnicity), CELF-4 Core Language Scores do not significantly predict the likelihood a child will perceive their social abilities as a strength. The relationship does trend toward significance, suggesting that if the same pattern held with a larger sample size, we might find that higher CELF-4 CLS scores actually predict that a child is more likely to perceive their social abilities as a weakness (not a strength). The value of 0.98 means means that for each one-point increase in CELF-4 CLS, the odds of a child perceiving their social abilities as a strength decrease by about 1.8%. However, this is a relatively small effect. Thus, language ability alone does not appear to be a strong predictor of how social abilities are perceived.