Init

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## Loading required package: robustbase
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
  rms,
  mgcv,
  mirt
)

## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## 
## The following object is masked from 'package:psych':
## 
##     describe
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse
## 
## This is mgcv 1.9-3. For overview type 'help("mgcv-package")'.
## Loading required package: stats4
## Loading required package: lattice
## 
## Attaching package: 'mirt'
## 
## The following object is masked from 'package:nlme':
## 
##     fixef

theme_set(theme_bw())

options(
    digits = 3
)

#multithreading
#library(future)
#plan(multisession(workers = 8))

Functions

Data

d_diamonds = diamonds

#vocab data
d_vocab_irt = read_rds("data/good_items_fit.rds")
d_vocab_items = d_vocab_irt@Data$data

d_vocab = read_rds("data/vocab study data.rds")

Analysis

Diamonds

#plot quantiles
d_diamonds %>%
  GG_quantiles("price", "carat", method = "Rq")

## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5

GG_save("figs/diamond carat quantiles by price.png")

#mean regression as normal
d_diamonds %>%
  ggplot(aes(price, carat)) +
  geom_point(alpha = 0.1) +
  geom_smooth()

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

GG_save("figs/diamond carat mean regression by price.png")

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

d_diamonds[-c(2, 3, 4)] %>% wtd.cors()

##        carat   depth  table   price       x       y      z
## carat 1.0000  0.0282  0.182  0.9216  0.9751  0.9517 0.9534
## depth 0.0282  1.0000 -0.296 -0.0106 -0.0253 -0.0293 0.0949
## table 0.1816 -0.2958  1.000  0.1271  0.1953  0.1838 0.1509
## price 0.9216 -0.0106  0.127  1.0000  0.8844  0.8654 0.8612
## x     0.9751 -0.0253  0.195  0.8844  1.0000  0.9747 0.9708
## y     0.9517 -0.0293  0.184  0.8654  0.9747  1.0000 0.9520
## z     0.9534  0.0949  0.151  0.8612  0.9708  0.9520 1.0000

#residualize carat by price
gam_fit_spline = gam(carat ~ s(price), data = d_diamonds)
gam_fit_spline %>% summary()

## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## carat ~ s(price)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.797940   0.000649    1229   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##           edf Ref.df     F p-value    
## s(price) 8.61   8.95 53509  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.899   Deviance explained = 89.9%
## GCV = 0.022746  Scale est. = 0.022742  n = 53940

#cor equi
gam_fit_spline %>% summary() %>% .$r.sq %>% sqrt()

## [1] 0.948

d_diamonds = d_diamonds %>%
  mutate(
    carat_resid = resid(gam_fit_spline)
  )

#test HS
test_HS(d_diamonds$carat_resid, d_diamonds$price)

#check residuals
d_diamonds %>%
  ggplot(aes(price, carat_resid)) +
  geom_point(alpha = 0.1) +
  geom_smooth()

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

GG_save("figs/diamond carat residuals by price.png")

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

#predict abs residual
lm_fit_resids = lm(abs(carat_resid) ~ price, data = d_diamonds)
lm_fit_resids %>% summary()

## 
## Call:
## lm(formula = abs(carat_resid) ~ price, data = d_diamonds)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3288 -0.0405 -0.0123  0.0269  2.6299 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.18e-02   5.90e-04    53.8   <2e-16 ***
## price       1.61e-05   1.05e-07   153.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0976 on 53938 degrees of freedom
## Multiple R-squared:  0.303,  Adjusted R-squared:  0.303 
## F-statistic: 2.34e+04 on 1 and 53938 DF,  p-value: <2e-16

#save preds and
#divide by predicted abs resid to get z scores
d_diamonds = d_diamonds %>%
  mutate(
    carat_resid_abs_pred = predict(lm_fit_resids),
    carat_z = carat_resid / carat_resid_abs_pred
  )

#test HS
test_HS(d_diamonds$carat_z, d_diamonds$price)

#check z scores
d_diamonds %>%
  ggplot(aes(price, carat_z)) +
  geom_point(alpha = 0.1) +
  geom_smooth()

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

GG_save("figs/diamond carat z scores by price.png")

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

#quantile regression again
d_diamonds %>%
  GG_quantiles("price", "carat_z", method = "Rq")

## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5

GG_save("figs/diamond carat z score quantiles by price.png")

#convert to real z-scores
d_diamonds = d_diamonds %>%
  mutate(
    carat_z_real = carat_z %>% standardize()
  )

Vocab

#load vocab data
d_vocab$vocab_sumscore = d_vocab_items %>% rowSums()

#plot quantiles
d_vocab %>%
  GG_quantiles("age", "vocab_sumscore")

d_vocab %>%
  GG_quantiles("age", "vocab_sumscore", method = "Rq")

## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5

#try the g scores
d_vocab$vocab_g = fscores(d_vocab_irt)[, 1] %>% standardize()

d_vocab %>%
  GG_quantiles("age", "vocab_g") +
  labs(
    title = "Vocab g scores by age, quantile regression",
    y = "Vocab g scores (IRT)",
    x = "Age"
  )

GG_save("figs/vocab g scores quantiles by age.png")

#age norms
vocab_normed = make_norms(
  d_vocab$vocab_g,
  d_vocab$age
)

## Detected linear effect of age on the score (p = <0.001***). Model used.

## Detected variance effect of age on the score (p = <0.001***). Model used.

vocab_normed$data

apply_norms(
  -1,
  20,
  prior_norms = vocab_normed
)

##   -1 
## 92.1

#test HS
test_HS(
  d_vocab$vocab_g,
  d_vocab$age
)

Meta

#versions
write_sessioninfo()

## R version 4.5.1 (2025-06-13)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] mirt_1.45.1           lattice_0.22-7        mgcv_1.9-3           
##  [4] nlme_3.1-168          rms_8.0-0             Hmisc_5.2-4          
##  [7] kirkegaard_2025-10-17 robustbase_0.99-6     psych_2.5.6          
## [10] assertthat_0.2.1      weights_1.1.2         magrittr_2.0.4       
## [13] lubridate_1.9.4       forcats_1.0.1         stringr_1.5.2        
## [16] dplyr_1.1.4           purrr_1.1.0           readr_2.1.5          
## [19] tidyr_1.3.1           tibble_3.3.0          ggplot2_4.0.0        
## [22] tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3   rstudioapi_0.17.1    audio_0.1-11        
##   [4] jsonlite_2.0.0       shape_1.4.6.1        TH.data_1.1-4       
##   [7] jomo_2.7-6           farver_2.1.2         nloptr_2.2.1        
##  [10] rmarkdown_2.30       ragg_1.5.0           vctrs_0.6.5         
##  [13] minqa_1.2.8          base64enc_0.1-3      htmltools_0.5.8.1   
##  [16] polspline_1.1.25     broom_1.0.10         Formula_1.2-5       
##  [19] mitml_0.4-5          dcurver_0.9.2        sass_0.4.10         
##  [22] parallelly_1.45.1    bslib_0.9.0          htmlwidgets_1.6.4   
##  [25] plyr_1.8.9           sandwich_3.1-1       testthat_3.2.3      
##  [28] zoo_1.8-14           cachem_1.1.0         mime_0.13           
##  [31] lifecycle_1.0.4      iterators_1.0.14     pkgconfig_2.0.3     
##  [34] Matrix_1.7-4         R6_2.6.1             fastmap_1.2.0       
##  [37] shiny_1.11.1         rbibutils_2.3        future_1.67.0       
##  [40] digest_0.6.37        colorspace_2.1-2     qgam_2.0.0          
##  [43] textshaping_1.0.4    vegan_2.7-2          labeling_0.4.3      
##  [46] progressr_0.16.0     timechange_0.3.0     gdata_3.0.1         
##  [49] compiler_4.5.1       doParallel_1.0.17    withr_3.0.2         
##  [52] htmlTable_2.4.3      S7_0.2.0             backports_1.5.0     
##  [55] R.utils_2.13.0       pan_1.9              MASS_7.3-65         
##  [58] quantreg_6.1         sessioninfo_1.2.3    GPArotation_2025.3-1
##  [61] gtools_3.9.5         permute_0.9-8        tools_4.5.1         
##  [64] foreign_0.8-90       httpuv_1.6.16        future.apply_1.20.0 
##  [67] clipr_0.8.0          nnet_7.3-20          R.oo_1.27.1         
##  [70] glue_1.8.0           promises_1.3.3       grid_4.5.1          
##  [73] checkmate_2.3.3      cluster_2.1.8.1      generics_0.1.4      
##  [76] gtable_0.3.6         tzdb_0.5.0           R.methodsS3_1.8.2   
##  [79] data.table_1.17.8    hms_1.1.3            Deriv_4.2.0         
##  [82] foreach_1.5.2        pillar_1.11.1        later_1.4.4         
##  [85] splines_4.5.1        survival_3.8-3       SparseM_1.84-2      
##  [88] tidyselect_1.2.1     pbapply_1.7-4        knitr_1.50          
##  [91] reformulas_0.4.1     gridExtra_2.3        xfun_0.53           
##  [94] brio_1.1.5           DEoptimR_1.1-4       stringi_1.8.7       
##  [97] yaml_2.3.10          boot_1.3-32          evaluate_1.0.5      
## [100] codetools_0.2-20     beepr_2.0            cli_3.6.5           
## [103] rpart_4.1.24         xtable_1.8-4         systemfonts_1.3.1   
## [106] Rdpack_2.6.4         jquerylib_0.1.4      Rcpp_1.1.0          
## [109] globals_0.18.0       parallel_4.5.1       MatrixModels_0.5-4  
## [112] lme4_1.1-37          listenv_0.9.1        glmnet_4.1-10       
## [115] mvtnorm_1.3-3        SimDesign_2.21       scales_1.4.0        
## [118] rlang_1.1.6          multcomp_1.4-28      mnormt_2.1.1        
## [121] mice_3.18.0

#write data to file for reuse
# d %>% write_rds("data/data_for_reuse.rds")

#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}

Norming a scale

19 October, 2025

Init

Functions

Data

Analysis

Diamonds

Vocab

Meta