TL;DR: you can’t convert a centile on one variable into the same centile on a correlated variable at a 1:1 ratio. Assume a true correlation of .80, free from measurement error and range problems, so we are looking at latent scores here.
I did not bother with the scales, since these don’t matter for the point being made. I also did not bother with scale ranges, but these just make my point stronger since they cause problems near the ceiling.
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(rms)
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
# Use the black-and-white ggplot2 theme for every plot below.
theme_set(theme_bw())

# Simulate the data ----
# 10,000 draws from a bivariate normal with means 0, variances 1 and
# covariance .8, so X and Y correlate at about .80 in the sample.
set.seed(1)
sigma_xy <- matrix(c(1, .8, .8, 1), nrow = 2)
xy_draws <- MASS::mvrnorm(n = 1e4, mu = c(0, 0), Sigma = sigma_xy)
colnames(xy_draws) <- c("X", "Y")

# Attach each score's empirical centile: the share of sample values that
# are less than or equal to it (empirical CDF evaluated at the score).
d <- xy_draws %>%
  as_tibble() %>%
  mutate(
    X_centile = ecdf(X)(X),
    Y_centile = ecdf(Y)(Y)
  )
# Scatter plot of the raw (z-score) values of X and Y with the default
# smoother. The red reference line has intercept 0 and slope 1, i.e. what a
# 1:1 conversion between the two scores would look like.
# The title names z scores: this panel maps X and Y themselves, not the
# X_centile / Y_centile columns.
ggplot(d, aes(X, Y)) +
  geom_point() +
  geom_smooth() +
  geom_abline(intercept = 0, slope = 1, color = "red") +
  ggtitle("Z scores of X and Y, which are r = .80\nRed line: slope = 1")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Same scatter plot, now on the centile scale ----
# Both axes are labelled as percentages; the red line (intercept 0, slope 1)
# marks a 1:1 centile-to-centile conversion.
ggplot(data = d, mapping = aes(x = X_centile, y = Y_centile)) +
  scale_x_continuous(labels = scales::percent) +
  scale_y_continuous(labels = scales::percent) +
  geom_point() +
  geom_smooth() +
  geom_abline(slope = 1, intercept = 0, color = "red") +
  ggtitle("Centiles of X and Y, which are r = .80\nRed line: slope = 1")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Prediction grid ----
# X scores from 1 to 3 in steps of 0.1, each paired with its centile in the
# simulated sample (empirical CDF of d$X evaluated at the grid value).
preds <- tibble(X = seq(from = 1, to = 3, by = .1)) %>%
  mutate(X_centile = ecdf(d$X)(X))
# Models used to add predictions ----
# Z-score model: linear regression of Y on X, both on their original scale.
mod_Y <- ols(formula = Y ~ X, data = d)
print(mod_Y)
## Linear Regression Model
##
## ols(formula = Y ~ X, data = d)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 10000 LR chi2 10563.05 R2 0.652
## sigma0.5966 d.f. 1 R2 adj 0.652
## d.f. 9998 Pr(> chi2) 0.0000 g 0.924
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.636473 -0.401585 -0.003439 0.396455 2.352340
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0036 0.0060 -0.60 0.5488
## X 0.8099 0.0059 136.94 <0.0001
##
# Centile model: DO NOT USE IRL.
# Linear regression of Y's centile on X's centile, fitted only so its
# predictions can be compared with the z-score model's.
mod_Y_centile <- ols(formula = Y_centile ~ X_centile, data = d)
print(mod_Y_centile)
## Linear Regression Model
##
## ols(formula = Y_centile ~ X_centile, data = d)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 10000 LR chi2 9867.72 R2 0.627
## sigma0.1763 d.f. 1 R2 adj 0.627
## d.f. 9998 Pr(> chi2) 0.0000 g 0.264
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.6684154 -0.1181346 -0.0004279 0.1151591 0.6701526
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.1040 0.0035 29.50 <0.0001
## X_centile 0.7920 0.0061 129.70 <0.0001
##
# Fill in the predictions ----
# pred_Y: predicted Y from the z-score model at each grid value of X.
preds[["pred_Y"]] <- predict(mod_Y, newdata = preds)
# pred_Y_implied_centile: where that predicted Y falls in the sample
# distribution of Y (empirical CDF of d$Y).
preds[["pred_Y_implied_centile"]] <- ecdf(d$Y)(preds[["pred_Y"]])
# pred_Y_centile: predicted centile straight from the centile model.
preds[["pred_Y_centile"]] <- predict(mod_Y_centile, newdata = preds)

# Show every row of the comparison table.
print(preds, n = Inf)
## # A tibble: 21 x 5
## X X_centile pred_Y pred_Y_implied_centile pred_Y_centile
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.838 0.806 0.790 0.767
## 2 1.1 0.860 0.887 0.812 0.786
## 3 1.2 0.882 0.968 0.833 0.802
## 4 1.3 0.900 1.05 0.850 0.817
## 5 1.4 0.915 1.13 0.868 0.829
## 6 1.5 0.929 1.21 0.885 0.840
## 7 1.6 0.942 1.29 0.901 0.850
## 8 1.7 0.955 1.37 0.916 0.860
## 9 1.8 0.963 1.45 0.928 0.867
## 10 1.9 0.971 1.54 0.937 0.873
## 11 2 0.978 1.62 0.946 0.878
## 12 2.1 0.983 1.70 0.956 0.882
## 13 2.2 0.988 1.78 0.962 0.886
## 14 2.3 0.991 1.86 0.967 0.889
## 15 2.4 0.994 1.94 0.972 0.891
## 16 2.5 0.996 2.02 0.976 0.893
## 17 2.6 0.996 2.10 0.980 0.893
## 18 2.7 0.997 2.18 0.983 0.894
## 19 2.8 0.998 2.26 0.986 0.894
## 20 2.9 0.999 2.35 0.990 0.895
## 21 3 0.999 2.43 0.992 0.895
Conclusions: