The idea is to partition the data based on differences in age and education. For this purpose, we will use a conditional decision tree, which has the advantage of making these partitions simultaneously and at several hierarchical levels.
First, we will write the functions to obtain the robust mean and SD, trimmed at 20%, as suggested by Rand Wilcox.
# Function to compute the 20% trimmed mean
tmean <- function(x, tr = 0.2, na.rm = FALSE, STAND = NULL) {
  if (na.rm) x <- x[!is.na(x)]
  val <- mean(x, tr)
  val
}
# Function to compute the 20% trimmed standard deviation (SD)
sd_trim <- function(x, trim = 0.2, const = TRUE) {
  # trimmed sd, where x is a matrix (column-wise)
  x <- as.matrix(x)
  if (const) {
    if (trim == 0.1) {
      const <- 0.7892
    } else if (trim == 0.2) {
      const <- 0.6615
    } else {
      warning("Did you specify the correct constant?")
    }
  } else {
    const <- 1
  }
  m <- apply(x, 2, mean, trim)
  res <- x - rep(1, nrow(x)) %*% t(m)
  qu <- apply(abs(res), 2, quantile, 1 - trim)
  sdtrim <- apply(matrix(res[t(abs(t(res)) <= qu)]^2, ncol = ncol(x), byrow = FALSE), 2, sum)
  sdtrim <- sqrt(sdtrim / ((nrow(x) * (1 - trim) - 1))) / const
  return(sdtrim)
}
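As a quick sanity check (an illustrative sketch, not part of the original pipeline; the seed and sample are arbitrary), the two helpers can be applied to simulated data:
# Illustrative check of the trimmed estimators on simulated data (hypothetical example)
set.seed(123)
x <- rnorm(200, mean = 50, sd = 10)
tmean(x)    # 20% trimmed mean, close to 50
sd_trim(x)  # 20% trimmed SD (rescaled by the 0.6615 constant), close to 10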
Let's load the dataset.
# On the Avell PC
# setwd("C:/Dropbox/Laboratorio/Pedro Brandao/PDCRS")
# On the Mac M1
setwd("~/Dropbox/Laboratorio/Pedro Brandao/PDCRS")
# Read the .sav file exported by SurveyMonkey
#dataset = haven::read_sav("normas_pdcrs_2021.sav")
# Clean the variable names
#dataset <- tibble::as_tibble(dataset, .name_repair = janitor::make_clean_names)
# Export to .csv format
#readr::write_csv(dataset, file ="dados.csv")
dados <- readr::read_csv("dados.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## id = col_character(),
## data_nasc = col_character(),
## pesquisador = col_character(),
## cidade = col_character(),
## estado = col_character(),
## nome = col_character(),
## idade = col_character(),
## naturalidade = col_character(),
## estado_naturalidade = col_character(),
## data_coleta = col_character(),
## exclu03_recusa_tcle = col_logical(),
## exclu10_afasia = col_logical()
## )
## ℹ Use `spec()` for the full column specifications.
#colnames(dados)
#dados %>% glimpse
# Convert age to a numeric variable
dados$idade <- as.numeric(dados$idade)
## Warning: NAs introduzidos por coerção
# Plot the age distribution
hist(dados$idade, breaks = 50)
# Plot the education distribution
hist(dados$escolaridade, breaks = 50)
# Plot the distribution of the PD-CRS total score
hist(dados$pdcrs_total_score, breaks = 50)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Exclude cases with a PD-CRS total score below 30
dados <- dados %>% filter(pdcrs_total_score > 29)
The conditional decision tree can produce an overfitted partition that changes when the data are updated. For this reason, we will use cross-validation. The results reported at each node (mu and sd) can be used on the site https://sapsi.shinyapps.io/bayesNeuro/ to estimate the z-score and percentile, as well as to compute the probability that a case is statistically different from the control group.
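As a sketch of the calculation performed by that site (using made-up node statistics, not values estimated from this sample), the z-score and percentile follow directly from the node's trimmed mean and SD:
# Hypothetical node statistics, for illustration only
mu_node <- 85   # trimmed mean of the node
sd_node <- 13   # trimmed SD of the node
score <- 70
z <- (score - mu_node) / sd_node  # robust z-score
pnorm(z)                          # percentile under a normal model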
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(party)
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
# Keep only the variables of interest
df <- dados %>% select(pdcrs_total_score, idade, escolaridade) %>%
  na.omit()
# Rename the predicted variable to standardize the syntax
names(df) <- c("score", "idade", "escolaridade")
# Convert integer variables to numeric
df <- df %>% mutate(across(where(is.integer), as.numeric))
df <- as.data.frame(df)
# Conditional inference trees with k-fold cross-validation (to limit overfitting)
model <- caret::train(
  score ~ idade + escolaridade,
  data = df,
  method = 'ctree2',
  trControl = trainControl("cv", number = 5, classProbs = FALSE),
  tuneGrid = expand.grid(maxdepth = 1:10, mincriterion = c(0.9, 0.95, 0.99)),
  controls = ctree_control(minbucket = 30)
)
plot(model$finalModel)
# Identify the terminal node of each observation
nodes <- where(model$finalModel)
# Compute the statistics and models required by the normative-table function
sum_stats <- rbind(mu = tapply(df$score, nodes, tmean),
                   sd = tapply(df$score, nodes, sd_trim))
sum_stats[, order(sum_stats[1,])]
## 12 11 16 3 15 7 17 6
## mu 55.28571 66.27273 67.20000 77.09091 79.16418 85.05128 90.55263 93.3038
## sd 12.94514 10.74091 14.46472 12.15509 18.04338 13.41397 14.35281 11.8870
## 8
## mu 95.41237
## sd 11.59273
# Compute the empirical cumulative distribution (ECDF) of the score
ecdfs <- lapply(split(df, nodes), function(x)
  ecdf(x$score))
# Linear regression within each node
reg_models <- lapply(split(df, nodes), function(x)
  lm(score ~ idade + escolaridade, data = x))
# Define the normative-table function
# The 'case' argument must be a data.frame with 3 columns: 'score', 'escolaridade' and 'idade'
norm_table <- function(case, method = 'stats') {
  if (!any(class(case) == 'data.frame') | (nrow(case) == 0) | (ncol(case) != 3) |
      !all(names(case) %in% c('score', 'idade', 'escolaridade'))) {
    stop('"case" must be a data.frame with at least one observation and three columns: "score", "escolaridade" and "idade"')
  }
  node <- where(model$finalModel, case)
  if (method == 'stats') {
    out <- lapply(1:nrow(case), function(x) {
      data.frame(
        Escore = case$score[x],
        Z = (case$score[x] - sum_stats[1, as.character(node[x])]) / sum_stats[2, as.character(node[x])],
        Percentil.Normal = pnorm((case$score[x] - sum_stats[1, as.character(node[x])]) / sum_stats[2, as.character(node[x])]),
        Percentil.Empírico = ecdfs[[as.character(node[x])]](case$score[x])
      )
    })
    do.call(rbind, out)
  } else if (method == 'lm') {
    out <- lapply(1:nrow(case), function(x) {
      data.frame(
        Escore = case$score[x],
        Z = (case$score[x] - predict(reg_models[[as.character(node[x])]], case[x, ])) /
          sigma(reg_models[[as.character(node[x])]]),
        Percentil.Normal = pnorm((case$score[x] - predict(reg_models[[as.character(node[x])]], case[x, ])) /
          sigma(reg_models[[as.character(node[x])]])),
        Percentil.Empírico = ecdf(residuals(reg_models[[as.character(node[x])]]))(case$score[x] - predict(reg_models[[as.character(node[x])]], case[x, ]))
      )
    })
    do.call(rbind, out)
  } else {
    stop('Method must be "stats" or "lm"')
  }
}
# Test the function
# It handles multiple observations at once!
test.df <- df[1:10,]
# Show the first 10 cases
norm_table(test.df, method='stats')
| Escore | Z | Percentil.Normal | Percentil.Empírico |
|---|---|---|---|
| 106 | 1.0680744 | 0.8572565 | 0.8914729 |
| 105 | 0.9839489 | 0.8374297 | 0.8682171 |
| 113 | 1.6569528 | 0.9512355 | 0.9689922 |
| 97 | 0.3109449 | 0.6220787 | 0.6666667 |
| 93 | -0.2080934 | 0.4175780 | 0.4339623 |
| 85 | -0.6985611 | 0.2424132 | 0.2480620 |
| 98 | 0.2232113 | 0.5883145 | 0.6163522 |
| 94 | 0.0585684 | 0.5233521 | 0.5503876 |
| 103 | 0.8156979 | 0.7926635 | 0.7984496 |
| 88 | -0.6393981 | 0.2612820 | 0.2452830 |
norm_table(test.df, method='lm')
| Escore | Z | Percentil.Normal | Percentil.Empírico |
|---|---|---|---|
| 106 | 1.1134050 | 0.8672327 | 0.8914729 |
| 105 | 0.9761499 | 0.8355049 | 0.8139535 |
| 113 | 1.7220651 | 0.9574711 | 0.9612403 |
| 97 | 0.3656677 | 0.6426935 | 0.6511628 |
| 93 | -0.2815953 | 0.3891269 | 0.3836478 |
| 85 | -0.6313153 | 0.2639172 | 0.2325581 |
| 98 | 0.1625189 | 0.5645514 | 0.5283019 |
| 94 | 0.0473133 | 0.5188682 | 0.4961240 |
| 103 | 0.8772385 | 0.8098214 | 0.7906977 |
| 88 | -0.7257096 | 0.2340084 | 0.2201258 |
Within each node of the decision tree, obtained from the score, age and education data, we will fit continuous norms using the Taylor polynomial expansion technique implemented in cNORM. For more information see http://www.psychometrica.de/cNorm_en.html
library(cNORM)
## Good morning star-shine, cNORM says 'Hello!'
##
## Attaching package: 'cNORM'
## The following object is masked from 'package:ggplot2':
##
## derive
node_mean <- as.vector(predict(model$finalModel, df))
node_val <- as.vector(where(model$finalModel, df))
names(node_mean) <- node_val
set.seed(666)
node_mean_cat <- as.numeric(factor(node_mean))
# Observed and predicted percentile curves
m_norm <- cnorm(raw = df$score, group = node_mean)
## Multiple R2 between raw score and explanatory variable: R2 = 0.437
## Final solution: 5 terms
## R-Square Adj. = 0.98337
## Final regression model: raw ~ L1A3 + L3A1 + L3A4 + L4A2 + L4A3
## Regression function: raw ~ 15.01762369 + (2.249262245e-06*L1A3) + (7.315973702e-06*L3A1) + (-1.090520274e-11*L3A4) + (-2.304850546e-09*L4A2) + (2.582422385e-11*L4A3)
## Raw Score RMSE = 2.2158
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
# Cross-validation to choose the number of terms used in the line below
#cnorm.cv(m_norm$data)
# Compute the cNORM norms on the Wechsler standard scale (mean 10, SD 3)
m_norm <- cnorm(raw = df$score, group = node_mean, terms=5, scale=c(10, 3))
## Multiple R2 between raw score and explanatory variable: R2 = 0.437
## User specified solution: 5 terms
## R-Square Adj. = 0.983176
## Final regression model: raw ~ L2 + A4 + L1A2 + L1A4 + L2A2
## Regression function: raw ~ 14.53483583 + (0.1160882073*L2) + (4.037271845e-07*A4) + (0.001264144226*L1A2) + (-6.942264164e-08*L1A4) + (-2.310681602e-05*L2A2)
## Raw Score RMSE = 2.22867
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
# Show the R2 for each number of predictors
plotSubset(m_norm)
# Plot the observed vs. fitted scores
plotRaw(m_norm)
# Plot the observed vs. norm-fitted scores
plotNorm(m_norm)
# Test for subject 1
predictNorm(raw = df[1,1][[1]], A=predict(model$finalModel, df[1, ])[[1]], model=m_norm)
## [1] 13.25828
# Test for subject 142
predictNorm(raw = df[142,1][[1]], A=predict(model$finalModel, df[142, ])[[1]], model=m_norm)
## [1] 7.41512
# Test the normTable function
normTable(predict(model$finalModel, df[142, ])[[1]], model = m_norm)
| norm | raw | percentile |
|---|---|---|
| 2.5 | 32.00000 | 0.6209665 |
| 2.8 | 32.00000 | 0.8197536 |
| 3.1 | 32.00000 | 1.0724110 |
| 3.4 | 32.00000 | 1.3903448 |
| 3.7 | 32.00000 | 1.7864421 |
| 4.0 | 32.89580 | 2.2750132 |
| 4.3 | 34.00839 | 2.8716560 |
| 4.6 | 35.12840 | 3.5930319 |
| 4.9 | 36.25584 | 4.4565463 |
| 5.2 | 37.39070 | 5.4799292 |
| 5.5 | 38.53299 | 6.6807201 |
| 5.8 | 39.68270 | 8.0756659 |
| 6.1 | 40.83983 | 9.6800485 |
| 6.4 | 42.00439 | 11.5069670 |
| 6.7 | 43.17637 | 13.5666061 |
| 7.0 | 44.35577 | 15.8655254 |
| 7.3 | 45.54260 | 18.4060125 |
| 7.6 | 46.73685 | 21.1855399 |
| 7.9 | 47.93853 | 24.1963652 |
| 8.2 | 49.14763 | 27.4253118 |
| 8.5 | 50.36415 | 30.8537539 |
| 8.8 | 51.58810 | 34.4578258 |
| 9.1 | 52.81947 | 38.2088578 |
| 9.4 | 54.05827 | 42.0740291 |
| 9.7 | 55.30448 | 46.0172163 |
| 10.0 | 56.55813 | 50.0000000 |
| 10.3 | 57.81919 | 53.9827837 |
| 10.6 | 59.08768 | 57.9259709 |
| 10.9 | 60.36359 | 61.7911422 |
| 11.2 | 61.64693 | 65.5421742 |
| 11.5 | 62.93769 | 69.1462461 |
| 11.8 | 64.23588 | 72.5746882 |
| 12.1 | 65.54149 | 75.8036348 |
| 12.4 | 66.85452 | 78.8144601 |
| 12.7 | 68.17497 | 81.5939875 |
| 13.0 | 69.50285 | 84.1344746 |
| 13.3 | 70.83816 | 86.4333939 |
| 13.6 | 72.18088 | 88.4930330 |
| 13.9 | 73.53103 | 90.3199515 |
| 14.2 | 74.88861 | 91.9243341 |
| 14.5 | 76.25361 | 93.3192799 |
| 14.8 | 77.62603 | 94.5200708 |
| 15.1 | 79.00587 | 95.5434537 |
| 15.4 | 80.39314 | 96.4069681 |
| 15.7 | 81.78784 | 97.1283440 |
| 16.0 | 83.18995 | 97.7249868 |
| 16.3 | 84.59950 | 98.2135579 |
| 16.6 | 86.01646 | 98.6096552 |
| 16.9 | 87.44085 | 98.9275890 |
| 17.2 | 88.87266 | 99.1802464 |
| 17.5 | 90.31190 | 99.3790335 |
# Compute the cNORM norm table for each terminal node of the tree
tt <- lapply(sort(unique(node_mean)), function(x) normTable(x, model = m_norm, step = 1, minNorm = 1, maxNorm=19))
# Assemble the normative tables, one for each node of the tree
normTabs <- lapply(tt, function(x) data.frame(Norma=x$norm,
Escore.Inf=c(round(x$raw)),
Escore.Sup=c(round(x$raw[-1])-1, NA),
Percentil=x$percentile))
The norms can be interpreted using the psychometric conversion calculator at http://www.psychometrica.de/normwertrechner_en.html
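For reference, the conversions performed by that calculator are simple linear rescalings of the Wechsler scaled score (mean 10, SD 3); a minimal sketch, not part of the pipeline:
# Illustrative conversion of a Wechsler scaled score (mean 10, SD 3) to other common metrics
norma <- 7
z <- (norma - 10) / 3
c(z = z, T = 50 + 10 * z, QI = 100 + 15 * z, percentil = round(pnorm(z) * 100, 1))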
Standard score bands: 1 to 4 = Very low; 5 to 7 = Low; 8 to 13 = Average; 14 to 16 = High; 17 to 19 = Very high.
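A minimal helper (an assumption of ours, not part of the original pipeline) that maps a standard score to these bands:
# Hypothetical helper: classify a Wechsler-type standard score into the bands above
classify_norm <- function(norma) {
  cut(norma,
      breaks = c(0, 4, 7, 13, 16, 19),
      labels = c("Very low", "Low", "Average", "High", "Very high"),
      include.lowest = TRUE)
}
classify_norm(c(3, 6, 10, 15, 18))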
Here it is important to keep Jensen's inequality in mind: for a convex transformation f, the mean of the transformed random variable is greater than or equal to the transformation of the mean of the random variable, i.e. f(E(X)) <= E(f(X)) (the inequality reverses for concave f). Because the score distributions are not symmetric, the mean obtained after applying cNORM will almost always differ from the empirical mean, even the robust one. Since cNORM applies a normalizing transformation to the raw scores when fitting the model, the expected value of the norms will generally not coincide with the value obtained by simply transforming the expected raw score.
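A small numerical illustration of the point (a hypothetical example, unrelated to the PD-CRS data): for a convex transformation of a skewed variable, the mean of the transformed values exceeds the transformation of the mean.
# Jensen's inequality on a skewed variable: E[f(X)] >= f(E[X]) for convex f (illustration)
set.seed(1)
x <- rexp(1e5, rate = 1)  # asymmetric (exponential) variable
f <- function(v) v^2      # convex transformation
mean(f(x))                # approximately 2
f(mean(x))                # approximately 1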
# Show the normative tables for each node of the conditional decision tree, using the "Wechsler" standard scale with mean 10 and SD 3.
plot(model$finalModel)
sum_stats[, order(sum_stats[1,])]
## 12 11 16 3 15 7 17 6
## mu 55.28571 66.27273 67.20000 77.09091 79.16418 85.05128 90.55263 93.3038
## sd 12.94514 10.74091 14.46472 12.15509 18.04338 13.41397 14.35281 11.8870
## 8
## mu 95.41237
## sd 11.59273
normTabs
## [[1]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 32 31 0.1349898
## 2 2 32 31 0.3830381
## 3 3 32 32 0.9815329
## 4 4 33 36 2.2750132
## 5 5 37 39 4.7790352
## 6 6 40 43 9.1211220
## 7 7 44 47 15.8655254
## 8 8 48 51 25.2492538
## 9 9 52 56 36.9441340
## 10 10 57 60 50.0000000
## 11 11 61 64 63.0558660
## 12 12 65 69 74.7507462
## 13 13 70 73 84.1344746
## 14 14 74 78 90.8788780
## 15 15 79 82 95.2209648
## 16 16 83 87 97.7249868
## 17 17 88 92 99.0184671
## 18 18 93 97 99.6169619
## 19 19 98 NA 99.8650102
##
## [[2]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 32 31 0.1349898
## 2 2 32 34 0.3830381
## 3 3 35 38 0.9815329
## 4 4 39 43 2.2750132
## 5 5 44 47 4.7790352
## 6 6 48 51 9.1211220
## 7 7 52 56 15.8655254
## 8 8 57 60 25.2492538
## 9 9 61 65 36.9441340
## 10 10 66 69 50.0000000
## 11 11 70 74 63.0558660
## 12 12 75 78 74.7507462
## 13 13 79 83 84.1344746
## 14 14 84 87 90.8788780
## 15 15 88 92 95.2209648
## 16 16 93 97 97.7249868
## 17 17 98 102 99.0184671
## 18 18 103 106 99.6169619
## 19 19 107 NA 99.8650102
##
## [[3]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 32 31 0.1349898
## 2 2 32 35 0.3830381
## 3 3 36 40 0.9815329
## 4 4 41 44 2.2750132
## 5 5 45 49 4.7790352
## 6 6 50 53 9.1211220
## 7 7 54 58 15.8655254
## 8 8 59 62 25.2492538
## 9 9 63 67 36.9441340
## 10 10 68 71 50.0000000
## 11 11 72 76 63.0558660
## 12 12 77 81 74.7507462
## 13 13 82 85 84.1344746
## 14 14 86 90 90.8788780
## 15 15 91 94 95.2209648
## 16 16 95 99 97.7249868
## 17 17 100 104 99.0184671
## 18 18 105 108 99.6169619
## 19 19 109 NA 99.8650102
##
## [[4]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 34 38 0.1349898
## 2 2 39 43 0.3830381
## 3 3 44 47 0.9815329
## 4 4 48 52 2.2750132
## 5 5 53 57 4.7790352
## 6 6 58 62 9.1211220
## 7 7 63 67 15.8655254
## 8 8 68 71 25.2492538
## 9 9 72 76 36.9441340
## 10 10 77 81 50.0000000
## 11 11 82 85 63.0558660
## 12 12 86 90 74.7507462
## 13 13 91 94 84.1344746
## 14 14 95 99 90.8788780
## 15 15 100 103 95.2209648
## 16 16 104 107 97.7249868
## 17 17 108 112 99.0184671
## 18 18 113 116 99.6169619
## 19 19 117 NA 99.8650102
##
## [[5]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 36 40 0.1349898
## 2 2 41 45 0.3830381
## 3 3 46 50 0.9815329
## 4 4 51 55 2.2750132
## 5 5 56 59 4.7790352
## 6 6 60 64 9.1211220
## 7 7 65 69 15.8655254
## 8 8 70 74 25.2492538
## 9 9 75 78 36.9441340
## 10 10 79 83 50.0000000
## 11 11 84 88 63.0558660
## 12 12 89 92 74.7507462
## 13 13 93 96 84.1344746
## 14 14 97 101 90.8788780
## 15 15 102 105 95.2209648
## 16 16 106 109 97.7249868
## 17 17 110 114 99.0184671
## 18 18 115 118 99.6169619
## 19 19 119 NA 99.8650102
##
## [[6]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 41 45 0.1349898
## 2 2 46 51 0.3830381
## 3 3 52 56 0.9815329
## 4 4 57 61 2.2750132
## 5 5 62 66 4.7790352
## 6 6 67 71 9.1211220
## 7 7 72 75 15.8655254
## 8 8 76 80 25.2492538
## 9 9 81 85 36.9441340
## 10 10 86 89 50.0000000
## 11 11 90 93 63.0558660
## 12 12 94 98 74.7507462
## 13 13 99 102 84.1344746
## 14 14 103 106 90.8788780
## 15 15 107 110 95.2209648
## 16 16 111 114 97.7249868
## 17 17 115 117 99.0184671
## 18 18 118 121 99.6169619
## 19 19 122 NA 99.8650102
##
## [[7]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 45 50 0.1349898
## 2 2 51 55 0.3830381
## 3 3 56 60 0.9815329
## 4 4 61 65 2.2750132
## 5 5 66 70 4.7790352
## 6 6 71 75 9.1211220
## 7 7 76 80 15.8655254
## 8 8 81 84 25.2492538
## 9 9 85 89 36.9441340
## 10 10 90 93 50.0000000
## 11 11 94 97 63.0558660
## 12 12 98 101 74.7507462
## 13 13 102 105 84.1344746
## 14 14 106 109 90.8788780
## 15 15 110 112 95.2209648
## 16 16 113 116 97.7249868
## 17 17 117 119 99.0184671
## 18 18 120 122 99.6169619
## 19 19 123 NA 99.8650102
##
## [[8]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 50 55 0.1349898
## 2 2 56 60 0.3830381
## 3 3 61 65 0.9815329
## 4 4 66 70 2.2750132
## 5 5 71 75 4.7790352
## 6 6 76 80 9.1211220
## 7 7 81 84 15.8655254
## 8 8 85 88 25.2492538
## 9 9 89 93 36.9441340
## 10 10 94 97 50.0000000
## 11 11 98 100 63.0558660
## 12 12 101 104 74.7507462
## 13 13 105 108 84.1344746
## 14 14 109 111 90.8788780
## 15 15 112 114 95.2209648
## 16 16 115 117 97.7249868
## 17 17 118 120 99.0184671
## 18 18 121 123 99.6169619
## 19 19 124 NA 99.8650102
##
## [[9]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 53 58 0.1349898
## 2 2 59 63 0.3830381
## 3 3 64 68 0.9815329
## 4 4 69 73 2.2750132
## 5 5 74 78 4.7790352
## 6 6 79 82 9.1211220
## 7 7 83 87 15.8655254
## 8 8 88 91 25.2492538
## 9 9 92 95 36.9441340
## 10 10 96 99 50.0000000
## 11 11 100 102 63.0558660
## 12 12 103 106 74.7507462
## 13 13 107 109 84.1344746
## 14 14 110 112 90.8788780
## 15 15 113 115 95.2209648
## 16 16 116 118 97.7249868
## 17 17 119 120 99.0184671
## 18 18 121 122 99.6169619
## 19 19 123 NA 99.8650102
df <- dados %>% select(pdcrs_s1_memoria_imediata, idade, escolaridade) %>%
na.omit()
names(df) <- c("score", "idade", "escolaridade")
df <- df %>% mutate(across(where(is.integer), as.numeric))
df <- as.data.frame(df)
model <- train(
score ~ idade + escolaridade,
data = df,
method = 'ctree2',
trControl = trainControl("cv", number = 5, classProbs = FALSE),
tuneGrid = expand.grid(maxdepth = 1:10, mincriterion = c(0.8, 0.95, 0.99)),
controls=ctree_control(minbucket=30)
)
nodes <- where(model$finalModel)
sum_stats <- rbind(mu=tapply(df$score, nodes, tmean),
sd=tapply(df$score, nodes, sd_trim))
node_mean <- as.vector(predict(model$finalModel, df))
node_val <- as.vector(where(model$finalModel, df))
names(node_mean) <- node_val
set.seed(666)
node_mean_cat <- as.numeric(factor(node_mean))
m_norm <- cnorm(raw = df$score, group = node_mean)
## Multiple R2 between raw score and explanatory variable: R2 = 0.3546
## Final solution: 3 terms
## R-Square Adj. = 0.99282
## Final regression model: raw ~ L3 + L1A2 + L4A3
## Regression function: raw ~ 0.4002567926 + (2.118088018e-05*L3) + (0.001865092747*L1A2) + (-3.448788794e-10*L4A3)
## Raw Score RMSE = 0.17983
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
m_norm <- cnorm(raw = df$score, group = node_mean, terms=4, scale=c(10, 3))
## Multiple R2 between raw score and explanatory variable: R2 = 0.3546
## User specified solution: 4 terms
## R-Square Adj. = 0.993466
## Final regression model: raw ~ L3 + A1 + L1A1 + L4A2
## Regression function: raw ~ -3.201555656 + (0.001647893318*L3) + (0.7461921558*A1) + (0.05315377304*L1A1) + (-1.087742307e-06*L4A2)
## Raw Score RMSE = 0.17142
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
tt <- lapply(sort(unique(node_mean)), function(x) normTable(x, model = m_norm, step = 1, minNorm = 1, maxNorm=19))
normTabs <- lapply(tt, function(x) data.frame(Norma=x$norm,
Escore.Inf=c(round(x$raw)),
Escore.Sup=c(round(x$raw[-1])-1, NA),
Percentil=x$percentile))
plot(model$finalModel)
sum_stats[, order(sum_stats[1,])]
## 12 3 15 11 7 14 8 5
## mu 6.257143 7.476190 7.500000 7.530612 8.408163 8.916667 9.600000 10.165049
## sd 2.331064 1.765267 2.283946 1.581609 2.126812 2.039105 1.460242 1.744047
normTabs
## [[1]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 3 2 0.1349898
## 2 2 3 2 0.3830381
## 3 3 3 2 0.9815329
## 4 4 3 3 2.2750132
## 5 5 4 3 4.7790352
## 6 6 4 4 9.1211220
## 7 7 5 4 15.8655254
## 8 8 5 5 25.2492538
## 9 9 6 5 36.9441340
## 10 10 6 6 50.0000000
## 11 11 7 7 63.0558660
## 12 12 8 7 74.7507462
## 13 13 8 8 84.1344746
## 14 14 9 9 90.8788780
## 15 15 10 10 95.2209648
## 16 16 11 11 97.7249868
## 17 17 12 11 99.0184671
## 18 18 12 11 99.6169619
## 19 19 12 NA 99.8650102
##
## [[2]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 3 2 0.1349898
## 2 2 3 3 0.3830381
## 3 3 4 3 0.9815329
## 4 4 4 4 2.2750132
## 5 5 5 4 4.7790352
## 6 6 5 5 9.1211220
## 7 7 6 5 15.8655254
## 8 8 6 6 25.2492538
## 9 9 7 6 36.9441340
## 10 10 7 7 50.0000000
## 11 11 8 8 63.0558660
## 12 12 9 8 74.7507462
## 13 13 9 9 84.1344746
## 14 14 10 10 90.8788780
## 15 15 11 11 95.2209648
## 16 16 12 11 97.7249868
## 17 17 12 11 99.0184671
## 18 18 12 11 99.6169619
## 19 19 12 NA 99.8650102
##
## [[3]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 3 2 0.1349898
## 2 2 3 3 0.3830381
## 3 3 4 3 0.9815329
## 4 4 4 4 2.2750132
## 5 5 5 4 4.7790352
## 6 6 5 5 9.1211220
## 7 7 6 5 15.8655254
## 8 8 6 6 25.2492538
## 9 9 7 7 36.9441340
## 10 10 8 7 50.0000000
## 11 11 8 8 63.0558660
## 12 12 9 9 74.7507462
## 13 13 10 9 84.1344746
## 14 14 10 10 90.8788780
## 15 15 11 11 95.2209648
## 16 16 12 11 97.7249868
## 17 17 12 11 99.0184671
## 18 18 12 11 99.6169619
## 19 19 12 NA 99.8650102
##
## [[4]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 3 2 0.1349898
## 2 2 3 3 0.3830381
## 3 3 4 3 0.9815329
## 4 4 4 4 2.2750132
## 5 5 5 4 4.7790352
## 6 6 5 5 9.1211220
## 7 7 6 5 15.8655254
## 8 8 6 6 25.2492538
## 9 9 7 7 36.9441340
## 10 10 8 7 50.0000000
## 11 11 8 8 63.0558660
## 12 12 9 9 74.7507462
## 13 13 10 9 84.1344746
## 14 14 10 10 90.8788780
## 15 15 11 11 95.2209648
## 16 16 12 11 97.7249868
## 17 17 12 11 99.0184671
## 18 18 12 11 99.6169619
## 19 19 12 NA 99.8650102
##
## [[5]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 4 3 0.1349898
## 2 2 4 4 0.3830381
## 3 3 5 4 0.9815329
## 4 4 5 5 2.2750132
## 5 5 6 5 4.7790352
## 6 6 6 6 9.1211220
## 7 7 7 6 15.8655254
## 8 8 7 7 25.2492538
## 9 9 8 8 36.9441340
## 10 10 9 8 50.0000000
## 11 11 9 9 63.0558660
## 12 12 10 9 74.7507462
## 13 13 10 10 84.1344746
## 14 14 11 10 90.8788780
## 15 15 11 11 95.2209648
## 16 16 12 11 97.7249868
## 17 17 12 11 99.0184671
## 18 18 12 11 99.6169619
## 19 19 12 NA 99.8650102
##
## [[6]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 4 3 0.1349898
## 2 2 4 4 0.3830381
## 3 3 5 4 0.9815329
## 4 4 5 5 2.2750132
## 5 5 6 6 4.7790352
## 6 6 7 6 9.1211220
## 7 7 7 7 15.8655254
## 8 8 8 7 25.2492538
## 9 9 8 8 36.9441340
## 10 10 9 9 50.0000000
## 11 11 10 9 63.0558660
## 12 12 10 10 74.7507462
## 13 13 11 10 84.1344746
## 14 14 11 11 90.8788780
## 15 15 12 11 95.2209648
## 16 16 12 11 97.7249868
## 17 17 12 11 99.0184671
## 18 18 12 11 99.6169619
## 19 19 12 NA 99.8650102
##
## [[7]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 4 4 0.1349898
## 2 2 5 4 0.3830381
## 3 3 5 5 0.9815329
## 4 4 6 6 2.2750132
## 5 5 7 6 4.7790352
## 6 6 7 7 9.1211220
## 7 7 8 7 15.8655254
## 8 8 8 8 25.2492538
## 9 9 9 9 36.9441340
## 10 10 10 9 50.0000000
## 11 11 10 10 63.0558660
## 12 12 11 10 74.7507462
## 13 13 11 11 84.1344746
## 14 14 12 11 90.8788780
## 15 15 12 11 95.2209648
## 16 16 12 11 97.7249868
## 17 17 12 11 99.0184671
## 18 18 12 NA 99.6169619
## 19 19 NA NA 99.8650102
##
## [[8]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 5 4 0.1349898
## 2 2 5 5 0.3830381
## 3 3 6 5 0.9815329
## 4 4 6 6 2.2750132
## 5 5 7 7 4.7790352
## 6 6 8 7 9.1211220
## 7 7 8 8 15.8655254
## 8 8 9 9 25.2492538
## 9 9 10 9 36.9441340
## 10 10 10 10 50.0000000
## 11 11 11 10 63.0558660
## 12 12 11 11 74.7507462
## 13 13 12 11 84.1344746
## 14 14 12 11 90.8788780
## 15 15 12 11 95.2209648
## 16 16 12 11 97.7249868
## 17 17 12 11 99.0184671
## 18 18 12 NA 99.6169619
## 19 19 NA NA 99.8650102
df <- dados %>% select(pdcrs_s2_nomeacao, idade, escolaridade) %>%
na.omit()
names(df) <- c("score", "idade", "escolaridade")
df <- df %>% mutate(across(where(is.integer), as.numeric))
df <- as.data.frame(df)
model <- train(
score ~ idade + escolaridade,
data = df,
method = 'ctree2',
trControl = trainControl("cv", number = 5, classProbs = FALSE),
tuneGrid = expand.grid(maxdepth = 1:10, mincriterion = c(0.9, 0.95, 0.99)),
controls=ctree_control(minbucket=30)
)
nodes <- where(model$finalModel)
sum_stats <- rbind(mu=tapply(df$score, nodes, tmean),
sd=tapply(df$score, nodes, sd_trim))
node_mean <- as.vector(predict(model$finalModel, df))
node_val <- as.vector(where(model$finalModel, df))
names(node_mean) <- node_val
set.seed(666)
node_mean_cat <- as.numeric(factor(node_mean))
m_norm <- cnorm(raw = df$score, group = node_mean)
## Multiple R2 between raw score and explanatory variable: R2 = 0.2046
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 5 linear dependencies found
## Reordering variables and trying again:
## Final solution: 6 terms
## R-Square Adj. = 0.990561
## Final regression model: raw ~ L3 + L4 + L1A1 + L1A2 + L2A3 + L4A2
## Regression function: raw ~ -14.63381923 + (-6.958070739e-05*L3) + (8.740052551e-07*L4) + (0.05777415234*L1A1) + (6.169602511e-05*L1A2) + (-9.8826865e-07*L2A3) + (-1.391149644e-09*L4A2)
## Raw Score RMSE = 0.54408
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
m_norm <- cnorm(raw = df$score, group = node_mean, terms=6, scale=c(10, 3))
## Multiple R2 between raw score and explanatory variable: R2 = 0.2046
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 5 linear dependencies found
## Reordering variables and trying again:
## User specified solution: 6 terms
## R-Square Adj. = 0.990076
## Final regression model: raw ~ A1 + L1A2 + L3A1 + L4A2 + A4 + L3A4
## Regression function: raw ~ 11.21566852 + (-0.990397982*A1) + (0.008933345905*L1A2) + (-6.25526876e-05*L3A1) + (1.124598008e-06*L4A2) + (6.136953107e-05*A4) + (-1.139999959e-07*L3A4)
## Raw Score RMSE = 0.64147
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
tt <- lapply(sort(unique(node_mean)), function(x) normTable(x, model = m_norm, step = 1, minNorm = 1, maxNorm=19))
normTabs <- lapply(tt, function(x) data.frame(Norma=x$norm,
Escore.Inf=c(round(x$raw)),
Escore.Sup=c(round(x$raw[-1])-1, NA),
Percentil=x$percentile))
plot(model$finalModel)
sum_stats[, order(sum_stats[1,])]
## 2 7 5 6
## mu 12.869565 13.333333 16.053254 17.250000
## sd 3.713601 4.596937 2.677319 2.546323
normTabs
## [[1]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 2 2 0.1349898
## 2 2 3 3 0.3830381
## 3 3 4 5 0.9815329
## 4 4 6 6 2.2750132
## 5 5 7 7 4.7790352
## 6 6 8 8 9.1211220
## 7 7 9 9 15.8655254
## 8 8 10 10 25.2492538
## 9 9 11 12 36.9441340
## 10 10 13 13 50.0000000
## 11 11 14 14 63.0558660
## 12 12 15 15 74.7507462
## 13 13 16 16 84.1344746
## 14 14 17 17 90.8788780
## 15 15 18 19 95.2209648
## 16 16 20 19 97.7249868
## 17 17 20 19 99.0184671
## 18 18 20 19 99.6169619
## 19 19 20 NA 99.8650102
##
## [[2]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 2 2 0.1349898
## 2 2 3 4 0.3830381
## 3 3 5 5 0.9815329
## 4 4 6 6 2.2750132
## 5 5 7 7 4.7790352
## 6 6 8 9 9.1211220
## 7 7 10 10 15.8655254
## 8 8 11 11 25.2492538
## 9 9 12 12 36.9441340
## 10 10 13 13 50.0000000
## 11 11 14 14 63.0558660
## 12 12 15 15 74.7507462
## 13 13 16 16 84.1344746
## 14 14 17 18 90.8788780
## 15 15 19 19 95.2209648
## 16 16 20 19 97.7249868
## 17 17 20 19 99.0184671
## 18 18 20 19 99.6169619
## 19 19 20 NA 99.8650102
##
## [[3]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 2 3 0.1349898
## 2 2 4 5 0.3830381
## 3 3 6 7 0.9815329
## 4 4 8 9 2.2750132
## 5 5 10 10 4.7790352
## 6 6 11 12 9.1211220
## 7 7 13 13 15.8655254
## 8 8 14 14 25.2492538
## 9 9 15 15 36.9441340
## 10 10 16 16 50.0000000
## 11 11 17 17 63.0558660
## 12 12 18 18 74.7507462
## 13 13 19 18 84.1344746
## 14 14 19 19 90.8788780
## 15 15 20 19 95.2209648
## 16 16 20 19 97.7249868
## 17 17 20 19 99.0184671
## 18 18 20 19 99.6169619
## 19 19 20 NA 99.8650102
##
## [[4]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 2 3 0.1349898
## 2 2 4 6 0.3830381
## 3 3 7 8 0.9815329
## 4 4 9 10 2.2750132
## 5 5 11 12 4.7790352
## 6 6 13 13 9.1211220
## 7 7 14 15 15.8655254
## 8 8 16 16 25.2492538
## 9 9 17 17 36.9441340
## 10 10 18 17 50.0000000
## 11 11 18 18 63.0558660
## 12 12 19 18 74.7507462
## 13 13 19 18 84.1344746
## 14 14 19 18 90.8788780
## 15 15 19 NA 95.2209648
## 16 16 NA NA 97.7249868
## 17 17 NA NA 99.0184671
## 18 18 NA 18 99.6169619
## 19 19 19 NA 99.8650102
df <- dados %>% select(pdcrs_s3_atencao_sust, idade, escolaridade) %>%
na.omit()
names(df) <- c("score", "idade", "escolaridade")
df <- df %>% mutate(across(where(is.integer), as.numeric))
df <- as.data.frame(df)
model <- train(
score ~ idade + escolaridade,
data = df,
method = 'ctree2',
trControl = trainControl("cv", number = 5, classProbs = FALSE),
tuneGrid = expand.grid(maxdepth = 1:10, mincriterion = c(0.9, 0.95, 0.99)),
controls=ctree_control(minbucket=30)
)
nodes <- where(model$finalModel)
sum_stats <- rbind(mu=tapply(df$score, nodes, tmean),
sd=tapply(df$score, nodes, sd_trim))
node_mean <- as.vector(predict(model$finalModel, df))
node_val <- as.vector(where(model$finalModel, df))
names(node_mean) <- node_val
set.seed(666)
node_mean_cat <- as.numeric(factor(node_mean))
m_norm <- cnorm(raw = df$score, group = node_mean)
## Multiple R2 between raw score and explanatory variable: R2 = 0.2817
## Final solution: 6 terms
## R-Square Adj. = 0.990223
## Final regression model: raw ~ L3 + L4 + L1A2 + L3A3 + L4A1 + L4A3
## Regression function: raw ~ -6.73027274 + (0.0001118271825*L3) + (-6.332476856e-07*L4) + (0.005635372104*L1A2) + (-2.680859514e-07*L3A3) + (-1.131888679e-07*L4A1) + (3.074033824e-09*L4A3)
## Raw Score RMSE = 0.22253
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
m_norm <- cnorm(raw = df$score, group = node_mean, terms=6, scale=c(10, 3))
## Multiple R2 between raw score and explanatory variable: R2 = 0.2817
## User specified solution: 6 terms
## R-Square Adj. = 0.990672
## Final regression model: raw ~ L3 + A3 + L1A2 + L2A3 + L4A1 + L4A2
## Regression function: raw ~ -3.646073729 + (0.008202672499*L3) + (0.004062596245*A3) + (0.02513164717*L1A2) + (-0.0002286916285*L2A3) + (-0.0001323301802*L4A1) + (1.235821613e-05*L4A2)
## Raw Score RMSE = 0.21735
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
tt <- lapply(sort(unique(node_mean)), function(x) normTable(x, model = m_norm, step = 1, minNorm = 1, maxNorm=19))
normTabs <- lapply(tt, function(x) data.frame(Norma=x$norm,
Escore.Inf=c(round(x$raw)),
Escore.Sup=c(round(x$raw[-1])-1, NA),
Percentil=x$percentile))
plot(model$finalModel)
sum_stats[, order(sum_stats[1,])]
## 6 10 5 9 13 12 2
## mu 4.588235 6.450000 7.000000 7.979452 8.363636 9.090909 9.297710
## sd 3.841553 3.001343 2.785559 2.272374 2.095587 1.258857 1.218698
normTabs
## [[1]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 -1 0.1349898
## 2 2 0 -1 0.3830381
## 3 3 0 -1 0.9815329
## 4 4 0 -1 2.2750132
## 5 5 0 -1 4.7790352
## 6 6 0 0 9.1211220
## 7 7 1 1 15.8655254
## 8 8 2 2 25.2492538
## 9 9 3 4 36.9441340
## 10 10 5 5 50.0000000
## 11 11 6 6 63.0558660
## 12 12 7 7 74.7507462
## 13 13 8 8 84.1344746
## 14 14 9 9 90.8788780
## 15 15 10 9 95.2209648
## 16 16 10 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 NA 99.6169619
## 19 19 NA NA 99.8650102
##
## [[2]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 -1 0.1349898
## 2 2 0 -1 0.3830381
## 3 3 0 0 0.9815329
## 4 4 1 1 2.2750132
## 5 5 2 2 4.7790352
## 6 6 3 3 9.1211220
## 7 7 4 4 15.8655254
## 8 8 5 5 25.2492538
## 9 9 6 6 36.9441340
## 10 10 7 6 50.0000000
## 11 11 7 7 63.0558660
## 12 12 8 8 74.7507462
## 13 13 9 9 84.1344746
## 14 14 10 9 90.8788780
## 15 15 10 NA 95.2209648
## 16 16 NA NA 97.7249868
## 17 17 NA NA 99.0184671
## 18 18 NA NA 99.6169619
## 19 19 NA NA 99.8650102
##
## [[3]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 -1 0.1349898
## 2 2 0 0 0.3830381
## 3 3 1 0 0.9815329
## 4 4 1 1 2.2750132
## 5 5 2 2 4.7790352
## 6 6 3 3 9.1211220
## 7 7 4 4 15.8655254
## 8 8 5 5 25.2492538
## 9 9 6 6 36.9441340
## 10 10 7 7 50.0000000
## 11 11 8 7 63.0558660
## 12 12 8 8 74.7507462
## 13 13 9 9 84.1344746
## 14 14 10 9 90.8788780
## 15 15 10 NA 95.2209648
## 16 16 NA NA 97.7249868
## 17 17 NA NA 99.0184671
## 18 18 NA NA 99.6169619
## 19 19 NA NA 99.8650102
##
## [[4]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 0 0.1349898
## 2 2 1 1 0.3830381
## 3 3 2 2 0.9815329
## 4 4 3 3 2.2750132
## 5 5 4 4 4.7790352
## 6 6 5 5 9.1211220
## 7 7 6 5 15.8655254
## 8 8 6 6 25.2492538
## 9 9 7 7 36.9441340
## 10 10 8 8 50.0000000
## 11 11 9 8 63.0558660
## 12 12 9 9 74.7507462
## 13 13 10 9 84.1344746
## 14 14 10 9 90.8788780
## 15 15 10 9 95.2209648
## 16 16 10 NA 97.7249868
## 17 17 NA NA 99.0184671
## 18 18 NA NA 99.6169619
## 19 19 NA NA 99.8650102
##
## [[5]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 0 0.1349898
## 2 2 1 1 0.3830381
## 3 3 2 2 0.9815329
## 4 4 3 3 2.2750132
## 5 5 4 4 4.7790352
## 6 6 5 5 9.1211220
## 7 7 6 6 15.8655254
## 8 8 7 6 25.2492538
## 9 9 7 7 36.9441340
## 10 10 8 8 50.0000000
## 11 11 9 8 63.0558660
## 12 12 9 9 74.7507462
## 13 13 10 9 84.1344746
## 14 14 10 9 90.8788780
## 15 15 10 9 95.2209648
## 16 16 10 NA 97.7249868
## 17 17 NA NA 99.0184671
## 18 18 NA NA 99.6169619
## 19 19 NA NA 99.8650102
##
## [[6]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 1 0.1349898
## 2 2 2 3 0.3830381
## 3 3 4 4 0.9815329
## 4 4 5 5 2.2750132
## 5 5 6 6 4.7790352
## 6 6 7 6 9.1211220
## 7 7 7 7 15.8655254
## 8 8 8 8 25.2492538
## 9 9 9 8 36.9441340
## 10 10 9 8 50.0000000
## 11 11 9 9 63.0558660
## 12 12 10 9 74.7507462
## 13 13 10 9 84.1344746
## 14 14 10 9 90.8788780
## 15 15 10 9 95.2209648
## 16 16 10 9 97.7249868
## 17 17 10 NA 99.0184671
## 18 18 NA NA 99.6169619
## 19 19 NA NA 99.8650102
##
## [[7]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 2 0.1349898
## 2 2 3 3 0.3830381
## 3 3 4 4 0.9815329
## 4 4 5 5 2.2750132
## 5 5 6 6 4.7790352
## 6 6 7 7 9.1211220
## 7 7 8 7 15.8655254
## 8 8 8 8 25.2492538
## 9 9 9 8 36.9441340
## 10 10 9 9 50.0000000
## 11 11 10 9 63.0558660
## 12 12 10 9 74.7507462
## 13 13 10 9 84.1344746
## 14 14 10 9 90.8788780
## 15 15 10 9 95.2209648
## 16 16 10 9 97.7249868
## 17 17 10 NA 99.0184671
## 18 18 NA NA 99.6169619
## 19 19 NA NA 99.8650102
df <- dados %>% select(pdcrs_s4_memoria_operac, idade, escolaridade) %>%
na.omit()
names(df) <- c("score", "idade", "escolaridade")
df <- df %>% mutate(across(where(is.integer), as.numeric))
df <- as.data.frame(df)
model <- train(
score ~ idade + escolaridade,
data = df,
method = 'ctree2',
trControl = trainControl("cv", number = 5, classProbs = FALSE),
tuneGrid = expand.grid(maxdepth = 1:10, mincriterion = c(0.9, 0.95, 0.99)),
controls=ctree_control(minbucket=30)
)
nodes <- where(model$finalModel)
sum_stats <- rbind(mu=tapply(df$score, nodes, tmean),
sd=tapply(df$score, nodes, sd_trim))
node_mean <- as.vector(predict(model$finalModel, df))
node_val <- as.vector(where(model$finalModel, df))
names(node_mean) <- node_val
set.seed(666)
node_mean_cat <- as.numeric(factor(node_mean))
#m_norm <- cnorm(raw = df$score, group = node_mean)
m_norm <- cnorm(raw = df$score, group = node_mean, terms=4, scale=c(10, 3))
## Multiple R2 between raw score and explanatory variable: R2 = 0.2978
## User specified solution: 4 terms
## R-Square Adj. = 0.990865
## Final regression model: raw ~ L1 + A2 + L2A1 + L3A3
## Regression function: raw ~ -2.911948239 + (0.4355508499*L1) + (0.09195020712*A2) + (0.003205709182*L2A1) + (-3.192250221e-06*L3A3)
## Raw Score RMSE = 0.18917
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
tt <- lapply(sort(unique(node_mean)), function(x) normTable(x, model = m_norm, step = 1, minNorm = 1, maxNorm=19))
normTabs <- lapply(tt, function(x) data.frame(Norma=x$norm,
Escore.Inf=c(round(x$raw)),
Escore.Sup=c(round(x$raw[-1])-1, NA),
Percentil=x$percentile))
plot(model$finalModel)
sum_stats[, order(sum_stats[1,])]
## 10 15 3 13 8 6 14 7
## mu 4.062500 4.619048 5.333333 5.465753 6.505882 6.868852 7.000000 7.686567
## sd 1.915451 2.722087 1.765760 2.417528 1.556457 1.672766 2.187678 1.575438
normTabs
## [[1]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 -1 0.1349898
## 2 2 0 -1 0.3830381
## 3 3 0 -1 0.9815329
## 4 4 0 0 2.2750132
## 5 5 1 1 4.7790352
## 6 6 2 1 9.1211220
## 7 7 2 2 15.8655254
## 8 8 3 2 25.2492538
## 9 9 3 3 36.9441340
## 10 10 4 4 50.0000000
## 11 11 5 4 63.0558660
## 12 12 5 5 74.7507462
## 13 13 6 6 84.1344746
## 14 14 7 6 90.8788780
## 15 15 7 7 95.2209648
## 16 16 8 8 97.7249868
## 17 17 9 8 99.0184671
## 18 18 9 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[2]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 -1 0.1349898
## 2 2 0 -1 0.3830381
## 3 3 0 0 0.9815329
## 4 4 1 0 2.2750132
## 5 5 1 1 4.7790352
## 6 6 2 2 9.1211220
## 7 7 3 2 15.8655254
## 8 8 3 3 25.2492538
## 9 9 4 3 36.9441340
## 10 10 4 4 50.0000000
## 11 11 5 5 63.0558660
## 12 12 6 5 74.7507462
## 13 13 6 6 84.1344746
## 14 14 7 7 90.8788780
## 15 15 8 7 95.2209648
## 16 16 8 8 97.7249868
## 17 17 9 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[3]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 0 0.1349898
## 2 2 1 0 0.3830381
## 3 3 1 1 0.9815329
## 4 4 2 1 2.2750132
## 5 5 2 2 4.7790352
## 6 6 3 3 9.1211220
## 7 7 4 3 15.8655254
## 8 8 4 4 25.2492538
## 9 9 5 4 36.9441340
## 10 10 5 5 50.0000000
## 11 11 6 6 63.0558660
## 12 12 7 6 74.7507462
## 13 13 7 7 84.1344746
## 14 14 8 8 90.8788780
## 15 15 9 8 95.2209648
## 16 16 9 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[4]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 0 0.1349898
## 2 2 1 0 0.3830381
## 3 3 1 1 0.9815329
## 4 4 2 2 2.2750132
## 5 5 3 2 4.7790352
## 6 6 3 3 9.1211220
## 7 7 4 3 15.8655254
## 8 8 4 4 25.2492538
## 9 9 5 5 36.9441340
## 10 10 6 5 50.0000000
## 11 11 6 6 63.0558660
## 12 12 7 6 74.7507462
## 13 13 7 7 84.1344746
## 14 14 8 8 90.8788780
## 15 15 9 8 95.2209648
## 16 16 9 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[5]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 1 0.1349898
## 2 2 2 1 0.3830381
## 3 3 2 2 0.9815329
## 4 4 3 3 2.2750132
## 5 5 4 3 4.7790352
## 6 6 4 4 9.1211220
## 7 7 5 4 15.8655254
## 8 8 5 5 25.2492538
## 9 9 6 6 36.9441340
## 10 10 7 6 50.0000000
## 11 11 7 7 63.0558660
## 12 12 8 7 74.7507462
## 13 13 8 8 84.1344746
## 14 14 9 8 90.8788780
## 15 15 9 9 95.2209648
## 16 16 10 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[6]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 2 1 0.1349898
## 2 2 2 2 0.3830381
## 3 3 3 2 0.9815329
## 4 4 3 3 2.2750132
## 5 5 4 3 4.7790352
## 6 6 4 4 9.1211220
## 7 7 5 5 15.8655254
## 8 8 6 5 25.2492538
## 9 9 6 6 36.9441340
## 10 10 7 6 50.0000000
## 11 11 7 7 63.0558660
## 12 12 8 7 74.7507462
## 13 13 8 8 84.1344746
## 14 14 9 8 90.8788780
## 15 15 9 9 95.2209648
## 16 16 10 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[7]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 2 1 0.1349898
## 2 2 2 2 0.3830381
## 3 3 3 2 0.9815329
## 4 4 3 3 2.2750132
## 5 5 4 4 4.7790352
## 6 6 5 4 9.1211220
## 7 7 5 5 15.8655254
## 8 8 6 5 25.2492538
## 9 9 6 6 36.9441340
## 10 10 7 7 50.0000000
## 11 11 8 7 63.0558660
## 12 12 8 8 74.7507462
## 13 13 9 8 84.1344746
## 14 14 9 8 90.8788780
## 15 15 9 9 95.2209648
## 16 16 10 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[8]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 3 2 0.1349898
## 2 2 3 3 0.3830381
## 3 3 4 3 0.9815329
## 4 4 4 4 2.2750132
## 5 5 5 4 4.7790352
## 6 6 5 5 9.1211220
## 7 7 6 6 15.8655254
## 8 8 7 6 25.2492538
## 9 9 7 7 36.9441340
## 10 10 8 7 50.0000000
## 11 11 8 8 63.0558660
## 12 12 9 8 74.7507462
## 13 13 9 8 84.1344746
## 14 14 9 9 90.8788780
## 15 15 10 9 95.2209648
## 16 16 10 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 NA 99.6169619
## 19 19 NA NA 99.8650102
df <- dados %>% select(pdcrs_s5_desenho_relogio, idade, escolaridade) %>%
na.omit()
names(df) <- c("score", "idade", "escolaridade")
df <- df %>% mutate(across(where(is.integer), as.numeric))
df <- as.data.frame(df)
model <- train(
score ~ idade + escolaridade,
data = df,
method = 'ctree2',
trControl = trainControl("cv", number = 5, classProbs = FALSE),
tuneGrid = expand.grid(maxdepth = 1:10, mincriterion = c(0.9, 0.95, 0.99)),
controls=ctree_control(minbucket=30)
)
nodes <- where(model$finalModel)
sum_stats <- rbind(mu=tapply(df$score, nodes, tmean),
sd=tapply(df$score, nodes, sd_trim))
node_mean <- as.vector(predict(model$finalModel, df))
node_val <- as.vector(where(model$finalModel, df))
names(node_mean) <- node_val
set.seed(666)
node_mean_cat <- as.numeric(factor(node_mean))
m_norm <- cnorm(raw = df$score, group = node_mean)
## Multiple R2 between raw score and explanatory variable: R2 = 0.0895
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 10 linear dependencies found
## Reordering variables and trying again:
## Final solution: 3 terms
## R-Square Adj. = 0.991768
## Final regression model: raw ~ L1 + A3 + L4A4
## Regression function: raw ~ -12.58456493 + (0.3440753641*L1) + (0.01063968868*A3) + (-6.721774072e-11*L4A4)
## Raw Score RMSE = 0.32425
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
m_norm <- cnorm(raw = df$score, group = node_mean, terms=3, scale=c(10, 3))
## Multiple R2 between raw score and explanatory variable: R2 = 0.0895
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 10 linear dependencies found
## Reordering variables and trying again:
## User specified solution: 3 terms
## R-Square Adj. = 0.993273
## Final regression model: raw ~ L2 + L2A2 + L2A3
## Regression function: raw ~ 4.26291058 + (0.01079780879*L2) + (9.247224762e-05*L2A2) + (3.618593879e-05*L2A3)
## Raw Score RMSE = 0.88126
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
tt <- lapply(sort(unique(node_mean)), function(x) normTable(x, model = m_norm, step = 1, minNorm = 1, maxNorm=19))
normTabs <- lapply(tt, function(x) data.frame(Norma=x$norm,
Escore.Inf=c(round(x$raw)),
Escore.Sup=c(round(x$raw[-1])-1, NA),
Percentil=x$percentile))
plot(model$finalModel)
sum_stats[, order(sum_stats[1,])]
## 4 5 2
## mu 6.982456 8.233533 9.150259
## sd 2.753359 2.113737 1.562951
normTabs
## [[1]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 4 3 0.1349898
## 2 2 4 3 0.3830381
## 3 3 4 4 0.9815329
## 4 4 5 4 2.2750132
## 5 5 5 4 4.7790352
## 6 6 5 5 9.1211220
## 7 7 6 5 15.8655254
## 8 8 6 5 25.2492538
## 9 9 6 6 36.9441340
## 10 10 7 6 50.0000000
## 11 11 7 7 63.0558660
## 12 12 8 8 74.7507462
## 13 13 9 8 84.1344746
## 14 14 9 9 90.8788780
## 15 15 10 9 95.2209648
## 16 16 10 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[2]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 4 3 0.1349898
## 2 2 4 4 0.3830381
## 3 3 5 4 0.9815329
## 4 4 5 4 2.2750132
## 5 5 5 4 4.7790352
## 6 6 5 5 9.1211220
## 7 7 6 5 15.8655254
## 8 8 6 6 25.2492538
## 9 9 7 7 36.9441340
## 10 10 8 7 50.0000000
## 11 11 8 8 63.0558660
## 12 12 9 9 74.7507462
## 13 13 10 9 84.1344746
## 14 14 10 9 90.8788780
## 15 15 10 9 95.2209648
## 16 16 10 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[3]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 4 3 0.1349898
## 2 2 4 4 0.3830381
## 3 3 5 4 0.9815329
## 4 4 5 4 2.2750132
## 5 5 5 5 4.7790352
## 6 6 6 5 9.1211220
## 7 7 6 6 15.8655254
## 8 8 7 7 25.2492538
## 9 9 8 7 36.9441340
## 10 10 8 8 50.0000000
## 11 11 9 9 63.0558660
## 12 12 10 9 74.7507462
## 13 13 10 9 84.1344746
## 14 14 10 9 90.8788780
## 15 15 10 9 95.2209648
## 16 16 10 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
df <- dados %>% select(pdcrs_s6_copia_relogio, idade, escolaridade) %>%
na.omit()
names(df) <- c("score", "idade", "escolaridade")
df <- df %>% mutate(across(where(is.integer), as.numeric))
df <- as.data.frame(df)
model <- train(
score ~ idade + escolaridade,
data = df,
method = 'ctree2',
trControl = trainControl("cv", number = 5, classProbs = FALSE),
tuneGrid = expand.grid(maxdepth = 1:10, mincriterion = c(0.9, 0.95, 0.99)),
controls=ctree_control(minbucket=30)
)
nodes <- where(model$finalModel)
sum_stats <- rbind(mu=tapply(df$score, nodes, tmean),
sd=tapply(df$score, nodes, sd_trim))
node_mean <- as.vector(predict(model$finalModel, df))
node_val <- as.vector(where(model$finalModel, df))
names(node_mean) <- node_val
set.seed(666)
node_mean_cat <- as.numeric(factor(node_mean))
m_norm <- cnorm(raw = df$score, group = node_mean)
## Multiple R2 between raw score and explanatory variable: R2 = 0.222
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 5 linear dependencies found
## Reordering variables and trying again:
## Final solution: 5 terms
## R-Square Adj. = 0.991063
## Final regression model: raw ~ L1 + L1A1 + L2A1 + L3A2 + L3A4
## Regression function: raw ~ 5.000690568 + (-0.7477277565*L1) + (0.05370316593*L1A1) + (0.001527545864*L2A1) + (-8.369389021e-07*L3A2) + (-9.806193176e-09*L3A4)
## Raw Score RMSE = 0.20954
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
m_norm <- cnorm(raw = df$score, group = node_mean, terms=5, scale=c(10, 3))
## Multiple R2 between raw score and explanatory variable: R2 = 0.222
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 5 linear dependencies found
## Reordering variables and trying again:
## User specified solution: 5 terms
## R-Square Adj. = 0.993423
## Final regression model: raw ~ L1A3 + L2A3 + L3A3 + L4A2 + L4A4
## Regression function: raw ~ 0.4251716756 + (0.004075104075*L1A3) + (-0.0008317000032*L2A3) + (8.658770956e-05*L3A3) + (-4.003221505e-06*L4A2) + (-3.028694694e-07*L4A4)
## Raw Score RMSE = 0.18312
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
tt <- lapply(sort(unique(node_mean)), function(x) normTable(x, model = m_norm, step = 1, minNorm = 1, maxNorm=19))
normTabs <- lapply(tt, function(x) data.frame(Norma=x$norm,
Escore.Inf=c(round(x$raw)),
Escore.Sup=c(round(x$raw[-1])-1, NA),
Percentil=x$percentile))
plot(model$finalModel)
sum_stats[, order(sum_stats[1,])]
## 4 6 9 2 8
## mu 8.452830 9.368421 9.5675676 10 10
## sd 1.748844 1.328646 0.7564687 0 0
normTabs
## [[1]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 2 2 0.1349898
## 2 2 3 3 0.3830381
## 3 3 4 4 0.9815329
## 4 4 5 4 2.2750132
## 5 5 5 4 4.7790352
## 6 6 5 5 9.1211220
## 7 7 6 6 15.8655254
## 8 8 7 6 25.2492538
## 9 9 7 7 36.9441340
## 10 10 8 8 50.0000000
## 11 11 9 9 63.0558660
## 12 12 10 9 74.7507462
## 13 13 10 9 84.1344746
## 14 14 10 NA 90.8788780
## 15 15 NA NA 95.2209648
## 16 16 NA NA 97.7249868
## 17 17 NA NA 99.0184671
## 18 18 NA NA 99.6169619
## 19 19 NA NA 99.8650102
##
## [[2]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 3 3 0.1349898
## 2 2 4 4 0.3830381
## 3 3 5 5 0.9815329
## 4 4 6 6 2.2750132
## 5 5 7 6 4.7790352
## 6 6 7 7 9.1211220
## 7 7 8 7 15.8655254
## 8 8 8 8 25.2492538
## 9 9 9 9 36.9441340
## 10 10 10 9 50.0000000
## 11 11 10 NA 63.0558660
## 12 12 NA NA 74.7507462
## 13 13 NA NA 84.1344746
## 14 14 NA NA 90.8788780
## 15 15 NA NA 95.2209648
## 16 16 NA NA 97.7249868
## 17 17 NA NA 99.0184671
## 18 18 NA NA 99.6169619
## 19 19 NA NA 99.8650102
##
## [[3]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 3 4 0.1349898
## 2 2 5 5 0.3830381
## 3 3 6 6 0.9815329
## 4 4 7 6 2.2750132
## 5 5 7 7 4.7790352
## 6 6 8 7 9.1211220
## 7 7 8 8 15.8655254
## 8 8 9 8 25.2492538
## 9 9 9 9 36.9441340
## 10 10 10 9 50.0000000
## 11 11 10 NA 63.0558660
## 12 12 NA NA 74.7507462
## 13 13 NA NA 84.1344746
## 14 14 NA NA 90.8788780
## 15 15 NA NA 95.2209648
## 16 16 NA NA 97.7249868
## 17 17 NA NA 99.0184671
## 18 18 NA NA 99.6169619
## 19 19 NA NA 99.8650102
##
## [[4]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 3 4 0.1349898
## 2 2 5 6 0.3830381
## 3 3 7 6 0.9815329
## 4 4 7 7 2.2750132
## 5 5 8 8 4.7790352
## 6 6 9 8 9.1211220
## 7 7 9 9 15.8655254
## 8 8 10 9 25.2492538
## 9 9 10 9 36.9441340
## 10 10 10 NA 50.0000000
## 11 11 NA NA 63.0558660
## 12 12 NA NA 74.7507462
## 13 13 NA NA 84.1344746
## 14 14 NA NA 90.8788780
## 15 15 NA NA 95.2209648
## 16 16 NA NA 97.7249868
## 17 17 NA NA 99.0184671
## 18 18 NA NA 99.6169619
## 19 19 NA NA 99.8650102
##
## [[5]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 4 5 0.1349898
## 2 2 6 6 0.3830381
## 3 3 7 7 0.9815329
## 4 4 8 7 2.2750132
## 5 5 8 8 4.7790352
## 6 6 9 8 9.1211220
## 7 7 9 9 15.8655254
## 8 8 10 9 25.2492538
## 9 9 10 9 36.9441340
## 10 10 10 NA 50.0000000
## 11 11 NA NA 63.0558660
## 12 12 NA NA 74.7507462
## 13 13 NA NA 84.1344746
## 14 14 NA NA 90.8788780
## 15 15 NA NA 95.2209648
## 16 16 NA NA 97.7249868
## 17 17 NA NA 99.0184671
## 18 18 NA NA 99.6169619
## 19 19 NA NA 99.8650102
df <- dados %>% select(pdcrs_s7_memoria_tardia, idade, escolaridade) %>%
na.omit()
names(df) <- c("score", "idade", "escolaridade")
df <- df %>% mutate(across(where(is.integer), as.numeric))
df <- as.data.frame(df)
model <- train(
score ~ idade + escolaridade,
data = df,
method = 'ctree2',
trControl = trainControl("cv", number = 5, classProbs = FALSE),
tuneGrid = expand.grid(maxdepth = 1:10, mincriterion = c(0.9, 0.95, 0.99)),
controls=ctree_control(minbucket=30)
)
nodes <- where(model$finalModel)
sum_stats <- rbind(mu=tapply(df$score, nodes, tmean),
sd=tapply(df$score, nodes, sd_trim))
node_mean <- as.vector(predict(model$finalModel, df))
node_val <- as.vector(where(model$finalModel, df))
names(node_mean) <- node_val
set.seed(666)
node_mean_cat <- as.numeric(factor(node_mean))
m_norm <- cnorm(raw = df$score, group = node_mean)
## Multiple R2 between raw score and explanatory variable: R2 = 0.3368
## Final solution: 3 terms
## R-Square Adj. = 0.994231
## Final regression model: raw ~ L1 + A2 + L2A4
## Regression function: raw ~ -9.726937339 + (0.2349699141*L1) + (0.130146237*A2) + (-2.187507101e-07*L2A4)
## Raw Score RMSE = 0.18285
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
m_norm <- cnorm(raw = df$score, group = node_mean, terms=3, scale=c(10, 3))
## Multiple R2 between raw score and explanatory variable: R2 = 0.3368
## User specified solution: 3 terms
## R-Square Adj. = 0.994299
## Final regression model: raw ~ L1 + A1 + L2A2
## Regression function: raw ~ -8.834757384 + (0.8194093097*L1) + (1.244629428*A1) + (-0.0002020409453*L2A2)
## Raw Score RMSE = 0.18176
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
tt <- lapply(sort(unique(node_mean)), function(x) normTable(x, model = m_norm, step = 1, minNorm = 1, maxNorm=19))
normTabs <- lapply(tt, function(x) data.frame(Norma=x$norm,
Escore.Inf=c(round(x$raw)),
Escore.Sup=c(round(x$raw[-1])-1, NA),
Percentil=x$percentile))
plot(model$finalModel)
sum_stats[, order(sum_stats[1,])]
## 12 17 3 11 15 7 16 8
## mu 3.096774 3.617647 3.789474 4.285714 4.925373 6.279070 6.482759 7.000000
## sd 2.938714 2.448074 1.784181 2.475958 2.928549 2.503002 2.276424 1.895826
## 5
## mu 8.111111
## sd 1.773465
normTabs
## [[1]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 -1 0.1349898
## 2 2 0 -1 0.3830381
## 3 3 0 -1 0.9815329
## 4 4 0 -1 2.2750132
## 5 5 0 -1 4.7790352
## 6 6 0 0 9.1211220
## 7 7 1 1 15.8655254
## 8 8 2 1 25.2492538
## 9 9 2 2 36.9441340
## 10 10 3 3 50.0000000
## 11 11 4 4 63.0558660
## 12 12 5 4 74.7507462
## 13 13 5 5 84.1344746
## 14 14 6 6 90.8788780
## 15 15 7 7 95.2209648
## 16 16 8 7 97.7249868
## 17 17 8 8 99.0184671
## 18 18 9 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[2]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 -1 0.1349898
## 2 2 0 -1 0.3830381
## 3 3 0 -1 0.9815329
## 4 4 0 -1 2.2750132
## 5 5 0 -1 4.7790352
## 6 6 0 0 9.1211220
## 7 7 1 1 15.8655254
## 8 8 2 2 25.2492538
## 9 9 3 3 36.9441340
## 10 10 4 3 50.0000000
## 11 11 4 4 63.0558660
## 12 12 5 5 74.7507462
## 13 13 6 6 84.1344746
## 14 14 7 6 90.8788780
## 15 15 7 7 95.2209648
## 16 16 8 8 97.7249868
## 17 17 9 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[3]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 -1 0.1349898
## 2 2 0 -1 0.3830381
## 3 3 0 -1 0.9815329
## 4 4 0 -1 2.2750132
## 5 5 0 0 4.7790352
## 6 6 1 1 9.1211220
## 7 7 2 1 15.8655254
## 8 8 2 2 25.2492538
## 9 9 3 3 36.9441340
## 10 10 4 4 50.0000000
## 11 11 5 4 63.0558660
## 12 12 5 5 74.7507462
## 13 13 6 6 84.1344746
## 14 14 7 7 90.8788780
## 15 15 8 7 95.2209648
## 16 16 8 8 97.7249868
## 17 17 9 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[4]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 -1 0.1349898
## 2 2 0 -1 0.3830381
## 3 3 0 -1 0.9815329
## 4 4 0 0 2.2750132
## 5 5 1 0 4.7790352
## 6 6 1 1 9.1211220
## 7 7 2 2 15.8655254
## 8 8 3 3 25.2492538
## 9 9 4 3 36.9441340
## 10 10 4 4 50.0000000
## 11 11 5 5 63.0558660
## 12 12 6 6 74.7507462
## 13 13 7 6 84.1344746
## 14 14 7 7 90.8788780
## 15 15 8 8 95.2209648
## 16 16 9 8 97.7249868
## 17 17 9 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[5]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 -1 0.1349898
## 2 2 0 -1 0.3830381
## 3 3 0 -1 0.9815329
## 4 4 0 0 2.2750132
## 5 5 1 1 4.7790352
## 6 6 2 2 9.1211220
## 7 7 3 2 15.8655254
## 8 8 3 3 25.2492538
## 9 9 4 4 36.9441340
## 10 10 5 5 50.0000000
## 11 11 6 5 63.0558660
## 12 12 6 6 74.7507462
## 13 13 7 7 84.1344746
## 14 14 8 7 90.8788780
## 15 15 8 8 95.2209648
## 16 16 9 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[6]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 0 0.1349898
## 2 2 1 0 0.3830381
## 3 3 1 1 0.9815329
## 4 4 2 2 2.2750132
## 5 5 3 3 4.7790352
## 6 6 4 3 9.1211220
## 7 7 4 4 15.8655254
## 8 8 5 5 25.2492538
## 9 9 6 5 36.9441340
## 10 10 6 6 50.0000000
## 11 11 7 7 63.0558660
## 12 12 8 7 74.7507462
## 13 13 8 8 84.1344746
## 14 14 9 8 90.8788780
## 15 15 9 9 95.2209648
## 16 16 10 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[7]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 0 0 0.1349898
## 2 2 1 1 0.3830381
## 3 3 2 1 0.9815329
## 4 4 2 2 2.2750132
## 5 5 3 3 4.7790352
## 6 6 4 4 9.1211220
## 7 7 5 4 15.8655254
## 8 8 5 5 25.2492538
## 9 9 6 6 36.9441340
## 10 10 7 6 50.0000000
## 11 11 7 7 63.0558660
## 12 12 8 7 74.7507462
## 13 13 8 8 84.1344746
## 14 14 9 9 90.8788780
## 15 15 10 9 95.2209648
## 16 16 10 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[8]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 0 0.1349898
## 2 2 1 1 0.3830381
## 3 3 2 2 0.9815329
## 4 4 3 3 2.2750132
## 5 5 4 3 4.7790352
## 6 6 4 4 9.1211220
## 7 7 5 5 15.8655254
## 8 8 6 5 25.2492538
## 9 9 6 6 36.9441340
## 10 10 7 7 50.0000000
## 11 11 8 7 63.0558660
## 12 12 8 8 74.7507462
## 13 13 9 8 84.1344746
## 14 14 9 9 90.8788780
## 15 15 10 9 95.2209648
## 16 16 10 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
##
## [[9]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 2 2 0.1349898
## 2 2 3 3 0.3830381
## 3 3 4 3 0.9815329
## 4 4 4 4 2.2750132
## 5 5 5 5 4.7790352
## 6 6 6 5 9.1211220
## 7 7 6 6 15.8655254
## 8 8 7 7 25.2492538
## 9 9 8 7 36.9441340
## 10 10 8 8 50.0000000
## 11 11 9 8 63.0558660
## 12 12 9 9 74.7507462
## 13 13 10 9 84.1344746
## 14 14 10 9 90.8788780
## 15 15 10 9 95.2209648
## 16 16 10 9 97.7249868
## 17 17 10 9 99.0184671
## 18 18 10 9 99.6169619
## 19 19 10 NA 99.8650102
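Before turning to the next sub-item, a quick sketch of how the per-node statistics can be used (hypothetical values; output not shown): given a raw score and the terminal node a participant falls into, a robust z-score follows directly from the trimmed mean and SD stored in sum_stats.
# Hypothetical raw score and terminal node (a column name of sum_stats)
raw_obs <- 5
no <- "12"
(raw_obs - sum_stats["mu", no]) / sum_stats["sd", no]   # robust z-score within that node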
# Sub-item 8 (alternating fluency): same partitioning pipeline as above
df <- dados %>% select(pdcrs_s8_fluencia_alternada_2, idade, escolaridade) %>%
  na.omit()
names(df) <- c("score", "idade", "escolaridade")
df <- df %>% mutate(across(where(is.integer), as.numeric))
df <- as.data.frame(df)
# Conditional inference tree (ctree2) tuned by 5-fold cross-validation over tree
# depth and the significance criterion of the splits; nodes keep at least 30 cases
model <- train(
  score ~ idade + escolaridade,
  data = df,
  method = 'ctree2',
  trControl = trainControl("cv", number = 5, classProbs = FALSE),
  tuneGrid = expand.grid(maxdepth = 1:10, mincriterion = c(0.9, 0.95, 0.99)),
  controls = ctree_control(minbucket = 30)
)
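# Hypothetical check (output not shown): the tuning values selected by the 5-fold
# cross-validation and the full resampling grid can be inspected with
# model$bestTune and model$results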
# Terminal node of each participant and the robust (20% trimmed) mean and SD per node
nodes <- where(model$finalModel)
sum_stats <- rbind(mu = tapply(df$score, nodes, tmean),
                   sd = tapply(df$score, nodes, sd_trim))
# Node-wise predicted mean for each participant, used as the grouping variable in cNORM
node_mean <- as.vector(predict(model$finalModel, df))
node_val <- as.vector(where(model$finalModel, df))
names(node_mean) <- node_val
set.seed(666)
node_mean_cat <- as.numeric(factor(node_mean))
# Continuous norming with the node means as the explanatory/grouping variable
m_norm <- cnorm(raw = df$score, group = node_mean)
## Multiple R2 between raw score and explanatory variable: R2 = 0.2759
## Final solution: 3 terms
## R-Square Adj. = 0.991897
## Final regression model: raw ~ L1A4 + L2A1 + L3A3
## Regression function: raw ~ -1.914389702 + (3.132403417e-06*L1A4) + (0.0005342515377*L2A1) + (-2.490743574e-08*L3A3)
## Raw Score RMSE = 0.36995
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
# Refit with a user-specified 3-term solution, expressing norms on a mean-10, SD-3 scale
m_norm <- cnorm(raw = df$score, group = node_mean, terms=3, scale=c(10, 3))
## Multiple R2 between raw score and explanatory variable: R2 = 0.2759
## User specified solution: 3 terms
## R-Square Adj. = 0.9929
## Final regression model: raw ~ L1A3 + L2A1 + L2A3
## Regression function: raw ~ 0.2119421836 + (0.0006498383601*L1A3) + (0.008863380506*L2A1) + (-5.854438675e-05*L2A3)
## Raw Score RMSE = 0.34629
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
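Before building the norm tables, the fit of this 3-term solution could be inspected with the helpers suggested in the cNORM message above (a sketch; plots not shown here):
plotPercentiles(m_norm)   # modelled percentile curves against the manifest percentiles
plotSubset(m_norm)        # fit indices across the candidate numbers of terms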
tt <- lapply(sort(unique(node_mean)), function(x) normTable(x, model = m_norm, step = 1, minNorm = 1, maxNorm=19))
normTabs <- lapply(tt, function(x) data.frame(Norma=x$norm,
Escore.Inf=c(round(x$raw)),
Escore.Sup=c(round(x$raw[-1])-1, NA),
Percentil=x$percentile))
plot(model$finalModel)
sum_stats[, order(sum_stats[1,])]
## 10 15 3 13 14 8 6 7
## mu 5.875000 7.176471 8.606061 8.724138 10.714286 10.784091 11.466667 13.343284
## sd 2.697272 3.981885 3.375698 4.160122 4.000434 3.855357 3.828847 3.623697
normTabs
## [[1]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 0 0.1349898
## 2 2 1 0 0.3830381
## 3 3 1 0 0.9815329
## 4 4 1 1 2.2750132
## 5 5 2 2 4.7790352
## 6 6 3 2 9.1211220
## 7 7 3 3 15.8655254
## 8 8 4 4 25.2492538
## 9 9 5 5 36.9441340
## 10 10 6 6 50.0000000
## 11 11 7 7 63.0558660
## 12 12 8 8 74.7507462
## 13 13 9 9 84.1344746
## 14 14 10 11 90.8788780
## 15 15 12 12 95.2209648
## 16 16 13 13 97.7249868
## 17 17 14 15 99.0184671
## 18 18 16 17 99.6169619
## 19 19 18 NA 99.8650102
##
## [[2]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 0 0.1349898
## 2 2 1 0 0.3830381
## 3 3 1 1 0.9815329
## 4 4 2 2 2.2750132
## 5 5 3 2 4.7790352
## 6 6 3 3 9.1211220
## 7 7 4 4 15.8655254
## 8 8 5 5 25.2492538
## 9 9 6 6 36.9441340
## 10 10 7 7 50.0000000
## 11 11 8 8 63.0558660
## 12 12 9 10 74.7507462
## 13 13 11 11 84.1344746
## 14 14 12 13 90.8788780
## 15 15 14 14 95.2209648
## 16 16 15 16 97.7249868
## 17 17 17 18 99.0184671
## 18 18 19 19 99.6169619
## 19 19 20 NA 99.8650102
##
## [[3]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 0 0.1349898
## 2 2 1 1 0.3830381
## 3 3 2 2 0.9815329
## 4 4 3 2 2.2750132
## 5 5 3 3 4.7790352
## 6 6 4 4 9.1211220
## 7 7 5 5 15.8655254
## 8 8 6 6 25.2492538
## 9 9 7 8 36.9441340
## 10 10 9 9 50.0000000
## 11 11 10 10 63.0558660
## 12 12 11 12 74.7507462
## 13 13 13 13 84.1344746
## 14 14 14 15 90.8788780
## 15 15 16 16 95.2209648
## 16 16 17 18 97.7249868
## 17 17 19 20 99.0184671
## 18 18 21 20 99.6169619
## 19 19 21 NA 99.8650102
##
## [[4]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 0 0.1349898
## 2 2 1 1 0.3830381
## 3 3 2 2 0.9815329
## 4 4 3 3 2.2750132
## 5 5 4 3 4.7790352
## 6 6 4 4 9.1211220
## 7 7 5 5 15.8655254
## 8 8 6 6 25.2492538
## 9 9 7 8 36.9441340
## 10 10 9 9 50.0000000
## 11 11 10 10 63.0558660
## 12 12 11 12 74.7507462
## 13 13 13 13 84.1344746
## 14 14 14 15 90.8788780
## 15 15 16 16 95.2209648
## 16 16 17 18 97.7249868
## 17 17 19 20 99.0184671
## 18 18 21 20 99.6169619
## 19 19 21 NA 99.8650102
##
## [[5]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 1 0.1349898
## 2 2 2 2 0.3830381
## 3 3 3 3 0.9815329
## 4 4 4 4 2.2750132
## 5 5 5 5 4.7790352
## 6 6 6 6 9.1211220
## 7 7 7 7 15.8655254
## 8 8 8 8 25.2492538
## 9 9 9 10 36.9441340
## 10 10 11 11 50.0000000
## 11 11 12 12 63.0558660
## 12 12 13 14 74.7507462
## 13 13 15 15 84.1344746
## 14 14 16 17 90.8788780
## 15 15 18 18 95.2209648
## 16 16 19 20 97.7249868
## 17 17 21 20 99.0184671
## 18 18 21 20 99.6169619
## 19 19 21 NA 99.8650102
##
## [[6]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 1 0.1349898
## 2 2 2 2 0.3830381
## 3 3 3 3 0.9815329
## 4 4 4 4 2.2750132
## 5 5 5 5 4.7790352
## 6 6 6 6 9.1211220
## 7 7 7 7 15.8655254
## 8 8 8 8 25.2492538
## 9 9 9 10 36.9441340
## 10 10 11 11 50.0000000
## 11 11 12 12 63.0558660
## 12 12 13 14 74.7507462
## 13 13 15 15 84.1344746
## 14 14 16 17 90.8788780
## 15 15 18 18 95.2209648
## 16 16 19 20 97.7249868
## 17 17 21 20 99.0184671
## 18 18 21 20 99.6169619
## 19 19 21 NA 99.8650102
##
## [[7]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 1 0.1349898
## 2 2 2 2 0.3830381
## 3 3 3 3 0.9815329
## 4 4 4 5 2.2750132
## 5 5 6 6 4.7790352
## 6 6 7 7 9.1211220
## 7 7 8 8 15.8655254
## 8 8 9 9 25.2492538
## 9 9 10 10 36.9441340
## 10 10 11 12 50.0000000
## 11 11 13 13 63.0558660
## 12 12 14 14 74.7507462
## 13 13 15 16 84.1344746
## 14 14 17 17 90.8788780
## 15 15 18 18 95.2209648
## 16 16 19 20 97.7249868
## 17 17 21 20 99.0184671
## 18 18 21 20 99.6169619
## 19 19 21 NA 99.8650102
##
## [[8]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 2 2 0.1349898
## 2 2 3 4 0.3830381
## 3 3 5 5 0.9815329
## 4 4 6 6 2.2750132
## 5 5 7 7 4.7790352
## 6 6 8 9 9.1211220
## 7 7 10 10 15.8655254
## 8 8 11 11 25.2492538
## 9 9 12 12 36.9441340
## 10 10 13 13 50.0000000
## 11 11 14 15 63.0558660
## 12 12 16 16 74.7507462
## 13 13 17 17 84.1344746
## 14 14 18 18 90.8788780
## 15 15 19 19 95.2209648
## 16 16 20 20 97.7249868
## 17 17 21 20 99.0184671
## 18 18 21 20 99.6169619
## 19 19 21 NA 99.8650102
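The elements of normTabs follow the order of sort(unique(node_mean)), so the table that applies to a given participant can be pulled by matching that participant's predicted node mean (a sketch with a hypothetical row index; output not shown):
i <- 1                                              # hypothetical participant (row of df)
idx <- match(node_mean[i], sort(unique(node_mean)))
normTabs[[idx]]                                     # norm table for that participant's terminal node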
# Sub-item 9 (action fluency): same pipeline as the previous sub-item
df <- dados %>% select(pdcrs_s9_fluencia_acoes_2, idade, escolaridade) %>%
na.omit()
names(df) <- c("score", "idade", "escolaridade")
df <- df %>% mutate(across(where(is.integer), as.numeric))
df <- as.data.frame(df)
model <- train(
score ~ idade + escolaridade,
data = df,
method = 'ctree2',
trControl = trainControl("cv", number = 5, classProbs = FALSE),
tuneGrid = expand.grid(maxdepth = 1:10, mincriterion = c(0.9, 0.95, 0.99)),
controls=ctree_control(minbucket=30)
)
nodes <- where(model$finalModel)
sum_stats <- rbind(mu=tapply(df$score, nodes, tmean),
sd=tapply(df$score, nodes, sd_trim))
node_mean <- as.vector(predict(model$finalModel, df))
node_val <- as.vector(where(model$finalModel, df))
names(node_mean) <- node_val
set.seed(666)
node_mean_cat <- as.numeric(factor(node_mean))
m_norm <- cnorm(raw = df$score, group = node_mean)
## Multiple R2 between raw score and explanatory variable: R2 = 0.3072
## Final solution: 5 terms
## R-Square Adj. = 0.982615
## Final regression model: raw ~ L3 + L4 + L1A2 + L3A2 + L4A1
## Regression function: raw ~ -2.976844409 + (0.0001269934649*L3) + (-1.777308671e-06*L4) + (0.001315197377*L1A2) + (-4.275331425e-07*L3A2) + (1.085710644e-07*L4A1)
## Raw Score RMSE = 0.68312
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
m_norm <- cnorm(raw = df$score, group = node_mean, terms=5, scale=c(10, 3))
## Multiple R2 between raw score and explanatory variable: R2 = 0.3072
## User specified solution: 5 terms
## R-Square Adj. = 0.983529
## Final regression model: raw ~ L3 + L4 + L1A2 + L2A2 + L3A1
## Regression function: raw ~ -0.2204017801 + (0.005703403465*L3) + (-0.0004167682521*L4) + (0.01015508171*L1A2) + (-0.0009032046116*L2A2) + (0.0007234084789*L3A1)
## Raw Score RMSE = 0.66492
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
tt <- lapply(sort(unique(node_mean)), function(x) normTable(x, model = m_norm, step = 1, minNorm = 1, maxNorm=19))
normTabs <- lapply(tt, function(x) data.frame(Norma=x$norm,
Escore.Inf=c(round(x$raw)),
Escore.Sup=c(round(x$raw[-1])-1, NA),
Percentil=x$percentile))
plot(model$finalModel)
sum_stats[, order(sum_stats[1,])]
## 4 12 3 10 11 14 6 15
## mu 8.551020 9.965517 11.454545 12.379310 14.408163 15.181818 16.361345 18.09091
## sd 3.624603 5.149929 4.316316 4.495878 4.233323 4.866607 4.133667 4.34361
normTabs
## [[1]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 0 0.1349898
## 2 2 1 1 0.3830381
## 3 3 2 1 0.9815329
## 4 4 2 2 2.2750132
## 5 5 3 3 4.7790352
## 6 6 4 4 9.1211220
## 7 7 5 5 15.8655254
## 8 8 6 6 25.2492538
## 9 9 7 7 36.9441340
## 10 10 8 9 50.0000000
## 11 11 10 10 63.0558660
## 12 12 11 12 74.7507462
## 13 13 13 13 84.1344746
## 14 14 14 14 90.8788780
## 15 15 15 15 95.2209648
## 16 16 16 16 97.7249868
## 17 17 17 17 99.0184671
## 18 18 18 17 99.6169619
## 19 19 18 NA 99.8650102
##
## [[2]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 1 0.1349898
## 2 2 2 2 0.3830381
## 3 3 3 3 0.9815329
## 4 4 4 3 2.2750132
## 5 5 4 4 4.7790352
## 6 6 5 5 9.1211220
## 7 7 6 7 15.8655254
## 8 8 8 8 25.2492538
## 9 9 9 9 36.9441340
## 10 10 10 11 50.0000000
## 11 11 12 12 63.0558660
## 12 12 13 14 74.7507462
## 13 13 15 16 84.1344746
## 14 14 17 17 90.8788780
## 15 15 18 19 95.2209648
## 16 16 20 20 97.7249868
## 17 17 21 21 99.0184671
## 18 18 22 21 99.6169619
## 19 19 22 NA 99.8650102
##
## [[3]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 1 0.1349898
## 2 2 2 2 0.3830381
## 3 3 3 3 0.9815329
## 4 4 4 4 2.2750132
## 5 5 5 5 4.7790352
## 6 6 6 6 9.1211220
## 7 7 7 7 15.8655254
## 8 8 8 9 25.2492538
## 9 9 10 10 36.9441340
## 10 10 11 12 50.0000000
## 11 11 13 13 63.0558660
## 12 12 14 15 74.7507462
## 13 13 16 17 84.1344746
## 14 14 18 18 90.8788780
## 15 15 19 20 95.2209648
## 16 16 21 21 97.7249868
## 17 17 22 22 99.0184671
## 18 18 23 23 99.6169619
## 19 19 24 NA 99.8650102
##
## [[4]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 1 1 0.1349898
## 2 2 2 3 0.3830381
## 3 3 4 4 0.9815329
## 4 4 5 5 2.2750132
## 5 5 6 6 4.7790352
## 6 6 7 7 9.1211220
## 7 7 8 8 15.8655254
## 8 8 9 10 25.2492538
## 9 9 11 11 36.9441340
## 10 10 12 13 50.0000000
## 11 11 14 14 63.0558660
## 12 12 15 16 74.7507462
## 13 13 17 18 84.1344746
## 14 14 19 19 90.8788780
## 15 15 20 21 95.2209648
## 16 16 22 23 97.7249868
## 17 17 24 24 99.0184671
## 18 18 25 25 99.6169619
## 19 19 26 NA 99.8650102
##
## [[5]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 2 2 0.1349898
## 2 2 3 4 0.3830381
## 3 3 5 5 0.9815329
## 4 4 6 7 2.2750132
## 5 5 8 8 4.7790352
## 6 6 9 9 9.1211220
## 7 7 10 10 15.8655254
## 8 8 11 12 25.2492538
## 9 9 13 13 36.9441340
## 10 10 14 15 50.0000000
## 11 11 16 17 63.0558660
## 12 12 18 18 74.7507462
## 13 13 19 20 84.1344746
## 14 14 21 22 90.8788780
## 15 15 23 24 95.2209648
## 16 16 25 25 97.7249868
## 17 17 26 27 99.0184671
## 18 18 28 28 99.6169619
## 19 19 29 NA 99.8650102
##
## [[6]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 2 3 0.1349898
## 2 2 4 4 0.3830381
## 3 3 5 6 0.9815329
## 4 4 7 7 2.2750132
## 5 5 8 9 4.7790352
## 6 6 10 10 9.1211220
## 7 7 11 11 15.8655254
## 8 8 12 13 25.2492538
## 9 9 14 14 36.9441340
## 10 10 15 16 50.0000000
## 11 11 17 17 63.0558660
## 12 12 18 19 74.7507462
## 13 13 20 21 84.1344746
## 14 14 22 22 90.8788780
## 15 15 23 24 95.2209648
## 16 16 25 26 97.7249868
## 17 17 27 27 99.0184671
## 18 18 28 28 99.6169619
## 19 19 29 NA 99.8650102
##
## [[7]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 2 4 0.1349898
## 2 2 5 5 0.3830381
## 3 3 6 7 0.9815329
## 4 4 8 8 2.2750132
## 5 5 9 10 4.7790352
## 6 6 11 11 9.1211220
## 7 7 12 13 15.8655254
## 8 8 14 14 25.2492538
## 9 9 15 15 36.9441340
## 10 10 16 17 50.0000000
## 11 11 18 18 63.0558660
## 12 12 19 20 74.7507462
## 13 13 21 22 84.1344746
## 14 14 23 23 90.8788780
## 15 15 24 25 95.2209648
## 16 16 26 27 97.7249868
## 17 17 28 28 99.0184671
## 18 18 29 28 99.6169619
## 19 19 29 NA 99.8650102
##
## [[8]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 3 4 0.1349898
## 2 2 5 6 0.3830381
## 3 3 7 8 0.9815329
## 4 4 9 10 2.2750132
## 5 5 11 11 4.7790352
## 6 6 12 13 9.1211220
## 7 7 14 14 15.8655254
## 8 8 15 15 25.2492538
## 9 9 16 17 36.9441340
## 10 10 18 18 50.0000000
## 11 11 19 20 63.0558660
## 12 12 21 21 74.7507462
## 13 13 22 23 84.1344746
## 14 14 24 24 90.8788780
## 15 15 25 26 95.2209648
## 16 16 27 27 97.7249868
## 17 17 28 28 99.0184671
## 18 18 29 28 99.6169619
## 19 19 29 NA 99.8650102
# Fronto-subcortical composite score: same pipeline, but with a larger minimum
# node size (minbucket = 50)
df <- dados %>% select(pdcrs_frontal_subcortical_score, idade, escolaridade) %>%
na.omit()
names(df) <- c("score", "idade", "escolaridade")
df <- df %>% mutate(across(where(is.integer), as.numeric))
df <- as.data.frame(df)
model <- train(
score ~ idade + escolaridade,
data = df,
method = 'ctree2',
trControl = trainControl("cv", number = 5, classProbs = FALSE),
tuneGrid = expand.grid(maxdepth = 1:10, mincriterion = c(0.9, 0.95, 0.99)),
controls=ctree_control(minbucket=50)
)
nodes <- where(model$finalModel)
sum_stats <- rbind(mu=tapply(df$score, nodes, tmean),
sd=tapply(df$score, nodes, sd_trim))
node_mean <- as.vector(predict(model$finalModel, df))
node_val <- as.vector(where(model$finalModel, df))
names(node_mean) <- node_val
set.seed(666)
node_mean_cat <- as.numeric(factor(node_mean))
m_norm <- cnorm(raw = df$score, group = node_mean)
## Multiple R2 between raw score and explanatory variable: R2 = 0.4735
## Final solution: 4 terms
## R-Square Adj. = 0.990789
## Final regression model: raw ~ A4 + L1A4 + L2A1 + L3A1
## Regression function: raw ~ 1.964939515 + (2.19018126e-06*A4) + (-4.269834477e-08*L1A4) + (0.0005468702917*L2A1) + (-3.346032597e-06*L3A1)
## Raw Score RMSE = 1.42128
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
m_norm <- cnorm(raw = df$score, group = node_mean, terms=4, scale=c(10, 3))
## Multiple R2 between raw score and explanatory variable: R2 = 0.4735
## User specified solution: 4 terms
## R-Square Adj. = 0.990339
## Final regression model: raw ~ L3 + A3 + L2A1 + L2A3
## Regression function: raw ~ 10.63522922 + (-0.009660835236*L3) + (0.0001072569149*A3) + (0.009582056789*L2A1) + (-1.024184122e-06*L2A3)
## Raw Score RMSE = 1.45557
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
tt <- lapply(sort(unique(node_mean)), function(x) normTable(x, model = m_norm, step = 1, minNorm = 1, maxNorm=19))
normTabs <- lapply(tt, function(x) data.frame(Norma=x$norm,
Escore.Inf=c(round(x$raw)),
Escore.Sup=c(round(x$raw[-1])-1, NA),
Percentil=x$percentile))
plot(model$finalModel)
sum_stats[, order(sum_stats[1,])]
## 8 7 15 13 12 3 14 4
## mu 38.29412 46.939394 47.44118 54.28125 60.73418 68.21212 68.86957 71.761194
## sd 10.59790 8.237041 13.22683 14.12512 12.43314 11.00995 11.21107 9.882053
normTabs
## [[1]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 21 20 0.1349898
## 2 2 21 20 0.3830381
## 3 3 21 20 0.9815329
## 4 4 21 23 2.2750132
## 5 5 24 25 4.7790352
## 6 6 26 28 9.1211220
## 7 7 29 31 15.8655254
## 8 8 32 34 25.2492538
## 9 9 35 38 36.9441340
## 10 10 39 41 50.0000000
## 11 11 42 44 63.0558660
## 12 12 45 48 74.7507462
## 13 13 49 51 84.1344746
## 14 14 52 54 90.8788780
## 15 15 55 57 95.2209648
## 16 16 58 59 97.7249868
## 17 17 60 61 99.0184671
## 18 18 62 63 99.6169619
## 19 19 64 NA 99.8650102
##
## [[2]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 23 23 0.1349898
## 2 2 24 24 0.3830381
## 3 3 25 26 0.9815329
## 4 4 27 29 2.2750132
## 5 5 30 32 4.7790352
## 6 6 33 35 9.1211220
## 7 7 36 39 15.8655254
## 8 8 40 42 25.2492538
## 9 9 43 46 36.9441340
## 10 10 47 50 50.0000000
## 11 11 51 54 63.0558660
## 12 12 55 59 74.7507462
## 13 13 60 63 84.1344746
## 14 14 64 67 90.8788780
## 15 15 68 70 95.2209648
## 16 16 71 74 97.7249868
## 17 17 75 77 99.0184671
## 18 18 78 80 99.6169619
## 19 19 81 NA 99.8650102
##
## [[3]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 23 23 0.1349898
## 2 2 24 24 0.3830381
## 3 3 25 26 0.9815329
## 4 4 27 29 2.2750132
## 5 5 30 32 4.7790352
## 6 6 33 35 9.1211220
## 7 7 36 39 15.8655254
## 8 8 40 42 25.2492538
## 9 9 43 46 36.9441340
## 10 10 47 50 50.0000000
## 11 11 51 55 63.0558660
## 12 12 56 59 74.7507462
## 13 13 60 63 84.1344746
## 14 14 64 67 90.8788780
## 15 15 68 71 95.2209648
## 16 16 72 74 97.7249868
## 17 17 75 77 99.0184671
## 18 18 78 80 99.6169619
## 19 19 81 NA 99.8650102
##
## [[4]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 28 28 0.1349898
## 2 2 29 30 0.3830381
## 3 3 31 32 0.9815329
## 4 4 33 35 2.2750132
## 5 5 36 38 4.7790352
## 6 6 39 41 9.1211220
## 7 7 42 45 15.8655254
## 8 8 46 49 25.2492538
## 9 9 50 53 36.9441340
## 10 10 54 57 50.0000000
## 11 11 58 62 63.0558660
## 12 12 63 66 74.7507462
## 13 13 67 70 84.1344746
## 14 14 71 75 90.8788780
## 15 15 76 79 95.2209648
## 16 16 80 83 97.7249868
## 17 17 84 86 99.0184671
## 18 18 87 90 99.6169619
## 19 19 91 NA 99.8650102
##
## [[5]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 35 35 0.1349898
## 2 2 36 37 0.3830381
## 3 3 38 39 0.9815329
## 4 4 40 41 2.2750132
## 5 5 42 44 4.7790352
## 6 6 45 48 9.1211220
## 7 7 49 51 15.8655254
## 8 8 52 55 25.2492538
## 9 9 56 59 36.9441340
## 10 10 60 63 50.0000000
## 11 11 64 68 63.0558660
## 12 12 69 72 74.7507462
## 13 13 73 76 84.1344746
## 14 14 77 80 90.8788780
## 15 15 81 84 95.2209648
## 16 16 85 88 97.7249868
## 17 17 89 92 99.0184671
## 18 18 93 95 99.6169619
## 19 19 96 NA 99.8650102
##
## [[6]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 45 45 0.1349898
## 2 2 46 47 0.3830381
## 3 3 48 49 0.9815329
## 4 4 50 51 2.2750132
## 5 5 52 54 4.7790352
## 6 6 55 57 9.1211220
## 7 7 58 60 15.8655254
## 8 8 61 64 25.2492538
## 9 9 65 67 36.9441340
## 10 10 68 71 50.0000000
## 11 11 72 74 63.0558660
## 12 12 75 78 74.7507462
## 13 13 79 82 84.1344746
## 14 14 83 85 90.8788780
## 15 15 86 88 95.2209648
## 16 16 89 91 97.7249868
## 17 17 92 94 99.0184671
## 18 18 95 96 99.6169619
## 19 19 97 NA 99.8650102
##
## [[7]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 46 46 0.1349898
## 2 2 47 47 0.3830381
## 3 3 48 49 0.9815329
## 4 4 50 51 2.2750132
## 5 5 52 54 4.7790352
## 6 6 55 57 9.1211220
## 7 7 58 60 15.8655254
## 8 8 61 64 25.2492538
## 9 9 65 67 36.9441340
## 10 10 68 71 50.0000000
## 11 11 72 75 63.0558660
## 12 12 76 78 74.7507462
## 13 13 79 82 84.1344746
## 14 14 83 85 90.8788780
## 15 15 86 88 95.2209648
## 16 16 89 91 97.7249868
## 17 17 92 94 99.0184671
## 18 18 95 96 99.6169619
## 19 19 97 NA 99.8650102
##
## [[8]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 51 51 0.1349898
## 2 2 52 52 0.3830381
## 3 3 53 54 0.9815329
## 4 4 55 56 2.2750132
## 5 5 57 59 4.7790352
## 6 6 60 62 9.1211220
## 7 7 63 64 15.8655254
## 8 8 65 68 25.2492538
## 9 9 69 71 36.9441340
## 10 10 72 74 50.0000000
## 11 11 75 77 63.0558660
## 12 12 78 80 74.7507462
## 13 13 81 83 84.1344746
## 14 14 84 86 90.8788780
## 15 15 87 89 95.2209648
## 16 16 90 91 97.7249868
## 17 17 92 93 99.0184671
## 18 18 94 94 99.6169619
## 19 19 95 NA 99.8650102
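As a sanity check, the Percentil column is simply the cumulative normal probability of the norm score on the mean-10, SD-3 scale requested through scale = c(10, 3) (a one-liner sketch; output not shown):
round(pnorm(1:19, mean = 10, sd = 3) * 100, 7)   # reproduces the Percentil column above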
# Posterior-cortical composite score: same pipeline, minbucket = 50
df <- dados %>% select(pdcrs_posterior_cortical_score, idade, escolaridade) %>%
na.omit()
names(df) <- c("score", "idade", "escolaridade")
df <- df %>% mutate(across(where(is.integer), as.numeric))
df <- as.data.frame(df)
model <- train(
score ~ idade + escolaridade,
data = df,
method = 'ctree2',
trControl = trainControl("cv", number = 5, classProbs = FALSE),
tuneGrid = expand.grid(maxdepth = 1:10, mincriterion = c(0.9, 0.95, 0.99)),
controls=ctree_control(minbucket=50)
)
nodes <- where(model$finalModel)
sum_stats <- rbind(mu=tapply(df$score, nodes, tmean),
sd=tapply(df$score, nodes, sd_trim))
node_mean <- as.vector(predict(model$finalModel, df))
node_val <- as.vector(where(model$finalModel, df))
names(node_mean) <- node_val
set.seed(666)
node_mean_cat <- as.numeric(factor(node_mean))
m_norm <- cnorm(raw = df$score, group = node_mean)
## Multiple R2 between raw score and explanatory variable: R2 = 0.1514
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 5 linear dependencies found
## Reordering variables and trying again:
## Final solution: 4 terms
## R-Square Adj. = 0.990884
## Final regression model: raw ~ L2A3 + L3A1 + L4A2 + L2A4
## Regression function: raw ~ 6.208629029 + (1.052513715e-06*L2A3) + (4.471379216e-06*L3A1) + (-3.6909142e-09*L4A2) + (-2.147543748e-08*L2A4)
## Raw Score RMSE = 0.83969
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
m_norm <- cnorm(raw = df$score, group = node_mean, terms=5, scale=c(10, 3))
## Multiple R2 between raw score and explanatory variable: R2 = 0.1514
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 5 linear dependencies found
## Reordering variables and trying again:
## User specified solution: 5 terms
## R-Square Adj. = 0.992015
## Final regression model: raw ~ L2A1 + L3A1 + L4A1 + L4A3 + L4A4
## Regression function: raw ~ 5.314495093 + (0.02749749055*L2A1) + (-0.002674995596*L3A1) + (8.330155529e-05*L4A1) + (-4.29643929e-08*L4A3) + (1.215434425e-09*L4A4)
## Raw Score RMSE = 0.81583
##
## Use 'printSubset(model)' to get detailed information on the different solutions, 'plotPercentiles(model) to display percentile plot, plotSubset(model)' to inspect model fit.
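Both fits above raise a "linear dependencies found" warning from leaps.setup(), the best-subset search used internally by cNORM: the grouping variable takes only a handful of distinct values here (one per terminal node), so several polynomial terms are collinear and the search reorders them before continuing. A quick check of how many distinct group values actually enter the regression (a sketch; output not shown):
length(unique(node_mean))   # distinct terminal-node means available to the norming model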
tt <- lapply(sort(unique(node_mean)), function(x) normTable(x, model = m_norm, step = 1, minNorm = 1, maxNorm=19))
normTabs <- lapply(tt, function(x) data.frame(Norma=x$norm,
Escore.Inf=c(round(x$raw)),
Escore.Sup=c(round(x$raw[-1])-1, NA),
Percentil=x$percentile))
plot(model$finalModel)
sum_stats[, order(sum_stats[1,])]
## 4 3 6 7
## mu 19.060606 22.61111 24.817143 26.329480
## sd 4.434875 3.85962 3.830394 3.147825
normTabs
## [[1]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 6 6 0.1349898
## 2 2 7 8 0.3830381
## 3 3 9 10 0.9815329
## 4 4 11 12 2.2750132
## 5 5 13 14 4.7790352
## 6 6 15 16 9.1211220
## 7 7 17 18 15.8655254
## 8 8 19 19 25.2492538
## 9 9 20 20 36.9441340
## 10 10 21 21 50.0000000
## 11 11 22 22 63.0558660
## 12 12 23 22 74.7507462
## 13 13 23 23 84.1344746
## 14 14 24 23 90.8788780
## 15 15 24 25 95.2209648
## 16 16 26 26 97.7249868
## 17 17 27 29 99.0184671
## 18 18 30 29 99.6169619
## 19 19 30 NA 99.8650102
##
## [[2]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 6 6 0.1349898
## 2 2 7 8 0.3830381
## 3 3 9 11 0.9815329
## 4 4 12 13 2.2750132
## 5 5 14 16 4.7790352
## 6 6 17 18 9.1211220
## 7 7 19 20 15.8655254
## 8 8 21 21 25.2492538
## 9 9 22 23 36.9441340
## 10 10 24 24 50.0000000
## 11 11 25 24 63.0558660
## 12 12 25 25 74.7507462
## 13 13 26 25 84.1344746
## 14 14 26 26 90.8788780
## 15 15 27 27 95.2209648
## 16 16 28 29 97.7249868
## 17 17 30 29 99.0184671
## 18 18 30 29 99.6169619
## 19 19 30 NA 99.8650102
##
## [[3]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 6 6 0.1349898
## 2 2 7 9 0.3830381
## 3 3 10 11 0.9815329
## 4 4 12 14 2.2750132
## 5 5 15 17 4.7790352
## 6 6 18 19 9.1211220
## 7 7 20 21 15.8655254
## 8 8 22 23 25.2492538
## 9 9 24 24 36.9441340
## 10 10 25 25 50.0000000
## 11 11 26 26 63.0558660
## 12 12 27 26 74.7507462
## 13 13 27 27 84.1344746
## 14 14 28 28 90.8788780
## 15 15 29 29 95.2209648
## 16 16 30 29 97.7249868
## 17 17 30 29 99.0184671
## 18 18 30 29 99.6169619
## 19 19 30 NA 99.8650102
##
## [[4]]
## Norma Escore.Inf Escore.Sup Percentil
## 1 1 6 7 0.1349898
## 2 2 8 9 0.3830381
## 3 3 10 12 0.9815329
## 4 4 13 14 2.2750132
## 5 5 15 17 4.7790352
## 6 6 18 20 9.1211220
## 7 7 21 22 15.8655254
## 8 8 23 24 25.2492538
## 9 9 25 25 36.9441340
## 10 10 26 26 50.0000000
## 11 11 27 27 63.0558660
## 12 12 28 28 74.7507462
## 13 13 29 28 84.1344746
## 14 14 29 29 90.8788780
## 15 15 30 29 95.2209648
## 16 16 30 29 97.7249868
## 17 17 30 29 99.0184671
## 18 18 30 29 99.6169619
## 19 19 30 NA 99.8650102