Bancos de dados

library(readxl)
library(tidyverse)
# Partial database (IBNeuro)

# Working directory: keep active only the line for the machine in use.
# setwd("D:/Dropbox/Laboratorio/Carina/SDMT/") # IBNeuro computer
# setwd("C:/Dropbox/Laboratorio/Carina/SDMT/") # Avell PC
setwd("~/Dropbox/Laboratorio/Carina/SDMT/") # Macbook

# NOTE(review): type.convert() is called without `as.is` throughout; recent R
# versions warn about that - confirm the intended handling of text columns.

# Database for the correlations (convergent and discriminant validity)
parcial <- type.convert(
  read_excel("SDMT_manual_REVISADO_nov2019_FINAL.xlsx", sheet = "NORMAS")
)

# Full database (IBNeuro and FM USP) used for the norms
# (excluding data with MMSE)
todos <- type.convert(
  read_excel("SDMT_manual_REVISADO_nov2019_FINAL.xlsx", sheet = "NormasSDMT")
)

# Norms database (with normal MMSE_SV)
# NOTE(review): this reads the same "NormasSDMT" sheet as `todos`, so both
# objects start out identical - confirm that this is intended.
dados <- type.convert(
  read_excel("SDMT_manual_REVISADO_nov2019_FINAL.xlsx", sheet = "NormasSDMT")
)

# Magnetic resonance imaging (MRI) database
Ress <- type.convert(
  read_excel("SDMT_manual_REVISADO_nov2019_FINAL.xlsx", sheet = "EM_Resson")
)

# Two-level grouping at EDSS 1.5: "EMRR" above the cut-off, "EM" at or below it
Ress$grupos <- ifelse(Ress$EDSS > 1.5, "EMRR", "EM")
table(Ress[["grupos"]])
## 
##   EM EMRR 
##   34   20
# Two-level grouping at EDSS 2.5: "RRMS" above the cut-off, "MS" at or below it
Ress$grupos2 <- ifelse(Ress$EDSS > 2.5, "RRMS", "MS")
table(Ress[["grupos2"]])
## 
##   MS RRMS 
##   42   12
# Multiple sclerosis database
EM <- type.convert(
  read_excel("SDMT_manual_REVISADO_nov2019_FINAL.xlsx", sheet = "EM")
)

# Case-control database
caso <- type.convert(
  read_excel("SDMT_manual_REVISADO_nov2019_FINAL.xlsx", sheet = "Caso_contr_RM")
)

# Test-retest database
conf <- type.convert(
  read_excel("SDMT_manual_REVISADO_nov2019_FINAL.xlsx", sheet = "Confiabilidade")
)

Análise descritiva

# Descriptive analysis of the numeric variables
library(knitr)
library(psych)

options(knitr.table.format = 'markdown')

# Database that will be used to build the norms: descriptive statistics
# (with 20% trimmed mean) of every numeric column, shown with 2 decimals
dados %>%
  dplyr::select_if(is.numeric) %>%
  psych::describe(tr = .2) %>%
  kable(digits = 2)
vars n mean sd median trimmed mad min max range skew kurtosis se
Idade 1 587 44.50 16.39 41 42.95 19.27 18 92 74 0.43 -0.77 0.68
FaixaEtaria 2 587 3.02 1.63 3 2.88 1.48 1 7 6 0.45 -0.80 0.07
Escola 3 587 3.46 0.74 4 3.68 0.00 2 4 2 -0.96 -0.55 0.03
BAI 4 455 8.92 8.47 6 6.95 5.93 0 43 43 1.28 1.22 0.40
BDI 5 455 9.35 7.96 7 7.88 5.93 0 51 51 1.28 2.15 0.37
HADS_A 6 587 5.84 3.85 5 5.33 2.97 0 21 21 0.79 0.31 0.16
HADS_D 7 587 4.50 3.36 4 3.97 2.97 0 19 19 1.03 1.01 0.14
MMSE_SV 8 574 28.49 1.84 29 29.02 1.48 21 30 9 -1.36 1.33 0.08
SD_escr 9 586 43.94 17.14 45 44.17 16.31 2 110 108 0.28 0.77 0.71
SD_oral 10 587 47.84 18.34 50 48.16 17.79 2 110 108 0.13 0.20 0.76
SD_oral_escrito 11 587 3.98 4.90 5 4.84 5.93 -9 9 18 -0.60 -0.74 0.20
# Partial database (IBNeuro only), used for the correlations
parcial %>%
  dplyr::select_if(is.numeric) %>%
  psych::describe(tr = .2) %>%
  kable(digits = 2)
vars n mean sd median trimmed mad min max range skew kurtosis se
Idade 1 471 42.33 15.20 39 40.60 16.31 18 89 71 0.52 -0.61 0.70
BAI 2 471 9.06 8.64 7 7.05 7.41 0 45 45 1.33 1.46 0.40
BDI 3 471 9.35 7.94 7 7.87 5.93 0 51 51 1.27 2.06 0.37
HADS_A 4 471 6.15 4.05 6 5.69 4.45 0 21 21 0.67 0.05 0.19
HADS_D 5 471 4.68 3.59 4 4.13 2.97 0 19 19 0.90 0.44 0.17
Escrita 6 471 0.97 0.16 1 1.00 0.00 0 1 1 -6.00 34.12 0.01
Desenho 7 471 0.92 0.27 1 1.00 0.00 0 1 1 -3.07 7.44 0.01
VelProc 8 471 17.75 6.15 18 17.71 5.93 0 35 35 0.22 0.37 0.28
MMSE2_BV 9 471 15.47 0.95 16 15.77 0.00 8 16 8 -2.87 13.24 0.04
MMSE2_SV 10 471 28.51 2.05 29 29.13 1.48 13 30 17 -2.24 8.25 0.09
MMSE2_EV 11 471 60.44 10.30 60 60.59 8.90 25 87 62 -0.13 0.33 0.47
A1_acerto 12 471 7.11 2.29 7 6.88 2.97 0 15 15 0.50 0.22 0.11
A1_intrus 13 471 0.46 0.89 0 0.14 0.00 0 6 6 2.48 7.72 0.04
A1_repet 14 471 0.41 0.92 0 0.09 0.00 0 8 8 3.58 17.93 0.04
A5_acerto 15 471 13.15 2.51 14 13.49 2.97 0 16 16 -0.94 1.11 0.12
A5_intrus 16 471 0.17 0.45 0 0.00 0.00 0 3 3 3.20 11.92 0.02
A5_repet 17 471 1.10 1.58 0 0.60 0.00 0 8 8 1.82 3.38 0.07
BVMTR1 18 471 5.78 3.18 6 5.63 2.97 0 12 12 0.22 -0.72 0.15
BVMTR3 19 471 9.36 2.98 10 10.16 2.97 0 12 12 -1.26 0.84 0.14
SD_escr 20 470 44.66 16.04 45 44.55 14.83 6 110 104 0.61 2.00 0.74
SD_oral 21 470 50.31 18.16 51 50.29 17.05 9 110 101 0.26 0.51 0.84
SD_oral_escrito 22 470 5.65 9.92 6 5.67 8.90 -31 55 86 0.20 2.75 0.46
# Database of the magnetic resonance imaging (MRI) study
Ress %>%
  dplyr::select_if(is.numeric) %>%
  psych::describe(tr = .2) %>%
  kable(digits = 2)
vars n mean sd median trimmed mad min max range skew kurtosis se
Idade 1 54 36.28 9.22 34.5 35.21 6.67 18.00 61.0 43.00 0.67 0.28 1.25
Escolaridade 2 54 13.80 4.54 13.5 13.68 3.71 4.00 28.0 24.00 0.31 0.70 0.62
Comorbidades 3 54 0.17 0.38 0.0 0.00 0.00 0.00 1.0 1.00 1.74 1.05 0.05
Medicamentos 4 54 0.81 0.39 1.0 1.00 0.00 0.00 1.0 1.00 -1.58 0.49 0.05
TempoDoenca 5 52 6.80 6.24 5.5 5.53 5.93 0.25 32.0 31.75 1.60 3.33 0.87
EDSS 6 54 1.56 2.04 1.0 0.94 1.48 0.00 7.5 7.50 1.27 0.57 0.28
Passos25 7 54 7.26 2.56 7.0 6.76 2.97 4.00 15.0 11.00 1.47 2.21 0.35
pinosdir9 8 54 28.81 6.61 27.0 27.56 5.93 18.00 45.0 27.00 0.84 -0.21 0.90
pinosesq9 9 54 30.81 9.34 28.0 28.85 5.93 19.00 70.0 51.00 1.96 4.68 1.27
MedicamentoEM 10 54 0.78 0.42 1.0 0.94 0.00 0.00 1.0 1.00 -1.30 -0.32 0.06
VitD 11 54 62.73 29.98 55.5 63.23 37.06 7.00 100.0 93.00 0.09 -1.52 4.08
FSS 12 54 35.74 16.72 35.5 35.47 17.79 9.00 63.0 54.00 0.04 -1.14 2.27
HADS_A 13 54 6.43 3.97 5.5 6.09 3.71 0.00 16.0 16.00 0.44 -0.74 0.54
HADS_D 14 54 4.50 3.96 3.0 3.59 2.97 0.00 15.0 15.00 1.26 0.86 0.54
SF36 15 54 91.65 11.74 94.0 93.88 7.41 53.00 105.0 52.00 -1.86 3.42 1.60
SD_escr 16 54 44.26 15.77 47.5 43.59 21.50 11.00 77.0 66.00 0.09 -1.04 2.15
SD_oral 17 54 46.30 15.44 49.0 46.59 15.57 12.00 77.0 65.00 -0.11 -0.89 2.10
BVMT_T1 18 54 2.11 1.83 2.0 1.82 1.48 0.00 9.0 9.00 1.36 2.38 0.25
BVMT_T2 19 54 4.00 2.56 3.0 3.79 2.97 0.00 12.0 12.00 0.85 0.72 0.35
BVMT_T3 20 54 4.80 2.72 5.0 4.76 1.48 0.00 12.0 12.00 0.50 0.41 0.37
BVMT 21 54 10.91 6.65 10.0 10.53 7.41 0.00 33.0 33.00 0.88 1.26 0.91
CVLT_A1 22 54 6.30 1.66 6.0 6.00 1.48 4.00 10.0 6.00 0.59 -0.84 0.23
CVLT_A2 23 54 9.69 2.48 10.0 9.76 2.97 4.00 14.0 10.00 -0.29 -0.49 0.34
CVLT_A3 24 54 11.33 2.94 11.5 11.53 3.71 4.00 16.0 12.00 -0.38 -0.63 0.40
CVLT_A4 25 54 12.17 3.18 13.0 12.59 2.97 0.00 16.0 16.00 -1.27 2.37 0.43
CVLT_A5 26 54 12.44 3.28 14.0 13.09 2.97 1.00 16.0 15.00 -1.21 1.31 0.45
CVLT 27 54 51.93 11.73 52.5 53.09 12.60 22.00 70.0 48.00 -0.55 -0.37 1.60
CVLT_rep 28 54 7.46 5.77 6.0 6.35 4.45 0.00 27.0 27.00 1.38 1.80 0.79
MMSE_MST 29 54 53.37 9.83 53.0 53.59 9.64 31.00 81.0 50.00 0.01 0.08 1.34
VolCereb 30 54 1504.43 70.44 1514.5 1514.45 55.60 1291.00 1643.0 352.00 -0.82 0.63 9.59
VolCinz 31 54 890.75 52.32 893.0 894.04 49.67 766.00 995.0 229.00 -0.32 -0.36 7.12
LesoesFlair 32 54 9.70 8.55 6.7 7.50 5.04 1.20 38.9 37.70 1.48 1.63 1.16
LesoesImpreg 33 54 0.01 0.07 0.0 0.00 0.00 0.00 0.4 0.40 4.57 21.35 0.01
# Database of the multiple sclerosis study
EM %>%
  dplyr::select_if(is.numeric) %>%
  psych::describe(tr = .2) %>%
  kable(digits = 2)
vars n mean sd median trimmed mad min max range skew kurtosis se
Idade 1 256 40.68 11.88 40.0 40.03 13.34 17 72 55 0.28 -0.65 0.74
Escolaridade 2 256 12.51 4.15 12.0 12.61 3.71 1 28 27 0.07 0.96 0.26
HADS_D 3 256 6.00 3.75 5.0 5.56 2.97 0 19 19 0.79 0.43 0.23
HADS_A 4 256 7.20 4.21 6.0 6.62 2.97 0 21 21 0.77 0.32 0.26
SD_escr 5 256 34.16 15.56 33.5 33.99 15.57 0 77 77 0.12 -0.21 0.97
SD_oral 6 256 37.57 15.45 38.0 37.73 16.31 0 77 77 0.03 -0.41 0.97
EDSS 7 171 3.05 2.27 3.0 2.88 2.97 0 8 8 0.32 -1.05 0.17
A1 8 238 6.06 1.99 6.0 5.97 1.48 1 13 12 0.37 0.54 0.13
A5 9 238 11.21 3.13 11.0 11.40 4.45 1 16 15 -0.32 -0.64 0.20
A1_A5 10 238 45.16 14.16 47.0 46.13 14.83 11 75 64 -0.32 -0.57 0.92
BVMT_1 11 224 3.89 2.92 3.0 3.46 2.97 0 12 12 0.72 -0.23 0.20
BVMT_2 12 224 5.84 3.61 5.0 5.57 4.45 0 12 12 0.28 -1.11 0.24
BVMT_3 13 224 6.78 3.90 6.0 6.85 5.93 0 12 12 -0.02 -1.33 0.26
BVMT_total 14 224 16.48 9.78 15.5 15.85 12.60 1 36 35 0.27 -1.10 0.65
# Database of the case-control study
caso %>%
  dplyr::select_if(is.numeric) %>%
  psych::describe(tr = .2) %>%
  kable(digits = 2)
vars n mean sd median trimmed mad min max range skew kurtosis se
Idade 1 26 30.46 4.81 31.00 30.31 2.97 21.00 45.00 24.00 0.66 1.85 0.94
Escolaridade 2 26 14.92 3.19 15.00 14.75 4.45 8.00 20.00 12.00 -0.05 -1.02 0.62
EDSS 3 26 0.54 1.09 0.00 0.12 0.00 0.00 4.50 4.50 2.15 4.28 0.21
Grupo 4 26 0.54 0.51 1.00 0.56 0.00 0.00 1.00 1.00 -0.15 -2.05 0.10
A1 5 26 7.19 2.10 7.00 7.00 2.97 4.00 12.00 8.00 0.43 -0.96 0.41
A2 6 26 10.31 2.00 10.00 10.00 1.48 7.00 15.00 8.00 0.72 -0.22 0.39
A3 7 26 11.88 2.52 12.00 12.06 2.97 5.00 16.00 11.00 -0.51 0.16 0.49
A4 8 26 12.19 3.41 13.00 12.81 2.97 0.00 16.00 16.00 -1.73 3.74 0.67
A5 9 26 12.92 3.35 14.00 13.56 2.97 1.00 16.00 15.00 -1.72 3.58 0.66
Total_A1_A5 10 26 54.50 11.28 55.50 54.94 9.64 23.00 75.00 52.00 -0.62 0.62 2.21
CVLT _rep 11 26 5.58 5.52 4.00 4.56 5.93 0.00 20.00 20.00 1.02 0.28 1.08
BVMT_T1 12 26 4.81 4.22 2.00 4.12 1.48 0.00 12.00 12.00 0.48 -1.57 0.83
BVMT_T2 13 26 6.96 4.09 6.00 6.81 4.45 0.00 12.00 12.00 0.09 -1.68 0.80
BVMT_T3 14 26 7.88 3.59 7.00 8.00 4.45 1.00 12.00 11.00 -0.08 -1.56 0.70
BVMT 15 26 19.65 11.54 15.50 18.94 11.86 1.00 36.00 35.00 0.15 -1.69 2.26
SD_escr 16 26 49.81 12.62 51.00 51.50 12.60 23.00 68.00 45.00 -0.57 -0.72 2.48
SD_oral 17 26 52.69 13.67 52.50 53.69 17.05 23.00 75.00 52.00 -0.38 -0.72 2.68
Oral_Escrito 18 26 2.88 5.46 1.50 1.88 3.71 -8.00 19.00 27.00 1.01 1.30 1.07
HADS_A 19 26 6.54 3.85 5.00 5.94 2.97 1.00 14.00 13.00 0.57 -1.01 0.75
HADS_D 20 26 4.27 4.11 3.00 3.12 2.97 0.00 15.00 15.00 1.29 0.59 0.81
HPT9_RH 21 26 23.64 5.81 21.30 22.44 4.89 17.91 39.00 21.09 1.13 0.46 1.14
HPT9_LH 22 26 24.67 6.82 22.20 23.35 4.41 18.00 51.00 33.00 2.20 5.89 1.34
HPT9_total 23 26 24.15 6.04 21.34 23.02 4.00 18.00 45.00 27.00 1.62 2.97 1.18
VolCereb 24 14 1540.73 32.58 1534.00 1536.22 33.58 1503.00 1603.00 100.00 0.52 -1.16 8.71
VolCinz 25 14 903.11 27.38 905.50 900.77 26.17 860.80 952.00 91.20 0.35 -0.95 7.32
lesoesflair 26 14 10.65 5.39 10.60 10.60 6.52 1.50 19.64 18.14 0.00 -1.28 1.44
lesoesimpreg 27 14 0.04 0.12 0.00 0.00 0.00 0.00 0.40 0.40 2.23 3.72 0.03
# Database of the test-retest reliability study
conf %>%
  dplyr::select_if(is.numeric) %>%
  psych::describe(tr = .2) %>%
  kable(digits = 2)
vars n mean sd median trimmed mad min max range skew kurtosis se
Idade 1 98 60.87 13.44 62.0 61.80 8.90 22 92 70 -0.50 0.60 1.36
Escolaridade 2 98 9.76 5.36 11.0 9.43 7.41 1 27 26 0.34 -0.34 0.54
SD_escr_teste 3 98 29.42 13.28 29.5 29.02 14.83 5 69 64 0.25 -0.53 1.34
SD_oral_teste 4 98 31.62 14.93 32.0 30.88 16.31 2 70 68 0.35 -0.36 1.51
SD_escr_reteste 5 91 31.34 14.47 32.0 31.49 17.79 0 70 70 0.01 -0.65 1.52
SD_oral_reteste 6 91 33.56 14.52 34.0 33.53 16.31 0 68 68 0.01 -0.53 1.52
alter_escr 7 75 23.48 11.15 23.0 23.00 13.34 0 47 47 0.15 -0.83 1.29
altern_oral 8 75 25.40 11.28 25.0 25.00 13.34 4 50 46 0.16 -0.74 1.30

Imputação Ress

library(mice)
# Impute the missing values of the MRI data set with mice(), using its default
# settings. A fixed seed is passed so that the imputed values (and every
# result derived from them) are the same on each run.
imp <- mice(Ress, seed = 3984)
## 
##  iter imp variable
##   1   1  TempoDoenca*
##   1   2  TempoDoenca*
##   1   3  TempoDoenca*
##   1   4  TempoDoenca*
##   1   5  TempoDoenca*
##   2   1  TempoDoenca*
##   2   2  TempoDoenca*
##   2   3  TempoDoenca*
##   2   4  TempoDoenca*
##   2   5  TempoDoenca*
##   3   1  TempoDoenca*
##   3   2  TempoDoenca*
##   3   3  TempoDoenca*
##   3   4  TempoDoenca*
##   3   5  TempoDoenca*
##   4   1  TempoDoenca*
##   4   2  TempoDoenca*
##   4   3  TempoDoenca*
##   4   4  TempoDoenca*
##   4   5  TempoDoenca*
##   5   1  TempoDoenca*
##   5   2  TempoDoenca*
##   5   3  TempoDoenca*
##   5   4  TempoDoenca*
##   5   5  TempoDoenca*
# Extract one completed data set from the imputation object (the first one,
# which is the default), count the remaining missing values, and use the
# completed data as the MRI data set from here on.
# NOTE(review): only a single imputed data set is carried forward.
compl <- mice::complete(imp, action = 1L)
sum(is.na(compl))
## [1] 0
Ress <- compl

# Database of the magnetic resonance imaging (MRI) study, after imputation
Ress %>%
  dplyr::select_if(is.numeric) %>%
  psych::describe(tr = .2) %>%
  kable(digits = 2)
vars n mean sd median trimmed mad min max range skew kurtosis se
Idade 1 54 36.28 9.22 34.5 35.21 6.67 18.00 61.0 43.00 0.67 0.28 1.25
Escolaridade 2 54 13.80 4.54 13.5 13.68 3.71 4.00 28.0 24.00 0.31 0.70 0.62
Comorbidades 3 54 0.17 0.38 0.0 0.00 0.00 0.00 1.0 1.00 1.74 1.05 0.05
Medicamentos 4 54 0.81 0.39 1.0 1.00 0.00 0.00 1.0 1.00 -1.58 0.49 0.05
TempoDoenca 5 54 6.88 6.34 5.5 5.57 6.15 0.25 32.0 31.75 1.50 2.81 0.86
EDSS 6 54 1.56 2.04 1.0 0.94 1.48 0.00 7.5 7.50 1.27 0.57 0.28
Passos25 7 54 7.26 2.56 7.0 6.76 2.97 4.00 15.0 11.00 1.47 2.21 0.35
pinosdir9 8 54 28.81 6.61 27.0 27.56 5.93 18.00 45.0 27.00 0.84 -0.21 0.90
pinosesq9 9 54 30.81 9.34 28.0 28.85 5.93 19.00 70.0 51.00 1.96 4.68 1.27
MedicamentoEM 10 54 0.78 0.42 1.0 0.94 0.00 0.00 1.0 1.00 -1.30 -0.32 0.06
VitD 11 54 62.73 29.98 55.5 63.23 37.06 7.00 100.0 93.00 0.09 -1.52 4.08
FSS 12 54 35.74 16.72 35.5 35.47 17.79 9.00 63.0 54.00 0.04 -1.14 2.27
HADS_A 13 54 6.43 3.97 5.5 6.09 3.71 0.00 16.0 16.00 0.44 -0.74 0.54
HADS_D 14 54 4.50 3.96 3.0 3.59 2.97 0.00 15.0 15.00 1.26 0.86 0.54
SF36 15 54 91.65 11.74 94.0 93.88 7.41 53.00 105.0 52.00 -1.86 3.42 1.60
SD_escr 16 54 44.26 15.77 47.5 43.59 21.50 11.00 77.0 66.00 0.09 -1.04 2.15
SD_oral 17 54 46.30 15.44 49.0 46.59 15.57 12.00 77.0 65.00 -0.11 -0.89 2.10
BVMT_T1 18 54 2.11 1.83 2.0 1.82 1.48 0.00 9.0 9.00 1.36 2.38 0.25
BVMT_T2 19 54 4.00 2.56 3.0 3.79 2.97 0.00 12.0 12.00 0.85 0.72 0.35
BVMT_T3 20 54 4.80 2.72 5.0 4.76 1.48 0.00 12.0 12.00 0.50 0.41 0.37
BVMT 21 54 10.91 6.65 10.0 10.53 7.41 0.00 33.0 33.00 0.88 1.26 0.91
CVLT_A1 22 54 6.30 1.66 6.0 6.00 1.48 4.00 10.0 6.00 0.59 -0.84 0.23
CVLT_A2 23 54 9.69 2.48 10.0 9.76 2.97 4.00 14.0 10.00 -0.29 -0.49 0.34
CVLT_A3 24 54 11.33 2.94 11.5 11.53 3.71 4.00 16.0 12.00 -0.38 -0.63 0.40
CVLT_A4 25 54 12.17 3.18 13.0 12.59 2.97 0.00 16.0 16.00 -1.27 2.37 0.43
CVLT_A5 26 54 12.44 3.28 14.0 13.09 2.97 1.00 16.0 15.00 -1.21 1.31 0.45
CVLT 27 54 51.93 11.73 52.5 53.09 12.60 22.00 70.0 48.00 -0.55 -0.37 1.60
CVLT_rep 28 54 7.46 5.77 6.0 6.35 4.45 0.00 27.0 27.00 1.38 1.80 0.79
MMSE_MST 29 54 53.37 9.83 53.0 53.59 9.64 31.00 81.0 50.00 0.01 0.08 1.34
VolCereb 30 54 1504.43 70.44 1514.5 1514.45 55.60 1291.00 1643.0 352.00 -0.82 0.63 9.59
VolCinz 31 54 890.75 52.32 893.0 894.04 49.67 766.00 995.0 229.00 -0.32 -0.36 7.12
LesoesFlair 32 54 9.70 8.55 6.7 7.50 5.04 1.20 38.9 37.70 1.48 1.63 1.16
LesoesImpreg 33 54 0.01 0.07 0.0 0.00 0.00 0.00 0.4 0.40 4.57 21.35 0.01

Medidas clínicas

Análise de cluster RM

library(cluster)
library(tidyverse)
# Select the variables of the MRI data set used in the cluster analysis
# dados2 <- na.omit(Ress[c(1,3, 8:15, 17:38)])
dados2 <- Ress %>%
  select(
    TempoDoenca:pinosesq9, VitD, FSS:SD_oral, BVMT, CVLT, MMSE_MST,
    VolCinz, LesoesFlair, grupos, grupos2, Idade, Escolaridade
  )
# http://gradientdescending.com/unsupervised-random-forest-example/
suppressPackageStartupMessages(library(randomForest))
suppressPackageStartupMessages(library(caret))
suppressPackageStartupMessages(library(cluster))
suppressPackageStartupMessages(library(RColorBrewer))

# set colours
myColRamp <- colorRampPalette(colors = c("#5DBCD2", "#FF80AA"))

# Unsupervised random forest (y = NULL) whose proximity matrix is used for the
# clustering below. The predictors are selected by name (previously positions
# 1, 2, 4, 5, 6, 16 and 17 of dados2) so that the model does not silently
# change if the column order of dados2 changes.
set.seed(3984)
rf2 <- randomForest(
  x = dados2[c(
    "TempoDoenca", "EDSS", "pinosdir9", "pinosesq9",
    "VitD", "VolCinz", "LesoesFlair"
  )],
  y = NULL, mtry = 3,
  ntree = 10000, proximity = TRUE, oob.prox = TRUE
)
rf2
## 
## Call:
##  randomForest(x = dados2[c(1, 2, 4, 5, 6, 16, 17)], y = NULL,      ntree = 10000, mtry = 3, proximity = TRUE, oob.prox = TRUE) 
##                Type of random forest: unsupervised
##                      Number of trees: 10000
## No. of variables tried at each split: 3
# PAM (partitioning around medoids) with 2 clusters on the random forest
# proximity matrix.
# NOTE(review): `prox` is passed to pam() as a plain matrix without `diss`, so
# it appears to be treated as a data matrix rather than as a dissimilarity -
# confirm this is intended.
prox <- rf2$proximity
pam.rf <- pam(prox, 2)
# Use the full component name `clustering`; `$cluster` only worked through
# partial matching of list names.
dados2$clustersRF <- pam.rf$clustering

# Label the two clusters and store them as a factor
dados2$clustersRF <- ifelse(dados2$clustersRF == 1, "Cluster1", "Cluster2")
dados2$clustersRF <- as.factor(dados2$clustersRF)

table(dados2$clustersRF)
## 
## Cluster1 Cluster2 
##       35       19
# Proportion of patients in each Random Forest cluster
prop.table(table(dados2[["clustersRF"]]))
## 
##  Cluster1  Cluster2 
## 0.6481481 0.3518519
# Cross-tabulation of the EDSS <= 1.5 and EDSS <= 2.5 groupings
table(dados2[["grupos"]], dados2[["grupos2"]])
##       
##        MS RRMS
##   EM   34    0
##   EMRR  8   12
# Cross-tabulation of the Random Forest clusters and the EDSS <= 1.5 grouping
table(dados2[["clustersRF"]], dados2[["grupos"]])
##           
##            EM EMRR
##   Cluster1 17   18
##   Cluster2 17    2
# Cross-tabulation of the Random Forest clusters and the EDSS <= 2.5 grouping
table(dados2[["clustersRF"]], dados2[["grupos2"]])
##           
##            MS RRMS
##   Cluster1 23   12
##   Cluster2 19    0

Análise de Componentes Principais

PCA com os grupos do Random Forest Clustering.

library(factoextra)
library("FactoMineR")

# Principal component analysis on the first 17 columns of dados2 (the columns
# that precede the grouping variables); the built-in plots are suppressed.
BD.pca <- PCA(dados2[c(1:17)], graph = FALSE)

# Individuals plot: points coloured by Random Forest cluster (habillage)
fviz_pca_ind(BD.pca,
             label = "none", # hide individual labels
             habillage = dados2$clustersRF, # color by groups
             palette = c("#5DBCD2", "#FF80AA"),
             addEllipses = TRUE, ellipse.level = 0.8 # Concentration ellipses
)

# Biplot of individuals and variables on the first two components.
# `repel` is spelled FALSE because `F` is an ordinary, reassignable variable.
fviz_pca_biplot(BD.pca, axes = c(1, 2), geom = "point",
                col.ind = "black", col.var = "steelblue", label = "all",
                invisible = "none", repel = FALSE,
                habillage = dados2$clustersRF,
                palette = c("#5DBCD2", "#FF80AA"),
                addEllipses = TRUE, ellipse.level = 0.8,
                title = "PCA - Biplot")

Exploração dos dados

Sintaxe das estatísticas robustas

## Robust statistics
library(WRS2)

#' Trimmed mean (20% by default)
#'
#' @param x Vector whose trimmed mean is wanted.
#' @param tr Fraction trimmed from each end, passed to `mean()` as `trim`.
#' @param na.rm If `TRUE`, missing values are dropped before averaging.
#' @param STAND Not used by this function.
#' @return The trimmed mean of `x`.
tmean <- function(x, tr = .2, na.rm = FALSE, STAND = NULL) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  mean(x, trim = tr)
}

#' Column-wise trimmed standard deviation (20% by default)
#'
#' Each column is centred at its trimmed mean. Residuals whose absolute value
#' is at most the `1 - trim` quantile of the column's absolute residuals are
#' kept; their squares are summed, divided by `nrow(x) * (1 - trim) - 1`, and
#' the square root is divided by a consistency constant.
#'
#' @param x Numeric vector or matrix; a matrix is processed column by column.
#' @param trim Trimming fraction.
#' @param const If `TRUE`, apply the built-in consistency constant (0.7892 for
#'   `trim = 0.1`, 0.6615 for `trim = 0.2`). For any other `trim` a warning is
#'   raised and no correction is applied. If `FALSE`, no correction is applied.
#' @return Unnamed numeric vector with one trimmed SD per column of `x`.
sd_trim <- function(x, trim = 0.2, const = TRUE) {
  x <- as.matrix(x)
  if (const) {
    # Tolerant comparison so that a computed `trim` still selects its constant
    if (isTRUE(all.equal(trim, 0.1))) {
      const <- 0.7892
    } else if (isTRUE(all.equal(trim, 0.2))) {
      const <- 0.6615
    } else {
      warning("Did you specify the correct consistency constant for trimming?")
      # Made explicit: previously `const` stayed TRUE and was divided by as 1
      const <- 1
    }
  } else {
    const <- 1
  }
  m <- apply(x, 2, mean, trim = trim)
  res <- sweep(x, 2, m)
  qu <- apply(abs(res), 2, quantile, probs = 1 - trim)
  # Decide per column which residuals are kept. Summing within each column
  # stays correct when columns keep different numbers of residuals (e.g. with
  # ties), which reshaping the kept values into a matrix did not.
  keep <- abs(res) <= rep(qu, each = nrow(x))
  ss <- unname(colSums(res^2 * keep))
  sqrt(ss / (nrow(x) * (1 - trim) - 1)) / const
}

Resultados robustos

# Overall description of the analysis data set (trimmed mean uses 20%)
dados2 %>%
  psych::describe(tr = .2)
##              vars  n   mean    sd median trimmed   mad    min   max  range
## TempoDoenca     1 54   6.88  6.34    5.5    5.57  6.15   0.25  32.0  31.75
## EDSS            2 54   1.56  2.04    1.0    0.94  1.48   0.00   7.5   7.50
## Passos25        3 54   7.26  2.56    7.0    6.76  2.97   4.00  15.0  11.00
## pinosdir9       4 54  28.81  6.61   27.0   27.56  5.93  18.00  45.0  27.00
## pinosesq9       5 54  30.81  9.34   28.0   28.85  5.93  19.00  70.0  51.00
## VitD            6 54  62.73 29.98   55.5   63.23 37.06   7.00 100.0  93.00
## FSS             7 54  35.74 16.72   35.5   35.47 17.79   9.00  63.0  54.00
## HADS_A          8 54   6.43  3.97    5.5    6.09  3.71   0.00  16.0  16.00
## HADS_D          9 54   4.50  3.96    3.0    3.59  2.97   0.00  15.0  15.00
## SF36           10 54  91.65 11.74   94.0   93.88  7.41  53.00 105.0  52.00
## SD_escr        11 54  44.26 15.77   47.5   43.59 21.50  11.00  77.0  66.00
## SD_oral        12 54  46.30 15.44   49.0   46.59 15.57  12.00  77.0  65.00
## BVMT           13 54  10.91  6.65   10.0   10.53  7.41   0.00  33.0  33.00
## CVLT           14 54  51.93 11.73   52.5   53.09 12.60  22.00  70.0  48.00
## MMSE_MST       15 54  53.37  9.83   53.0   53.59  9.64  31.00  81.0  50.00
## VolCinz        16 54 890.75 52.32  893.0  894.04 49.67 766.00 995.0 229.00
## LesoesFlair    17 54   9.70  8.55    6.7    7.50  5.04   1.20  38.9  37.70
## grupos*        18 54   1.37  0.49    1.0    1.29  0.00   1.00   2.0   1.00
## grupos2*       19 54   1.22  0.42    1.0    1.06  0.00   1.00   2.0   1.00
## Idade          20 54  36.28  9.22   34.5   35.21  6.67  18.00  61.0  43.00
## Escolaridade   21 54  13.80  4.54   13.5   13.68  3.71   4.00  28.0  24.00
## clustersRF*    22 54   1.35  0.48    1.0    1.26  0.00   1.00   2.0   1.00
##               skew kurtosis   se
## TempoDoenca   1.50     2.81 0.86
## EDSS          1.27     0.57 0.28
## Passos25      1.47     2.21 0.35
## pinosdir9     0.84    -0.21 0.90
## pinosesq9     1.96     4.68 1.27
## VitD          0.09    -1.52 4.08
## FSS           0.04    -1.14 2.27
## HADS_A        0.44    -0.74 0.54
## HADS_D        1.26     0.86 0.54
## SF36         -1.86     3.42 1.60
## SD_escr       0.09    -1.04 2.15
## SD_oral      -0.11    -0.89 2.10
## BVMT          0.88     1.26 0.91
## CVLT         -0.55    -0.37 1.60
## MMSE_MST      0.01     0.08 1.34
## VolCinz      -0.32    -0.36 7.12
## LesoesFlair   1.48     1.63 1.16
## grupos*       0.52    -1.76 0.07
## grupos2*      1.30    -0.32 0.06
## Idade         0.67     0.28 1.25
## Escolaridade  0.31     0.70 0.62
## clustersRF*   0.60    -1.67 0.07
# Whole sample: 20% trimmed mean and standard deviation of each numeric
# variable (columns 1:17, 20 and 21 of dados2), rounded to 2 decimals.
# vapply() guarantees one numeric value per column.
# NOTE(review): `dp` is the ordinary sd(), not a trimmed/robust SD - confirm
# that this is what should be reported next to the trimmed mean.
media <- round(vapply(dados2[c(1:17, 20, 21)], tmean, numeric(1)), 2)
dp <- round(vapply(dados2[c(1:17, 20, 21)], sd, numeric(1)), 2)
total <- cbind(media, dp)
total
##               media    dp
## TempoDoenca    5.57  6.34
## EDSS           0.94  2.04
## Passos25       6.76  2.56
## pinosdir9     27.56  6.61
## pinosesq9     28.85  9.34
## VitD          63.23 29.98
## FSS           35.47 16.72
## HADS_A         6.09  3.97
## HADS_D         3.59  3.96
## SF36          93.88 11.74
## SD_escr       43.59 15.77
## SD_oral       46.59 15.44
## BVMT          10.53  6.65
## CVLT          53.09 11.73
## MMSE_MST      53.59  9.83
## VolCinz      894.04 52.32
## LesoesFlair    7.50  8.55
## Idade         35.21  9.22
## Escolaridade  13.68  4.54
# Description of each Random Forest cluster (trimmed mean uses 20%)
psych::describeBy(dados2, group = dados2$clustersRF, tr = .2)
## 
##  Descriptive statistics by group 
## group: Cluster1
##              vars  n   mean    sd median trimmed   mad   min   max range  skew
## TempoDoenca     1 35   8.68  6.89    7.0    7.62  5.93   0.6  32.0  31.4  1.17
## EDSS            2 35   2.23  2.22    2.0    1.76  2.97   0.0   7.5   7.5  0.76
## Passos25        3 35   7.31  2.64    7.0    6.81  2.97   4.0  15.0  11.0  1.35
## pinosdir9       4 35  31.63  6.44   31.0   30.67  7.41  23.0  45.0  22.0  0.54
## pinosesq9       5 35  33.83 10.19   31.0   31.57  7.41  21.0  70.0  49.0  1.61
## VitD            6 35  54.68 28.72   45.0   51.01 26.69   7.0  99.0  92.0  0.44
## FSS             7 35  39.46 16.69   41.0   40.86 16.31   9.0  63.0  54.0 -0.36
## HADS_A          8 35   6.71  4.06    5.0    6.24  4.45   1.0  16.0  15.0  0.53
## HADS_D          9 35   5.83  4.20    4.0    4.95  2.97   1.0  15.0  14.0  0.91
## SF36           10 35  91.14 11.39   92.0   92.57  5.93  53.0 105.0  52.0 -1.68
## SD_escr        11 35  37.20 12.08   33.0   36.19 10.38  11.0  68.0  57.0  0.38
## SD_oral        12 35  39.89 12.42   40.0   39.81 16.31  12.0  68.0  56.0  0.03
## BVMT           13 35   9.63  6.53    9.0    8.81  5.93   0.0  29.0  29.0  0.87
## CVLT           14 35  49.14 11.75   50.0   49.95 14.83  22.0  68.0  46.0 -0.46
## MMSE_MST       15 35  49.86  9.09   51.0   49.76  8.90  31.0  70.0  39.0  0.00
## VolCinz        16 35 870.29 50.23  873.0  871.44 47.44 766.0 980.0 214.0 -0.03
## LesoesFlair    17 35  12.78  9.04    9.1   10.48  5.63   2.3  38.9  36.6  1.12
## grupos*        18 35   1.51  0.51    2.0    1.52  0.00   1.0   2.0   1.0 -0.05
## grupos2*       19 35   1.34  0.48    1.0    1.24  0.00   1.0   2.0   1.0  0.63
## Idade          20 35  39.09  9.53   38.0   37.76 10.38  19.0  61.0  42.0  0.49
## Escolaridade   21 35  13.17  5.21   12.0   12.86  4.45   4.0  28.0  24.0  0.58
## clustersRF*    22 35   1.00  0.00    1.0    1.00  0.00   1.0   1.0   0.0   NaN
##              kurtosis   se
## TempoDoenca      1.62 1.16
## EDSS            -0.63 0.37
## Passos25         1.68 0.45
## pinosdir9       -0.95 1.09
## pinosesq9        2.78 1.72
## VitD            -1.15 4.85
## FSS             -0.95 2.82
## HADS_A          -0.65 0.69
## HADS_D          -0.27 0.71
## SF36             3.53 1.93
## SD_escr         -0.39 2.04
## SD_oral         -0.64 2.10
## BVMT             0.61 1.10
## CVLT            -0.55 1.99
## MMSE_MST        -0.34 1.54
## VolCinz         -0.39 8.49
## LesoesFlair      0.33 1.53
## grupos*         -2.05 0.09
## grupos2*        -1.64 0.08
## Idade           -0.33 1.61
## Escolaridade     0.39 0.88
## clustersRF*       NaN 0.00
## ------------------------------------------------------------ 
## group: Cluster2
##              vars  n   mean    sd median trimmed   mad    min   max  range
## TempoDoenca     1 19   3.57  3.28    2.0    2.95  2.08   0.25  13.0  12.75
## EDSS            2 19   0.32  0.67    0.0    0.08  0.00   0.00   2.0   2.00
## Passos25        3 19   7.16  2.46    7.0    6.69  1.48   5.00  15.0  10.00
## pinosdir9       4 19  23.63  2.54   24.0   23.92  2.97  18.00  27.0   9.00
## pinosesq9       5 19  25.26  3.25   25.0   25.23  2.97  19.00  32.0  13.00
## VitD            6 19  77.56 27.02   99.0   82.13  0.00  30.00 100.0  70.00
## FSS             7 19  28.89 14.84   25.0   26.85 14.83   9.00  63.0  54.00
## HADS_A          8 19   5.89  3.86    6.0    5.77  4.45   0.00  12.0  12.00
## HADS_D          9 19   2.05  1.75    1.0    1.85  1.48   0.00   6.0   6.00
## SF36           10 19  92.58 12.62   96.0   95.92  4.45  56.00 104.0  48.00
## SD_escr        11 19  57.26 13.44   59.0   59.00  8.90  19.00  77.0  58.00
## SD_oral        12 19  58.11 13.55   59.0   59.92  8.90  19.00  77.0  58.00
## BVMT           13 19  13.26  6.38   14.0   13.08  4.45   3.00  33.0  30.00
## CVLT           14 19  57.05 10.09   59.0   58.15 10.38  33.00  70.0  37.00
## MMSE_MST       15 19  59.84  7.76   59.0   59.77  5.93  47.00  81.0  34.00
## VolCinz        16 19 928.42 31.35  927.0  926.92 35.58 872.00 995.0 123.00
## LesoesFlair    17 19   4.02  2.90    3.5    3.33  2.82   1.20  11.7  10.50
## grupos*        18 19   1.11  0.32    1.0    1.00  0.00   1.00   2.0   1.00
## grupos2*       19 19   1.00  0.00    1.0    1.00  0.00   1.00   1.0   0.00
## Idade          20 19  31.11  5.91   32.0   31.31  5.93  18.00  42.0  24.00
## Escolaridade   21 19  14.95  2.72   16.0   15.00  2.97  11.00  20.0   9.00
## clustersRF*    22 19   2.00  0.00    2.0    2.00  0.00   2.00   2.0   0.00
##               skew kurtosis   se
## TempoDoenca   1.24     1.07 0.75
## EDSS          1.69     1.33 0.15
## Passos25      1.60     2.79 0.56
## pinosdir9    -0.55    -0.77 0.58
## pinosesq9     0.08    -0.56 0.74
## VitD         -0.51    -1.60 6.20
## FSS           0.80    -0.27 3.41
## HADS_A        0.19    -1.50 0.88
## HADS_D        0.69    -0.68 0.40
## SF36         -2.00     2.86 2.89
## SD_escr      -1.21     1.43 3.08
## SD_oral      -1.30     1.63 3.11
## BVMT          1.15     2.42 1.46
## CVLT         -0.63    -0.52 2.31
## MMSE_MST      0.58     0.83 1.78
## VolCinz       0.27    -0.60 7.19
## LesoesFlair   1.23     0.58 0.66
## grupos*       2.37     3.84 0.07
## grupos2*       NaN      NaN 0.00
## Idade        -0.25    -0.49 1.35
## Escolaridade -0.12    -1.18 0.62
## clustersRF*    NaN      NaN 0.00
# Yuen robust t-test between the two Random Forest clusters, run separately
# for each numeric variable (columns 1:17, 20 and 21 of dados2); p-values are
# rounded to 3 decimals. vapply() guarantees one numeric value per variable.
clusterRF.p <- round(
  vapply(
    dados2[c(1:17, 20, 21)],
    function(i) yuen(i ~ dados2$clustersRF)$p.value,
    numeric(1)
  ),
  3
)
# Effect size (with bootstrap). The seed is fixed so that the resampling, and
# therefore the reported effect sizes, are the same on every run.
set.seed(3984)
clusterRF.d <- round(
  vapply(
    dados2[c(1:17, 20, 21)],
    function(i) yuen.effect.ci(i ~ dados2$clustersRF)$effsize,
    numeric(1)
  ),
  3
)
# Print results
cbind(clusterRF.p, clusterRF.d)
##              clusterRF.p clusterRF.d
## TempoDoenca        0.003       0.596
## EDSS               0.003       0.778
## Passos25           0.858       0.097
## pinosdir9          0.000       0.869
## pinosesq9          0.000       0.778
## VitD               0.011       0.492
## FSS                0.009       0.519
## HADS_A             0.740       0.106
## HADS_D             0.002       0.698
## SF36               0.084       0.343
## SD_escr            0.000       0.798
## SD_oral            0.000       0.770
## BVMT               0.010       0.434
## CVLT               0.025       0.480
## MMSE_MST           0.000       0.709
## VolCinz            0.000       0.819
## LesoesFlair        0.000       0.872
## Idade              0.004       0.639
## Escolaridade       0.060       0.408
# Description of the EM and EMRR groups (split at EDSS <= 1.5)
psych::describeBy(dados2, group = dados2$grupos, tr = .2)
## 
##  Descriptive statistics by group 
## group: EM
##              vars  n   mean    sd median trimmed   mad    min   max  range
## TempoDoenca     1 34   5.35  4.41    5.0    4.61  5.41   0.25  15.0  14.75
## EDSS            2 34   0.26  0.45    0.0    0.14  0.00   0.00   1.0   1.00
## Passos25        3 34   6.76  2.24    6.0    6.32  1.48   4.00  15.0  11.00
## pinosdir9       4 34  25.91  3.73   26.0   25.55  2.22  20.00  39.0  19.00
## pinosesq9       5 34  27.12  4.40   27.0   27.00  4.45  19.00  40.0  21.00
## VitD            6 34  64.43 29.36   63.0   66.08 43.07   7.00 100.0  93.00
## FSS             7 34  31.00 15.89   30.0   29.59 15.57   9.00  63.0  54.00
## HADS_A          8 34   5.62  3.86    5.0    5.18  4.45   0.00  14.0  14.00
## HADS_D          9 34   3.41  3.46    3.0    2.55  2.22   0.00  15.0  15.00
## SF36           10 34  93.41 10.85   96.0   95.55  6.67  53.00 105.0  52.00
## SD_escr        11 34  49.56 15.71   51.0   50.55 17.79  19.00  77.0  58.00
## SD_oral        12 34  50.62 15.45   53.0   51.95 17.79  19.00  77.0  58.00
## BVMT           13 34  11.59  6.16   11.5   11.68  6.67   0.00  33.0  33.00
## CVLT           14 34  52.35 11.65   53.0   53.41 11.86  23.00  70.0  47.00
## MMSE_MST       15 34  56.03  9.97   57.0   56.55  9.64  32.00  81.0  49.00
## VolCinz        16 34 896.30 47.68  899.0  899.20 41.51 787.00 995.0 208.00
## LesoesFlair    17 34   7.80  7.24    5.6    5.93  4.74   1.20  30.9  29.70
## grupos*        18 34   1.00  0.00    1.0    1.00  0.00   1.00   1.0   0.00
## grupos2*       19 34   1.00  0.00    1.0    1.00  0.00   1.00   1.0   0.00
## Idade          20 34  34.12  7.45   33.0   33.64  6.67  18.00  55.0  37.00
## Escolaridade   21 34  14.38  4.52   15.0   14.27  4.45   4.00  28.0  24.00
## clustersRF*    22 34   1.50  0.51    1.5    1.50  0.74   1.00   2.0   1.00
##               skew kurtosis   se
## TempoDoenca   0.61    -0.90 0.76
## EDSS          1.02    -0.99 0.08
## Passos25      1.49     2.92 0.38
## pinosdir9     1.22     2.63 0.64
## pinosesq9     0.51     0.38 0.75
## VitD         -0.07    -1.32 5.03
## FSS           0.39    -0.86 2.73
## HADS_A        0.54    -0.74 0.66
## HADS_D        1.93     3.62 0.59
## SF36         -2.20     5.22 1.86
## SD_escr      -0.27    -1.11 2.69
## SD_oral      -0.35    -0.93 2.65
## BVMT          0.85     2.23 1.06
## CVLT         -0.51    -0.44 2.00
## MMSE_MST     -0.17     0.13 1.71
## VolCinz      -0.33    -0.13 8.18
## LesoesFlair   1.64     2.22 1.24
## grupos*        NaN      NaN 0.00
## grupos2*       NaN      NaN 0.00
## Idade         0.46     0.37 1.28
## Escolaridade  0.43     1.14 0.78
## clustersRF*   0.00    -2.06 0.09
## ------------------------------------------------------------ 
## group: EMRR
##              vars  n   mean    sd median trimmed   mad   min   max range  skew
## TempoDoenca     1 20   9.48  8.18   6.00    8.00  5.93   0.6  32.0  31.4  1.06
## EDSS            2 20   3.75  1.79   3.00    3.42  1.48   2.0   7.5   5.5  0.59
## Passos25        3 20   8.10  2.88   8.00    7.50  1.48   5.0  15.0  10.0  1.25
## pinosdir9       4 20  33.75  7.55  34.00   34.17  8.90  18.0  45.0  27.0 -0.30
## pinosesq9       5 20  37.10 11.99  35.00   34.58 11.12  24.0  70.0  46.0  1.08
## VitD            6 20  59.85 31.56  42.00   57.77 22.46  22.0  99.0  77.0  0.33
## FSS             7 20  43.80 15.24  45.00   45.50 20.02   9.0  63.0  54.0 -0.52
## HADS_A          8 20   7.80  3.87   7.50    7.50  5.19   2.0  16.0  14.0  0.38
## HADS_D          9 20   6.35  4.16   5.00    5.92  4.45   1.0  15.0  14.0  0.63
## SF36           10 20  88.65 12.84  90.50   90.67  7.41  56.0 105.0  49.0 -1.38
## SD_escr        11 20  35.25 11.35  32.50   34.50  6.67  11.0  53.0  42.0  0.04
## SD_oral        12 20  38.95 12.66  39.50   39.08 15.57  12.0  58.0  46.0 -0.17
## BVMT           13 20   9.75  7.45   8.50    8.42  5.93   1.0  29.0  28.0  1.01
## CVLT           14 20  51.20 12.14  51.00   52.50 16.31  22.0  67.0  45.0 -0.56
## MMSE_MST       15 20  48.85  7.93  51.00   49.42  5.93  31.0  65.0  34.0 -0.28
## VolCinz        16 20 881.30 59.47 885.00  881.75 67.46 766.0 980.0 214.0 -0.15
## LesoesFlair    17 20  12.93  9.77   9.05   10.34  6.60   3.5  38.9  35.4  1.12
## grupos*        18 20   1.00  0.00   1.00    1.00  0.00   1.0   1.0   0.0   NaN
## grupos2*       19 20   1.60  0.50   2.00    1.67  0.00   1.0   2.0   1.0 -0.38
## Idade          20 20  39.95 10.87  37.00   38.67  8.90  19.0  61.0  42.0  0.33
## Escolaridade   21 20  12.80  4.53  11.50   12.58  3.71   4.0  22.0  18.0  0.12
## clustersRF*    22 20   1.10  0.31   1.00    1.00  0.00   1.0   2.0   1.0  2.47
##              kurtosis    se
## TempoDoenca      0.43  1.83
## EDSS            -1.14  0.40
## Passos25         0.70  0.64
## pinosdir9       -0.97  1.69
## pinosesq9        0.55  2.68
## VitD            -1.83  7.06
## FSS             -0.76  3.41
## HADS_A          -0.98  0.87
## HADS_D          -0.60  0.93
## SF36             1.48  2.87
## SD_escr         -0.84  2.54
## SD_oral         -0.90  2.83
## BVMT             0.31  1.67
## CVLT            -0.57  2.71
## MMSE_MST        -0.28  1.77
## VolCinz         -0.92 13.30
## LesoesFlair      0.23  2.18
## grupos*           NaN  0.00
## grupos2*        -1.95  0.11
## Idade           -0.77  2.43
## Escolaridade    -0.55  1.01
## clustersRF*      4.32  0.07
# Yuen robust t-test: EM vs EMRR (grouping by `grupos`) on each numeric column.
# Columns are picked by type instead of by position (c(1:17, 20, 21)), so the
# non-numeric columns grupos, grupos2 and clustersRF are skipped automatically.
vars.num <- names(dados2)[vapply(dados2, is.numeric, logical(1))]
grupos.p <- round(
  vapply(dados2[vars.num],
         function(i) yuen(i ~ dados2$grupos)$p.value,
         numeric(1)),
  3
)
# Effect size (with bootstrap). The seed is fixed so that the resampled
# values are reproducible from one run to the next.
set.seed(4)
grupos.d <- round(
  vapply(dados2[vars.num],
         function(i) yuen.effect.ci(i ~ dados2$grupos)$effsize,
         numeric(1)),
  3
)
# Print results
cbind(grupos.p, grupos.d)
##              grupos.p grupos.d
## TempoDoenca     0.182    0.395
## EDSS            0.000    0.922
## Passos25        0.053    0.353
## pinosdir9       0.001    0.759
## pinosesq9       0.013    0.768
## VitD            0.538    0.126
## FSS             0.006    0.564
## HADS_A          0.095    0.350
## HADS_D          0.011    0.616
## SF36            0.043    0.493
## SD_escr         0.002    0.674
## SD_oral         0.013    0.513
## BVMT            0.057    0.368
## CVLT            0.829    0.070
## MMSE_MST        0.008    0.526
## VolCinz         0.321    0.232
## LesoesFlair     0.063    0.413
## Idade           0.132    0.439
## Escolaridade    0.174    0.304
# Descriptive statistics for the MS and RRMS groups (EDSS <= 2.5).
# `trim` is spelled out in full: the previous `tr = .2` only worked through
# partial argument matching.
psych::describeBy(dados2, group = dados2$grupos2, trim = 0.2)
## 
##  Descriptive statistics by group 
## group: MS
##              vars  n   mean    sd median trimmed   mad    min   max  range
## TempoDoenca     1 42   5.73  4.87    5.0    4.78  5.41   0.25  17.0  16.75
## EDSS            2 42   0.62  0.85    0.0    0.35  0.00   0.00   2.5   2.50
## Passos25        3 42   6.88  2.43    6.0    6.35  1.48   4.00  15.0  11.00
## pinosdir9       4 42  26.60  4.52   26.0   25.96  2.97  18.00  39.0  21.00
## pinosesq9       5 42  27.81  5.26   27.0   27.12  4.45  19.00  43.0  24.00
## VitD            6 42  65.54 29.35   63.0   67.03 43.74   7.00 100.0  93.00
## FSS             7 42  32.48 16.15   33.0   31.73 17.79   9.00  63.0  54.00
## HADS_A          8 42   6.10  3.94    5.0    5.69  4.45   0.00  14.0  14.00
## HADS_D          9 42   3.83  3.77    3.0    2.85  2.97   0.00  15.0  15.00
## SF36           10 42  92.95 11.63   96.0   95.38  6.67  53.00 105.0  52.00
## SD_escr        11 42  47.64 15.19   50.0   47.73 18.53  19.00  77.0  58.00
## SD_oral        12 42  49.31 14.73   52.5   50.04 17.79  19.00  77.0  58.00
## BVMT           13 42  11.62  6.48   11.0   11.42  5.93   0.00  33.0  33.00
## CVLT           14 42  51.98 11.89   52.5   53.12 11.86  22.00  70.0  48.00
## MMSE_MST       15 42  54.64 10.48   55.5   55.38 11.12  31.00  81.0  50.00
## VolCinz        16 42 897.03 51.82  902.0  902.13 44.48 766.00 995.0 229.00
## LesoesFlair    17 42   8.62  7.51    5.9    6.75  4.82   1.20  30.9  29.70
## grupos*        18 42   1.19  0.40    1.0    1.00  0.00   1.00   2.0   1.00
## grupos2*       19 42   1.00  0.00    1.0    1.00  0.00   1.00   1.0   0.00
## Idade          20 42  34.50  8.50   33.0   33.62  5.93  18.00  58.0  40.00
## Escolaridade   21 42  14.52  4.58   15.0   14.46  4.45   4.00  28.0  24.00
## clustersRF*    22 42   1.45  0.50    1.0    1.42  0.00   1.00   2.0   1.00
##               skew kurtosis   se
## TempoDoenca   0.78    -0.53 0.75
## EDSS          0.93    -0.65 0.13
## Passos25      1.72     3.28 0.38
## pinosdir9     0.82     0.63 0.70
## pinosesq9     1.00     0.94 0.81
## VitD         -0.06    -1.41 4.53
## FSS           0.20    -1.08 2.49
## HADS_A        0.45    -0.87 0.61
## HADS_D        1.65     2.22 0.58
## SF36         -2.10     4.28 1.79
## SD_escr      -0.05    -1.15 2.34
## SD_oral      -0.19    -0.93 2.27
## BVMT          0.96     1.77 1.00
## CVLT         -0.63    -0.08 1.83
## MMSE_MST     -0.23    -0.05 1.62
## VolCinz      -0.56    -0.12 8.00
## LesoesFlair   1.32     0.96 1.16
## grupos*       1.52     0.32 0.06
## grupos2*       NaN      NaN 0.00
## Idade         0.69     0.56 1.31
## Escolaridade  0.25     0.65 0.71
## clustersRF*   0.18    -2.01 0.08
## ------------------------------------------------------------ 
## group: RRMS
##              vars  n   mean    sd median trimmed   mad   min   max range  skew
## TempoDoenca     1 12  10.88  9.10   8.00    9.25  6.67   0.6  32.0  31.4  0.94
## EDSS            2 12   4.83  1.51   4.75    4.75  1.85   3.0   7.5   4.5  0.15
## Passos25        3 12   8.58  2.64   8.00    8.12  1.48   5.0  15.0  10.0  1.06
## pinosdir9       4 12  36.58  7.05  38.00   37.62  7.41  23.0  45.0  22.0 -0.56
## pinosesq9       5 12  41.33 12.74  36.00   39.50  6.67  26.0  70.0  44.0  0.85
## VitD            6 12  52.92 31.37  37.65   48.88 16.01  22.0  99.0  77.0  0.60
## FSS             7 12  47.17 13.82  45.00   48.38 20.76  23.0  63.0  40.0 -0.27
## HADS_A          8 12   7.58  4.06   7.50    7.38  4.45   2.0  16.0  14.0  0.43
## HADS_D          9 12   6.83  3.86   6.00    6.50  3.71   2.0  15.0  13.0  0.58
## SF36           10 12  87.08 11.43  89.00   88.62  6.67  56.0 104.0  48.0 -1.38
## SD_escr        11 12  32.42 11.87  30.00   31.75  7.41  11.0  53.0  42.0  0.20
## SD_oral        12 12  35.75 13.59  35.00   35.38 13.34  12.0  58.0  46.0  0.04
## BVMT           13 12   8.42  6.92   6.50    7.25  6.67   1.0  24.0  23.0  0.84
## CVLT           14 12  51.75 11.66  55.50   52.38 12.60  35.0  64.0  29.0 -0.19
## MMSE_MST       15 12  48.92  5.32  51.00   49.38  5.93  40.0  55.0  15.0 -0.35
## VolCinz        16 12 868.75 50.01 862.50  866.50 39.29 779.0 980.0 201.0  0.42
## LesoesFlair    17 12  13.47 11.05   9.05   10.74  4.89   4.3  38.9  34.6  1.13
## grupos*        18 12   1.00  0.00   1.00    1.00  0.00   1.0   1.0   0.0   NaN
## grupos2*       19 12   1.00  0.00   1.00    1.00  0.00   1.0   1.0   0.0   NaN
## Idade          20 12  42.50  9.24  40.50   41.00  8.90  32.0  61.0  29.0  0.66
## Escolaridade   21 12  11.25  3.49  11.00   11.38  0.74   4.0  18.0  14.0 -0.18
## clustersRF*    22 12   1.00  0.00   1.00    1.00  0.00   1.0   1.0   0.0   NaN
##              kurtosis    se
## TempoDoenca     -0.19  2.63
## EDSS            -1.42  0.44
## Passos25         0.49  0.76
## pinosdir9       -1.05  2.04
## pinosesq9       -0.42  3.68
## VitD            -1.59  9.06
## FSS             -1.49  3.99
## HADS_A          -0.84  1.17
## HADS_D          -0.85  1.11
## SF36             1.99  3.30
## SD_escr         -0.86  3.43
## SD_oral         -1.17  3.92
## BVMT            -0.35  2.00
## CVLT            -1.91  3.37
## MMSE_MST        -1.56  1.53
## VolCinz          0.02 14.44
## LesoesFlair     -0.21  3.19
## grupos*           NaN  0.00
## grupos2*          NaN  0.00
## Idade           -0.99  2.67
## Escolaridade     0.01  1.01
## clustersRF*       NaN  0.00
# Yuen robust t-test: MS vs RRMS (grouping by `grupos2`) on each numeric column.
# Columns are picked by type instead of by position (c(1:17, 20, 21)), so the
# non-numeric columns grupos, grupos2 and clustersRF are skipped automatically.
vars.num <- names(dados2)[vapply(dados2, is.numeric, logical(1))]
grupos.p2 <- round(
  vapply(dados2[vars.num],
         function(i) yuen(i ~ dados2$grupos2)$p.value,
         numeric(1)),
  3
)
# Effect size (with bootstrap). The seed is fixed so that the resampled
# values are reproducible from one run to the next.
set.seed(4)
grupos.d2 <- round(
  vapply(dados2[vars.num],
         function(i) yuen.effect.ci(i ~ dados2$grupos2)$effsize,
         numeric(1)),
  3
)
# Print results
cbind(grupos.p2, grupos.d2)
##              grupos.p2 grupos.d2
## TempoDoenca      0.126     0.399
## EDSS             0.000     0.893
## Passos25         0.001     0.505
## pinosdir9        0.001     0.823
## pinosesq9        0.009     0.802
## VitD             0.251     0.265
## FSS              0.020     0.549
## HADS_A           0.277     0.227
## HADS_D           0.021     0.526
## SF36             0.009     0.533
## SD_escr          0.007     0.672
## SD_oral          0.019     0.564
## BVMT             0.080     0.379
## CVLT             0.891     0.083
## MMSE_MST         0.036     0.505
## VolCinz          0.026     0.467
## LesoesFlair      0.319     0.396
## Idade            0.058     0.623
## Escolaridade     0.000     0.548
# Put the p-values and effect sizes of the three comparisons side by side
# (grupos, grupos2 and the random-forest clusters).
cbind(
  grupos.p, grupos.d,
  grupos.p2, grupos.d2,
  clusterRF.p, clusterRF.d
)
##              grupos.p grupos.d grupos.p2 grupos.d2 clusterRF.p clusterRF.d
## TempoDoenca     0.182    0.395     0.126     0.399       0.003       0.596
## EDSS            0.000    0.922     0.000     0.893       0.003       0.778
## Passos25        0.053    0.353     0.001     0.505       0.858       0.097
## pinosdir9       0.001    0.759     0.001     0.823       0.000       0.869
## pinosesq9       0.013    0.768     0.009     0.802       0.000       0.778
## VitD            0.538    0.126     0.251     0.265       0.011       0.492
## FSS             0.006    0.564     0.020     0.549       0.009       0.519
## HADS_A          0.095    0.350     0.277     0.227       0.740       0.106
## HADS_D          0.011    0.616     0.021     0.526       0.002       0.698
## SF36            0.043    0.493     0.009     0.533       0.084       0.343
## SD_escr         0.002    0.674     0.007     0.672       0.000       0.798
## SD_oral         0.013    0.513     0.019     0.564       0.000       0.770
## BVMT            0.057    0.368     0.080     0.379       0.010       0.434
## CVLT            0.829    0.070     0.891     0.083       0.025       0.480
## MMSE_MST        0.008    0.526     0.036     0.505       0.000       0.709
## VolCinz         0.321    0.232     0.026     0.467       0.000       0.819
## LesoesFlair     0.063    0.413     0.319     0.396       0.000       0.872
## Idade           0.132    0.439     0.058     0.623       0.004       0.639
## Escolaridade    0.174    0.304     0.000     0.548       0.060       0.408

Gráficos tradicionais

# Package vignette:
# https://cran.r-project.org/web/packages/compareGroups/vignettes/compareGroups_vignette.html
library(compareGroups)
# Overall descriptive table of dados2 (all rows, no grouping variable)
descrTable(dados2)
## 
## --------Summary descriptives table ---------
## 
## ___________________________ 
##                 [ALL]    N  
##                 N=54        
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## TempoDoenca  6.88 (6.34) 54 
## EDSS         1.56 (2.04) 54 
## Passos25     7.26 (2.56) 54 
## pinosdir9    28.8 (6.61) 54 
## pinosesq9    30.8 (9.34) 54 
## VitD         62.7 (30.0) 54 
## FSS          35.7 (16.7) 54 
## HADS_A       6.43 (3.97) 54 
## HADS_D       4.50 (3.96) 54 
## SF36         91.6 (11.7) 54 
## SD_escr      44.3 (15.8) 54 
## SD_oral      46.3 (15.4) 54 
## BVMT         10.9 (6.65) 54 
## CVLT         51.9 (11.7) 54 
## MMSE_MST     53.4 (9.83) 54 
## VolCinz      891 (52.3)  54 
## LesoesFlair  9.70 (8.55) 54 
## grupos:                  54 
##     EM       34 (63.0%)     
##     EMRR     20 (37.0%)     
## grupos2:                 54 
##     MS       42 (77.8%)     
##     RRMS     12 (22.2%)     
## Idade        36.3 (9.22) 54 
## Escolaridade 13.8 (4.54) 54 
## clustersRF:              54 
##     Cluster1 35 (64.8%)     
##     Cluster2 19 (35.2%)     
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Compare every other column of dados2 between the clustersRF groups.
# NOTE(review): method = 4 is passed through unchanged; see ?compareGroups for
# how it decides the treatment of continuous variables.
res <- compareGroups(clustersRF ~ ., data = dados2, method = 4)
print(res)
## 
## 
## -------- Summary of results by groups of 'clustersRF'---------
## 
## 
##    var          N  p.value  method                selection
## 1  TempoDoenca  54 0.002**  continuous non-normal ALL      
## 2  EDSS         54 <0.001** continuous non-normal ALL      
## 3  Passos25     54 0.911    continuous non-normal ALL      
## 4  pinosdir9    54 <0.001** continuous non-normal ALL      
## 5  pinosesq9    54 <0.001** continuous non-normal ALL      
## 6  VitD         54 0.007**  continuous non-normal ALL      
## 7  FSS          54 0.023**  continuous non-normal ALL      
## 8  HADS_A       54 0.501    continuous non-normal ALL      
## 9  HADS_D       54 <0.001** continuous non-normal ALL      
## 10 SF36         54 0.198    continuous non-normal ALL      
## 11 SD_escr      54 <0.001** continuous non-normal ALL      
## 12 SD_oral      54 <0.001** continuous non-normal ALL      
## 13 BVMT         54 0.036**  continuous non-normal ALL      
## 14 CVLT         54 0.020**  continuous non-normal ALL      
## 15 MMSE_MST     54 <0.001** continuous non-normal ALL      
## 16 VolCinz      54 <0.001** continuous non-normal ALL      
## 17 LesoesFlair  54 <0.001** continuous non-normal ALL      
## 18 grupos       54 0.007**  categorical           ALL      
## 19 grupos2      54 0.004**  categorical           ALL      
## 20 Idade        54 0.002**  continuous non-normal ALL      
## 21 Escolaridade 54 0.076*   continuous non-normal ALL      
## -----
## Signif. codes:  0 '**' 0.05 '*' 0.1 ' ' 1
# Per-variable descriptives by cluster, with odds ratios and overall p-values
summary(res)
## 
##  --- Descriptives of each row-variable by groups of 'clustersRF' ---
## 
## ------------------- 
## row-variable: TempoDoenca 
## 
##          N  med Q1   Q3   lower upper p.overall
## [ALL]    54 5.5 2    10   3     7              
## Cluster1 35 7   3    11.5 5     10    0.002226 
## Cluster2 19 2   1.15 5    1     5              
## 
##      OR       OR.lower OR.upper
## [1,] 0.801355 0.680546 0.943611
## 
## ------------------- 
## row-variable: EDSS 
## 
##          N  med Q1 Q3    lower upper p.overall
## [ALL]    54 1   0  2.375 0     2              
## Cluster1 35 2   0  3.25  1     3     0.000228 
## Cluster2 19 0   0  0     0     0              
## 
##      OR       OR.lower OR.upper
## [1,] 0.344809 0.159441 0.745687
## 
## ------------------- 
## row-variable: Passos25 
## 
##          N  med Q1 Q3 lower upper p.overall
## [ALL]    54 7   5  8  6     8              
## Cluster1 35 7   5  8  5     8     0.911383 
## Cluster2 19 7   5  8  5     8              
## 
##      OR       OR.lower OR.upper
## [1,] 0.975539 0.779626 1.220683
## 
## ------------------- 
## row-variable: pinosdir9 
## 
##          N  med Q1   Q3    lower upper p.overall
## [ALL]    54 27  25   32.75 26    28             
## Cluster1 35 31  27   37    27    33    1e-06    
## Cluster2 19 24  22.5 25.5  22    26             
## 
##      OR      OR.lower OR.upper
## [1,] 0.54475 0.371205 0.79943 
## 
## ------------------- 
## row-variable: pinosesq9 
## 
##          N  med Q1    Q3 lower upper p.overall
## [ALL]    54 28  25.25 34 26    31             
## Cluster1 35 31  27    36 28    35    6.8e-05  
## Cluster2 19 25  24    27 24    27             
## 
##      OR       OR.lower OR.upper
## [1,] 0.743493 0.61489  0.898992
## 
## ------------------- 
## row-variable: VitD 
## 
##          N  med  Q1   Q3 lower upper p.overall
## [ALL]    54 55.5 38   99 42.7  80             
## Cluster1 35 45   36   75 38    63    0.006647 
## Cluster2 19 99   53.5 99 53    99             
## 
##      OR       OR.lower OR.upper
## [1,] 1.028301 1.00686  1.050199
## 
## ------------------- 
## row-variable: FSS 
## 
##          N  med  Q1    Q3 lower upper p.overall
## [ALL]    54 35.5 23.25 47 28    43             
## Cluster1 35 41   31    53 35    47    0.023447 
## Cluster2 19 25   18.5  37 16    38             
## 
##      OR       OR.lower OR.upper
## [1,] 0.959271 0.923711 0.996199
## 
## ------------------- 
## row-variable: HADS_A 
## 
##          N  med Q1   Q3  lower upper p.overall
## [ALL]    54 5.5 3.25 9   5     8              
## Cluster1 35 5   4    9   5     8     0.501106 
## Cluster2 19 6   3    9.5 3     10             
## 
##      OR       OR.lower OR.upper
## [1,] 0.947359 0.818892 1.095979
## 
## ------------------- 
## row-variable: HADS_D 
## 
##          N  med Q1 Q3   lower upper p.overall
## [ALL]    54 3   2  6.75 2     4              
## Cluster1 35 4   3  8    3     7     0.000163 
## Cluster2 19 1   1  3    1     3              
## 
##      OR       OR.lower OR.upper
## [1,] 0.583914 0.405043 0.841775
## 
## ------------------- 
## row-variable: SF36 
## 
##          N  med Q1 Q3   lower upper p.overall
## [ALL]    54 94  89 99   91    96             
## Cluster1 35 92  88 98.5 89    96    0.197896 
## Cluster2 19 96  93 99   93    99             
## 
##      OR       OR.lower OR.upper
## [1,] 1.011162 0.961446 1.063449
## 
## ------------------- 
## row-variable: SD_escr 
## 
##          N  med  Q1   Q3   lower upper p.overall
## [ALL]    54 47.5 30   56.5 33    51             
## Cluster1 35 33   29   48   30    45    5e-06    
## Cluster2 19 59   52.5 65   52    65             
## 
##      OR       OR.lower OR.upper
## [1,] 1.127098 1.057069 1.201765
## 
## ------------------- 
## row-variable: SD_oral 
## 
##          N  med Q1   Q3   lower upper p.overall
## [ALL]    54 49  35   57   40    54             
## Cluster1 35 40  29   49.5 35    48    1.3e-05  
## Cluster2 19 59  54.5 65   54    65             
## 
##      OR       OR.lower OR.upper
## [1,] 1.121607 1.05119  1.196741
## 
## ------------------- 
## row-variable: BVMT 
## 
##          N  med Q1   Q3    lower upper p.overall
## [ALL]    54 10  6.25 15.75 8     13             
## Cluster1 35 9   5    13    6     11    0.036071 
## Cluster2 19 14  9.5  16    9     16             
## 
##      OR       OR.lower OR.upper
## [1,] 1.090579 0.993927 1.19663 
## 
## ------------------- 
## row-variable: CVLT 
## 
##          N  med  Q1    Q3    lower upper p.overall
## [ALL]    54 52.5 44.25 61.75 49    58             
## Cluster1 35 50   39.5  58.5  45    56    0.019803 
## Cluster2 19 59   50.5  65    49    66             
## 
##      OR       OR.lower OR.upper
## [1,] 1.072029 1.009655 1.138255
## 
## ------------------- 
## row-variable: MMSE_MST 
## 
##          N  med Q1   Q3    lower upper p.overall
## [ALL]    54 53  47   59.75 51    57             
## Cluster1 35 51  44   54.5  46    53    0.000193 
## Cluster2 19 59  55.5 63.5  55    64             
## 
##      OR       OR.lower OR.upper
## [1,] 1.157575 1.056649 1.268141
## 
## ------------------- 
## row-variable: VolCinz 
## 
##          N  med Q1    Q3    lower upper p.overall
## [ALL]    54 893 860.2 927   879   914            
## Cluster1 35 873 839.5 903.5 858   889   4.6e-05  
## Cluster2 19 927 907   950   900   951            
## 
##      OR       OR.lower OR.upper
## [1,] 1.034405 1.014016 1.055203
## 
## ------------------- 
## row-variable: LesoesFlair 
## 
##          N  med Q1   Q3    lower upper p.overall
## [ALL]    54 6.7 3.8  12.15 5.4   9.1            
## Cluster1 35 9.1 6.2  17.75 6.8   14.7  4e-06    
## Cluster2 19 3.5 1.75 4.85  1.6   5.4            
## 
##      OR       OR.lower OR.upper
## [1,] 0.645438 0.488437 0.852905
## 
## ------------------- 
## row-variable: grupos 
## 
##          EM EMRR EM%      EMRR%    p.overall
## [ALL]    34 20   62.96296 37.03704          
## Cluster1 17 18   48.57143 51.42857 0.007422 
## Cluster2 17 2    89.47368 10.52632          
## 
##      OR       OR.lower OR.upper
## EM   1                         
## EMRR 0.121746 0.015932 0.519754
## 
## ------------------- 
## row-variable: grupos2 
## 
##          MS RRMS MS%      RRMS%    p.overall
## [ALL]    42 12   77.77778 22.22222          
## Cluster1 23 12   65.71429 34.28571 0.004364 
## Cluster2 19 0    100      0                 
## 
##      OR OR.lower OR.upper
## MS   1                   
## RRMS .  .        .       
## 
## ------------------- 
## row-variable: Idade 
## 
##          N  med  Q1   Q3   lower upper p.overall
## [ALL]    54 34.5 31   41.5 32    38             
## Cluster1 35 38   32   45   34    42    0.002452 
## Cluster2 19 32   26.5 34.5 26    35             
## 
##      OR       OR.lower OR.upper
## [1,] 0.869639 0.786454 0.961622
## 
## ------------------- 
## row-variable: Escolaridade 
## 
##          N  med  Q1 Q3   lower upper p.overall
## [ALL]    54 13.5 11 17   11    16             
## Cluster1 35 12   11 16.5 11    15    0.075759 
## Cluster2 19 16   13 17   13    17             
## 
##      OR       OR.lower OR.upper
## [1,] 1.094281 0.960237 1.247037
# Summary table by cluster; show.ratio = TRUE adds the OR and p.ratio columns
createTable(res, show.ratio = TRUE)
## 
## --------Summary descriptives table by 'clustersRF'---------
## 
## _________________________________________________________________________________ 
##                  Cluster1         Cluster2            OR        p.ratio p.overall 
##                    N=35             N=19                                          
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## TempoDoenca  7.00 [3.00;11.5] 2.00 [1.15;5.00] 0.80 [0.68;0.94]  0.008    0.002   
## EDSS         2.00 [0.00;3.25] 0.00 [0.00;0.00] 0.34 [0.16;0.75]  0.007   <0.001   
## Passos25     7.00 [5.00;8.00] 7.00 [5.00;8.00] 0.98 [0.78;1.22]  0.829    0.911   
## pinosdir9    31.0 [27.0;37.0] 24.0 [22.5;25.5] 0.54 [0.37;0.80]  0.002   <0.001   
## pinosesq9    31.0 [27.0;36.0] 25.0 [24.0;27.0] 0.74 [0.61;0.90]  0.002   <0.001   
## VitD         45.0 [36.0;75.0] 99.0 [53.5;99.0] 1.03 [1.01;1.05]  0.009    0.007   
## FSS          41.0 [31.0;53.0] 25.0 [18.5;37.0] 0.96 [0.92;1.00]  0.031    0.023   
## HADS_A       5.00 [4.00;9.00] 6.00 [3.00;9.50] 0.95 [0.82;1.10]  0.467    0.501   
## HADS_D       4.00 [3.00;8.00] 1.00 [1.00;3.00] 0.58 [0.41;0.84]  0.004   <0.001   
## SF36         92.0 [88.0;98.5] 96.0 [93.0;99.0] 1.01 [0.96;1.06]  0.666    0.198   
## SD_escr      33.0 [29.0;48.0] 59.0 [52.5;65.0] 1.13 [1.06;1.20] <0.001   <0.001   
## SD_oral      40.0 [29.0;49.5] 59.0 [54.5;65.0] 1.12 [1.05;1.20]  0.001   <0.001   
## BVMT         9.00 [5.00;13.0] 14.0 [9.50;16.0] 1.09 [0.99;1.20]  0.067    0.036   
## CVLT         50.0 [39.5;58.5] 59.0 [50.5;65.0] 1.07 [1.01;1.14]  0.023    0.020   
## MMSE_MST     51.0 [44.0;54.5] 59.0 [55.5;63.5] 1.16 [1.06;1.27]  0.002   <0.001   
## VolCinz       873 [840;904]    927 [907;950]   1.03 [1.01;1.06]  0.001   <0.001   
## LesoesFlair  9.10 [6.20;17.8] 3.50 [1.75;4.85] 0.65 [0.49;0.85]  0.002   <0.001   
## grupos:                                                                   0.007   
##     EM          17 (48.6%)       17 (89.5%)          Ref.        Ref.             
##     EMRR        18 (51.4%)       2 (10.5%)     0.12 [0.02;0.52]  0.003            
## grupos2:                                                                  0.004   
##     MS          23 (65.7%)       19 (100%)           Ref.        Ref.             
##     RRMS        12 (34.3%)       0 (0.00%)         . [.;.]         .              
## Idade        38.0 [32.0;45.0] 32.0 [26.5;34.5] 0.87 [0.79;0.96]  0.006    0.002   
## Escolaridade 12.0 [11.0;16.5] 16.0 [13.0;17.0] 1.09 [0.96;1.25]  0.177    0.076   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Plots for the variables held in the compareGroups object `res`
plot(res)

# Same call with bivar = TRUE
# NOTE(review): presumably draws each variable split by clustersRF; confirm
# in ?plot.compareGroups
plot(res, bivar = TRUE)

# Disabled: store the table and print its "avail" and "descr" views
#restab <- createTable(res, show.ratio = TRUE)
#print(restab, which.table = "avail")
#print(restab, which.table = "descr")

# Graphical analysis of the MRI data (full Ress data set)
library(DataExplorer)
#plot_missing(Ress)
plot_histogram(Ress)

#plot_density(Ress)
# Correlations are computed on pairwise-complete observations
plot_correlation(Ress, type = "continuous",
                 cor_args = list(use = "pairwise.complete.obs"))

# This call previously used `dados` (the norms data set). Every other plot in
# this section describes the MRI data, so the discrete-variable correlations
# are now drawn from Ress as well.
plot_correlation(Ress, type = "discrete",
                 cor_args = list(use = "pairwise.complete.obs"))

#plot_bar(Ress)
# Bar charts of Ress without its 5th column
plot_bar(Ress[-5])

Gráficos de Rede

# Build a separate MRI data set (TO BE USED IN OTHER ANALYSES)
Ress2 <- dplyr::select(
  Ress,
  Idade, Escolaridade,
  TempoDoenca:pinosesq9,
  VitD,
  FSS:SD_oral,
  BVMT, CVLT,
  CVLT_rep:LesoesImpreg
)

# Short labels, assigned by position: they must follow the column order
# produced by the select() call above.
names(Ress2) <- c(
  "Idade", "Escola", "TempoD", "EDSS", "25Passo", "9PinoD", "9PinoE", "VitD",
  "FSS", "HADSA", "HADSD", "SF36", "SDescr", "SDoral", "BVMT", "CVLT",
  "CVLTrep", "MMSEmst", "VolCereb", "VolCinz", "LesFlair", "LesImpr"
)

library(qgraph)
# Correlation matrix of the MRI variables; computed once and reused by the
# network plots below instead of calling cor_auto() again for each plot.
clinic <- cor_auto(Ress2)

library(corrplot)
corrplot(clinic, type = "lower", order = "hclust")

# Pearson correlation network
qgraph(cor(Ress2), layout = "spring", sampleSize = nrow(Ress2), labels = colnames(Ress2))

# Partial correlation network. graph = "pcor" asks qgraph to turn the
# correlation matrix into partial correlations; without it the plain
# correlations were plotted.
qgraph(clinic, graph = "pcor", layout = "spring", sampleSize = nrow(Ress2),
       labels = colnames(Ress2))

# Graphical-lasso network with penalty rho = 0.1.
# glasso() returns a list, not a matrix, so the network is built from its
# estimated inverse covariance matrix (`wi`): standardise it to partial
# correlations, zero the diagonal, and average with the transpose so that
# small numerical asymmetries do not make qgraph draw a directed graph.
library(glasso)
fit.glasso <- glasso(clinic, rho = 0.1)
pcor.glasso <- -cov2cor(fit.glasso$wi)
diag(pcor.glasso) <- 0
pcor.glasso <- (pcor.glasso + t(pcor.glasso)) / 2
qgraph(pcor.glasso, layout = "spring", labels = colnames(Ress2))

# Correlations among the variables of the MRI data set.
# The excluded columns are given as positional indices into Ress.
library(qgraph)
library(corrplot)
clinic <- cor_auto(Ress[, -c(2, 4:8, 14, 16, 23:25, 27:31)])
corrplot(clinic, order = "hclust", type = "lower")

Pontos de corte

# Cut-off points
library(cutpointr)

# Written SDMT (RFCluster): cut-off that maximises sensitivity + specificity,
# with Cluster1 as the positive class.
# The seed is fixed because boot_runs = 1000 resamples the data; without it
# the bootstrap summary changes on every run. The maximize_boot_metric calls
# in this file already use set.seed(4).
set.seed(4)
cp1 <- cutpointr(dados2, SD_escr, clustersRF, pos_class = "Cluster1", neg_class = "Cluster2",
                method = maximize_metric, metric = sum_sens_spec, boot_runs = 1000)
summary(cp1)
## Method: maximize_metric 
## Predictor: SD_escr 
## Outcome: clustersRF 
## Direction: <= 
## Nr. of bootstraps: 1000 
## 
##     AUC  n n_pos n_neg
##  0.8805 54    35    19
## 
##  optimal_cutpoint sum_sens_spec    acc sensitivity specificity tp fn fp tn
##                51        1.7038 0.8704      0.9143      0.7895 32  3  4 15
## 
## Predictor summary: 
##      Data Min.   5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##   Overall   11 23.0    30.0   47.5 44.25926    56.5 68.35   77 15.76500   0
##  Cluster1   11 23.0    29.0   33.0 37.20000    48.0 54.20   68 12.08256   0
##  Cluster2   19 31.6    52.5   59.0 57.26316    65.0 73.40   77 13.44058   0
## 
## Bootstrap summary: 
##           Variable  Min.    5% 1st Qu. Median  Mean 3rd Qu.   95% Max.   SD NAs
##   optimal_cutpoint 40.00 48.00   50.00  50.25 50.40   51.00 53.00   57 1.73   0
##              AUC_b  0.63  0.78    0.84   0.89  0.88    0.93  0.97    1 0.06   0
##            AUC_oob  0.34  0.73    0.82   0.89  0.88    0.95  1.00    1 0.09   0
##    sum_sens_spec_b  1.45  1.59    1.68   1.75  1.74    1.81  1.89    2 0.09   0
##  sum_sens_spec_oob  0.75  1.33    1.51   1.63  1.62    1.73  1.88    2 0.16   0
##              acc_b  0.70  0.80    0.85   0.87  0.88    0.91  0.94    1 0.05   0
##            acc_oob  0.53  0.70    0.77   0.82  0.82    0.88  0.94    1 0.07   0
##      sensitivity_b  0.62  0.76    0.85   0.90  0.89    0.94  0.98    1 0.07   0
##    sensitivity_oob  0.43  0.64    0.79   0.87  0.85    0.92  1.00    1 0.11   0
##      specificity_b  0.53  0.69    0.79   0.86  0.85    0.92  1.00    1 0.09   0
##    specificity_oob  0.00  0.50    0.67   0.78  0.77    0.88  1.00    1 0.16   0
##     cohens_kappa_b  0.43  0.57    0.67   0.73  0.73    0.80  0.88    1 0.10   0
##   cohens_kappa_oob -0.17  0.33    0.49   0.61  0.60    0.71  0.86    1 0.16   0
# Diagnostic plots for the written-SDMT / RFCluster cut-off
plot(cp1)

plot_metric(cp1)

# Written SDMT, robust method: cut-off chosen with maximize_boot_metric
# (boot_cut = 200, summarised with mean), optimising accuracy.
set.seed(4)
cp1.1 <- cutpointr(
  dados2, SD_escr, clustersRF,
  method = maximize_boot_metric,
  metric = accuracy,
  boot_cut = 200,
  summary_func = mean,
  pos_class = "Cluster1",
  neg_class = "Cluster2",
  silent = TRUE
)
summary(cp1.1)
## Method: maximize_boot_metric 
## Predictor: SD_escr 
## Outcome: clustersRF 
## Direction: <= 
## 
##     AUC  n n_pos n_neg
##  0.8805 54    35    19
## 
##  optimal_cutpoint accuracy    acc sensitivity specificity tp fn fp tn
##           51.8728   0.8704 0.8704      0.9143      0.7895 32  3  4 15
## 
## Predictor summary: 
##      Data Min.   5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##   Overall   11 23.0    30.0   47.5 44.25926    56.5 68.35   77 15.76500   0
##  Cluster1   11 23.0    29.0   33.0 37.20000    48.0 54.20   68 12.08256   0
##  Cluster2   19 31.6    52.5   59.0 57.26316    65.0 73.40   77 13.44058   0
plot(cp1.1)

# Written SDMT, EM vs EMRR groups: cut-off that maximises
# sensitivity + specificity, with EMRR as the positive class.
# The seed is fixed because boot_runs = 1000 resamples the data; without it
# the bootstrap summary changes on every run.
set.seed(4)
cp3 <- cutpointr(dados2, SD_escr, grupos, pos_class = "EMRR", neg_class = "EM",
                method = maximize_metric, metric = sum_sens_spec, boot_runs = 1000)
summary(cp3)
## Method: maximize_metric 
## Predictor: SD_escr 
## Outcome: grupos 
## Direction: <= 
## Nr. of bootstraps: 1000 
## 
##    AUC  n n_pos n_neg
##  0.761 54    20    34
## 
##  optimal_cutpoint sum_sens_spec    acc sensitivity specificity tp fn fp tn
##                50        1.4588 0.6852         0.9      0.5588 18  2 15 19
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   11 23.00   30.00   47.5 44.25926   56.50 68.35   77 15.76500   0
##       EM   19 25.65   35.25   51.0 49.55882   61.50 70.40   77 15.71178   0
##     EMRR   11 22.40   29.00   32.5 35.25000   47.25 52.05   53 11.35028   0
## 
## Bootstrap summary: 
##           Variable  Min.    5% 1st Qu. Median  Mean 3rd Qu.   95%  Max.   SD
##   optimal_cutpoint 23.00 33.00   38.00  50.00 45.93   52.00 53.00 53.00 7.21
##              AUC_b  0.46  0.65    0.72   0.77  0.76    0.81  0.86  0.94 0.07
##            AUC_oob  0.46  0.60    0.70   0.76  0.76    0.82  0.90  1.00 0.09
##    sum_sens_spec_b  0.97  1.35    1.45   1.52  1.52    1.58  1.67  1.80 0.10
##  sum_sens_spec_oob  0.50  1.03    1.22   1.33  1.32    1.44  1.58  1.72 0.17
##              acc_b  0.41  0.59    0.69   0.72  0.72    0.78  0.81  0.91 0.07
##            acc_oob  0.35  0.50    0.58   0.64  0.64    0.70  0.77  0.88 0.08
##      sensitivity_b  0.43  0.67    0.82   0.93  0.89    1.00  1.00  1.00 0.11
##    sensitivity_oob  0.00  0.36    0.60   0.80  0.75    1.00  1.00  1.00 0.22
##      specificity_b  0.24  0.39    0.52   0.63  0.62    0.73  0.84  1.00 0.14
##    specificity_oob  0.10  0.31    0.44   0.56  0.57    0.69  0.85  1.00 0.17
##     cohens_kappa_b -0.02  0.29    0.39   0.46  0.46    0.54  0.63  0.81 0.11
##   cohens_kappa_oob -0.36  0.03    0.19   0.29  0.29    0.39  0.55  0.73 0.15
##  NAs
##    0
##    0
##    0
##    0
##    0
##    0
##    0
##    0
##    0
##    0
##    0
##    0
##    0
# Diagnostic plots for the written-SDMT / EM vs EMRR cut-off
plot(cp3)

plot_metric(cp3)

# Written SDMT, EM vs EMRR groups, robust method: cut-off chosen with
# maximize_boot_metric (boot_cut = 200, summarised with mean), optimising
# accuracy.
set.seed(4)
cp3.1 <- cutpointr(
  dados2, SD_escr, grupos,
  method = maximize_boot_metric,
  metric = accuracy,
  boot_cut = 200,
  summary_func = mean,
  pos_class = "EMRR",
  neg_class = "EM",
  silent = TRUE
)
summary(cp3.1)
## Method: maximize_boot_metric 
## Predictor: SD_escr 
## Outcome: grupos 
## Direction: <= 
## 
##    AUC  n n_pos n_neg
##  0.761 54    20    34
## 
##  optimal_cutpoint accuracy    acc sensitivity specificity tp fn fp tn
##           36.8909   0.7037 0.7037        0.65      0.7353 13  7  9 25
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   11 23.00   30.00   47.5 44.25926   56.50 68.35   77 15.76500   0
##       EM   19 25.65   35.25   51.0 49.55882   61.50 70.40   77 15.71178   0
##     EMRR   11 22.40   29.00   32.5 35.25000   47.25 52.05   53 11.35028   0
plot(cp3.1)

# Written SDMT, MS vs RRMS groups: cut-off that maximises
# sensitivity + specificity, with RRMS as the positive class.
# The seed is fixed because boot_runs = 1000 resamples the data; without it
# the bootstrap summary changes on every run.
set.seed(4)
cp4 <- cutpointr(dados2, SD_escr, grupos2, pos_class = "RRMS", neg_class = "MS",
                method = maximize_metric, metric = sum_sens_spec, boot_runs = 1000)
summary(cp4)
## Method: maximize_metric 
## Predictor: SD_escr 
## Outcome: grupos2 
## Direction: <= 
## Nr. of bootstraps: 1000 
## 
##     AUC  n n_pos n_neg
##  0.7837 54    12    42
## 
##  optimal_cutpoint sum_sens_spec    acc sensitivity specificity tp fn fp tn
##              40.5         1.369 0.6481        0.75       0.619  9  3 16 26
## 
## Predictor summary: 
##     Data Min.   5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   11 23.0   30.00   47.5 44.25926   56.50 68.35   77 15.76500   0
##       MS   19 26.0   33.25   50.0 47.64286   58.75 68.95   77 15.18796   0
##     RRMS   11 17.6   27.50   30.0 32.41667   36.50 50.25   53 11.87402   0
## 
## Bootstrap summary: 
##           Variable  Min.    5% 1st Qu. Median  Mean 3rd Qu.   95%  Max.   SD
##   optimal_cutpoint 11.00 30.00   33.00  33.00 39.52   48.00 48.00 53.00 8.33
##              AUC_b  0.50  0.66    0.74   0.79  0.78    0.83  0.89  0.96 0.07
##            AUC_oob  0.34  0.59    0.72   0.79  0.78    0.86  0.94  1.00 0.10
##    sum_sens_spec_b  1.16  1.36    1.48   1.56  1.55    1.63  1.74  1.93 0.12
##  sum_sens_spec_oob  0.42  0.98    1.21   1.36  1.35    1.50  1.71  1.93 0.22
##              acc_b  0.39  0.56    0.67   0.74  0.72    0.78  0.85  0.96 0.09
##            acc_oob  0.22  0.48    0.59   0.67  0.66    0.73  0.82  0.94 0.11
##      sensitivity_b  0.30  0.64    0.80   0.89  0.87    1.00  1.00  1.00 0.12
##    sensitivity_oob  0.00  0.20    0.50   0.75  0.70    1.00  1.00  1.00 0.26
##      specificity_b  0.20  0.47    0.60   0.69  0.68    0.78  0.87  1.00 0.13
##    specificity_oob  0.12  0.36    0.54   0.67  0.65    0.77  0.89  1.00 0.16
##     cohens_kappa_b  0.10  0.21    0.33   0.42  0.42    0.49  0.62  0.84 0.12
##   cohens_kappa_oob -0.28 -0.02    0.15   0.25  0.25    0.36  0.53  0.82 0.17
##  NAs
##    0
##    0
##    1
##    0
##    1
##    0
##    0
##    0
##    1
##    0
##    0
##    0
##    0
# Default plot of the cp4 fit (written SDMT, MS vs. RRMS)
plot(cp4)

# Metric value plotted against the candidate cutpoints for cp4
plot_metric(cp4)

# Written SDMT, groups MS vs. RRMS -- robust (bootstrapped) method.
# Same recipe as the other robust fits: accuracy-maximising cutpoints from
# boot_cut = 200 bootstrap samples, combined with the mean; seed fixed so the
# resampling can be repeated.
# NOTE(review): this fit optimises accuracy while cp4 optimises sum_sens_spec;
# with 12 RRMS vs. 42 MS cases the recorded result has sensitivity 0.25 and
# specificity 0.98 -- confirm that accuracy is the intended metric here.
set.seed(4)
cp4.1 <- cutpointr(
  dados2, SD_escr, grupos2,
  method = maximize_boot_metric,
  metric = accuracy,
  boot_cut = 200,
  summary_func = mean,
  pos_class = "RRMS",
  neg_class = "MS",
  silent = TRUE
)
summary(cp4.1)
## Method: maximize_boot_metric 
## Predictor: SD_escr 
## Outcome: grupos2 
## Direction: <= 
## 
##     AUC  n n_pos n_neg
##  0.7837 54    12    42
## 
##  optimal_cutpoint accuracy    acc sensitivity specificity tp fn fp tn
##           23.2105   0.8148 0.8148        0.25      0.9762  3  9  1 41
## 
## Predictor summary: 
##     Data Min.   5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   11 23.0   30.00   47.5 44.25926   56.50 68.35   77 15.76500   0
##       MS   19 26.0   33.25   50.0 47.64286   58.75 68.95   77 15.18796   0
##     RRMS   11 17.6   27.50   30.0 32.41667   36.50 50.25   53 11.87402   0
# Default plot of the cp4.1 fit (written SDMT, MS vs. RRMS, robust method)
plot(cp4.1)

# Oral SDMT against the clustersRF outcome (Cluster1 is the positive class):
# cutpoint that maximises sensitivity + specificity, no bootstrap validation.
cp5 <- cutpointr(
  dados2, SD_oral, clustersRF,
  method = maximize_metric,
  metric = sum_sens_spec,
  pos_class = "Cluster1",
  neg_class = "Cluster2"
)
summary(cp5)
## Method: maximize_metric 
## Predictor: SD_oral 
## Outcome: clustersRF 
## Direction: <= 
## 
##     AUC  n n_pos n_neg
##  0.8617 54    35    19
## 
##  optimal_cutpoint sum_sens_spec    acc sensitivity specificity tp fn fp tn
##                50        1.6662 0.8148      0.7714      0.8947 27  8  2 17
## 
## Predictor summary: 
##      Data Min.   5% 1st Qu. Median     Mean 3rd Qu.  95% Max.       SD NAs
##   Overall   12 23.0    35.0     49 46.29630    57.0 69.4   77 15.44391   0
##  Cluster1   12 23.0    29.0     40 39.88571    49.5 57.3   68 12.42334   0
##  Cluster2   19 31.6    54.5     59 58.10526    65.0 73.4   77 13.55194   0
# Default plot of the cp5 fit (oral SDMT, clustersRF outcome)
plot(cp5)

# Oral SDMT against the clustersRF outcome -- robust (bootstrapped) method.
# Accuracy-maximising cutpoints from boot_cut = 200 bootstrap samples are
# combined with the mean; the seed makes the resampling repeatable.
set.seed(4)
cp5.1 <- cutpointr(
  dados2, SD_oral, clustersRF,
  method = maximize_boot_metric,
  metric = accuracy,
  boot_cut = 200,
  summary_func = mean,
  pos_class = "Cluster1",
  neg_class = "Cluster2",
  silent = TRUE
)
summary(cp5.1)
## Method: maximize_boot_metric 
## Predictor: SD_oral 
## Outcome: clustersRF 
## Direction: <= 
## 
##     AUC  n n_pos n_neg
##  0.8617 54    35    19
## 
##  optimal_cutpoint accuracy    acc sensitivity specificity tp fn fp tn
##           54.0573   0.8519 0.8519      0.9143      0.7368 32  3  5 14
## 
## Predictor summary: 
##      Data Min.   5% 1st Qu. Median     Mean 3rd Qu.  95% Max.       SD NAs
##   Overall   12 23.0    35.0     49 46.29630    57.0 69.4   77 15.44391   0
##  Cluster1   12 23.0    29.0     40 39.88571    49.5 57.3   68 12.42334   0
##  Cluster2   19 31.6    54.5     59 58.10526    65.0 73.4   77 13.55194   0
# Default plot of the cp5.1 fit (oral SDMT, clustersRF outcome, robust method)
plot(cp5.1)

# Oral SDMT, groups EM vs. EMRR: cutpoint that maximises
# sensitivity + specificity, no bootstrap validation.
cp6 <- cutpointr(
  dados2, SD_oral, grupos,
  method = maximize_metric,
  metric = sum_sens_spec,
  pos_class = "EMRR",
  neg_class = "EM"
)
summary(cp6)
## Method: maximize_metric 
## Predictor: SD_oral 
## Outcome: grupos 
## Direction: <= 
## 
##    AUC  n n_pos n_neg
##  0.714 54    20    34
## 
##  optimal_cutpoint sum_sens_spec    acc sensitivity specificity tp fn fp tn
##                41        1.4059 0.7037         0.7      0.7059 14  6 10 24
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   12 23.00   35.00   49.0 46.29630   57.00 69.40   77 15.44391   0
##       EM   19 25.65   40.25   53.0 50.61765   62.25 72.35   77 15.45279   0
##     EMRR   12 22.45   29.00   39.5 38.95000   50.75 57.05   58 12.66356   0
# Default plot of the cp6 fit (oral SDMT, EM vs. EMRR)
plot(cp6)

# Oral SDMT, groups EM vs. EMRR -- robust (bootstrapped) method.
# Accuracy-maximising cutpoints from boot_cut = 200 bootstrap samples are
# combined with the mean; the seed makes the resampling repeatable.
set.seed(4)
cp6.1 <- cutpointr(
  dados2, SD_oral, grupos,
  method = maximize_boot_metric,
  metric = accuracy,
  boot_cut = 200,
  summary_func = mean,
  pos_class = "EMRR",
  neg_class = "EM",
  silent = TRUE
)
summary(cp6.1)
## Method: maximize_boot_metric 
## Predictor: SD_oral 
## Outcome: grupos 
## Direction: <= 
## 
##    AUC  n n_pos n_neg
##  0.714 54    20    34
## 
##  optimal_cutpoint accuracy    acc sensitivity specificity tp fn fp tn
##           36.1298   0.6667 0.6667        0.45      0.7941  9 11  7 27
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   12 23.00   35.00   49.0 46.29630   57.00 69.40   77 15.44391   0
##       EM   19 25.65   40.25   53.0 50.61765   62.25 72.35   77 15.45279   0
##     EMRR   12 22.45   29.00   39.5 38.95000   50.75 57.05   58 12.66356   0
# Default plot of the cp6.1 fit (oral SDMT, EM vs. EMRR, robust method)
plot(cp6.1)

# Oral SDMT, groups MS vs. RRMS: cutpoint that maximises
# sensitivity + specificity, no bootstrap validation.
cp7 <- cutpointr(
  dados2, SD_oral, grupos2,
  method = maximize_metric,
  metric = sum_sens_spec,
  pos_class = "RRMS",
  neg_class = "MS"
)
summary(cp7)
## Method: maximize_metric 
## Predictor: SD_oral 
## Outcome: grupos2 
## Direction: <= 
## 
##    AUC  n n_pos n_neg
##  0.745 54    12    42
## 
##  optimal_cutpoint sum_sens_spec    acc sensitivity specificity tp fn fp tn
##                38        1.3452 0.7222      0.5833      0.7619  7  5 10 32
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   12 23.00   35.00   49.0 46.29630   57.00 69.40   77 15.44391   0
##       MS   19 26.00   39.25   52.5 49.30952   58.75 71.80   77 14.72759   0
##     RRMS   12 18.05   27.50   35.0 35.75000   43.25 55.25   58 13.59228   0
# Default plot of the cp7 fit (oral SDMT, MS vs. RRMS)
plot(cp7)

# Oral SDMT, groups MS vs. RRMS -- robust (bootstrapped) method.
# Accuracy-maximising cutpoints from boot_cut = 200 bootstrap samples are
# combined with the mean; the seed makes the resampling repeatable.
# NOTE(review): this fit optimises accuracy while cp7 optimises sum_sens_spec;
# with 12 RRMS vs. 42 MS cases the recorded result has sensitivity 0.25 and
# specificity 0.98 -- confirm that accuracy is the intended metric here.
set.seed(4)
cp7.1 <- cutpointr(
  dados2, SD_oral, grupos2,
  method = maximize_boot_metric,
  metric = accuracy,
  boot_cut = 200,
  summary_func = mean,
  pos_class = "RRMS",
  neg_class = "MS",
  silent = TRUE
)
summary(cp7.1)
## Method: maximize_boot_metric 
## Predictor: SD_oral 
## Outcome: grupos2 
## Direction: <= 
## 
##    AUC  n n_pos n_neg
##  0.745 54    12    42
## 
##  optimal_cutpoint accuracy    acc sensitivity specificity tp fn fp tn
##              23.2   0.8148 0.8148        0.25      0.9762  3  9  1 41
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   12 23.00   35.00   49.0 46.29630   57.00 69.40   77 15.44391   0
##       MS   19 26.00   39.25   52.5 49.30952   58.75 71.80   77 14.72759   0
##     RRMS   12 18.05   27.50   35.0 35.75000   43.25 55.25   58 13.59228   0
# Default plot of the cp7.1 fit (oral SDMT, MS vs. RRMS, robust method)
plot(cp7.1)

Machine Learning

Método XGBoost

# https://rpubs.com/dalekube/XGBoost-Iris-Classification-Example-in-R#:~:text=XGBoost%20(Extreme%20Gradient%20Boosting)%20is,%2Dclass%20(multinomial)%20classification.
library(xgboost)

# Build the XGBoost inputs: predictors SD_escr:MMSE_MST plus the clustersRF
# outcome taken from dados2.
dados4 <- dados2 %>% dplyr::select(SD_escr:MMSE_MST, clustersRF)

# XGBoost requires the class label as an integer starting at 0, so the
# 1-based factor codes are shifted down by one (assumes clustersRF is a
# factor; its levels are reused below to name the predicted classes).
clusters <- dados4$clustersRF
label <- as.integer(dados4$clustersRF) - 1

# Drop the outcome so that only predictors remain in the feature matrix
dados4$clustersRF <- NULL

# Random 80/20 train/test split. The seed is set here so the split does not
# depend on the RNG state left behind by earlier chunks; results will differ
# from a run made without this line.
set.seed(4)
n <- nrow(dados4)
train.index <- sample(n, floor(0.8 * n))
train.data <- as.matrix(dados4[train.index, ])
train.label <- label[train.index]
test.data <- as.matrix(dados4[-train.index, ])
test.label <- label[-train.index]

# Transform the two data sets into xgb.DMatrix objects.
# Note: the object `xgb.train` shares its name with the xgboost function
# xgb.train(); later calls still reach the function because R skips
# non-function objects when resolving a call.
xgb.train <- xgb.DMatrix(data = train.data, label = train.label)
xgb.test <- xgb.DMatrix(data = test.data, label = test.label)

# Define the parameters for multinomial classification.
# num_class is the number of levels of the outcome factor.
num_class <- length(levels(clusters))
params <- list(
  booster = "gbtree",
  eta = 0.001,
  max_depth = 5,
  gamma = 3,
  subsample = 0.7,
  colsample_bytree = 1,
  objective = "multi:softprob",
  eval_metric = "mlogloss",
  num_class = num_class
)

# Train the XGBoost classifier.
# `nthread` (singular) is the parameter XGBoost recognises; the previous
# spelling `nthreads` was passed through unused and triggered the
# "Parameters: { nthreads } might not be used" warning.
# NOTE(review): early stopping monitors the last watchlist entry (val2, the
# test set), so the test data take part in choosing the number of rounds and
# the accuracy computed on them afterwards is optimistic -- consider a
# separate validation split or cross-validation.
xgb.fit <- xgb.train(
  params = params,
  data = xgb.train,
  nrounds = 10000,
  nthread = 1,
  early_stopping_rounds = 10,
  watchlist = list(val1 = xgb.train, val2 = xgb.test),
  verbose = 0
)
## [12:08:36] WARNING: amalgamation/../src/learner.cc:541: 
## Parameters: { nthreads } might not be used.
## 
##   This may not be accurate due to some parameters are only used in language bindings but
##   passed down to XGBoost core.  Or some parameters are not used but slip through this
##   verification. Please open an issue if you find above cases.
# Review the final model and results
xgb.fit
## ##### xgb.Booster
## raw: 1.2 Mb 
## call:
##   xgb.train(params = params, data = xgb.train, nrounds = 10000, 
##     watchlist = list(val1 = xgb.train, val2 = xgb.test), verbose = 0, 
##     early_stopping_rounds = 10, nthreads = 1)
## params (as set within xgb.train):
##   booster = "gbtree", eta = "0.001", max_depth = "5", gamma = "3", subsample = "0.7", colsample_bytree = "1", objective = "multi:softprob", eval_metric = "mlogloss", num_class = "2", nthreads = "1", validate_parameters = "TRUE"
## xgb.attributes:
##   best_iteration, best_msg, best_ntreelimit, best_score, niter
## callbacks:
##   cb.evaluation.log()
##   cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize, 
##     verbose = verbose)
## # of features: 5 
## niter: 715
## best_iteration : 705 
## best_ntreelimit : 705 
## best_score : 0.604711 
## best_msg : [705] val1-mlogloss:0.463218  val2-mlogloss:0.604711 
## nfeatures : 5 
## evaluation_log:
##     iter val1_mlogloss val2_mlogloss
##        1      0.692599      0.692823
##        2      0.692002      0.692527
## ---                                 
##      714      0.462019      0.604747
##      715      0.461867      0.604738
# Predict outcomes with the test data; reshape = TRUE requests one row per
# observation and one column per class. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
xgb.pred <- predict(xgb.fit, test.data, reshape = TRUE)
xgb.pred <- as.data.frame(xgb.pred)
colnames(xgb.pred) <- levels(clusters)

# Use the predicted label with the highest probability (first one on ties).
# The row-wise argmax runs on an explicit matrix of the probability columns,
# taken before any non-numeric column is added to the data frame.
xgb.pred$prediction <- levels(clusters)[
  apply(as.matrix(xgb.pred), 1, which.max)
]
# Map the 0-based integer labels back to the class names
xgb.pred$label <- levels(clusters)[test.label + 1]

# Calculate the final accuracy: share of test rows predicted correctly
result <- mean(xgb.pred$prediction == xgb.pred$label)
print(paste("Final Accuracy =", sprintf("%1.2f%%", 100 * result)))
## [1] "Final Accuracy = 72.73%"