Bancos de dados

library(readxl)
library(tidyverse)
# Partial database (IBNeuro)

# Working directory: keep only the line for the machine in use uncommented
#setwd("D:/Dropbox/Laboratorio/Carina/SDMT/") # IBNeuro computer
#setwd("C:/Dropbox/Laboratorio/Carina/SDMT/") # Avell PC
setwd("~/Dropbox/Laboratorio/Carina/SDMT/") # Macbook

# Database for the correlations (convergent and discriminant validity)
parcial <- type.convert(
  read_excel("SDMT_manual_REVISADO_nov2019_FINAL.xlsx", sheet = "NORMAS")
)

# Full database (IBNeuro and FM USP) used for the norms (excluding data with MMSE)
todos <- type.convert(
  read_excel("SDMT_manual_REVISADO_nov2019_FINAL.xlsx", sheet = "NormasSDMT")
)

# Database for the NORMS (with normal MMSE_SV)
# NOTE(review): this reads the same sheet ("NormasSDMT") as `todos` above;
# confirm whether a different sheet or a filter was intended.
dados <- type.convert(
  read_excel("SDMT_manual_REVISADO_nov2019_FINAL.xlsx", sheet = "NormasSDMT")
)

# Magnetic resonance imaging database
Ress <- type.convert(
  read_excel("SDMT_manual_REVISADO_nov2019_FINAL.xlsx", sheet = "EM_Resson")
)

# Create two groups split at EDSS <= 1.5 and count their members
Ress[["grupos"]] <- ifelse(Ress[["EDSS"]] <= 1.5, "EM", "EMRR")
table(Ress[["grupos"]])

## 
##   EM EMRR 
##   34   20

# Create two groups split at EDSS <= 2.5 and count their members
Ress[["grupos2"]] <- ifelse(Ress[["EDSS"]] <= 2.5, "MS", "RRMS")
table(Ress[["grupos2"]])

## 
##   MS RRMS 
##   42   12

# Multiple sclerosis database
EM <- type.convert(
  read_excel("SDMT_manual_REVISADO_nov2019_FINAL.xlsx", sheet = "EM")
)

# Case-control database
caso <- type.convert(
  read_excel("SDMT_manual_REVISADO_nov2019_FINAL.xlsx", sheet = "Caso_contr_RM")
)

# Test-retest database
conf <- type.convert(
  read_excel("SDMT_manual_REVISADO_nov2019_FINAL.xlsx", sheet = "Confiabilidade")
)

Análise descritiva

# Descriptive analysis of the numeric data
library(knitr)
library(psych)

options(knitr.table.format = 'markdown')

# Database that will be used to build the norms:
# numeric columns only, described with a 20% trim, rendered with 2 digits
dados %>%
  dplyr::select_if(is.numeric) %>%
  psych::describe(tr = .2) %>%
  kable(digits = 2)

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Idade	1	587	44.50	16.39	41	42.95	19.27	18	92	74	0.43	-0.77	0.68
FaixaEtaria	2	587	3.02	1.63	3	2.88	1.48	1	7	6	0.45	-0.80	0.07
Escola	3	587	3.46	0.74	4	3.68	0.00	2	4	2	-0.96	-0.55	0.03
BAI	4	455	8.92	8.47	6	6.95	5.93	0	43	43	1.28	1.22	0.40
BDI	5	455	9.35	7.96	7	7.88	5.93	0	51	51	1.28	2.15	0.37
HADS_A	6	587	5.84	3.85	5	5.33	2.97	0	21	21	0.79	0.31	0.16
HADS_D	7	587	4.50	3.36	4	3.97	2.97	0	19	19	1.03	1.01	0.14
MMSE_SV	8	574	28.49	1.84	29	29.02	1.48	21	30	9	-1.36	1.33	0.08
SD_escr	9	586	43.94	17.14	45	44.17	16.31	2	110	108	0.28	0.77	0.71
SD_oral	10	587	47.84	18.34	50	48.16	17.79	2	110	108	0.13	0.20	0.76
SD_oral_escrito	11	587	3.98	4.90	5	4.84	5.93	-9	9	18	-0.60	-0.74	0.20

# Partial database (IBNeuro only), used for the correlations
parcial %>%
  dplyr::select_if(is.numeric) %>%
  psych::describe(tr = .2) %>%
  kable(digits = 2)

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Idade	1	471	42.33	15.20	39	40.60	16.31	18	89	71	0.52	-0.61	0.70
BAI	2	471	9.06	8.64	7	7.05	7.41	0	45	45	1.33	1.46	0.40
BDI	3	471	9.35	7.94	7	7.87	5.93	0	51	51	1.27	2.06	0.37
HADS_A	4	471	6.15	4.05	6	5.69	4.45	0	21	21	0.67	0.05	0.19
HADS_D	5	471	4.68	3.59	4	4.13	2.97	0	19	19	0.90	0.44	0.17
Escrita	6	471	0.97	0.16	1	1.00	0.00	0	1	1	-6.00	34.12	0.01
Desenho	7	471	0.92	0.27	1	1.00	0.00	0	1	1	-3.07	7.44	0.01
VelProc	8	471	17.75	6.15	18	17.71	5.93	0	35	35	0.22	0.37	0.28
MMSE2_BV	9	471	15.47	0.95	16	15.77	0.00	8	16	8	-2.87	13.24	0.04
MMSE2_SV	10	471	28.51	2.05	29	29.13	1.48	13	30	17	-2.24	8.25	0.09
MMSE2_EV	11	471	60.44	10.30	60	60.59	8.90	25	87	62	-0.13	0.33	0.47
A1_acerto	12	471	7.11	2.29	7	6.88	2.97	0	15	15	0.50	0.22	0.11
A1_intrus	13	471	0.46	0.89	0	0.14	0.00	0	6	6	2.48	7.72	0.04
A1_repet	14	471	0.41	0.92	0	0.09	0.00	0	8	8	3.58	17.93	0.04
A5_acerto	15	471	13.15	2.51	14	13.49	2.97	0	16	16	-0.94	1.11	0.12
A5_intrus	16	471	0.17	0.45	0	0.00	0.00	0	3	3	3.20	11.92	0.02
A5_repet	17	471	1.10	1.58	0	0.60	0.00	0	8	8	1.82	3.38	0.07
BVMTR1	18	471	5.78	3.18	6	5.63	2.97	0	12	12	0.22	-0.72	0.15
BVMTR3	19	471	9.36	2.98	10	10.16	2.97	0	12	12	-1.26	0.84	0.14
SD_escr	20	470	44.66	16.04	45	44.55	14.83	6	110	104	0.61	2.00	0.74
SD_oral	21	470	50.31	18.16	51	50.29	17.05	9	110	101	0.26	0.51	0.84
SD_oral_escrito	22	470	5.65	9.92	6	5.67	8.90	-31	55	86	0.20	2.75	0.46

# Database of the magnetic resonance imaging study
Ress %>%
  dplyr::select_if(is.numeric) %>%
  psych::describe(tr = .2) %>%
  kable(digits = 2)

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Idade	1	54	36.28	9.22	34.5	35.21	6.67	18.00	61.0	43.00	0.67	0.28	1.25
Escolaridade	2	54	13.80	4.54	13.5	13.68	3.71	4.00	28.0	24.00	0.31	0.70	0.62
Comorbidades	3	54	0.17	0.38	0.0	0.00	0.00	0.00	1.0	1.00	1.74	1.05	0.05
Medicamentos	4	54	0.81	0.39	1.0	1.00	0.00	0.00	1.0	1.00	-1.58	0.49	0.05
TempoDoenca	5	52	6.80	6.24	5.5	5.53	5.93	0.25	32.0	31.75	1.60	3.33	0.87
EDSS	6	54	1.56	2.04	1.0	0.94	1.48	0.00	7.5	7.50	1.27	0.57	0.28
Passos25	7	54	7.26	2.56	7.0	6.76	2.97	4.00	15.0	11.00	1.47	2.21	0.35
pinosdir9	8	54	28.81	6.61	27.0	27.56	5.93	18.00	45.0	27.00	0.84	-0.21	0.90
pinosesq9	9	54	30.81	9.34	28.0	28.85	5.93	19.00	70.0	51.00	1.96	4.68	1.27
MedicamentoEM	10	54	0.78	0.42	1.0	0.94	0.00	0.00	1.0	1.00	-1.30	-0.32	0.06
VitD	11	54	62.73	29.98	55.5	63.23	37.06	7.00	100.0	93.00	0.09	-1.52	4.08
FSS	12	54	35.74	16.72	35.5	35.47	17.79	9.00	63.0	54.00	0.04	-1.14	2.27
HADS_A	13	54	6.43	3.97	5.5	6.09	3.71	0.00	16.0	16.00	0.44	-0.74	0.54
HADS_D	14	54	4.50	3.96	3.0	3.59	2.97	0.00	15.0	15.00	1.26	0.86	0.54
SF36	15	54	91.65	11.74	94.0	93.88	7.41	53.00	105.0	52.00	-1.86	3.42	1.60
SD_escr	16	54	44.26	15.77	47.5	43.59	21.50	11.00	77.0	66.00	0.09	-1.04	2.15
SD_oral	17	54	46.30	15.44	49.0	46.59	15.57	12.00	77.0	65.00	-0.11	-0.89	2.10
BVMT_T1	18	54	2.11	1.83	2.0	1.82	1.48	0.00	9.0	9.00	1.36	2.38	0.25
BVMT_T2	19	54	4.00	2.56	3.0	3.79	2.97	0.00	12.0	12.00	0.85	0.72	0.35
BVMT_T3	20	54	4.80	2.72	5.0	4.76	1.48	0.00	12.0	12.00	0.50	0.41	0.37
BVMT	21	54	10.91	6.65	10.0	10.53	7.41	0.00	33.0	33.00	0.88	1.26	0.91
CVLT_A1	22	54	6.30	1.66	6.0	6.00	1.48	4.00	10.0	6.00	0.59	-0.84	0.23
CVLT_A2	23	54	9.69	2.48	10.0	9.76	2.97	4.00	14.0	10.00	-0.29	-0.49	0.34
CVLT_A3	24	54	11.33	2.94	11.5	11.53	3.71	4.00	16.0	12.00	-0.38	-0.63	0.40
CVLT_A4	25	54	12.17	3.18	13.0	12.59	2.97	0.00	16.0	16.00	-1.27	2.37	0.43
CVLT_A5	26	54	12.44	3.28	14.0	13.09	2.97	1.00	16.0	15.00	-1.21	1.31	0.45
CVLT	27	54	51.93	11.73	52.5	53.09	12.60	22.00	70.0	48.00	-0.55	-0.37	1.60
CVLT_rep	28	54	7.46	5.77	6.0	6.35	4.45	0.00	27.0	27.00	1.38	1.80	0.79
MMSE_MST	29	54	53.37	9.83	53.0	53.59	9.64	31.00	81.0	50.00	0.01	0.08	1.34
VolCereb	30	54	1504.43	70.44	1514.5	1514.45	55.60	1291.00	1643.0	352.00	-0.82	0.63	9.59
VolCinz	31	54	890.75	52.32	893.0	894.04	49.67	766.00	995.0	229.00	-0.32	-0.36	7.12
LesoesFlair	32	54	9.70	8.55	6.7	7.50	5.04	1.20	38.9	37.70	1.48	1.63	1.16
LesoesImpreg	33	54	0.01	0.07	0.0	0.00	0.00	0.00	0.4	0.40	4.57	21.35	0.01

# Database of the multiple sclerosis study
EM %>%
  dplyr::select_if(is.numeric) %>%
  psych::describe(tr = .2) %>%
  kable(digits = 2)

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Idade	1	256	40.68	11.88	40.0	40.03	13.34	17	72	55	0.28	-0.65	0.74
Escolaridade	2	256	12.51	4.15	12.0	12.61	3.71	1	28	27	0.07	0.96	0.26
HADS_D	3	256	6.00	3.75	5.0	5.56	2.97	0	19	19	0.79	0.43	0.23
HADS_A	4	256	7.20	4.21	6.0	6.62	2.97	0	21	21	0.77	0.32	0.26
SD_escr	5	256	34.16	15.56	33.5	33.99	15.57	0	77	77	0.12	-0.21	0.97
SD_oral	6	256	37.57	15.45	38.0	37.73	16.31	0	77	77	0.03	-0.41	0.97
EDSS	7	171	3.05	2.27	3.0	2.88	2.97	0	8	8	0.32	-1.05	0.17
A1	8	238	6.06	1.99	6.0	5.97	1.48	1	13	12	0.37	0.54	0.13
A5	9	238	11.21	3.13	11.0	11.40	4.45	1	16	15	-0.32	-0.64	0.20
A1_A5	10	238	45.16	14.16	47.0	46.13	14.83	11	75	64	-0.32	-0.57	0.92
BVMT_1	11	224	3.89	2.92	3.0	3.46	2.97	0	12	12	0.72	-0.23	0.20
BVMT_2	12	224	5.84	3.61	5.0	5.57	4.45	0	12	12	0.28	-1.11	0.24
BVMT_3	13	224	6.78	3.90	6.0	6.85	5.93	0	12	12	-0.02	-1.33	0.26
BVMT_total	14	224	16.48	9.78	15.5	15.85	12.60	1	36	35	0.27	-1.10	0.65

# Database of the case-control study
caso %>%
  dplyr::select_if(is.numeric) %>%
  psych::describe(tr = .2) %>%
  kable(digits = 2)

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Idade	1	26	30.46	4.81	31.00	30.31	2.97	21.00	45.00	24.00	0.66	1.85	0.94
Escolaridade	2	26	14.92	3.19	15.00	14.75	4.45	8.00	20.00	12.00	-0.05	-1.02	0.62
EDSS	3	26	0.54	1.09	0.00	0.12	0.00	0.00	4.50	4.50	2.15	4.28	0.21
Grupo	4	26	0.54	0.51	1.00	0.56	0.00	0.00	1.00	1.00	-0.15	-2.05	0.10
A1	5	26	7.19	2.10	7.00	7.00	2.97	4.00	12.00	8.00	0.43	-0.96	0.41
A2	6	26	10.31	2.00	10.00	10.00	1.48	7.00	15.00	8.00	0.72	-0.22	0.39
A3	7	26	11.88	2.52	12.00	12.06	2.97	5.00	16.00	11.00	-0.51	0.16	0.49
A4	8	26	12.19	3.41	13.00	12.81	2.97	0.00	16.00	16.00	-1.73	3.74	0.67
A5	9	26	12.92	3.35	14.00	13.56	2.97	1.00	16.00	15.00	-1.72	3.58	0.66
Total_A1_A5	10	26	54.50	11.28	55.50	54.94	9.64	23.00	75.00	52.00	-0.62	0.62	2.21
CVLT _rep	11	26	5.58	5.52	4.00	4.56	5.93	0.00	20.00	20.00	1.02	0.28	1.08
BVMT_T1	12	26	4.81	4.22	2.00	4.12	1.48	0.00	12.00	12.00	0.48	-1.57	0.83
BVMT_T2	13	26	6.96	4.09	6.00	6.81	4.45	0.00	12.00	12.00	0.09	-1.68	0.80
BVMT_T3	14	26	7.88	3.59	7.00	8.00	4.45	1.00	12.00	11.00	-0.08	-1.56	0.70
BVMT	15	26	19.65	11.54	15.50	18.94	11.86	1.00	36.00	35.00	0.15	-1.69	2.26
SD_escr	16	26	49.81	12.62	51.00	51.50	12.60	23.00	68.00	45.00	-0.57	-0.72	2.48
SD_oral	17	26	52.69	13.67	52.50	53.69	17.05	23.00	75.00	52.00	-0.38	-0.72	2.68
Oral_Escrito	18	26	2.88	5.46	1.50	1.88	3.71	-8.00	19.00	27.00	1.01	1.30	1.07
HADS_A	19	26	6.54	3.85	5.00	5.94	2.97	1.00	14.00	13.00	0.57	-1.01	0.75
HADS_D	20	26	4.27	4.11	3.00	3.12	2.97	0.00	15.00	15.00	1.29	0.59	0.81
HPT9_RH	21	26	23.64	5.81	21.30	22.44	4.89	17.91	39.00	21.09	1.13	0.46	1.14
HPT9_LH	22	26	24.67	6.82	22.20	23.35	4.41	18.00	51.00	33.00	2.20	5.89	1.34
HPT9_total	23	26	24.15	6.04	21.34	23.02	4.00	18.00	45.00	27.00	1.62	2.97	1.18
VolCereb	24	14	1540.73	32.58	1534.00	1536.22	33.58	1503.00	1603.00	100.00	0.52	-1.16	8.71
VolCinz	25	14	903.11	27.38	905.50	900.77	26.17	860.80	952.00	91.20	0.35	-0.95	7.32
lesoesflair	26	14	10.65	5.39	10.60	10.60	6.52	1.50	19.64	18.14	0.00	-1.28	1.44
lesoesimpreg	27	14	0.04	0.12	0.00	0.00	0.00	0.00	0.40	0.40	2.23	3.72	0.03

# Database of the test-retest reliability study
conf %>%
  dplyr::select_if(is.numeric) %>%
  psych::describe(tr = .2) %>%
  kable(digits = 2)

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Idade	1	98	60.87	13.44	62.0	61.80	8.90	22	92	70	-0.50	0.60	1.36
Escolaridade	2	98	9.76	5.36	11.0	9.43	7.41	1	27	26	0.34	-0.34	0.54
SD_escr_teste	3	98	29.42	13.28	29.5	29.02	14.83	5	69	64	0.25	-0.53	1.34
SD_oral_teste	4	98	31.62	14.93	32.0	30.88	16.31	2	70	68	0.35	-0.36	1.51
SD_escr_reteste	5	91	31.34	14.47	32.0	31.49	17.79	0	70	70	0.01	-0.65	1.52
SD_oral_reteste	6	91	33.56	14.52	34.0	33.53	16.31	0	68	68	0.01	-0.53	1.52
alter_escr	7	75	23.48	11.15	23.0	23.00	13.34	0	47	47	0.15	-0.83	1.29
altern_oral	8	75	25.40	11.28	25.0	25.00	13.34	4	50	46	0.16	-0.74	1.30

Imputação (Ress)

library(mice)
# Impute the missing values of Ress with the mice defaults.
# The fixed seed makes the imputed values identical from one run to the next;
# without it every run produces a different completed data set.
imp <- mice(Ress, seed = 3984)

## 
##  iter imp variable
##   1   1  TempoDoenca*
##   1   2  TempoDoenca*
##   1   3  TempoDoenca*
##   1   4  TempoDoenca*
##   1   5  TempoDoenca*
##   2   1  TempoDoenca*
##   2   2  TempoDoenca*
##   2   3  TempoDoenca*
##   2   4  TempoDoenca*
##   2   5  TempoDoenca*
##   3   1  TempoDoenca*
##   3   2  TempoDoenca*
##   3   3  TempoDoenca*
##   3   4  TempoDoenca*
##   3   5  TempoDoenca*
##   4   1  TempoDoenca*
##   4   2  TempoDoenca*
##   4   3  TempoDoenca*
##   4   4  TempoDoenca*
##   4   5  TempoDoenca*
##   5   1  TempoDoenca*
##   5   2  TempoDoenca*
##   5   3  TempoDoenca*
##   5   4  TempoDoenca*
##   5   5  TempoDoenca*

# Extract the first completed data set. The namespace is spelled out because
# tidyr (attached through tidyverse) also exports a complete() function, so the
# unqualified call depends on the order in which the packages were attached.
compl <- mice::complete(imp, action = 1L)
# Number of missing values left after imputation
sum(is.na(compl))

## [1] 0

# From here on Ress is the completed (imputed) data set
Ress <- compl

# IMPUTED database of the magnetic resonance imaging study
Ress %>%
  dplyr::select_if(is.numeric) %>%
  psych::describe(tr = .2) %>%
  kable(digits = 2)

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Idade	1	54	36.28	9.22	34.5	35.21	6.67	18.00	61.0	43.00	0.67	0.28	1.25
Escolaridade	2	54	13.80	4.54	13.5	13.68	3.71	4.00	28.0	24.00	0.31	0.70	0.62
Comorbidades	3	54	0.17	0.38	0.0	0.00	0.00	0.00	1.0	1.00	1.74	1.05	0.05
Medicamentos	4	54	0.81	0.39	1.0	1.00	0.00	0.00	1.0	1.00	-1.58	0.49	0.05
TempoDoenca	5	54	6.88	6.34	5.5	5.57	6.15	0.25	32.0	31.75	1.50	2.81	0.86
EDSS	6	54	1.56	2.04	1.0	0.94	1.48	0.00	7.5	7.50	1.27	0.57	0.28
Passos25	7	54	7.26	2.56	7.0	6.76	2.97	4.00	15.0	11.00	1.47	2.21	0.35
pinosdir9	8	54	28.81	6.61	27.0	27.56	5.93	18.00	45.0	27.00	0.84	-0.21	0.90
pinosesq9	9	54	30.81	9.34	28.0	28.85	5.93	19.00	70.0	51.00	1.96	4.68	1.27
MedicamentoEM	10	54	0.78	0.42	1.0	0.94	0.00	0.00	1.0	1.00	-1.30	-0.32	0.06
VitD	11	54	62.73	29.98	55.5	63.23	37.06	7.00	100.0	93.00	0.09	-1.52	4.08
FSS	12	54	35.74	16.72	35.5	35.47	17.79	9.00	63.0	54.00	0.04	-1.14	2.27
HADS_A	13	54	6.43	3.97	5.5	6.09	3.71	0.00	16.0	16.00	0.44	-0.74	0.54
HADS_D	14	54	4.50	3.96	3.0	3.59	2.97	0.00	15.0	15.00	1.26	0.86	0.54
SF36	15	54	91.65	11.74	94.0	93.88	7.41	53.00	105.0	52.00	-1.86	3.42	1.60
SD_escr	16	54	44.26	15.77	47.5	43.59	21.50	11.00	77.0	66.00	0.09	-1.04	2.15
SD_oral	17	54	46.30	15.44	49.0	46.59	15.57	12.00	77.0	65.00	-0.11	-0.89	2.10
BVMT_T1	18	54	2.11	1.83	2.0	1.82	1.48	0.00	9.0	9.00	1.36	2.38	0.25
BVMT_T2	19	54	4.00	2.56	3.0	3.79	2.97	0.00	12.0	12.00	0.85	0.72	0.35
BVMT_T3	20	54	4.80	2.72	5.0	4.76	1.48	0.00	12.0	12.00	0.50	0.41	0.37
BVMT	21	54	10.91	6.65	10.0	10.53	7.41	0.00	33.0	33.00	0.88	1.26	0.91
CVLT_A1	22	54	6.30	1.66	6.0	6.00	1.48	4.00	10.0	6.00	0.59	-0.84	0.23
CVLT_A2	23	54	9.69	2.48	10.0	9.76	2.97	4.00	14.0	10.00	-0.29	-0.49	0.34
CVLT_A3	24	54	11.33	2.94	11.5	11.53	3.71	4.00	16.0	12.00	-0.38	-0.63	0.40
CVLT_A4	25	54	12.17	3.18	13.0	12.59	2.97	0.00	16.0	16.00	-1.27	2.37	0.43
CVLT_A5	26	54	12.44	3.28	14.0	13.09	2.97	1.00	16.0	15.00	-1.21	1.31	0.45
CVLT	27	54	51.93	11.73	52.5	53.09	12.60	22.00	70.0	48.00	-0.55	-0.37	1.60
CVLT_rep	28	54	7.46	5.77	6.0	6.35	4.45	0.00	27.0	27.00	1.38	1.80	0.79
MMSE_MST	29	54	53.37	9.83	53.0	53.59	9.64	31.00	81.0	50.00	0.01	0.08	1.34
VolCereb	30	54	1504.43	70.44	1514.5	1514.45	55.60	1291.00	1643.0	352.00	-0.82	0.63	9.59
VolCinz	31	54	890.75	52.32	893.0	894.04	49.67	766.00	995.0	229.00	-0.32	-0.36	7.12
LesoesFlair	32	54	9.70	8.55	6.7	7.50	5.04	1.20	38.9	37.70	1.48	1.63	1.16
LesoesImpreg	33	54	0.01	0.07	0.0	0.00	0.00	0.00	0.4	0.40	4.57	21.35	0.01

Medidas clínicas

Análise de cluster RM

library(cluster)
library(tidyverse)
# Select the analysis variables from the resonance database
#dados2 <- na.omit(Ress[c(1,3, 8:15, 17:38)])
dados2 <- Ress %>%
  dplyr::select(
    TempoDoenca:pinosesq9, VitD, FSS:SD_oral, BVMT, CVLT, MMSE_MST,
    VolCinz, LesoesFlair, grupos, grupos2, Idade, Escolaridade
  )

# http://gradientdescending.com/unsupervised-random-forest-example/
suppressPackageStartupMessages(library(randomForest))
suppressPackageStartupMessages(library(caret))
suppressPackageStartupMessages(library(cluster))
suppressPackageStartupMessages(library(RColorBrewer))

# set colours
myColRamp <- colorRampPalette(colors = c("#5DBCD2", "#FF80AA"))

# Variables fed to the unsupervised random forest. They are selected by name
# (previously by position: columns 1, 2, 4, 5, 6, 16, 17 of dados2) so that a
# change in the select() above cannot silently swap a predictor.
vars_rf <- c(
  "TempoDoenca", "EDSS", "pinosdir9", "pinosesq9",
  "VitD", "VolCinz", "LesoesFlair"
)

# random forest model (y = NULL makes it unsupervised); the proximity matrix
# is kept because the clustering step reads rf2$proximity
set.seed(3984)
rf2 <- randomForest(x = dados2[vars_rf], y = NULL, mtry = 3,
                    ntree = 10000, proximity = TRUE, oob.prox = TRUE)
rf2

## 
## Call:
##  randomForest(x = dados2[c(1, 2, 4, 5, 6, 16, 17)], y = NULL,      ntree = 10000, mtry = 3, proximity = TRUE, oob.prox = TRUE) 
##                Type of random forest: unsupervised
##                      Number of trees: 10000
## No. of variables tried at each split: 3

# PAM method: partition the random-forest proximity matrix into 2 clusters
# NOTE(review): `prox` holds proximities (similarities) and is handed to pam()
# as an ordinary data matrix, because pam() treats a non-"dist" input as
# observations x variables. Confirm whether clustering on the dissimilarity,
# e.g. as.dist(1 - prox), was intended; changing it would alter the clusters.
prox <- rf2$proximity
pam.rf <- pam(prox, 2)

# Label the two clusters and store them as a factor. The component is called
# `clustering`; `$cluster` only resolved through partial matching.
dados2$clustersRF <- as.factor(
  ifelse(pam.rf$clustering == 1, "Cluster1", "Cluster2")
)

table(dados2$clustersRF)

## 
## Cluster1 Cluster2 
##       35       19

# Share of observations in each random-forest cluster
prop.table(table(dados2[["clustersRF"]]))

## 
##  Cluster1  Cluster2 
## 0.6481481 0.3518519

# Cross-table of the EDSS <= 1.5 groups against the EDSS <= 2.5 groups
table(dados2[["grupos"]], dados2[["grupos2"]])

##       
##        MS RRMS
##   EM   34    0
##   EMRR  8   12

# Cross-table of the random-forest clusters against the EDSS <= 1.5 groups
table(dados2[["clustersRF"]], dados2[["grupos"]])

##           
##            EM EMRR
##   Cluster1 17   18
##   Cluster2 17    2

# Cross-table of the random-forest clusters against the EDSS <= 2.5 groups
table(dados2[["clustersRF"]], dados2[["grupos2"]])

##           
##            MS RRMS
##   Cluster1 23   12
##   Cluster2 19    0

Análise de Componentes Principais

PCA com os grupos do Random Forest Clustering.

library(factoextra)
library("FactoMineR")

# Compute the PCA on the first 17 columns of dados2 (no automatic plot)
BD.pca <- PCA(dados2[1:17], graph = FALSE)

# Individuals plot; habillage colours the points by random-forest cluster
fviz_pca_ind(
  BD.pca,
  label = "none", # hide individual labels
  habillage = dados2$clustersRF, # color by groups
  palette = c("#5DBCD2", "#FF80AA"),
  addEllipses = TRUE, ellipse.level = 0.8 # Concentration ellipses
)

# Biplot of individuals and variables on the first two components.
# `repel` is spelled FALSE because the shorthand F can be reassigned.
fviz_pca_biplot(
  BD.pca, axes = c(1, 2), geom = "point",
  col.ind = "black", col.var = "steelblue", label = "all",
  invisible = "none", repel = FALSE, habillage = dados2$clustersRF,
  palette = c("#5DBCD2", "#FF80AA"), addEllipses = TRUE, ellipse.level = 0.8,
  title = "PCA - Biplot"
)

Exploração dos dados

Sintaxe das estatísticas robustas

## Robust statistics
library(WRS2)

# Trimmed mean of x (20% trimming by default).
#
# x:     vector passed to mean().
# tr:    trimming proportion, forwarded as the second argument of mean().
# na.rm: if TRUE, missing values are dropped from x before averaging.
# STAND: accepted but not used by this function.
#
# Returns the value computed by mean(x, tr).
tmean <- function(x, tr = .2, na.rm = FALSE, STAND = NULL) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  mean(x, tr)
}

# Trimmed standard deviation (SD), computed column by column (20% by default).
#
# x:     numeric vector, matrix or data frame; coerced with as.matrix().
# trim:  trimming proportion. It is used for the trimmed mean that centres
#        each column and for the (1 - trim) quantile of the absolute residuals;
#        residuals larger than that quantile are left out of the sum of squares.
# const: if TRUE, divide by the built-in consistency constant (0.7892 for
#        trim = 0.1, 0.6615 for trim = 0.2). For any other trim a warning is
#        raised and `const` is used as passed (TRUE acts as 1). If FALSE, no
#        correction is applied (divisor 1).
#
# Returns an unnamed numeric vector with one trimmed SD per column of x.
sd_trim <- function(x, trim = 0.2, const = TRUE) {
  x <- as.matrix(x)
  if (const) {
    # Tolerant comparison: an exact `==` misses a trim that was computed
    # rather than typed as a literal.
    if (isTRUE(all.equal(trim, 0.1))) {
      const <- 0.7892
    } else if (isTRUE(all.equal(trim, 0.2))) {
      const <- 0.6615
    } else {
      warning("Did you specify the correct consistency constant for trimming?")
    }
  } else {
    const <- 1
  }
  # Centre every column on its trimmed mean
  m <- apply(x, 2, mean, trim)
  res <- sweep(x, 2, m)
  # Per-column cut-off on the absolute residuals
  qu <- apply(abs(res), 2, quantile, 1 - trim)
  keep <- sweep(abs(res), 2, qu, "<=")
  # Sum the retained squared residuals within each column. Masking instead of
  # extracting and reshaping keeps the columns separate even when they retain
  # a different number of observations (e.g. with ties at the cut-off).
  sdtrim <- unname(colSums(res^2 * keep))
  sqrt(sdtrim / (nrow(x) * (1 - trim) - 1)) / const
}

Resultados robustos

# Overall description of the analysis data set (20% trimmed mean)
dados2 %>% psych::describe(tr = .2)

##              vars  n   mean    sd median trimmed   mad    min   max  range
## TempoDoenca     1 54   6.88  6.34    5.5    5.57  6.15   0.25  32.0  31.75
## EDSS            2 54   1.56  2.04    1.0    0.94  1.48   0.00   7.5   7.50
## Passos25        3 54   7.26  2.56    7.0    6.76  2.97   4.00  15.0  11.00
## pinosdir9       4 54  28.81  6.61   27.0   27.56  5.93  18.00  45.0  27.00
## pinosesq9       5 54  30.81  9.34   28.0   28.85  5.93  19.00  70.0  51.00
## VitD            6 54  62.73 29.98   55.5   63.23 37.06   7.00 100.0  93.00
## FSS             7 54  35.74 16.72   35.5   35.47 17.79   9.00  63.0  54.00
## HADS_A          8 54   6.43  3.97    5.5    6.09  3.71   0.00  16.0  16.00
## HADS_D          9 54   4.50  3.96    3.0    3.59  2.97   0.00  15.0  15.00
## SF36           10 54  91.65 11.74   94.0   93.88  7.41  53.00 105.0  52.00
## SD_escr        11 54  44.26 15.77   47.5   43.59 21.50  11.00  77.0  66.00
## SD_oral        12 54  46.30 15.44   49.0   46.59 15.57  12.00  77.0  65.00
## BVMT           13 54  10.91  6.65   10.0   10.53  7.41   0.00  33.0  33.00
## CVLT           14 54  51.93 11.73   52.5   53.09 12.60  22.00  70.0  48.00
## MMSE_MST       15 54  53.37  9.83   53.0   53.59  9.64  31.00  81.0  50.00
## VolCinz        16 54 890.75 52.32  893.0  894.04 49.67 766.00 995.0 229.00
## LesoesFlair    17 54   9.70  8.55    6.7    7.50  5.04   1.20  38.9  37.70
## grupos*        18 54   1.37  0.49    1.0    1.29  0.00   1.00   2.0   1.00
## grupos2*       19 54   1.22  0.42    1.0    1.06  0.00   1.00   2.0   1.00
## Idade          20 54  36.28  9.22   34.5   35.21  6.67  18.00  61.0  43.00
## Escolaridade   21 54  13.80  4.54   13.5   13.68  3.71   4.00  28.0  24.00
## clustersRF*    22 54   1.35  0.48    1.0    1.26  0.00   1.00   2.0   1.00
##               skew kurtosis   se
## TempoDoenca   1.50     2.81 0.86
## EDSS          1.27     0.57 0.28
## Passos25      1.47     2.21 0.35
## pinosdir9     0.84    -0.21 0.90
## pinosesq9     1.96     4.68 1.27
## VitD          0.09    -1.52 4.08
## FSS           0.04    -1.14 2.27
## HADS_A        0.44    -0.74 0.54
## HADS_D        1.26     0.86 0.54
## SF36         -1.86     3.42 1.60
## SD_escr       0.09    -1.04 2.15
## SD_oral      -0.11    -0.89 2.10
## BVMT          0.88     1.26 0.91
## CVLT         -0.55    -0.37 1.60
## MMSE_MST      0.01     0.08 1.34
## VolCinz      -0.32    -0.36 7.12
## LesoesFlair   1.48     1.63 1.16
## grupos*       0.52    -1.76 0.07
## grupos2*      1.30    -0.32 0.06
## Idade         0.67     0.28 1.25
## Escolaridade  0.31     0.70 0.62
## clustersRF*   0.60    -1.67 0.07

# Whole sample (robust statistics): 20% trimmed mean and SD per variable.
# vapply() guarantees one numeric value per column.
# NOTE(review): `dp` is the ordinary sd(), not a trimmed SD; confirm whether
# sd_trim() was meant to be used here.
media <- round(vapply(dados2[c(1:17, 20, 21)], tmean, numeric(1)), 2)
dp <- round(vapply(dados2[c(1:17, 20, 21)], sd, numeric(1)), 2)
total <- cbind(media, dp)
total

##               media    dp
## TempoDoenca    5.57  6.34
## EDSS           0.94  2.04
## Passos25       6.76  2.56
## pinosdir9     27.56  6.61
## pinosesq9     28.85  9.34
## VitD          63.23 29.98
## FSS           35.47 16.72
## HADS_A         6.09  3.97
## HADS_D         3.59  3.96
## SF36          93.88 11.74
## SD_escr       43.59 15.77
## SD_oral       46.59 15.44
## BVMT          10.53  6.65
## CVLT          53.09 11.73
## MMSE_MST      53.59  9.83
## VolCinz      894.04 52.32
## LesoesFlair    7.50  8.55
## Idade         35.21  9.22
## Escolaridade  13.68  4.54

# Description of the random-forest clusters with robust statistics
psych::describeBy(dados2, group = dados2$clustersRF, tr = .2)

## 
##  Descriptive statistics by group 
## group: Cluster1
##              vars  n   mean    sd median trimmed   mad   min   max range  skew
## TempoDoenca     1 35   8.68  6.89    7.0    7.62  5.93   0.6  32.0  31.4  1.17
## EDSS            2 35   2.23  2.22    2.0    1.76  2.97   0.0   7.5   7.5  0.76
## Passos25        3 35   7.31  2.64    7.0    6.81  2.97   4.0  15.0  11.0  1.35
## pinosdir9       4 35  31.63  6.44   31.0   30.67  7.41  23.0  45.0  22.0  0.54
## pinosesq9       5 35  33.83 10.19   31.0   31.57  7.41  21.0  70.0  49.0  1.61
## VitD            6 35  54.68 28.72   45.0   51.01 26.69   7.0  99.0  92.0  0.44
## FSS             7 35  39.46 16.69   41.0   40.86 16.31   9.0  63.0  54.0 -0.36
## HADS_A          8 35   6.71  4.06    5.0    6.24  4.45   1.0  16.0  15.0  0.53
## HADS_D          9 35   5.83  4.20    4.0    4.95  2.97   1.0  15.0  14.0  0.91
## SF36           10 35  91.14 11.39   92.0   92.57  5.93  53.0 105.0  52.0 -1.68
## SD_escr        11 35  37.20 12.08   33.0   36.19 10.38  11.0  68.0  57.0  0.38
## SD_oral        12 35  39.89 12.42   40.0   39.81 16.31  12.0  68.0  56.0  0.03
## BVMT           13 35   9.63  6.53    9.0    8.81  5.93   0.0  29.0  29.0  0.87
## CVLT           14 35  49.14 11.75   50.0   49.95 14.83  22.0  68.0  46.0 -0.46
## MMSE_MST       15 35  49.86  9.09   51.0   49.76  8.90  31.0  70.0  39.0  0.00
## VolCinz        16 35 870.29 50.23  873.0  871.44 47.44 766.0 980.0 214.0 -0.03
## LesoesFlair    17 35  12.78  9.04    9.1   10.48  5.63   2.3  38.9  36.6  1.12
## grupos*        18 35   1.51  0.51    2.0    1.52  0.00   1.0   2.0   1.0 -0.05
## grupos2*       19 35   1.34  0.48    1.0    1.24  0.00   1.0   2.0   1.0  0.63
## Idade          20 35  39.09  9.53   38.0   37.76 10.38  19.0  61.0  42.0  0.49
## Escolaridade   21 35  13.17  5.21   12.0   12.86  4.45   4.0  28.0  24.0  0.58
## clustersRF*    22 35   1.00  0.00    1.0    1.00  0.00   1.0   1.0   0.0   NaN
##              kurtosis   se
## TempoDoenca      1.62 1.16
## EDSS            -0.63 0.37
## Passos25         1.68 0.45
## pinosdir9       -0.95 1.09
## pinosesq9        2.78 1.72
## VitD            -1.15 4.85
## FSS             -0.95 2.82
## HADS_A          -0.65 0.69
## HADS_D          -0.27 0.71
## SF36             3.53 1.93
## SD_escr         -0.39 2.04
## SD_oral         -0.64 2.10
## BVMT             0.61 1.10
## CVLT            -0.55 1.99
## MMSE_MST        -0.34 1.54
## VolCinz         -0.39 8.49
## LesoesFlair      0.33 1.53
## grupos*         -2.05 0.09
## grupos2*        -1.64 0.08
## Idade           -0.33 1.61
## Escolaridade     0.39 0.88
## clustersRF*       NaN 0.00
## ------------------------------------------------------------ 
## group: Cluster2
##              vars  n   mean    sd median trimmed   mad    min   max  range
## TempoDoenca     1 19   3.57  3.28    2.0    2.95  2.08   0.25  13.0  12.75
## EDSS            2 19   0.32  0.67    0.0    0.08  0.00   0.00   2.0   2.00
## Passos25        3 19   7.16  2.46    7.0    6.69  1.48   5.00  15.0  10.00
## pinosdir9       4 19  23.63  2.54   24.0   23.92  2.97  18.00  27.0   9.00
## pinosesq9       5 19  25.26  3.25   25.0   25.23  2.97  19.00  32.0  13.00
## VitD            6 19  77.56 27.02   99.0   82.13  0.00  30.00 100.0  70.00
## FSS             7 19  28.89 14.84   25.0   26.85 14.83   9.00  63.0  54.00
## HADS_A          8 19   5.89  3.86    6.0    5.77  4.45   0.00  12.0  12.00
## HADS_D          9 19   2.05  1.75    1.0    1.85  1.48   0.00   6.0   6.00
## SF36           10 19  92.58 12.62   96.0   95.92  4.45  56.00 104.0  48.00
## SD_escr        11 19  57.26 13.44   59.0   59.00  8.90  19.00  77.0  58.00
## SD_oral        12 19  58.11 13.55   59.0   59.92  8.90  19.00  77.0  58.00
## BVMT           13 19  13.26  6.38   14.0   13.08  4.45   3.00  33.0  30.00
## CVLT           14 19  57.05 10.09   59.0   58.15 10.38  33.00  70.0  37.00
## MMSE_MST       15 19  59.84  7.76   59.0   59.77  5.93  47.00  81.0  34.00
## VolCinz        16 19 928.42 31.35  927.0  926.92 35.58 872.00 995.0 123.00
## LesoesFlair    17 19   4.02  2.90    3.5    3.33  2.82   1.20  11.7  10.50
## grupos*        18 19   1.11  0.32    1.0    1.00  0.00   1.00   2.0   1.00
## grupos2*       19 19   1.00  0.00    1.0    1.00  0.00   1.00   1.0   0.00
## Idade          20 19  31.11  5.91   32.0   31.31  5.93  18.00  42.0  24.00
## Escolaridade   21 19  14.95  2.72   16.0   15.00  2.97  11.00  20.0   9.00
## clustersRF*    22 19   2.00  0.00    2.0    2.00  0.00   2.00   2.0   0.00
##               skew kurtosis   se
## TempoDoenca   1.24     1.07 0.75
## EDSS          1.69     1.33 0.15
## Passos25      1.60     2.79 0.56
## pinosdir9    -0.55    -0.77 0.58
## pinosesq9     0.08    -0.56 0.74
## VitD         -0.51    -1.60 6.20
## FSS           0.80    -0.27 3.41
## HADS_A        0.19    -1.50 0.88
## HADS_D        0.69    -0.68 0.40
## SF36         -2.00     2.86 2.89
## SD_escr      -1.21     1.43 3.08
## SD_oral      -1.30     1.63 3.11
## BVMT          1.15     2.42 1.46
## CVLT         -0.63    -0.52 2.31
## MMSE_MST      0.58     0.83 1.78
## VolCinz       0.27    -0.60 7.19
## LesoesFlair   1.23     0.58 0.66
## grupos*       2.37     3.84 0.07
## grupos2*       NaN      NaN 0.00
## Idade        -0.25    -0.49 1.35
## Escolaridade -0.12    -1.18 0.62
## clustersRF*    NaN      NaN 0.00

# Yuen robust t-test between the two random-forest clusters: one p-value per
# variable (columns 1:17, 20, 21 of dados2), rounded to 3 decimals
clusterRF.p <- round(
  vapply(
    dados2[c(1:17, 20, 21)],
    function(i) yuen(i ~ dados2$clustersRF)$p.value,
    numeric(1)
  ),
  3
)
# Effect size (with bootstrap); the seed fixes the resampling so the values
# are the same on every run
set.seed(3984)
clusterRF.d <- round(
  vapply(
    dados2[c(1:17, 20, 21)],
    function(i) yuen.effect.ci(i ~ dados2$clustersRF)$effsize,
    numeric(1)
  ),
  3
)
# Print results
cbind(clusterRF.p, clusterRF.d)

##              clusterRF.p clusterRF.d
## TempoDoenca        0.003       0.596
## EDSS               0.003       0.778
## Passos25           0.858       0.097
## pinosdir9          0.000       0.869
## pinosesq9          0.000       0.778
## VitD               0.011       0.492
## FSS                0.009       0.519
## HADS_A             0.740       0.106
## HADS_D             0.002       0.698
## SF36               0.084       0.343
## SD_escr            0.000       0.798
## SD_oral            0.000       0.770
## BVMT               0.010       0.434
## CVLT               0.025       0.480
## MMSE_MST           0.000       0.709
## VolCinz            0.000       0.819
## LesoesFlair        0.000       0.872
## Idade              0.004       0.639
## Escolaridade       0.060       0.408

# Description of the EM and EMRR groups (EDSS <= 1.5)
psych::describeBy(dados2, group = dados2$grupos, tr = .2)

## 
##  Descriptive statistics by group 
## group: EM
##              vars  n   mean    sd median trimmed   mad    min   max  range
## TempoDoenca     1 34   5.35  4.41    5.0    4.61  5.41   0.25  15.0  14.75
## EDSS            2 34   0.26  0.45    0.0    0.14  0.00   0.00   1.0   1.00
## Passos25        3 34   6.76  2.24    6.0    6.32  1.48   4.00  15.0  11.00
## pinosdir9       4 34  25.91  3.73   26.0   25.55  2.22  20.00  39.0  19.00
## pinosesq9       5 34  27.12  4.40   27.0   27.00  4.45  19.00  40.0  21.00
## VitD            6 34  64.43 29.36   63.0   66.08 43.07   7.00 100.0  93.00
## FSS             7 34  31.00 15.89   30.0   29.59 15.57   9.00  63.0  54.00
## HADS_A          8 34   5.62  3.86    5.0    5.18  4.45   0.00  14.0  14.00
## HADS_D          9 34   3.41  3.46    3.0    2.55  2.22   0.00  15.0  15.00
## SF36           10 34  93.41 10.85   96.0   95.55  6.67  53.00 105.0  52.00
## SD_escr        11 34  49.56 15.71   51.0   50.55 17.79  19.00  77.0  58.00
## SD_oral        12 34  50.62 15.45   53.0   51.95 17.79  19.00  77.0  58.00
## BVMT           13 34  11.59  6.16   11.5   11.68  6.67   0.00  33.0  33.00
## CVLT           14 34  52.35 11.65   53.0   53.41 11.86  23.00  70.0  47.00
## MMSE_MST       15 34  56.03  9.97   57.0   56.55  9.64  32.00  81.0  49.00
## VolCinz        16 34 896.30 47.68  899.0  899.20 41.51 787.00 995.0 208.00
## LesoesFlair    17 34   7.80  7.24    5.6    5.93  4.74   1.20  30.9  29.70
## grupos*        18 34   1.00  0.00    1.0    1.00  0.00   1.00   1.0   0.00
## grupos2*       19 34   1.00  0.00    1.0    1.00  0.00   1.00   1.0   0.00
## Idade          20 34  34.12  7.45   33.0   33.64  6.67  18.00  55.0  37.00
## Escolaridade   21 34  14.38  4.52   15.0   14.27  4.45   4.00  28.0  24.00
## clustersRF*    22 34   1.50  0.51    1.5    1.50  0.74   1.00   2.0   1.00
##               skew kurtosis   se
## TempoDoenca   0.61    -0.90 0.76
## EDSS          1.02    -0.99 0.08
## Passos25      1.49     2.92 0.38
## pinosdir9     1.22     2.63 0.64
## pinosesq9     0.51     0.38 0.75
## VitD         -0.07    -1.32 5.03
## FSS           0.39    -0.86 2.73
## HADS_A        0.54    -0.74 0.66
## HADS_D        1.93     3.62 0.59
## SF36         -2.20     5.22 1.86
## SD_escr      -0.27    -1.11 2.69
## SD_oral      -0.35    -0.93 2.65
## BVMT          0.85     2.23 1.06
## CVLT         -0.51    -0.44 2.00
## MMSE_MST     -0.17     0.13 1.71
## VolCinz      -0.33    -0.13 8.18
## LesoesFlair   1.64     2.22 1.24
## grupos*        NaN      NaN 0.00
## grupos2*       NaN      NaN 0.00
## Idade         0.46     0.37 1.28
## Escolaridade  0.43     1.14 0.78
## clustersRF*   0.00    -2.06 0.09
## ------------------------------------------------------------ 
## group: EMRR
##              vars  n   mean    sd median trimmed   mad   min   max range  skew
## TempoDoenca     1 20   9.48  8.18   6.00    8.00  5.93   0.6  32.0  31.4  1.06
## EDSS            2 20   3.75  1.79   3.00    3.42  1.48   2.0   7.5   5.5  0.59
## Passos25        3 20   8.10  2.88   8.00    7.50  1.48   5.0  15.0  10.0  1.25
## pinosdir9       4 20  33.75  7.55  34.00   34.17  8.90  18.0  45.0  27.0 -0.30
## pinosesq9       5 20  37.10 11.99  35.00   34.58 11.12  24.0  70.0  46.0  1.08
## VitD            6 20  59.85 31.56  42.00   57.77 22.46  22.0  99.0  77.0  0.33
## FSS             7 20  43.80 15.24  45.00   45.50 20.02   9.0  63.0  54.0 -0.52
## HADS_A          8 20   7.80  3.87   7.50    7.50  5.19   2.0  16.0  14.0  0.38
## HADS_D          9 20   6.35  4.16   5.00    5.92  4.45   1.0  15.0  14.0  0.63
## SF36           10 20  88.65 12.84  90.50   90.67  7.41  56.0 105.0  49.0 -1.38
## SD_escr        11 20  35.25 11.35  32.50   34.50  6.67  11.0  53.0  42.0  0.04
## SD_oral        12 20  38.95 12.66  39.50   39.08 15.57  12.0  58.0  46.0 -0.17
## BVMT           13 20   9.75  7.45   8.50    8.42  5.93   1.0  29.0  28.0  1.01
## CVLT           14 20  51.20 12.14  51.00   52.50 16.31  22.0  67.0  45.0 -0.56
## MMSE_MST       15 20  48.85  7.93  51.00   49.42  5.93  31.0  65.0  34.0 -0.28
## VolCinz        16 20 881.30 59.47 885.00  881.75 67.46 766.0 980.0 214.0 -0.15
## LesoesFlair    17 20  12.93  9.77   9.05   10.34  6.60   3.5  38.9  35.4  1.12
## grupos*        18 20   1.00  0.00   1.00    1.00  0.00   1.0   1.0   0.0   NaN
## grupos2*       19 20   1.60  0.50   2.00    1.67  0.00   1.0   2.0   1.0 -0.38
## Idade          20 20  39.95 10.87  37.00   38.67  8.90  19.0  61.0  42.0  0.33
## Escolaridade   21 20  12.80  4.53  11.50   12.58  3.71   4.0  22.0  18.0  0.12
## clustersRF*    22 20   1.10  0.31   1.00    1.00  0.00   1.0   2.0   1.0  2.47
##              kurtosis    se
## TempoDoenca      0.43  1.83
## EDSS            -1.14  0.40
## Passos25         0.70  0.64
## pinosdir9       -0.97  1.69
## pinosesq9        0.55  2.68
## VitD            -1.83  7.06
## FSS             -0.76  3.41
## HADS_A          -0.98  0.87
## HADS_D          -0.60  0.93
## SF36             1.48  2.87
## SD_escr         -0.84  2.54
## SD_oral         -0.90  2.83
## BVMT             0.31  1.67
## CVLT            -0.57  2.71
## MMSE_MST        -0.28  1.77
## VolCinz         -0.92 13.30
## LesoesFlair      0.23  2.18
## grupos*           NaN  0.00
## grupos2*        -1.95  0.11
## Idade           -0.77  2.43
## Escolaridade    -0.55  1.01
## clustersRF*      4.32  0.07

# Yuen robust t-test of every measure between the two levels of
# dados2$grupos (EM vs EMRR).
# The three grouping columns are excluded by name instead of by column
# position, so the selection does not silently shift if columns are reordered.
medidas <- setdiff(names(dados2), c("grupos", "grupos2", "clustersRF"))
grupos.p <- round(vapply(
  dados2[medidas],
  function(i) yuen(i ~ dados2$grupos)$p.value,
  numeric(1)
), 3)
# Effect size (with bootstrap). Seeded so that the resampling gives the same
# values on every run.
set.seed(4)
grupos.d <- round(vapply(
  dados2[medidas],
  function(i) yuen.effect.ci(i ~ dados2$grupos)$effsize,
  numeric(1)
), 3)
# Print results
cbind(grupos.p, grupos.d)

##              grupos.p grupos.d
## TempoDoenca     0.182    0.395
## EDSS            0.000    0.922
## Passos25        0.053    0.353
## pinosdir9       0.001    0.759
## pinosesq9       0.013    0.768
## VitD            0.538    0.126
## FSS             0.006    0.564
## HADS_A          0.095    0.350
## HADS_D          0.011    0.616
## SF36            0.043    0.493
## SD_escr         0.002    0.674
## SD_oral         0.013    0.513
## BVMT            0.057    0.368
## CVLT            0.829    0.070
## MMSE_MST        0.008    0.526
## VolCinz         0.321    0.232
## LesoesFlair     0.063    0.413
## Idade           0.132    0.439
## Escolaridade    0.174    0.304

# Descriptive statistics of dados2 split by grupos2 (MS vs RRMS, i.e. the
# EDSS <= 2.5 split).
# tr = .2 is forwarded by describeBy; presumably the trimming fraction used
# for the "trimmed" column -- confirm against the psych documentation.
psych::describeBy(dados2, dados2$grupos2, tr = .2)

## 
##  Descriptive statistics by group 
## group: MS
##              vars  n   mean    sd median trimmed   mad    min   max  range
## TempoDoenca     1 42   5.73  4.87    5.0    4.78  5.41   0.25  17.0  16.75
## EDSS            2 42   0.62  0.85    0.0    0.35  0.00   0.00   2.5   2.50
## Passos25        3 42   6.88  2.43    6.0    6.35  1.48   4.00  15.0  11.00
## pinosdir9       4 42  26.60  4.52   26.0   25.96  2.97  18.00  39.0  21.00
## pinosesq9       5 42  27.81  5.26   27.0   27.12  4.45  19.00  43.0  24.00
## VitD            6 42  65.54 29.35   63.0   67.03 43.74   7.00 100.0  93.00
## FSS             7 42  32.48 16.15   33.0   31.73 17.79   9.00  63.0  54.00
## HADS_A          8 42   6.10  3.94    5.0    5.69  4.45   0.00  14.0  14.00
## HADS_D          9 42   3.83  3.77    3.0    2.85  2.97   0.00  15.0  15.00
## SF36           10 42  92.95 11.63   96.0   95.38  6.67  53.00 105.0  52.00
## SD_escr        11 42  47.64 15.19   50.0   47.73 18.53  19.00  77.0  58.00
## SD_oral        12 42  49.31 14.73   52.5   50.04 17.79  19.00  77.0  58.00
## BVMT           13 42  11.62  6.48   11.0   11.42  5.93   0.00  33.0  33.00
## CVLT           14 42  51.98 11.89   52.5   53.12 11.86  22.00  70.0  48.00
## MMSE_MST       15 42  54.64 10.48   55.5   55.38 11.12  31.00  81.0  50.00
## VolCinz        16 42 897.03 51.82  902.0  902.13 44.48 766.00 995.0 229.00
## LesoesFlair    17 42   8.62  7.51    5.9    6.75  4.82   1.20  30.9  29.70
## grupos*        18 42   1.19  0.40    1.0    1.00  0.00   1.00   2.0   1.00
## grupos2*       19 42   1.00  0.00    1.0    1.00  0.00   1.00   1.0   0.00
## Idade          20 42  34.50  8.50   33.0   33.62  5.93  18.00  58.0  40.00
## Escolaridade   21 42  14.52  4.58   15.0   14.46  4.45   4.00  28.0  24.00
## clustersRF*    22 42   1.45  0.50    1.0    1.42  0.00   1.00   2.0   1.00
##               skew kurtosis   se
## TempoDoenca   0.78    -0.53 0.75
## EDSS          0.93    -0.65 0.13
## Passos25      1.72     3.28 0.38
## pinosdir9     0.82     0.63 0.70
## pinosesq9     1.00     0.94 0.81
## VitD         -0.06    -1.41 4.53
## FSS           0.20    -1.08 2.49
## HADS_A        0.45    -0.87 0.61
## HADS_D        1.65     2.22 0.58
## SF36         -2.10     4.28 1.79
## SD_escr      -0.05    -1.15 2.34
## SD_oral      -0.19    -0.93 2.27
## BVMT          0.96     1.77 1.00
## CVLT         -0.63    -0.08 1.83
## MMSE_MST     -0.23    -0.05 1.62
## VolCinz      -0.56    -0.12 8.00
## LesoesFlair   1.32     0.96 1.16
## grupos*       1.52     0.32 0.06
## grupos2*       NaN      NaN 0.00
## Idade         0.69     0.56 1.31
## Escolaridade  0.25     0.65 0.71
## clustersRF*   0.18    -2.01 0.08
## ------------------------------------------------------------ 
## group: RRMS
##              vars  n   mean    sd median trimmed   mad   min   max range  skew
## TempoDoenca     1 12  10.88  9.10   8.00    9.25  6.67   0.6  32.0  31.4  0.94
## EDSS            2 12   4.83  1.51   4.75    4.75  1.85   3.0   7.5   4.5  0.15
## Passos25        3 12   8.58  2.64   8.00    8.12  1.48   5.0  15.0  10.0  1.06
## pinosdir9       4 12  36.58  7.05  38.00   37.62  7.41  23.0  45.0  22.0 -0.56
## pinosesq9       5 12  41.33 12.74  36.00   39.50  6.67  26.0  70.0  44.0  0.85
## VitD            6 12  52.92 31.37  37.65   48.88 16.01  22.0  99.0  77.0  0.60
## FSS             7 12  47.17 13.82  45.00   48.38 20.76  23.0  63.0  40.0 -0.27
## HADS_A          8 12   7.58  4.06   7.50    7.38  4.45   2.0  16.0  14.0  0.43
## HADS_D          9 12   6.83  3.86   6.00    6.50  3.71   2.0  15.0  13.0  0.58
## SF36           10 12  87.08 11.43  89.00   88.62  6.67  56.0 104.0  48.0 -1.38
## SD_escr        11 12  32.42 11.87  30.00   31.75  7.41  11.0  53.0  42.0  0.20
## SD_oral        12 12  35.75 13.59  35.00   35.38 13.34  12.0  58.0  46.0  0.04
## BVMT           13 12   8.42  6.92   6.50    7.25  6.67   1.0  24.0  23.0  0.84
## CVLT           14 12  51.75 11.66  55.50   52.38 12.60  35.0  64.0  29.0 -0.19
## MMSE_MST       15 12  48.92  5.32  51.00   49.38  5.93  40.0  55.0  15.0 -0.35
## VolCinz        16 12 868.75 50.01 862.50  866.50 39.29 779.0 980.0 201.0  0.42
## LesoesFlair    17 12  13.47 11.05   9.05   10.74  4.89   4.3  38.9  34.6  1.13
## grupos*        18 12   1.00  0.00   1.00    1.00  0.00   1.0   1.0   0.0   NaN
## grupos2*       19 12   1.00  0.00   1.00    1.00  0.00   1.0   1.0   0.0   NaN
## Idade          20 12  42.50  9.24  40.50   41.00  8.90  32.0  61.0  29.0  0.66
## Escolaridade   21 12  11.25  3.49  11.00   11.38  0.74   4.0  18.0  14.0 -0.18
## clustersRF*    22 12   1.00  0.00   1.00    1.00  0.00   1.0   1.0   0.0   NaN
##              kurtosis    se
## TempoDoenca     -0.19  2.63
## EDSS            -1.42  0.44
## Passos25         0.49  0.76
## pinosdir9       -1.05  2.04
## pinosesq9       -0.42  3.68
## VitD            -1.59  9.06
## FSS             -1.49  3.99
## HADS_A          -0.84  1.17
## HADS_D          -0.85  1.11
## SF36             1.99  3.30
## SD_escr         -0.86  3.43
## SD_oral         -1.17  3.92
## BVMT            -0.35  2.00
## CVLT            -1.91  3.37
## MMSE_MST        -1.56  1.53
## VolCinz          0.02 14.44
## LesoesFlair     -0.21  3.19
## grupos*           NaN  0.00
## grupos2*          NaN  0.00
## Idade           -0.99  2.67
## Escolaridade     0.01  1.01
## clustersRF*       NaN  0.00

# Yuen robust t-test of every measure between the two levels of
# dados2$grupos2 (MS vs RRMS).
# The three grouping columns are excluded by name instead of by column
# position, so the selection does not silently shift if columns are reordered.
medidas2 <- setdiff(names(dados2), c("grupos", "grupos2", "clustersRF"))
grupos.p2 <- round(vapply(
  dados2[medidas2],
  function(i) yuen(i ~ dados2$grupos2)$p.value,
  numeric(1)
), 3)
# Effect size (with bootstrap). Seeded so that the resampling gives the same
# values on every run.
set.seed(4)
grupos.d2 <- round(vapply(
  dados2[medidas2],
  function(i) yuen.effect.ci(i ~ dados2$grupos2)$effsize,
  numeric(1)
), 3)
# Print results
cbind(grupos.p2, grupos.d2)

##              grupos.p2 grupos.d2
## TempoDoenca      0.126     0.399
## EDSS             0.000     0.893
## Passos25         0.001     0.505
## pinosdir9        0.001     0.823
## pinosesq9        0.009     0.802
## VitD             0.251     0.265
## FSS              0.020     0.549
## HADS_A           0.277     0.227
## HADS_D           0.021     0.526
## SF36             0.009     0.533
## SD_escr          0.007     0.672
## SD_oral          0.019     0.564
## BVMT             0.080     0.379
## CVLT             0.891     0.083
## MMSE_MST         0.036     0.505
## VolCinz          0.026     0.467
## LesoesFlair      0.319     0.396
## Idade            0.058     0.623
## Escolaridade     0.000     0.548

# Gather the p-values and effect sizes of the three group comparisons
# (grupos, grupos2 and clustersRF) side by side in a single table
cbind(
  grupos.p = grupos.p,
  grupos.d = grupos.d,
  grupos.p2 = grupos.p2,
  grupos.d2 = grupos.d2,
  clusterRF.p = clusterRF.p,
  clusterRF.d = clusterRF.d
)

##              grupos.p grupos.d grupos.p2 grupos.d2 clusterRF.p clusterRF.d
## TempoDoenca     0.182    0.395     0.126     0.399       0.003       0.596
## EDSS            0.000    0.922     0.000     0.893       0.003       0.778
## Passos25        0.053    0.353     0.001     0.505       0.858       0.097
## pinosdir9       0.001    0.759     0.001     0.823       0.000       0.869
## pinosesq9       0.013    0.768     0.009     0.802       0.000       0.778
## VitD            0.538    0.126     0.251     0.265       0.011       0.492
## FSS             0.006    0.564     0.020     0.549       0.009       0.519
## HADS_A          0.095    0.350     0.277     0.227       0.740       0.106
## HADS_D          0.011    0.616     0.021     0.526       0.002       0.698
## SF36            0.043    0.493     0.009     0.533       0.084       0.343
## SD_escr         0.002    0.674     0.007     0.672       0.000       0.798
## SD_oral         0.013    0.513     0.019     0.564       0.000       0.770
## BVMT            0.057    0.368     0.080     0.379       0.010       0.434
## CVLT            0.829    0.070     0.891     0.083       0.025       0.480
## MMSE_MST        0.008    0.526     0.036     0.505       0.000       0.709
## VolCinz         0.321    0.232     0.026     0.467       0.000       0.819
## LesoesFlair     0.063    0.413     0.319     0.396       0.000       0.872
## Idade           0.132    0.439     0.058     0.623       0.004       0.639
## Escolaridade    0.174    0.304     0.000     0.548       0.060       0.408

Gráficos tradicionais

# Package vignette:
# https://cran.r-project.org/web/packages/compareGroups/vignettes/compareGroups_vignette.html
library(compareGroups)
# Descriptive summary table of every variable in dados2, without grouping
descrTable(dados2)

## 
## --------Summary descriptives table ---------
## 
## ___________________________ 
##                 [ALL]    N  
##                 N=54        
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## TempoDoenca  6.88 (6.34) 54 
## EDSS         1.56 (2.04) 54 
## Passos25     7.26 (2.56) 54 
## pinosdir9    28.8 (6.61) 54 
## pinosesq9    30.8 (9.34) 54 
## VitD         62.7 (30.0) 54 
## FSS          35.7 (16.7) 54 
## HADS_A       6.43 (3.97) 54 
## HADS_D       4.50 (3.96) 54 
## SF36         91.6 (11.7) 54 
## SD_escr      44.3 (15.8) 54 
## SD_oral      46.3 (15.4) 54 
## BVMT         10.9 (6.65) 54 
## CVLT         51.9 (11.7) 54 
## MMSE_MST     53.4 (9.83) 54 
## VolCinz      891 (52.3)  54 
## LesoesFlair  9.70 (8.55) 54 
## grupos:                  54 
##     EM       34 (63.0%)     
##     EMRR     20 (37.0%)     
## grupos2:                 54 
##     MS       42 (77.8%)     
##     RRMS     12 (22.2%)     
## Idade        36.3 (9.22) 54 
## Escolaridade 13.8 (4.54) 54 
## clustersRF:              54 
##     Cluster1 35 (64.8%)     
##     Cluster2 19 (35.2%)     
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

# Compare every other variable of dados2 across the clustersRF groups.
# NOTE(review): method = 4 presumably lets compareGroups pick the
# normal / non-normal analysis per variable -- confirm in the package docs.
res <- compareGroups(clustersRF ~., data = dados2, method = 4)
# Print the per-variable p-values and the method chosen for each variable
res

## 
## 
## -------- Summary of results by groups of 'clustersRF'---------
## 
## 
##    var          N  p.value  method                selection
## 1  TempoDoenca  54 0.002**  continuous non-normal ALL      
## 2  EDSS         54 <0.001** continuous non-normal ALL      
## 3  Passos25     54 0.911    continuous non-normal ALL      
## 4  pinosdir9    54 <0.001** continuous non-normal ALL      
## 5  pinosesq9    54 <0.001** continuous non-normal ALL      
## 6  VitD         54 0.007**  continuous non-normal ALL      
## 7  FSS          54 0.023**  continuous non-normal ALL      
## 8  HADS_A       54 0.501    continuous non-normal ALL      
## 9  HADS_D       54 <0.001** continuous non-normal ALL      
## 10 SF36         54 0.198    continuous non-normal ALL      
## 11 SD_escr      54 <0.001** continuous non-normal ALL      
## 12 SD_oral      54 <0.001** continuous non-normal ALL      
## 13 BVMT         54 0.036**  continuous non-normal ALL      
## 14 CVLT         54 0.020**  continuous non-normal ALL      
## 15 MMSE_MST     54 <0.001** continuous non-normal ALL      
## 16 VolCinz      54 <0.001** continuous non-normal ALL      
## 17 LesoesFlair  54 <0.001** continuous non-normal ALL      
## 18 grupos       54 0.007**  categorical           ALL      
## 19 grupos2      54 0.004**  categorical           ALL      
## 20 Idade        54 0.002**  continuous non-normal ALL      
## 21 Escolaridade 54 0.076*   continuous non-normal ALL      
## -----
## Signif. codes:  0 '**' 0.05 '*' 0.1 ' ' 1

# Detailed per-variable descriptives of the comparison stored in res
summary(res)

## 
##  --- Descriptives of each row-variable by groups of 'clustersRF' ---
## 
## ------------------- 
## row-variable: TempoDoenca 
## 
##          N  med Q1   Q3   lower upper p.overall
## [ALL]    54 5.5 2    10   3     7              
## Cluster1 35 7   3    11.5 5     10    0.002226 
## Cluster2 19 2   1.15 5    1     5              
## 
##      OR       OR.lower OR.upper
## [1,] 0.801355 0.680546 0.943611
## 
## ------------------- 
## row-variable: EDSS 
## 
##          N  med Q1 Q3    lower upper p.overall
## [ALL]    54 1   0  2.375 0     2              
## Cluster1 35 2   0  3.25  1     3     0.000228 
## Cluster2 19 0   0  0     0     0              
## 
##      OR       OR.lower OR.upper
## [1,] 0.344809 0.159441 0.745687
## 
## ------------------- 
## row-variable: Passos25 
## 
##          N  med Q1 Q3 lower upper p.overall
## [ALL]    54 7   5  8  6     8              
## Cluster1 35 7   5  8  5     8     0.911383 
## Cluster2 19 7   5  8  5     8              
## 
##      OR       OR.lower OR.upper
## [1,] 0.975539 0.779626 1.220683
## 
## ------------------- 
## row-variable: pinosdir9 
## 
##          N  med Q1   Q3    lower upper p.overall
## [ALL]    54 27  25   32.75 26    28             
## Cluster1 35 31  27   37    27    33    1e-06    
## Cluster2 19 24  22.5 25.5  22    26             
## 
##      OR      OR.lower OR.upper
## [1,] 0.54475 0.371205 0.79943 
## 
## ------------------- 
## row-variable: pinosesq9 
## 
##          N  med Q1    Q3 lower upper p.overall
## [ALL]    54 28  25.25 34 26    31             
## Cluster1 35 31  27    36 28    35    6.8e-05  
## Cluster2 19 25  24    27 24    27             
## 
##      OR       OR.lower OR.upper
## [1,] 0.743493 0.61489  0.898992
## 
## ------------------- 
## row-variable: VitD 
## 
##          N  med  Q1   Q3 lower upper p.overall
## [ALL]    54 55.5 38   99 42.7  80             
## Cluster1 35 45   36   75 38    63    0.006647 
## Cluster2 19 99   53.5 99 53    99             
## 
##      OR       OR.lower OR.upper
## [1,] 1.028301 1.00686  1.050199
## 
## ------------------- 
## row-variable: FSS 
## 
##          N  med  Q1    Q3 lower upper p.overall
## [ALL]    54 35.5 23.25 47 28    43             
## Cluster1 35 41   31    53 35    47    0.023447 
## Cluster2 19 25   18.5  37 16    38             
## 
##      OR       OR.lower OR.upper
## [1,] 0.959271 0.923711 0.996199
## 
## ------------------- 
## row-variable: HADS_A 
## 
##          N  med Q1   Q3  lower upper p.overall
## [ALL]    54 5.5 3.25 9   5     8              
## Cluster1 35 5   4    9   5     8     0.501106 
## Cluster2 19 6   3    9.5 3     10             
## 
##      OR       OR.lower OR.upper
## [1,] 0.947359 0.818892 1.095979
## 
## ------------------- 
## row-variable: HADS_D 
## 
##          N  med Q1 Q3   lower upper p.overall
## [ALL]    54 3   2  6.75 2     4              
## Cluster1 35 4   3  8    3     7     0.000163 
## Cluster2 19 1   1  3    1     3              
## 
##      OR       OR.lower OR.upper
## [1,] 0.583914 0.405043 0.841775
## 
## ------------------- 
## row-variable: SF36 
## 
##          N  med Q1 Q3   lower upper p.overall
## [ALL]    54 94  89 99   91    96             
## Cluster1 35 92  88 98.5 89    96    0.197896 
## Cluster2 19 96  93 99   93    99             
## 
##      OR       OR.lower OR.upper
## [1,] 1.011162 0.961446 1.063449
## 
## ------------------- 
## row-variable: SD_escr 
## 
##          N  med  Q1   Q3   lower upper p.overall
## [ALL]    54 47.5 30   56.5 33    51             
## Cluster1 35 33   29   48   30    45    5e-06    
## Cluster2 19 59   52.5 65   52    65             
## 
##      OR       OR.lower OR.upper
## [1,] 1.127098 1.057069 1.201765
## 
## ------------------- 
## row-variable: SD_oral 
## 
##          N  med Q1   Q3   lower upper p.overall
## [ALL]    54 49  35   57   40    54             
## Cluster1 35 40  29   49.5 35    48    1.3e-05  
## Cluster2 19 59  54.5 65   54    65             
## 
##      OR       OR.lower OR.upper
## [1,] 1.121607 1.05119  1.196741
## 
## ------------------- 
## row-variable: BVMT 
## 
##          N  med Q1   Q3    lower upper p.overall
## [ALL]    54 10  6.25 15.75 8     13             
## Cluster1 35 9   5    13    6     11    0.036071 
## Cluster2 19 14  9.5  16    9     16             
## 
##      OR       OR.lower OR.upper
## [1,] 1.090579 0.993927 1.19663 
## 
## ------------------- 
## row-variable: CVLT 
## 
##          N  med  Q1    Q3    lower upper p.overall
## [ALL]    54 52.5 44.25 61.75 49    58             
## Cluster1 35 50   39.5  58.5  45    56    0.019803 
## Cluster2 19 59   50.5  65    49    66             
## 
##      OR       OR.lower OR.upper
## [1,] 1.072029 1.009655 1.138255
## 
## ------------------- 
## row-variable: MMSE_MST 
## 
##          N  med Q1   Q3    lower upper p.overall
## [ALL]    54 53  47   59.75 51    57             
## Cluster1 35 51  44   54.5  46    53    0.000193 
## Cluster2 19 59  55.5 63.5  55    64             
## 
##      OR       OR.lower OR.upper
## [1,] 1.157575 1.056649 1.268141
## 
## ------------------- 
## row-variable: VolCinz 
## 
##          N  med Q1    Q3    lower upper p.overall
## [ALL]    54 893 860.2 927   879   914            
## Cluster1 35 873 839.5 903.5 858   889   4.6e-05  
## Cluster2 19 927 907   950   900   951            
## 
##      OR       OR.lower OR.upper
## [1,] 1.034405 1.014016 1.055203
## 
## ------------------- 
## row-variable: LesoesFlair 
## 
##          N  med Q1   Q3    lower upper p.overall
## [ALL]    54 6.7 3.8  12.15 5.4   9.1            
## Cluster1 35 9.1 6.2  17.75 6.8   14.7  4e-06    
## Cluster2 19 3.5 1.75 4.85  1.6   5.4            
## 
##      OR       OR.lower OR.upper
## [1,] 0.645438 0.488437 0.852905
## 
## ------------------- 
## row-variable: grupos 
## 
##          EM EMRR EM%      EMRR%    p.overall
## [ALL]    34 20   62.96296 37.03704          
## Cluster1 17 18   48.57143 51.42857 0.007422 
## Cluster2 17 2    89.47368 10.52632          
## 
##      OR       OR.lower OR.upper
## EM   1                         
## EMRR 0.121746 0.015932 0.519754
## 
## ------------------- 
## row-variable: grupos2 
## 
##          MS RRMS MS%      RRMS%    p.overall
## [ALL]    42 12   77.77778 22.22222          
## Cluster1 23 12   65.71429 34.28571 0.004364 
## Cluster2 19 0    100      0                 
## 
##      OR OR.lower OR.upper
## MS   1                   
## RRMS .  .        .       
## 
## ------------------- 
## row-variable: Idade 
## 
##          N  med  Q1   Q3   lower upper p.overall
## [ALL]    54 34.5 31   41.5 32    38             
## Cluster1 35 38   32   45   34    42    0.002452 
## Cluster2 19 32   26.5 34.5 26    35             
## 
##      OR       OR.lower OR.upper
## [1,] 0.869639 0.786454 0.961622
## 
## ------------------- 
## row-variable: Escolaridade 
## 
##          N  med  Q1 Q3   lower upper p.overall
## [ALL]    54 13.5 11 17   11    16             
## Cluster1 35 12   11 16.5 11    15    0.075759 
## Cluster2 19 16   13 17   13    17             
## 
##      OR       OR.lower OR.upper
## [1,] 1.094281 0.960237 1.247037

# Publication-style table by clustersRF; show.ratio = TRUE adds the OR and
# p.ratio columns
createTable(res, show.ratio = TRUE)

## 
## --------Summary descriptives table by 'clustersRF'---------
## 
## _________________________________________________________________________________ 
##                  Cluster1         Cluster2            OR        p.ratio p.overall 
##                    N=35             N=19                                          
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## TempoDoenca  7.00 [3.00;11.5] 2.00 [1.15;5.00] 0.80 [0.68;0.94]  0.008    0.002   
## EDSS         2.00 [0.00;3.25] 0.00 [0.00;0.00] 0.34 [0.16;0.75]  0.007   <0.001   
## Passos25     7.00 [5.00;8.00] 7.00 [5.00;8.00] 0.98 [0.78;1.22]  0.829    0.911   
## pinosdir9    31.0 [27.0;37.0] 24.0 [22.5;25.5] 0.54 [0.37;0.80]  0.002   <0.001   
## pinosesq9    31.0 [27.0;36.0] 25.0 [24.0;27.0] 0.74 [0.61;0.90]  0.002   <0.001   
## VitD         45.0 [36.0;75.0] 99.0 [53.5;99.0] 1.03 [1.01;1.05]  0.009    0.007   
## FSS          41.0 [31.0;53.0] 25.0 [18.5;37.0] 0.96 [0.92;1.00]  0.031    0.023   
## HADS_A       5.00 [4.00;9.00] 6.00 [3.00;9.50] 0.95 [0.82;1.10]  0.467    0.501   
## HADS_D       4.00 [3.00;8.00] 1.00 [1.00;3.00] 0.58 [0.41;0.84]  0.004   <0.001   
## SF36         92.0 [88.0;98.5] 96.0 [93.0;99.0] 1.01 [0.96;1.06]  0.666    0.198   
## SD_escr      33.0 [29.0;48.0] 59.0 [52.5;65.0] 1.13 [1.06;1.20] <0.001   <0.001   
## SD_oral      40.0 [29.0;49.5] 59.0 [54.5;65.0] 1.12 [1.05;1.20]  0.001   <0.001   
## BVMT         9.00 [5.00;13.0] 14.0 [9.50;16.0] 1.09 [0.99;1.20]  0.067    0.036   
## CVLT         50.0 [39.5;58.5] 59.0 [50.5;65.0] 1.07 [1.01;1.14]  0.023    0.020   
## MMSE_MST     51.0 [44.0;54.5] 59.0 [55.5;63.5] 1.16 [1.06;1.27]  0.002   <0.001   
## VolCinz       873 [840;904]    927 [907;950]   1.03 [1.01;1.06]  0.001   <0.001   
## LesoesFlair  9.10 [6.20;17.8] 3.50 [1.75;4.85] 0.65 [0.49;0.85]  0.002   <0.001   
## grupos:                                                                   0.007   
##     EM          17 (48.6%)       17 (89.5%)          Ref.        Ref.             
##     EMRR        18 (51.4%)       2 (10.5%)     0.12 [0.02;0.52]  0.003            
## grupos2:                                                                  0.004   
##     MS          23 (65.7%)       19 (100%)           Ref.        Ref.             
##     RRMS        12 (34.3%)       0 (0.00%)         . [.;.]         .              
## Idade        38.0 [32.0;45.0] 32.0 [26.5;34.5] 0.87 [0.79;0.96]  0.006    0.002   
## Escolaridade 12.0 [11.0;16.5] 16.0 [13.0;17.0] 1.09 [0.96;1.25]  0.177    0.076   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

# Default plots of the compareGroups result
plot(res)

# NOTE(review): bivar = TRUE presumably plots each variable against the
# grouping variable (clustersRF) -- confirm in the compareGroups docs
plot(res, bivar = TRUE)

# Alternative table printouts, kept disabled:
#restab <- createTable(res, show.ratio = TRUE)
#print(restab, which.table = "avail")
#print(restab, which.table = "descr")

# Graphical exploration of the full MRI data set (Ress)
library(DataExplorer)
# plot_missing(Ress)
plot_histogram(Ress)

# plot_density(Ress)
# Correlations among the continuous variables, using pairwise-complete cases
plot_correlation(
  Ress,
  type = "continuous",
  cor_args = list(use = "pairwise.complete.obs")
)

# NOTE(review): this call uses `dados` while every other plot in this section
# uses `Ress` -- confirm that the norms data set is really intended here.
plot_correlation(
  dados,
  type = "discrete",
  cor_args = list(use = "pairwise.complete.obs")
)

# plot_bar(Ress)
# Bar charts of the discrete variables, with the 5th column left out
plot_bar(Ress[-5])

Gráficos de Rede

# Build a separate MRI data set (TO BE USED IN OTHER ANALYSES)
Ress2 <- dplyr::select(
  Ress,
  Idade, Escolaridade, TempoDoenca:pinosesq9, VitD,
  FSS:SD_oral, BVMT, CVLT, CVLT_rep:LesoesImpreg
)

# Short labels for the plots, given in the same order as the selected columns
names(Ress2) <- c(
  "Idade", "Escola", "TempoD", "EDSS", "25Passo", "9PinoD", "9PinoE",
  "VitD", "FSS", "HADSA", "HADSD", "SF36", "SDescr", "SDoral", "BVMT",
  "CVLT", "CVLTrep", "MMSEmst", "VolCereb", "VolCinz", "LesFlair", "LesImpr"
)

library(qgraph)
# Correlation matrix of the MRI variables; computed once and reused by the
# correlogram and by the network plots below
clinic <- cor_auto(Ress2)

library(corrplot)
# Lower-triangle correlogram, variables ordered by hierarchical clustering
corrplot(clinic, type = "lower", order = "hclust")

# Pearson correlation network
qgraph(cor(Ress2), layout = "spring", sampleSize = nrow(Ress2),
       labels = colnames(Ress2))

# Partial correlation network. graph = "pcor" is required: without it qgraph
# draws the correlation matrix as given, i.e. a plain correlation network.
qgraph(clinic, graph = "pcor", layout = "spring", sampleSize = nrow(Ress2),
       labels = colnames(Ress2))

# Network from the glasso-penalised estimate (penalty rho = 0.1)
library(glasso)
qgraph(glasso(clinic, rho = 0.1), layout = "spring", sampleSize = nrow(Ress2),
       labels = colnames(Ress2))

# Correlation among the variables of the MRI data set (Ress)
library(qgraph)
library(corrplot)
# Columns are dropped by position before computing the correlations.
# NOTE(review): positional indices depend on the sheet layout -- verify them
# if the Excel sheet changes.
clinic <- cor_auto(Ress[-c(2,4:8,14,16,23:25,27:31)])
# Lower-triangle correlogram, variables ordered by hierarchical clustering
corrplot(clinic, type="lower", order="hclust")

Pontos de corte

# Pontos de corte
library(cutpointr)

# Written SDMT (SD_escr) cut-off between the clustersRF groups, with Cluster1
# as the positive class, maximising sensitivity + specificity.
# Seeded so that the 1000 bootstrap runs summarised by summary() are
# reproducible, like the other seeded cut-off analyses in this script.
set.seed(4)
cp1 <- cutpointr(dados2, SD_escr, clustersRF,
                 pos_class = "Cluster1", neg_class = "Cluster2",
                 method = maximize_metric, metric = sum_sens_spec,
                 boot_runs = 1000)
summary(cp1)

## Method: maximize_metric 
## Predictor: SD_escr 
## Outcome: clustersRF 
## Direction: <= 
## Nr. of bootstraps: 1000 
## 
##     AUC  n n_pos n_neg
##  0.8805 54    35    19
## 
##  optimal_cutpoint sum_sens_spec    acc sensitivity specificity tp fn fp tn
##                51        1.7038 0.8704      0.9143      0.7895 32  3  4 15
## 
## Predictor summary: 
##      Data Min.   5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##   Overall   11 23.0    30.0   47.5 44.25926    56.5 68.35   77 15.76500   0
##  Cluster1   11 23.0    29.0   33.0 37.20000    48.0 54.20   68 12.08256   0
##  Cluster2   19 31.6    52.5   59.0 57.26316    65.0 73.40   77 13.44058   0
## 
## Bootstrap summary: 
##           Variable  Min.    5% 1st Qu. Median  Mean 3rd Qu.   95% Max.   SD NAs
##   optimal_cutpoint 40.00 48.00   50.00  50.25 50.40   51.00 53.00   57 1.73   0
##              AUC_b  0.63  0.78    0.84   0.89  0.88    0.93  0.97    1 0.06   0
##            AUC_oob  0.34  0.73    0.82   0.89  0.88    0.95  1.00    1 0.09   0
##    sum_sens_spec_b  1.45  1.59    1.68   1.75  1.74    1.81  1.89    2 0.09   0
##  sum_sens_spec_oob  0.75  1.33    1.51   1.63  1.62    1.73  1.88    2 0.16   0
##              acc_b  0.70  0.80    0.85   0.87  0.88    0.91  0.94    1 0.05   0
##            acc_oob  0.53  0.70    0.77   0.82  0.82    0.88  0.94    1 0.07   0
##      sensitivity_b  0.62  0.76    0.85   0.90  0.89    0.94  0.98    1 0.07   0
##    sensitivity_oob  0.43  0.64    0.79   0.87  0.85    0.92  1.00    1 0.11   0
##      specificity_b  0.53  0.69    0.79   0.86  0.85    0.92  1.00    1 0.09   0
##    specificity_oob  0.00  0.50    0.67   0.78  0.77    0.88  1.00    1 0.16   0
##     cohens_kappa_b  0.43  0.57    0.67   0.73  0.73    0.80  0.88    1 0.10   0
##   cohens_kappa_oob -0.17  0.33    0.49   0.61  0.60    0.71  0.86    1 0.16   0

# Default diagnostic plots of the cp1 cut-off analysis
plot(cp1)

# Metric value as a function of the candidate cut-off
plot_metric(cp1)

# Written SDMT, robust method: cut-off between the clustersRF groups chosen
# with maximize_boot_metric (boot_cut = 200, summarised with mean),
# optimising accuracy
set.seed(4)
cp1.1 <- cutpointr(
  dados2, SD_escr, clustersRF,
  pos_class = "Cluster1",
  neg_class = "Cluster2",
  method = maximize_boot_metric,
  metric = accuracy,
  boot_cut = 200,
  summary_func = mean,
  silent = TRUE
)
summary(cp1.1)

## Method: maximize_boot_metric 
## Predictor: SD_escr 
## Outcome: clustersRF 
## Direction: <= 
## 
##     AUC  n n_pos n_neg
##  0.8805 54    35    19
## 
##  optimal_cutpoint accuracy    acc sensitivity specificity tp fn fp tn
##           51.8728   0.8704 0.8704      0.9143      0.7895 32  3  4 15
## 
## Predictor summary: 
##      Data Min.   5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##   Overall   11 23.0    30.0   47.5 44.25926    56.5 68.35   77 15.76500   0
##  Cluster1   11 23.0    29.0   33.0 37.20000    48.0 54.20   68 12.08256   0
##  Cluster2   19 31.6    52.5   59.0 57.26316    65.0 73.40   77 13.44058   0

# Default diagnostic plots of the cp1.1 (bootstrapped cut-off) analysis
plot(cp1.1)

# Written SDMT (SD_escr) cut-off between the EM and EMRR groups, with EMRR as
# the positive class, maximising sensitivity + specificity.
# Seeded so that the 1000 bootstrap runs summarised by summary() are
# reproducible, like the other seeded cut-off analyses in this script.
set.seed(4)
cp3 <- cutpointr(dados2, SD_escr, grupos,
                 pos_class = "EMRR", neg_class = "EM",
                 method = maximize_metric, metric = sum_sens_spec,
                 boot_runs = 1000)
summary(cp3)

## Method: maximize_metric 
## Predictor: SD_escr 
## Outcome: grupos 
## Direction: <= 
## Nr. of bootstraps: 1000 
## 
##    AUC  n n_pos n_neg
##  0.761 54    20    34
## 
##  optimal_cutpoint sum_sens_spec    acc sensitivity specificity tp fn fp tn
##                50        1.4588 0.6852         0.9      0.5588 18  2 15 19
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   11 23.00   30.00   47.5 44.25926   56.50 68.35   77 15.76500   0
##       EM   19 25.65   35.25   51.0 49.55882   61.50 70.40   77 15.71178   0
##     EMRR   11 22.40   29.00   32.5 35.25000   47.25 52.05   53 11.35028   0
## 
## Bootstrap summary: 
##           Variable  Min.    5% 1st Qu. Median  Mean 3rd Qu.   95%  Max.   SD
##   optimal_cutpoint 23.00 33.00   38.00  50.00 45.93   52.00 53.00 53.00 7.21
##              AUC_b  0.46  0.65    0.72   0.77  0.76    0.81  0.86  0.94 0.07
##            AUC_oob  0.46  0.60    0.70   0.76  0.76    0.82  0.90  1.00 0.09
##    sum_sens_spec_b  0.97  1.35    1.45   1.52  1.52    1.58  1.67  1.80 0.10
##  sum_sens_spec_oob  0.50  1.03    1.22   1.33  1.32    1.44  1.58  1.72 0.17
##              acc_b  0.41  0.59    0.69   0.72  0.72    0.78  0.81  0.91 0.07
##            acc_oob  0.35  0.50    0.58   0.64  0.64    0.70  0.77  0.88 0.08
##      sensitivity_b  0.43  0.67    0.82   0.93  0.89    1.00  1.00  1.00 0.11
##    sensitivity_oob  0.00  0.36    0.60   0.80  0.75    1.00  1.00  1.00 0.22
##      specificity_b  0.24  0.39    0.52   0.63  0.62    0.73  0.84  1.00 0.14
##    specificity_oob  0.10  0.31    0.44   0.56  0.57    0.69  0.85  1.00 0.17
##     cohens_kappa_b -0.02  0.29    0.39   0.46  0.46    0.54  0.63  0.81 0.11
##   cohens_kappa_oob -0.36  0.03    0.19   0.29  0.29    0.39  0.55  0.73 0.15
##  NAs
##    0
##    0
##    0
##    0
##    0
##    0
##    0
##    0
##    0
##    0
##    0
##    0
##    0

# Default diagnostic plots of the cp3 cut-off analysis
plot(cp3)

# Metric value as a function of the candidate cut-off
plot_metric(cp3)

# Written SDMT, EM vs EMRR groups, robust method: cut-off chosen with
# maximize_boot_metric (boot_cut = 200, summarised with mean),
# optimising accuracy
set.seed(4)
cp3.1 <- cutpointr(
  dados2, SD_escr, grupos,
  pos_class = "EMRR",
  neg_class = "EM",
  method = maximize_boot_metric,
  metric = accuracy,
  boot_cut = 200,
  summary_func = mean,
  silent = TRUE
)
summary(cp3.1)

## Method: maximize_boot_metric 
## Predictor: SD_escr 
## Outcome: grupos 
## Direction: <= 
## 
##    AUC  n n_pos n_neg
##  0.761 54    20    34
## 
##  optimal_cutpoint accuracy    acc sensitivity specificity tp fn fp tn
##           36.8909   0.7037 0.7037        0.65      0.7353 13  7  9 25
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   11 23.00   30.00   47.5 44.25926   56.50 68.35   77 15.76500   0
##       EM   19 25.65   35.25   51.0 49.55882   61.50 70.40   77 15.71178   0
##     EMRR   11 22.40   29.00   32.5 35.25000   47.25 52.05   53 11.35028   0

# Default diagnostic plots of the cp3.1 (bootstrapped cut-off) analysis
plot(cp3.1)

# Written SDMT (SD_escr) cut-off between the MS and RRMS groups, with RRMS as
# the positive class, maximising sensitivity + specificity.
# Seeded so that the 1000 bootstrap runs summarised by summary() are
# reproducible, like the other seeded cut-off analyses in this script.
set.seed(4)
cp4 <- cutpointr(dados2, SD_escr, grupos2,
                 pos_class = "RRMS", neg_class = "MS",
                 method = maximize_metric, metric = sum_sens_spec,
                 boot_runs = 1000)
summary(cp4)

## Method: maximize_metric 
## Predictor: SD_escr 
## Outcome: grupos2 
## Direction: <= 
## Nr. of bootstraps: 1000 
## 
##     AUC  n n_pos n_neg
##  0.7837 54    12    42
## 
##  optimal_cutpoint sum_sens_spec    acc sensitivity specificity tp fn fp tn
##              40.5         1.369 0.6481        0.75       0.619  9  3 16 26
## 
## Predictor summary: 
##     Data Min.   5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   11 23.0   30.00   47.5 44.25926   56.50 68.35   77 15.76500   0
##       MS   19 26.0   33.25   50.0 47.64286   58.75 68.95   77 15.18796   0
##     RRMS   11 17.6   27.50   30.0 32.41667   36.50 50.25   53 11.87402   0
## 
## Bootstrap summary: 
##           Variable  Min.    5% 1st Qu. Median  Mean 3rd Qu.   95%  Max.   SD
##   optimal_cutpoint 11.00 30.00   33.00  33.00 39.52   48.00 48.00 53.00 8.33
##              AUC_b  0.50  0.66    0.74   0.79  0.78    0.83  0.89  0.96 0.07
##            AUC_oob  0.34  0.59    0.72   0.79  0.78    0.86  0.94  1.00 0.10
##    sum_sens_spec_b  1.16  1.36    1.48   1.56  1.55    1.63  1.74  1.93 0.12
##  sum_sens_spec_oob  0.42  0.98    1.21   1.36  1.35    1.50  1.71  1.93 0.22
##              acc_b  0.39  0.56    0.67   0.74  0.72    0.78  0.85  0.96 0.09
##            acc_oob  0.22  0.48    0.59   0.67  0.66    0.73  0.82  0.94 0.11
##      sensitivity_b  0.30  0.64    0.80   0.89  0.87    1.00  1.00  1.00 0.12
##    sensitivity_oob  0.00  0.20    0.50   0.75  0.70    1.00  1.00  1.00 0.26
##      specificity_b  0.20  0.47    0.60   0.69  0.68    0.78  0.87  1.00 0.13
##    specificity_oob  0.12  0.36    0.54   0.67  0.65    0.77  0.89  1.00 0.16
##     cohens_kappa_b  0.10  0.21    0.33   0.42  0.42    0.49  0.62  0.84 0.12
##   cohens_kappa_oob -0.28 -0.02    0.15   0.25  0.25    0.36  0.53  0.82 0.17
##  NAs
##    0
##    0
##    1
##    0
##    1
##    0
##    0
##    0
##    1
##    0
##    0
##    0
##    0

# Default cutpointr plot for the cp4 fit (auto-printed at top level).
plot(cp4)

# Metric value as a function of the candidate cutpoint for cp4.
plot_metric(cp4)

# Written SDMT (SD_escr), groups MS vs. RRMS -- bootstrapped ("robust") cutpoint.
# Same recipe as the other ".1" fits: 200 bootstrap resamples (boot_cut), the
# per-resample optimal cutpoints averaged with mean(), accuracy as the metric.
# NOTE(review): the groups are imbalanced (12 RRMS vs. 42 MS in the printed
# summary) and maximising accuracy yields sensitivity 0.25 -- confirm accuracy
# is the intended target here.
set.seed(4) # fixed seed so the bootstrap resamples are repeatable
cp4.1 <- cutpointr(
  dados2, SD_escr, grupos2,
  method = maximize_boot_metric,
  metric = accuracy,
  boot_cut = 200,
  summary_func = mean,
  pos_class = "RRMS",
  neg_class = "MS",
  silent = TRUE
)
summary(cp4.1)

## Method: maximize_boot_metric 
## Predictor: SD_escr 
## Outcome: grupos2 
## Direction: <= 
## 
##     AUC  n n_pos n_neg
##  0.7837 54    12    42
## 
##  optimal_cutpoint accuracy    acc sensitivity specificity tp fn fp tn
##           23.2105   0.8148 0.8148        0.25      0.9762  3  9  1 41
## 
## Predictor summary: 
##     Data Min.   5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   11 23.0   30.00   47.5 44.25926   56.50 68.35   77 15.76500   0
##       MS   19 26.0   33.25   50.0 47.64286   58.75 68.95   77 15.18796   0
##     RRMS   11 17.6   27.50   30.0 32.41667   36.50 50.25   53 11.87402   0

# Default cutpointr plot for the bootstrapped fit cp4.1.
plot(cp4.1)

# Oral SDMT (SD_oral) against the clustersRF grouping; Cluster1 is the
# positive class. The cutpoint maximises sensitivity + specificity on the
# full sample (no bootstrapping, hence no seed is needed).
cp5 <- cutpointr(
  dados2, SD_oral, clustersRF,
  method = maximize_metric,
  metric = sum_sens_spec,
  pos_class = "Cluster1",
  neg_class = "Cluster2"
)
summary(cp5)

## Method: maximize_metric 
## Predictor: SD_oral 
## Outcome: clustersRF 
## Direction: <= 
## 
##     AUC  n n_pos n_neg
##  0.8617 54    35    19
## 
##  optimal_cutpoint sum_sens_spec    acc sensitivity specificity tp fn fp tn
##                50        1.6662 0.8148      0.7714      0.8947 27  8  2 17
## 
## Predictor summary: 
##      Data Min.   5% 1st Qu. Median     Mean 3rd Qu.  95% Max.       SD NAs
##   Overall   12 23.0    35.0     49 46.29630    57.0 69.4   77 15.44391   0
##  Cluster1   12 23.0    29.0     40 39.88571    49.5 57.3   68 12.42334   0
##  Cluster2   19 31.6    54.5     59 58.10526    65.0 73.4   77 13.55194   0

# Default cutpointr plot for the cp5 fit.
plot(cp5)

# Oral SDMT (SD_oral) against clustersRF -- bootstrapped ("robust") cutpoint.
# 200 bootstrap resamples (boot_cut); the per-resample optimal cutpoints are
# averaged with mean(); accuracy is the metric; Cluster1 is the positive class.
set.seed(4) # fixed seed so the bootstrap resamples are repeatable
cp5.1 <- cutpointr(
  dados2, SD_oral, clustersRF,
  method = maximize_boot_metric,
  metric = accuracy,
  boot_cut = 200,
  summary_func = mean,
  pos_class = "Cluster1",
  neg_class = "Cluster2",
  silent = TRUE
)
summary(cp5.1)

## Method: maximize_boot_metric 
## Predictor: SD_oral 
## Outcome: clustersRF 
## Direction: <= 
## 
##     AUC  n n_pos n_neg
##  0.8617 54    35    19
## 
##  optimal_cutpoint accuracy    acc sensitivity specificity tp fn fp tn
##           54.0573   0.8519 0.8519      0.9143      0.7368 32  3  5 14
## 
## Predictor summary: 
##      Data Min.   5% 1st Qu. Median     Mean 3rd Qu.  95% Max.       SD NAs
##   Overall   12 23.0    35.0     49 46.29630    57.0 69.4   77 15.44391   0
##  Cluster1   12 23.0    29.0     40 39.88571    49.5 57.3   68 12.42334   0
##  Cluster2   19 31.6    54.5     59 58.10526    65.0 73.4   77 13.55194   0

# Default cutpointr plot for the bootstrapped fit cp5.1.
plot(cp5.1)

# Oral SDMT (SD_oral), groups EM vs. EMRR (grupos); EMRR is the positive class.
# The cutpoint maximises sensitivity + specificity on the full sample.
cp6 <- cutpointr(
  dados2, SD_oral, grupos,
  method = maximize_metric,
  metric = sum_sens_spec,
  pos_class = "EMRR",
  neg_class = "EM"
)
summary(cp6)

## Method: maximize_metric 
## Predictor: SD_oral 
## Outcome: grupos 
## Direction: <= 
## 
##    AUC  n n_pos n_neg
##  0.714 54    20    34
## 
##  optimal_cutpoint sum_sens_spec    acc sensitivity specificity tp fn fp tn
##                41        1.4059 0.7037         0.7      0.7059 14  6 10 24
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   12 23.00   35.00   49.0 46.29630   57.00 69.40   77 15.44391   0
##       EM   19 25.65   40.25   53.0 50.61765   62.25 72.35   77 15.45279   0
##     EMRR   12 22.45   29.00   39.5 38.95000   50.75 57.05   58 12.66356   0

# Default cutpointr plot for the cp6 fit.
plot(cp6)

# Oral SDMT (SD_oral), groups EM vs. EMRR -- bootstrapped ("robust") cutpoint.
# 200 bootstrap resamples (boot_cut); the per-resample optimal cutpoints are
# averaged with mean(); accuracy is the metric; EMRR is the positive class.
set.seed(4) # fixed seed so the bootstrap resamples are repeatable
cp6.1 <- cutpointr(
  dados2, SD_oral, grupos,
  method = maximize_boot_metric,
  metric = accuracy,
  boot_cut = 200,
  summary_func = mean,
  pos_class = "EMRR",
  neg_class = "EM",
  silent = TRUE
)
summary(cp6.1)

## Method: maximize_boot_metric 
## Predictor: SD_oral 
## Outcome: grupos 
## Direction: <= 
## 
##    AUC  n n_pos n_neg
##  0.714 54    20    34
## 
##  optimal_cutpoint accuracy    acc sensitivity specificity tp fn fp tn
##           36.1298   0.6667 0.6667        0.45      0.7941  9 11  7 27
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   12 23.00   35.00   49.0 46.29630   57.00 69.40   77 15.44391   0
##       EM   19 25.65   40.25   53.0 50.61765   62.25 72.35   77 15.45279   0
##     EMRR   12 22.45   29.00   39.5 38.95000   50.75 57.05   58 12.66356   0

# Default cutpointr plot for the bootstrapped fit cp6.1.
plot(cp6.1)

# Oral SDMT (SD_oral), groups MS vs. RRMS (grupos2); RRMS is the positive class.
# The cutpoint maximises sensitivity + specificity on the full sample.
cp7 <- cutpointr(
  dados2, SD_oral, grupos2,
  method = maximize_metric,
  metric = sum_sens_spec,
  pos_class = "RRMS",
  neg_class = "MS"
)
summary(cp7)

## Method: maximize_metric 
## Predictor: SD_oral 
## Outcome: grupos2 
## Direction: <= 
## 
##    AUC  n n_pos n_neg
##  0.745 54    12    42
## 
##  optimal_cutpoint sum_sens_spec    acc sensitivity specificity tp fn fp tn
##                38        1.3452 0.7222      0.5833      0.7619  7  5 10 32
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   12 23.00   35.00   49.0 46.29630   57.00 69.40   77 15.44391   0
##       MS   19 26.00   39.25   52.5 49.30952   58.75 71.80   77 14.72759   0
##     RRMS   12 18.05   27.50   35.0 35.75000   43.25 55.25   58 13.59228   0

# Default cutpointr plot for the cp7 fit.
plot(cp7)

# Oral SDMT (SD_oral), groups MS vs. RRMS -- bootstrapped ("robust") cutpoint.
# 200 bootstrap resamples (boot_cut); the per-resample optimal cutpoints are
# averaged with mean(); accuracy is the metric; RRMS is the positive class.
# NOTE(review): the groups are imbalanced (12 RRMS vs. 42 MS in the printed
# summary) and maximising accuracy yields sensitivity 0.25 -- confirm accuracy
# is the intended target here.
set.seed(4) # fixed seed so the bootstrap resamples are repeatable
cp7.1 <- cutpointr(
  dados2, SD_oral, grupos2,
  method = maximize_boot_metric,
  metric = accuracy,
  boot_cut = 200,
  summary_func = mean,
  pos_class = "RRMS",
  neg_class = "MS",
  silent = TRUE
)
summary(cp7.1)

## Method: maximize_boot_metric 
## Predictor: SD_oral 
## Outcome: grupos2 
## Direction: <= 
## 
##    AUC  n n_pos n_neg
##  0.745 54    12    42
## 
##  optimal_cutpoint accuracy    acc sensitivity specificity tp fn fp tn
##              23.2   0.8148 0.8148        0.25      0.9762  3  9  1 41
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   12 23.00   35.00   49.0 46.29630   57.00 69.40   77 15.44391   0
##       MS   19 26.00   39.25   52.5 49.30952   58.75 71.80   77 14.72759   0
##     RRMS   12 18.05   27.50   35.0 35.75000   43.25 55.25   58 13.59228   0

# Default cutpointr plot for the bootstrapped fit cp7.1.
plot(cp7.1)

Machine Learning

Método XGBoost

# https://rpubs.com/dalekube/XGBoost-Iris-Classification-Example-in-R#:~:text=XGBoost%20(Extreme%20Gradient%20Boosting)%20is,%2Dclass%20(multinomial)%20classification.
library(xgboost)

# Build the XGBoost inputs: a numeric predictor matrix plus a 0-based label.
# Predictors are the columns SD_escr through MMSE_MST; clustersRF is the
# outcome and is selected only so the label can be derived from it.
dados4 <- dados2 %>% dplyr::select(SD_escr:MMSE_MST, clustersRF)

# Keep the outcome itself (its levels are reused later to name the predicted
# classes) and encode it as 0, 1, ...: XGBoost requires integer class labels
# starting at 0, while as.integer() on a factor gives 1-based level codes.
# NOTE(review): this assumes clustersRF is a factor -- confirm upstream.
clusters <- dados4$clustersRF
label <- as.integer(dados4$clustersRF) - 1

# Drop the outcome so it cannot end up inside the predictor matrix.
dados4$clustersRF <- NULL

# Random 80/20 train/test split. Seed the RNG first (same seed as the
# bootstrapped cutpoint fits in this script) so the split, and therefore the
# accuracy reported at the end, is the same on every run.
set.seed(4)
n <- nrow(dados4)
train.index <- sample(n, floor(0.8 * n))
train.data <- as.matrix(dados4[train.index, ])
train.label <- label[train.index]
test.data <- as.matrix(dados4[-train.index, ])
test.label <- label[-train.index]

# Wrap both sets as xgb.DMatrix objects.
# The name `xgb.train` shadows the xgboost function of the same name; later
# calls to xgb.train(...) still work because R skips non-function objects when
# looking up a function, but the name is kept only because later chunks use it.
xgb.train <- xgb.DMatrix(data = train.data, label = train.label)
xgb.test <- xgb.DMatrix(data = test.data, label = test.label)

# Hyper-parameters for the multi-class XGBoost model.
# The number of classes is read from the outcome's levels rather than
# hard-coded, so it follows whatever grouping `clusters` holds.
num_class <- nlevels(clusters)
params <- list(
  booster = "gbtree",             # tree-based booster
  eta = 0.001,                    # learning rate (very small, hence many rounds)
  max_depth = 5,                  # maximum depth of each tree
  gamma = 3,                      # minimum loss reduction required to split
  subsample = 0.7,                # fraction of rows sampled per tree
  colsample_bytree = 1,           # use every column for every tree
  objective = "multi:softprob",   # per-class probabilities as output
  eval_metric = "mlogloss",       # multi-class log loss
  num_class = num_class
)

# Train the XGBoost classifier.
# - Up to 10000 boosting rounds, stopped early once the monitored metric has
#   not improved for 10 consecutive rounds. With several watchlist entries the
#   last one (val2, the test set) drives early stopping.
# - The thread-count parameter is `nthread`; the previous spelling `nthreads`
#   was not recognised and XGBoost warned that it "might not be used".
# - subsample < 1 makes training stochastic; the R package draws from R's RNG,
#   so seed it to make the fitted model repeatable.
# `data = xgb.train` is the training xgb.DMatrix (a variable that shares its
# name with the function being called).
set.seed(4)
xgb.fit <- xgb.train(
  params = params,
  data = xgb.train,
  nrounds = 10000,
  nthread = 1,
  early_stopping_rounds = 10,
  watchlist = list(val1 = xgb.train, val2 = xgb.test),
  verbose = 0
)

## [12:08:36] WARNING: amalgamation/../src/learner.cc:541: 
## Parameters: { nthreads } might not be used.
## 
##   This may not be accurate due to some parameters are only used in language bindings but
##   passed down to XGBoost core.  Or some parameters are not used but slip through this
##   verification. Please open an issue if you find above cases.

# Print the fitted booster: the call, the parameters as set, the early-stopping
# attributes (best_iteration, best_score, ...) and the evaluation log.
xgb.fit

## ##### xgb.Booster
## raw: 1.2 Mb 
## call:
##   xgb.train(params = params, data = xgb.train, nrounds = 10000, 
##     watchlist = list(val1 = xgb.train, val2 = xgb.test), verbose = 0, 
##     early_stopping_rounds = 10, nthreads = 1)
## params (as set within xgb.train):
##   booster = "gbtree", eta = "0.001", max_depth = "5", gamma = "3", subsample = "0.7", colsample_bytree = "1", objective = "multi:softprob", eval_metric = "mlogloss", num_class = "2", nthreads = "1", validate_parameters = "TRUE"
## xgb.attributes:
##   best_iteration, best_msg, best_ntreelimit, best_score, niter
## callbacks:
##   cb.evaluation.log()
##   cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize, 
##     verbose = verbose)
## # of features: 5 
## niter: 715
## best_iteration : 705 
## best_ntreelimit : 705 
## best_score : 0.604711 
## best_msg : [705] val1-mlogloss:0.463218  val2-mlogloss:0.604711 
## nfeatures : 5 
## evaluation_log:
##     iter val1_mlogloss val2_mlogloss
##        1      0.692599      0.692823
##        2      0.692002      0.692527
## ---                                 
##      714      0.462019      0.604747
##      715      0.461867      0.604738

# Predict class probabilities for the held-out rows.
# reshape = TRUE returns one row per observation and one column per class
# (spelled out as TRUE because `T` is an ordinary, reassignable variable).
xgb.pred <- predict(xgb.fit, test.data, reshape = TRUE)
xgb.pred <- as.data.frame(xgb.pred)
colnames(xgb.pred) <- levels(clusters)

# Predicted class = the column holding the highest probability in each row.
# This must run while xgb.pred still contains only the probability columns.
xgb.pred$prediction <- colnames(xgb.pred)[apply(xgb.pred, 1, which.max)]
# Map the 0-based integer labels back to the class names.
xgb.pred$label <- levels(clusters)[test.label + 1]

# Share of test rows whose predicted class matches the true class.
# NOTE(review): the same test set was used as the early-stopping validation
# set during training, so this accuracy is likely optimistic -- consider a
# separate validation split.
result <- mean(xgb.pred$prediction == xgb.pred$label)
print(paste("Final Accuracy =",sprintf("%1.2f%%", 100*result)))

## [1] "Final Accuracy = 72.73%"

SDMT - Symbol Digit Modalities Test

Danilo A Pereira, Ph.D.

4/7/2021

Bancos de dados

Análise descritiva

Imputacao Ress

Medidas clínicas

Análise de cluster RM

Análise de Componentes Principais

Exploração dos dados

Sintaxe das estatísticas robustas

Resultados robustos

Gráficos tradicionais

Gráficos de Rede

Pontos de corte

Machine Learning

Método XGBoost