Análise descritiva
# Descriptive analysis of the numeric variables
library(knitr)
library(psych)
options(knitr.table.format = "markdown")
# Data set that will be used to build the norms.
# `trim` is spelled out in full: the abbreviated `tr` only worked through
# partial argument matching. 0.2 requests the 20% trimmed mean.
kable(
  psych::describe(dplyr::select_if(dados, is.numeric), trim = 0.2),
  digits = 2
)
|                 | vars |   n |  mean |    sd | median | trimmed |   mad | min | max | range |  skew | kurtosis |   se |
|:----------------|-----:|----:|------:|------:|-------:|--------:|------:|----:|----:|------:|------:|---------:|-----:|
| Idade           |    1 | 587 | 44.50 | 16.39 |     41 |   42.95 | 19.27 |  18 |  92 |    74 |  0.43 |    -0.77 | 0.68 |
| FaixaEtaria     |    2 | 587 |  3.02 |  1.63 |      3 |    2.88 |  1.48 |   1 |   7 |     6 |  0.45 |    -0.80 | 0.07 |
| Escola          |    3 | 587 |  3.46 |  0.74 |      4 |    3.68 |  0.00 |   2 |   4 |     2 | -0.96 |    -0.55 | 0.03 |
| BAI             |    4 | 455 |  8.92 |  8.47 |      6 |    6.95 |  5.93 |   0 |  43 |    43 |  1.28 |     1.22 | 0.40 |
| BDI             |    5 | 455 |  9.35 |  7.96 |      7 |    7.88 |  5.93 |   0 |  51 |    51 |  1.28 |     2.15 | 0.37 |
| HADS_A          |    6 | 587 |  5.84 |  3.85 |      5 |    5.33 |  2.97 |   0 |  21 |    21 |  0.79 |     0.31 | 0.16 |
| HADS_D          |    7 | 587 |  4.50 |  3.36 |      4 |    3.97 |  2.97 |   0 |  19 |    19 |  1.03 |     1.01 | 0.14 |
| MMSE_SV         |    8 | 574 | 28.49 |  1.84 |     29 |   29.02 |  1.48 |  21 |  30 |     9 | -1.36 |     1.33 | 0.08 |
| SD_escr         |    9 | 586 | 43.94 | 17.14 |     45 |   44.17 | 16.31 |   2 | 110 |   108 |  0.28 |     0.77 | 0.71 |
| SD_oral         |   10 | 587 | 47.84 | 18.34 |     50 |   48.16 | 17.79 |   2 | 110 |   108 |  0.13 |     0.20 | 0.76 |
| SD_oral_escrito |   11 | 587 |  3.98 |  4.90 |      5 |    4.84 |  5.93 |  -9 |   9 |    18 | -0.60 |    -0.74 | 0.20 |
# Partial data set (IBNeuro only), to be used for the correlations.
# `trim` is spelled out in full instead of the partially matched `tr`.
kable(
  psych::describe(dplyr::select_if(parcial, is.numeric), trim = 0.2),
  digits = 2
)
| Idade |
1 |
471 |
42.33 |
15.20 |
39 |
40.60 |
16.31 |
18 |
89 |
71 |
0.52 |
-0.61 |
0.70 |
| BAI |
2 |
471 |
9.06 |
8.64 |
7 |
7.05 |
7.41 |
0 |
45 |
45 |
1.33 |
1.46 |
0.40 |
| BDI |
3 |
471 |
9.35 |
7.94 |
7 |
7.87 |
5.93 |
0 |
51 |
51 |
1.27 |
2.06 |
0.37 |
| HADS_A |
4 |
471 |
6.15 |
4.05 |
6 |
5.69 |
4.45 |
0 |
21 |
21 |
0.67 |
0.05 |
0.19 |
| HADS_D |
5 |
471 |
4.68 |
3.59 |
4 |
4.13 |
2.97 |
0 |
19 |
19 |
0.90 |
0.44 |
0.17 |
| Escrita |
6 |
471 |
0.97 |
0.16 |
1 |
1.00 |
0.00 |
0 |
1 |
1 |
-6.00 |
34.12 |
0.01 |
| Desenho |
7 |
471 |
0.92 |
0.27 |
1 |
1.00 |
0.00 |
0 |
1 |
1 |
-3.07 |
7.44 |
0.01 |
| VelProc |
8 |
471 |
17.75 |
6.15 |
18 |
17.71 |
5.93 |
0 |
35 |
35 |
0.22 |
0.37 |
0.28 |
| MMSE2_BV |
9 |
471 |
15.47 |
0.95 |
16 |
15.77 |
0.00 |
8 |
16 |
8 |
-2.87 |
13.24 |
0.04 |
| MMSE2_SV |
10 |
471 |
28.51 |
2.05 |
29 |
29.13 |
1.48 |
13 |
30 |
17 |
-2.24 |
8.25 |
0.09 |
| MMSE2_EV |
11 |
471 |
60.44 |
10.30 |
60 |
60.59 |
8.90 |
25 |
87 |
62 |
-0.13 |
0.33 |
0.47 |
| A1_acerto |
12 |
471 |
7.11 |
2.29 |
7 |
6.88 |
2.97 |
0 |
15 |
15 |
0.50 |
0.22 |
0.11 |
| A1_intrus |
13 |
471 |
0.46 |
0.89 |
0 |
0.14 |
0.00 |
0 |
6 |
6 |
2.48 |
7.72 |
0.04 |
| A1_repet |
14 |
471 |
0.41 |
0.92 |
0 |
0.09 |
0.00 |
0 |
8 |
8 |
3.58 |
17.93 |
0.04 |
| A5_acerto |
15 |
471 |
13.15 |
2.51 |
14 |
13.49 |
2.97 |
0 |
16 |
16 |
-0.94 |
1.11 |
0.12 |
| A5_intrus |
16 |
471 |
0.17 |
0.45 |
0 |
0.00 |
0.00 |
0 |
3 |
3 |
3.20 |
11.92 |
0.02 |
| A5_repet |
17 |
471 |
1.10 |
1.58 |
0 |
0.60 |
0.00 |
0 |
8 |
8 |
1.82 |
3.38 |
0.07 |
| BVMTR1 |
18 |
471 |
5.78 |
3.18 |
6 |
5.63 |
2.97 |
0 |
12 |
12 |
0.22 |
-0.72 |
0.15 |
| BVMTR3 |
19 |
471 |
9.36 |
2.98 |
10 |
10.16 |
2.97 |
0 |
12 |
12 |
-1.26 |
0.84 |
0.14 |
| SD_escr |
20 |
470 |
44.66 |
16.04 |
45 |
44.55 |
14.83 |
6 |
110 |
104 |
0.61 |
2.00 |
0.74 |
| SD_oral |
21 |
470 |
50.31 |
18.16 |
51 |
50.29 |
17.05 |
9 |
110 |
101 |
0.26 |
0.51 |
0.84 |
| SD_oral_escrito |
22 |
470 |
5.65 |
9.92 |
6 |
5.67 |
8.90 |
-31 |
55 |
86 |
0.20 |
2.75 |
0.46 |
# Data set of the magnetic resonance imaging (MRI) study.
# `trim` is spelled out in full instead of the partially matched `tr`.
kable(
  psych::describe(dplyr::select_if(Ress, is.numeric), trim = 0.2),
  digits = 2
)
| Idade |
1 |
54 |
36.28 |
9.22 |
34.5 |
35.21 |
6.67 |
18.00 |
61.0 |
43.00 |
0.67 |
0.28 |
1.25 |
| Escolaridade |
2 |
54 |
13.80 |
4.54 |
13.5 |
13.68 |
3.71 |
4.00 |
28.0 |
24.00 |
0.31 |
0.70 |
0.62 |
| Comorbidades |
3 |
54 |
0.17 |
0.38 |
0.0 |
0.00 |
0.00 |
0.00 |
1.0 |
1.00 |
1.74 |
1.05 |
0.05 |
| Medicamentos |
4 |
54 |
0.81 |
0.39 |
1.0 |
1.00 |
0.00 |
0.00 |
1.0 |
1.00 |
-1.58 |
0.49 |
0.05 |
| TempoDoenca |
5 |
52 |
6.80 |
6.24 |
5.5 |
5.53 |
5.93 |
0.25 |
32.0 |
31.75 |
1.60 |
3.33 |
0.87 |
| EDSS |
6 |
54 |
1.56 |
2.04 |
1.0 |
0.94 |
1.48 |
0.00 |
7.5 |
7.50 |
1.27 |
0.57 |
0.28 |
| Passos25 |
7 |
54 |
7.26 |
2.56 |
7.0 |
6.76 |
2.97 |
4.00 |
15.0 |
11.00 |
1.47 |
2.21 |
0.35 |
| pinosdir9 |
8 |
54 |
28.81 |
6.61 |
27.0 |
27.56 |
5.93 |
18.00 |
45.0 |
27.00 |
0.84 |
-0.21 |
0.90 |
| pinosesq9 |
9 |
54 |
30.81 |
9.34 |
28.0 |
28.85 |
5.93 |
19.00 |
70.0 |
51.00 |
1.96 |
4.68 |
1.27 |
| MedicamentoEM |
10 |
54 |
0.78 |
0.42 |
1.0 |
0.94 |
0.00 |
0.00 |
1.0 |
1.00 |
-1.30 |
-0.32 |
0.06 |
| VitD |
11 |
54 |
62.73 |
29.98 |
55.5 |
63.23 |
37.06 |
7.00 |
100.0 |
93.00 |
0.09 |
-1.52 |
4.08 |
| FSS |
12 |
54 |
35.74 |
16.72 |
35.5 |
35.47 |
17.79 |
9.00 |
63.0 |
54.00 |
0.04 |
-1.14 |
2.27 |
| HADS_A |
13 |
54 |
6.43 |
3.97 |
5.5 |
6.09 |
3.71 |
0.00 |
16.0 |
16.00 |
0.44 |
-0.74 |
0.54 |
| HADS_D |
14 |
54 |
4.50 |
3.96 |
3.0 |
3.59 |
2.97 |
0.00 |
15.0 |
15.00 |
1.26 |
0.86 |
0.54 |
| SF36 |
15 |
54 |
91.65 |
11.74 |
94.0 |
93.88 |
7.41 |
53.00 |
105.0 |
52.00 |
-1.86 |
3.42 |
1.60 |
| SD_escr |
16 |
54 |
44.26 |
15.77 |
47.5 |
43.59 |
21.50 |
11.00 |
77.0 |
66.00 |
0.09 |
-1.04 |
2.15 |
| SD_oral |
17 |
54 |
46.30 |
15.44 |
49.0 |
46.59 |
15.57 |
12.00 |
77.0 |
65.00 |
-0.11 |
-0.89 |
2.10 |
| BVMT_T1 |
18 |
54 |
2.11 |
1.83 |
2.0 |
1.82 |
1.48 |
0.00 |
9.0 |
9.00 |
1.36 |
2.38 |
0.25 |
| BVMT_T2 |
19 |
54 |
4.00 |
2.56 |
3.0 |
3.79 |
2.97 |
0.00 |
12.0 |
12.00 |
0.85 |
0.72 |
0.35 |
| BVMT_T3 |
20 |
54 |
4.80 |
2.72 |
5.0 |
4.76 |
1.48 |
0.00 |
12.0 |
12.00 |
0.50 |
0.41 |
0.37 |
| BVMT |
21 |
54 |
10.91 |
6.65 |
10.0 |
10.53 |
7.41 |
0.00 |
33.0 |
33.00 |
0.88 |
1.26 |
0.91 |
| CVLT_A1 |
22 |
54 |
6.30 |
1.66 |
6.0 |
6.00 |
1.48 |
4.00 |
10.0 |
6.00 |
0.59 |
-0.84 |
0.23 |
| CVLT_A2 |
23 |
54 |
9.69 |
2.48 |
10.0 |
9.76 |
2.97 |
4.00 |
14.0 |
10.00 |
-0.29 |
-0.49 |
0.34 |
| CVLT_A3 |
24 |
54 |
11.33 |
2.94 |
11.5 |
11.53 |
3.71 |
4.00 |
16.0 |
12.00 |
-0.38 |
-0.63 |
0.40 |
| CVLT_A4 |
25 |
54 |
12.17 |
3.18 |
13.0 |
12.59 |
2.97 |
0.00 |
16.0 |
16.00 |
-1.27 |
2.37 |
0.43 |
| CVLT_A5 |
26 |
54 |
12.44 |
3.28 |
14.0 |
13.09 |
2.97 |
1.00 |
16.0 |
15.00 |
-1.21 |
1.31 |
0.45 |
| CVLT |
27 |
54 |
51.93 |
11.73 |
52.5 |
53.09 |
12.60 |
22.00 |
70.0 |
48.00 |
-0.55 |
-0.37 |
1.60 |
| CVLT_rep |
28 |
54 |
7.46 |
5.77 |
6.0 |
6.35 |
4.45 |
0.00 |
27.0 |
27.00 |
1.38 |
1.80 |
0.79 |
| MMSE_MST |
29 |
54 |
53.37 |
9.83 |
53.0 |
53.59 |
9.64 |
31.00 |
81.0 |
50.00 |
0.01 |
0.08 |
1.34 |
| VolCereb |
30 |
54 |
1504.43 |
70.44 |
1514.5 |
1514.45 |
55.60 |
1291.00 |
1643.0 |
352.00 |
-0.82 |
0.63 |
9.59 |
| VolCinz |
31 |
54 |
890.75 |
52.32 |
893.0 |
894.04 |
49.67 |
766.00 |
995.0 |
229.00 |
-0.32 |
-0.36 |
7.12 |
| LesoesFlair |
32 |
54 |
9.70 |
8.55 |
6.7 |
7.50 |
5.04 |
1.20 |
38.9 |
37.70 |
1.48 |
1.63 |
1.16 |
| LesoesImpreg |
33 |
54 |
0.01 |
0.07 |
0.0 |
0.00 |
0.00 |
0.00 |
0.4 |
0.40 |
4.57 |
21.35 |
0.01 |
# Data set of the Multiple Sclerosis study.
# `trim` is spelled out in full instead of the partially matched `tr`.
kable(
  psych::describe(dplyr::select_if(EM, is.numeric), trim = 0.2),
  digits = 2
)
|              | vars |   n |  mean |    sd | median | trimmed |   mad | min | max | range |  skew | kurtosis |   se |
|:-------------|-----:|----:|------:|------:|-------:|--------:|------:|----:|----:|------:|------:|---------:|-----:|
| Idade        |    1 | 256 | 40.68 | 11.88 |   40.0 |   40.03 | 13.34 |  17 |  72 |    55 |  0.28 |    -0.65 | 0.74 |
| Escolaridade |    2 | 256 | 12.51 |  4.15 |   12.0 |   12.61 |  3.71 |   1 |  28 |    27 |  0.07 |     0.96 | 0.26 |
| HADS_D       |    3 | 256 |  6.00 |  3.75 |    5.0 |    5.56 |  2.97 |   0 |  19 |    19 |  0.79 |     0.43 | 0.23 |
| HADS_A       |    4 | 256 |  7.20 |  4.21 |    6.0 |    6.62 |  2.97 |   0 |  21 |    21 |  0.77 |     0.32 | 0.26 |
| SD_escr      |    5 | 256 | 34.16 | 15.56 |   33.5 |   33.99 | 15.57 |   0 |  77 |    77 |  0.12 |    -0.21 | 0.97 |
| SD_oral      |    6 | 256 | 37.57 | 15.45 |   38.0 |   37.73 | 16.31 |   0 |  77 |    77 |  0.03 |    -0.41 | 0.97 |
| EDSS         |    7 | 171 |  3.05 |  2.27 |    3.0 |    2.88 |  2.97 |   0 |   8 |     8 |  0.32 |    -1.05 | 0.17 |
| A1           |    8 | 238 |  6.06 |  1.99 |    6.0 |    5.97 |  1.48 |   1 |  13 |    12 |  0.37 |     0.54 | 0.13 |
| A5           |    9 | 238 | 11.21 |  3.13 |   11.0 |   11.40 |  4.45 |   1 |  16 |    15 | -0.32 |    -0.64 | 0.20 |
| A1_A5        |   10 | 238 | 45.16 | 14.16 |   47.0 |   46.13 | 14.83 |  11 |  75 |    64 | -0.32 |    -0.57 | 0.92 |
| BVMT_1       |   11 | 224 |  3.89 |  2.92 |    3.0 |    3.46 |  2.97 |   0 |  12 |    12 |  0.72 |    -0.23 | 0.20 |
| BVMT_2       |   12 | 224 |  5.84 |  3.61 |    5.0 |    5.57 |  4.45 |   0 |  12 |    12 |  0.28 |    -1.11 | 0.24 |
| BVMT_3       |   13 | 224 |  6.78 |  3.90 |    6.0 |    6.85 |  5.93 |   0 |  12 |    12 | -0.02 |    -1.33 | 0.26 |
| BVMT_total   |   14 | 224 | 16.48 |  9.78 |   15.5 |   15.85 | 12.60 |   1 |  36 |    35 |  0.27 |    -1.10 | 0.65 |
# Data set of the case-control study.
# `trim` is spelled out in full instead of the partially matched `tr`.
kable(
  psych::describe(dplyr::select_if(caso, is.numeric), trim = 0.2),
  digits = 2
)
| Idade |
1 |
26 |
30.46 |
4.81 |
31.00 |
30.31 |
2.97 |
21.00 |
45.00 |
24.00 |
0.66 |
1.85 |
0.94 |
| Escolaridade |
2 |
26 |
14.92 |
3.19 |
15.00 |
14.75 |
4.45 |
8.00 |
20.00 |
12.00 |
-0.05 |
-1.02 |
0.62 |
| EDSS |
3 |
26 |
0.54 |
1.09 |
0.00 |
0.12 |
0.00 |
0.00 |
4.50 |
4.50 |
2.15 |
4.28 |
0.21 |
| Grupo |
4 |
26 |
0.54 |
0.51 |
1.00 |
0.56 |
0.00 |
0.00 |
1.00 |
1.00 |
-0.15 |
-2.05 |
0.10 |
| A1 |
5 |
26 |
7.19 |
2.10 |
7.00 |
7.00 |
2.97 |
4.00 |
12.00 |
8.00 |
0.43 |
-0.96 |
0.41 |
| A2 |
6 |
26 |
10.31 |
2.00 |
10.00 |
10.00 |
1.48 |
7.00 |
15.00 |
8.00 |
0.72 |
-0.22 |
0.39 |
| A3 |
7 |
26 |
11.88 |
2.52 |
12.00 |
12.06 |
2.97 |
5.00 |
16.00 |
11.00 |
-0.51 |
0.16 |
0.49 |
| A4 |
8 |
26 |
12.19 |
3.41 |
13.00 |
12.81 |
2.97 |
0.00 |
16.00 |
16.00 |
-1.73 |
3.74 |
0.67 |
| A5 |
9 |
26 |
12.92 |
3.35 |
14.00 |
13.56 |
2.97 |
1.00 |
16.00 |
15.00 |
-1.72 |
3.58 |
0.66 |
| Total_A1_A5 |
10 |
26 |
54.50 |
11.28 |
55.50 |
54.94 |
9.64 |
23.00 |
75.00 |
52.00 |
-0.62 |
0.62 |
2.21 |
| CVLT _rep |
11 |
26 |
5.58 |
5.52 |
4.00 |
4.56 |
5.93 |
0.00 |
20.00 |
20.00 |
1.02 |
0.28 |
1.08 |
| BVMT_T1 |
12 |
26 |
4.81 |
4.22 |
2.00 |
4.12 |
1.48 |
0.00 |
12.00 |
12.00 |
0.48 |
-1.57 |
0.83 |
| BVMT_T2 |
13 |
26 |
6.96 |
4.09 |
6.00 |
6.81 |
4.45 |
0.00 |
12.00 |
12.00 |
0.09 |
-1.68 |
0.80 |
| BVMT_T3 |
14 |
26 |
7.88 |
3.59 |
7.00 |
8.00 |
4.45 |
1.00 |
12.00 |
11.00 |
-0.08 |
-1.56 |
0.70 |
| BVMT |
15 |
26 |
19.65 |
11.54 |
15.50 |
18.94 |
11.86 |
1.00 |
36.00 |
35.00 |
0.15 |
-1.69 |
2.26 |
| SD_escr |
16 |
26 |
49.81 |
12.62 |
51.00 |
51.50 |
12.60 |
23.00 |
68.00 |
45.00 |
-0.57 |
-0.72 |
2.48 |
| SD_oral |
17 |
26 |
52.69 |
13.67 |
52.50 |
53.69 |
17.05 |
23.00 |
75.00 |
52.00 |
-0.38 |
-0.72 |
2.68 |
| Oral_Escrito |
18 |
26 |
2.88 |
5.46 |
1.50 |
1.88 |
3.71 |
-8.00 |
19.00 |
27.00 |
1.01 |
1.30 |
1.07 |
| HADS_A |
19 |
26 |
6.54 |
3.85 |
5.00 |
5.94 |
2.97 |
1.00 |
14.00 |
13.00 |
0.57 |
-1.01 |
0.75 |
| HADS_D |
20 |
26 |
4.27 |
4.11 |
3.00 |
3.12 |
2.97 |
0.00 |
15.00 |
15.00 |
1.29 |
0.59 |
0.81 |
| HPT9_RH |
21 |
26 |
23.64 |
5.81 |
21.30 |
22.44 |
4.89 |
17.91 |
39.00 |
21.09 |
1.13 |
0.46 |
1.14 |
| HPT9_LH |
22 |
26 |
24.67 |
6.82 |
22.20 |
23.35 |
4.41 |
18.00 |
51.00 |
33.00 |
2.20 |
5.89 |
1.34 |
| HPT9_total |
23 |
26 |
24.15 |
6.04 |
21.34 |
23.02 |
4.00 |
18.00 |
45.00 |
27.00 |
1.62 |
2.97 |
1.18 |
| VolCereb |
24 |
14 |
1540.73 |
32.58 |
1534.00 |
1536.22 |
33.58 |
1503.00 |
1603.00 |
100.00 |
0.52 |
-1.16 |
8.71 |
| VolCinz |
25 |
14 |
903.11 |
27.38 |
905.50 |
900.77 |
26.17 |
860.80 |
952.00 |
91.20 |
0.35 |
-0.95 |
7.32 |
| lesoesflair |
26 |
14 |
10.65 |
5.39 |
10.60 |
10.60 |
6.52 |
1.50 |
19.64 |
18.14 |
0.00 |
-1.28 |
1.44 |
| lesoesimpreg |
27 |
14 |
0.04 |
0.12 |
0.00 |
0.00 |
0.00 |
0.00 |
0.40 |
0.40 |
2.23 |
3.72 |
0.03 |
# Data set of the test-retest reliability study.
# `trim` is spelled out in full instead of the partially matched `tr`.
kable(
  psych::describe(dplyr::select_if(conf, is.numeric), trim = 0.2),
  digits = 2
)
|                 | vars |  n |  mean |    sd | median | trimmed |   mad | min | max | range |  skew | kurtosis |   se |
|:----------------|-----:|---:|------:|------:|-------:|--------:|------:|----:|----:|------:|------:|---------:|-----:|
| Idade           |    1 | 98 | 60.87 | 13.44 |   62.0 |   61.80 |  8.90 |  22 |  92 |    70 | -0.50 |     0.60 | 1.36 |
| Escolaridade    |    2 | 98 |  9.76 |  5.36 |   11.0 |    9.43 |  7.41 |   1 |  27 |    26 |  0.34 |    -0.34 | 0.54 |
| SD_escr_teste   |    3 | 98 | 29.42 | 13.28 |   29.5 |   29.02 | 14.83 |   5 |  69 |    64 |  0.25 |    -0.53 | 1.34 |
| SD_oral_teste   |    4 | 98 | 31.62 | 14.93 |   32.0 |   30.88 | 16.31 |   2 |  70 |    68 |  0.35 |    -0.36 | 1.51 |
| SD_escr_reteste |    5 | 91 | 31.34 | 14.47 |   32.0 |   31.49 | 17.79 |   0 |  70 |    70 |  0.01 |    -0.65 | 1.52 |
| SD_oral_reteste |    6 | 91 | 33.56 | 14.52 |   34.0 |   33.53 | 16.31 |   0 |  68 |    68 |  0.01 |    -0.53 | 1.52 |
| alter_escr      |    7 | 75 | 23.48 | 11.15 |   23.0 |   23.00 | 13.34 |   0 |  47 |    47 |  0.15 |    -0.83 | 1.29 |
| altern_oral     |    8 | 75 | 25.40 | 11.28 |   25.0 |   25.00 | 13.34 |   4 |  50 |    46 |  0.16 |    -0.74 | 1.30 |
Medidas clínicas
Análise de cluster RM
library(cluster)
library(tidyverse)
# Select the variables of the MRI data set (Ress) used in the cluster analysis.
# Previous index-based selection, kept commented out for reference:
#dados2 <- na.omit(Ress[c(1,3, 8:15, 17:38)])
dados2 <- Ress %>% select(TempoDoenca:pinosesq9, VitD, FSS:SD_oral, BVMT, CVLT, MMSE_MST, VolCinz,LesoesFlair, grupos, grupos2, Idade, Escolaridade)
# The unsupervised random forest approach below follows this example:
# http://gradientdescending.com/unsupervised-random-forest-example/
suppressPackageStartupMessages(library(randomForest))
suppressPackageStartupMessages(library(caret))
suppressPackageStartupMessages(library(cluster))
suppressPackageStartupMessages(library(RColorBrewer))
# set colours
# NOTE(review): myColRamp is not referenced again in the visible part of this
# file; the plots below pass the two hex colours directly.
myColRamp <- colorRampPalette(colors = c("#5DBCD2", "#FF80AA"))
# random forest model
# Fixed seed so that the forest (and the clusters derived from it) can be
# reproduced.
set.seed(3984)
# y = NULL makes randomForest fit an unsupervised forest (see the printed
# summary). Only columns 1, 2, 4, 5, 6, 16 and 17 of dados2 are used; given
# the select() order above these are TempoDoenca, EDSS, pinosdir9, pinosesq9,
# VitD, VolCinz and LesoesFlair. proximity = TRUE stores the proximity matrix
# that the PAM step reads from rf2$proximity.
rf2 <- randomForest(x = dados2[c(1,2,4,5,6,16,17)], y = NULL, mtry = 3,
ntree = 10000, proximity = TRUE, oob.prox = TRUE)
# Print the model summary.
rf2
##
## Call:
## randomForest(x = dados2[c(1, 2, 4, 5, 6, 16, 17)], y = NULL, ntree = 10000, mtry = 3, proximity = TRUE, oob.prox = TRUE)
## Type of random forest: unsupervised
## Number of trees: 10000
## No. of variables tried at each split: 3
# PAM (partitioning around medoids) on the random forest proximity matrix,
# asking for two clusters.
# NOTE(review): pam() receives the proximity matrix as a plain matrix, so with
# the default `diss` it is treated as an observations-by-variables data matrix
# rather than as a dissimilarity. Confirm this is intended (the alternative
# would be clustering on 1 - proximity with diss = TRUE).
prox <- rf2$proximity
pam.rf <- pam(prox, 2)
# The cluster assignment lives in the `clustering` component of a pam object.
# The name is spelled out instead of relying on `$` partial matching of
# `cluster`, and the labelled factor is built in one step.
dados2$clustersRF <- factor(
  pam.rf$clustering,
  levels = c(1, 2),
  labels = c("Cluster1", "Cluster2")
)
# Cluster sizes
table(dados2$clustersRF)
##
## Cluster1 Cluster2
## 35 19
# Proportion of participants assigned to each random forest cluster
prop.table(table(dados2$clustersRF))
##
## Cluster1 Cluster2
## 0.6481481 0.3518519
# Cross-tabulation of the two EDSS-based groupings:
# grupos (EDSS ≤ 1.5 cut-off, rows) by grupos2 (EDSS ≤ 2.5 cut-off, columns)
table(dados2$grupos, dados2$grupos2)
##
## MS RRMS
## EM 34 0
## EMRR 8 12
# Cross-tabulation of the random forest clusters (rows) by the
# EDSS ≤ 1.5 grouping, grupos (columns)
table(dados2$clustersRF, dados2$grupos)
##
## EM EMRR
## Cluster1 17 18
## Cluster2 17 2
# Cross-tabulation of the random forest clusters (rows) by the
# EDSS ≤ 2.5 grouping, grupos2 (columns)
table(dados2$clustersRF, dados2$grupos2)
##
## MS RRMS
## Cluster1 23 12
## Cluster2 19 0
Análise de Componentes Principais
PCA com os grupos do Random Forest Clustering.
library(factoextra)
library("FactoMineR")
# Compute the PCA on columns 1 to 17 of dados2 (the grouping factors and the
# demographic columns that follow them are left out); no automatic plots.
BD.pca <- PCA(dados2[1:17], graph = FALSE)
# Individuals plot: habillage colours the points by random forest cluster.
fviz_pca_ind(
  BD.pca,
  label = "none", # hide individual labels
  habillage = dados2$clustersRF, # colour by groups
  palette = c("#5DBCD2", "#FF80AA"),
  addEllipses = TRUE, ellipse.level = 0.8 # concentration ellipses
)

# Biplot of individuals and variables on the first two dimensions, with the
# same cluster colouring. `repel` uses FALSE rather than the reassignable
# shorthand F.
fviz_pca_biplot(
  BD.pca,
  axes = c(1, 2), geom = "point",
  col.ind = "black", col.var = "steelblue", label = "all",
  invisible = "none", repel = FALSE, habillage = dados2$clustersRF,
  palette = c("#5DBCD2", "#FF80AA"), addEllipses = TRUE, ellipse.level = 0.8,
  title = "PCA - Biplot"
)

Exploração dos dados
Sintaxe das estatísticas robustas
## Robust statistics
library(WRS2)
# Trimmed mean of a numeric vector (20% trimming by default).
#
# Arguments:
#   x      numeric vector.
#   tr     proportion trimmed from each tail, forwarded to mean() as `trim`.
#   na.rm  if TRUE, missing values are dropped before averaging.
#   STAND  accepted for interface compatibility; not used by the function.
# Returns the trimmed mean as a single number.
tmean <- function(x, tr = .2, na.rm = FALSE, STAND = NULL) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  mean(x, tr)
}
# Trimmed standard deviation (SD), computed column-wise (20% by default).
#
# For each column, the residuals from the trimmed mean are taken, those whose
# absolute value exceeds the (1 - trim) quantile of the absolute residuals are
# discarded, and the SD is computed from the remaining squared residuals.
#
# Arguments:
#   x      numeric vector, matrix or data frame (coerced with as.matrix()).
#   trim   trimming proportion.
#   const  if TRUE, the result is divided by a consistency constant
#          (0.7892 for trim = 0.1, 0.6615 for trim = 0.2). For any other
#          trim a warning is raised and no correction is applied.
#          If FALSE, no correction is applied.
# Returns an unnamed numeric vector with one trimmed SD per column of x.
sd_trim <- function(x, trim = 0.2, const = TRUE) {
  x <- as.matrix(x)
  if (const) {
    if (trim == 0.1) {
      const <- 0.7892
    } else if (trim == 0.2) {
      const <- 0.6615
    } else {
      warning("Did you specify the correct consistency constant for trimming?")
      # No constant is known for this trim; dividing by 1 leaves the
      # result uncorrected.
      const <- 1
    }
  } else {
    const <- 1
  }
  # Residuals from the column-wise trimmed means
  m <- apply(x, 2, mean, trim)
  res <- sweep(x, 2, m, "-")
  # Per-column cut-off for the absolute residuals
  qu <- apply(abs(res), 2, quantile, 1 - trim)
  # Sum the retained squared residuals within each column. The previous
  # version re-packed the retained values with matrix(..., ncol = ncol(x)),
  # which mixes values between columns whenever ties make the columns keep a
  # different number of observations.
  keep <- sweep(abs(res), 2, qu, "<=")
  sq <- res^2
  sq[!keep] <- 0
  sdtrim <- unname(colSums(sq))
  sqrt(sdtrim / (nrow(x) * (1 - trim) - 1)) / const
}
Resultados robustos
# Overall description of the analysis data set.
# `trim` is spelled out in full instead of the partially matched `tr`.
psych::describe(dados2, trim = 0.2)
## vars n mean sd median trimmed mad min max range
## TempoDoenca 1 54 6.88 6.34 5.5 5.57 6.15 0.25 32.0 31.75
## EDSS 2 54 1.56 2.04 1.0 0.94 1.48 0.00 7.5 7.50
## Passos25 3 54 7.26 2.56 7.0 6.76 2.97 4.00 15.0 11.00
## pinosdir9 4 54 28.81 6.61 27.0 27.56 5.93 18.00 45.0 27.00
## pinosesq9 5 54 30.81 9.34 28.0 28.85 5.93 19.00 70.0 51.00
## VitD 6 54 62.73 29.98 55.5 63.23 37.06 7.00 100.0 93.00
## FSS 7 54 35.74 16.72 35.5 35.47 17.79 9.00 63.0 54.00
## HADS_A 8 54 6.43 3.97 5.5 6.09 3.71 0.00 16.0 16.00
## HADS_D 9 54 4.50 3.96 3.0 3.59 2.97 0.00 15.0 15.00
## SF36 10 54 91.65 11.74 94.0 93.88 7.41 53.00 105.0 52.00
## SD_escr 11 54 44.26 15.77 47.5 43.59 21.50 11.00 77.0 66.00
## SD_oral 12 54 46.30 15.44 49.0 46.59 15.57 12.00 77.0 65.00
## BVMT 13 54 10.91 6.65 10.0 10.53 7.41 0.00 33.0 33.00
## CVLT 14 54 51.93 11.73 52.5 53.09 12.60 22.00 70.0 48.00
## MMSE_MST 15 54 53.37 9.83 53.0 53.59 9.64 31.00 81.0 50.00
## VolCinz 16 54 890.75 52.32 893.0 894.04 49.67 766.00 995.0 229.00
## LesoesFlair 17 54 9.70 8.55 6.7 7.50 5.04 1.20 38.9 37.70
## grupos* 18 54 1.37 0.49 1.0 1.29 0.00 1.00 2.0 1.00
## grupos2* 19 54 1.22 0.42 1.0 1.06 0.00 1.00 2.0 1.00
## Idade 20 54 36.28 9.22 34.5 35.21 6.67 18.00 61.0 43.00
## Escolaridade 21 54 13.80 4.54 13.5 13.68 3.71 4.00 28.0 24.00
## clustersRF* 22 54 1.35 0.48 1.0 1.26 0.00 1.00 2.0 1.00
## skew kurtosis se
## TempoDoenca 1.50 2.81 0.86
## EDSS 1.27 0.57 0.28
## Passos25 1.47 2.21 0.35
## pinosdir9 0.84 -0.21 0.90
## pinosesq9 1.96 4.68 1.27
## VitD 0.09 -1.52 4.08
## FSS 0.04 -1.14 2.27
## HADS_A 0.44 -0.74 0.54
## HADS_D 1.26 0.86 0.54
## SF36 -1.86 3.42 1.60
## SD_escr 0.09 -1.04 2.15
## SD_oral -0.11 -0.89 2.10
## BVMT 0.88 1.26 0.91
## CVLT -0.55 -0.37 1.60
## MMSE_MST 0.01 0.08 1.34
## VolCinz -0.32 -0.36 7.12
## LesoesFlair 1.48 1.63 1.16
## grupos* 0.52 -1.76 0.07
## grupos2* 1.30 -0.32 0.06
## Idade 0.67 0.28 1.25
## Escolaridade 0.31 0.70 0.62
## clustersRF* 0.60 -1.67 0.07
# Whole sample: 20% trimmed mean (media) and standard deviation (dp) of each
# variable, rounded to two decimals. Columns 18 and 19 are skipped (they are
# the grouping factors grupos and grupos2).
# vapply() is used so that each summary is guaranteed to be a single number.
# NOTE(review): dp is the ordinary sd(), not the trimmed SD from sd_trim().
# Confirm which one is intended under the "robust statistics" heading.
media <- round(vapply(dados2[c(1:17, 20, 21)], tmean, numeric(1)), 2)
dp <- round(vapply(dados2[c(1:17, 20, 21)], sd, numeric(1)), 2)
total <- cbind(media, dp)
total
## media dp
## TempoDoenca 5.57 6.34
## EDSS 0.94 2.04
## Passos25 6.76 2.56
## pinosdir9 27.56 6.61
## pinosesq9 28.85 9.34
## VitD 63.23 29.98
## FSS 35.47 16.72
## HADS_A 6.09 3.97
## HADS_D 3.59 3.96
## SF36 93.88 11.74
## SD_escr 43.59 15.77
## SD_oral 46.59 15.44
## BVMT 10.53 6.65
## CVLT 53.09 11.73
## MMSE_MST 53.59 9.83
## VolCinz 894.04 52.32
## LesoesFlair 7.50 8.55
## Idade 35.21 9.22
## Escolaridade 13.68 4.54
# Description of the random forest clusters, with 20% trimmed means.
# `trim` is forwarded to psych::describe() and is spelled out in full instead
# of the partially matched `tr`.
psych::describeBy(dados2, dados2$clustersRF, trim = 0.2)
##
## Descriptive statistics by group
## group: Cluster1
## vars n mean sd median trimmed mad min max range skew
## TempoDoenca 1 35 8.68 6.89 7.0 7.62 5.93 0.6 32.0 31.4 1.17
## EDSS 2 35 2.23 2.22 2.0 1.76 2.97 0.0 7.5 7.5 0.76
## Passos25 3 35 7.31 2.64 7.0 6.81 2.97 4.0 15.0 11.0 1.35
## pinosdir9 4 35 31.63 6.44 31.0 30.67 7.41 23.0 45.0 22.0 0.54
## pinosesq9 5 35 33.83 10.19 31.0 31.57 7.41 21.0 70.0 49.0 1.61
## VitD 6 35 54.68 28.72 45.0 51.01 26.69 7.0 99.0 92.0 0.44
## FSS 7 35 39.46 16.69 41.0 40.86 16.31 9.0 63.0 54.0 -0.36
## HADS_A 8 35 6.71 4.06 5.0 6.24 4.45 1.0 16.0 15.0 0.53
## HADS_D 9 35 5.83 4.20 4.0 4.95 2.97 1.0 15.0 14.0 0.91
## SF36 10 35 91.14 11.39 92.0 92.57 5.93 53.0 105.0 52.0 -1.68
## SD_escr 11 35 37.20 12.08 33.0 36.19 10.38 11.0 68.0 57.0 0.38
## SD_oral 12 35 39.89 12.42 40.0 39.81 16.31 12.0 68.0 56.0 0.03
## BVMT 13 35 9.63 6.53 9.0 8.81 5.93 0.0 29.0 29.0 0.87
## CVLT 14 35 49.14 11.75 50.0 49.95 14.83 22.0 68.0 46.0 -0.46
## MMSE_MST 15 35 49.86 9.09 51.0 49.76 8.90 31.0 70.0 39.0 0.00
## VolCinz 16 35 870.29 50.23 873.0 871.44 47.44 766.0 980.0 214.0 -0.03
## LesoesFlair 17 35 12.78 9.04 9.1 10.48 5.63 2.3 38.9 36.6 1.12
## grupos* 18 35 1.51 0.51 2.0 1.52 0.00 1.0 2.0 1.0 -0.05
## grupos2* 19 35 1.34 0.48 1.0 1.24 0.00 1.0 2.0 1.0 0.63
## Idade 20 35 39.09 9.53 38.0 37.76 10.38 19.0 61.0 42.0 0.49
## Escolaridade 21 35 13.17 5.21 12.0 12.86 4.45 4.0 28.0 24.0 0.58
## clustersRF* 22 35 1.00 0.00 1.0 1.00 0.00 1.0 1.0 0.0 NaN
## kurtosis se
## TempoDoenca 1.62 1.16
## EDSS -0.63 0.37
## Passos25 1.68 0.45
## pinosdir9 -0.95 1.09
## pinosesq9 2.78 1.72
## VitD -1.15 4.85
## FSS -0.95 2.82
## HADS_A -0.65 0.69
## HADS_D -0.27 0.71
## SF36 3.53 1.93
## SD_escr -0.39 2.04
## SD_oral -0.64 2.10
## BVMT 0.61 1.10
## CVLT -0.55 1.99
## MMSE_MST -0.34 1.54
## VolCinz -0.39 8.49
## LesoesFlair 0.33 1.53
## grupos* -2.05 0.09
## grupos2* -1.64 0.08
## Idade -0.33 1.61
## Escolaridade 0.39 0.88
## clustersRF* NaN 0.00
## ------------------------------------------------------------
## group: Cluster2
## vars n mean sd median trimmed mad min max range
## TempoDoenca 1 19 3.57 3.28 2.0 2.95 2.08 0.25 13.0 12.75
## EDSS 2 19 0.32 0.67 0.0 0.08 0.00 0.00 2.0 2.00
## Passos25 3 19 7.16 2.46 7.0 6.69 1.48 5.00 15.0 10.00
## pinosdir9 4 19 23.63 2.54 24.0 23.92 2.97 18.00 27.0 9.00
## pinosesq9 5 19 25.26 3.25 25.0 25.23 2.97 19.00 32.0 13.00
## VitD 6 19 77.56 27.02 99.0 82.13 0.00 30.00 100.0 70.00
## FSS 7 19 28.89 14.84 25.0 26.85 14.83 9.00 63.0 54.00
## HADS_A 8 19 5.89 3.86 6.0 5.77 4.45 0.00 12.0 12.00
## HADS_D 9 19 2.05 1.75 1.0 1.85 1.48 0.00 6.0 6.00
## SF36 10 19 92.58 12.62 96.0 95.92 4.45 56.00 104.0 48.00
## SD_escr 11 19 57.26 13.44 59.0 59.00 8.90 19.00 77.0 58.00
## SD_oral 12 19 58.11 13.55 59.0 59.92 8.90 19.00 77.0 58.00
## BVMT 13 19 13.26 6.38 14.0 13.08 4.45 3.00 33.0 30.00
## CVLT 14 19 57.05 10.09 59.0 58.15 10.38 33.00 70.0 37.00
## MMSE_MST 15 19 59.84 7.76 59.0 59.77 5.93 47.00 81.0 34.00
## VolCinz 16 19 928.42 31.35 927.0 926.92 35.58 872.00 995.0 123.00
## LesoesFlair 17 19 4.02 2.90 3.5 3.33 2.82 1.20 11.7 10.50
## grupos* 18 19 1.11 0.32 1.0 1.00 0.00 1.00 2.0 1.00
## grupos2* 19 19 1.00 0.00 1.0 1.00 0.00 1.00 1.0 0.00
## Idade 20 19 31.11 5.91 32.0 31.31 5.93 18.00 42.0 24.00
## Escolaridade 21 19 14.95 2.72 16.0 15.00 2.97 11.00 20.0 9.00
## clustersRF* 22 19 2.00 0.00 2.0 2.00 0.00 2.00 2.0 0.00
## skew kurtosis se
## TempoDoenca 1.24 1.07 0.75
## EDSS 1.69 1.33 0.15
## Passos25 1.60 2.79 0.56
## pinosdir9 -0.55 -0.77 0.58
## pinosesq9 0.08 -0.56 0.74
## VitD -0.51 -1.60 6.20
## FSS 0.80 -0.27 3.41
## HADS_A 0.19 -1.50 0.88
## HADS_D 0.69 -0.68 0.40
## SF36 -2.00 2.86 2.89
## SD_escr -1.21 1.43 3.08
## SD_oral -1.30 1.63 3.11
## BVMT 1.15 2.42 1.46
## CVLT -0.63 -0.52 2.31
## MMSE_MST 0.58 0.83 1.78
## VolCinz 0.27 -0.60 7.19
## LesoesFlair 1.23 0.58 0.66
## grupos* 2.37 3.84 0.07
## grupos2* NaN NaN 0.00
## Idade -0.25 -0.49 1.35
## Escolaridade -0.12 -1.18 0.62
## clustersRF* NaN NaN 0.00
# Yuen robust t-test comparing the two random forest clusters on each
# variable; one p-value per variable, rounded to three decimals.
# vapply() guarantees a single number per variable.
clusterRF.p <- round(
  vapply(
    dados2[c(1:17, 20, 21)],
    function(i) yuen(i ~ dados2$clustersRF)$p.value,
    numeric(1)
  ),
  3
)
# Effect size (with bootstrap), results may differ because of resampling
clusterRF.d <- round(
  vapply(
    dados2[c(1:17, 20, 21)],
    function(i) yuen.effect.ci(i ~ dados2$clustersRF)$effsize,
    numeric(1)
  ),
  3
)
# Print results
cbind(clusterRF.p, clusterRF.d)
## clusterRF.p clusterRF.d
## TempoDoenca 0.003 0.596
## EDSS 0.003 0.778
## Passos25 0.858 0.097
## pinosdir9 0.000 0.869
## pinosesq9 0.000 0.778
## VitD 0.011 0.492
## FSS 0.009 0.519
## HADS_A 0.740 0.106
## HADS_D 0.002 0.698
## SF36 0.084 0.343
## SD_escr 0.000 0.798
## SD_oral 0.000 0.770
## BVMT 0.010 0.434
## CVLT 0.025 0.480
## MMSE_MST 0.000 0.709
## VolCinz 0.000 0.819
## LesoesFlair 0.000 0.872
## Idade 0.004 0.639
## Escolaridade 0.060 0.408
# Description of the EM and EMRR groups (EDSS ≤ 1.5), with 20% trimmed means.
# `trim` is forwarded to psych::describe() and is spelled out in full instead
# of the partially matched `tr`.
psych::describeBy(dados2, dados2$grupos, trim = 0.2)
##
## Descriptive statistics by group
## group: EM
## vars n mean sd median trimmed mad min max range
## TempoDoenca 1 34 5.35 4.41 5.0 4.61 5.41 0.25 15.0 14.75
## EDSS 2 34 0.26 0.45 0.0 0.14 0.00 0.00 1.0 1.00
## Passos25 3 34 6.76 2.24 6.0 6.32 1.48 4.00 15.0 11.00
## pinosdir9 4 34 25.91 3.73 26.0 25.55 2.22 20.00 39.0 19.00
## pinosesq9 5 34 27.12 4.40 27.0 27.00 4.45 19.00 40.0 21.00
## VitD 6 34 64.43 29.36 63.0 66.08 43.07 7.00 100.0 93.00
## FSS 7 34 31.00 15.89 30.0 29.59 15.57 9.00 63.0 54.00
## HADS_A 8 34 5.62 3.86 5.0 5.18 4.45 0.00 14.0 14.00
## HADS_D 9 34 3.41 3.46 3.0 2.55 2.22 0.00 15.0 15.00
## SF36 10 34 93.41 10.85 96.0 95.55 6.67 53.00 105.0 52.00
## SD_escr 11 34 49.56 15.71 51.0 50.55 17.79 19.00 77.0 58.00
## SD_oral 12 34 50.62 15.45 53.0 51.95 17.79 19.00 77.0 58.00
## BVMT 13 34 11.59 6.16 11.5 11.68 6.67 0.00 33.0 33.00
## CVLT 14 34 52.35 11.65 53.0 53.41 11.86 23.00 70.0 47.00
## MMSE_MST 15 34 56.03 9.97 57.0 56.55 9.64 32.00 81.0 49.00
## VolCinz 16 34 896.30 47.68 899.0 899.20 41.51 787.00 995.0 208.00
## LesoesFlair 17 34 7.80 7.24 5.6 5.93 4.74 1.20 30.9 29.70
## grupos* 18 34 1.00 0.00 1.0 1.00 0.00 1.00 1.0 0.00
## grupos2* 19 34 1.00 0.00 1.0 1.00 0.00 1.00 1.0 0.00
## Idade 20 34 34.12 7.45 33.0 33.64 6.67 18.00 55.0 37.00
## Escolaridade 21 34 14.38 4.52 15.0 14.27 4.45 4.00 28.0 24.00
## clustersRF* 22 34 1.50 0.51 1.5 1.50 0.74 1.00 2.0 1.00
## skew kurtosis se
## TempoDoenca 0.61 -0.90 0.76
## EDSS 1.02 -0.99 0.08
## Passos25 1.49 2.92 0.38
## pinosdir9 1.22 2.63 0.64
## pinosesq9 0.51 0.38 0.75
## VitD -0.07 -1.32 5.03
## FSS 0.39 -0.86 2.73
## HADS_A 0.54 -0.74 0.66
## HADS_D 1.93 3.62 0.59
## SF36 -2.20 5.22 1.86
## SD_escr -0.27 -1.11 2.69
## SD_oral -0.35 -0.93 2.65
## BVMT 0.85 2.23 1.06
## CVLT -0.51 -0.44 2.00
## MMSE_MST -0.17 0.13 1.71
## VolCinz -0.33 -0.13 8.18
## LesoesFlair 1.64 2.22 1.24
## grupos* NaN NaN 0.00
## grupos2* NaN NaN 0.00
## Idade 0.46 0.37 1.28
## Escolaridade 0.43 1.14 0.78
## clustersRF* 0.00 -2.06 0.09
## ------------------------------------------------------------
## group: EMRR
## vars n mean sd median trimmed mad min max range skew
## TempoDoenca 1 20 9.48 8.18 6.00 8.00 5.93 0.6 32.0 31.4 1.06
## EDSS 2 20 3.75 1.79 3.00 3.42 1.48 2.0 7.5 5.5 0.59
## Passos25 3 20 8.10 2.88 8.00 7.50 1.48 5.0 15.0 10.0 1.25
## pinosdir9 4 20 33.75 7.55 34.00 34.17 8.90 18.0 45.0 27.0 -0.30
## pinosesq9 5 20 37.10 11.99 35.00 34.58 11.12 24.0 70.0 46.0 1.08
## VitD 6 20 59.85 31.56 42.00 57.77 22.46 22.0 99.0 77.0 0.33
## FSS 7 20 43.80 15.24 45.00 45.50 20.02 9.0 63.0 54.0 -0.52
## HADS_A 8 20 7.80 3.87 7.50 7.50 5.19 2.0 16.0 14.0 0.38
## HADS_D 9 20 6.35 4.16 5.00 5.92 4.45 1.0 15.0 14.0 0.63
## SF36 10 20 88.65 12.84 90.50 90.67 7.41 56.0 105.0 49.0 -1.38
## SD_escr 11 20 35.25 11.35 32.50 34.50 6.67 11.0 53.0 42.0 0.04
## SD_oral 12 20 38.95 12.66 39.50 39.08 15.57 12.0 58.0 46.0 -0.17
## BVMT 13 20 9.75 7.45 8.50 8.42 5.93 1.0 29.0 28.0 1.01
## CVLT 14 20 51.20 12.14 51.00 52.50 16.31 22.0 67.0 45.0 -0.56
## MMSE_MST 15 20 48.85 7.93 51.00 49.42 5.93 31.0 65.0 34.0 -0.28
## VolCinz 16 20 881.30 59.47 885.00 881.75 67.46 766.0 980.0 214.0 -0.15
## LesoesFlair 17 20 12.93 9.77 9.05 10.34 6.60 3.5 38.9 35.4 1.12
## grupos* 18 20 1.00 0.00 1.00 1.00 0.00 1.0 1.0 0.0 NaN
## grupos2* 19 20 1.60 0.50 2.00 1.67 0.00 1.0 2.0 1.0 -0.38
## Idade 20 20 39.95 10.87 37.00 38.67 8.90 19.0 61.0 42.0 0.33
## Escolaridade 21 20 12.80 4.53 11.50 12.58 3.71 4.0 22.0 18.0 0.12
## clustersRF* 22 20 1.10 0.31 1.00 1.00 0.00 1.0 2.0 1.0 2.47
## kurtosis se
## TempoDoenca 0.43 1.83
## EDSS -1.14 0.40
## Passos25 0.70 0.64
## pinosdir9 -0.97 1.69
## pinosesq9 0.55 2.68
## VitD -1.83 7.06
## FSS -0.76 3.41
## HADS_A -0.98 0.87
## HADS_D -0.60 0.93
## SF36 1.48 2.87
## SD_escr -0.84 2.54
## SD_oral -0.90 2.83
## BVMT 0.31 1.67
## CVLT -0.57 2.71
## MMSE_MST -0.28 1.77
## VolCinz -0.92 13.30
## LesoesFlair 0.23 2.18
## grupos* NaN 0.00
## grupos2* -1.95 0.11
## Idade -0.77 2.43
## Escolaridade -0.55 1.01
## clustersRF* 4.32 0.07
# Yuen robust t-test comparing the EM and EMRR groups (grupos) on each
# variable; one p-value per variable, rounded to three decimals.
# vapply() guarantees a single number per variable.
grupos.p <- round(
  vapply(
    dados2[c(1:17, 20, 21)],
    function(i) yuen(i ~ dados2$grupos)$p.value,
    numeric(1)
  ),
  3
)
# Effect size (with bootstrap), results may differ because of resampling
grupos.d <- round(
  vapply(
    dados2[c(1:17, 20, 21)],
    function(i) yuen.effect.ci(i ~ dados2$grupos)$effsize,
    numeric(1)
  ),
  3
)
# Print results
cbind(grupos.p, grupos.d)
## grupos.p grupos.d
## TempoDoenca 0.182 0.395
## EDSS 0.000 0.922
## Passos25 0.053 0.353
## pinosdir9 0.001 0.759
## pinosesq9 0.013 0.768
## VitD 0.538 0.126
## FSS 0.006 0.564
## HADS_A 0.095 0.350
## HADS_D 0.011 0.616
## SF36 0.043 0.493
## SD_escr 0.002 0.674
## SD_oral 0.013 0.513
## BVMT 0.057 0.368
## CVLT 0.829 0.070
## MMSE_MST 0.008 0.526
## VolCinz 0.321 0.232
## LesoesFlair 0.063 0.413
## Idade 0.132 0.439
## Escolaridade 0.174 0.304
# Description of the MS and RRMS groups (EDSS ≤ 2.5), with 20% trimmed means.
# `trim` is forwarded to psych::describe() and is spelled out in full instead
# of the partially matched `tr`.
psych::describeBy(dados2, dados2$grupos2, trim = 0.2)
##
## Descriptive statistics by group
## group: MS
## vars n mean sd median trimmed mad min max range
## TempoDoenca 1 42 5.73 4.87 5.0 4.78 5.41 0.25 17.0 16.75
## EDSS 2 42 0.62 0.85 0.0 0.35 0.00 0.00 2.5 2.50
## Passos25 3 42 6.88 2.43 6.0 6.35 1.48 4.00 15.0 11.00
## pinosdir9 4 42 26.60 4.52 26.0 25.96 2.97 18.00 39.0 21.00
## pinosesq9 5 42 27.81 5.26 27.0 27.12 4.45 19.00 43.0 24.00
## VitD 6 42 65.54 29.35 63.0 67.03 43.74 7.00 100.0 93.00
## FSS 7 42 32.48 16.15 33.0 31.73 17.79 9.00 63.0 54.00
## HADS_A 8 42 6.10 3.94 5.0 5.69 4.45 0.00 14.0 14.00
## HADS_D 9 42 3.83 3.77 3.0 2.85 2.97 0.00 15.0 15.00
## SF36 10 42 92.95 11.63 96.0 95.38 6.67 53.00 105.0 52.00
## SD_escr 11 42 47.64 15.19 50.0 47.73 18.53 19.00 77.0 58.00
## SD_oral 12 42 49.31 14.73 52.5 50.04 17.79 19.00 77.0 58.00
## BVMT 13 42 11.62 6.48 11.0 11.42 5.93 0.00 33.0 33.00
## CVLT 14 42 51.98 11.89 52.5 53.12 11.86 22.00 70.0 48.00
## MMSE_MST 15 42 54.64 10.48 55.5 55.38 11.12 31.00 81.0 50.00
## VolCinz 16 42 897.03 51.82 902.0 902.13 44.48 766.00 995.0 229.00
## LesoesFlair 17 42 8.62 7.51 5.9 6.75 4.82 1.20 30.9 29.70
## grupos* 18 42 1.19 0.40 1.0 1.00 0.00 1.00 2.0 1.00
## grupos2* 19 42 1.00 0.00 1.0 1.00 0.00 1.00 1.0 0.00
## Idade 20 42 34.50 8.50 33.0 33.62 5.93 18.00 58.0 40.00
## Escolaridade 21 42 14.52 4.58 15.0 14.46 4.45 4.00 28.0 24.00
## clustersRF* 22 42 1.45 0.50 1.0 1.42 0.00 1.00 2.0 1.00
## skew kurtosis se
## TempoDoenca 0.78 -0.53 0.75
## EDSS 0.93 -0.65 0.13
## Passos25 1.72 3.28 0.38
## pinosdir9 0.82 0.63 0.70
## pinosesq9 1.00 0.94 0.81
## VitD -0.06 -1.41 4.53
## FSS 0.20 -1.08 2.49
## HADS_A 0.45 -0.87 0.61
## HADS_D 1.65 2.22 0.58
## SF36 -2.10 4.28 1.79
## SD_escr -0.05 -1.15 2.34
## SD_oral -0.19 -0.93 2.27
## BVMT 0.96 1.77 1.00
## CVLT -0.63 -0.08 1.83
## MMSE_MST -0.23 -0.05 1.62
## VolCinz -0.56 -0.12 8.00
## LesoesFlair 1.32 0.96 1.16
## grupos* 1.52 0.32 0.06
## grupos2* NaN NaN 0.00
## Idade 0.69 0.56 1.31
## Escolaridade 0.25 0.65 0.71
## clustersRF* 0.18 -2.01 0.08
## ------------------------------------------------------------
## group: RRMS
## vars n mean sd median trimmed mad min max range skew
## TempoDoenca 1 12 10.88 9.10 8.00 9.25 6.67 0.6 32.0 31.4 0.94
## EDSS 2 12 4.83 1.51 4.75 4.75 1.85 3.0 7.5 4.5 0.15
## Passos25 3 12 8.58 2.64 8.00 8.12 1.48 5.0 15.0 10.0 1.06
## pinosdir9 4 12 36.58 7.05 38.00 37.62 7.41 23.0 45.0 22.0 -0.56
## pinosesq9 5 12 41.33 12.74 36.00 39.50 6.67 26.0 70.0 44.0 0.85
## VitD 6 12 52.92 31.37 37.65 48.88 16.01 22.0 99.0 77.0 0.60
## FSS 7 12 47.17 13.82 45.00 48.38 20.76 23.0 63.0 40.0 -0.27
## HADS_A 8 12 7.58 4.06 7.50 7.38 4.45 2.0 16.0 14.0 0.43
## HADS_D 9 12 6.83 3.86 6.00 6.50 3.71 2.0 15.0 13.0 0.58
## SF36 10 12 87.08 11.43 89.00 88.62 6.67 56.0 104.0 48.0 -1.38
## SD_escr 11 12 32.42 11.87 30.00 31.75 7.41 11.0 53.0 42.0 0.20
## SD_oral 12 12 35.75 13.59 35.00 35.38 13.34 12.0 58.0 46.0 0.04
## BVMT 13 12 8.42 6.92 6.50 7.25 6.67 1.0 24.0 23.0 0.84
## CVLT 14 12 51.75 11.66 55.50 52.38 12.60 35.0 64.0 29.0 -0.19
## MMSE_MST 15 12 48.92 5.32 51.00 49.38 5.93 40.0 55.0 15.0 -0.35
## VolCinz 16 12 868.75 50.01 862.50 866.50 39.29 779.0 980.0 201.0 0.42
## LesoesFlair 17 12 13.47 11.05 9.05 10.74 4.89 4.3 38.9 34.6 1.13
## grupos* 18 12 1.00 0.00 1.00 1.00 0.00 1.0 1.0 0.0 NaN
## grupos2* 19 12 1.00 0.00 1.00 1.00 0.00 1.0 1.0 0.0 NaN
## Idade 20 12 42.50 9.24 40.50 41.00 8.90 32.0 61.0 29.0 0.66
## Escolaridade 21 12 11.25 3.49 11.00 11.38 0.74 4.0 18.0 14.0 -0.18
## clustersRF* 22 12 1.00 0.00 1.00 1.00 0.00 1.0 1.0 0.0 NaN
## kurtosis se
## TempoDoenca -0.19 2.63
## EDSS -1.42 0.44
## Passos25 0.49 0.76
## pinosdir9 -1.05 2.04
## pinosesq9 -0.42 3.68
## VitD -1.59 9.06
## FSS -1.49 3.99
## HADS_A -0.84 1.17
## HADS_D -0.85 1.11
## SF36 1.99 3.30
## SD_escr -0.86 3.43
## SD_oral -1.17 3.92
## BVMT -0.35 2.00
## CVLT -1.91 3.37
## MMSE_MST -1.56 1.53
## VolCinz 0.02 14.44
## LesoesFlair -0.21 3.19
## grupos* NaN 0.00
## grupos2* NaN 0.00
## Idade -0.99 2.67
## Escolaridade 0.01 1.01
## clustersRF* NaN 0.00
# Yuen robust t-test of each selected column of dados2 against grupos2.
# vapply() (instead of sapply()) guarantees one numeric p-value per column and
# fails loudly if a test ever returns something else.
grupos.p2 <- round(
  vapply(
    dados2[c(1:17, 20, 21)],
    function(i) yuen(i ~ dados2$grupos2)$p.value,
    numeric(1)
  ),
  3
)
# Effect size (with bootstrap), results may differ because of resampling
grupos.d2 <- round(
  vapply(
    dados2[c(1:17, 20, 21)],
    function(i) yuen.effect.ci(i ~ dados2$grupos2)$effsize,
    numeric(1)
  ),
  3
)
# Print results: one row per variable, p-value next to effect size
cbind(grupos.p2, grupos.d2)
## grupos.p2 grupos.d2
## TempoDoenca 0.126 0.399
## EDSS 0.000 0.893
## Passos25 0.001 0.505
## pinosdir9 0.001 0.823
## pinosesq9 0.009 0.802
## VitD 0.251 0.265
## FSS 0.020 0.549
## HADS_A 0.277 0.227
## HADS_D 0.021 0.526
## SF36 0.009 0.533
## SD_escr 0.007 0.672
## SD_oral 0.019 0.564
## BVMT 0.080 0.379
## CVLT 0.891 0.083
## MMSE_MST 0.036 0.505
## VolCinz 0.026 0.467
## LesoesFlair 0.319 0.396
## Idade 0.058 0.623
## Escolaridade 0.000 0.548
# Gather the results: bind the six result vectors column-wise into one table
# (grupos.p2 / grupos.d2 are the Yuen p-values and effect sizes for grupos2;
# the grupos.* and clusterRF.* vectors are presumably the analogous results for
# the other two groupings — verify where they are computed)
cbind(grupos.p, grupos.d, grupos.p2, grupos.d2, clusterRF.p, clusterRF.d)
## grupos.p grupos.d grupos.p2 grupos.d2 clusterRF.p clusterRF.d
## TempoDoenca 0.182 0.395 0.126 0.399 0.003 0.596
## EDSS 0.000 0.922 0.000 0.893 0.003 0.778
## Passos25 0.053 0.353 0.001 0.505 0.858 0.097
## pinosdir9 0.001 0.759 0.001 0.823 0.000 0.869
## pinosesq9 0.013 0.768 0.009 0.802 0.000 0.778
## VitD 0.538 0.126 0.251 0.265 0.011 0.492
## FSS 0.006 0.564 0.020 0.549 0.009 0.519
## HADS_A 0.095 0.350 0.277 0.227 0.740 0.106
## HADS_D 0.011 0.616 0.021 0.526 0.002 0.698
## SF36 0.043 0.493 0.009 0.533 0.084 0.343
## SD_escr 0.002 0.674 0.007 0.672 0.000 0.798
## SD_oral 0.013 0.513 0.019 0.564 0.000 0.770
## BVMT 0.057 0.368 0.080 0.379 0.010 0.434
## CVLT 0.829 0.070 0.891 0.083 0.025 0.480
## MMSE_MST 0.008 0.526 0.036 0.505 0.000 0.709
## VolCinz 0.321 0.232 0.026 0.467 0.000 0.819
## LesoesFlair 0.063 0.413 0.319 0.396 0.000 0.872
## Idade 0.132 0.439 0.058 0.623 0.004 0.639
## Escolaridade 0.174 0.304 0.000 0.548 0.060 0.408
Gráficos tradicionais
# https://cran.r-project.org/web/packages/compareGroups/vignettes/compareGroups_vignette.html
library(compareGroups)
# Descriptive table of every variable in dados2 for the whole sample (no grouping)
descrTable(dados2)
##
## --------Summary descriptives table ---------
##
## ___________________________
## [ALL] N
## N=54
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## TempoDoenca 6.88 (6.34) 54
## EDSS 1.56 (2.04) 54
## Passos25 7.26 (2.56) 54
## pinosdir9 28.8 (6.61) 54
## pinosesq9 30.8 (9.34) 54
## VitD 62.7 (30.0) 54
## FSS 35.7 (16.7) 54
## HADS_A 6.43 (3.97) 54
## HADS_D 4.50 (3.96) 54
## SF36 91.6 (11.7) 54
## SD_escr 44.3 (15.8) 54
## SD_oral 46.3 (15.4) 54
## BVMT 10.9 (6.65) 54
## CVLT 51.9 (11.7) 54
## MMSE_MST 53.4 (9.83) 54
## VolCinz 891 (52.3) 54
## LesoesFlair 9.70 (8.55) 54
## grupos: 54
## EM 34 (63.0%)
## EMRR 20 (37.0%)
## grupos2: 54
## MS 42 (77.8%)
## RRMS 12 (22.2%)
## Idade 36.3 (9.22) 54
## Escolaridade 13.8 (4.54) 54
## clustersRF: 54
## Cluster1 35 (64.8%)
## Cluster2 19 (35.2%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Compare every other variable of dados2 between the clustersRF groups.
# method = 4: per the compareGroups documentation a Shapiro-Wilk test decides
# whether each continuous variable is treated as normal or non-normal
# (TODO confirm against the installed package version)
res <- compareGroups(clustersRF ~., data = dados2, method = 4)
# Print the per-variable summary (N, p-value, method used)
res
##
##
## -------- Summary of results by groups of 'clustersRF'---------
##
##
## var N p.value method selection
## 1 TempoDoenca 54 0.002** continuous non-normal ALL
## 2 EDSS 54 <0.001** continuous non-normal ALL
## 3 Passos25 54 0.911 continuous non-normal ALL
## 4 pinosdir9 54 <0.001** continuous non-normal ALL
## 5 pinosesq9 54 <0.001** continuous non-normal ALL
## 6 VitD 54 0.007** continuous non-normal ALL
## 7 FSS 54 0.023** continuous non-normal ALL
## 8 HADS_A 54 0.501 continuous non-normal ALL
## 9 HADS_D 54 <0.001** continuous non-normal ALL
## 10 SF36 54 0.198 continuous non-normal ALL
## 11 SD_escr 54 <0.001** continuous non-normal ALL
## 12 SD_oral 54 <0.001** continuous non-normal ALL
## 13 BVMT 54 0.036** continuous non-normal ALL
## 14 CVLT 54 0.020** continuous non-normal ALL
## 15 MMSE_MST 54 <0.001** continuous non-normal ALL
## 16 VolCinz 54 <0.001** continuous non-normal ALL
## 17 LesoesFlair 54 <0.001** continuous non-normal ALL
## 18 grupos 54 0.007** categorical ALL
## 19 grupos2 54 0.004** categorical ALL
## 20 Idade 54 0.002** continuous non-normal ALL
## 21 Escolaridade 54 0.076* continuous non-normal ALL
## -----
## Signif. codes: 0 '**' 0.05 '*' 0.1 ' ' 1
# Detailed descriptives of each row-variable by cluster, with odds ratios
summary(res)
##
## --- Descriptives of each row-variable by groups of 'clustersRF' ---
##
## -------------------
## row-variable: TempoDoenca
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 5.5 2 10 3 7
## Cluster1 35 7 3 11.5 5 10 0.002226
## Cluster2 19 2 1.15 5 1 5
##
## OR OR.lower OR.upper
## [1,] 0.801355 0.680546 0.943611
##
## -------------------
## row-variable: EDSS
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 1 0 2.375 0 2
## Cluster1 35 2 0 3.25 1 3 0.000228
## Cluster2 19 0 0 0 0 0
##
## OR OR.lower OR.upper
## [1,] 0.344809 0.159441 0.745687
##
## -------------------
## row-variable: Passos25
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 7 5 8 6 8
## Cluster1 35 7 5 8 5 8 0.911383
## Cluster2 19 7 5 8 5 8
##
## OR OR.lower OR.upper
## [1,] 0.975539 0.779626 1.220683
##
## -------------------
## row-variable: pinosdir9
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 27 25 32.75 26 28
## Cluster1 35 31 27 37 27 33 1e-06
## Cluster2 19 24 22.5 25.5 22 26
##
## OR OR.lower OR.upper
## [1,] 0.54475 0.371205 0.79943
##
## -------------------
## row-variable: pinosesq9
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 28 25.25 34 26 31
## Cluster1 35 31 27 36 28 35 6.8e-05
## Cluster2 19 25 24 27 24 27
##
## OR OR.lower OR.upper
## [1,] 0.743493 0.61489 0.898992
##
## -------------------
## row-variable: VitD
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 55.5 38 99 42.7 80
## Cluster1 35 45 36 75 38 63 0.006647
## Cluster2 19 99 53.5 99 53 99
##
## OR OR.lower OR.upper
## [1,] 1.028301 1.00686 1.050199
##
## -------------------
## row-variable: FSS
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 35.5 23.25 47 28 43
## Cluster1 35 41 31 53 35 47 0.023447
## Cluster2 19 25 18.5 37 16 38
##
## OR OR.lower OR.upper
## [1,] 0.959271 0.923711 0.996199
##
## -------------------
## row-variable: HADS_A
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 5.5 3.25 9 5 8
## Cluster1 35 5 4 9 5 8 0.501106
## Cluster2 19 6 3 9.5 3 10
##
## OR OR.lower OR.upper
## [1,] 0.947359 0.818892 1.095979
##
## -------------------
## row-variable: HADS_D
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 3 2 6.75 2 4
## Cluster1 35 4 3 8 3 7 0.000163
## Cluster2 19 1 1 3 1 3
##
## OR OR.lower OR.upper
## [1,] 0.583914 0.405043 0.841775
##
## -------------------
## row-variable: SF36
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 94 89 99 91 96
## Cluster1 35 92 88 98.5 89 96 0.197896
## Cluster2 19 96 93 99 93 99
##
## OR OR.lower OR.upper
## [1,] 1.011162 0.961446 1.063449
##
## -------------------
## row-variable: SD_escr
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 47.5 30 56.5 33 51
## Cluster1 35 33 29 48 30 45 5e-06
## Cluster2 19 59 52.5 65 52 65
##
## OR OR.lower OR.upper
## [1,] 1.127098 1.057069 1.201765
##
## -------------------
## row-variable: SD_oral
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 49 35 57 40 54
## Cluster1 35 40 29 49.5 35 48 1.3e-05
## Cluster2 19 59 54.5 65 54 65
##
## OR OR.lower OR.upper
## [1,] 1.121607 1.05119 1.196741
##
## -------------------
## row-variable: BVMT
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 10 6.25 15.75 8 13
## Cluster1 35 9 5 13 6 11 0.036071
## Cluster2 19 14 9.5 16 9 16
##
## OR OR.lower OR.upper
## [1,] 1.090579 0.993927 1.19663
##
## -------------------
## row-variable: CVLT
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 52.5 44.25 61.75 49 58
## Cluster1 35 50 39.5 58.5 45 56 0.019803
## Cluster2 19 59 50.5 65 49 66
##
## OR OR.lower OR.upper
## [1,] 1.072029 1.009655 1.138255
##
## -------------------
## row-variable: MMSE_MST
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 53 47 59.75 51 57
## Cluster1 35 51 44 54.5 46 53 0.000193
## Cluster2 19 59 55.5 63.5 55 64
##
## OR OR.lower OR.upper
## [1,] 1.157575 1.056649 1.268141
##
## -------------------
## row-variable: VolCinz
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 893 860.2 927 879 914
## Cluster1 35 873 839.5 903.5 858 889 4.6e-05
## Cluster2 19 927 907 950 900 951
##
## OR OR.lower OR.upper
## [1,] 1.034405 1.014016 1.055203
##
## -------------------
## row-variable: LesoesFlair
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 6.7 3.8 12.15 5.4 9.1
## Cluster1 35 9.1 6.2 17.75 6.8 14.7 4e-06
## Cluster2 19 3.5 1.75 4.85 1.6 5.4
##
## OR OR.lower OR.upper
## [1,] 0.645438 0.488437 0.852905
##
## -------------------
## row-variable: grupos
##
## EM EMRR EM% EMRR% p.overall
## [ALL] 34 20 62.96296 37.03704
## Cluster1 17 18 48.57143 51.42857 0.007422
## Cluster2 17 2 89.47368 10.52632
##
## OR OR.lower OR.upper
## EM 1
## EMRR 0.121746 0.015932 0.519754
##
## -------------------
## row-variable: grupos2
##
## MS RRMS MS% RRMS% p.overall
## [ALL] 42 12 77.77778 22.22222
## Cluster1 23 12 65.71429 34.28571 0.004364
## Cluster2 19 0 100 0
##
## OR OR.lower OR.upper
## MS 1
## RRMS . . .
##
## -------------------
## row-variable: Idade
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 34.5 31 41.5 32 38
## Cluster1 35 38 32 45 34 42 0.002452
## Cluster2 19 32 26.5 34.5 26 35
##
## OR OR.lower OR.upper
## [1,] 0.869639 0.786454 0.961622
##
## -------------------
## row-variable: Escolaridade
##
## N med Q1 Q3 lower upper p.overall
## [ALL] 54 13.5 11 17 11 16
## Cluster1 35 12 11 16.5 11 15 0.075759
## Cluster2 19 16 13 17 13 17
##
## OR OR.lower OR.upper
## [1,] 1.094281 0.960237 1.247037
# Publication-style table by cluster; show.ratio = TRUE adds the OR and p.ratio columns
createTable(res, show.ratio = TRUE)
##
## --------Summary descriptives table by 'clustersRF'---------
##
## _________________________________________________________________________________
## Cluster1 Cluster2 OR p.ratio p.overall
## N=35 N=19
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## TempoDoenca 7.00 [3.00;11.5] 2.00 [1.15;5.00] 0.80 [0.68;0.94] 0.008 0.002
## EDSS 2.00 [0.00;3.25] 0.00 [0.00;0.00] 0.34 [0.16;0.75] 0.007 <0.001
## Passos25 7.00 [5.00;8.00] 7.00 [5.00;8.00] 0.98 [0.78;1.22] 0.829 0.911
## pinosdir9 31.0 [27.0;37.0] 24.0 [22.5;25.5] 0.54 [0.37;0.80] 0.002 <0.001
## pinosesq9 31.0 [27.0;36.0] 25.0 [24.0;27.0] 0.74 [0.61;0.90] 0.002 <0.001
## VitD 45.0 [36.0;75.0] 99.0 [53.5;99.0] 1.03 [1.01;1.05] 0.009 0.007
## FSS 41.0 [31.0;53.0] 25.0 [18.5;37.0] 0.96 [0.92;1.00] 0.031 0.023
## HADS_A 5.00 [4.00;9.00] 6.00 [3.00;9.50] 0.95 [0.82;1.10] 0.467 0.501
## HADS_D 4.00 [3.00;8.00] 1.00 [1.00;3.00] 0.58 [0.41;0.84] 0.004 <0.001
## SF36 92.0 [88.0;98.5] 96.0 [93.0;99.0] 1.01 [0.96;1.06] 0.666 0.198
## SD_escr 33.0 [29.0;48.0] 59.0 [52.5;65.0] 1.13 [1.06;1.20] <0.001 <0.001
## SD_oral 40.0 [29.0;49.5] 59.0 [54.5;65.0] 1.12 [1.05;1.20] 0.001 <0.001
## BVMT 9.00 [5.00;13.0] 14.0 [9.50;16.0] 1.09 [0.99;1.20] 0.067 0.036
## CVLT 50.0 [39.5;58.5] 59.0 [50.5;65.0] 1.07 [1.01;1.14] 0.023 0.020
## MMSE_MST 51.0 [44.0;54.5] 59.0 [55.5;63.5] 1.16 [1.06;1.27] 0.002 <0.001
## VolCinz 873 [840;904] 927 [907;950] 1.03 [1.01;1.06] 0.001 <0.001
## LesoesFlair 9.10 [6.20;17.8] 3.50 [1.75;4.85] 0.65 [0.49;0.85] 0.002 <0.001
## grupos: 0.007
## EM 17 (48.6%) 17 (89.5%) Ref. Ref.
## EMRR 18 (51.4%) 2 (10.5%) 0.12 [0.02;0.52] 0.003
## grupos2: 0.004
## MS 23 (65.7%) 19 (100%) Ref. Ref.
## RRMS 12 (34.3%) 0 (0.00%) . [.;.] .
## Idade 38.0 [32.0;45.0] 32.0 [26.5;34.5] 0.87 [0.79;0.96] 0.006 0.002
## Escolaridade 12.0 [11.0;16.5] 16.0 [13.0;17.0] 1.09 [0.96;1.25] 0.177 0.076
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Plot each variable held in res on its own
plot(res)





















# Bivariate plots: each variable in res against the grouping variable (clustersRF)
plot(res, bivar = TRUE)





















#restab <- createTable(res, show.ratio = TRUE)
#print(restab, which.table = "avail")
#print(restab, which.table = "descr")
# Graphical analysis of the MRI data set (Ress, complete)
library(DataExplorer)
#plot_missing(Ress)
# Histograms of the continuous columns of Ress
plot_histogram(Ress)


#plot_density(Ress)
# Correlation heatmap of the continuous columns, using pairwise-complete observations
plot_correlation(Ress, type = 'continuous', cor_args = list("use" = "pairwise.complete.obs"))

# NOTE(review): this call uses `dados`, not `Ress` like every other plot in
# this section — confirm that the other data set is intended here
plot_correlation(dados, type = 'discrete', cor_args = list("use" = "pairwise.complete.obs"))

#plot_bar(Ress)
# Bar charts of the discrete columns, leaving out the 5th column of Ress
plot_bar(Ress[-5])

Gráficos de Rede
# Build a separate MRI data set with a subset of the Ress columns and short
# labels for plotting (TO BE USED IN OTHER ANALYSES)
Ress2 <- dplyr::select(
  Ress,
  Idade, Escolaridade, TempoDoenca:pinosesq9, VitD,
  FSS:SD_oral, BVMT, CVLT, CVLT_rep:LesoesImpreg
)
# Short names, in the same order as the columns selected above
names(Ress2) <- c(
  "Idade", "Escola", "TempoD", "EDSS", "25Passo", "9PinoD", "9PinoE", "VitD",
  "FSS", "HADSA", "HADSD", "SF36", "SDescr", "SDoral", "BVMT", "CVLT",
  "CVLTrep", "MMSEmst", "VolCereb", "VolCinz", "LesFlair", "LesImpr"
)
library(qgraph)
# Correlation matrix of the Ress2 variables, computed with qgraph's cor_auto()
clinic <- cor_auto(Ress2)
library(corrplot)
# Lower-triangle correlogram, variables ordered by hierarchical clustering
corrplot(clinic, type="lower", order="hclust")

# Pearson correlation network
qgraph(cor(Ress2), layout = "spring", sampleSize = nrow(Ress2), labels = colnames(Ress2))

# Partial correlation network. graph = "pcor" is required: by default qgraph
# draws the marginal correlations of the matrix it is given, not the partial ones.
qgraph(cor_auto(Ress2), graph = "pcor", layout = "spring", sampleSize = nrow(Ress2),
       labels = colnames(Ress2))

# Regularised network: graphical lasso on the correlation matrix, penalty rho = 0.1
library(glasso)
qgraph(glasso(cor_auto(Ress2), 0.1), layout = "spring", sampleSize = nrow(Ress2), labels = colnames(Ress2))

# Correlogram of the MRI variables taken straight from Ress,
# leaving out columns 2, 4-8, 14, 16, 23-25 and 27-31
library(qgraph)
library(corrplot)
clinic <- cor_auto(
  Ress[-c(2, 4:8, 14, 16, 23:25, 27:31)]
)
# Lower triangle only, variables ordered by hierarchical clustering
corrplot(clinic, order = "hclust", type = "lower")

Pontos de corte
# Pontos de corte
library(cutpointr)
# Written SDMT (SD_escr) cutpoint between the random-forest clusters,
# maximising sensitivity + specificity.
# The RNG is seeded so the 1000 bootstrap runs (and the bootstrap summary they
# produce) are reproducible, as in the other seeded cutpoint analyses.
set.seed(4)
cp1 <- cutpointr(dados2, SD_escr, clustersRF, pos_class = "Cluster1", neg_class = "Cluster2",
                 method = maximize_metric, metric = sum_sens_spec, boot_runs = 1000)
summary(cp1)
## Method: maximize_metric
## Predictor: SD_escr
## Outcome: clustersRF
## Direction: <=
## Nr. of bootstraps: 1000
##
## AUC n n_pos n_neg
## 0.8805 54 35 19
##
## optimal_cutpoint sum_sens_spec acc sensitivity specificity tp fn fp tn
## 51 1.7038 0.8704 0.9143 0.7895 32 3 4 15
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 11 23.0 30.0 47.5 44.25926 56.5 68.35 77 15.76500 0
## Cluster1 11 23.0 29.0 33.0 37.20000 48.0 54.20 68 12.08256 0
## Cluster2 19 31.6 52.5 59.0 57.26316 65.0 73.40 77 13.44058 0
##
## Bootstrap summary:
## Variable Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## optimal_cutpoint 40.00 48.00 50.00 50.25 50.40 51.00 53.00 57 1.73 0
## AUC_b 0.63 0.78 0.84 0.89 0.88 0.93 0.97 1 0.06 0
## AUC_oob 0.34 0.73 0.82 0.89 0.88 0.95 1.00 1 0.09 0
## sum_sens_spec_b 1.45 1.59 1.68 1.75 1.74 1.81 1.89 2 0.09 0
## sum_sens_spec_oob 0.75 1.33 1.51 1.63 1.62 1.73 1.88 2 0.16 0
## acc_b 0.70 0.80 0.85 0.87 0.88 0.91 0.94 1 0.05 0
## acc_oob 0.53 0.70 0.77 0.82 0.82 0.88 0.94 1 0.07 0
## sensitivity_b 0.62 0.76 0.85 0.90 0.89 0.94 0.98 1 0.07 0
## sensitivity_oob 0.43 0.64 0.79 0.87 0.85 0.92 1.00 1 0.11 0
## specificity_b 0.53 0.69 0.79 0.86 0.85 0.92 1.00 1 0.09 0
## specificity_oob 0.00 0.50 0.67 0.78 0.77 0.88 1.00 1 0.16 0
## cohens_kappa_b 0.43 0.57 0.67 0.73 0.73 0.80 0.88 1 0.10 0
## cohens_kappa_oob -0.17 0.33 0.49 0.61 0.60 0.71 0.86 1 0.16 0
# Diagnostic plots of the written-SDMT cutpoint for the clusters
plot(cp1)

# Metric value over the candidate cutpoints
plot_metric(cp1)

# Written SDMT, robust method: the cutpoint maximising accuracy is searched in
# 200 bootstrap samples and the results are summarised by their mean
set.seed(4)
cp1.1 <- cutpointr(
  dados2, SD_escr, clustersRF,
  method = maximize_boot_metric, metric = accuracy,
  boot_cut = 200, summary_func = mean,
  pos_class = "Cluster1", neg_class = "Cluster2",
  silent = TRUE
)
summary(cp1.1)
## Method: maximize_boot_metric
## Predictor: SD_escr
## Outcome: clustersRF
## Direction: <=
##
## AUC n n_pos n_neg
## 0.8805 54 35 19
##
## optimal_cutpoint accuracy acc sensitivity specificity tp fn fp tn
## 51.8728 0.8704 0.8704 0.9143 0.7895 32 3 4 15
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 11 23.0 30.0 47.5 44.25926 56.5 68.35 77 15.76500 0
## Cluster1 11 23.0 29.0 33.0 37.20000 48.0 54.20 68 12.08256 0
## Cluster2 19 31.6 52.5 59.0 57.26316 65.0 73.40 77 13.44058 0
# Plot the robust written-SDMT cutpoint for the clusters
plot(cp1.1)

# Written SDMT cutpoint between the EM and EMRR groups, maximising
# sensitivity + specificity.
# The RNG is seeded so the 1000 bootstrap runs are reproducible.
set.seed(4)
cp3 <- cutpointr(dados2, SD_escr, grupos, pos_class = "EMRR", neg_class = "EM",
                 method = maximize_metric, metric = sum_sens_spec, boot_runs = 1000)
summary(cp3)
## Method: maximize_metric
## Predictor: SD_escr
## Outcome: grupos
## Direction: <=
## Nr. of bootstraps: 1000
##
## AUC n n_pos n_neg
## 0.761 54 20 34
##
## optimal_cutpoint sum_sens_spec acc sensitivity specificity tp fn fp tn
## 50 1.4588 0.6852 0.9 0.5588 18 2 15 19
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 11 23.00 30.00 47.5 44.25926 56.50 68.35 77 15.76500 0
## EM 19 25.65 35.25 51.0 49.55882 61.50 70.40 77 15.71178 0
## EMRR 11 22.40 29.00 32.5 35.25000 47.25 52.05 53 11.35028 0
##
## Bootstrap summary:
## Variable Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD
## optimal_cutpoint 23.00 33.00 38.00 50.00 45.93 52.00 53.00 53.00 7.21
## AUC_b 0.46 0.65 0.72 0.77 0.76 0.81 0.86 0.94 0.07
## AUC_oob 0.46 0.60 0.70 0.76 0.76 0.82 0.90 1.00 0.09
## sum_sens_spec_b 0.97 1.35 1.45 1.52 1.52 1.58 1.67 1.80 0.10
## sum_sens_spec_oob 0.50 1.03 1.22 1.33 1.32 1.44 1.58 1.72 0.17
## acc_b 0.41 0.59 0.69 0.72 0.72 0.78 0.81 0.91 0.07
## acc_oob 0.35 0.50 0.58 0.64 0.64 0.70 0.77 0.88 0.08
## sensitivity_b 0.43 0.67 0.82 0.93 0.89 1.00 1.00 1.00 0.11
## sensitivity_oob 0.00 0.36 0.60 0.80 0.75 1.00 1.00 1.00 0.22
## specificity_b 0.24 0.39 0.52 0.63 0.62 0.73 0.84 1.00 0.14
## specificity_oob 0.10 0.31 0.44 0.56 0.57 0.69 0.85 1.00 0.17
## cohens_kappa_b -0.02 0.29 0.39 0.46 0.46 0.54 0.63 0.81 0.11
## cohens_kappa_oob -0.36 0.03 0.19 0.29 0.29 0.39 0.55 0.73 0.15
## NAs
## 0
## 0
## 0
## 0
## 0
## 0
## 0
## 0
## 0
## 0
## 0
## 0
## 0
# Diagnostic plots of the written-SDMT cutpoint for EM vs EMRR
plot(cp3)

# Metric value over the candidate cutpoints
plot_metric(cp3)

# Written SDMT, EM vs EMRR, robust method: accuracy-maximising cutpoint
# averaged over 200 bootstrap samples
set.seed(4)
cp3.1 <- cutpointr(
  dados2, SD_escr, grupos,
  method = maximize_boot_metric, metric = accuracy,
  boot_cut = 200, summary_func = mean,
  pos_class = "EMRR", neg_class = "EM",
  silent = TRUE
)
summary(cp3.1)
## Method: maximize_boot_metric
## Predictor: SD_escr
## Outcome: grupos
## Direction: <=
##
## AUC n n_pos n_neg
## 0.761 54 20 34
##
## optimal_cutpoint accuracy acc sensitivity specificity tp fn fp tn
## 36.8909 0.7037 0.7037 0.65 0.7353 13 7 9 25
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 11 23.00 30.00 47.5 44.25926 56.50 68.35 77 15.76500 0
## EM 19 25.65 35.25 51.0 49.55882 61.50 70.40 77 15.71178 0
## EMRR 11 22.40 29.00 32.5 35.25000 47.25 52.05 53 11.35028 0
# Plot the robust written-SDMT cutpoint for EM vs EMRR
plot(cp3.1)

# Written SDMT cutpoint between the MS and RRMS groups, maximising
# sensitivity + specificity.
# The RNG is seeded so the 1000 bootstrap runs are reproducible.
set.seed(4)
cp4 <- cutpointr(dados2, SD_escr, grupos2, pos_class = "RRMS", neg_class = "MS",
                 method = maximize_metric, metric = sum_sens_spec, boot_runs = 1000)
summary(cp4)
## Method: maximize_metric
## Predictor: SD_escr
## Outcome: grupos2
## Direction: <=
## Nr. of bootstraps: 1000
##
## AUC n n_pos n_neg
## 0.7837 54 12 42
##
## optimal_cutpoint sum_sens_spec acc sensitivity specificity tp fn fp tn
## 40.5 1.369 0.6481 0.75 0.619 9 3 16 26
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 11 23.0 30.00 47.5 44.25926 56.50 68.35 77 15.76500 0
## MS 19 26.0 33.25 50.0 47.64286 58.75 68.95 77 15.18796 0
## RRMS 11 17.6 27.50 30.0 32.41667 36.50 50.25 53 11.87402 0
##
## Bootstrap summary:
## Variable Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD
## optimal_cutpoint 11.00 30.00 33.00 33.00 39.52 48.00 48.00 53.00 8.33
## AUC_b 0.50 0.66 0.74 0.79 0.78 0.83 0.89 0.96 0.07
## AUC_oob 0.34 0.59 0.72 0.79 0.78 0.86 0.94 1.00 0.10
## sum_sens_spec_b 1.16 1.36 1.48 1.56 1.55 1.63 1.74 1.93 0.12
## sum_sens_spec_oob 0.42 0.98 1.21 1.36 1.35 1.50 1.71 1.93 0.22
## acc_b 0.39 0.56 0.67 0.74 0.72 0.78 0.85 0.96 0.09
## acc_oob 0.22 0.48 0.59 0.67 0.66 0.73 0.82 0.94 0.11
## sensitivity_b 0.30 0.64 0.80 0.89 0.87 1.00 1.00 1.00 0.12
## sensitivity_oob 0.00 0.20 0.50 0.75 0.70 1.00 1.00 1.00 0.26
## specificity_b 0.20 0.47 0.60 0.69 0.68 0.78 0.87 1.00 0.13
## specificity_oob 0.12 0.36 0.54 0.67 0.65 0.77 0.89 1.00 0.16
## cohens_kappa_b 0.10 0.21 0.33 0.42 0.42 0.49 0.62 0.84 0.12
## cohens_kappa_oob -0.28 -0.02 0.15 0.25 0.25 0.36 0.53 0.82 0.17
## NAs
## 0
## 0
## 1
## 0
## 1
## 0
## 0
## 0
## 1
## 0
## 0
## 0
## 0
# Diagnostic plots of the written-SDMT cutpoint for MS vs RRMS
plot(cp4)

# Metric value over the candidate cutpoints
plot_metric(cp4)

# Written SDMT, MS vs RRMS, robust method: accuracy-maximising cutpoint
# averaged over 200 bootstrap samples.
# NOTE(review): accuracy is optimised although the classes are imbalanced
# (12 RRMS vs 42 MS) — confirm this metric is the intended one.
set.seed(4)
cp4.1 <- cutpointr(
  dados2, SD_escr, grupos2,
  method = maximize_boot_metric, metric = accuracy,
  boot_cut = 200, summary_func = mean,
  pos_class = "RRMS", neg_class = "MS",
  silent = TRUE
)
summary(cp4.1)
## Method: maximize_boot_metric
## Predictor: SD_escr
## Outcome: grupos2
## Direction: <=
##
## AUC n n_pos n_neg
## 0.7837 54 12 42
##
## optimal_cutpoint accuracy acc sensitivity specificity tp fn fp tn
## 23.2105 0.8148 0.8148 0.25 0.9762 3 9 1 41
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 11 23.0 30.00 47.5 44.25926 56.50 68.35 77 15.76500 0
## MS 19 26.0 33.25 50.0 47.64286 58.75 68.95 77 15.18796 0
## RRMS 11 17.6 27.50 30.0 32.41667 36.50 50.25 53 11.87402 0
# Plot the robust written-SDMT cutpoint for MS vs RRMS
plot(cp4.1)

# Oral SDMT (SD_oral) cutpoint between the random-forest clusters,
# maximising sensitivity + specificity (no bootstrap)
cp5 <- cutpointr(
  dados2, SD_oral, clustersRF,
  method = maximize_metric, metric = sum_sens_spec,
  pos_class = "Cluster1", neg_class = "Cluster2"
)
summary(cp5)
## Method: maximize_metric
## Predictor: SD_oral
## Outcome: clustersRF
## Direction: <=
##
## AUC n n_pos n_neg
## 0.8617 54 35 19
##
## optimal_cutpoint sum_sens_spec acc sensitivity specificity tp fn fp tn
## 50 1.6662 0.8148 0.7714 0.8947 27 8 2 17
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 12 23.0 35.0 49 46.29630 57.0 69.4 77 15.44391 0
## Cluster1 12 23.0 29.0 40 39.88571 49.5 57.3 68 12.42334 0
## Cluster2 19 31.6 54.5 59 58.10526 65.0 73.4 77 13.55194 0
# Diagnostic plots of the oral-SDMT cutpoint for the clusters
plot(cp5)

# Oral SDMT, clusters, robust method: accuracy-maximising cutpoint averaged
# over 200 bootstrap samples
set.seed(4)
cp5.1 <- cutpointr(
  dados2, SD_oral, clustersRF,
  method = maximize_boot_metric, metric = accuracy,
  boot_cut = 200, summary_func = mean,
  pos_class = "Cluster1", neg_class = "Cluster2",
  silent = TRUE
)
summary(cp5.1)
## Method: maximize_boot_metric
## Predictor: SD_oral
## Outcome: clustersRF
## Direction: <=
##
## AUC n n_pos n_neg
## 0.8617 54 35 19
##
## optimal_cutpoint accuracy acc sensitivity specificity tp fn fp tn
## 54.0573 0.8519 0.8519 0.9143 0.7368 32 3 5 14
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 12 23.0 35.0 49 46.29630 57.0 69.4 77 15.44391 0
## Cluster1 12 23.0 29.0 40 39.88571 49.5 57.3 68 12.42334 0
## Cluster2 19 31.6 54.5 59 58.10526 65.0 73.4 77 13.55194 0
# Plot the robust oral-SDMT cutpoint for the clusters
plot(cp5.1)

# Oral SDMT cutpoint between the EM and EMRR groups,
# maximising sensitivity + specificity (no bootstrap)
cp6 <- cutpointr(
  dados2, SD_oral, grupos,
  method = maximize_metric, metric = sum_sens_spec,
  pos_class = "EMRR", neg_class = "EM"
)
summary(cp6)
## Method: maximize_metric
## Predictor: SD_oral
## Outcome: grupos
## Direction: <=
##
## AUC n n_pos n_neg
## 0.714 54 20 34
##
## optimal_cutpoint sum_sens_spec acc sensitivity specificity tp fn fp tn
## 41 1.4059 0.7037 0.7 0.7059 14 6 10 24
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 12 23.00 35.00 49.0 46.29630 57.00 69.40 77 15.44391 0
## EM 19 25.65 40.25 53.0 50.61765 62.25 72.35 77 15.45279 0
## EMRR 12 22.45 29.00 39.5 38.95000 50.75 57.05 58 12.66356 0
# Diagnostic plots of the oral-SDMT cutpoint for EM vs EMRR
plot(cp6)

# Oral SDMT, EM vs EMRR, robust method: accuracy-maximising cutpoint averaged
# over 200 bootstrap samples
set.seed(4)
cp6.1 <- cutpointr(
  dados2, SD_oral, grupos,
  method = maximize_boot_metric, metric = accuracy,
  boot_cut = 200, summary_func = mean,
  pos_class = "EMRR", neg_class = "EM",
  silent = TRUE
)
summary(cp6.1)
## Method: maximize_boot_metric
## Predictor: SD_oral
## Outcome: grupos
## Direction: <=
##
## AUC n n_pos n_neg
## 0.714 54 20 34
##
## optimal_cutpoint accuracy acc sensitivity specificity tp fn fp tn
## 36.1298 0.6667 0.6667 0.45 0.7941 9 11 7 27
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 12 23.00 35.00 49.0 46.29630 57.00 69.40 77 15.44391 0
## EM 19 25.65 40.25 53.0 50.61765 62.25 72.35 77 15.45279 0
## EMRR 12 22.45 29.00 39.5 38.95000 50.75 57.05 58 12.66356 0
# Plot the robust oral-SDMT cutpoint for EM vs EMRR
plot(cp6.1)

# Oral SDMT cutpoint between the MS and RRMS groups,
# maximising sensitivity + specificity (no bootstrap)
cp7 <- cutpointr(
  dados2, SD_oral, grupos2,
  method = maximize_metric, metric = sum_sens_spec,
  pos_class = "RRMS", neg_class = "MS"
)
summary(cp7)
## Method: maximize_metric
## Predictor: SD_oral
## Outcome: grupos2
## Direction: <=
##
## AUC n n_pos n_neg
## 0.745 54 12 42
##
## optimal_cutpoint sum_sens_spec acc sensitivity specificity tp fn fp tn
## 38 1.3452 0.7222 0.5833 0.7619 7 5 10 32
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 12 23.00 35.00 49.0 46.29630 57.00 69.40 77 15.44391 0
## MS 19 26.00 39.25 52.5 49.30952 58.75 71.80 77 14.72759 0
## RRMS 12 18.05 27.50 35.0 35.75000 43.25 55.25 58 13.59228 0
# Diagnostic plots of the oral-SDMT cutpoint for MS vs RRMS
plot(cp7)

# Oral SDMT, MS vs RRMS, robust method: accuracy-maximising cutpoint averaged
# over 200 bootstrap samples.
# NOTE(review): accuracy is optimised although the classes are imbalanced
# (12 RRMS vs 42 MS) — confirm this metric is the intended one.
set.seed(4)
cp7.1 <- cutpointr(
  dados2, SD_oral, grupos2,
  method = maximize_boot_metric, metric = accuracy,
  boot_cut = 200, summary_func = mean,
  pos_class = "RRMS", neg_class = "MS",
  silent = TRUE
)
summary(cp7.1)
## Method: maximize_boot_metric
## Predictor: SD_oral
## Outcome: grupos2
## Direction: <=
##
## AUC n n_pos n_neg
## 0.745 54 12 42
##
## optimal_cutpoint accuracy acc sensitivity specificity tp fn fp tn
## 23.2 0.8148 0.8148 0.25 0.9762 3 9 1 41
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 12 23.00 35.00 49.0 46.29630 57.00 69.40 77 15.44391 0
## MS 19 26.00 39.25 52.5 49.30952 58.75 71.80 77 14.72759 0
## RRMS 12 18.05 27.50 35.0 35.75000 43.25 55.25 58 13.59228 0
# Plot the robust oral-SDMT cutpoint for MS vs RRMS
plot(cp7.1)

Machine Learning
Método XGBoost
# https://rpubs.com/dalekube/XGBoost-Iris-Classification-Example-in-R#:~:text=XGBoost%20(Extreme%20Gradient%20Boosting)%20is,%2Dclass%20(multinomial)%20classification.
library(xgboost)
# XGBoost requires the class label as an integer starting at 0, so the
# clustersRF factor is converted and then removed from the feature table.
dados4 <- dados2 %>% dplyr::select(SD_escr:MMSE_MST, clustersRF)
clusters <- dados4$clustersRF
label <- as.integer(dados4$clustersRF) - 1
dados4$clustersRF <- NULL
# Random 80/20 train/test split. The RNG is seeded so the split, and therefore
# the accuracy reported at the end, can be reproduced.
set.seed(4)
n <- nrow(dados4)
train.index <- sample(n, floor(0.8 * n))
train.data <- as.matrix(dados4[train.index, ])
train.label <- label[train.index]
test.data <- as.matrix(dados4[-train.index, ])
test.label <- label[-train.index]
# Transform the two data sets into xgb.DMatrix objects
xgb.train <- xgb.DMatrix(data = train.data, label = train.label)
xgb.test <- xgb.DMatrix(data = test.data, label = test.label)
# Define the parameters for multinomial classification
# (one predicted probability per class of `clusters`)
num_class <- length(levels(clusters))
params <- list(
  booster = "gbtree",
  eta = 0.001,
  max_depth = 5,
  gamma = 3,
  subsample = 0.7,
  colsample_bytree = 1,
  objective = "multi:softprob",
  eval_metric = "mlogloss",
  num_class = num_class
)
# Train the XGBoost classifier.
# The thread-count parameter is `nthread`; the former spelling `nthreads` was
# not recognised by XGBoost and only triggered a "might not be used" warning.
# NOTE(review): early stopping monitors the last watchlist entry (val2, the
# test set), so the test data influences the number of boosting rounds —
# confirm this is acceptable for the accuracy reported afterwards.
xgb.fit <- xgb.train(
  params = params,
  data = xgb.train,
  nrounds = 10000,
  nthread = 1,
  early_stopping_rounds = 10,
  watchlist = list(val1 = xgb.train, val2 = xgb.test),
  verbose = 0
)
## [12:08:36] WARNING: amalgamation/../src/learner.cc:541:
## Parameters: { nthreads } might not be used.
##
## This may not be accurate due to some parameters are only used in language bindings but
## passed down to XGBoost core. Or some parameters are not used but slip through this
## verification. Please open an issue if you find above cases.
# Review the final model: prints the call, parameters, early-stopping
# attributes and the evaluation log of the fitted booster
xgb.fit
## ##### xgb.Booster
## raw: 1.2 Mb
## call:
## xgb.train(params = params, data = xgb.train, nrounds = 10000,
## watchlist = list(val1 = xgb.train, val2 = xgb.test), verbose = 0,
## early_stopping_rounds = 10, nthreads = 1)
## params (as set within xgb.train):
## booster = "gbtree", eta = "0.001", max_depth = "5", gamma = "3", subsample = "0.7", colsample_bytree = "1", objective = "multi:softprob", eval_metric = "mlogloss", num_class = "2", nthreads = "1", validate_parameters = "TRUE"
## xgb.attributes:
## best_iteration, best_msg, best_ntreelimit, best_score, niter
## callbacks:
## cb.evaluation.log()
## cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize,
## verbose = verbose)
## # of features: 5
## niter: 715
## best_iteration : 705
## best_ntreelimit : 705
## best_score : 0.604711
## best_msg : [705] val1-mlogloss:0.463218 val2-mlogloss:0.604711
## nfeatures : 5
## evaluation_log:
## iter val1_mlogloss val2_mlogloss
## 1 0.692599 0.692823
## 2 0.692002 0.692527
## ---
## 714 0.462019 0.604747
## 715 0.461867 0.604738
# Class probabilities for the held-out rows, one column per class level
xgb.pred <- as.data.frame(predict(xgb.fit, test.data, reshape = TRUE))
colnames(xgb.pred) <- levels(clusters)
# Predicted class = column with the highest probability in each row
xgb.pred$prediction <- levels(clusters)[apply(as.matrix(xgb.pred), 1, which.max)]
# True class, mapped back from the 0-based integer label
xgb.pred$label <- levels(clusters)[test.label + 1]
# Final accuracy: share of held-out rows whose prediction matches the label
result <- mean(xgb.pred$prediction == xgb.pred$label)
print(paste("Final Accuracy =", sprintf("%1.2f%%", 100 * result)))
## [1] "Final Accuracy = 72.73%"