Analisis exploratorio base de datos

# Establecer el directorio de trabajo 

library(readxl)
DATA_SET <- read_excel("DATA_SET.xlsx") #cargue base de datos

library(summarytools)
print(dfSummary(DATA_SET), method = 'render')

Data Frame Summary

DATA_SET

Dimensions: 200 x 180
Duplicates: 0
No Variable Stats / Values Freqs (% of Valid) Graph Valid Missing
1 ID [numeric]
Mean (sd) : 100.5 (57.9)
min ≤ med ≤ max:
1 ≤ 100.5 ≤ 200
IQR (CV) : 99.5 (0.6)
200 distinct values 200 (100.0%) 0 (0.0%)
2 CE_MUJER [character] 1. Si
200(100.0%)
200 (100.0%) 0 (0.0%)
3 CE_EDAD_18 [character] 1. Si
200(100.0%)
200 (100.0%) 0 (0.0%)
4 CE_DX_mama [character] 1. Si
200(100.0%)
200 (100.0%) 0 (0.0%)
5 CE_TRAT [character] 1. Si
200(100.0%)
200 (100.0%) 0 (0.0%)
6 CE_TRAT_2 [character]
1. No
2. Si
140(70.0%)
60(30.0%)
200 (100.0%) 0 (0.0%)
7 CE_libre_cancer [character] 1. Si
200(100.0%)
200 (100.0%) 0 (0.0%)
8 CE_CJO [character]
1. No
2. Si
2(1.0%)
198(99.0%)
200 (100.0%) 0 (0.0%)
9 CE_COM_mental [character] 1. No
200(100.0%)
200 (100.0%) 0 (0.0%)
10 CE_CAMA [character]
1. No
2. Si
199(99.5%)
1(0.5%)
200 (100.0%) 0 (0.0%)
11 CE_PARTICIPACION [character] 1. Si
200(100.0%)
200 (100.0%) 0 (0.0%)
12 CE_CONSENTIMIENTO [character] 1. Si
200(100.0%)
200 (100.0%) 0 (0.0%)
13 CE_COMPLETO [character] 1. Complete
200(100.0%)
200 (100.0%) 0 (0.0%)
14 qlq_c30_1 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
42(21.4%)
17(8.7%)
77(39.3%)
60(30.6%)
196 (98.0%) 4 (2.0%)
15 qlq_c30_2 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
20(10.0%)
10(5.0%)
132(66.0%)
38(19.0%)
200 (100.0%) 0 (0.0%)
16 qlq_c30_3 [character]
1. Bastante
2. Para nada
3. Un poco
6(3.0%)
172(86.0%)
22(11.0%)
200 (100.0%) 0 (0.0%)
17 qlq_c30_4 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
8(4.0%)
1(0.5%)
171(85.5%)
20(10.0%)
200 (100.0%) 0 (0.0%)
18 qlq_c30_5 [character]
1. Bastante
2. Para nada
3. Un poco
1(0.5%)
194(97.0%)
5(2.5%)
200 (100.0%) 0 (0.0%)
19 qlq_c30_6 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
16(8.0%)
4(2.0%)
144(72.0%)
36(18.0%)
200 (100.0%) 0 (0.0%)
20 qlq_c30_7 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
6(3.0%)
4(2.0%)
179(89.5%)
11(5.5%)
200 (100.0%) 0 (0.0%)
21 qlq_c30_8 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
8(4.0%)
1(0.5%)
158(79.0%)
33(16.5%)
200 (100.0%) 0 (0.0%)
22 qlq_c30_9 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
29(14.5%)
15(7.5%)
79(39.5%)
77(38.5%)
200 (100.0%) 0 (0.0%)
23 qlq_c30_10 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
21(10.5%)
5(2.5%)
101(50.5%)
73(36.5%)
200 (100.0%) 0 (0.0%)
24 qlq_c30_11 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
23(11.5%)
15(7.5%)
94(47.0%)
68(34.0%)
200 (100.0%) 0 (0.0%)
25 qlq_c30_12 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
17(8.5%)
3(1.5%)
138(69.0%)
42(21.0%)
200 (100.0%) 0 (0.0%)
26 qlq_c30_13 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
8(4.0%)
3(1.5%)
174(87.0%)
15(7.5%)
200 (100.0%) 0 (0.0%)
27 qlq_c30_14 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
2(1.0%)
3(1.5%)
175(87.5%)
20(10.0%)
200 (100.0%) 0 (0.0%)
28 qlq_c30_15 [character]
1. Para nada
2. Un poco
194(97.0%)
6(3.0%)
200 (100.0%) 0 (0.0%)
29 qlq_c30_16 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
11(5.5%)
2(1.0%)
148(74.0%)
39(19.5%)
200 (100.0%) 0 (0.0%)
30 qlq_c30_17 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
5(2.5%)
2(1.0%)
179(89.5%)
14(7.0%)
200 (100.0%) 0 (0.0%)
31 qlq_c30_18 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
21(10.5%)
9(4.5%)
90(45.0%)
80(40.0%)
200 (100.0%) 0 (0.0%)
32 qlq_c30_19 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
24(12.0%)
9(4.5%)
120(60.0%)
47(23.5%)
200 (100.0%) 0 (0.0%)
33 qlq_c30_20 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
18(9.0%)
4(2.0%)
137(68.5%)
41(20.5%)
200 (100.0%) 0 (0.0%)
34 qlq_c30_21 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
23(11.5%)
6(3.0%)
125(62.5%)
46(23.0%)
200 (100.0%) 0 (0.0%)
35 qlq_c30_22 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
26(13.0%)
14(7.0%)
88(44.0%)
72(36.0%)
200 (100.0%) 0 (0.0%)
36 qlq_c30_23 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
15(7.5%)
8(4.0%)
135(67.5%)
42(21.0%)
200 (100.0%) 0 (0.0%)
37 qlq_c30_24 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
15(7.5%)
13(6.5%)
118(59.0%)
54(27.0%)
200 (100.0%) 0 (0.0%)
38 qlq_c30_25 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
26(13.0%)
3(1.5%)
97(48.5%)
74(37.0%)
200 (100.0%) 0 (0.0%)
39 qlq_c30_26 [character]
1. Bastante
2. Para nada
3. Un poco
8(4.0%)
170(85.0%)
22(11.0%)
200 (100.0%) 0 (0.0%)
40 qlq_c30_27 [character]
1. Bastante
2. Para nada
3. Un poco
10(5.0%)
169(84.5%)
21(10.5%)
200 (100.0%) 0 (0.0%)
41 qlq_c30_28 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
11(5.5%)
4(2.0%)
160(80.0%)
25(12.5%)
200 (100.0%) 0 (0.0%)
42 qlq_c30_29 [numeric]
Mean (sd) : 5.6 (1.1)
min ≤ med ≤ max:
1 ≤ 6 ≤ 7
IQR (CV) : 1 (0.2)
1:1(0.5%)
2:1(0.5%)
3:8(4.0%)
4:20(10.0%)
5:61(30.5%)
6:63(31.5%)
7:46(23.0%)
200 (100.0%) 0 (0.0%)
43 qlq_c30_30 [numeric]
Mean (sd) : 5.8 (1.2)
min ≤ med ≤ max:
2 ≤ 6 ≤ 7
IQR (CV) : 2 (0.2)
2:4(2.0%)
3:7(3.5%)
4:12(6.0%)
5:51(25.5%)
6:64(32.0%)
7:62(31.0%)
200 (100.0%) 0 (0.0%)
44 qlq_c30_31 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
31(15.5%)
10(5.0%)
97(48.5%)
62(31.0%)
200 (100.0%) 0 (0.0%)
45 qlq_c30_32 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
3(1.5%)
1(0.5%)
179(89.5%)
17(8.5%)
200 (100.0%) 0 (0.0%)
46 qlq_c30_33 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
21(10.5%)
8(4.0%)
103(51.5%)
68(34.0%)
200 (100.0%) 0 (0.0%)
47 qlq_c30_34 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
32(16.0%)
12(6.0%)
114(57.0%)
42(21.0%)
200 (100.0%) 0 (0.0%)
48 qlq_c30_35 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
18(9.6%)
12(6.4%)
135(72.2%)
22(11.8%)
187 (93.5%) 13 (6.5%)
49 qlq_c30_36 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
26(13.0%)
5(2.5%)
124(62.0%)
45(22.5%)
200 (100.0%) 0 (0.0%)
50 qlq_c30_37 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
35(17.5%)
19(9.5%)
100(50.0%)
46(23.0%)
200 (100.0%) 0 (0.0%)
51 qlq_c30_38 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
26(13.0%)
3(1.5%)
119(59.5%)
52(26.0%)
200 (100.0%) 0 (0.0%)
52 qlq_c30_39 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
12(6.0%)
7(3.5%)
151(75.5%)
30(15.0%)
200 (100.0%) 0 (0.0%)
53 qlq_c30_40 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
4(2.0%)
2(1.0%)
175(87.5%)
19(9.5%)
200 (100.0%) 0 (0.0%)
54 qlq_c30_41 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
4(2.0%)
4(2.0%)
163(81.5%)
29(14.5%)
200 (100.0%) 0 (0.0%)
55 qlq_c30_42 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
1(0.5%)
5(2.5%)
162(81.0%)
32(16.0%)
200 (100.0%) 0 (0.0%)
56 qlq_c30_43 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
48(24.0%)
13(6.5%)
74(37.0%)
65(32.5%)
200 (100.0%) 0 (0.0%)
57 qlq_c30_44 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
13(6.5%)
3(1.5%)
140(70.0%)
44(22.0%)
200 (100.0%) 0 (0.0%)
58 qlq_c30_45 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
11(5.5%)
2(1.0%)
140(70.0%)
47(23.5%)
200 (100.0%) 0 (0.0%)
59 qlq_c30_46 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
23(12.9%)
2(1.1%)
124(69.7%)
29(16.3%)
178 (89.0%) 22 (11.0%)
60 qlq_c30_47 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
26(13.0%)
11(5.5%)
85(42.5%)
78(39.0%)
200 (100.0%) 0 (0.0%)
61 qlq_c30_48 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
15(7.5%)
6(3.0%)
155(77.5%)
24(12.0%)
200 (100.0%) 0 (0.0%)
62 qlq_c30_49 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
12(6.0%)
5(2.5%)
134(67.0%)
49(24.5%)
200 (100.0%) 0 (0.0%)
63 qlq_c30_50 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
12(6.0%)
4(2.0%)
127(63.5%)
57(28.5%)
200 (100.0%) 0 (0.0%)
64 qlq_c30_51 [character]
1. Bastante
2. Para nada
3. Un poco
4(2.0%)
181(90.5%)
15(7.5%)
200 (100.0%) 0 (0.0%)
65 qlq_c30_52 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
15(7.5%)
6(3.0%)
128(64.0%)
51(25.5%)
200 (100.0%) 0 (0.0%)
66 qlq_c30_53 [character]
1. Bastante
2. Mucho
3. Para nada
4. Un poco
9(4.5%)
4(2.0%)
151(75.5%)
36(18.0%)
200 (100.0%) 0 (0.0%)
67 EDAD [numeric]
Mean (sd) : 65.1 (9.5)
min ≤ med ≤ max:
37 ≤ 65 ≤ 93
IQR (CV) : 13 (0.1)
44 distinct values 200 (100.0%) 0 (0.0%)
68 ESTRATO [numeric]
Mean (sd) : 2.7 (0.9)
min ≤ med ≤ max:
1 ≤ 3 ≤ 6
IQR (CV) : 1 (0.3)
1:12(6.0%)
2:70(35.0%)
3:84(42.0%)
4:27(13.5%)
5:5(2.5%)
6:2(1.0%)
200 (100.0%) 0 (0.0%)
69 AFILIACION_SALUD [character]
1. Contributivo
2. Subsidiado
194(97.0%)
6(3.0%)
200 (100.0%) 0 (0.0%)
70 EDUCACION [character]
1. Educación básica (primari
2. Educación media (bachille
3. Educación preescolar
4. Educación superior (unive
5. Educación técnica
6. Posgrado (especializacion
7. Sin educación formal
73(36.5%)
42(21.0%)
5(2.5%)
27(13.5%)
37(18.5%)
12(6.0%)
4(2.0%)
200 (100.0%) 0 (0.0%)
71 SIT_MARITAL [character]
1. Casado(a)
2. Divorciado(a)
3. Separado(a)
4. Soltero(a)
5. Unión libre
6. Viudo(a)
76(38.2%)
1(0.5%)
20(10.1%)
45(22.6%)
23(11.6%)
34(17.1%)
199 (99.5%) 1 (0.5%)
72 OCUPACION [character]
1. Ama de Casa
2. Pensionado
3. Hogar
4. Vendedor
5. Auxiliar Varios
6. Cesante
7. Personal de Servidumbre
8. Administrador de Empresas
9. Contadores
10. Empleados de Oficina
[ 11 others ]
59(29.5%)
56(28.0%)
32(16.0%)
9(4.5%)
5(2.5%)
5(2.5%)
5(2.5%)
4(2.0%)
4(2.0%)
4(2.0%)
17(8.5%)
200 (100.0%) 0 (0.0%)
73 FRACCION_OCUPACIONES [numeric]
Mean (sd) : 60.5 (30.6)
min ≤ med ≤ max:
1 ≤ 56.5 ≤ 100
IQR (CV) : 55.2 (0.5)
31 distinct values 200 (100.0%) 0 (0.0%)
74 SATISFACCION_ATENCION_SALUD [character]
1. Extremadamente satisfecho
2. Moderadamente satisfecho
3. Muy satisfecho
4. Poco satisfecho
99(49.5%)
11(5.5%)
85(42.5%)
5(2.5%)
200 (100.0%) 0 (0.0%)
75 PERCEPCION_RELIGION [numeric]
Mean (sd) : 8.1 (2.3)
min ≤ med ≤ max:
0 ≤ 9 ≤ 10
IQR (CV) : 3 (0.3)
0:4(2.0%)
1:1(0.5%)
3:4(2.0%)
4:1(0.5%)
5:26(13.0%)
6:9(4.5%)
7:12(6.0%)
8:40(20.0%)
9:17(8.5%)
10:86(43.0%)
200 (100.0%) 0 (0.0%)
76 PERCEPCION_ESPIRITUAL [numeric]
Mean (sd) : 8.5 (1.9)
min ≤ med ≤ max:
0 ≤ 9 ≤ 10
IQR (CV) : 3 (0.2)
0:2(1.0%)
5:17(8.5%)
6:8(4.0%)
7:25(12.5%)
8:34(17.0%)
9:17(8.5%)
10:97(48.5%)
200 (100.0%) 0 (0.0%)
77 PERCEPCION_SOPORTE_SOCIAL [numeric]
Mean (sd) : 8.6 (2)
min ≤ med ≤ max:
1 ≤ 10 ≤ 10
IQR (CV) : 2 (0.2)
1:1(0.5%)
2:3(1.5%)
3:3(1.5%)
4:2(1.0%)
5:13(6.5%)
6:7(3.5%)
7:11(5.5%)
8:29(14.5%)
9:26(13.0%)
10:105(52.5%)
200 (100.0%) 0 (0.0%)
78 COMPLETE_SOCIODEMOGRAFICO [character] 1. Complete
200(100.0%)
200 (100.0%) 0 (0.0%)
79 TIEMPO_SUPERVIVENCIA [numeric]
Mean (sd) : 8.2 (4.8)
min ≤ med ≤ max:
1 ≤ 7 ≤ 26
IQR (CV) : 6 (0.6)
23 distinct values 200 (100.0%) 0 (0.0%)
80 EDAD_DX [numeric]
Mean (sd) : 56.9 (10.1)
min ≤ med ≤ max:
31 ≤ 57.5 ≤ 90
IQR (CV) : 14.2 (0.2)
43 distinct values 200 (100.0%) 0 (0.0%)
81 LATERIALDAD_MAMA_DERECHO [character]
1. Checked
2. Unchecked
108(54.0%)
92(46.0%)
200 (100.0%) 0 (0.0%)
82 LATERIALDAD_MAMA_IZQUIERDO [character]
1. Checked
2. Unchecked
90(45.0%)
110(55.0%)
200 (100.0%) 0 (0.0%)
83 LATERIALDAD_MAMA_BILATERAL [character]
1. Checked
2. Unchecked
3(1.5%)
197(98.5%)
200 (100.0%) 0 (0.0%)
84 ESTADIFICACION_MAMA_NCCN [character]
1. 0
2. IA
3. IB
4. IIA
5. IIB
6. IIIA
7. IIIB
8. IIIC
9. IV
10. No reporta
2(1.0%)
50(25.0%)
3(1.5%)
52(26.0%)
37(18.5%)
26(13.0%)
9(4.5%)
5(2.5%)
2(1.0%)
14(7.0%)
200 (100.0%) 0 (0.0%)
85 TIPO_HISTOLOGICO_CIE_10 [character]
1. Adenoideo quístico
2. Ductal
3. Lobulillar
4. Medular
5. Mucinoso
6. No reporta
7. Otro
8. Papilar
9. Tubular
1(0.5%)
155(77.5%)
11(5.5%)
2(1.0%)
3(1.5%)
12(6.0%)
7(3.5%)
8(4.0%)
1(0.5%)
200 (100.0%) 0 (0.0%)
86 CLASIFICACION_INMUNOHISTOQUIMICA_RE_POSITIVO [character]
1. Checked
2. Unchecked
161(80.5%)
39(19.5%)
200 (100.0%) 0 (0.0%)
87 CLASIFICACION_INMUNOHISTOQUIMICA_RP_POSITIVO [character]
1. Checked
2. Unchecked
136(68.0%)
64(32.0%)
200 (100.0%) 0 (0.0%)
88 CLASIFICACION_INMUNOHISTOQUIMICA_TRIPLE_NEGATIVO [character]
1. Checked
2. Unchecked
17(8.5%)
183(91.5%)
200 (100.0%) 0 (0.0%)
89 CLASIFICACION_INMUNOHISTOQUIMICA_HER2_POSITIVO [character]
1. Checked
2. Unchecked
31(15.5%)
169(84.5%)
200 (100.0%) 0 (0.0%)
90 CLASIFICACION_INMUNOHISTOQUIMICA_KI_67_POSITIVO [character]
1. Checked
2. Unchecked
54(27.0%)
146(73.0%)
200 (100.0%) 0 (0.0%)
91 CLASIFICACION_INMUNOHISTOQUIMICA_OTRO [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
92 CLASIFICACION_INMUNOHISTOQUIMICA_NO_REPORTA [character]
1. Checked
2. Unchecked
9(4.5%)
191(95.5%)
200 (100.0%) 0 (0.0%)
93 TRAT_QUIMIOTERAPIA_NEOADYUVANTE [character]
1. Checked
2. Unchecked
69(34.5%)
131(65.5%)
200 (100.0%) 0 (0.0%)
94 TRAT_ANTIHER2 [character]
1. Checked
2. Unchecked
29(14.5%)
171(85.5%)
200 (100.0%) 0 (0.0%)
95 TRAT_RADIOTERAPIA_NEOADYUVANTE [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
96 TRAT_CIRUGIA_RADICAL [character]
1. Checked
2. Unchecked
16(8.0%)
184(92.0%)
200 (100.0%) 0 (0.0%)
97 TRAT_CIRUGIA_RADICAL_MODIFICADA [character]
1. Checked
2. Unchecked
64(32.0%)
136(68.0%)
200 (100.0%) 0 (0.0%)
98 TRAT_CIRUGIA_CONSERVADORA [character]
1. Checked
2. Unchecked
127(63.5%)
73(36.5%)
200 (100.0%) 0 (0.0%)
99 TRAT_VACIAMIENTO_GANGLIONAR [character]
1. Checked
2. Unchecked
145(72.5%)
55(27.5%)
200 (100.0%) 0 (0.0%)
100 TRAT_TRATAMIENTO_HORMONAL [character]
1. Checked
2. Unchecked
152(76.0%)
48(24.0%)
200 (100.0%) 0 (0.0%)
101 TRAT_QUIMIOTERAPIA_ADYUVANTE [character]
1. Checked
2. Unchecked
98(49.0%)
102(51.0%)
200 (100.0%) 0 (0.0%)
102 TRAT_RADIOTERAPIA_ADYUVANTE [character]
1. Checked
2. Unchecked
165(82.5%)
35(17.5%)
200 (100.0%) 0 (0.0%)
103 TRAT_CIRUGIA_RECONSTRUCTIVA [character]
1. Checked
2. Unchecked
35(17.5%)
165(82.5%)
200 (100.0%) 0 (0.0%)
104 TRAT_OTRO [character]
1. Checked
2. Unchecked
1(0.5%)
199(99.5%)
200 (100.0%) 0 (0.0%)
105 TRAT_NO_REPORTA [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
106 TIPO_T_SISTEMICO_NEOADYUVANTE_QUIMIOTERAPIA [character]
1. Checked
2. Unchecked
64(32.0%)
136(68.0%)
200 (100.0%) 0 (0.0%)
107 TIPO_T_SISTEMICO_NEOADYUVANTE_ANTIHER2 [character]
1. Checked
2. Unchecked
11(5.5%)
189(94.5%)
200 (100.0%) 0 (0.0%)
108 TIPO_T_SISTEMICO_NEOADYUVANTE_T.BLANCO [character]
1. Checked
2. Unchecked
2(1.0%)
198(99.0%)
200 (100.0%) 0 (0.0%)
109 TIPO_T_SISTEMICO_NEOADYUVANTE_INMUNOTERAPIA [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
110 TIPO_T_SISTEMICO_NEOADYUVANTE_HORMONOTERAPIA [character]
1. Checked
2. Unchecked
3(1.5%)
197(98.5%)
200 (100.0%) 0 (0.0%)
111 TIPO_T_SISTEMICO_NEOADYUVANTE_NO_REPORTA [character]
1. Checked
2. Unchecked
1(0.5%)
199(99.5%)
200 (100.0%) 0 (0.0%)
112 TIPO_T_SISTEMICO_ADYUVANTE_QUIMIOTERAPIA [character]
1. Checked
2. Unchecked
86(43.0%)
114(57.0%)
200 (100.0%) 0 (0.0%)
113 TIPO_T_SISTEMICO_ADYUVANTE_ANTIHER2 [character]
1. Checked
2. Unchecked
18(9.0%)
182(91.0%)
200 (100.0%) 0 (0.0%)
114 TIPO_T_SISTEMICO_ADYUVANTE_T.BLANCO [character]
1. Checked
2. Unchecked
1(0.5%)
199(99.5%)
200 (100.0%) 0 (0.0%)
115 TIPO_T_SISTEMICO_ADYUVANTE_INMUNOTERAPIA [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
116 TIPO_T_SISTEMICO_ADYUVANTE_HORMONOTERAPIA [character]
1. Checked
2. Unchecked
40(20.0%)
160(80.0%)
200 (100.0%) 0 (0.0%)
117 TIPO_T_SISTEMICO_ADYUVANTE_NO_REPORTA [character]
1. Checked
2. Unchecked
3(1.5%)
197(98.5%)
200 (100.0%) 0 (0.0%)
118 TIPO_RADIOTERAPIA_NEOADYUVANTE_RADIACION_TODO_SENO_CONVENCIONAL [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
119 TIPO_RADIOTERAPIA_NEOADYUVANTE_RADIACION_TODO_SENO_HIPOFRACCIONADA [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
120 TIPO_RADIOTERAPIA_NEOADYUVANTE_RADIOT_IORT [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
121 TIPO_RADIOTERAPIA_NEOADYUVANTE_RADIOT_3D_CRT [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
122 TIPO_RADIOTERAPIA_NEOADYUVANTE_RADIOT_IMRT [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
123 TIPO_RADIOTERAPIA_NEOADYUVANTE_BRAQUITERAPIA [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
124 TIPO_RADIOTERAPIA_NEOADYUVANTE_RADIACION_GANGLIOS [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
125 TIPO_RADIOTERAPIA_NEOADYUVANTE_OTRO [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
126 TIPO_RADIOTERAPIA_NEOADYUVANTE_NO_REPORTA [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
127 TIPO_RADIOTERAPIA_ADYUVANTE_RADIACION_TODO_SENO_CONVENCIONAL [character]
1. Checked
2. Unchecked
4(2.0%)
196(98.0%)
200 (100.0%) 0 (0.0%)
128 TIPO_RADIOTERAPIA_ADYUVANTE_RADIACION_TODO_SENO_HIPOFRACCIONADA [character]
1. Checked
2. Unchecked
2(1.0%)
198(99.0%)
200 (100.0%) 0 (0.0%)
129 TIPO_RADIOTERAPIA_ADYUVANTE_RADIOT_IORT [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
130 TIPO_RADIOTERAPIA_ADYUVANTE_RADIOT_3D_CRT [character]
1. Checked
2. Unchecked
6(3.0%)
194(97.0%)
200 (100.0%) 0 (0.0%)
131 TIPO_RADIOTERAPIA_ADYUVANTE_RADIOT_IMRT [character]
1. Checked
2. Unchecked
123(61.5%)
77(38.5%)
200 (100.0%) 0 (0.0%)
132 TIPO_RADIOTERAPIA_ADYUVANTE_BRAQUITERAPIA [character]
1. Checked
2. Unchecked
1(0.5%)
199(99.5%)
200 (100.0%) 0 (0.0%)
133 TIPO_RADIOTERAPIA_ADYUVANTE_RADIACION_GANGLIOS [character]
1. Checked
2. Unchecked
10(5.0%)
190(95.0%)
200 (100.0%) 0 (0.0%)
134 TIPO_RADIOTERAPIA_ADYUVANTE_OTRO [character]
1. Checked
2. Unchecked
1(0.5%)
199(99.5%)
200 (100.0%) 0 (0.0%)
135 TIPO_RADIOTERAPIA_ADYUVANTE_NO_REPORTA [character]
1. Checked
2. Unchecked
29(14.5%)
171(85.5%)
200 (100.0%) 0 (0.0%)
136 TRAT_HORMONAL_COMPLETADO [character]
1. Inhibidor de aromatasa ex
2. Inhibidor de aromatasa po
3. Otro
4. Tamoxifeno 2 años y switc
5. Tamoxifeno 2 años y switc
6. Tamoxifeno extendido (7-1
7. Tamoxifeno por 5 años
23(14.6%)
33(21.0%)
11(7.0%)
20(12.7%)
15(9.6%)
20(12.7%)
35(22.3%)
157 (78.5%) 43 (21.5%)
137 TRAT_HORMONAL_ACTUAL [character]
1. Anastrazol
2. Exemestane
3. Letrozol
4. No esta recibiendo actual
5. Otro
6. Tamoxifeno
15(9.6%)
1(0.6%)
22(14.0%)
104(66.2%)
1(0.6%)
14(8.9%)
157 (78.5%) 43 (21.5%)
138 RECAIDA_CANCER_MAMA [character]
1. No
2. Si
196(98.0%)
4(2.0%)
200 (100.0%) 0 (0.0%)
139 CATEGORIA_RIESGO_HC [character]
1. Alto riesgo
2. Bajo riesgo
3. Intermedio riesgo
4. No reporta
75(37.5%)
58(29.0%)
50(25.0%)
17(8.5%)
200 (100.0%) 0 (0.0%)
140 TRAT_RECAIDA_QUIMIOTERAPIA [character]
1. Checked
2. Unchecked
1(0.5%)
199(99.5%)
200 (100.0%) 0 (0.0%)
141 TRAT_RECAIDA_ANTI_HER2 [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
142 TRAT_RECAIDA_T._BLANCO [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
143 TRAT_RECAIDA_INMUNOTERAPIA [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
144 TRAT_RECAIDA_HORMONOTERAPIA [character]
1. Checked
2. Unchecked
3(1.5%)
197(98.5%)
200 (100.0%) 0 (0.0%)
145 TRAT_RECAIDA_RADIOTERAPIA [character]
1. Checked
2. Unchecked
1(0.5%)
199(99.5%)
200 (100.0%) 0 (0.0%)
146 TRAT_RECAIDA_NO_REPORTA [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
147 ATENCION_UA_CUIDADO_PALIATIVO [character]
1. Checked
2. Unchecked
25(12.5%)
175(87.5%)
200 (100.0%) 0 (0.0%)
148 ATENCION_UA_PROGRAMA_LINFEDEMA [character]
1. Checked
2. Unchecked
2(1.0%)
198(99.0%)
200 (100.0%) 0 (0.0%)
149 ATENCION_UA_PROGRAMA_FATIGA_REH_ONCOLOGICA [character]
1. Checked
2. Unchecked
3(1.5%)
197(98.5%)
200 (100.0%) 0 (0.0%)
150 ATENCION_UA_PSICOONCOLOGIA [character]
1. Checked
2. Unchecked
7(3.5%)
193(96.5%)
200 (100.0%) 0 (0.0%)
151 ATENCION_UA_SEXOLOGIA_GINECOLOGIA_FUNCIONAL [character]
1. Checked
2. Unchecked
2(1.0%)
198(99.0%)
200 (100.0%) 0 (0.0%)
152 ATENCION_UA_NEUROLOGIA [character]
1. Checked
2. Unchecked
5(2.5%)
195(97.5%)
200 (100.0%) 0 (0.0%)
153 ATENCION_UA_TRABAJO_SOCIAL [character]
1. Checked
2. Unchecked
2(1.0%)
198(99.0%)
200 (100.0%) 0 (0.0%)
154 ATENCION_UA_NUTRICION [character]
1. Checked
2. Unchecked
5(2.5%)
195(97.5%)
200 (100.0%) 0 (0.0%)
155 ATENCION_UA_MEDICINA_OCUPACIONAL [character]
1. Checked
2. Unchecked
1(0.5%)
199(99.5%)
200 (100.0%) 0 (0.0%)
156 ATENCION_UA_MEDICINA_CIRUGIA_PLASTICA_RECONSTRUCTIVA [character]
1. Checked
2. Unchecked
11(5.5%)
189(94.5%)
200 (100.0%) 0 (0.0%)
157 ATENCION_UA_MEDICINA_FISIATRIA_MEDICINA_FISICA [character]
1. Checked
2. Unchecked
12(6.0%)
188(94.0%)
200 (100.0%) 0 (0.0%)
158 ATENCION_UA_OTRO [character]
1. Checked
2. Unchecked
4(2.0%)
196(98.0%)
200 (100.0%) 0 (0.0%)
159 ATENCION_UA_NINGUNO [character]
1. Checked
2. Unchecked
154(77.0%)
46(23.0%)
200 (100.0%) 0 (0.0%)
160 SEGUIMIENTO_SUPERVIVENCIA_UA_MEDICINA FAMILIAR [character]
1. Checked
2. Unchecked
170(85.0%)
30(15.0%)
200 (100.0%) 0 (0.0%)
161 SEGUIMIENTO_SUPERVIVENCIA_UA_ONCOLOGIA_CLINICA [character]
1. Checked
2. Unchecked
64(32.0%)
136(68.0%)
200 (100.0%) 0 (0.0%)
162 SEGUIMIENTO_SUPERVIVENCIA_UA_CIRUGIA_MAMA [character]
1. Checked
2. Unchecked
72(36.0%)
128(64.0%)
200 (100.0%) 0 (0.0%)
163 SEGUIMIENTO_SUPERVIVENCIA_UA_OTRO [character]
1. Checked
2. Unchecked
1(0.5%)
199(99.5%)
200 (100.0%) 0 (0.0%)
164 SEGUIMIENTO_SUPERVIVENCIA_UA_NINGUNO [character] 1. Unchecked
200(100.0%)
200 (100.0%) 0 (0.0%)
165 ANT_INFARTO_MIOCARDIO [character]
1. No
2. Si
199(99.5%)
1(0.5%)
200 (100.0%) 0 (0.0%)
166 ANT_FALLA_CARDIACA [character]
1. No
2. Si
191(96.0%)
8(4.0%)
199 (99.5%) 1 (0.5%)
167 ANT_FALLA_ENF_VASCULAR_PERIFERICA [character]
1. No
2. Si
177(88.9%)
22(11.1%)
199 (99.5%) 1 (0.5%)
168 ANT_FALLA_ENF_CEREBROVASCULAR [character]
1. No
2. Si
193(97.0%)
6(3.0%)
199 (99.5%) 1 (0.5%)
169 ANT_FALLA_ENF_DEMENCIA [character]
1. No
2. Si
199(99.5%)
1(0.5%)
200 (100.0%) 0 (0.0%)
170 ANT_FALLA_ENF_PULMONAR_OBSTRUCTIVA_CRONICA [character]
1. No
2. Si
194(97.0%)
6(3.0%)
200 (100.0%) 0 (0.0%)
171 ANT_FALLA_ENF_TEJIDO_CONECTIVO [character]
1. No
2. Si
192(96.0%)
8(4.0%)
200 (100.0%) 0 (0.0%)
172 ANT_FALLA_ULCERA_PEPTICA [character]
1. No
2. Si
188(94.5%)
11(5.5%)
199 (99.5%) 1 (0.5%)
173 ANT_FALLA_ENF_HEPATICA [character]
1. Enfermedad hepática leve
2. Enfermedad hepática moder
3. Sin antecedente de enferm
7(3.5%)
1(0.5%)
190(96.0%)
198 (99.0%) 2 (1.0%)
174 ANT_FALLA_DIABETES_MELLITUS [character]
1. Antecedente de Diabetes m
2. Diabetes mellitus sin co
3. Sin antecedente de diabet
2(1.0%)
12(6.1%)
183(92.9%)
197 (98.5%) 3 (1.5%)
175 ANT_FALLA_HEMIPLEJIA [character]
1. No
2. Si
195(98.0%)
4(2.0%)
199 (99.5%) 1 (0.5%)
176 ANT_ENF_RENAL_CRONICA [character]
1. Enfermedad renal crónica
2. Enfermedad renal crónica
3. Sin antecedente de enferm
5(2.5%)
3(1.5%)
190(96.0%)
198 (99.0%) 2 (1.0%)
177 ANT_VIH [character] 1. No
198(100.0%)
198 (99.0%) 2 (1.0%)
178 ANT_SIDA [character] 1. No
194(100.0%)
194 (97.0%) 6 (3.0%)
179 INDICE_CHARLSON [numeric]
Mean (sd) : 4.6 (1.4)
min ≤ med ≤ max:
2 ≤ 4 ≤ 9
IQR (CV) : 1 (0.3)
2:10(5.0%)
3:31(15.5%)
4:61(30.5%)
5:55(27.5%)
6:25(12.5%)
7:14(7.0%)
8:2(1.0%)
9:2(1.0%)
200 (100.0%) 0 (0.0%)
180 ANTECEDENTES_COMPLETE [character] 1. Complete
200(100.0%)
200 (100.0%) 0 (0.0%)

Generated by summarytools 1.1.4 (R version 4.4.2)
2025-08-17

Analisis descriptivo

# Estrato 

library(dplyr)
DATA_SET <- DATA_SET %>% mutate(Estrato_cat = cut(ESTRATO, 
                                          breaks = c (0,2, 4, 6), 
                                          right = T, 
                                          labels = c("Bajo", "Medio", "Alto")))
# EDUCACION

DATA_SET$EDUCACION_CAT <- ifelse(DATA_SET$EDUCACION %in% c("Educación preescolar", "Educación básica (primaria)"), "Primaria o menos", ifelse(DATA_SET$EDUCACION %in% c("Educación media (bachiller)"), "Basica/secundaria", ifelse(DATA_SET$EDUCACION %in% c("Educación superior (universitaria)", "Educación técnica", "Posgrado (especializacion, maestria, doctorado)"), "Mas que secundaria", DATA_SET$EDUCACION)))

DATA_SET$EDUCACION_CAT <- factor(DATA_SET$EDUCACION_CAT, levels = c("Sin educación formal", "Primaria o menos", "Basica/secundaria", "Mas que secundaria") ) #REORDENANDO LAS CATEGORIAS


#Situacion marital 
DATA_SET$SIT_MARITAL_CAT <- ifelse(DATA_SET$SIT_MARITAL %in% c("Casado(a)", "Unión libre"), "Casada/Union Libre", 
                            ifelse(DATA_SET$SIT_MARITAL %in% c("Separado(a)", "Divorciado(a)", "Viudo(a)"), "Separada/Viuda", 
                            ifelse(DATA_SET$SIT_MARITAL %in% c("Soltero(a)"), "Soltera", DATA_SET$SIT_MARITAL)))

DATA_SET$SIT_MARITAL_CAT <- factor(DATA_SET$SIT_MARITAL_CAT, levels = c("Soltera", "Casada/Union Libre", "Separada/Viuda") ) #REORDENANDO LAS CATEGORIAS

#OCUPACION 

DATA_SET$OCUPACIONL_CAT <- ifelse(DATA_SET$OCUPACION %in% c("Ama de Casa", "Hogar"), "Ama de casa/hogar", 
                            ifelse(DATA_SET$OCUPACION %in% c("Abogado", "Administrador de Empresas", "Auxiliar Varios", "Auxiliares de Enfermeria", "Cocineros", "Contadores", "Empleados de Oficina", "Estilistas", "Ingenieros", "Oficiales de Cubierta", "Personal de Servidumbre", "Personal Directivo de la Administraci󮠐blica", "Profesores y Educadores", "Sastres y Modistos", "Secretaria", "Socioantrop󬯧o", "Vendedor"), "empleado/ocupado", 
                            DATA_SET$OCUPACION))

DATA_SET$OCUPACIONL_CAT<- factor(DATA_SET$OCUPACIONL_CAT, levels = c("Ama de casa/hogar", "empleado/ocupado", "Pensionado", "Cesante") ) #REORDENANDO LAS CATEGORIAS

#Satisfaccion salud

DATA_SET$SATISFACCION_CAT <- ifelse(DATA_SET$SATISFACCION_ATENCION_SALUD %in% c("Poco satisfecho", "Moderadamente satisfecho"), "Poco satisfechas", 
                             ifelse(DATA_SET$SATISFACCION_ATENCION_SALUD %in% c("Muy satisfecho", "Extremadamente satisfecho"), "Muy satisfechas", DATA_SET$SATISFACCION_ATENCION_SALUD))

#ESTADIO 

DATA_SET$ESTADIO <- ifelse(DATA_SET$ESTADIFICACION_MAMA_NCCN %in% c("0", "IA", "IB", "IIA"), "Temprano", 
                    ifelse(DATA_SET$ESTADIFICACION_MAMA_NCCN %in% c("IIB", "IIIA", "IIIB", "IIIC"), "Localmente avanzado", 
                    ifelse(DATA_SET$ESTADIFICACION_MAMA_NCCN %in% c("IV"), "Metastasico", DATA_SET$ESTADIFICACION_MAMA_NCCN)))


DATA_SET$ESTADIO<- factor(DATA_SET$ESTADIO, levels = c("Temprano", "Localmente avanzado", "Metastasico", "No reporta") ) #REORDENANDO LAS CATEGORIAS

#CLASIFICACION_INMUNOHISTOQUIMICA

DATA_SET <- DATA_SET %>% 
  mutate(LUMINAL_A = ifelse((CLASIFICACION_INMUNOHISTOQUIMICA_RE_POSITIVO == "Checked" | CLASIFICACION_INMUNOHISTOQUIMICA_RP_POSITIVO == "Checked" ) & (CLASIFICACION_INMUNOHISTOQUIMICA_KI_67_POSITIVO == "Unchecked" & CLASIFICACION_INMUNOHISTOQUIMICA_HER2_POSITIVO  == "Unchecked"), "Si", "No"))


DATA_SET <- DATA_SET %>% 
  mutate(LUMINAL_B = ifelse((CLASIFICACION_INMUNOHISTOQUIMICA_RE_POSITIVO == "Checked" | CLASIFICACION_INMUNOHISTOQUIMICA_RP_POSITIVO == "Checked" ) & (CLASIFICACION_INMUNOHISTOQUIMICA_KI_67_POSITIVO == "Checked" & CLASIFICACION_INMUNOHISTOQUIMICA_HER2_POSITIVO  == "Unchecked"), "Si", "No"))

DATA_SET <- DATA_SET %>% 
  mutate(HER2_POSITIVO = ifelse(CLASIFICACION_INMUNOHISTOQUIMICA_KI_67_POSITIVO == "Checked", "Si", "No"))

DATA_SET <- DATA_SET %>% 
  mutate(TRIPLE_NEGATIVO = ifelse(CLASIFICACION_INMUNOHISTOQUIMICA_RE_POSITIVO == "Unchecked" & CLASIFICACION_INMUNOHISTOQUIMICA_RP_POSITIVO == "Unchecked" &  CLASIFICACION_INMUNOHISTOQUIMICA_HER2_POSITIVO  == "Unchecked", "Si", "No"))


#CLASIFICACION TRATAMIENTOS

DATA_SET <- DATA_SET %>% 
  mutate(TRAT_SISTEMICO = ifelse(TRAT_QUIMIOTERAPIA_NEOADYUVANTE == "Checked" |  TRAT_QUIMIOTERAPIA_ADYUVANTE == "Checked" | TRAT_ANTIHER2 == "Checked" | TIPO_T_SISTEMICO_NEOADYUVANTE_QUIMIOTERAPIA == "Checked" | TIPO_T_SISTEMICO_NEOADYUVANTE_ANTIHER2 == "Checked" | TIPO_T_SISTEMICO_NEOADYUVANTE_T.BLANCO == "Checked" | TIPO_T_SISTEMICO_NEOADYUVANTE_INMUNOTERAPIA == "Checked" | TIPO_T_SISTEMICO_ADYUVANTE_QUIMIOTERAPIA == "Checked" | TIPO_T_SISTEMICO_ADYUVANTE_ANTIHER2  == "Checked" | TIPO_T_SISTEMICO_ADYUVANTE_T.BLANCO == "Checked", "Si", "No"))


DATA_SET <- DATA_SET %>% 
  mutate(TRAT_RADIOTERAPIA = ifelse(TRAT_RADIOTERAPIA_NEOADYUVANTE == "Checked" |TRAT_RADIOTERAPIA_ADYUVANTE == "Checked" | TIPO_RADIOTERAPIA_ADYUVANTE_RADIACION_TODO_SENO_CONVENCIONAL == "Checked" | TIPO_RADIOTERAPIA_ADYUVANTE_RADIACION_TODO_SENO_HIPOFRACCIONADA == "Checked" | TIPO_RADIOTERAPIA_ADYUVANTE_RADIOT_3D_CRT == "Checked" | TIPO_RADIOTERAPIA_ADYUVANTE_RADIOT_IMRT == "Checked" | TIPO_RADIOTERAPIA_ADYUVANTE_BRAQUITERAPIA == "Checked" | TIPO_RADIOTERAPIA_ADYUVANTE_RADIACION_GANGLIOS == "Checked" | TIPO_RADIOTERAPIA_ADYUVANTE_OTRO == "Checked", "Si", "No"))

DATA_SET <- DATA_SET %>% 
  mutate(TRAT_CIRUGIA = ifelse(TRAT_CIRUGIA_RADICAL == "Checked" | TRAT_CIRUGIA_RADICAL_MODIFICADA == "Checked" | TRAT_CIRUGIA_CONSERVADORA == "Checked" | TRAT_VACIAMIENTO_GANGLIONAR == "Checked" | TRAT_CIRUGIA_RECONSTRUCTIVA == "Checked", "Si", "No"))

DATA_SET <- DATA_SET %>% 
  mutate(
    TRAT_HORMONOTERAPIA = ifelse(
      TRAT_TRATAMIENTO_HORMONAL == "Checked" |
      !is.na(TRAT_HORMONAL_COMPLETADO) |
      !is.na(TRAT_HORMONAL_ACTUAL),
      "Si",
      "No"
    )
  )

# Cuidado de soporte se prueban dos maneras: Recibio o no atencion y por recepcion de cuidado de soporte oncologico primario

DATA_SET <- DATA_SET %>% 
  mutate(Cuidado_soporte_UA = ifelse(ATENCION_UA_NINGUNO == "Unchecked", "Si", "No"))

DATA_SET <- DATA_SET %>% 
  mutate(Cuidado_soporte_primario = ifelse(ATENCION_UA_CUIDADO_PALIATIVO == "Checked" | ATENCION_UA_PSICOONCOLOGIA == "Checked" | ATENCION_UA_NUTRICION == "Checked" | ATENCION_UA_PROGRAMA_LINFEDEMA == "Checked" | ATENCION_UA_PROGRAMA_FATIGA_REH_ONCOLOGICA == "Checked", "Si", "No"))

#Indice charlson 

DATA_SET <- DATA_SET %>% 
  mutate(charlson_cat = cut(INDICE_CHARLSON,
                            breaks = c (-Inf, 2, 4, Inf),
                            right = T, 
                            labels = c ("1-2", "3-4", "=>5")
                            ))

Cruce de variables

Analisis descriptivo de las variables

#dput(names(DATA_SET))

myVars1 <- c("EDAD","Estrato_cat","EDUCACION_CAT", "SIT_MARITAL_CAT", "OCUPACIONL_CAT", "FRACCION_OCUPACIONES" , "SATISFACCION_CAT", "TIEMPO_SUPERVIVENCIA", "EDAD_DX",  "ESTADIO", "LUMINAL_A", "LUMINAL_B", "HER2_POSITIVO", "TRIPLE_NEGATIVO", "TRAT_SISTEMICO", "TRAT_RADIOTERAPIA", "TRAT_CIRUGIA", "TRAT_HORMONOTERAPIA", "RECAIDA_CANCER_MAMA", "charlson_cat", "Cuidado_soporte_UA", "Cuidado_soporte_primario") 
catVars1 <- c("Estrato_cat","EDUCACION_CAT", "SIT_MARITAL_CAT", "OCUPACIONL_CAT",  "SATISFACCION_CAT", "ESTADIO", "LUMINAL_A", "LUMINAL_B", "HER2_POSITIVO", "TRIPLE_NEGATIVO", "TRAT_SISTEMICO", "TRAT_RADIOTERAPIA", "TRAT_CIRUGIA", "TRAT_HORMONOTERAPIA", "RECAIDA_CANCER_MAMA", "charlson_cat", "Cuidado_soporte_UA", "Cuidado_soporte_primario")  

library(tableone)
tab1 <- CreateTableOne(vars = myVars1, factorVars= catVars1, data = DATA_SET, includeNA = T, test = T,addOverall = T)

table1 <- as.data.frame(print(tab1, showAllLevels= TRUE, printToggle = FALSE, noSpaces = TRUE))
rownames(table1) <- gsub("\\.{3,}", "", rownames(table1))  # Quita puntos suspensivos "..."
rownames(table1) <- gsub("\\.{1,}", "_", rownames(table1))  # Quita puntos suspensivos "..."
rownames(table1) <- gsub("\\_{1,}", " ", rownames(table1))  # Quita puntos suspensivos "..."


library(knitr)
library(kableExtra)

kable(table1, format = "html", caption = "Variable sociodemograficas") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center") %>%
  column_spec(1, bold = T, color = "white", background = "black") %>%
  column_spec(2, border_left = T, color = "white", background = "grey")
Variable sociodemograficas
level Overall
n 200
EDAD mean SD 65.12 (9.54)
Estrato cat Bajo 82 (41.0)
X Medio 111 (55.5)
X 1 Alto 7 (3.5)
EDUCACION CAT Sin educación formal 4 (2.0)
X 2 Primaria o menos 78 (39.0)
X 3 Basica/secundaria 42 (21.0)
X 4 Mas que secundaria 76 (38.0)
SIT MARITAL CAT Soltera 45 (22.5)
X 5 Casada/Union Libre 99 (49.5)
X 6 Separada/Viuda 55 (27.5)
X 7 NA 1 (0.5)
OCUPACIONL CAT Ama de casa/hogar 91 (45.5)
X 8 empleado/ocupado 48 (24.0)
X 9 Pensionado 56 (28.0)
X 10 Cesante 5 (2.5)
FRACCION OCUPACIONES mean SD 60.47 (30.57)
SATISFACCION CAT Muy satisfechas 184 (92.0)
X 11 Poco satisfechas 16 (8.0)
TIEMPO SUPERVIVENCIA mean SD 8.20 (4.84)
EDAD DX mean SD 56.92 (10.15)
ESTADIO Temprano 107 (53.5)
X 12 Localmente avanzado 77 (38.5)
X 13 Metastasico 2 (1.0)
X 14 No reporta 14 (7.0)
LUMINAL A No 89 (44.5)
X 15 Si 111 (55.5)
LUMINAL B No 168 (84.0)
X 16 Si 32 (16.0)
HER2 POSITIVO No 146 (73.0)
X 17 Si 54 (27.0)
TRIPLE NEGATIVO No 174 (87.0)
X 18 Si 26 (13.0)
TRAT SISTEMICO No 56 (28.0)
X 19 Si 144 (72.0)
TRAT RADIOTERAPIA No 35 (17.5)
X 20 Si 165 (82.5)
TRAT CIRUGIA No 2 (1.0)
X 21 Si 198 (99.0)
TRAT HORMONOTERAPIA No 43 (21.5)
X 22 Si 157 (78.5)
RECAIDA CANCER MAMA No 196 (98.0)
X 23 Si 4 (2.0)
charlson cat 1-2 10 (5.0)
X 24 3-4 92 (46.0)
X 25 =>5 98 (49.0)
Cuidado soporte UA No 154 (77.0)
X 26 Si 46 (23.0)
Cuidado soporte primario No 172 (86.0)
X 27 Si 28 (14.0)

Cruce variables

Cruzar ocupadas - ocupadas con fraccion de ocupaciones

DATA_CR <- DATA_SET %>% select(OCUPACIONL_CAT, FRACCION_OCUPACIONES)
print(stby(data = DATA_CR, INDICES = DATA_CR$OCUPACIONL_CAT, 
     FUN = descr, stats = "common", transpose = TRUE), method = 'render')

Descriptive Statistics

by OCUPACIONL_CAT

Data Frame: DATA_CR
N: 200
Mean Std.Dev Min Median Max N.Valid N Pct.Valid
Ama de casa/hogar 72.93 25.76 10.00 80.00 100.00 91 91 100.00
empleado/ocupado 39.83 26.09 5.00 30.00 100.00 48 48 100.00
Pensionado 59.20 31.87 1.00 56.50 100.00 56 56 100.00
Cesante 45.80 25.75 4.00 50.00 75.00 5 5 100.00

Generated by summarytools 1.1.4 (R version 4.4.2)
2025-08-17

# %>%
 # stview() ver en markdown 

Revisando diferencias

Se realiza una prueba de normalidad por cada categoría.

#install.packages("broom")
library(broom)
library(tidyr)
library(purrr)

tabla_shapiro <- DATA_CR %>%
  group_by(OCUPACIONL_CAT) %>%
  summarise(
    shapiro = list(shapiro.test(FRACCION_OCUPACIONES)),
    .groups = "drop"
  ) %>%
  mutate(shapiro = map(shapiro, tidy)) %>%
  unnest(shapiro)


library(gt)
tabla_shapiro %>%
  gt() %>%
  tab_header(
    title = md("**Prueba de normalidad Shapiro-Wilk por grupo**"),
    subtitle = "Resultados agrupados"
  ) %>%
  tab_style(
    style = cell_text(align = "center", weight = "bold", color = "darkblue"),
    locations = cells_column_labels(everything())
  ) %>%
  tab_style(
    style = cell_text(align = "center"),
    locations = cells_body(columns = everything())
  ) %>%
  fmt_number(
    columns = where(is.numeric),
    decimals = 4
  ) %>%
  opt_table_outline() %>%
  opt_row_striping()
Prueba de normalidad Shapiro-Wilk por grupo
Resultados agrupados
OCUPACIONL_CAT statistic p.value method
Ama de casa/hogar 0.8764 0.0000 Shapiro-Wilk normality test
empleado/ocupado 0.8972 0.0005 Shapiro-Wilk normality test
Pensionado 0.9183 0.0010 Shapiro-Wilk normality test
Cesante 0.8394 0.1634 Shapiro-Wilk normality test

Se encontró una distribución no normal, se usa la prueba Kruskall wallis

kruskal_res <- kruskal.test(
  x = DATA_CR$FRACCION_OCUPACIONES,
  g = DATA_CR$OCUPACIONL_CAT
)

# convertir a tibble
kruskal_table <- tidy(kruskal_res)


kruskal_table %>%
  gt() %>%
  tab_header(
    title = md("**Prueba de Kruskal-Wallis**"),
    subtitle = md("Comparación de medianas entre grupos")
  ) %>%
  fmt_number(
    columns = everything(),
    decimals = 4
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold", color = "darkblue", align = "center")
    ),
    locations = cells_column_labels(everything())
  ) %>%
  opt_table_outline() %>% 
  tab_options(
    table.border.top.color    = "black",
    table.border.bottom.color = "black",
    table.font.size = 14
  )
Prueba de Kruskal-Wallis
Comparación de medianas entre grupos
statistic p.value parameter method
37.9675 0.0000 3.0000 Kruskal-Wallis rank sum test

Hay diferencias en al menos en la distribución de una variable (hay al menos una mediana diferente). Se revisa la comparación entre pares (no parametrico)

library(FSA)

# Correr la prueba de Dunn
dunn_res <- dunnTest(FRACCION_OCUPACIONES ~ OCUPACIONL_CAT, 
                     data = DATA_CR, 
                     method = "bonferroni")

# Extraer la tabla de resultados
tabla_dunn <- dunn_res$res

# Pasar a gt para embellecer
tabla_dunn %>%
  gt() %>%
  tab_header(
    title = "Prueba de Dunn post-hoc",
    subtitle = "Ajuste por Bonferroni"
  ) %>%
  cols_label(
    Comparison = "Comparación",
    Z          = "Estadístico Z",
    P.unadj    = "p-valor sin ajuste",
    P.adj      = "p-valor ajustado"
  ) %>%
  fmt_number(
    columns = c(Z, P.unadj, P.adj),
    decimals = 4
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold", align = "center")
    ),
    locations = cells_column_labels(everything())
  )
Prueba de Dunn post-hoc
Ajuste por Bonferroni
Comparación Estadístico Z p-valor sin ajuste p-valor ajustado
Ama de casa/hogar - Cesante 1.9816 0.0475 0.2851
Ama de casa/hogar - empleado/ocupado 6.0429 0.0000 0.0000
Cesante - empleado/ocupado 0.3570 0.7211 1.0000
Ama de casa/hogar - Pensionado 2.5181 0.0118 0.0708
Cesante - Pensionado −1.0338 0.3012 1.0000
empleado/ocupado - Pensionado −3.3060 0.0009 0.0057

Hay diferencias entre todas las modalidades de ocupación y la fracción de ocupaciones.

Revisando intersecciones cuidado de soporte

library(limma)
library(UpSetR)
BASE_UPSET <- DATA_SET %>% select(ATENCION_UA_CUIDADO_PALIATIVO, ATENCION_UA_PSICOONCOLOGIA, ATENCION_UA_NUTRICION, ATENCION_UA_PROGRAMA_LINFEDEMA, ATENCION_UA_PROGRAMA_FATIGA_REH_ONCOLOGICA, ATENCION_UA_SEXOLOGIA_GINECOLOGIA_FUNCIONAL, ATENCION_UA_NEUROLOGIA, ATENCION_UA_TRABAJO_SOCIAL, ATENCION_UA_MEDICINA_OCUPACIONAL, ATENCION_UA_MEDICINA_CIRUGIA_PLASTICA_RECONSTRUCTIVA, ATENCION_UA_MEDICINA_FISIATRIA_MEDICINA_FISICA)

colnames(BASE_UPSET)[colnames(BASE_UPSET) =="ATENCION_UA_CUIDADO_PALIATIVO"] <- "Cuidado paliativo"

colnames(BASE_UPSET)[colnames(BASE_UPSET) =="ATENCION_UA_PSICOONCOLOGIA"] <- "Psicooncologia"

colnames(BASE_UPSET)[colnames(BASE_UPSET) =="ATENCION_UA_NUTRICION"] <- "Nutricion"

colnames(BASE_UPSET)[colnames(BASE_UPSET) =="ATENCION_UA_PROGRAMA_LINFEDEMA"] <- "Linfedema"

colnames(BASE_UPSET)[colnames(BASE_UPSET) =="ATENCION_UA_PROGRAMA_FATIGA_REH_ONCOLOGICA"] <- "Programa Fatiga"

colnames(BASE_UPSET)[colnames(BASE_UPSET) =="ATENCION_UA_SEXOLOGIA_GINECOLOGIA_FUNCIONAL"] <- "Sexologia Gineco"

colnames(BASE_UPSET)[colnames(BASE_UPSET) =="ATENCION_UA_NEUROLOGIA"] <- "Neurologia"

colnames(BASE_UPSET)[colnames(BASE_UPSET) =="ATENCION_UA_TRABAJO_SOCIAL"] <- "Trabajo social"

colnames(BASE_UPSET)[colnames(BASE_UPSET) =="ATENCION_UA_MEDICINA_OCUPACIONAL"] <- "Medicina Ocupacional"

colnames(BASE_UPSET)[colnames(BASE_UPSET) =="ATENCION_UA_MEDICINA_CIRUGIA_PLASTICA_RECONSTRUCTIVA"] <- "Cirugia plastica"

colnames(BASE_UPSET)[colnames(BASE_UPSET) =="ATENCION_UA_MEDICINA_FISIATRIA_MEDICINA_FISICA"] <- "Medicina fisica"


BASE_UPSET <- BASE_UPSET %>%
  mutate(across(c(1:11), ~ recode(., "Checked" = 1, "Unchecked" = 0)))

BASE_UPSET<- as.data.frame(BASE_UPSET)

upset(BASE_UPSET, order.by = "freq", nsets = 11, nintersects = 20,
      matrix.color = "#2F4F4F",  main.bar.color = "darkblue", sets.bar.color = "#5F9EA0", mainbar.y.label = "Intersecciones Cuidado soporte",  sets.x.label = "Atencion especialidad", point.size = 2, line.size = 0.5, number.angles = 0, group.by = "degree", shade.alpha = 1, matrix.dot.alpha = 1, scale.intersections = "identity", scale.sets = "identity", text.scale = 1, set_size.show = T)

Perfiles latentes

  • Se ajusta la base de datos (variables continuas)
  • Se busca identificar potenciales pacientes “descuidados”
  • Se imputan los datos (vecino más cercano)
  • Se estiman los perfiles con todos los datos y sin todos los datos
#Ajuste base datos

library(dplyr)
DATA_PROFILES <- DATA_SET %>% mutate(across(c(14:41, 44:66), ~ recode(., "Un poco" = 1, "Para nada" = 2, "Mucho"  = 3, "Bastante" = 4))) # no se incluyeron las columnas 44 y 45 debido que son items que van de 1 a 7

# se invirtieron los items 44, 45 y 46. 
DATA_PROFILES<- DATA_PROFILES %>%
  mutate(across(57:59, ~ recode(., `1` = 4,`2` = 3, `3` = 2, `4` = 1)))

Revisando posibles pacientes “descuidados”

#install.packages("careless")
library(careless)
library(psych)


DATA_PROFILES_careless <- DATA_PROFILES %>%
  select(1, 14:43) %>%  # Solo columna 1 (ID) + columnas 14:66 (variables QLQ)
  mutate(string = longstring(select(., -1))) %>%      # Calcular longstring excluyendo ID
  mutate(md = outlier(select(., -1), plot = FALSE))   # Calcular MD excluyendo ID

library(knitr)
library(kableExtra)

# Crear tabla de frecuencias
tabla_freq <- freq(DATA_PROFILES_careless$string, report.nas = TRUE)
tabla_df <- as.data.frame(tabla_freq)
tabla_df <- tabla_df[order(tabla_df$Freq, decreasing = FALSE), ]

# Mostrar tabla con bordes
kable(tabla_df, caption = "Frecuencia de la variable", align = "c") %>%
  kable_styling(
    bootstrap_options = c("striped", "bordered", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  )
Frecuencia de la variable
Freq % Valid % Valid Cum. % Total % Total Cum.
<NA> 0 NA NA 0.0 100.0
2 1 0.5 0.5 0.5 0.5
22 1 0.5 96.5 0.5 96.5
27 1 0.5 98.0 0.5 98.0
15 2 1.0 86.0 1.0 86.0
16 2 1.0 87.0 1.0 87.0
20 2 1.0 95.0 1.0 95.0
21 2 1.0 96.0 1.0 96.0
24 2 1.0 97.5 1.0 97.5
11 3 1.5 80.5 1.5 80.5
14 3 1.5 85.0 1.5 85.0
17 4 2.0 89.0 2.0 89.0
18 4 2.0 91.0 2.0 91.0
28 4 2.0 100.0 2.0 100.0
12 6 3.0 83.5 3.0 83.5
19 6 3.0 94.0 3.0 94.0
3 7 3.5 4.0 3.5 4.0
4 12 6.0 10.0 6.0 10.0
6 17 8.5 32.5 8.5 32.5
8 17 8.5 54.5 8.5 54.5
10 19 9.5 79.0 9.5 79.0
7 27 13.5 46.0 13.5 46.0
5 28 14.0 24.0 14.0 24.0
9 30 15.0 69.5 15.0 69.5
Total 200 100.0 100.0 100.0 100.0

Se identifican los ID

indices_problematicos <- which(DATA_PROFILES_careless$string >= 27)
print(paste("Números de fila con longstring >= 27:", paste(indices_problematicos, collapse = ", ")))
## [1] "Números de fila con longstring >= 27: 16, 25, 104, 105, 199"
tabla_df2<- DATA_PROFILES_careless[c(16,25,104,105,199),]


kable(tabla_df2, caption = "Casos con patron ´descuidado´", align = "c") %>%
  kable_styling(
    bootstrap_options = c("striped", "bordered", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  )
Casos con patron ´descuidado´
ID qlq_c30_1 qlq_c30_2 qlq_c30_3 qlq_c30_4 qlq_c30_5 qlq_c30_6 qlq_c30_7 qlq_c30_8 qlq_c30_9 qlq_c30_10 qlq_c30_11 qlq_c30_12 qlq_c30_13 qlq_c30_14 qlq_c30_15 qlq_c30_16 qlq_c30_17 qlq_c30_18 qlq_c30_19 qlq_c30_20 qlq_c30_21 qlq_c30_22 qlq_c30_23 qlq_c30_24 qlq_c30_25 qlq_c30_26 qlq_c30_27 qlq_c30_28 qlq_c30_29 qlq_c30_30 string md
16 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 7 7 28 13.07523
25 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 7 7 28 13.07523
104 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 7 7 28 13.07523
105 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 7 7 28 13.07523
199 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 4 6 7 27 27.53364

Se identificaron los ID 16, 25, 104, 105 y 199. Se mantuvieron en el analisis

#se limpio la base de datos eliminando esos 5 datos. 

#DATA_PROFILES_clean <- DATA_PROFILES %>%
# filter(!ID %in% c(16, 25, 104, 105, 199))  

Imputando las observaciones faltantes

Se utilizo el metodo del vecino mas cercano: K-nearest neighbors (KNN). Utiliza observaciones similares (vecinos) en múltiples variables para predecir un valor razonable para el dato faltante. No asume una distribución especifica y puede asignar valores diferentes a distintos casos faltantes, lo cual refleja mejor la dispersión natural de los datos.

Solo se uso para el item 1

#install.packages("VIM")
library(VIM)

DATA_PROFILES_imp <- kNN(DATA_PROFILES, 
                           variable = c("qlq_c30_1"), 
                           k = 5, 
                           imp_var = FALSE)

Generacion de totales

########################## BASE IMPUTADA #############################


DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(PF = qlq_c30_1 + qlq_c30_2 + qlq_c30_3 + qlq_c30_4 + qlq_c30_5) #Physical functioning 1

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(RF = qlq_c30_6 + qlq_c30_7) # Role functioning 2 

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(CF = qlq_c30_20 + qlq_c30_25) #Cognitive functioning 3 

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(EF = qlq_c30_21 + qlq_c30_22 + qlq_c30_23 + qlq_c30_24) #Emotional functioning 4 

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(SF = qlq_c30_26 + qlq_c30_27)#Social functioning 5

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(QL2 = qlq_c30_29 + qlq_c30_30) #Global health status / QoL 6 

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(FA = qlq_c30_10 + qlq_c30_12 + qlq_c30_18) #Fatigue 7 

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(NV = qlq_c30_14 + qlq_c30_15) #Nausea and vomiting 8 

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(PA = qlq_c30_9 + qlq_c30_19) #Pain 9 

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(DY = qlq_c30_8) #Dyspnoea 10 

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(SL = qlq_c30_11) #Insomnia 11 

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(AP = qlq_c30_13) #Appetite loss 12 

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(CO = qlq_c30_16) #Constipation 13 

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(DI = qlq_c30_17) #Diarrhoea 14

#################### Escalas suplementarias ############################

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(FI = qlq_c30_28) #Financial difficulties 15 

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(BRST = qlq_c30_31 + qlq_c30_32 + qlq_c30_33 + qlq_c30_34 + qlq_c30_36 + qlq_c30_37 + qlq_c30_38) #Systemic therapy side effects 16 

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(BRHL = qlq_c30_35) #Upset by hair loss 17

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(BRBI = qlq_c30_39 + qlq_c30_40 + qlq_c30_41 + qlq_c30_42) #Body image 18

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(BRFU = qlq_c30_43) #Future perspective 19

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(BRSEF = qlq_c30_44 + qlq_c30_45) #Sexual functionning 20

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(BRSEE = qlq_c30_46) #Sexual enjoyment 21

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(BRAS = qlq_c30_47 + qlq_c30_48 + qlq_c30_49) #Arm symptoms 22

DATA_PROFILES_imp <- DATA_PROFILES_imp %>% 
  mutate(BRBS = qlq_c30_50 + qlq_c30_51 + qlq_c30_52 + qlq_c30_53) #Breast symptoms 23

Perfiles con totales

#latent profile analysis 
colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_1"] <- "QL-1"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_2"] <- "QL-2"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_3"] <- "QL-3"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_4"] <- "QL-4"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_5"] <- "QL-5"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_6"] <- "QL-6"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_7"] <- "QL-7"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_8"] <- "QL-8"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_9"] <- "QL-9"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_10"] <- "QL-10"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_11"] <- "QL-11"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_12"] <- "QL-12"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_13"] <- "QL-13"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_14"] <- "QL-14"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_15"] <- "QL-15"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_16"] <- "QL-16"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_17"] <- "QL-17"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_18"] <- "QL-18"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_19"] <- "QL-19"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_20"] <- "QL-20"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_21"] <- "QL-21"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_22"] <- "QL-22"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_23"] <- "QL-23"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_24"] <- "QL-24"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_25"] <- "QL-25"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_26"] <- "QL-26"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_27"] <- "QL-27"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_28"] <- "QL-28"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_29"] <- "QL-29"

colnames(DATA_PROFILES_imp)[colnames(DATA_PROFILES_imp) =="qlq_c30_30"] <- "QL-30"

library(mclust)
DATA_PROFILES_ANALYSIS <- DATA_PROFILES_imp %>% 
  select(ID, PF, RF, CF, EF, SF, QL2, FA, NV, PA, DY, SL, AP, CO, DI )


mc1 <-mclustBIC(DATA_PROFILES_imp[,14:43]) #create the model items
mc2 <-mclustBIC(DATA_PROFILES_ANALYSIS[,-1]) #create the model totals

summary(mc1) # 2 - 4 GRUPOS CON ITEMS
## Best BIC values:
##             VEI,4      VEI,3      VEI,2
## BIC      -8954.61 -9238.8569 -10202.132
## BIC diff     0.00  -284.2465  -1247.522
summary(mc2) # 7 - 9 GRUPOS CON ITEMS
## Best BIC values:
##             VEI,9      VEI,8      VEI,7
## BIC      -6226.15 -6454.1912 -6492.3054
## BIC diff     0.00  -228.0413  -266.1555

Cuando se analizan items se podrian encontrar entre 2 a 4 perfiles

Cuando se analizan totales se podrian encontrar entre 7 a 9 perfiles

Hay menos grupos cuando se analizan items que los totales

#install.packages("factoextra")
library(factoextra) #plotting the best model
mc1.1 <-Mclust(DATA_PROFILES_imp[,14:43])
fviz_mclust_bic(mc1.1)

Se encontrarian una mejor solucion en 4 perfiles

Se utiliza el modelo de analisis por items

model1 <- Mclust(DATA_PROFILES_imp[,14:43], modelNames = "VEI", G = 4)
summary(model1)
## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust VEI (diagonal, equal shape) model with 4 components: 
## 
##  log-likelihood   n  df      BIC       ICL
##       -4064.036 200 156 -8954.61 -8957.928
## 
## Clustering table:
##  1  2  3  4 
## 28 79 29 64

Graficando los perfiles

Se obtienen primero las probabilidades de pertencia

library(tibble)
library(tidyr)

means <- data.frame(model1$parameters$mean) %>%
  rownames_to_column() %>%
  rename(DATA_PROFILES_imp = rowname) %>%
  pivot_longer(cols = c(X1, X2, X3, X4), names_to = "Profile", values_to = "Mean") %>%
  mutate(
    Mean = round(Mean, 2),
    # Estandarización Z (normalización usando media y desviación estándar)
    Mean = (Mean - mean(Mean)) / sd(Mean)
  )


p <- means %>%
  mutate(Profile = recode(Profile, 
                          X1 = "P1",
                          X2 = "P2", 
                          X3 = "P3", 
                          X4 = "P4")) %>%
  ggplot(aes(DATA_PROFILES_imp, Mean, group = Profile, color = Profile)) +
  geom_point(size = 2.25) +
  geom_line(size = 1.25) +
  scale_x_discrete(limits = c("QL-1", "QL-2", "QL-3", "QL-4", "QL-5", "QL-6", "QL-7", "QL-8", "QL-9", "QL-10", "QL-11", "QL-12", "QL-13", "QL-14", "QL-15", "QL-16", "QL-17", "QL-18", "QL-19","QL-20", "QL-21", "QL-22", "QL-23", "QL-24", "QL-25", "QL-26", "QL-27", "QL-28", "QL-29", "QL-30" )) +
  labs(x = "Items calidad vida", y = "Media estandarizada", title = "Perfiles Calidad de vida") +
  theme_bw(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 10, face = "bold"),   # Letras eje X más grandes y en negrita
    axis.text.y = element_text(size = 14, face = "bold"),                         # Letras eje Y más grandes y en negrita
    axis.title.x = element_text(size = 16, face = "bold"),                       # Aumentar tamaño de la etiqueta del eje X
    axis.title.y = element_text(size = 16, face = "bold"),                       # Aumentar tamaño de la etiqueta del eje Y
    legend.position = "right", 
    legend.text = element_text(size = 10, face = "bold"),                        # Aumentar el tamaño de la leyenda y ponerla en negrita
    legend.title = element_text(size = 16, face = "bold")                       # Aumentar tamaño del título de la leyenda
  )
p

Porbablemente los perfiles se puedan agrupar de acuerdo con el “Global health status”.