# Load the trauma dataset. read_csv2() reads ';'-separated files that use
# ',' as the decimal mark.
df <- read_csv2(file = "trauma_iceci.csv")
¿Cuántos NA hay?
# Dimensions of the raw data: number of rows, then number of columns.
c(nrow(df), ncol(df))
[1] 1261 44
# Percentage of all cells in df that are NA.
mean(is.na(df)) * 100
[1] 24
# Number of missing values in each column of the raw data.
vapply(df, function(columna) sum(is.na(columna)), integer(1))
id sexo edtrauma lesion cau_priCILCE
0 0 0 31 0
cau_secCILCEa cau_secCILCEb cau_terCILCEa cau_terCILCEb lugar_CILCE
0 0 0 0 0
les_glendor historia pac_hc pac_hc2 d_afect
0 0 524 0 524
n_les numdte fenac horaexa fucontrol
0 0 0 172 29
dgderiv enfsist traupre fetrac hora
1163 5 0 29 235
tatenc lugar causa cmoocurri feex
0 0 0 123 0
tejido diente d gr_ed_tr nnn
0 0 0 0 0
tbu tltb lltb ad tipo_seg
1132 1132 1132 1157 1157
ed_cons dtaad tr_rep cual_ad
1193 1157 1258 1160
Hay 31 NA en lesion; los debo eliminar.
# Keep only the rows where lesion is not missing.
df.clean <- df[!is.na(df$lesion), ]
# Verify the result by recounting missing values per column.
vapply(df.clean, function(columna) sum(is.na(columna)), integer(1))
id sexo edtrauma lesion cau_priCILCE
0 0 0 0 0
cau_secCILCEa cau_secCILCEb cau_terCILCEa cau_terCILCEb lugar_CILCE
0 0 0 0 0
les_glendor historia pac_hc pac_hc2 d_afect
0 0 510 0 510
n_les numdte fenac horaexa fucontrol
0 0 0 155 17
dgderiv enfsist traupre fetrac hora
1139 5 0 17 209
tatenc lugar causa cmoocurri feex
0 0 0 107 0
tejido diente d gr_ed_tr nnn
0 0 0 0 0
tbu tltb lltb ad tipo_seg
1101 1101 1101 1157 1157
ed_cons dtaad tr_rep cual_ad
1193 1157 1230 1160
# How many rows and columns remain after dropping rows without lesion.
c(nrow(df.clean), ncol(df.clean))
[1] 1230 44
# Convert character columns to factors. Since R 4.0.0 as.data.frame()
# defaults to stringsAsFactors = FALSE, so the conversion must be requested
# explicitly; otherwise the columns stay character and the later
# droplevels() call and the tree models do not see factors.
df.clean <- as.data.frame(unclass(df.clean), stringsAsFactors = TRUE)
Hay les_glendor «No reportado» = 0 y lesion «FCRNC» = 7. Los voy a eliminar, junto con lesion «Infraccion».
# Remove rows with an unreported Glendor class and the two excluded lesion
# categories, then drop the factor levels that are left without cases.
df.clean <- droplevels(
  filter(
    df.clean,
    les_glendor != "No reportado",
    lesion != "FCRNC",
    lesion != "Infraccion"
  )
)
Remuevo el df original, ya no es necesario
# The raw data frame is no longer needed once df.clean exists.
rm(list = "df")
# Print numbers with three significant digits from here on.
options(digits = 3)
# Mean and standard deviation of age at trauma for each sex.
summarise(
  group_by(df.clean, sexo),
  `Promedio edad trauma` = mean(edtrauma),
  SD = sd(edtrauma)
)
# Boxplot of age at trauma split by sex.
ggplot(df.clean, aes(x = sexo, y = edtrauma)) +
  geom_boxplot() +
  labs(title = "Edad por sexo")
# Horizontal frequency bar charts, one per categorical variable.

# Lesion type.
ggplot(df.clean, aes(x = lesion)) +
  geom_bar() +
  coord_flip() +
  labs(title = "Tipo de lesión", x = "Lesión", y = "Frecuencia")

# Lesion type, Glendor classification.
ggplot(df.clean, aes(x = les_glendor)) +
  geom_bar() +
  coord_flip() +
  labs(title = "Tipo de lesión - Glendor", x = "Lesión", y = "Frecuencia")

# Primary cause.
ggplot(df.clean, aes(x = cau_priCILCE)) +
  geom_bar() +
  coord_flip() +
  labs(title = "Causa primaria CILCE", x = "Causa", y = "Frecuencia")

# Secondary cause (variable a).
ggplot(df.clean, aes(x = cau_secCILCEa)) +
  geom_bar() +
  coord_flip() +
  labs(title = "Causa secundaria CILCE", x = "Causa", y = "Frecuencia")
# Horizontal frequency bar charts for the two tertiary-cause variables.
# Each title names the variable it shows; previously both plots carried the
# same title and could not be told apart.

# Tertiary cause, variable a.
df.clean %>%
  ggplot(aes(x = cau_terCILCEa)) +
  geom_bar() +
  coord_flip() +
  ggtitle("Causa terciaria CILCE (a)") + xlab("Causa") + ylab("Frecuencia")

# Tertiary cause, variable b.
df.clean %>%
  ggplot(aes(x = cau_terCILCEb)) +
  geom_bar() +
  coord_flip() +
  ggtitle("Causa terciaria CILCE (b)") + xlab("Causa") + ylab("Frecuencia")
# Histogram of d_afect with one bin per unit.
# d_afect contains missing values; na.rm = TRUE drops them explicitly
# instead of relying on ggplot2's removed-rows warning.
df.clean %>%
  ggplot(aes(x = d_afect)) +
  geom_histogram(binwidth = 1, na.rm = TRUE) +
  ggtitle("Dientes afectados") + xlab("Dientes") + ylab("Frecuencia")
# Lesion-by-sex contingency table with row and column totals.
addmargins(
  table(df.clean[["lesion"]], df.clean[["sexo"]])
)
Femenino Masculino Sum
Avulsion 50 46 96
Concusion 27 46 73
Extrusion 13 21 34
FCC 23 23 46
FCNC 62 60 122
FCRC 11 15 26
Fract radicular 30 51 81
Intrusion 62 85 147
Luxacion lateral 67 90 157
Sub luxacion 128 181 309
Tej blando 53 76 129
Sum 526 694 1220
No hay diferencias de lesión por sexo
# Mosaic plot of lesion by sex, shaded by residuals.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
mosaicplot(table(df.clean$lesion, df.clean$sexo), shade = TRUE,
           main = "Lesión por sexo", ylab = "Sexo", xlab = "Lesión")
# Pearson chi-squared test on the lesion-by-sex table.
chisq.test(table(df.clean$lesion, df.clean$sexo))
Pearson's Chi-squared test
data: table(df.clean$lesion, df.clean$sexo)
X-squared = 10, df = 10, p-value = 0.4
Se ocupan: les_glendor (porque solo hay cinco categorías), cau_terCILCEa (11 categorías) y lugar_CILCE (12 categorías).
veo si se puede hacer algo con todo…
# First attempt: model lesion from every CILCE cause variable plus place.
# NOTE(review): the formula refers to df.clean$ columns directly rather
# than using a data argument; it is kept so the printed model formula
# stays the same.
ramas <- df.clean$lesion ~ df.clean$cau_priCILCE +
  df.clean$cau_secCILCEa + df.clean$cau_secCILCEb +
  df.clean$cau_terCILCEa + df.clean$cau_terCILCEb +
  df.clean$lugar_CILCE
# Fit the tree and draw it with the compact "simple" node display.
arbol <- ctree(formula = ramas)
plot(x = arbol, type = "simple")
No se puede hacer nada
# Show the text representation of the fitted tree.
print(arbol)
Model formula:
df.clean$lesion ~ df.clean$cau_priCILCE + df.clean$cau_secCILCEa +
df.clean$cau_secCILCEb + df.clean$cau_terCILCEa + df.clean$cau_terCILCEb +
df.clean$lugar_CILCE
Fitted party:
[1] root
| [2] df.clean$cau_terCILCEa in Caida de un nivel a otro, Colision, Golpe con o contra objeto, Golpe por objeto arrojado, No reportado, Traccion
| | [3] df.clean$cau_terCILCEb in Choque, Colision, Empujon, Golpe con o contra objeto, No recuerda, No reportado, Perdida del equilibrio, Resbalon, Tropezon
| | | [4] df.clean$cau_priCILCE in Accidente de transito, Deporte individual, Primeros pasos
| | | | [5] df.clean$cau_secCILCEb in Construccion, Muebles, No reportado, Utensilio o recipiente, Vehiculo terrestre
| | | | | [6] df.clean$cau_secCILCEa in Construccion, Muebles, Producto infantil, Utensilio o recipiente, Vehiculo terrestre
| | | | | | [7] df.clean$lugar_CILCE in Via publica: Intrusion (n = 8, err = 62%)
| | | | | | [8] df.clean$lugar_CILCE in area deportiva, No reportado, Vivienda
| | | | | | | [9] df.clean$cau_secCILCEa in Producto infantil: Sub luxacion (n = 27, err = 78%)
| | | | | | | [10] df.clean$cau_secCILCEa in Construccion, Muebles, Utensilio o recipiente: Intrusion (n = 8, err = 62%)
| | | | | [11] df.clean$cau_secCILCEa in Animal, planta o persona, Equipo deportivo/act. recreativa, No reportado: Intrusion (n = 7, err = 57%)
| | | | [12] df.clean$cau_secCILCEb in Producto infantil: Intrusion (n = 7, err = 0%)
| | | [13] df.clean$cau_priCILCE in Accidente en bicicleta, Accidente en bicicleta pasajero, Actividad de viajar, Actividad vital, Causa no especificada, Educacion, No reportado, Otros, Tiempo libre o juego
| | | | [14] df.clean$cau_terCILCEb in Choque, Colision, No recuerda, No reportado, Perdida del equilibrio, Resbalon, Tropezon
| | | | | [15] df.clean$cau_priCILCE in Accidente en bicicleta, Actividad de viajar, Actividad vital, Causa no especificada, Educacion, No reportado, Tiempo libre o juego
| | | | | | [16] df.clean$cau_priCILCE in Accidente en bicicleta
| | | | | | | [17] df.clean$lugar_CILCE in Campo o zona rural, Vivienda: Intrusion (n = 10, err = 80%)
| | | | | | | [18] df.clean$lugar_CILCE in No reportado, Via publica: Luxacion lateral (n = 15, err = 53%)
| | | | | | [19] df.clean$cau_priCILCE in Actividad de viajar, Actividad vital, Causa no especificada, Educacion, No reportado, Tiempo libre o juego
| | | | | | | [20] df.clean$cau_priCILCE in Educacion, No reportado, Tiempo libre o juego
| | | | | | | | [21] df.clean$cau_secCILCEb in Material no clasificado en otra parte: Fract radicular (n = 12, err = 50%)
| | | | | | | | [22] df.clean$cau_secCILCEb in Animal, planta o persona, Artefacto o enseres usados en el hogar, Construccion, Equipo deportivo/act. recreativa, Muebles, No reportado, Producto infantil, Utensilio o recipiente, Vehiculo terrestre
| | | | | | | | | [23] df.clean$cau_terCILCEa in Caida de un nivel a otro, Colision, Golpe con o contra objeto: Sub luxacion (n = 250, err = 75%)
| | | | | | | | | [24] df.clean$cau_terCILCEa in No reportado: Sub luxacion (n = 64, err = 75%)
| | | | | | | [25] df.clean$cau_priCILCE in Actividad de viajar, Actividad vital, Causa no especificada
| | | | | | | | [26] df.clean$lugar_CILCE in area comercial, Vivienda: Intrusion (n = 109, err = 79%)
| | | | | | | | [27] df.clean$lugar_CILCE in No reportado, Via publica: FCNC (n = 9, err = 67%)
| | | | | [28] df.clean$cau_priCILCE in Accidente en bicicleta pasajero, Otros: Avulsion (n = 7, err = 43%)
| | | | [29] df.clean$cau_terCILCEb in Empujon, Golpe con o contra objeto: Sub luxacion (n = 94, err = 70%)
| | [30] df.clean$cau_terCILCEb in Golpe por objeto arrojado, Traccion: Avulsion (n = 17, err = 41%)
| [31] df.clean$cau_terCILCEa in Caida en el mismo nivel, Choque, Golpe de o con persona, Morder, No recuerda
| | [32] df.clean$cau_priCILCE in Accidente de transito, Accidente en bicicleta, Actividad vital, Causa no especificada, Deporte individual, Educacion, No recuerda, No reportado, Otros, Pelea, Tiempo libre o juego
| | | [33] df.clean$cau_priCILCE in Accidente de transito, Actividad vital, Causa no especificada, Deporte individual, Educacion, No reportado, Otros, Tiempo libre o juego
| | | | [34] df.clean$lugar_CILCE in area comercial, area de atencion medica, area deportiva, area escolar, area publica, area recreativa, Via publica, Vivienda
| | | | | [35] df.clean$lugar_CILCE in area comercial, area de atencion medica, Vivienda
| | | | | | [36] df.clean$cau_secCILCEa in Alimentos o bebidas, Animal, planta o persona, Artefacto o enseres usados en el hogar, Causa no especificada, Construccion, Equipo deportivo/act. recreativa, Muebles, No reportado, Producto infantil: Sub luxacion (n = 311, err = 75%)
| | | | | | [37] df.clean$cau_secCILCEa in Uso personal: Avulsion (n = 8, err = 62%)
| | | | | [38] df.clean$lugar_CILCE in area deportiva, area escolar, area publica, area recreativa, Via publica
| | | | | | [39] df.clean$lugar_CILCE in area deportiva, area recreativa: Sub luxacion (n = 7, err = 57%)
| | | | | | [40] df.clean$lugar_CILCE in area escolar, area publica, Via publica: Sub luxacion (n = 205, err = 60%)
| | | | [41] df.clean$lugar_CILCE in No recuerda, No reportado
| | | | | [42] df.clean$cau_terCILCEb in No recuerda, No reportado, Tropezon: FCRC (n = 22, err = 77%)
| | | | | [43] df.clean$cau_terCILCEb in Perdida del equilibrio, Resbalon: Sub luxacion (n = 7, err = 29%)
| | | [44] df.clean$cau_priCILCE in Accidente en bicicleta, No recuerda, Pelea: Avulsion (n = 7, err = 71%)
| | [45] df.clean$cau_priCILCE in Primeros pasos: FCNC (n = 9, err = 44%)
Number of inner nodes: 22
Number of terminal nodes: 23
Así que hay que simplificar
# Simplified model: Glendor lesion class from sex, tertiary cause (a) and
# place. This overwrites the earlier ramas and arbol objects.
ramas <- df.clean$les_glendor ~ df.clean$sexo +
  df.clean$cau_terCILCEa + df.clean$lugar_CILCE
arbol <- ctree(formula = ramas)
# Draw the tree, then show its text representation.
plot(x = arbol, type = "simple")
print(arbol)
Model formula:
df.clean$les_glendor ~ df.clean$sexo + df.clean$cau_terCILCEa +
df.clean$lugar_CILCE
Fitted party:
[1] root
| [2] df.clean$cau_terCILCEa in Caida de un nivel a otro, Choque, Golpe con o contra objeto, Golpe por objeto arrojado, No reportado
| | [3] df.clean$lugar_CILCE in area comercial, area de atencion medica, Campo o zona rural, Via publica, Vivienda: Lux. con dislocacion (n = 452, err = 50%)
| | [4] df.clean$lugar_CILCE in area deportiva, area escolar, area recreativa, No reportado: Lux. sin dislocacion (n = 184, err = 66%)
| [5] df.clean$cau_terCILCEa in Caida en el mismo nivel, Colision, Golpe de o con persona, Morder, No recuerda, Traccion
| | [6] df.clean$lugar_CILCE in area comercial, area de atencion medica, area recreativa, Vivienda: Lux. con dislocacion (n = 341, err = 67%)
| | [7] df.clean$lugar_CILCE in area de transporte, area deportiva, area escolar, area publica, No recuerda, No reportado, Via publica: Lux. sin dislocacion (n = 243, err = 53%)
Number of inner nodes: 3
Number of terminal nodes: 4
# Fit a C5.0 classification tree for les_glendor using sex, primary cause,
# tertiary cause (b) and place as predictors. rules = FALSE asks for a tree
# rather than a rule set.
arbol.c50 <- C5.0(les_glendor ~ sexo +
cau_priCILCE +
cau_terCILCEb +
lugar_CILCE,
data = df.clean,
rules = FALSE)
# Print the short description of the fitted model.
arbol.c50
Call:
C5.0.formula(formula = les_glendor ~ sexo + cau_priCILCE + cau_terCILCEb
+ lugar_CILCE, data = df.clean, rules = FALSE)
Classification Tree
Number of samples: 1220
Number of predictors: 4
Tree size: 16
Non-standard options: attempt to group attributes
# Print the full C5.0 report for the fitted tree.
summary(arbol.c50)
Call:
C5.0.formula(formula = les_glendor ~ sexo + cau_priCILCE + cau_terCILCEb
+ lugar_CILCE, data = df.clean, rules = FALSE)
C5.0 [Release 2.07 GPL Edition] Sat Apr 15 15:53:33 2017
-------------------------------
Class specified by attribute `outcome'
Read 1220 cases (5 attributes) from undefined.data
Decision tree:
lugar_CILCE in {area comercial,area de atencion medica,Campo o zona rural,
: Vivienda}: Lux. con dislocacion (732/424)
lugar_CILCE in {area de transporte,area escolar,area publica,
: area recreativa}: Lux. sin dislocacion (258/147)
lugar_CILCE = area deportiva: Fract. sin expos. pulpar (5/2)
lugar_CILCE = No recuerda: Fract. con expos. pulpar (2/1)
lugar_CILCE = No reportado:
:...cau_terCILCEb in {Choque,Colision,Empujon,Golpe con o contra objeto,
: : Golpe de o con persona,Morder,No recuerda,
: : Perdida del equilibrio,Resbalon,
: : Traccion}: Lux. sin dislocacion (25/9)
: cau_terCILCEb = Golpe por objeto arrojado: Lux. con dislocacion (1)
: cau_terCILCEb = Tropezon: Fract. con expos. pulpar (3/1)
: cau_terCILCEb = No reportado:
: :...sexo = Femenino: Lux. con dislocacion (34/17)
: sexo = Masculino: Lux. sin dislocacion (38/23)
lugar_CILCE = Via publica: [S1]
SubTree [S1]
cau_priCILCE in {Accidente en bicicleta,Actividad vital,Deporte individual,
: Primeros pasos}: Lux. con dislocacion (47/26)
cau_priCILCE = Actividad de viajar: Fract. con expos. pulpar (2)
cau_priCILCE in {Causa no especificada,Educacion,No recuerda,No reportado,
: Otros,Pelea,Tiempo libre o juego,
: Uso inapropiado de los dientes}: Lux. sin dislocacion (58/27)
cau_priCILCE = Accidente de transito:
:...sexo = Femenino: Lux. con dislocacion (4)
: sexo = Masculino: Lux. sin dislocacion (7/2)
cau_priCILCE = Accidente en bicicleta pasajero: [S2]
SubTree [S2]
cau_terCILCEb in {Choque,Colision,Empujon,Golpe con o contra objeto,
: Golpe de o con persona,Golpe por objeto arrojado,Morder,
: No recuerda,No reportado,Perdida del equilibrio,Resbalon,
: Traccion}: Fract. sin expos. pulpar (2)
cau_terCILCEb = Tropezon: Lux. con dislocacion (2)
Evaluation on training data (1220 cases):
Decision Tree
----------------
Size Errors
16 679(55.7%) <<
(a) (b) (c) (d) (e) <-classified as
---- ---- ---- ---- ----
5 96 52 (a): class Fract. con expos. pulpar
5 90 27 (b): class Fract. sin expos. pulpar
1 353 80 (c): class Lux. con dislocacion
1 2 201 178 (d): class Lux. sin dislocacion
80 49 (e): class Tejido blando
Attribute usage:
100.00% lugar_CILCE
10.00% cau_priCILCE
8.61% cau_terCILCEb
6.80% sexo
Time: 0.0 secs
# Predictor importance for the C5.0 tree, using the "usage" metric.
C5imp(arbol.c50, metric = "usage")
Voy a clasificar la lesión (les_glendor) con rpart
# Fit an rpart classification tree for les_glendor with the same four
# predictors used for the C5.0 model. minsplit = 30 and cp = 0.001 are
# passed through rpart.control().
fit <- rpart(les_glendor ~ sexo +
cau_priCILCE +
cau_terCILCEb +
lugar_CILCE,
data = df.clean,
method = "class",
control = rpart.control(minsplit=30, cp=0.001))
# Print the complexity-parameter table of the fitted tree.
printcp(fit)
Classification tree:
rpart(formula = les_glendor ~ sexo + cau_priCILCE + cau_terCILCEb +
lugar_CILCE, data = df.clean, method = "class", control = rpart.control(minsplit = 30,
cp = 0.001))
Variables actually used in tree construction:
[1] cau_priCILCE cau_terCILCEb lugar_CILCE sexo
Root node error: 786/1220 = 0.6
n= 1220
CP nsplit rel error xerror xstd
1 0.080 0 1.0 1.0 0.02
2 0.029 1 0.9 1.0 0.02
3 0.010 2 0.9 0.9 0.02
4 0.006 3 0.9 0.9 0.02
5 0.002 4 0.9 0.9 0.02
6 0.002 19 0.8 0.9 0.02
7 0.001 23 0.8 0.9 0.02
# Plot the complexity-parameter table of the fitted tree.
plotcp(fit)
# Print the detailed node-by-node report of the tree.
summary(fit)
Call:
rpart(formula = les_glendor ~ sexo + cau_priCILCE + cau_terCILCEb +
lugar_CILCE, data = df.clean, method = "class", control = rpart.control(minsplit = 30,
cp = 0.001))
n= 1220
CP nsplit rel error xerror xstd
1 0.08015 0 1.000 1.000 0.0213
2 0.02926 1 0.920 0.952 0.0216
3 0.01018 2 0.891 0.921 0.0218
4 0.00636 3 0.880 0.908 0.0219
5 0.00212 4 0.874 0.910 0.0219
6 0.00191 19 0.838 0.919 0.0218
7 0.00100 23 0.831 0.925 0.0218
Variable importance
cau_priCILCE cau_terCILCEb lugar_CILCE sexo
35 32 26 7
Node number 1: 1220 observations, complexity param=0.0802
predicted class=Lux. con dislocacion expected loss=0.644 P(node) =1
class counts: 153 122 434 382 129
probabilities: 0.125 0.100 0.356 0.313 0.106
left son=2 (544 obs) right son=3 (676 obs)
Primary splits:
cau_priCILCE splits as LLLRLRLRRLLRLRL, improve=14.700, (0 missing)
lugar_CILCE splits as LRRRRRRLRRRL, improve=14.400, (0 missing)
cau_terCILCEb splits as LRLRRLRRLLLLL, improve= 7.250, (0 missing)
sexo splits as LR, improve= 0.725, (0 missing)
Surrogate splits:
lugar_CILCE splits as LLRRRRRLRLLL, agree=0.658, adj=0.233, (0 split)
cau_terCILCEb splits as RRRRRLLRRLRLR, agree=0.567, adj=0.029, (0 split)
Node number 2: 544 observations, complexity param=0.00212
predicted class=Lux. con dislocacion expected loss=0.566 P(node) =0.446
class counts: 68 67 236 121 52
probabilities: 0.125 0.123 0.434 0.222 0.096
left son=4 (484 obs) right son=5 (60 obs)
Primary splits:
cau_priCILCE splits as RRL-L-R--LR-L-R, improve=3.19, (0 missing)
lugar_CILCE splits as RR-RR--R-LRR, improve=3.07, (0 missing)
cau_terCILCEb splits as LRLL-RL-LLLLL, improve=2.35, (0 missing)
sexo splits as LR, improve=1.09, (0 missing)
Surrogate splits:
cau_terCILCEb splits as LRLL-LL-LLLLL, agree=0.895, adj=0.050, (0 split)
lugar_CILCE splits as LL-LL--R-LLL, agree=0.893, adj=0.033, (0 split)
Node number 3: 676 observations, complexity param=0.0293
predicted class=Lux. sin dislocacion expected loss=0.614 P(node) =0.554
class counts: 85 55 198 261 77
probabilities: 0.126 0.081 0.293 0.386 0.114
left son=6 (326 obs) right son=7 (350 obs)
Primary splits:
lugar_CILCE splits as RRRRRRR-RRRL, improve=8.780, (0 missing)
cau_terCILCEb splits as LLLLRR-LLRLRL, improve=4.480, (0 missing)
cau_priCILCE splits as ---R-L-RR--L-L-, improve=2.710, (0 missing)
sexo splits as LR, improve=0.181, (0 missing)
Surrogate splits:
cau_priCILCE splits as ---R-L-RL--L-L-, agree=0.818, adj=0.623, (0 split)
cau_terCILCEb splits as RRRRLR-LRLRLR, agree=0.575, adj=0.120, (0 split)
Node number 4: 484 observations, complexity param=0.00212
predicted class=Lux. con dislocacion expected loss=0.583 P(node) =0.397
class counts: 66 67 202 103 46
probabilities: 0.136 0.138 0.417 0.213 0.095
left son=8 (130 obs) right son=9 (354 obs)
Primary splits:
lugar_CILCE splits as LR-LR----LLR, improve=3.250, (0 missing)
cau_terCILCEb splits as LRLL-RL-LLLLL, improve=2.990, (0 missing)
cau_priCILCE splits as --R-R----L--R--, improve=1.520, (0 missing)
sexo splits as LR, improve=0.453, (0 missing)
Surrogate splits:
cau_terCILCEb splits as RRRR-RR-LRRRR, agree=0.758, adj=0.100, (0 split)
cau_priCILCE splits as --L-R----R--R--, agree=0.740, adj=0.031, (0 split)
Node number 5: 60 observations, complexity param=0.00212
predicted class=Lux. con dislocacion expected loss=0.433 P(node) =0.0492
class counts: 2 0 34 18 6
probabilities: 0.033 0.000 0.567 0.300 0.100
left son=10 (11 obs) right son=11 (49 obs)
Primary splits:
sexo splits as LR, improve=2.810, (0 missing)
cau_terCILCEb splits as LLRL----RLRRR, improve=1.040, (0 missing)
lugar_CILCE splits as LR-----L-RLR, improve=0.694, (0 missing)
cau_priCILCE splits as RL----R---R---L, improve=0.560, (0 missing)
Node number 6: 326 observations, complexity param=0.0102
predicted class=Lux. con dislocacion expected loss=0.617 P(node) =0.267
class counts: 38 29 125 102 32
probabilities: 0.117 0.089 0.383 0.313 0.098
left son=12 (207 obs) right son=13 (119 obs)
Primary splits:
cau_terCILCEb splits as RLRRL--LLRLRL, improve=5.040, (0 missing)
sexo splits as LR, improve=1.170, (0 missing)
cau_priCILCE splits as -----R--R--R-L-, improve=0.492, (0 missing)
Surrogate splits:
cau_priCILCE splits as -----L--L--R-L-, agree=0.647, adj=0.034, (0 split)
Node number 7: 350 observations, complexity param=0.00212
predicted class=Lux. sin dislocacion expected loss=0.546 P(node) =0.287
class counts: 47 26 73 159 45
probabilities: 0.134 0.074 0.209 0.454 0.129
left son=14 (276 obs) right son=15 (74 obs)
Primary splits:
cau_terCILCEb splits as RRLR-L-RLLL-L, improve=5.78, (0 missing)
cau_priCILCE splits as ---L-R-L-----R-, improve=2.51, (0 missing)
lugar_CILCE splits as LRRRLRL-LLL-, improve=2.18, (0 missing)
sexo splits as RL, improve=1.26, (0 missing)
Surrogate splits:
cau_priCILCE splits as ---L-R-L-----L-, agree=0.806, adj=0.081, (0 split)
lugar_CILCE splits as LLLLLLR-RLL-, agree=0.797, adj=0.041, (0 split)
Node number 8: 130 observations, complexity param=0.00212
predicted class=Lux. con dislocacion expected loss=0.677 P(node) =0.107
class counts: 27 18 42 35 8
probabilities: 0.208 0.138 0.323 0.269 0.062
left son=16 (111 obs) right son=17 (19 obs)
Primary splits:
cau_terCILCEb splits as --LR----LRL-L, improve=1.490, (0 missing)
cau_priCILCE splits as --L-L----R--L--, improve=1.190, (0 missing)
sexo splits as LR, improve=0.864, (0 missing)
lugar_CILCE splits as R--L-----RL-, improve=0.675, (0 missing)
Surrogate splits:
cau_priCILCE splits as --L-L----L--R--, agree=0.892, adj=0.263, (0 split)
lugar_CILCE splits as L--R-----LL-, agree=0.862, adj=0.053, (0 split)
Node number 9: 354 observations, complexity param=0.00212
predicted class=Lux. con dislocacion expected loss=0.548 P(node) =0.29
class counts: 39 49 160 68 38
probabilities: 0.110 0.138 0.452 0.192 0.107
left son=18 (344 obs) right son=19 (10 obs)
Primary splits:
cau_terCILCEb splits as LRLL-RL-LLLLL, improve=2.5900, (0 missing)
cau_priCILCE splits as ----L----R--L--, improve=0.7670, (0 missing)
sexo splits as LR, improve=0.0639, (0 missing)
Node number 10: 11 observations
predicted class=Lux. con dislocacion expected loss=0.0909 P(node) =0.00902
class counts: 0 0 10 0 1
probabilities: 0.000 0.000 0.909 0.000 0.091
Node number 11: 49 observations, complexity param=0.00212
predicted class=Lux. con dislocacion expected loss=0.51 P(node) =0.0402
class counts: 2 0 24 18 5
probabilities: 0.041 0.000 0.490 0.367 0.102
left son=22 (14 obs) right son=23 (35 obs)
Primary splits:
cau_terCILCEb splits as RRLR----LRLLR, improve=2.040, (0 missing)
cau_priCILCE splits as RL----R---R---L, improve=0.884, (0 missing)
lugar_CILCE splits as LR-----L-RLR, improve=0.389, (0 missing)
Surrogate splits:
lugar_CILCE splits as RR-----L-LRR, agree=0.776, adj=0.214, (0 split)
cau_priCILCE splits as RR----R---R---L, agree=0.735, adj=0.071, (0 split)
Node number 12: 207 observations, complexity param=0.00636
predicted class=Lux. sin dislocacion expected loss=0.633 P(node) =0.17
class counts: 33 12 68 76 18
probabilities: 0.159 0.058 0.329 0.367 0.087
left son=24 (64 obs) right son=25 (143 obs)
Primary splits:
cau_terCILCEb splits as -R--R--RR-R-L, improve=1.260, (0 missing)
sexo splits as LR, improve=0.937, (0 missing)
cau_priCILCE splits as -----R--R----L-, improve=0.192, (0 missing)
Node number 13: 119 observations
predicted class=Lux. con dislocacion expected loss=0.521 P(node) =0.0975
class counts: 5 17 57 26 14
probabilities: 0.042 0.143 0.479 0.218 0.118
Node number 14: 276 observations, complexity param=0.00212
predicted class=Lux. sin dislocacion expected loss=0.601 P(node) =0.226
class counts: 40 20 66 110 40
probabilities: 0.145 0.072 0.239 0.399 0.145
left son=28 (206 obs) right son=29 (70 obs)
Primary splits:
lugar_CILCE splits as RRRRLRL--LR-, improve=2.97, (0 missing)
cau_priCILCE splits as ---L---L-----R-, improve=2.47, (0 missing)
sexo splits as RL, improve=2.01, (0 missing)
cau_terCILCEb splits as --R--L--RRL-R, improve=1.43, (0 missing)
Surrogate splits:
cau_priCILCE splits as ---R---L-----R-, agree=0.891, adj=0.571, (0 split)
Node number 15: 74 observations
predicted class=Lux. sin dislocacion expected loss=0.338 P(node) =0.0607
class counts: 7 6 7 49 5
probabilities: 0.095 0.081 0.095 0.662 0.068
Node number 16: 111 observations, complexity param=0.00212
predicted class=Lux. con dislocacion expected loss=0.658 P(node) =0.091
class counts: 26 14 38 27 6
probabilities: 0.234 0.126 0.342 0.243 0.054
left son=32 (43 obs) right son=33 (68 obs)
Primary splits:
sexo splits as LR, improve=1.470, (0 missing)
cau_priCILCE splits as --R-L----R-----, improve=1.180, (0 missing)
cau_terCILCEb splits as --L-----R-L-R, improve=0.603, (0 missing)
lugar_CILCE splits as R--------LR-, improve=0.209, (0 missing)
Surrogate splits:
cau_priCILCE splits as --L-R----R-----, agree=0.631, adj=0.047, (0 split)
cau_terCILCEb splits as --L-----R-R-R, agree=0.622, adj=0.023, (0 split)
Node number 17: 19 observations
predicted class=Lux. sin dislocacion expected loss=0.579 P(node) =0.0156
class counts: 1 4 4 8 2
probabilities: 0.053 0.211 0.211 0.421 0.105
Node number 18: 344 observations, complexity param=0.00212
predicted class=Lux. con dislocacion expected loss=0.561 P(node) =0.282
class counts: 39 49 151 67 38
probabilities: 0.113 0.142 0.439 0.195 0.110
left son=36 (30 obs) right son=37 (314 obs)
Primary splits:
cau_terCILCEb splits as R-LL--L-RRRRR, improve=1.8900, (0 missing)
cau_priCILCE splits as ----L----R--L--, improve=0.9880, (0 missing)
sexo splits as LR, improve=0.0736, (0 missing)
Node number 19: 10 observations
predicted class=Lux. con dislocacion expected loss=0.1 P(node) =0.0082
class counts: 0 0 9 1 0
probabilities: 0.000 0.000 0.900 0.100 0.000
Node number 22: 14 observations
predicted class=Lux. con dislocacion expected loss=0.286 P(node) =0.0115
class counts: 0 0 10 2 2
probabilities: 0.000 0.000 0.714 0.143 0.143
Node number 23: 35 observations, complexity param=0.00212
predicted class=Lux. sin dislocacion expected loss=0.543 P(node) =0.0287
class counts: 2 0 14 16 3
probabilities: 0.057 0.000 0.400 0.457 0.086
left son=46 (25 obs) right son=47 (10 obs)
Primary splits:
lugar_CILCE splits as LR--------LR, improve=1.750, (0 missing)
cau_priCILCE splits as RL----R---L----, improve=1.100, (0 missing)
cau_terCILCEb splits as RR-R-----L--R, improve=0.742, (0 missing)
Surrogate splits:
cau_priCILCE splits as LL----L---R----, agree=0.743, adj=0.1, (0 split)
Node number 24: 64 observations
predicted class=Lux. con dislocacion expected loss=0.641 P(node) =0.0525
class counts: 11 8 23 18 4
probabilities: 0.172 0.125 0.359 0.281 0.062
Node number 25: 143 observations, complexity param=0.00191
predicted class=Lux. sin dislocacion expected loss=0.594 P(node) =0.117
class counts: 22 4 45 58 14
probabilities: 0.154 0.028 0.315 0.406 0.098
left son=50 (64 obs) right son=51 (79 obs)
Primary splits:
sexo splits as LR, improve=0.847, (0 missing)
cau_terCILCEb splits as -R--R--RR-L--, improve=0.749, (0 missing)
cau_priCILCE splits as -----R--L----L-, improve=0.201, (0 missing)
Surrogate splits:
cau_priCILCE splits as -----L--R----R-, agree=0.629, adj=0.172, (0 split)
cau_terCILCEb splits as -R--L--LR-R--, agree=0.622, adj=0.156, (0 split)
Node number 28: 206 observations, complexity param=0.00212
predicted class=Lux. sin dislocacion expected loss=0.65 P(node) =0.169
class counts: 34 13 54 72 33
probabilities: 0.165 0.063 0.262 0.350 0.160
left son=56 (39 obs) right son=57 (167 obs)
Primary splits:
cau_terCILCEb splits as --R--L--RRL-R, improve=1.520, (0 missing)
sexo splits as RL, improve=1.360, (0 missing)
lugar_CILCE splits as ----L-R--L--, improve=0.884, (0 missing)
cau_priCILCE splits as -------R-----L-, improve=0.298, (0 missing)
Node number 29: 70 observations
predicted class=Lux. sin dislocacion expected loss=0.457 P(node) =0.0574
class counts: 6 7 12 38 7
probabilities: 0.086 0.100 0.171 0.543 0.100
Node number 32: 43 observations
predicted class=Lux. con dislocacion expected loss=0.558 P(node) =0.0352
class counts: 10 6 19 6 2
probabilities: 0.233 0.140 0.442 0.140 0.047
Node number 33: 68 observations, complexity param=0.00212
predicted class=Lux. sin dislocacion expected loss=0.691 P(node) =0.0557
class counts: 16 8 19 21 4
probabilities: 0.235 0.118 0.279 0.309 0.059
left son=66 (22 obs) right son=67 (46 obs)
Primary splits:
lugar_CILCE splits as R--------RL-, improve=0.739, (0 missing)
cau_priCILCE splits as ----L----R-----, improve=0.535, (0 missing)
cau_terCILCEb splits as --------L-L-R, improve=0.336, (0 missing)
Surrogate splits:
cau_priCILCE splits as ----L----R-----, agree=0.897, adj=0.682, (0 split)
cau_terCILCEb splits as --------R-R-L, agree=0.750, adj=0.227, (0 split)
Node number 36: 30 observations, complexity param=0.00212
predicted class=Fract. sin expos. pulpar expected loss=0.667 P(node) =0.0246
class counts: 1 10 10 7 2
probabilities: 0.033 0.333 0.333 0.233 0.067
left son=72 (11 obs) right son=73 (19 obs)
Primary splits:
cau_priCILCE splits as ----L----R--R--, improve=2.810, (0 missing)
sexo splits as RL, improve=1.330, (0 missing)
cau_terCILCEb splits as --RL--L------, improve=0.471, (0 missing)
Surrogate splits:
cau_terCILCEb splits as --RL--L------, agree=0.733, adj=0.273, (0 split)
Node number 37: 314 observations
predicted class=Lux. con dislocacion expected loss=0.551 P(node) =0.257
class counts: 38 39 141 60 36
probabilities: 0.121 0.124 0.449 0.191 0.115
Node number 46: 25 observations
predicted class=Lux. con dislocacion expected loss=0.48 P(node) =0.0205
class counts: 1 0 13 10 1
probabilities: 0.040 0.000 0.520 0.400 0.040
Node number 47: 10 observations
predicted class=Lux. sin dislocacion expected loss=0.4 P(node) =0.0082
class counts: 1 0 1 6 2
probabilities: 0.100 0.000 0.100 0.600 0.200
Node number 50: 64 observations, complexity param=0.00191
predicted class=Lux. sin dislocacion expected loss=0.656 P(node) =0.0525
class counts: 13 2 19 22 8
probabilities: 0.203 0.031 0.297 0.344 0.125
left son=100 (15 obs) right son=101 (49 obs)
Primary splits:
cau_terCILCEb splits as -R--R--RR-L--, improve=1.690, (0 missing)
cau_priCILCE splits as -----R-------L-, improve=0.966, (0 missing)
Node number 51: 79 observations
predicted class=Lux. sin dislocacion expected loss=0.544 P(node) =0.0648
class counts: 9 2 26 36 6
probabilities: 0.114 0.025 0.329 0.456 0.076
Node number 56: 39 observations
predicted class=Lux. con dislocacion expected loss=0.59 P(node) =0.032
class counts: 4 3 16 11 5
probabilities: 0.103 0.077 0.410 0.282 0.128
Node number 57: 167 observations, complexity param=0.00212
predicted class=Lux. sin dislocacion expected loss=0.635 P(node) =0.137
class counts: 30 10 38 61 28
probabilities: 0.180 0.060 0.228 0.365 0.168
left son=114 (20 obs) right son=115 (147 obs)
Primary splits:
lugar_CILCE splits as ----R-L--L--, improve=1.550, (0 missing)
sexo splits as RL, improve=1.020, (0 missing)
cau_terCILCEb splits as --R-----LR--R, improve=0.684, (0 missing)
cau_priCILCE splits as -------R-----L-, improve=0.437, (0 missing)
Surrogate splits:
cau_priCILCE splits as -------R-----L-, agree=0.97, adj=0.75, (0 split)
Node number 66: 22 observations
predicted class=Fract. con expos. pulpar expected loss=0.727 P(node) =0.018
class counts: 6 4 6 4 2
probabilities: 0.273 0.182 0.273 0.182 0.091
Node number 67: 46 observations
predicted class=Lux. sin dislocacion expected loss=0.63 P(node) =0.0377
class counts: 10 4 13 17 2
probabilities: 0.217 0.087 0.283 0.370 0.043
Node number 72: 11 observations
predicted class=Fract. sin expos. pulpar expected loss=0.455 P(node) =0.00902
class counts: 1 6 0 3 1
probabilities: 0.091 0.545 0.000 0.273 0.091
Node number 73: 19 observations
predicted class=Lux. con dislocacion expected loss=0.474 P(node) =0.0156
class counts: 0 4 10 4 1
probabilities: 0.000 0.211 0.526 0.211 0.053
Node number 100: 15 observations
predicted class=Lux. sin dislocacion expected loss=0.533 P(node) =0.0123
class counts: 5 0 1 7 2
probabilities: 0.333 0.000 0.067 0.467 0.133
Node number 101: 49 observations
predicted class=Lux. con dislocacion expected loss=0.633 P(node) =0.0402
class counts: 8 2 18 15 6
probabilities: 0.163 0.041 0.367 0.306 0.122
Node number 114: 20 observations
predicted class=Lux. con dislocacion expected loss=0.55 P(node) =0.0164
class counts: 3 1 9 5 2
probabilities: 0.150 0.050 0.450 0.250 0.100
Node number 115: 147 observations, complexity param=0.00191
predicted class=Lux. sin dislocacion expected loss=0.619 P(node) =0.12
class counts: 27 9 29 56 26
probabilities: 0.184 0.061 0.197 0.381 0.177
left son=230 (87 obs) right son=231 (60 obs)
Primary splits:
sexo splits as RL, improve=0.760, (0 missing)
cau_terCILCEb splits as --R-----RL--L, improve=0.623, (0 missing)
Node number 230: 87 observations, complexity param=0.00191
predicted class=Lux. sin dislocacion expected loss=0.644 P(node) =0.0713
class counts: 19 4 15 31 18
probabilities: 0.218 0.046 0.172 0.356 0.207
left son=460 (15 obs) right son=461 (72 obs)
Primary splits:
cau_terCILCEb splits as --R-----RL--R, improve=1.47, (0 missing)
Node number 231: 60 observations
predicted class=Lux. sin dislocacion expected loss=0.583 P(node) =0.0492
class counts: 8 5 14 25 8
probabilities: 0.133 0.083 0.233 0.417 0.133
Node number 460: 15 observations
predicted class=Fract. con expos. pulpar expected loss=0.667 P(node) =0.0123
class counts: 5 1 2 2 5
probabilities: 0.333 0.067 0.133 0.133 0.333
Node number 461: 72 observations
predicted class=Lux. sin dislocacion expected loss=0.597 P(node) =0.059
class counts: 14 3 13 29 13
probabilities: 0.194 0.042 0.181 0.403 0.181
El problema es que el error es muy alto, 0.7
# Plot the unpruned rpart tree. The model's response is les_glendor, so the
# title names it; the previous title said "Sex".
plot(fit, uniform = TRUE,
     main = "Classification Tree for les_glendor")
# plot() draws only the branches; text() adds the split and node labels.
text(fit, use.n = TRUE, all = TRUE, cex = .8)

# Write a PostScript rendering of the tree to tree.ps.
post(fit, file = "tree.ps",
     title = "Classification Tree")

# Prune at the complexity parameter with the lowest cross-validated error.
mejor_cp <- fit$cptable[which.min(fit$cptable[, "xerror"]), "CP"]
pfit <- prune(fit, cp = mejor_cp)

# Plot and label the pruned tree.
plot(pfit, uniform = TRUE,
     main = "Pruned Classification Tree")
text(pfit, use.n = TRUE, all = TRUE, cex = .8)
# Two-predictor tree: lesion from primary cause and secondary cause (a).
# The formula uses bare column names and the data frame is passed through
# the data argument, so the fitted tree can also be used with
# predict(newdata = ...).
arbol_con_party <- lesion ~ cau_priCILCE + cau_secCILCEa
arbol_con_party_ctree <- ctree(arbol_con_party, data = df.clean)
# Confusion table: predicted lesion in rows, observed lesion in columns.
table(predict(arbol_con_party_ctree), df.clean$lesion)
Avulsion Concusion Extrusion FCC FCNC FCRC
Avulsion 11 0 2 2 4 0
Concusion 2 3 0 0 0 0
Extrusion 0 0 0 0 0 0
FCC 0 0 0 0 0 0
FCNC 0 0 0 0 0 0
FCRC 0 0 0 0 0 0
Fract radicular 0 0 0 0 0 0
Intrusion 0 0 0 0 0 0
Luxacion lateral 2 1 1 1 0 1
Sub luxacion 81 69 31 43 118 25
Tej blando 0 0 0 0 0 0
Fract radicular Intrusion Luxacion lateral Sub luxacion
Avulsion 2 1 2 2
Concusion 0 0 1 0
Extrusion 0 0 0 0
FCC 0 0 0 0
FCNC 0 0 0 0
FCRC 0 0 0 0
Fract radicular 0 0 0 0
Intrusion 0 0 0 0
Luxacion lateral 0 2 10 5
Sub luxacion 79 144 144 302
Tej blando 0 0 0 0
Tej blando
Avulsion 3
Concusion 1
Extrusion 0
FCC 0
FCNC 0
FCRC 0
Fract radicular 0
Intrusion 0
Luxacion lateral 4
Sub luxacion 121
Tej blando 0
ver https://www.tutorialspoint.com/r/r_decision_tree.htm https://www.youtube.com/watch?v=XLNsl1Da5MA#t=1288.381 http://www.wekaleamstudios.co.uk/posts/classification-trees/