Vamos a leer los datos y examinar las primeras líneas:
df <- read_sav("Variables.sav") %>% haven::as_factor() %>%
mutate(caida=as.factor(numero_caidas>0))
df2 <- df %>% bin()
## Warning in bin(.): 2419 instance(s) removed due to missing values
df
## # A tibble: 2,831 x 20
## fecha centro edad sexo estancia_en_cen~ tinetti resultado_tinet~
## <date> <dbl> <dbl> <fct> <dbl> <dbl> <chr>
## 1 2017-10-11 82677 75 Homb~ 12 1 Riesgo alto de ~
## 2 2017-10-11 82677 73 Homb~ 4 1 Riesgo alto de ~
## 3 2017-10-11 42182 89 Homb~ 4 1 Riesgo alto de ~
## 4 2017-10-11 34157 89 Homb~ 4 1 Riesgo alto de ~
## 5 2017-10-11 835 84 Homb~ 3 1 Riesgo alto de ~
## 6 2017-10-11 43166 88 Homb~ 3 1 Riesgo alto de ~
## 7 2017-10-11 52822 86 Homb~ 3 1 Riesgo alto de ~
## 8 2017-10-11 134344 85 Homb~ 2 1 Riesgo alto de ~
## 9 2017-10-11 134347 83 Homb~ 9 1 Riesgo alto de ~
## 10 2017-10-11 140716 98 Mujer 1 1 Riesgo alto de ~
## # ... with 2,821 more rows, and 13 more variables: mmse <dbl>,
## # resultado_mmse <chr>, barthel <dbl>, resultado_barthel <chr>,
## # tipo_terap_grupal <chr>, num_terap_grupal <dbl>, tipo_ter_indiv <chr>,
## # num_terap_indiv <dbl>, numero_medicamentos <dbl>, numero_caidas <dbl>,
## # fumat <dbl>, minutos_fisioterapia_semana <dbl>, caida <fct>
El análisis a hacer corresponde a lo siguiente:
estudié como la estancia en el centro afectaba en el número de caídas (siendo significativa) ajustando por sexo y edad, sigue siendo significativa, teniendo un efecto protector significativo. Lo mismo voy a valorar con las siguientes variables: - Tiempo de estancia en el centro es significativo p=0,016 - FUMAT (escala de calidad de vida) p=0,00 - MMSE (estado cognitivo) p=0,00 - Tinetti (medición riesgo de caídas) - Barthel (independencia para actividades de la vida diaria)
Van ahora los análisis modelo a modelo:
tabla<-df %>%
finalfit("numero_caidas", c("sexo", "edad","estancia_en_centro"),metrics=TRUE)
tabla[[1]] %>% knitr::kable(booktabs=T)
|
Dependent: numero_caidas
|
|
Mean (sd)
|
Coefficient (univariable)
|
Coefficient (multivariable)
|
3
|
sexo
|
Hombre
|
1.4 (2.4)
|
|
|
4
|
|
Mujer
|
1.4 (2.8)
|
-0.05 (-0.25 to 0.15, p=0.642)
|
-0.01 (-0.22 to 0.19, p=0.901)
|
1
|
edad
|
[70,109]
|
1.4 (2.6)
|
0.01 (-0.00 to 0.03, p=0.131)
|
0.01 (-0.00 to 0.03, p=0.143)
|
2
|
estancia_en_centro
|
[1,26]
|
1.4 (2.6)
|
-0.04 (-0.07 to -0.01, p=0.016)
|
-0.04 (-0.07 to -0.01, p=0.017)
|
x
|
Number in dataframe = 2831, Number in model = 2831, Missing = 0, Log-likelihood = -6662.8, R-squared = 0.0029, Adjusted r-squared = 0.0018
|
tabla <- df %>%
finalfit("numero_caidas", c("sexo", "edad","fumat"),metrics=TRUE)
tabla[[1]] %>% knitr::kable(booktabs=T)
|
Dependent: numero_caidas
|
|
Mean (sd)
|
Coefficient (univariable)
|
Coefficient (multivariable)
|
3
|
sexo
|
Hombre
|
1.4 (2.4)
|
|
|
4
|
|
Mujer
|
1.4 (2.8)
|
-0.05 (-0.25 to 0.15, p=0.642)
|
0.15 (-0.19 to 0.50, p=0.384)
|
1
|
edad
|
[70,109]
|
1.4 (2.6)
|
0.01 (-0.00 to 0.03, p=0.131)
|
0.02 (-0.01 to 0.04, p=0.195)
|
2
|
fumat
|
[52,112]
|
1.6 (2.5)
|
-0.04 (-0.06 to -0.03, p<0.001)
|
-0.05 (-0.06 to -0.03, p<0.001)
|
x
|
Number in dataframe = 2831, Number in model = 932, Missing = 1899, Log-likelihood = -2141.89, R-squared = 0.048, Adjusted r-squared = 0.045
|
tabla <- df %>%
finalfit("numero_caidas", c("sexo", "edad","mmse"),metrics=TRUE)
tabla[[1]] %>% knitr::kable(booktabs=T)
|
Dependent: numero_caidas
|
|
Mean (sd)
|
Coefficient (univariable)
|
Coefficient (multivariable)
|
3
|
sexo
|
Hombre
|
1.4 (2.4)
|
|
|
4
|
|
Mujer
|
1.4 (2.8)
|
-0.05 (-0.25 to 0.15, p=0.642)
|
0.07 (-0.14 to 0.29, p=0.505)
|
1
|
edad
|
[70,109]
|
1.4 (2.6)
|
0.01 (-0.00 to 0.03, p=0.131)
|
0.01 (-0.00 to 0.03, p=0.123)
|
2
|
mmse
|
[-1,30]
|
1.5 (2.6)
|
-0.05 (-0.06 to -0.04, p<0.001)
|
-0.05 (-0.06 to -0.04, p<0.001)
|
x
|
Number in dataframe = 2831, Number in model = 2631, Missing = 200, Log-likelihood = -6212.41, R-squared = 0.026, Adjusted r-squared = 0.025
|
tabla <- df %>%
finalfit("numero_caidas", c("sexo", "edad","tinetti"),metrics=TRUE)
tabla[[1]] %>% knitr::kable(booktabs=T)
|
Dependent: numero_caidas
|
|
Mean (sd)
|
Coefficient (univariable)
|
Coefficient (multivariable)
|
2
|
sexo
|
Hombre
|
1.4 (2.4)
|
|
|
3
|
|
Mujer
|
1.4 (2.8)
|
-0.05 (-0.25 to 0.15, p=0.642)
|
-0.00 (-0.21 to 0.20, p=0.980)
|
1
|
edad
|
[70,109]
|
1.4 (2.6)
|
0.01 (-0.00 to 0.03, p=0.131)
|
0.01 (-0.01 to 0.02, p=0.292)
|
4
|
tinetti
|
[1,28]
|
1.4 (2.6)
|
-0.03 (-0.04 to -0.01, p=0.001)
|
-0.03 (-0.04 to -0.01, p=0.001)
|
x
|
Number in dataframe = 2831, Number in model = 2831, Missing = 0, Log-likelihood = -6660.57, R-squared = 0.0044, Adjusted r-squared = 0.0034
|
tabla <-df %>%
finalfit("numero_caidas", c("sexo", "edad","barthel"),metrics=TRUE)
tabla[[1]] %>% knitr::kable(booktabs=T)
|
Dependent: numero_caidas
|
|
Mean (sd)
|
Coefficient (univariable)
|
Coefficient (multivariable)
|
3
|
sexo
|
Hombre
|
1.4 (2.4)
|
|
|
4
|
|
Mujer
|
1.4 (2.8)
|
-0.05 (-0.25 to 0.15, p=0.642)
|
0.02 (-0.18 to 0.23, p=0.846)
|
2
|
edad
|
[70,109]
|
1.4 (2.6)
|
0.01 (-0.00 to 0.03, p=0.131)
|
0.01 (-0.01 to 0.02, p=0.238)
|
1
|
barthel
|
[0,100]
|
1.4 (2.6)
|
-0.01 (-0.01 to -0.01, p<0.001)
|
-0.01 (-0.01 to -0.00, p<0.001)
|
x
|
Number in dataframe = 2831, Number in model = 2829, Missing = 2, Log-likelihood = -6651.49, R-squared = 0.008, Adjusted r-squared = 0.007
|
Por último vamos a ver qué tal va un modelo con todo a la vez:
tabla <- df %>%
finalfit("numero_caidas", c("sexo", "edad", "estancia_en_centro","fumat", "mmse", "tinetti", "barthel"), metrics=TRUE)
tabla[[1]] %>% knitr::kable(booktabs=T)
|
Dependent: numero_caidas
|
|
Mean (sd)
|
Coefficient (univariable)
|
Coefficient (multivariable)
|
6
|
sexo
|
Hombre
|
1.4 (2.4)
|
|
|
7
|
|
Mujer
|
1.4 (2.8)
|
-0.05 (-0.25 to 0.15, p=0.642)
|
0.18 (-0.17 to 0.52, p=0.311)
|
2
|
edad
|
[70,109]
|
1.4 (2.6)
|
0.01 (-0.00 to 0.03, p=0.131)
|
0.01 (-0.01 to 0.04, p=0.220)
|
3
|
estancia_en_centro
|
[1,26]
|
1.4 (2.6)
|
-0.04 (-0.07 to -0.01, p=0.016)
|
-0.02 (-0.08 to 0.04, p=0.508)
|
4
|
fumat
|
[52,112]
|
1.6 (2.5)
|
-0.04 (-0.06 to -0.03, p<0.001)
|
-0.04 (-0.05 to -0.02, p<0.001)
|
5
|
mmse
|
[-1,30]
|
1.5 (2.6)
|
-0.05 (-0.06 to -0.04, p<0.001)
|
-0.03 (-0.06 to -0.01, p=0.012)
|
8
|
tinetti
|
[1,28]
|
1.4 (2.6)
|
-0.03 (-0.04 to -0.01, p=0.001)
|
-0.02 (-0.05 to 0.01, p=0.202)
|
1
|
barthel
|
[0,100]
|
1.4 (2.6)
|
-0.01 (-0.01 to -0.01, p<0.001)
|
0.00 (-0.01 to 0.01, p=0.381)
|
x
|
Number in dataframe = 2831, Number in model = 932, Missing = 1899, Log-likelihood = -2137.51, R-squared = 0.057, Adjusted r-squared = 0.05
|
Las estimaciones con todas a la vez no parecen muy buenas. Exploremos las correlaciones entre las explicativas:
df %>% generaTablaCorrelaciones(vNumericas = c("numero_caidas", "estancia_en_centro","edad","fumat", "mmse", "tinetti", "barthel")) %>%
knitr::kable(booktabs=T)
Variable
|
numero_caidas
|
[01]
|
[02]
|
[03]
|
[04]
|
[05]
|
[01] estancia_en_centro
|
-0.05*
|
|
|
|
|
|
[02] edad
|
0.03
|
-0.01
|
|
|
|
|
[03] fumat
|
-0.21***
|
-0.04
|
0.05
|
|
|
|
[04] mmse
|
-0.16***
|
0.07***
|
0.00
|
0.52***
|
|
|
[05] tinetti
|
-0.06***
|
0.05**
|
-0.14***
|
0.17***
|
0.25***
|
|
[06] barthel
|
-0.09***
|
0.10***
|
-0.08***
|
0.33***
|
0.46***
|
0.51***
|
Parece que barthel está muy asociada con mmse, tinetti. No deberán estudiarse todas a la vez pues contienen información similar. Probemos análisis alternativos entonces:
df %>%
finalfit("numero_caidas", c("sexo", "edad", "estancia_en_centro","fumat", "mmse"), metrics=TRUE) %>%
.[[1]] %>% knitr::kable(booktabs=T)
|
Dependent: numero_caidas
|
|
Mean (sd)
|
Coefficient (univariable)
|
Coefficient (multivariable)
|
5
|
sexo
|
Hombre
|
1.4 (2.4)
|
|
|
6
|
|
Mujer
|
1.4 (2.8)
|
-0.05 (-0.25 to 0.15, p=0.642)
|
0.18 (-0.17 to 0.52, p=0.313)
|
1
|
edad
|
[70,109]
|
1.4 (2.6)
|
0.01 (-0.00 to 0.03, p=0.131)
|
0.02 (-0.01 to 0.04, p=0.174)
|
2
|
estancia_en_centro
|
[1,26]
|
1.4 (2.6)
|
-0.04 (-0.07 to -0.01, p=0.016)
|
-0.02 (-0.08 to 0.04, p=0.519)
|
3
|
fumat
|
[52,112]
|
1.6 (2.5)
|
-0.04 (-0.06 to -0.03, p<0.001)
|
-0.04 (-0.05 to -0.02, p<0.001)
|
4
|
mmse
|
[-1,30]
|
1.5 (2.6)
|
-0.05 (-0.06 to -0.04, p<0.001)
|
-0.03 (-0.05 to -0.01, p=0.014)
|
df %>%
finalfit("numero_caidas", c("sexo", "edad", "estancia_en_centro", "barthel"), metrics=TRUE) %>%
.[[1]] %>% knitr::kable(booktabs=T)
|
Dependent: numero_caidas
|
|
Mean (sd)
|
Coefficient (univariable)
|
Coefficient (multivariable)
|
4
|
sexo
|
Hombre
|
1.4 (2.4)
|
|
|
5
|
|
Mujer
|
1.4 (2.8)
|
-0.05 (-0.25 to 0.15, p=0.642)
|
0.03 (-0.18 to 0.23, p=0.777)
|
2
|
edad
|
[70,109]
|
1.4 (2.6)
|
0.01 (-0.00 to 0.03, p=0.131)
|
0.01 (-0.01 to 0.02, p=0.232)
|
3
|
estancia_en_centro
|
[1,26]
|
1.4 (2.6)
|
-0.04 (-0.07 to -0.01, p=0.016)
|
-0.03 (-0.07 to -0.00, p=0.046)
|
1
|
barthel
|
[0,100]
|
1.4 (2.6)
|
-0.01 (-0.01 to -0.01, p<0.001)
|
-0.01 (-0.01 to -0.00, p<0.001)
|
Podría ser útil estudiar la presencia/ausencia de caidas para estas variables, si acaso, agrupando un poco sus valores:
ggplot(df2, aes(x = edad, fill = caida)) + geom_bar(position = "fill") +ylab("")

ggplot(df2, aes(x = sexo, fill = caida)) + geom_bar(position = "fill") +ylab("")

ggplot(df2, aes(x = estancia_en_centro, fill = caida)) + geom_bar(position = "fill") +ylab("")

ggplot(df2, aes(x = fumat, fill = caida)) + geom_bar(position = "fill") +ylab("")

ggplot(df2, aes(x = mmse, fill = caida)) + geom_bar(position = "fill") +ylab("")

ggplot(df2, aes(x = tinetti, fill = caida)) + geom_bar(position = "fill") +ylab("")

ggplot(df2, aes(x = barthel, fill = caida)) + geom_bar(position = "fill") +ylab("")

Y ahora vamos a intentar algunos predictores simple de las caidas. Primero con las variables categorizadas:
data <- optbin(formula = caida ~., data = df2 %>% select (-numero_caidas), method = "infogain")
model <- OneR(formula = caida ~., data = data, verbose = TRUE)
## Warning in OneR.data.frame(x = data, ties.method = ties.method, verbose =
## verbose, : data contains unused factor levels
##
## Attribute Accuracy
## 1 * resultado_mmse 59.71%
## 2 mmse 57.52%
## 3 tipo_ter_indiv 57.28%
## 4 tinetti 56.07%
## 4 fumat 56.07%
## 6 edad 54.13%
## 7 centro 53.64%
## 7 numero_medicamentos 53.64%
## 9 sexo 53.16%
## 9 tipo_terap_grupal 53.16%
## 9 minutos_fisioterapia_semana 53.16%
## 12 estancia_en_centro 52.91%
## 13 num_terap_grupal 52.67%
## 14 resultado_tinetti 52.43%
## 14 num_terap_indiv 52.43%
## 16 barthel 52.18%
## 16 resultado_barthel 52.18%
## 18 fecha 51.94%
## ---
## Chosen attribute due to accuracy
## and ties method (if applicable): '*'
Ahora con las variables tal cual vienen:
data <- optbin(formula = caida ~., data = df %>% select (-numero_caidas), method = "infogain")
## Warning in optbin.data.frame(x = data, method = method, na.omit = na.omit):
## 2419 instance(s) removed due to missing values
model <- OneR(formula = caida ~., data = data, verbose = TRUE)
##
## Attribute Accuracy
## 1 * mmse 59.71%
## 1 resultado_mmse 59.71%
## 3 fumat 57.77%
## 4 tipo_ter_indiv 57.28%
## 5 tinetti 56.07%
## 6 centro 54.37%
## 7 edad 53.64%
## 7 numero_medicamentos 53.64%
## 9 sexo 53.16%
## 9 tipo_terap_grupal 53.16%
## 11 num_terap_grupal 52.67%
## 12 resultado_tinetti 52.43%
## 13 resultado_barthel 52.18%
## 14 fecha 51.94%
## 14 estancia_en_centro 51.94%
## 14 barthel 51.94%
## 14 num_terap_indiv 51.94%
## 14 minutos_fisioterapia_semana 51.94%
## ---
## Chosen attribute due to accuracy
## and ties method (if applicable): '*'
summary(model)
##
## Call:
## OneR.formula(formula = caida ~ ., data = data, verbose = TRUE)
##
## Rules:
## If mmse = (-1.03,26] then caida = TRUE
## If mmse = (26,30] then caida = FALSE
##
## Accuracy:
## 246 of 412 instances classified correctly (60%)
##
## Contingency table:
## mmse
## caida (-1.03,26] (26,30] Sum
## FALSE 140 * 58 198
## TRUE * 188 26 214
## Sum 328 84 412
## ---
## Maximum in each column: '*'
##
## Pearson's Chi-squared test:
## X-squared = 20, df = 1, p-value = 3e-05
plot(model)

prediction <- predict(model, data)
eval_model(prediction, data)
##
## Confusion matrix (absolute):
## Actual
## Prediction FALSE TRUE Sum
## FALSE 58 26 84
## TRUE 140 188 328
## Sum 198 214 412
##
## Confusion matrix (relative):
## Actual
## Prediction FALSE TRUE Sum
## FALSE 0.14 0.06 0.20
## TRUE 0.34 0.46 0.80
## Sum 0.48 0.52 1.00
##
## Accuracy:
## 0.6 (246/412)
##
## Error rate:
## 0.4 (166/412)
##
## Error rate reduction (vs. base rate):
## 0.16 (p-value = 9e-04)
Vamos a hacer lo mismo,pero excluyendo mmse. Eso hará que la variable de interés sea fumat:
data <- optbin(formula = caida ~., data = df %>% select (-numero_caidas,-mmse,-resultado_mmse), method = "infogain")
## Warning in optbin.data.frame(x = data, method = method, na.omit = na.omit):
## 2419 instance(s) removed due to missing values
model <- OneR(formula = caida ~., data = data, verbose = TRUE)
##
## Attribute Accuracy
## 1 * fumat 57.77%
## 2 tipo_ter_indiv 57.28%
## 3 tinetti 56.07%
## 4 centro 54.37%
## 5 edad 53.64%
## 5 numero_medicamentos 53.64%
## 7 sexo 53.16%
## 7 tipo_terap_grupal 53.16%
## 9 num_terap_grupal 52.67%
## 10 resultado_tinetti 52.43%
## 11 resultado_barthel 52.18%
## 12 fecha 51.94%
## 12 estancia_en_centro 51.94%
## 12 barthel 51.94%
## 12 num_terap_indiv 51.94%
## 12 minutos_fisioterapia_semana 51.94%
## ---
## Chosen attribute due to accuracy
## and ties method (if applicable): '*'
summary(model)
##
## Call:
## OneR.formula(formula = caida ~ ., data = data, verbose = TRUE)
##
## Rules:
## If fumat = (53.9,97] then caida = TRUE
## If fumat = (97,111] then caida = FALSE
##
## Accuracy:
## 238 of 412 instances classified correctly (58%)
##
## Contingency table:
## fumat
## caida (53.9,97] (97,111] Sum
## FALSE 145 * 53 198
## TRUE * 185 29 214
## Sum 330 82 412
## ---
## Maximum in each column: '*'
##
## Pearson's Chi-squared test:
## X-squared = 10, df = 1, p-value = 0.001
plot(model)

prediction <- predict(model, data)
eval_model(prediction, data)
##
## Confusion matrix (absolute):
## Actual
## Prediction FALSE TRUE Sum
## FALSE 53 29 82
## TRUE 145 185 330
## Sum 198 214 412
##
## Confusion matrix (relative):
## Actual
## Prediction FALSE TRUE Sum
## FALSE 0.13 0.07 0.20
## TRUE 0.35 0.45 0.80
## Sum 0.48 0.52 1.00
##
## Accuracy:
## 0.58 (238/412)
##
## Error rate:
## 0.42 (174/412)
##
## Error rate reduction (vs. base rate):
## 0.12 (p-value = 0.01)
Ahora excluimos también fumat:
data <- optbin(formula = caida ~., data = df %>% select (-numero_caidas,-mmse,-resultado_mmse, -fumat), method = "infogain")
## Warning in optbin.data.frame(x = data, method = method, na.omit = na.omit):
## 1540 instance(s) removed due to missing values
model <- OneR(formula = caida ~., data = data, verbose = TRUE)
##
## Attribute Accuracy
## 1 * barthel 56.31%
## 2 centro 56.08%
## 3 tinetti 55.85%
## 4 resultado_tinetti 55.62%
## 5 tipo_ter_indiv 55.54%
## 6 resultado_barthel 55.15%
## 7 tipo_terap_grupal 53.6%
## 7 num_terap_grupal 53.6%
## 9 numero_medicamentos 52.67%
## 10 num_terap_indiv 52.13%
## 11 estancia_en_centro 51.98%
## 12 sexo 51.9%
## 13 minutos_fisioterapia_semana 51.51%
## 14 fecha 51.12%
## 14 edad 51.12%
## ---
## Chosen attribute due to accuracy
## and ties method (if applicable): '*'
summary(model)
##
## Call:
## OneR.formula(formula = caida ~ ., data = data, verbose = TRUE)
##
## Rules:
## If barthel = (-0.1,70] then caida = TRUE
## If barthel = (70,100] then caida = FALSE
##
## Accuracy:
## 727 of 1291 instances classified correctly (56%)
##
## Contingency table:
## barthel
## caida (-0.1,70] (70,100] Sum
## FALSE 245 * 415 660
## TRUE * 312 319 631
## Sum 557 734 1291
## ---
## Maximum in each column: '*'
##
## Pearson's Chi-squared test:
## X-squared = 20, df = 1, p-value = 1e-05
plot(model)

prediction <- predict(model, data)
eval_model(prediction, data)
##
## Confusion matrix (absolute):
## Actual
## Prediction FALSE TRUE Sum
## FALSE 415 319 734
## TRUE 245 312 557
## Sum 660 631 1291
##
## Confusion matrix (relative):
## Actual
## Prediction FALSE TRUE Sum
## FALSE 0.32 0.25 0.57
## TRUE 0.19 0.24 0.43
## Sum 0.51 0.49 1.00
##
## Accuracy:
## 0.56 (727/1291)
##
## Error rate:
## 0.44 (564/1291)
##
## Error rate reduction (vs. base rate):
## 0.11 (p-value = 1e-04)
y tipo_ter_indiv también la excluímos:
data <- optbin(formula = caida ~., data = df %>% select (-numero_caidas,-mmse,-resultado_mmse, -fumat, -barthel, -centro), method = "infogain")
## Warning in optbin.data.frame(x = data, method = method, na.omit = na.omit):
## 1540 instance(s) removed due to missing values
model <- OneR(formula = caida ~., data = data, verbose = TRUE)
##
## Attribute Accuracy
## 1 * tinetti 55.85%
## 2 resultado_tinetti 55.62%
## 3 tipo_ter_indiv 55.54%
## 4 resultado_barthel 55.15%
## 5 tipo_terap_grupal 53.6%
## 5 num_terap_grupal 53.6%
## 7 numero_medicamentos 52.67%
## 8 num_terap_indiv 52.13%
## 9 estancia_en_centro 51.98%
## 10 sexo 51.9%
## 11 minutos_fisioterapia_semana 51.51%
## 12 fecha 51.12%
## 12 edad 51.12%
## ---
## Chosen attribute due to accuracy
## and ties method (if applicable): '*'
summary(model)
##
## Call:
## OneR.formula(formula = caida ~ ., data = data, verbose = TRUE)
##
## Rules:
## If tinetti = (0.973,20] then caida = TRUE
## If tinetti = (20,28] then caida = FALSE
##
## Accuracy:
## 721 of 1291 instances classified correctly (56%)
##
## Contingency table:
## tinetti
## caida (0.973,20] (20,28] Sum
## FALSE 232 * 428 660
## TRUE * 293 338 631
## Sum 525 766 1291
## ---
## Maximum in each column: '*'
##
## Pearson's Chi-squared test:
## X-squared = 20, df = 1, p-value = 5e-05
plot(model)

prediction <- predict(model, data)
eval_model(prediction, data)
##
## Confusion matrix (absolute):
## Actual
## Prediction FALSE TRUE Sum
## FALSE 428 338 766
## TRUE 232 293 525
## Sum 660 631 1291
##
## Confusion matrix (relative):
## Actual
## Prediction FALSE TRUE Sum
## FALSE 0.33 0.26 0.59
## TRUE 0.18 0.23 0.41
## Sum 0.51 0.49 1.00
##
## Accuracy:
## 0.56 (721/1291)
##
## Error rate:
## 0.44 (570/1291)
##
## Error rate reduction (vs. base rate):
## 0.097 (p-value = 4e-04)
data <- optbin(formula = caida ~., data = df %>% select (-numero_caidas,-mmse,-resultado_mmse, -fumat, -barthel, -centro, -tinetti, -resultado_tinetti, -tipo_ter_indiv, -resultado_barthel, -tipo_terap_grupal, -num_terap_grupal), method = "infogain")
## Warning in optbin.data.frame(x = data, method = method, na.omit = na.omit):
## 1540 instance(s) removed due to missing values
model <- OneR(formula = caida ~., data = data, verbose = TRUE)
##
## Attribute Accuracy
## 1 * numero_medicamentos 52.67%
## 2 num_terap_indiv 52.13%
## 3 estancia_en_centro 51.98%
## 4 sexo 51.9%
## 5 minutos_fisioterapia_semana 51.51%
## 6 fecha 51.12%
## 6 edad 51.12%
## ---
## Chosen attribute due to accuracy
## and ties method (if applicable): '*'
summary(model)
##
## Call:
## OneR.formula(formula = caida ~ ., data = data, verbose = TRUE)
##
## Rules:
## If numero_medicamentos = (-0.024,7] then caida = FALSE
## If numero_medicamentos = (7,24] then caida = TRUE
##
## Accuracy:
## 680 of 1291 instances classified correctly (53%)
##
## Contingency table:
## numero_medicamentos
## caida (-0.024,7] (7,24] Sum
## FALSE * 328 332 660
## TRUE 279 * 352 631
## Sum 607 684 1291
## ---
## Maximum in each column: '*'
##
## Pearson's Chi-squared test:
## X-squared = 4, df = 1, p-value = 0.06
plot(model)

prediction <- predict(model, data)
eval_model(prediction, data)
##
## Confusion matrix (absolute):
## Actual
## Prediction FALSE TRUE Sum
## FALSE 328 279 607
## TRUE 332 352 684
## Sum 660 631 1291
##
## Confusion matrix (relative):
## Actual
## Prediction FALSE TRUE Sum
## FALSE 0.25 0.22 0.47
## TRUE 0.26 0.27 0.53
## Sum 0.51 0.49 1.00
##
## Accuracy:
## 0.53 (680/1291)
##
## Error rate:
## 0.47 (611/1291)
##
## Error rate reduction (vs. base rate):
## 0.032 (p-value = 0.1)
data <- optbin(formula = caida ~., data = df %>% select (-numero_caidas,-mmse,-resultado_mmse, -fumat, -barthel, -centro, -tinetti, -resultado_tinetti,-tipo_ter_indiv, -resultado_barthel, -tipo_terap_grupal), method = "infogain")
## Warning in optbin.data.frame(x = data, method = method, na.omit = na.omit):
## 1540 instance(s) removed due to missing values
model <- OneR(formula = caida ~., data = data, verbose = TRUE)
##
## Attribute Accuracy
## 1 * num_terap_grupal 53.6%
## 2 numero_medicamentos 52.67%
## 3 num_terap_indiv 52.13%
## 4 estancia_en_centro 51.98%
## 5 sexo 51.9%
## 6 minutos_fisioterapia_semana 51.51%
## 7 fecha 51.12%
## 7 edad 51.12%
## ---
## Chosen attribute due to accuracy
## and ties method (if applicable): '*'
summary(model)
##
## Call:
## OneR.formula(formula = caida ~ ., data = data, verbose = TRUE)
##
## Rules:
## If num_terap_grupal = (-0.006,0] then caida = TRUE
## If num_terap_grupal = (0,6.01] then caida = FALSE
##
## Accuracy:
## 692 of 1291 instances classified correctly (54%)
##
## Contingency table:
## num_terap_grupal
## caida (-0.006,0] (0,6.01] Sum
## FALSE 155 * 505 660
## TRUE * 187 444 631
## Sum 342 949 1291
## ---
## Maximum in each column: '*'
##
## Pearson's Chi-squared test:
## X-squared = 6, df = 1, p-value = 0.01
plot(model)

prediction <- predict(model, data)
eval_model(prediction, data)
##
## Confusion matrix (absolute):
## Actual
## Prediction FALSE TRUE Sum
## FALSE 505 444 949
## TRUE 155 187 342
## Sum 660 631 1291
##
## Confusion matrix (relative):
## Actual
## Prediction FALSE TRUE Sum
## FALSE 0.39 0.34 0.74
## TRUE 0.12 0.14 0.26
## Sum 0.51 0.49 1.00
##
## Accuracy:
## 0.54 (692/1291)
##
## Error rate:
## 0.46 (599/1291)
##
## Error rate reduction (vs. base rate):
## 0.051 (p-value = 0.04)