library(irr)
## Cargando paquete requerido: lpSolve
library(expss)
## Cargando paquete requerido: maditr
##
## To select columns from data: columns(mtcars, mpg, vs:carb)
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.4.3
## Registered S3 methods overwritten by 'Hmisc':
## method from
## [.labelled expss
## print.labelled expss
## as.data.frame.labelled expss
##
## Adjuntando el paquete: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
library(PerformanceAnalytics)
## Warning: package 'PerformanceAnalytics' was built under R version 4.4.3
## Cargando paquete requerido: xts
## Warning: package 'xts' was built under R version 4.4.3
## Cargando paquete requerido: zoo
##
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Adjuntando el paquete: 'xts'
## The following objects are masked from 'package:maditr':
##
## first, last
##
## Adjuntando el paquete: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
library(readxl)
#sad <- read_excel("C:/Users/Administrador/Downloads/sad.xlsx",
# sheet = "R cuestionarios")
#View(sad)
#carga de datos
library(readxl)
sad <- read_excel("C:/Users/Administrador/Downloads/TESIC.xlsx")
## New names:
## • `` -> `...44`
View(sad)
attach(sad)
#ahora sumemos los puntos de las preguntas
#los valores 5 a 15 son las columnas correspondientes a las preguntas 1 a 10
View(sad[ , 25:34])
sad$puntaje1 <- rowSums(sad[ , 25:34])
#Buscamos proporción de datos faltantes (Missingvalues)---- NO SE DETECTAN DATOS FALTANTES
#is.na(sad[ , 25:34])
#which(is.na(sad[ , 25:34]))
#pasar variables categóricas a factor
names(sad)<- tolower(names (sad))
datos1=sad[ , 25:34]
sad <- sad %>%
mutate(across(19:35, as.factor))
#crear cicat_factor
sad$cicat_factor <- factor(sad$cicat, levels = c(0, 1), labels = c( "No Cicatrizo", "Cicatrizo"))
table(sad$cicat_factor)
##
## No Cicatrizo Cicatrizo
## 183 223
#paso variables chr a factor
sad <- sad %>% mutate_if(is.character, as.factor)
#crear ampu_factor
sad$ampu_factor <- factor(sad$ampu, levels = c(0, 1), labels = c( "No Amputado", "Amputado"))
table(sad$ampu_factor)
##
## No Amputado Amputado
## 349 57
#crear fallecio_factor
sad$fallecio_factor <- factor(sad$fallecio, levels = c(0, 1), labels = c( "No Fallecido", "Fallecido"))
table(sad$fallecio_factor)
##
## No Fallecido Fallecido
## 392 14
#variable unificada
sad <- sad %>%
mutate(
estado = tolower(trimws(estado))
)
table(sad$estado)
##
## ampu cerro fallecio persiste
## 57 223 14 112
sad <- sad %>%
mutate(
estado_unif = case_when(
estado %in% c("ampu") ~ "amputacion",
estado %in% c("cerro") ~ "cerro_herida",
estado %in% c("murio", "fallecio") ~ "fallecio",
estado %in% c("persiste") ~ "persiste",
estado %in% c("perdido") ~ "perdido_seguimiento",
TRUE ~ NA_character_
)
)
table(sad$estado_unif)
##
## amputacion cerro_herida fallecio persiste
## 57 223 14 112
sad <- sad %>%
mutate(
evento_ampu = case_when(
estado_unif %in% c("cerro_herida","fallecio") ~ 2,
estado_unif %in% c("amputacion") ~ 1,
estado_unif %in% c("perdido_seguimiento", "persiste") ~ 0,
TRUE ~ NA_real_
)
)
table(sad$evento_ampu)
##
## 0 1 2
## 112 57 237
tiempo_evento_am <- Surv(sad$dias_out,sad$ampu)
#Distribución de frecuencia de las respuestas----
fre(c(sad[ , 25:34]))
| Count | Valid percent | Percent | Responses, % | Cumulative responses, % | |
|---|---|---|---|---|---|
| 1 | 196 | 48.3 | 48.3 | 48.3 | 48.3 |
| 2 | 134 | 33.0 | 33.0 | 33.0 | 81.3 |
| 3 | 76 | 18.7 | 18.7 | 18.7 | 100.0 |
| #Total | 406 | 100 | 100 | 100 | |
| <NA> | 0 | 0.0 | |||
| 1 | 152 | 37.4 | 37.4 | 37.4 | 37.4 |
| 2 | 104 | 25.6 | 25.6 | 25.6 | 63.1 |
| 3 | 150 | 36.9 | 36.9 | 36.9 | 100.0 |
| #Total | 406 | 100 | 100 | 100 | |
| <NA> | 0 | 0.0 | |||
| 1 | 242 | 59.6 | 59.6 | 59.6 | 59.6 |
| 2 | 90 | 22.2 | 22.2 | 22.2 | 81.8 |
| 3 | 74 | 18.2 | 18.2 | 18.2 | 100.0 |
| #Total | 406 | 100 | 100 | 100 | |
| <NA> | 0 | 0.0 | |||
| 0 | 206 | 50.7 | 50.7 | 50.7 | 50.7 |
| 1 | 31 | 7.6 | 7.6 | 7.6 | 58.4 |
| 2 | 47 | 11.6 | 11.6 | 11.6 | 70.0 |
| 3 | 122 | 30.0 | 30.0 | 30.0 | 100.0 |
| #Total | 406 | 100 | 100 | 100 | |
| <NA> | 0 | 0.0 | |||
| 0 | 86 | 21.2 | 21.2 | 21.2 | 21.2 |
| 1 | 32 | 7.9 | 7.9 | 7.9 | 29.1 |
| 2 | 223 | 54.9 | 54.9 | 54.9 | 84.0 |
| 3 | 65 | 16.0 | 16.0 | 16.0 | 100.0 |
| #Total | 406 | 100 | 100 | 100 | |
| <NA> | 0 | 0.0 | |||
| 0 | 143 | 35.2 | 35.2 | 35.2 | 35.2 |
| 1 | 98 | 24.1 | 24.1 | 24.1 | 59.4 |
| 2 | 124 | 30.5 | 30.5 | 30.5 | 89.9 |
| 3 | 41 | 10.1 | 10.1 | 10.1 | 100.0 |
| #Total | 406 | 100 | 100 | 100 | |
| <NA> | 0 | 0.0 | |||
| 0 | 4 | 1.0 | 1.0 | 1.0 | 1.0 |
| 1 | 25 | 6.2 | 6.2 | 6.2 | 7.1 |
| 2 | 341 | 84.0 | 84.0 | 84.0 | 91.1 |
| 3 | 36 | 8.9 | 8.9 | 8.9 | 100.0 |
| #Total | 406 | 100 | 100 | 100 | |
| <NA> | 0 | 0.0 | |||
| 1 | 32 | 7.9 | 7.9 | 7.9 | 7.9 |
| 2 | 133 | 32.8 | 32.8 | 32.8 | 40.6 |
| 3 | 241 | 59.4 | 59.4 | 59.4 | 100.0 |
| #Total | 406 | 100 | 100 | 100 | |
| <NA> | 0 | 0.0 | |||
| 1 | 188 | 46.3 | 46.3 | 46.3 | 46.3 |
| 2 | 157 | 38.7 | 38.7 | 38.7 | 85.0 |
| 3 | 61 | 15.0 | 15.0 | 15.0 | 100.0 |
| #Total | 406 | 100 | 100 | 100 | |
| <NA> | 0 | 0.0 | |||
| 1 | 19 | 4.7 | 4.7 | 4.7 | 4.7 |
| 2 | 51 | 12.6 | 12.6 | 12.6 | 17.2 |
| 3 | 336 | 82.8 | 82.8 | 82.8 | 100.0 |
| #Total | 406 | 100 | 100 | 100 | |
| <NA> | 0 | 0.0 |
hist.data.frame(sad[ , 25:34])
#Confiabilidad I: Analisis de consistencia interna
datos<-sad[ , 25:34]
colnames(datos)<- c("LOCAL","TOPO","ZON","ISQ","INF","EDE","NEU","PROF","AREA","FASE")
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
vars <-datos[, c("LOCAL","TOPO","ZON","ISQ","INF","EDE","NEU","PROF","AREA","FASE")]
poly <- polychoric(vars)
## Converted non-numeric input to numeric
R_poly <- poly$rho
colnames(R_poly) <- colnames(vars)
rownames(R_poly) <- colnames(vars)
corrplot(
R_poly,
method = "ellipse",
type = "lower",
addCoef.col = "black",
number.digits = 2, # 👈 redondeo
tl.col = "black",
tl.srt = 45, # 👈 rotación de etiquetas
diag = FALSE
)
fa(R_poly, nfactors = 3, fm = "minres", rotate = "promax")
## Loading required namespace: GPArotation
## Factor Analysis using method = minres
## Call: fa(r = R_poly, nfactors = 3, rotate = "promax", fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR3 MR2 h2 u2 com
## LOCAL -0.29 0.29 0.09 0.13 0.870 2.2
## TOPO 0.18 0.65 -0.26 0.58 0.422 1.5
## ZON -0.02 0.94 -0.11 0.86 0.140 1.0
## ISQ 0.05 0.18 -0.51 0.30 0.704 1.3
## INF 0.78 0.18 0.18 0.74 0.265 1.2
## EDE 0.20 0.15 0.41 0.24 0.760 1.8
## NEU 0.00 -0.07 0.81 0.65 0.351 1.0
## PROF 0.97 -0.01 0.07 0.92 0.076 1.0
## AREA 0.20 0.80 0.07 0.79 0.212 1.1
## FASE 0.81 0.19 -0.13 0.83 0.169 1.2
##
## MR1 MR3 MR2
## SS loadings 2.55 2.29 1.19
## Proportion Var 0.25 0.23 0.12
## Cumulative Var 0.25 0.48 0.60
## Proportion Explained 0.42 0.38 0.20
## Cumulative Proportion 0.42 0.80 1.00
##
## With factor correlations of
## MR1 MR3 MR2
## MR1 1.00 0.34 -0.12
## MR3 0.34 1.00 0.09
## MR2 -0.12 0.09 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 45 with the objective function = 5.74
## df of the model are 18 and the objective function was 0.64
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.07
##
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## MR1 MR3 MR2
## Correlation of (regression) scores with factors 0.98 0.96 0.87
## Multiple R square of scores with factors 0.96 0.92 0.75
## Minimum correlation of possible factor scores 0.91 0.84 0.50
fa(R_poly, nfactors = 4, fm = "minres", rotate = "promax")
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Factor Analysis using method = minres
## Call: fa(r = R_poly, nfactors = 4, rotate = "promax", fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR3 MR2 MR4 h2 u2 com
## LOCAL -0.03 0.09 0.05 0.66 0.45 0.54938 1.1
## TOPO -0.09 0.87 -0.19 -0.19 0.75 0.25447 1.2
## ZON -0.08 0.93 -0.03 0.16 0.81 0.19245 1.1
## ISQ 0.07 0.17 -0.50 -0.02 0.30 0.70019 1.3
## INF 0.83 0.04 0.13 0.02 0.74 0.25856 1.1
## EDE 0.04 0.26 0.47 -0.14 0.32 0.67694 1.8
## NEU 0.02 -0.10 0.74 0.13 0.59 0.41225 1.1
## PROF 0.86 -0.03 0.03 -0.26 0.88 0.12300 1.2
## AREA 0.23 0.70 0.08 0.27 0.79 0.21106 1.5
## FASE 1.05 -0.08 -0.24 0.13 1.00 -0.00061 1.1
##
## MR1 MR3 MR2 MR4
## SS loadings 2.57 2.20 1.15 0.69
## Proportion Var 0.26 0.22 0.12 0.07
## Cumulative Var 0.26 0.48 0.59 0.66
## Proportion Explained 0.39 0.33 0.17 0.10
## Cumulative Proportion 0.39 0.72 0.90 1.00
##
## With factor correlations of
## MR1 MR3 MR2 MR4
## MR1 1.00 0.60 0.03 -0.21
## MR3 0.60 1.00 0.00 -0.01
## MR2 0.03 0.00 1.00 0.04
## MR4 -0.21 -0.01 0.04 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 4 factors are sufficient.
##
## df null model = 45 with the objective function = 5.74
## df of the model are 11 and the objective function was 0.12
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.03
##
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1 MR3 MR2 MR4
## Correlation of (regression) scores with factors 0.99 0.95 0.87 0.85
## Multiple R square of scores with factors 0.98 0.91 0.76 0.72
## Minimum correlation of possible factor scores 0.97 0.82 0.52 0.43
0.7 → muy bien
0.4–0.7 → aceptable < 0.3 → problemático
🔀 com = complejidad
👉 cuántos factores explican ese ítem
📌 ideal ≈ 1
Ejemplo:
ZON → com = 1.03 👉 carga en UN solo factor → perfecto ✔️ LOCAL → com = 2.19 👉 carga en varios factores → problema ⚠️ Variables fuertes (muy buenas) PROF (h2 0.92) FASE (0.83) ZON (0.86) AREA (0.79) INF (0.73)
👉 sostienen bien la estructura
⚠️ Variables intermedias NEU (0.65) TOPO (0.57)
👉 aceptables
❌ Variables problemáticas LOCAL (h2 0.13, com 2.19) EDE (h2 0.24) ISQ (h2 0.29)
h2 → qué tan bien el modelo explica cada variable u2 → lo que queda sin explicar com → si el ítem pertenece a un solo factor o a varios Un factor representa:
una dimensión latente que explica por qué ciertas variables se correlacionan entre sí.
👉 Entonces:
las variables de un mismo factor tienden a variar juntas pero eso no siempre significa que “aumentan todas en el mismo sentido” Las variables de un mismo factor están asociadas entre sí, ya sea de manera directa o inversa, porque reflejan una misma dimensión subyacente Factor isquémico (tu MR2) NEU ↑ ISQ ↓ (carga negativa)
👉 esto te dice algo interesante:
💥 ese factor no es “puro” 👉 es una dimensión donde:
neuropatía y isquemia no van necesariamente juntas
👉 esto es clínicamente coherente
fa.parallel(datos, fa = "fa", cor = "poly")
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## An ultra-Heywood case was detected. Examine the results carefully
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## An ultra-Heywood case was detected. Examine the results carefully
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Converted non-numeric input to numeric
## Converted non-numeric input to numeric
## Parallel analysis suggests that the number of factors = 4 and the number of components = NA
Fa2<-fa(datos1, nfactors = 2,rotate = "promax" , fm ="mle" , cor = "poly" , correct = 0)
fa.diagram(Fa2)
fa.plot(Fa2, cut = 0.3, labels = names(datos1))
Fa3<-fa(datos1, nfactors = 3,rotate = "promax" , fm ="mle" , cor = "poly" , correct = 0)
fa.diagram(Fa3)
fa.plot(Fa3, cut = 0.3, labels = names(datos1))
Fa4<-fa(datos1, nfactors = 4,rotate = "promax" , fm ="mle" , cor = "poly" , correct = 0)
fa.diagram(Fa4)
fa.plot(Fa4, cut = 0.3, labels = names(datos1))
library(tableone)
#Las variables van directamente sin el nombre del dataframe
catvars = c( "sexo","tbq", "iam", "hta", "dial", "local","topo", "inf", "ede", "neu", "prof", "area", "zon", "fase")
#continuas
vars = c("dias_ev","san_elian", "sexo","tbq", "iam", "hta", "dial", "local","topo", "inf", "ede", "neu", "prof", "area", "zon", "fase" )
tabla_1 <- CreateTableOne(vars = vars, factorVars = catvars, data = sad)
print(tabla_1)
##
## Overall
## n 406
## dias_ev (mean (SD)) 50.26 (112.85)
## san_elian (mean (SD)) 18.26 (4.09)
## sexo = M (%) 324 (79.8)
## tbq (%)
## 0 169 (41.8)
## 1 80 (19.8)
## 2 155 (38.4)
## iam = 1 (%) 83 (20.5)
## hta = 1 (%) 273 (67.4)
## dial = 1 (%) 19 ( 4.7)
## local (%)
## 1 196 (48.3)
## 2 134 (33.0)
## 3 76 (18.7)
## topo (%)
## 1 152 (37.4)
## 2 104 (25.6)
## 3 150 (36.9)
## inf (%)
## 0 86 (21.2)
## 1 32 ( 7.9)
## 2 223 (54.9)
## 3 65 (16.0)
## ede (%)
## 0 143 (35.2)
## 1 98 (24.1)
## 2 124 (30.5)
## 3 41 (10.1)
## neu (%)
## 0 4 ( 1.0)
## 1 25 ( 6.2)
## 2 341 (84.0)
## 3 36 ( 8.9)
## prof (%)
## 1 32 ( 7.9)
## 2 133 (32.8)
## 3 241 (59.4)
## area (%)
## 1 188 (46.3)
## 2 157 (38.7)
## 3 61 (15.0)
## zon (%)
## 1 242 (59.6)
## 2 90 (22.2)
## 3 74 (18.2)
## fase (%)
## 1 19 ( 4.7)
## 2 51 (12.6)
## 3 336 (82.8)
kableone(tabla_1)
| Overall | |
|---|---|
| n | 406 |
| dias_ev (mean (SD)) | 50.26 (112.85) |
| san_elian (mean (SD)) | 18.26 (4.09) |
| sexo = M (%) | 324 (79.8) |
| tbq (%) | |
| 0 | 169 (41.8) |
| 1 | 80 (19.8) |
| 2 | 155 (38.4) |
| iam = 1 (%) | 83 (20.5) |
| hta = 1 (%) | 273 (67.4) |
| dial = 1 (%) | 19 ( 4.7) |
| local (%) | |
| 1 | 196 (48.3) |
| 2 | 134 (33.0) |
| 3 | 76 (18.7) |
| topo (%) | |
| 1 | 152 (37.4) |
| 2 | 104 (25.6) |
| 3 | 150 (36.9) |
| inf (%) | |
| 0 | 86 (21.2) |
| 1 | 32 ( 7.9) |
| 2 | 223 (54.9) |
| 3 | 65 (16.0) |
| ede (%) | |
| 0 | 143 (35.2) |
| 1 | 98 (24.1) |
| 2 | 124 (30.5) |
| 3 | 41 (10.1) |
| neu (%) | |
| 0 | 4 ( 1.0) |
| 1 | 25 ( 6.2) |
| 2 | 341 (84.0) |
| 3 | 36 ( 8.9) |
| prof (%) | |
| 1 | 32 ( 7.9) |
| 2 | 133 (32.8) |
| 3 | 241 (59.4) |
| area (%) | |
| 1 | 188 (46.3) |
| 2 | 157 (38.7) |
| 3 | 61 (15.0) |
| zon (%) | |
| 1 | 242 (59.6) |
| 2 | 90 (22.2) |
| 3 | 74 (18.2) |
| fase (%) | |
| 1 | 19 ( 4.7) |
| 2 | 51 (12.6) |
| 3 | 336 (82.8) |
#variables categorizadas
#Las variables van directamente sin el nombre del dataframe
#ROC
#install.packages("timeROC")
library(timeROC)
## Warning: package 'timeROC' was built under R version 4.4.3
table(sad$evento_ampu, sad$dias_out <= 90)
##
## FALSE TRUE
## 0 66 46
## 1 7 50
## 2 104 133
roc_90 <- timeROC(
T = sad$dias_out,
delta = sad$evento_ampu,
marker = sad$san_elian, # 👈 ACÁ está San Elián
cause = 1,
times = 90,
iid = TRUE
)
table(sad$evento_ampu)
##
## 0 1 2
## 112 57 237
roc_90$AUC
## NULL
plot(roc_90, time = 90)
roc_120 <- timeROC(
T = sad$dias_out,
delta = sad$evento_ampu,
marker = sad$san_elian,
cause = 1,
times = 120,
iid = TRUE
)
roc_120$AUC
## NULL
plot(roc_120, time = 120)
roc_180 <- timeROC(
T = sad$dias_out,
delta = sad$evento_ampu,
marker = sad$san_elian,
cause = 1,
times = 180,
iid = TRUE
)
roc_180$AUC
## NULL
plot(roc_180, time = 180)
sens <- roc_90$TP[,1] # sensibilidad
spec <- 1 - roc_90$FP[,1] # especificidad
cutoffs <- roc_90$cutoffs
youden <- sens + spec - 1
best <- which.max(youden)
#La capacidad discriminativa del puntaje de San Elián para predecir amputación mayor a 90 días se evaluó mediante curvas ROC dependientes del tiempo, considerando la presencia de eventos competitivos. El área bajo la curva fue de 0.829.