# Cargar los paquetes necesarios
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.1
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(car)
## Warning: package 'car' was built under R version 4.4.1
## Cargando paquete requerido: carData
## Warning: package 'carData' was built under R version 4.4.1
##
## Adjuntando el paquete: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(MASS)
## Warning: package 'MASS' was built under R version 4.4.1
##
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# Load the patient data set from a CSV file on the user's desktop.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs as-is on a machine where this exact file exists.
data <- read.csv(
  file = "C:/Users/lucas otero/Desktop/Pacientes2.csv"
)
# Show the first six rows of the data frame
head(data, n = 6L)
## NOEXPED Enfermedad HIPERTEN HIPERGLU HDLBAJA HIPERTRI CINTALTA EDAD GENERO
## 1 1 NO 0 0 1 0 0 38 0
## 2 2 SI 0 1 1 1 1 49 1
## 3 3 SI 1 1 0 1 1 59 1
## 4 4 NO 1 1 0 0 0 44 0
## 5 5 NO 0 0 1 0 0 42 0
## 6 6 NO 0 0 1 0 1 38 0
## FUMA ALCOHOL POLIURIA
## 1 0 0 0
## 2 0 0 0
## 3 0 0 1
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
# Per-column summary of the loaded data frame
summary(object = data)
## NOEXPED Enfermedad HIPERTEN HIPERGLU
## Min. : 1.0 Length:985 Min. :0.0000 Min. :0.0000
## 1st Qu.:249.0 Class :character 1st Qu.:0.0000 1st Qu.:0.0000
## Median :496.0 Mode :character Median :0.0000 Median :0.0000
## Mean :496.4 Mean :0.2102 Mean :0.2579
## 3rd Qu.:744.0 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :993.0 Max. :1.0000 Max. :1.0000
## HDLBAJA HIPERTRI CINTALTA EDAD
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :14.00
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:38.00
## Median :1.0000 Median :0.0000 Median :1.0000 Median :45.00
## Mean :0.5635 Mean :0.3086 Mean :0.5157 Mean :43.39
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:50.00
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :79.00
## GENERO FUMA ALCOHOL POLIURIA
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.3442 Mean :0.2345 Mean :0.2416 Mean :0.03655
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
# Scatter plot of HDLBAJA (y-axis) against EDAD (x-axis)
with(
  data,
  plot(
    EDAD, HDLBAJA,
    main = "Gráfico de dispersión de Edad vs HDL Bajo",
    xlab = "Edad",
    ylab = "HDL Bajo",
    pch = 19
  )
)

# Fit the simple linear regression of HDLBAJA on EDAD.
# NOTE(review): HDLBAJA appears to be a 0/1 indicator (see the summary of the
# data); lm() treats it as a continuous response. Confirm that a linear model
# is intended here rather than e.g. glm(..., family = binomial).
modelo_simple <- lm(formula = HDLBAJA ~ EDAD, data = data)
# Coefficient table and fit statistics for the simple model
summary(object = modelo_simple)
##
## Call:
## lm(formula = HDLBAJA ~ EDAD, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6005 -0.5620 0.4234 0.4359 0.4640
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.518258 0.058482 8.862 <2e-16 ***
## EDAD 0.001041 0.001297 0.803 0.422
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4963 on 983 degrees of freedom
## Multiple R-squared: 0.000655, Adjusted R-squared: -0.0003616
## F-statistic: 0.6443 on 1 and 983 DF, p-value: 0.4223
# Analysis-of-variance table for the simple model
anova(object = modelo_simple)
## Analysis of Variance Table
##
## Response: HDLBAJA
## Df Sum Sq Mean Sq F value Pr(>F)
## EDAD 1 0.159 0.15870 0.6443 0.4223
## Residuals 983 242.126 0.24631
# Residuals of the simple model, kept for the residual plot below
residuos_simple <- residuals(modelo_simple)

# Diagnostic plots for the fitted model, laid out on a 2x2 grid.
# par() returns the previous settings, which are saved so the layout can be
# restored afterwards.
par_previo <- par(mfrow = c(2, 2))
plot(modelo_simple)
# Restore the original layout; otherwise the next plot would be drawn in a
# single cell of the 2x2 grid instead of using the whole device.
par(par_previo)

# Residuals against the predictor EDAD, with a red reference line at zero
plot(data$EDAD, residuos_simple,
     main = "Residuos vs Edad",
     xlab = "Edad", ylab = "Residuos", pch = 19)
abline(h = 0, col = "red")
# Fit the multiple linear regression of HDLBAJA on all nine candidate
# predictors.
modelo_multiple <- lm(
  formula = HDLBAJA ~ HIPERTEN + HIPERGLU + HIPERTRI + CINTALTA +
    EDAD + GENERO + FUMA + ALCOHOL + POLIURIA,
  data = data
)
# Coefficient table and fit statistics for the multiple model
summary(object = modelo_multiple)
##
## Call:
## lm(formula = HDLBAJA ~ HIPERTEN + HIPERGLU + HIPERTRI + CINTALTA +
## EDAD + GENERO + FUMA + ALCOHOL + POLIURIA, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8293 -0.5046 0.2474 0.4296 0.7273
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.549683 0.060807 9.040 < 2e-16 ***
## HIPERTEN 0.017581 0.041036 0.428 0.66843
## HIPERGLU 0.001014 0.037316 0.027 0.97832
## HIPERTRI 0.131745 0.035294 3.733 0.00020 ***
## CINTALTA 0.132630 0.033218 3.993 7.02e-05 ***
## EDAD -0.001254 0.001397 -0.898 0.36958
## GENERO -0.096057 0.034274 -2.803 0.00517 **
## FUMA 0.036418 0.037180 0.980 0.32757
## ALCOHOL -0.094451 0.037383 -2.527 0.01168 *
## POLIURIA 0.068162 0.082730 0.824 0.41019
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4844 on 975 degrees of freedom
## Multiple R-squared: 0.05568, Adjusted R-squared: 0.04696
## F-statistic: 6.388 on 9 and 975 DF, p-value: 7.65e-09
# Analysis-of-variance table for the multiple model
anova(object = modelo_multiple)
## Analysis of Variance Table
##
## Response: HDLBAJA
## Df Sum Sq Mean Sq F value Pr(>F)
## HIPERTEN 1 0.428 0.4280 1.8240 0.1771492
## HIPERGLU 1 0.283 0.2826 1.2041 0.2727740
## HIPERTRI 1 3.501 3.5010 14.9193 0.0001196 ***
## CINTALTA 1 4.808 4.8081 20.4898 6.735e-06 ***
## EDAD 1 0.168 0.1676 0.7144 0.3982060
## GENERO 1 2.551 2.5505 10.8691 0.0010131 **
## FUMA 1 0.110 0.1100 0.4690 0.4936315
## ALCOHOL 1 1.484 1.4835 6.3219 0.0120855 *
## POLIURIA 1 0.159 0.1593 0.6788 0.4101920
## Residuals 975 228.794 0.2347
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residuals of the multiple model, kept for the residual plot below
residuos_multiple <- residuals(modelo_multiple)

# Diagnostic plots for the fitted model, laid out on a 2x2 grid.
# par() returns the previous settings, which are saved so the layout can be
# restored afterwards.
par_previo <- par(mfrow = c(2, 2))
plot(modelo_multiple)
# Restore the original layout; otherwise the next plot would be drawn in a
# single cell of the 2x2 grid instead of using the whole device.
par(par_previo)

# Residuals against the observed response, with a red reference line at zero.
# NOTE(review): the x-axis is the observed HDLBAJA, not the fitted values;
# confirm this is intended (the usual check plots residuals against
# fitted(modelo_multiple)).
plot(data$HDLBAJA, residuos_multiple,
     main = "Residuos vs HDL Bajo",
     xlab = "HDL Bajo", ylab = "Residuos", pch = 19)
abline(h = 0, col = "red")
# Backward selection: start from the full multiple model and let step() drop
# terms one at a time; the search trace is printed as it runs.
modelo_backward <- stats::step(
  object = modelo_multiple,
  direction = "backward"
)
## Start: AIC=-1417.92
## HDLBAJA ~ HIPERTEN + HIPERGLU + HIPERTRI + CINTALTA + EDAD +
## GENERO + FUMA + ALCOHOL + POLIURIA
##
## Df Sum of Sq RSS AIC
## - HIPERGLU 1 0.0002 228.79 -1419.9
## - HIPERTEN 1 0.0431 228.84 -1419.7
## - POLIURIA 1 0.1593 228.95 -1419.2
## - EDAD 1 0.1891 228.98 -1419.1
## - FUMA 1 0.2251 229.02 -1419.0
## <none> 228.79 -1417.9
## - ALCOHOL 1 1.4980 230.29 -1413.5
## - GENERO 1 1.8432 230.64 -1412.0
## - HIPERTRI 1 3.2696 232.06 -1406.0
## - CINTALTA 1 3.7409 232.53 -1404.0
##
## Step: AIC=-1419.92
## HDLBAJA ~ HIPERTEN + HIPERTRI + CINTALTA + EDAD + GENERO + FUMA +
## ALCOHOL + POLIURIA
##
## Df Sum of Sq RSS AIC
## - HIPERTEN 1 0.0439 228.84 -1421.7
## - POLIURIA 1 0.1602 228.95 -1421.2
## - EDAD 1 0.1922 228.99 -1421.1
## - FUMA 1 0.2250 229.02 -1421.0
## <none> 228.79 -1419.9
## - ALCOHOL 1 1.5036 230.30 -1415.5
## - GENERO 1 1.8456 230.64 -1414.0
## - HIPERTRI 1 3.2814 232.07 -1407.9
## - CINTALTA 1 3.8276 232.62 -1405.6
##
## Step: AIC=-1421.73
## HDLBAJA ~ HIPERTRI + CINTALTA + EDAD + GENERO + FUMA + ALCOHOL +
## POLIURIA
##
## Df Sum of Sq RSS AIC
## - EDAD 1 0.1559 228.99 -1423.1
## - POLIURIA 1 0.1628 229.00 -1423.0
## - FUMA 1 0.2192 229.06 -1422.8
## <none> 228.84 -1421.7
## - ALCOHOL 1 1.5086 230.35 -1417.3
## - GENERO 1 1.8102 230.65 -1416.0
## - HIPERTRI 1 3.3070 232.15 -1409.6
## - CINTALTA 1 4.0502 232.89 -1406.5
##
## Step: AIC=-1423.06
## HDLBAJA ~ HIPERTRI + CINTALTA + GENERO + FUMA + ALCOHOL + POLIURIA
##
## Df Sum of Sq RSS AIC
## - POLIURIA 1 0.1380 229.13 -1424.5
## - FUMA 1 0.2421 229.24 -1424.0
## <none> 228.99 -1423.1
## - ALCOHOL 1 1.5490 230.54 -1418.4
## - GENERO 1 1.7916 230.78 -1417.4
## - HIPERTRI 1 3.1801 232.17 -1411.5
## - CINTALTA 1 3.8948 232.89 -1408.5
##
## Step: AIC=-1424.47
## HDLBAJA ~ HIPERTRI + CINTALTA + GENERO + FUMA + ALCOHOL
##
## Df Sum of Sq RSS AIC
## - FUMA 1 0.2430 229.38 -1425.4
## <none> 229.13 -1424.5
## - ALCOHOL 1 1.5347 230.67 -1419.9
## - GENERO 1 1.7923 230.92 -1418.8
## - HIPERTRI 1 3.2188 232.35 -1412.7
## - CINTALTA 1 3.9281 233.06 -1409.7
##
## Step: AIC=-1425.43
## HDLBAJA ~ HIPERTRI + CINTALTA + GENERO + ALCOHOL
##
## Df Sum of Sq RSS AIC
## <none> 229.38 -1425.4
## - ALCOHOL 1 1.4094 230.78 -1421.4
## - GENERO 1 1.7050 231.08 -1420.1
## - HIPERTRI 1 3.3628 232.74 -1413.1
## - CINTALTA 1 3.9023 233.28 -1410.8
# Coefficient table and fit statistics for the model chosen by backward
# selection
summary(object = modelo_backward)
##
## Call:
## lm(formula = HDLBAJA ~ HIPERTRI + CINTALTA + GENERO + ALCOHOL,
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7709 -0.5094 0.2291 0.4503 0.6730
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.50938 0.02687 18.958 < 2e-16 ***
## HIPERTRI 0.13194 0.03481 3.790 0.00016 ***
## CINTALTA 0.12955 0.03173 4.083 4.8e-05 ***
## GENERO -0.09166 0.03396 -2.699 0.00708 **
## ALCOHOL -0.09072 0.03697 -2.454 0.01430 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4838 on 980 degrees of freedom
## Multiple R-squared: 0.05328, Adjusted R-squared: 0.04942
## F-statistic: 13.79 on 4 and 980 DF, p-value: 6.039e-11
# Forward selection: start from the intercept-only model and let step() add
# terms from the candidate set given in `scope`.
modelo_nulo <- lm(formula = HDLBAJA ~ 1, data = data)
modelo_forward <- stats::step(
  object = modelo_nulo,
  scope = ~ HIPERTEN + HIPERGLU + HIPERTRI + CINTALTA + EDAD +
    GENERO + FUMA + ALCOHOL + POLIURIA,
  direction = "forward"
)
## Start: AIC=-1379.49
## HDLBAJA ~ 1
##
## Df Sum of Sq RSS AIC
## + CINTALTA 1 6.7556 235.53 -1405.3
## + HIPERTRI 1 3.9219 238.36 -1393.6
## + ALCOHOL 1 2.4670 239.82 -1387.6
## + GENERO 1 1.9855 240.30 -1385.6
## <none> 242.28 -1379.5
## + HIPERTEN 1 0.4280 241.86 -1379.2
## + HIPERGLU 1 0.4186 241.87 -1379.2
## + POLIURIA 1 0.2126 242.07 -1378.4
## + EDAD 1 0.1587 242.13 -1378.1
## + FUMA 1 0.0835 242.20 -1377.8
##
## Step: AIC=-1405.35
## HDLBAJA ~ CINTALTA
##
## Df Sum of Sq RSS AIC
## + HIPERTRI 1 2.26105 233.27 -1412.8
## + ALCOHOL 1 1.87264 233.66 -1411.2
## + GENERO 1 1.50099 234.03 -1409.6
## <none> 235.53 -1405.3
## + POLIURIA 1 0.15428 235.37 -1404.0
## + FUMA 1 0.10763 235.42 -1403.8
## + EDAD 1 0.04697 235.48 -1403.5
## + HIPERTEN 1 0.00687 235.52 -1403.4
## + HIPERGLU 1 0.00667 235.52 -1403.4
##
## Step: AIC=-1412.85
## HDLBAJA ~ CINTALTA + HIPERTRI
##
## Df Sum of Sq RSS AIC
## + GENERO 1 2.48363 230.78 -1421.4
## + ALCOHOL 1 2.18811 231.08 -1420.1
## <none> 233.27 -1412.8
## + EDAD 1 0.16315 233.10 -1411.5
## + POLIURIA 1 0.12173 233.15 -1411.4
## + FUMA 1 0.03372 233.23 -1411.0
## + HIPERGLU 1 0.00212 233.27 -1410.9
## + HIPERTEN 1 0.00139 233.27 -1410.8
##
## Step: AIC=-1421.39
## HDLBAJA ~ CINTALTA + HIPERTRI + GENERO
##
## Df Sum of Sq RSS AIC
## + ALCOHOL 1 1.40945 229.38 -1425.4
## <none> 230.78 -1421.4
## + EDAD 1 0.18436 230.60 -1420.2
## + POLIURIA 1 0.12482 230.66 -1419.9
## + FUMA 1 0.11779 230.67 -1419.9
## + HIPERTEN 1 0.00675 230.78 -1419.4
## + HIPERGLU 1 0.00017 230.78 -1419.4
##
## Step: AIC=-1425.43
## HDLBAJA ~ CINTALTA + HIPERTRI + GENERO + ALCOHOL
##
## Df Sum of Sq RSS AIC
## <none> 229.38 -1425.4
## + FUMA 1 0.243023 229.13 -1424.5
## + EDAD 1 0.152205 229.22 -1424.1
## + POLIURIA 1 0.138942 229.24 -1424.0
## + HIPERTEN 1 0.006195 229.37 -1423.5
## + HIPERGLU 1 0.002083 229.37 -1423.4
# Coefficient table and fit statistics for the model chosen by forward
# selection
summary(object = modelo_forward)
##
## Call:
## lm(formula = HDLBAJA ~ CINTALTA + HIPERTRI + GENERO + ALCOHOL,
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7709 -0.5094 0.2291 0.4503 0.6730
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.50938 0.02687 18.958 < 2e-16 ***
## CINTALTA 0.12955 0.03173 4.083 4.8e-05 ***
## HIPERTRI 0.13194 0.03481 3.790 0.00016 ***
## GENERO -0.09166 0.03396 -2.699 0.00708 **
## ALCOHOL -0.09072 0.03697 -2.454 0.01430 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4838 on 980 degrees of freedom
## Multiple R-squared: 0.05328, Adjusted R-squared: 0.04942
## F-statistic: 13.79 on 4 and 980 DF, p-value: 6.039e-11
