# Cargar los paquetes necesarios
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.1
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(car)
## Warning: package 'car' was built under R version 4.4.1
## Cargando paquete requerido: carData
## Warning: package 'carData' was built under R version 4.4.1
## 
## Adjuntando el paquete: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(MASS)
## Warning: package 'MASS' was built under R version 4.4.1
## 
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# Load the patient data set.
# The original Desktop location stays the default, so existing runs behave
# exactly as before. On any other machine, set the PACIENTES_CSV environment
# variable to the file's location instead of editing this script.
ruta_datos <- Sys.getenv(
  "PACIENTES_CSV",
  unset = "C:/Users/lucas otero/Desktop/Pacientes2.csv"
)

# Fail early with an actionable message instead of a bare connection error.
if (!file.exists(ruta_datos)) {
  stop(
    "Data file not found: ", ruta_datos,
    " (set PACIENTES_CSV to the location of Pacientes2.csv)",
    call. = FALSE
  )
}
data <- read.csv(ruta_datos)

# Show the first rows of the data frame as a quick sanity check
head(data)
##   NOEXPED Enfermedad HIPERTEN HIPERGLU HDLBAJA HIPERTRI CINTALTA EDAD GENERO
## 1       1         NO        0        0       1        0        0   38      0
## 2       2         SI        0        1       1        1        1   49      1
## 3       3         SI        1        1       0        1        1   59      1
## 4       4         NO        1        1       0        0        0   44      0
## 5       5         NO        0        0       1        0        0   42      0
## 6       6         NO        0        0       1        0        1   38      0
##   FUMA ALCOHOL POLIURIA
## 1    0       0        0
## 2    0       0        0
## 3    0       0        1
## 4    0       0        0
## 5    0       0        0
## 6    0       0        0
# Per-column summary of the loaded data frame
base::summary(data)
##     NOEXPED       Enfermedad           HIPERTEN         HIPERGLU     
##  Min.   :  1.0   Length:985         Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:249.0   Class :character   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :496.0   Mode  :character   Median :0.0000   Median :0.0000  
##  Mean   :496.4                      Mean   :0.2102   Mean   :0.2579  
##  3rd Qu.:744.0                      3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :993.0                      Max.   :1.0000   Max.   :1.0000  
##     HDLBAJA          HIPERTRI         CINTALTA           EDAD      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :14.00  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:38.00  
##  Median :1.0000   Median :0.0000   Median :1.0000   Median :45.00  
##  Mean   :0.5635   Mean   :0.3086   Mean   :0.5157   Mean   :43.39  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:50.00  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :79.00  
##      GENERO            FUMA           ALCOHOL          POLIURIA      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.3442   Mean   :0.2345   Mean   :0.2416   Mean   :0.03655  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000
# Scatter plot of the HDLBAJA indicator against age (EDAD)
plot(
  x = data[["EDAD"]],
  y = data[["HDLBAJA"]],
  pch = 19,
  xlab = "Edad",
  ylab = "HDL Bajo",
  main = "Gráfico de dispersión de Edad vs HDL Bajo"
)

# Simple linear regression of HDLBAJA on EDAD
modelo_simple <- lm(formula = HDLBAJA ~ EDAD, data = data)

# Coefficient table and overall fit statistics
base::summary(modelo_simple)
## 
## Call:
## lm(formula = HDLBAJA ~ EDAD, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6005 -0.5620  0.4234  0.4359  0.4640 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.518258   0.058482   8.862   <2e-16 ***
## EDAD        0.001041   0.001297   0.803    0.422    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4963 on 983 degrees of freedom
## Multiple R-squared:  0.000655,   Adjusted R-squared:  -0.0003616 
## F-statistic: 0.6443 on 1 and 983 DF,  p-value: 0.4223
# Analysis-of-variance table for the simple regression
stats::anova(modelo_simple)
## Analysis of Variance Table
## 
## Response: HDLBAJA
##            Df  Sum Sq Mean Sq F value Pr(>F)
## EDAD        1   0.159 0.15870  0.6443 0.4223
## Residuals 983 242.126 0.24631
# Residuals of the simple regression
residuos_simple <- residuals(modelo_simple)

# Standard lm diagnostic plots on a 2 x 2 grid. par() returns the previous
# settings; they are restored right after so that the next plot is drawn full
# size instead of being squeezed into one cell of the grid.
parametros_previos <- par(mfrow = c(2, 2))
plot(modelo_simple)
par(parametros_previos)

# Residuals against age. EDAD is taken from the model frame (the rows actually
# used in the fit) so x and the residuals always have the same length, even if
# lm() dropped incomplete rows.
edad_ajustada <- model.frame(modelo_simple)[["EDAD"]]
plot(edad_ajustada, residuos_simple, main = "Residuos vs Edad",
     xlab = "Edad", ylab = "Residuos", pch = 19)
abline(h = 0, col = "red")

# Multiple linear regression of HDLBAJA on the nine candidate predictors.
# Term order is kept as-is because the sequential ANOVA table depends on it.
modelo_multiple <- lm(
  formula = HDLBAJA ~ HIPERTEN + HIPERGLU + HIPERTRI + CINTALTA +
    EDAD + GENERO + FUMA + ALCOHOL + POLIURIA,
  data = data
)

# Coefficient table and overall fit statistics
base::summary(modelo_multiple)
## 
## Call:
## lm(formula = HDLBAJA ~ HIPERTEN + HIPERGLU + HIPERTRI + CINTALTA + 
##     EDAD + GENERO + FUMA + ALCOHOL + POLIURIA, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8293 -0.5046  0.2474  0.4296  0.7273 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.549683   0.060807   9.040  < 2e-16 ***
## HIPERTEN     0.017581   0.041036   0.428  0.66843    
## HIPERGLU     0.001014   0.037316   0.027  0.97832    
## HIPERTRI     0.131745   0.035294   3.733  0.00020 ***
## CINTALTA     0.132630   0.033218   3.993 7.02e-05 ***
## EDAD        -0.001254   0.001397  -0.898  0.36958    
## GENERO      -0.096057   0.034274  -2.803  0.00517 ** 
## FUMA         0.036418   0.037180   0.980  0.32757    
## ALCOHOL     -0.094451   0.037383  -2.527  0.01168 *  
## POLIURIA     0.068162   0.082730   0.824  0.41019    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4844 on 975 degrees of freedom
## Multiple R-squared:  0.05568,    Adjusted R-squared:  0.04696 
## F-statistic: 6.388 on 9 and 975 DF,  p-value: 7.65e-09
# Analysis-of-variance table for the multiple regression
stats::anova(modelo_multiple)
## Analysis of Variance Table
## 
## Response: HDLBAJA
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## HIPERTEN    1   0.428  0.4280  1.8240 0.1771492    
## HIPERGLU    1   0.283  0.2826  1.2041 0.2727740    
## HIPERTRI    1   3.501  3.5010 14.9193 0.0001196 ***
## CINTALTA    1   4.808  4.8081 20.4898 6.735e-06 ***
## EDAD        1   0.168  0.1676  0.7144 0.3982060    
## GENERO      1   2.551  2.5505 10.8691 0.0010131 ** 
## FUMA        1   0.110  0.1100  0.4690 0.4936315    
## ALCOHOL     1   1.484  1.4835  6.3219 0.0120855 *  
## POLIURIA    1   0.159  0.1593  0.6788 0.4101920    
## Residuals 975 228.794  0.2347                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residuals of the multiple regression
residuos_multiple <- residuals(modelo_multiple)

# Standard lm diagnostic plots on a 2 x 2 grid. par() returns the previous
# settings; they are restored right after so that the next plot is drawn full
# size instead of being squeezed into one cell of the grid.
parametros_previos <- par(mfrow = c(2, 2))
plot(modelo_multiple)
par(parametros_previos)

# Residuals against the observed response. The response is taken from the
# model frame (the rows actually used in the fit) so x and the residuals always
# have the same length, even if lm() dropped incomplete rows.
# NOTE(review): residuals are correlated with the observed response by
# construction; the "Residuals vs Fitted" panel above is the usual diagnostic.
hdl_observado <- model.response(model.frame(modelo_multiple))
plot(hdl_observado, residuos_multiple, main = "Residuos vs HDL Bajo",
     xlab = "HDL Bajo", ylab = "Residuos", pch = 19)
abline(h = 0, col = "red")

# Backward elimination by AIC, starting from the full multiple model
modelo_backward <- stats::step(
  object = modelo_multiple,
  direction = "backward"
)
## Start:  AIC=-1417.92
## HDLBAJA ~ HIPERTEN + HIPERGLU + HIPERTRI + CINTALTA + EDAD + 
##     GENERO + FUMA + ALCOHOL + POLIURIA
## 
##            Df Sum of Sq    RSS     AIC
## - HIPERGLU  1    0.0002 228.79 -1419.9
## - HIPERTEN  1    0.0431 228.84 -1419.7
## - POLIURIA  1    0.1593 228.95 -1419.2
## - EDAD      1    0.1891 228.98 -1419.1
## - FUMA      1    0.2251 229.02 -1419.0
## <none>                  228.79 -1417.9
## - ALCOHOL   1    1.4980 230.29 -1413.5
## - GENERO    1    1.8432 230.64 -1412.0
## - HIPERTRI  1    3.2696 232.06 -1406.0
## - CINTALTA  1    3.7409 232.53 -1404.0
## 
## Step:  AIC=-1419.92
## HDLBAJA ~ HIPERTEN + HIPERTRI + CINTALTA + EDAD + GENERO + FUMA + 
##     ALCOHOL + POLIURIA
## 
##            Df Sum of Sq    RSS     AIC
## - HIPERTEN  1    0.0439 228.84 -1421.7
## - POLIURIA  1    0.1602 228.95 -1421.2
## - EDAD      1    0.1922 228.99 -1421.1
## - FUMA      1    0.2250 229.02 -1421.0
## <none>                  228.79 -1419.9
## - ALCOHOL   1    1.5036 230.30 -1415.5
## - GENERO    1    1.8456 230.64 -1414.0
## - HIPERTRI  1    3.2814 232.07 -1407.9
## - CINTALTA  1    3.8276 232.62 -1405.6
## 
## Step:  AIC=-1421.73
## HDLBAJA ~ HIPERTRI + CINTALTA + EDAD + GENERO + FUMA + ALCOHOL + 
##     POLIURIA
## 
##            Df Sum of Sq    RSS     AIC
## - EDAD      1    0.1559 228.99 -1423.1
## - POLIURIA  1    0.1628 229.00 -1423.0
## - FUMA      1    0.2192 229.06 -1422.8
## <none>                  228.84 -1421.7
## - ALCOHOL   1    1.5086 230.35 -1417.3
## - GENERO    1    1.8102 230.65 -1416.0
## - HIPERTRI  1    3.3070 232.15 -1409.6
## - CINTALTA  1    4.0502 232.89 -1406.5
## 
## Step:  AIC=-1423.06
## HDLBAJA ~ HIPERTRI + CINTALTA + GENERO + FUMA + ALCOHOL + POLIURIA
## 
##            Df Sum of Sq    RSS     AIC
## - POLIURIA  1    0.1380 229.13 -1424.5
## - FUMA      1    0.2421 229.24 -1424.0
## <none>                  228.99 -1423.1
## - ALCOHOL   1    1.5490 230.54 -1418.4
## - GENERO    1    1.7916 230.78 -1417.4
## - HIPERTRI  1    3.1801 232.17 -1411.5
## - CINTALTA  1    3.8948 232.89 -1408.5
## 
## Step:  AIC=-1424.47
## HDLBAJA ~ HIPERTRI + CINTALTA + GENERO + FUMA + ALCOHOL
## 
##            Df Sum of Sq    RSS     AIC
## - FUMA      1    0.2430 229.38 -1425.4
## <none>                  229.13 -1424.5
## - ALCOHOL   1    1.5347 230.67 -1419.9
## - GENERO    1    1.7923 230.92 -1418.8
## - HIPERTRI  1    3.2188 232.35 -1412.7
## - CINTALTA  1    3.9281 233.06 -1409.7
## 
## Step:  AIC=-1425.43
## HDLBAJA ~ HIPERTRI + CINTALTA + GENERO + ALCOHOL
## 
##            Df Sum of Sq    RSS     AIC
## <none>                  229.38 -1425.4
## - ALCOHOL   1    1.4094 230.78 -1421.4
## - GENERO    1    1.7050 231.08 -1420.1
## - HIPERTRI  1    3.3628 232.74 -1413.1
## - CINTALTA  1    3.9023 233.28 -1410.8
# Coefficients and fit statistics of the model kept by backward elimination
base::summary(modelo_backward)
## 
## Call:
## lm(formula = HDLBAJA ~ HIPERTRI + CINTALTA + GENERO + ALCOHOL, 
##     data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7709 -0.5094  0.2291  0.4503  0.6730 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.50938    0.02687  18.958  < 2e-16 ***
## HIPERTRI     0.13194    0.03481   3.790  0.00016 ***
## CINTALTA     0.12955    0.03173   4.083  4.8e-05 ***
## GENERO      -0.09166    0.03396  -2.699  0.00708 ** 
## ALCOHOL     -0.09072    0.03697  -2.454  0.01430 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4838 on 980 degrees of freedom
## Multiple R-squared:  0.05328,    Adjusted R-squared:  0.04942 
## F-statistic: 13.79 on 4 and 980 DF,  p-value: 6.039e-11
# Forward selection by AIC: start from the intercept-only model and consider
# adding any of the candidate predictors listed in the scope formula.
modelo_forward <- stats::step(
  object = lm(formula = HDLBAJA ~ 1, data = data),
  scope = ~ HIPERTEN + HIPERGLU + HIPERTRI + CINTALTA + EDAD + GENERO +
    FUMA + ALCOHOL + POLIURIA,
  direction = "forward"
)
## Start:  AIC=-1379.49
## HDLBAJA ~ 1
## 
##            Df Sum of Sq    RSS     AIC
## + CINTALTA  1    6.7556 235.53 -1405.3
## + HIPERTRI  1    3.9219 238.36 -1393.6
## + ALCOHOL   1    2.4670 239.82 -1387.6
## + GENERO    1    1.9855 240.30 -1385.6
## <none>                  242.28 -1379.5
## + HIPERTEN  1    0.4280 241.86 -1379.2
## + HIPERGLU  1    0.4186 241.87 -1379.2
## + POLIURIA  1    0.2126 242.07 -1378.4
## + EDAD      1    0.1587 242.13 -1378.1
## + FUMA      1    0.0835 242.20 -1377.8
## 
## Step:  AIC=-1405.35
## HDLBAJA ~ CINTALTA
## 
##            Df Sum of Sq    RSS     AIC
## + HIPERTRI  1   2.26105 233.27 -1412.8
## + ALCOHOL   1   1.87264 233.66 -1411.2
## + GENERO    1   1.50099 234.03 -1409.6
## <none>                  235.53 -1405.3
## + POLIURIA  1   0.15428 235.37 -1404.0
## + FUMA      1   0.10763 235.42 -1403.8
## + EDAD      1   0.04697 235.48 -1403.5
## + HIPERTEN  1   0.00687 235.52 -1403.4
## + HIPERGLU  1   0.00667 235.52 -1403.4
## 
## Step:  AIC=-1412.85
## HDLBAJA ~ CINTALTA + HIPERTRI
## 
##            Df Sum of Sq    RSS     AIC
## + GENERO    1   2.48363 230.78 -1421.4
## + ALCOHOL   1   2.18811 231.08 -1420.1
## <none>                  233.27 -1412.8
## + EDAD      1   0.16315 233.10 -1411.5
## + POLIURIA  1   0.12173 233.15 -1411.4
## + FUMA      1   0.03372 233.23 -1411.0
## + HIPERGLU  1   0.00212 233.27 -1410.9
## + HIPERTEN  1   0.00139 233.27 -1410.8
## 
## Step:  AIC=-1421.39
## HDLBAJA ~ CINTALTA + HIPERTRI + GENERO
## 
##            Df Sum of Sq    RSS     AIC
## + ALCOHOL   1   1.40945 229.38 -1425.4
## <none>                  230.78 -1421.4
## + EDAD      1   0.18436 230.60 -1420.2
## + POLIURIA  1   0.12482 230.66 -1419.9
## + FUMA      1   0.11779 230.67 -1419.9
## + HIPERTEN  1   0.00675 230.78 -1419.4
## + HIPERGLU  1   0.00017 230.78 -1419.4
## 
## Step:  AIC=-1425.43
## HDLBAJA ~ CINTALTA + HIPERTRI + GENERO + ALCOHOL
## 
##            Df Sum of Sq    RSS     AIC
## <none>                  229.38 -1425.4
## + FUMA      1  0.243023 229.13 -1424.5
## + EDAD      1  0.152205 229.22 -1424.1
## + POLIURIA  1  0.138942 229.24 -1424.0
## + HIPERTEN  1  0.006195 229.37 -1423.5
## + HIPERGLU  1  0.002083 229.37 -1423.4
# Coefficients and fit statistics of the model reached by forward selection
base::summary(modelo_forward)
## 
## Call:
## lm(formula = HDLBAJA ~ CINTALTA + HIPERTRI + GENERO + ALCOHOL, 
##     data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7709 -0.5094  0.2291  0.4503  0.6730 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.50938    0.02687  18.958  < 2e-16 ***
## CINTALTA     0.12955    0.03173   4.083  4.8e-05 ***
## HIPERTRI     0.13194    0.03481   3.790  0.00016 ***
## GENERO      -0.09166    0.03396  -2.699  0.00708 ** 
## ALCOHOL     -0.09072    0.03697  -2.454  0.01430 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4838 on 980 degrees of freedom
## Multiple R-squared:  0.05328,    Adjusted R-squared:  0.04942 
## F-statistic: 13.79 on 4 and 980 DF,  p-value: 6.039e-11