Carga de librerías

# Cargar las librerías necesarias
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(caret)
## Loading required package: lattice
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

Se utilizan los datos IRIS

 # Load the built-in iris dataset into the workspace
data(iris)

# Flag the rows belonging to the two species under study
is_target <- iris$Species == "setosa" | iris$Species == "versicolor"

# Keep those rows and every column except Species
iris_data <- iris[is_target, names(iris) != "Species"]

# Binary response: 1 for versicolor, 0 for setosa
iris_data$SpeciesBinary <- as.numeric(iris$Species[is_target] == "versicolor")

2. Análisis exploratorio

# Print per-column summary statistics of iris_data
summary(iris_data)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width    SpeciesBinary
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100   Min.   :0.0  
##  1st Qu.:5.000   1st Qu.:2.800   1st Qu.:1.500   1st Qu.:0.200   1st Qu.:0.0  
##  Median :5.400   Median :3.050   Median :2.450   Median :0.800   Median :0.5  
##  Mean   :5.471   Mean   :3.099   Mean   :2.861   Mean   :0.786   Mean   :0.5  
##  3rd Qu.:5.900   3rd Qu.:3.400   3rd Qu.:4.325   3rd Qu.:1.300   3rd Qu.:1.0  
##  Max.   :7.000   Max.   :4.400   Max.   :5.100   Max.   :1.800   Max.   :1.0
# Histogram of sepal length, bars filled by class (SpeciesBinary as a factor)
ggplot(
  data = iris_data,
  mapping = aes(x = Sepal.Length, fill = factor(SpeciesBinary))
) +
  geom_histogram(binwidth = 0.1)

# Same histogram for sepal width
ggplot(
  data = iris_data,
  mapping = aes(x = Sepal.Width, fill = factor(SpeciesBinary))
) +
  geom_histogram(binwidth = 0.1)

3. Identificar el modelo

# Fit a logistic regression of SpeciesBinary on all remaining columns of
# iris_data (the `.` in the formula) using the binomial family.
# NOTE(review): the recorded run emits "algorithm did not converge" and
# "fitted probabilities numerically 0 or 1 occurred"; together with the very
# large standard errors in the summary this looks like perfect separation of
# the two classes, so the coefficients should not be interpreted — confirm.
model <- glm(SpeciesBinary ~ ., data = iris_data, family = "binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

4. Validar el modelo

# Validate the model by inspecting its summary
# (coefficients, standard errors, deviances, AIC)
summary(model)
## 
## Call:
## glm(formula = SpeciesBinary ~ ., family = "binomial", data = iris_data)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)
## (Intercept)       6.556 601950.324       0        1
## Sepal.Length     -9.879 194223.245       0        1
## Sepal.Width      -7.418  92924.451       0        1
## Petal.Length     19.054 144515.981       0        1
## Petal.Width      25.033 216058.936       0        1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1.3863e+02  on 99  degrees of freedom
## Residual deviance: 1.3166e-09  on 95  degrees of freedom
## AIC: 10
## 
## Number of Fisher Scoring iterations: 25

5. Predicción para un nuevo individuo

# Measurements of the single new observation to score
new_data <- data.frame(
  Sepal.Length = 5.0,
  Sepal.Width = 3.0,
  Petal.Length = 1.5,
  Petal.Width = 0.2
)

# type = "response" asks for the prediction on the response (probability)
# scale, i.e. the fitted probability that SpeciesBinary is 1
predicted_probability <- predict(
  object = model,
  newdata = new_data,
  type = "response"
)

# Show the predicted probability
print(predicted_probability)
##            1 
## 2.220446e-16
# Interpret the model: print the model summary
# NOTE(review): this repeats the summary(model) call from the validation step
print(summary(model))
## 
## Call:
## glm(formula = SpeciesBinary ~ ., family = "binomial", data = iris_data)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)
## (Intercept)       6.556 601950.324       0        1
## Sepal.Length     -9.879 194223.245       0        1
## Sepal.Width      -7.418  92924.451       0        1
## Petal.Length     19.054 144515.981       0        1
## Petal.Width      25.033 216058.936       0        1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1.3863e+02  on 99  degrees of freedom
## Residual deviance: 1.3166e-09  on 95  degrees of freedom
## AIC: 10
## 
## Number of Fisher Scoring iterations: 25