Carga de librerías
# Cargar las librerías necesarias
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(caret)
## Loading required package: lattice
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
Se utiliza el conjunto de datos iris
# Load the built-in iris dataset into the workspace
data(iris)
# Keep only the 'setosa' and 'versicolor' observations for the analysis
iris_data <- subset(iris, Species %in% c("setosa", "versicolor"))
# Encode the response as a numeric 0/1 flag: versicolor -> 1, setosa -> 0
iris_data[["SpeciesBinary"]] <- ifelse(
  iris_data[["Species"]] == "versicolor",
  1,
  0
)
# Drop the original Species column; only the binary flag is used from here on
iris_data[["Species"]] <- NULL
# Numeric overview of the four measurements and the binary response
summary(iris_data)
## Sepal.Length Sepal.Width Petal.Length Petal.Width SpeciesBinary
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100 Min. :0.0
## 1st Qu.:5.000 1st Qu.:2.800 1st Qu.:1.500 1st Qu.:0.200 1st Qu.:0.0
## Median :5.400 Median :3.050 Median :2.450 Median :0.800 Median :0.5
## Mean :5.471 Mean :3.099 Mean :2.861 Mean :0.786 Mean :0.5
## 3rd Qu.:5.900 3rd Qu.:3.400 3rd Qu.:4.325 3rd Qu.:1.300 3rd Qu.:1.0
## Max. :7.000 Max. :4.400 Max. :5.100 Max. :1.800 Max. :1.0
# Histogram of Sepal.Length (bin width 0.1), filled by the binary species label
ggplot(
  data = iris_data,
  mapping = aes(x = Sepal.Length, fill = factor(SpeciesBinary))
) +
  geom_histogram(binwidth = 0.1)
# Same histogram for Sepal.Width
ggplot(
  data = iris_data,
  mapping = aes(x = Sepal.Width, fill = factor(SpeciesBinary))
) +
  geom_histogram(binwidth = 0.1)
# Fit the logistic regression model directly: SpeciesBinary is regressed on
# every other column of iris_data (the `.` on the right-hand side of the formula).
model <- glm(SpeciesBinary ~ ., data = iris_data, family = "binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# NOTE(review): the two recorded warnings above, together with the near-zero
# residual deviance and the very large standard errors in the summary output,
# suggest the predictors separate the two classes perfectly. If so, the
# coefficient estimates and p-values are not interpretable -- confirm, and
# consider fewer predictors or a penalized fit.
# Use the model summary for validation
summary(model)
##
## Call:
## glm(formula = SpeciesBinary ~ ., family = "binomial", data = iris_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.556 601950.324 0 1
## Sepal.Length -9.879 194223.245 0 1
## Sepal.Width -7.418 92924.451 0 1
## Petal.Length 19.054 144515.981 0 1
## Petal.Width 25.033 216058.936 0 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1.3863e+02 on 99 degrees of freedom
## Residual deviance: 1.3166e-09 on 95 degrees of freedom
## AIC: 10
##
## Number of Fisher Scoring iterations: 25
# A single new observation, with one column per predictor used by the model
new_data <- data.frame(
  Sepal.Length = 5.0,
  Sepal.Width = 3.0,
  Petal.Length = 1.5,
  Petal.Width = 0.2
)
# Prediction on the response scale, i.e. the fitted probability that
# SpeciesBinary equals 1 for this observation
predicted_probability <- predict(
  object = model,
  newdata = new_data,
  type = "response"
)
# Show the predicted probability
print(predicted_probability)
## 1
## 2.220446e-16
# Interpret the model: explicitly print the summary of the fitted glm object.
# NOTE(review): this prints the same summary(model) output already shown
# earlier in the file.
print(summary(model))
##
## Call:
## glm(formula = SpeciesBinary ~ ., family = "binomial", data = iris_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.556 601950.324 0 1
## Sepal.Length -9.879 194223.245 0 1
## Sepal.Width -7.418 92924.451 0 1
## Petal.Length 19.054 144515.981 0 1
## Petal.Width 25.033 216058.936 0 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1.3863e+02 on 99 degrees of freedom
## Residual deviance: 1.3166e-09 on 95 degrees of freedom
## AIC: 10
##
## Number of Fisher Scoring iterations: 25