1 Regresión Logística en Python: ejemplo básico 1.

library(reticulate)

1.1 Importando paquetes

#IMPORT PACKAGES
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import matplotlib.pyplot as plt

1.2 Cargando la base de datos.

#LOAD DATA
# CSV hosted in the Statology "Python-Guides" GitHub repository; read straight from the URL
url = "https://raw.githubusercontent.com/Statology/Python-Guides/main/default.csv"
data = pd.read_csv(url)
# preview the first ten rows
data.head(10)
##    default  student      balance        income
## 0        0        0   729.526495  44361.625074
## 1        0        1   817.180407  12106.134700
## 2        0        0  1073.549164  31767.138947
## 3        0        0   529.250605  35704.493935
## 4        0        0   785.655883  38463.495879
## 5        0        1   919.588530   7491.558572
## 6        0        0   825.513331  24905.226578
## 7        0        1   808.667504  17600.451344
## 8        0        0  1161.057854  37468.529288
## 9        0        0     0.000000  29275.268293
# preview only the first six rows
data.head(6)
##    default  student      balance        income
## 0        0        0   729.526495  44361.625074
## 1        0        1   817.180407  12106.134700
## 2        0        0  1073.549164  31767.138947
## 3        0        0   529.250605  35704.493935
## 4        0        0   785.655883  38463.495879
## 5        0        1   919.588530   7491.558572
# number of observations (rows) in the dataset
data.shape[0]
## 10000

1.3 Estimando el modelo de Regresión Logística.

#FIT LOGISTIC REGRESSION MODEL
# predictor columns (kept in this order) and the target column
feature_cols = ['student', 'balance', 'income']
X = data[feature_cols]
y = data['default']
# hold out 30% of the rows for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)
# create the classifier with scikit-learn's default settings
log_regression = LogisticRegression()

# estimate the model on the training portion only
log_regression.fit(X_train, y_train)
## LogisticRegression()

1.4 Usando el modelo para hacer predicciones sobre el conjunto de prueba (testing).

#use the fitted model to predict a class label for every row of the test set;
#the bare name on the following line only echoes the resulting array
y_pred = log_regression.predict(X_test)
y_pred
## array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

1.5 Diagnósticos del Modelo.

#MODEL DIAGNOSTICS
# confusion matrix on the test set (scikit-learn convention: rows = actual class,
# columns = predicted class)
cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
cnf_matrix
## array([[2870,   17],
##        [  93,   20]], dtype=int64)
# share of test observations whose predicted label matches the actual label
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
## Accuracy: 0.9633333333333334
#plot ROC curve (currently disabled: every line of this snippet is commented out)
#y_pred_proba = log_regression.predict_proba(X_test)[::,1]
#fpr, tpr, _ = metrics.roc_curve(y_test,  y_pred_proba)
#auc = metrics.roc_auc_score(y_test, y_pred_proba)
#plt.plot(fpr,tpr,label="AUC="+str(auc))
#plt.legend(loc=4)
#plt.show()
#use model to predict a single new observation (same values as row 0 of the dataset)
# The model was fitted on a DataFrame with named columns, so the new observation is
# passed as a one-row DataFrame with the same column names in the same order. A bare
# nested list relies on positional order and triggers scikit-learn's
# "X does not have valid feature names" warning on versions >= 1.0.
new_obs = pd.DataFrame(
    [[0, 729.526495, 44361.625074]],
    columns=['student', 'balance', 'income'],
)
pred = log_regression.predict(new_obs)
pred
## array([0], dtype=int64)