import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,precision_score
from sklearn.metrics import confusion_matrix
CLASE 9. Regresión Logística Binaria Múltiple en Python
APLICACIÓN GML
Importando librerías
NOTA: No olvide subir a su Colab la base de datos utilizada, “heart_failure_clinical_records_dataset.csv”, o leerla directamente desde el software.
# Load the heart-failure clinical records CSV into a DataFrame.
# NOTE(review): in this section `datos` is only previewed with `datos.head()`;
# the same file is read again into `heart_failure` further down, and that is
# the frame the rest of the analysis works with.
base = "heart_failure_clinical_records_dataset.csv"
datos = pd.read_csv(base)
datos.head()| age | anaemia | creatinine_phosphokinase | diabetes | ejection_fraction | high_blood_pressure | platelets | serum_creatinine | serum_sodium | sex | smoking | time | DEATH_EVENT | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 75.0 | 0 | 582 | 0 | 20 | 1 | 265000.00 | 1.9 | 130 | 1 | 0 | 4 | 1 |
| 1 | 55.0 | 0 | 7861 | 0 | 38 | 0 | 263358.03 | 1.1 | 136 | 1 | 0 | 6 | 1 |
| 2 | 65.0 | 0 | 146 | 0 | 20 | 0 | 162000.00 | 1.3 | 129 | 1 | 1 | 7 | 1 |
| 3 | 50.0 | 1 | 111 | 0 | 20 | 0 | 210000.00 | 1.9 | 137 | 1 | 0 | 7 | 1 |
| 4 | 65.0 | 1 | 160 | 1 | 20 | 0 | 327000.00 | 2.7 | 116 | 0 | 0 | 8 | 1 |
heart_failure=pd.read_csv('heart_failure_clinical_records_dataset.csv')
heart_failure.head(15)| age | anaemia | creatinine_phosphokinase | diabetes | ejection_fraction | high_blood_pressure | platelets | serum_creatinine | serum_sodium | sex | smoking | time | DEATH_EVENT | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 75.0 | 0 | 582 | 0 | 20 | 1 | 265000.00 | 1.9 | 130 | 1 | 0 | 4 | 1 |
| 1 | 55.0 | 0 | 7861 | 0 | 38 | 0 | 263358.03 | 1.1 | 136 | 1 | 0 | 6 | 1 |
| 2 | 65.0 | 0 | 146 | 0 | 20 | 0 | 162000.00 | 1.3 | 129 | 1 | 1 | 7 | 1 |
| 3 | 50.0 | 1 | 111 | 0 | 20 | 0 | 210000.00 | 1.9 | 137 | 1 | 0 | 7 | 1 |
| 4 | 65.0 | 1 | 160 | 1 | 20 | 0 | 327000.00 | 2.7 | 116 | 0 | 0 | 8 | 1 |
| 5 | 90.0 | 1 | 47 | 0 | 40 | 1 | 204000.00 | 2.1 | 132 | 1 | 1 | 8 | 1 |
| 6 | 75.0 | 1 | 246 | 0 | 15 | 0 | 127000.00 | 1.2 | 137 | 1 | 0 | 10 | 1 |
| 7 | 60.0 | 1 | 315 | 1 | 60 | 0 | 454000.00 | 1.1 | 131 | 1 | 1 | 10 | 1 |
| 8 | 65.0 | 0 | 157 | 0 | 65 | 0 | 263358.03 | 1.5 | 138 | 0 | 0 | 10 | 1 |
| 9 | 80.0 | 1 | 123 | 0 | 35 | 1 | 388000.00 | 9.4 | 133 | 1 | 1 | 10 | 1 |
| 10 | 75.0 | 1 | 81 | 0 | 38 | 1 | 368000.00 | 4.0 | 131 | 1 | 1 | 10 | 1 |
| 11 | 62.0 | 0 | 231 | 0 | 25 | 1 | 253000.00 | 0.9 | 140 | 1 | 1 | 10 | 1 |
| 12 | 45.0 | 1 | 981 | 0 | 30 | 0 | 136000.00 | 1.1 | 137 | 1 | 0 | 11 | 1 |
| 13 | 50.0 | 1 | 168 | 0 | 38 | 1 | 276000.00 | 1.1 | 137 | 1 | 0 | 11 | 1 |
| 14 | 49.0 | 1 | 80 | 0 | 30 | 1 | 427000.00 | 1.0 | 138 | 0 | 0 | 12 | 0 |
heart_failure.dtypesage float64
anaemia int64
creatinine_phosphokinase int64
diabetes int64
ejection_fraction int64
high_blood_pressure int64
platelets float64
serum_creatinine float64
serum_sodium int64
sex int64
smoking int64
time int64
DEATH_EVENT int64
dtype: object
heart_failure.shape(299, 13)
heart_failure.columnsIndex(['age', 'anaemia', 'creatinine_phosphokinase', 'diabetes',
'ejection_fraction', 'high_blood_pressure', 'platelets',
'serum_creatinine', 'serum_sodium', 'sex', 'smoking', 'time',
'DEATH_EVENT'],
dtype='object')
# Predictors passed to the correlation matrix computed right after this list.
# The columns that appear as 0/1 flags in the preview (anaemia, diabetes,
# high_blood_pressure, sex, smoking) are not included.
cols = [
    'age',
    'creatinine_phosphokinase',
    'ejection_fraction',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
    'time',
]
heart_failure[cols].corr()| age | creatinine_phosphokinase | ejection_fraction | platelets | serum_creatinine | serum_sodium | time | |
|---|---|---|---|---|---|---|---|
| age | 1.000000 | -0.081584 | 0.060098 | -0.052354 | 0.159187 | -0.045966 | -0.224068 |
| creatinine_phosphokinase | -0.081584 | 1.000000 | -0.044080 | 0.024463 | -0.016408 | 0.059550 | -0.009346 |
| ejection_fraction | 0.060098 | -0.044080 | 1.000000 | 0.072177 | -0.011302 | 0.175902 | 0.041729 |
| platelets | -0.052354 | 0.024463 | 0.072177 | 1.000000 | -0.041198 | 0.062125 | 0.010514 |
| serum_creatinine | 0.159187 | -0.016408 | -0.011302 | -0.041198 | 1.000000 | -0.189095 | -0.149315 |
| serum_sodium | -0.045966 | 0.059550 | 0.175902 | 0.062125 | -0.189095 | 1.000000 | 0.087640 |
| time | -0.224068 | -0.009346 | 0.041729 | 0.010514 | -0.149315 | 0.087640 | 1.000000 |
from sklearn.model_selection import train_test_split
X=heart_failure.drop(columns='DEATH_EVENT')
y=heart_failure.DEATH_EVENT
X.head()
| age | anaemia | creatinine_phosphokinase | diabetes | ejection_fraction | high_blood_pressure | platelets | serum_creatinine | serum_sodium | sex | smoking | time | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 75.0 | 0 | 582 | 0 | 20 | 1 | 265000.00 | 1.9 | 130 | 1 | 0 | 4 |
| 1 | 55.0 | 0 | 7861 | 0 | 38 | 0 | 263358.03 | 1.1 | 136 | 1 | 0 | 6 |
| 2 | 65.0 | 0 | 146 | 0 | 20 | 0 | 162000.00 | 1.3 | 129 | 1 | 1 | 7 |
| 3 | 50.0 | 1 | 111 | 0 | 20 | 0 | 210000.00 | 1.9 | 137 | 1 | 0 | 7 |
| 4 | 65.0 | 1 | 160 | 1 | 20 | 0 | 327000.00 | 2.7 | 116 | 0 | 0 | 8 |
y.head()0 1
1 1
2 1
3 1
4 1
Name: DEATH_EVENT, dtype: int64
# Hold out 30% of the rows as a test set; random_state=0 makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.3)
# Logistic regression with scikit-learn's default settings.
# NOTE(review): the fit that follows emits a ConvergenceWarning (lbfgs reaches
# its iteration limit); the warning text itself suggests raising max_iter or
# scaling the features.
model = LogisticRegression()
mod=model.fit(X_train,y_train)C:\Users\Usuario\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
mod.coef_array([[ 6.35894825e-02, 3.87147883e-04, 3.10777030e-04,
7.28439217e-04, -8.22673452e-02, -6.62636905e-04,
-8.04069151e-07, 5.61481692e-03, 3.37743333e-03,
-1.03768123e-03, -2.88449178e-04, -1.94393137e-02]])
mod.intercept_array([0.00020332])
X.head(1)| age | anaemia | creatinine_phosphokinase | diabetes | ejection_fraction | high_blood_pressure | platelets | serum_creatinine | serum_sodium | sex | smoking | time | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 75.0 | 0 | 582 | 0 | 20 | 1 | 265000.0 | 1.9 | 130 | 1 | 0 | 4 |
y.head(1)0 1
Name: DEATH_EVENT, dtype: int64
mod.predict([[75.0,0,582,0,20,1,265000.00,1.9,130,1,0,4]])C:\Users\Usuario\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py:493: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names
warnings.warn(
array([1])
# Predicted class labels for the held-out rows (used later for the confusion
# matrix).
predictions = model.predict(X_test)
# Score of the fitted classifier on the data it was trained on.
accuracyTrain = model.score(X_train, y_train)
print(accuracyTrain)0.84688995215311
accuracyTest=model.score(X_test,y_test)
print(accuracyTest)0.7888888888888889
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation on the training split only; `scores` keeps the mean
# of the per-fold scores (estimator's default scorer).
fold_scores = cross_val_score(model, X_train, y_train, cv=5)
scores = fold_scores.mean()
scoresC:\Users\Usuario\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
C:\Users\Usuario\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
C:\Users\Usuario\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
C:\Users\Usuario\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
C:\Users\Usuario\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
np.float64(0.8232288037166086)
cm=confusion_matrix(y_test,predictions) # Build the confusion matrix from the test labels and the test predictions
cmarray([[58, 4],
[15, 13]])
CREAR EL MODELO CON STATSMODELS
import statsmodels.api as sm

# Prepend a constant column ('const') to the training predictors so the
# statsmodels fit includes an intercept term.
X_m = sm.add_constant(X_train)
X_m.head()| const | age | anaemia | creatinine_phosphokinase | diabetes | ejection_fraction | high_blood_pressure | platelets | serum_creatinine | serum_sodium | sex | smoking | time | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 281 | 1.0 | 70.0 | 0 | 582 | 0 | 40 | 0 | 51000.0 | 2.7 | 136 | 1 | 1 | 250 |
| 44 | 1.0 | 60.0 | 1 | 588 | 1 | 60 | 0 | 194000.0 | 1.1 | 142 | 0 | 0 | 33 |
| 241 | 1.0 | 65.0 | 0 | 582 | 1 | 30 | 0 | 249000.0 | 1.3 | 136 | 1 | 1 | 212 |
| 189 | 1.0 | 40.0 | 0 | 244 | 0 | 45 | 1 | 275000.0 | 0.9 | 140 | 0 | 0 | 174 |
| 152 | 1.0 | 50.0 | 0 | 115 | 0 | 45 | 1 | 184000.0 | 0.9 | 134 | 1 | 1 | 118 |
logit=sm.Logit(y_train,X_m)
result=logit.fit()
result.summary()Optimization terminated successfully.
Current function value: 0.351414
Iterations 7
| Dep. Variable: | DEATH_EVENT | No. Observations: | 209 |
| Model: | Logit | Df Residuals: | 196 |
| Method: | MLE | Df Model: | 12 |
| Date: | Thu, 25 Jul 2024 | Pseudo R-squ.: | 0.4429 |
| Time: | 22:10:43 | Log-Likelihood: | -73.445 |
| converged: | True | LL-Null: | -131.85 |
| Covariance Type: | nonrobust | LLR p-value: | 2.679e-19 |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
| const | 11.4142 | 6.722 | 1.698 | 0.089 | -1.760 | 24.588 |
| age | 0.0663 | 0.021 | 3.143 | 0.002 | 0.025 | 0.108 |
| anaemia | 0.0687 | 0.434 | 0.158 | 0.874 | -0.781 | 0.918 |
| creatinine_phosphokinase | 0.0004 | 0.000 | 1.563 | 0.118 | -0.000 | 0.001 |
| diabetes | 0.2499 | 0.436 | 0.573 | 0.567 | -0.605 | 1.105 |
| ejection_fraction | -0.0883 | 0.022 | -3.980 | 0.000 | -0.132 | -0.045 |
| high_blood_pressure | -0.3556 | 0.454 | -0.783 | 0.433 | -1.245 | 0.534 |
| platelets | -1.213e-06 | 2.23e-06 | -0.543 | 0.587 | -5.59e-06 | 3.17e-06 |
| serum_creatinine | 0.4064 | 0.265 | 1.535 | 0.125 | -0.113 | 0.925 |
| serum_sodium | -0.0792 | 0.047 | -1.702 | 0.089 | -0.170 | 0.012 |
| sex | -1.0197 | 0.508 | -2.009 | 0.045 | -2.015 | -0.025 |
| smoking | 0.2383 | 0.529 | 0.450 | 0.653 | -0.799 | 1.276 |
| time | -0.0206 | 0.004 | -5.554 | 0.000 | -0.028 | -0.013 |
cols=['anaemia','smoking']
np.exp(-0.0206)
np.float64(0.9796107305032505)
X_m=X_m.drop(columns=cols)
X_m.head()
| const | age | creatinine_phosphokinase | diabetes | ejection_fraction | high_blood_pressure | platelets | serum_creatinine | serum_sodium | sex | time | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 281 | 1.0 | 70.0 | 582 | 0 | 40 | 0 | 51000.0 | 2.7 | 136 | 1 | 250 |
| 44 | 1.0 | 60.0 | 588 | 1 | 60 | 0 | 194000.0 | 1.1 | 142 | 0 | 33 |
| 241 | 1.0 | 65.0 | 582 | 1 | 30 | 0 | 249000.0 | 1.3 | 136 | 1 | 212 |
| 189 | 1.0 | 40.0 | 244 | 0 | 45 | 1 | 275000.0 | 0.9 | 140 | 0 | 174 |
| 152 | 1.0 | 50.0 | 115 | 0 | 45 | 1 | 184000.0 | 0.9 | 134 | 1 | 118 |
logit2=sm.Logit(y_train,X_m)
result2=logit2.fit()
result2.summary()Optimization terminated successfully.
Current function value: 0.351924
Iterations 7
| Dep. Variable: | DEATH_EVENT | No. Observations: | 209 |
| Model: | Logit | Df Residuals: | 198 |
| Method: | MLE | Df Model: | 10 |
| Date: | Thu, 25 Jul 2024 | Pseudo R-squ.: | 0.4421 |
| Time: | 22:10:43 | Log-Likelihood: | -73.552 |
| converged: | True | LL-Null: | -131.85 |
| Covariance Type: | nonrobust | LLR p-value: | 2.488e-20 |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
| const | 11.4122 | 6.723 | 1.697 | 0.090 | -1.765 | 24.589 |
| age | 0.0659 | 0.021 | 3.130 | 0.002 | 0.025 | 0.107 |
| creatinine_phosphokinase | 0.0004 | 0.000 | 1.515 | 0.130 | -0.000 | 0.001 |
| diabetes | 0.2374 | 0.436 | 0.544 | 0.586 | -0.617 | 1.092 |
| ejection_fraction | -0.0887 | 0.022 | -4.005 | 0.000 | -0.132 | -0.045 |
| high_blood_pressure | -0.3588 | 0.452 | -0.793 | 0.428 | -1.245 | 0.528 |
| platelets | -1.156e-06 | 2.23e-06 | -0.519 | 0.604 | -5.52e-06 | 3.21e-06 |
| serum_creatinine | 0.3924 | 0.262 | 1.497 | 0.134 | -0.121 | 0.906 |
| serum_sodium | -0.0783 | 0.046 | -1.686 | 0.092 | -0.169 | 0.013 |
| sex | -0.9332 | 0.457 | -2.044 | 0.041 | -1.828 | -0.038 |
| time | -0.0206 | 0.004 | -5.615 | 0.000 | -0.028 | -0.013 |
cols=['platelets','diabetes']
X_m=X_m.drop(columns=cols)
X_m.head()
| const | age | creatinine_phosphokinase | ejection_fraction | high_blood_pressure | serum_creatinine | serum_sodium | sex | time | |
|---|---|---|---|---|---|---|---|---|---|
| 281 | 1.0 | 70.0 | 582 | 40 | 0 | 2.7 | 136 | 1 | 250 |
| 44 | 1.0 | 60.0 | 588 | 60 | 0 | 1.1 | 142 | 0 | 33 |
| 241 | 1.0 | 65.0 | 582 | 30 | 0 | 1.3 | 136 | 1 | 212 |
| 189 | 1.0 | 40.0 | 244 | 45 | 1 | 0.9 | 140 | 0 | 174 |
| 152 | 1.0 | 50.0 | 115 | 45 | 1 | 0.9 | 134 | 1 | 118 |
logit3=sm.Logit(y_train,X_m)
resul3=logit3.fit()
resul3.summary()Optimization terminated successfully.
Current function value: 0.353362
Iterations 7
| Dep. Variable: | DEATH_EVENT | No. Observations: | 209 |
| Model: | Logit | Df Residuals: | 200 |
| Method: | MLE | Df Model: | 8 |
| Date: | Thu, 25 Jul 2024 | Pseudo R-squ.: | 0.4399 |
| Time: | 22:10:43 | Log-Likelihood: | -73.853 |
| converged: | True | LL-Null: | -131.85 |
| Covariance Type: | nonrobust | LLR p-value: | 2.231e-21 |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
| const | 12.1281 | 6.492 | 1.868 | 0.062 | -0.595 | 24.852 |
| age | 0.0642 | 0.020 | 3.135 | 0.002 | 0.024 | 0.104 |
| creatinine_phosphokinase | 0.0004 | 0.000 | 1.523 | 0.128 | -0.000 | 0.001 |
| ejection_fraction | -0.0884 | 0.022 | -4.022 | 0.000 | -0.131 | -0.045 |
| high_blood_pressure | -0.3875 | 0.450 | -0.862 | 0.389 | -1.269 | 0.494 |
| serum_creatinine | 0.3859 | 0.260 | 1.485 | 0.137 | -0.123 | 0.895 |
| serum_sodium | -0.0842 | 0.045 | -1.854 | 0.064 | -0.173 | 0.005 |
| sex | -0.9296 | 0.448 | -2.074 | 0.038 | -1.808 | -0.051 |
| time | -0.0206 | 0.004 | -5.606 | 0.000 | -0.028 | -0.013 |
# Next elimination step before refitting: remove two predictors that were not
# significant in the previous summary (high_blood_pressure p = 0.389,
# creatinine_phosphokinase p = 0.128).
cols = ['high_blood_pressure', 'creatinine_phosphokinase']
X_m = X_m.drop(columns=cols)
X_m.head()| const | age | ejection_fraction | serum_creatinine | serum_sodium | sex | time | |
|---|---|---|---|---|---|---|---|
| 281 | 1.0 | 70.0 | 40 | 2.7 | 136 | 1 | 250 |
| 44 | 1.0 | 60.0 | 60 | 1.1 | 142 | 0 | 33 |
| 241 | 1.0 | 65.0 | 30 | 1.3 | 136 | 1 | 212 |
| 189 | 1.0 | 40.0 | 45 | 0.9 | 140 | 0 | 174 |
| 152 | 1.0 | 50.0 | 45 | 0.9 | 134 | 1 | 118 |
logit4=sm.Logit(y_train,X_m)
resul4=logit4.fit()
resul4.summary()Optimization terminated successfully.
Current function value: 0.363403
Iterations 7
| Dep. Variable: | DEATH_EVENT | No. Observations: | 209 |
| Model: | Logit | Df Residuals: | 202 |
| Method: | MLE | Df Model: | 6 |
| Date: | Thu, 25 Jul 2024 | Pseudo R-squ.: | 0.4239 |
| Time: | 22:10:43 | Log-Likelihood: | -75.951 |
| converged: | True | LL-Null: | -131.85 |
| Covariance Type: | nonrobust | LLR p-value: | 8.597e-22 |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
| const | 11.5137 | 6.391 | 1.802 | 0.072 | -1.012 | 24.039 |
| age | 0.0580 | 0.019 | 3.006 | 0.003 | 0.020 | 0.096 |
| ejection_fraction | -0.0869 | 0.022 | -4.023 | 0.000 | -0.129 | -0.045 |
| serum_creatinine | 0.3977 | 0.251 | 1.584 | 0.113 | -0.094 | 0.890 |
| serum_sodium | -0.0782 | 0.045 | -1.739 | 0.082 | -0.166 | 0.010 |
| sex | -0.7295 | 0.430 | -1.695 | 0.090 | -1.573 | 0.114 |
| time | -0.0198 | 0.003 | -5.663 | 0.000 | -0.027 | -0.013 |
# Next elimination step before refitting: remove the two predictors with the
# largest p-values in the previous summary (serum_creatinine p = 0.113,
# sex p = 0.090).
cols = ['serum_creatinine', 'sex']
X_m = X_m.drop(columns=cols)
X_m.head()| const | age | ejection_fraction | serum_sodium | time | |
|---|---|---|---|---|---|
| 281 | 1.0 | 70.0 | 40 | 136 | 250 |
| 44 | 1.0 | 60.0 | 60 | 142 | 33 |
| 241 | 1.0 | 65.0 | 30 | 136 | 212 |
| 189 | 1.0 | 40.0 | 45 | 140 | 174 |
| 152 | 1.0 | 50.0 | 45 | 134 | 118 |
logit5=sm.Logit(y_train,X_m)
resul5=logit5.fit()
resul5.summary()Optimization terminated successfully.
Current function value: 0.376528
Iterations 7
| Dep. Variable: | DEATH_EVENT | No. Observations: | 209 |
| Model: | Logit | Df Residuals: | 204 |
| Method: | MLE | Df Model: | 4 |
| Date: | Thu, 25 Jul 2024 | Pseudo R-squ.: | 0.4031 |
| Time: | 22:10:43 | Log-Likelihood: | -78.694 |
| converged: | True | LL-Null: | -131.85 |
| Covariance Type: | nonrobust | LLR p-value: | 4.467e-22 |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
| const | 13.1838 | 6.067 | 2.173 | 0.030 | 1.293 | 25.074 |
| age | 0.0571 | 0.019 | 3.083 | 0.002 | 0.021 | 0.093 |
| ejection_fraction | -0.0847 | 0.021 | -4.029 | 0.000 | -0.126 | -0.043 |
| serum_sodium | -0.0894 | 0.043 | -2.075 | 0.038 | -0.174 | -0.005 |
| time | -0.0203 | 0.003 | -5.876 | 0.000 | -0.027 | -0.014 |
# One hypothetical observation, labelled 'Gerson', laid out with the same
# columns as the last fitted design matrix (constant first, then the four
# remaining predictors) so it can be passed to the fitted model's predict().
patient_values = {
    'const': [1.0],
    'age': [50],
    'ejection_fraction': [40],
    'serum_sodium': [136],
    'time': [100],
}
nd = pd.DataFrame(patient_values, index=['Gerson'])
resul5.predict(nd)Gerson 0.175449
dtype: float64
from sklearn.metrics import confusion_matrix,accuracy_score
Desempeño del modelo
# Predictors kept in the final design matrix X_m (everything except 'const').
colu = ['age', 'ejection_fraction', 'serum_sodium', 'time']
# Give the test split the same layout: the selected columns plus a constant.
X_test_m = sm.add_constant(X_test[colu])
# Model output for the training rows (values in [0, 1], thresholded later).
pred_train = resul5.predict(X_m)
pred_train.head()281 0.030590
44 0.136501
241 0.106965
189 0.012086
152 0.103600
dtype: float64
# Turn predicted probabilities into class labels with a 0.5 cut-off:
# strictly greater than 0.5 -> 1, otherwise 0. The vectorized comparison gives
# the same 0/1 int64 Series as a row-wise lambda without a Python-level loop.
pred_train = (pred_train > 0.5).astype('int64')
pred_train.head()281 0
44 0
241 0
189 0
152 0
dtype: int64
pred_test=resul5.predict(X_test_m)
pred_test.head()206 0.012945
188 0.082967
12 0.675642
219 0.040358
237 0.191790
dtype: float64
# Turn predicted probabilities into class labels with a 0.5 cut-off:
# strictly greater than 0.5 -> 1, otherwise 0. The vectorized comparison gives
# the same 0/1 int64 Series as a row-wise lambda without a Python-level loop.
pred_test = (pred_test > 0.5).astype('int64')
pred_test.head()206 0
188 0
12 1
219 0
237 0
dtype: int64
from sklearn.metrics import accuracy_score
accuracyTrain=accuracy_score(y_train,pred_train)
accuracyTrain0.8564593301435407
from sklearn.metrics import accuracy_score
accuracyTest=accuracy_score(y_test,pred_test)
accuracyTest0.8222222222222222
Forma alternativa. Regresión Logística en Python
X_m.columnsIndex(['const', 'age', 'ejection_fraction', 'serum_sodium', 'time'], dtype='object')
# Fit a binomial GLM (the summary reports a Logit link) through statsmodels'
# GLM interface.
# NOTE(review): this uses the full `y` and `X` (299 rows, all predictors)
# rather than the training split or the reduced X_m, and `X` has no constant
# column, so the resulting summary has no intercept row. Confirm this is
# intended; otherwise pass a design matrix built with sm.add_constant.
logit_glm=sm.GLM(y,X,family=sm.families.Binomial()).fit()
logit_glm.summary()| Dep. Variable: | DEATH_EVENT | No. Observations: | 299 |
| Model: | GLM | Df Residuals: | 287 |
| Model Family: | Binomial | Df Model: | 11 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -111.43 |
| Date: | Thu, 25 Jul 2024 | Deviance: | 222.85 |
| Time: | 22:10:43 | Pearson chi2: | 274. |
| No. Iterations: | 6 | Pseudo R-squ. (CS): | 0.3995 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
| age | 0.0523 | 0.015 | 3.387 | 0.001 | 0.022 | 0.083 |
| anaemia | -0.0374 | 0.357 | -0.105 | 0.916 | -0.737 | 0.662 |
| creatinine_phosphokinase | 0.0002 | 0.000 | 1.227 | 0.220 | -0.000 | 0.001 |
| diabetes | 0.2460 | 0.345 | 0.714 | 0.476 | -0.430 | 0.922 |
| ejection_fraction | -0.0757 | 0.016 | -4.724 | 0.000 | -0.107 | -0.044 |
| high_blood_pressure | -0.0773 | 0.355 | -0.218 | 0.828 | -0.774 | 0.619 |
| platelets | -1.109e-06 | 1.83e-06 | -0.606 | 0.544 | -4.69e-06 | 2.48e-06 |
| serum_creatinine | 0.7226 | 0.180 | 4.012 | 0.000 | 0.370 | 1.076 |
| serum_sodium | 0.0032 | 0.009 | 0.351 | 0.725 | -0.015 | 0.021 |
| sex | -0.4456 | 0.406 | -1.099 | 0.272 | -1.241 | 0.349 |
| smoking | -0.0163 | 0.409 | -0.040 | 0.968 | -0.817 | 0.785 |
| time | -0.0204 | 0.003 | -6.947 | 0.000 | -0.026 | -0.015 |