KTKN3

# Open reticulate's interactive Python REPL from the R session.
reticulate::repl_python()

## Python 3.12.9 (C:/Users/ADMIN/anaconda3/python.exe)
## Reticulate 1.40.0 REPL -- A Python interpreter in R.
## Enter 'exit' or 'quit' to exit the REPL and return to R.

## exit

# Tell reticulate which Python executable to use. The variable is set before
# the REPL is started on the following line.
# NOTE(review): the interpreter path is specific to this machine.
Sys.setenv(RETICULATE_PYTHON = "C:/Users/ADMIN/anaconda3/python.exe")
# Start the interactive Python REPL.
reticulate::repl_python()

## Python 3.12.9 (C:/Users/ADMIN/anaconda3/python.exe)
## Reticulate 1.40.0 REPL -- A Python interpreter in R.
## Enter 'exit' or 'quit' to exit the REPL and return to R.

## exit

import pandas as pd

# Column names to assign to the Iris data (passed explicitly via `names=`)
# and the URL the raw data is fetched from.
iris_columns = [
    "Sepal.Length",
    "Sepal.Width",
    "Petal.Length",
    "Petal.Width",
    "Species",
]
iris_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

# Read the data straight from the web, then save a local copy as "iris.csv"
# (relative path, so it is written to the current working directory).
iris = pd.read_csv(filepath_or_buffer=iris_url, names=iris_columns)
iris.to_csv(path_or_buf="iris.csv", index=False)

print("Tải xong iris.csv")

## Tải xong iris.csv

import os
# Show the current working directory of the Python session.
print("Thư mục làm việc:", os.getcwd())

## Thư mục làm việc: C:\Users\ADMIN\Documents

# List the entries of the working directory (os.listdir() with no argument).
print("Các tệp trong thư mục:", os.listdir())

## Các tệp trong thư mục: ['.RData', '.Rhistory', '.virtualenvs', 'anaconda_projects', 'baiKTKN3.Rmd', 'bth2.html', 'bth2.Rmd', 'desktop.ini', 'framingham.csv', 'iris.csv', 'My Music', 'My Pictures', 'My Videos', 'Mẫu Office Tùy chỉnh', 'rsconnect', 'Zalo Received Files']

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, roc_curve, auc

# Directory that holds the input CSV files. It defaults to the original
# location but can be overridden through the KTKN3_DATA_DIR environment
# variable, so the analysis is not tied to one machine's absolute path.
data_path = os.environ.get("KTKN3_DATA_DIR", "C:/Users/ADMIN/Documents")

iris_file = os.path.join(data_path, "iris.csv")
heart_file = os.path.join(data_path, "framingham.csv")

# Load both datasets into DataFrames.
iris = pd.read_csv(iris_file)
heart = pd.read_csv(heart_file)

# Linear regression on the Iris data: predict petal length from the two
# sepal measurements.
iris_features = ['Sepal.Length', 'Sepal.Width']
X_iris = iris.loc[:, iris_features]
y_iris = iris.loc[:, 'Petal.Length']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_iris,
    y_iris,
    random_state=42,
    test_size=0.2,
)

model_lr = LinearRegression()
model_lr.fit(X_train, y_train)

LinearRegression()

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

# Predict on the held-out rows and score the predictions with R².
y_pred = model_lr.predict(X_test)
r2 = r2_score(y_test, y_pred)

# Adjusted R² uses n = number of test rows and p = number of predictors,
# both taken from the shape of X_test.
n, p = X_test.shape
unexplained_scaled = (1 - r2) * (n - 1)
adj_r2 = 1 - unexplained_scaled / (n - p - 1)

# Print the linear-regression results: a header line, then R² and adjusted R²
# formatted to four decimal places.
print("Hồi quy tuyến tính:")  # plain literal: no placeholders, so no f-prefix

## Hồi quy tuyến tính:

print(f"   R²: {r2:.4f}")

##    R²: 0.8932

print(f"   Adjusted R²: {adj_r2:.4f}")

##    Adjusted R²: 0.8853

# Logistic regression on the heart data.
# Keep only the rows that have no missing values.
heart = heart.dropna()

# Separate the outcome column from the predictors (every other column).
target_column = 'TenYearCHD'
y_heart = heart[target_column]
X_heart = heart.drop(target_column, axis=1)

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_heart,
    y_heart,
    random_state=42,
    test_size=0.2,
)

# Standardise the predictors: the scaler is fitted on the training rows only
# and then applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model_logistic = LogisticRegression(max_iter=5000)
model_logistic.fit(X_train, y_train)

LogisticRegression(max_iter=5000)

# Take column index 1 of the predicted probabilities as the score for the ROC
# curve (presumably the CHD-positive class; confirm against model_logistic.classes_).
class_probabilities = model_logistic.predict_proba(X_test)
y_probs = class_probabilities[:, 1]

# roc_curve returns three arrays; the third one is not used here.
roc_points = roc_curve(y_test, y_probs)
fpr, tpr, _ = roc_points
roc_auc = auc(fpr, tpr)

# Print the logistic-regression results: a header preceded by a blank line,
# then the AUC formatted to four decimal places.
print("\nHồi quy Logistic:")  # plain literal: no placeholders, so no f-prefix

## 
## Hồi quy Logistic:

print(f"   AUC Score: {roc_auc:.4f}")

##    AUC Score: 0.6994

# Plot the ROC curve from the fpr/tpr arrays, using pyplot's current-figure
# interface. Start a new figure created with figsize=(8, 6).
plt.figure(figsize=(8, 6))

## <Figure size 800x600 with 0 Axes>

# The ROC curve itself; the legend label embeds the AUC to two decimals.
plt.plot(fpr, tpr, color='blue', label=f'ROC curve (AUC = {roc_auc:.2f})')

## [<matplotlib.lines.Line2D object at 0x0000013647510410>]

# Dashed grey diagonal from (0, 0) to (1, 1), drawn without a legend label.
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')

## [<matplotlib.lines.Line2D object at 0x000001364771B2C0>]

# Axis labels.
plt.xlabel('False Positive Rate')

## Text(0.5, 0, 'False Positive Rate')

plt.ylabel('True Positive Rate')

## Text(0, 0.5, 'True Positive Rate')

# Figure title.
plt.title('ROC Curve for Heart Disease Prediction')

## Text(0.5, 1.0, 'ROC Curve for Heart Disease Prediction')

# Legend built from the labelled line above.
plt.legend()

## <matplotlib.legend.Legend object at 0x0000013647740830>

# Render the finished figure.
plt.show()

KTKN3

Đinh Xuân Lực

2025-03-13