# Exercise 1
library(datasets)
data(iris)
# Simple linear regression: predict petal width from petal length
model <- lm(Petal.Width ~ Petal.Length, data = iris)
summary(model)
##
## Call:
## lm(formula = Petal.Width ~ Petal.Length, data = iris)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -0.56515 -0.12358 -0.01898  0.13288  0.64272
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.363076   0.039762  -9.131  4.7e-16 ***
## Petal.Length  0.415755   0.009582  43.387  < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2065 on 148 degrees of freedom
## Multiple R-squared: 0.9271, Adjusted R-squared: 0.9266
## F-statistic: 1882 on 1 and 148 DF, p-value: < 2.2e-16
# In-sample predictions and mean absolute error (MAE)
predictions <- predict(model, iris)
mae <- mean(abs(predictions - iris$Petal.Width))
print(paste("MAE:", round(mae, 4)))
## [1] "MAE: 0.1565"
The same regression, reproduced in Python with scikit-learn:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.datasets import load_iris
iris = load_iris(as_frame=True)
df = iris.frame
X = df[['petal length (cm)']].values  # 2-D feature matrix, as sklearn expects
y = df['petal width (cm)'].values
model = LinearRegression()
model.fit(X, y)
## LinearRegression()
y_pred = model.predict(X)
mae = mean_absolute_error(y, y_pred)
r2 = model.score(X, y)
print("MAE:",round(mae,4))
## MAE: 0.1565
print("Estimate:",round(model.coef_[0],4))
## Estimate: 0.4158
print("Intercept:",round(model.intercept_,4))
## Intercept: -0.3631
print("R2:",round(r2,4))
## R2: 0.9271
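score() returns the coefficient of determination, R^2 = 1 - SSE/SST. A quick manual check (back in R, assuming `predictions` from Exercise 1 is still in scope):
# R^2 by hand: 1 - residual sum of squares / total sum of squares
sse <- sum((iris$Petal.Width - predictions)^2)
sst <- sum((iris$Petal.Width - mean(iris$Petal.Width))^2)
1 - sse / sst  # should match the R2 printed above (~0.9271)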
# Exercise 2
library(ggplot2)
library(lattice)
library(kmed)
library(caret)
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
data("heart")
head(heart)
##   age   sex cp trestbps chol   fbs restecg thalach exang oldpeak slope ca thal class
## 1  63  TRUE  1      145  233  TRUE       2     150 FALSE     2.3     3  0    6     0
## 2  67  TRUE  4      160  286 FALSE       2     108  TRUE     1.5     2  3    3     2
## 3  67  TRUE  4      120  229 FALSE       2     129  TRUE     2.6     2  2    7     1
## 4  37  TRUE  3      130  250 FALSE       0     187 FALSE     3.5     3  0    3     0
## 5  41 FALSE  2      130  204 FALSE       2     172 FALSE     1.4     1  0    3     0
## 6  56  TRUE  2      120  236 FALSE       0     178 FALSE     0.8     1  0    3     0
str(heart)
## 'data.frame': 297 obs. of 14 variables:
## $ age : num 63 67 67 37 41 56 62 57 63 53 ...
## $ sex : logi TRUE TRUE TRUE TRUE FALSE TRUE ...
## $ cp : Factor w/ 4 levels "1","2","3","4": 1 4 4 3 2 2 4 4 4 4 ...
## $ trestbps: num 145 160 120 130 130 120 140 120 130 140 ...
## $ chol : num 233 286 229 250 204 236 268 354 254 203 ...
## $ fbs : logi TRUE FALSE FALSE FALSE FALSE FALSE ...
## $ restecg : Factor w/ 3 levels "0","1","2": 3 3 3 1 3 1 3 1 3 3 ...
## $ thalach : num 150 108 129 187 172 178 160 163 147 155 ...
## $ exang : logi FALSE TRUE TRUE FALSE FALSE FALSE ...
## $ oldpeak : num 2.3 1.5 2.6 3.5 1.4 0.8 3.6 0.6 1.4 3.1 ...
## $ slope : Factor w/ 3 levels "1","2","3": 3 2 2 3 1 1 3 1 2 3 ...
## $ ca : num 0 3 2 0 0 0 2 0 1 0 ...
## $ thal : Factor w/ 3 levels "3","6","7": 2 1 3 1 1 1 1 1 3 3 ...
## $ class : int 0 2 1 0 0 0 3 0 2 1 ...
## - attr(*, "na.action")= 'omit' Named int [1:6] 88 167 193 267 288 303
## ..- attr(*, "names")= chr [1:6] "88" "167" "193" "267" ...
# Collapse the multi-level disease class to binary: 0 = healthy, anything else = sick
heart$class <- ifelse(heart$class == 0, "healthy", "sick")
heart$class <- as.factor(heart$class)
table(heart$class)
##
## healthy sick
## 160 137
str(heart$class)
## Factor w/ 2 levels "healthy","sick": 1 2 2 1 1 1 2 1 2 2 ...
set.seed(123)
train_index <- createDataPartition(heart$class, p = 0.7, list = FALSE)
train_data <- heart[train_index, ]
test_data <- heart[-train_index, ]
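createDataPartition() samples within each class, so the 70/30 split should preserve the healthy/sick ratio. A quick optional check:
# Verify the stratified split kept class proportions similar
prop.table(table(train_data$class))
prop.table(table(test_data$class))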
# Binary logistic regression with all predictors
model <- glm(class ~ ., data = train_data, family = binomial)
summary(model)
##
## Call:
## glm(formula = class ~ ., family = binomial, data = train_data)
##
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.246560   3.289284  -2.203  0.02759 *
## age         -0.028331   0.028546  -0.992  0.32097
## sexTRUE      1.514205   0.625478   2.421  0.01548 *
## cp2          1.208035   1.013398   1.192  0.23324
## cp3          0.883923   0.873644   1.012  0.31165
## cp4          2.582665   0.878841   2.939  0.00330 **
## trestbps     0.025075   0.013350   1.878  0.06035 .
## chol         0.003019   0.005130   0.588  0.55623
## fbsTRUE     -0.571879   0.715197  -0.800  0.42394
## restecg1     0.482948   2.782116   0.174  0.86219
## restecg2     0.174436   0.467335   0.373  0.70896
## thalach     -0.004318   0.012512  -0.345  0.73001
## exangTRUE    0.534539   0.517421   1.033  0.30156
## oldpeak      0.408027   0.266236   1.533  0.12538
## slope2       1.457107   0.588026   2.478  0.01321 *
## slope3       0.753410   1.028507   0.733  0.46385
## ca           1.146075   0.310219   3.694  0.00022 ***
## thal6        0.009722   0.959764   0.010  0.99192
## thal7        1.026444   0.512197   2.004  0.04507 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 287.12 on 207 degrees of freedom
## Residual deviance: 139.44 on 189 degrees of freedom
## AIC: 177.44
##
## Number of Fisher Scoring iterations: 6
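The coefficients above are on the log-odds scale; exponentiating turns them into odds ratios, which are easier to interpret. A minimal sketch using Wald intervals (confint.default) rather than profile-likelihood ones:
# Odds ratios with Wald 95% CIs (coefficients above are log-odds)
round(exp(cbind(OR = coef(model), confint.default(model))), 3)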
pred_prob <- predict(model, test_data, type = "response")  # P(class = "sick"), the second factor level
pred_class <- ifelse(pred_prob > 0.5, "sick", "healthy")   # default 0.5 cutoff
pred_class <- factor(pred_class, levels = levels(test_data$class))
conf_matrix <- confusionMatrix(pred_class, test_data$class, positive = "sick")
print(conf_matrix)
## Confusion Matrix and Statistics
##
##           Reference
## Prediction healthy sick
##    healthy      45    9
##    sick          3   32
##
## Accuracy : 0.8652
## 95% CI : (0.7763, 0.9283)
## No Information Rate : 0.5393
## P-Value [Acc > NIR] : 5.93e-11
##
## Kappa : 0.7257
##
## Mcnemar's Test P-Value : 0.1489
##
## Sensitivity : 0.7805
## Specificity : 0.9375
## Pos Pred Value : 0.9143
## Neg Pred Value : 0.8333
## Prevalence : 0.4607
## Detection Rate : 0.3596
## Detection Prevalence : 0.3933
## Balanced Accuracy : 0.8590
##
## 'Positive' Class : sick
##
accuracy <- conf_matrix$overall["Accuracy"]
precision <- conf_matrix$byClass["Precision"]
recall <- conf_matrix$byClass["Recall"]
cat("📌 Accuracy:", accuracy, "\n")
## 📌 Accuracy: 0.8651685
cat("📌 Precision:", precision, "\n")
## 📌 Precision: 0.9142857
cat("📌 Recall:", recall, "\n")
## 📌 Recall: 0.7804878
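pROC is attached above but never used; the predicted probabilities also support a threshold-free evaluation. A sketch of the ROC curve and AUC on the test set:
# ROC/AUC: pred_prob is P(class = "sick"), so "sick" is the case level
roc_obj <- roc(response = test_data$class, predictor = pred_prob,
               levels = c("healthy", "sick"))
auc(roc_obj)   # area under the ROC curve
plot(roc_obj)  # ROC curve for the test set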