# Exercise 1
library(datasets)
data(iris)
# Simple linear regression: predict petal width from petal length
model <- lm(Petal.Width ~ Petal.Length, data = iris)
summary(model)
##
## Call:
## lm(formula = Petal.Width ~ Petal.Length, data = iris)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -0.56515 -0.12358 -0.01898  0.13288  0.64272
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.363076   0.039762  -9.131  4.7e-16 ***
## Petal.Length  0.415755   0.009582  43.387  < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2065 on 148 degrees of freedom
## Multiple R-squared: 0.9271, Adjusted R-squared: 0.9266
## F-statistic: 1882 on 1 and 148 DF, p-value: < 2.2e-16
# In-sample predictions and mean absolute error (MAE)
predictions <- predict(model, iris)
mae <- mean(abs(predictions - iris$Petal.Width))
print(paste("MAE:", round(mae, 4)))
## [1] "MAE: 0.1565"
The same regression, reproduced in Python with scikit-learn:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.datasets import load_iris
iris = load_iris(as_frame=True)
df = iris.frame
X = df[['petal length (cm)']].values  # 2-D feature matrix, as sklearn expects
y = df['petal width (cm)'].values
model = LinearRegression()
model.fit(X, y)
## LinearRegression()
y_pred = model.predict(X)
mae = mean_absolute_error(y, y_pred)
r2 = model.score(X, y)
print("MAE:",round(mae,4))
## MAE: 0.1565
print("Estimate:",round(model.coef_[0],4))
## Estimate: 0.4158
print("Intercept:",round(model.intercept_,4))
## Intercept: -0.3631
print("R2:",round(r2,4))
## R2: 0.9271
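score() returns the coefficient of determination, R^2 = 1 - SSE/SST. A quick manual check (back in R, assuming `predictions` from Exercise 1 is still in scope):
# R^2 by hand: 1 - residual sum of squares / total sum of squares
sse <- sum((iris$Petal.Width - predictions)^2)
sst <- sum((iris$Petal.Width - mean(iris$Petal.Width))^2)
1 - sse / sst  # should match the R2 printed above (~0.9271)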
# Exercise 2
library(ggplot2)
library(lattice)
library(kmed)
library(caret)
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
data("heart")
head(heart)
##   age   sex cp trestbps chol   fbs restecg thalach exang oldpeak slope ca thal class
## 1  63  TRUE  1      145  233  TRUE       2     150 FALSE     2.3     3  0    6     0
## 2  67  TRUE  4      160  286 FALSE       2     108  TRUE     1.5     2  3    3     2
## 3  67  TRUE  4      120  229 FALSE       2     129  TRUE     2.6     2  2    7     1
## 4  37  TRUE  3      130  250 FALSE       0     187 FALSE     3.5     3  0    3     0
## 5  41 FALSE  2      130  204 FALSE       2     172 FALSE     1.4     1  0    3     0
## 6  56  TRUE  2      120  236 FALSE       0     178 FALSE     0.8     1  0    3     0
str(heart)
## 'data.frame': 297 obs. of 14 variables:
## $ age : num 63 67 67 37 41 56 62 57 63 53 ...
## $ sex : logi TRUE TRUE TRUE TRUE FALSE TRUE ...
## $ cp : Factor w/ 4 levels "1","2","3","4": 1 4 4 3 2 2 4 4 4 4 ...
## $ trestbps: num 145 160 120 130 130 120 140 120 130 140 ...
## $ chol : num 233 286 229 250 204 236 268 354 254 203 ...
## $ fbs : logi TRUE FALSE FALSE FALSE FALSE FALSE ...
## $ restecg : Factor w/ 3 levels "0","1","2": 3 3 3 1 3 1 3 1 3 3 ...
## $ thalach : num 150 108 129 187 172 178 160 163 147 155 ...
## $ exang : logi FALSE TRUE TRUE FALSE FALSE FALSE ...
## $ oldpeak : num 2.3 1.5 2.6 3.5 1.4 0.8 3.6 0.6 1.4 3.1 ...
## $ slope : Factor w/ 3 levels "1","2","3": 3 2 2 3 1 1 3 1 2 3 ...
## $ ca : num 0 3 2 0 0 0 2 0 1 0 ...
## $ thal : Factor w/ 3 levels "3","6","7": 2 1 3 1 1 1 1 1 3 3 ...
## $ class : int 0 2 1 0 0 0 3 0 2 1 ...
## - attr(*, "na.action")= 'omit' Named int [1:6] 88 167 193 267 288 303
## ..- attr(*, "names")= chr [1:6] "88" "167" "193" "267" ...
# Collapse the multi-level disease class to binary: 0 = healthy, anything else = sick
heart$class <- ifelse(heart$class == 0, "healthy", "sick")
heart$class <- as.factor(heart$class)
table(heart$class)
##
## healthy sick
## 160 137
str(heart$class)
## Factor w/ 2 levels "healthy","sick": 1 2 2 1 1 1 2 1 2 2 ...
set.seed(123)
train_index <- createDataPartition(heart$class, p = 0.7, list = FALSE)
train_data <- heart[train_index, ]
test_data <- heart[-train_index, ]
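createDataPartition() samples within each class, so the 70/30 split should preserve the healthy/sick ratio. A quick optional check:
# Verify the stratified split kept class proportions similar
prop.table(table(train_data$class))
prop.table(table(test_data$class))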
# Binary logistic regression with all predictors
model <- glm(class ~ ., data = train_data, family = binomial)
summary(model)
##
## Call:
## glm(formula = class ~ ., family = binomial, data = train_data)
##
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.246560   3.289284  -2.203  0.02759 *
## age         -0.028331   0.028546  -0.992  0.32097
## sexTRUE      1.514205   0.625478   2.421  0.01548 *
## cp2          1.208035   1.013398   1.192  0.23324
## cp3          0.883923   0.873644   1.012  0.31165
## cp4          2.582665   0.878841   2.939  0.00330 **
## trestbps     0.025075   0.013350   1.878  0.06035 .
## chol         0.003019   0.005130   0.588  0.55623
## fbsTRUE     -0.571879   0.715197  -0.800  0.42394
## restecg1     0.482948   2.782116   0.174  0.86219
## restecg2     0.174436   0.467335   0.373  0.70896
## thalach     -0.004318   0.012512  -0.345  0.73001
## exangTRUE    0.534539   0.517421   1.033  0.30156
## oldpeak      0.408027   0.266236   1.533  0.12538
## slope2       1.457107   0.588026   2.478  0.01321 *
## slope3       0.753410   1.028507   0.733  0.46385
## ca           1.146075   0.310219   3.694  0.00022 ***
## thal6        0.009722   0.959764   0.010  0.99192
## thal7        1.026444   0.512197   2.004  0.04507 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 287.12 on 207 degrees of freedom
## Residual deviance: 139.44 on 189 degrees of freedom
## AIC: 177.44
##
## Number of Fisher Scoring iterations: 6
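The coefficients above are on the log-odds scale; exponentiating turns them into odds ratios, which are easier to interpret. A minimal sketch using Wald intervals (confint.default) rather than profile-likelihood ones:
# Odds ratios with Wald 95% CIs (coefficients above are log-odds)
round(exp(cbind(OR = coef(model), confint.default(model))), 3)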
pred_prob <- predict(model, test_data, type = "response")  # P(class = "sick"), the second factor level
pred_class <- ifelse(pred_prob > 0.5, "sick", "healthy")   # default 0.5 cutoff
pred_class <- factor(pred_class, levels = levels(test_data$class))
conf_matrix <- confusionMatrix(pred_class, test_data$class, positive = "sick")
print(conf_matrix)
## Confusion Matrix and Statistics
##
##           Reference
## Prediction healthy sick
##    healthy      45    9
##    sick          3   32
##
## Accuracy : 0.8652
## 95% CI : (0.7763, 0.9283)
## No Information Rate : 0.5393
## P-Value [Acc > NIR] : 5.93e-11
##
## Kappa : 0.7257
##
## Mcnemar's Test P-Value : 0.1489
##
## Sensitivity : 0.7805
## Specificity : 0.9375
## Pos Pred Value : 0.9143
## Neg Pred Value : 0.8333
## Prevalence : 0.4607
## Detection Rate : 0.3596
## Detection Prevalence : 0.3933
## Balanced Accuracy : 0.8590
##
## 'Positive' Class : sick
##
accuracy <- conf_matrix$overall["Accuracy"]
precision <- conf_matrix$byClass["Precision"]
recall <- conf_matrix$byClass["Recall"]
cat("📌 Accuracy:", accuracy, "\n")
## 📌 Accuracy: 0.8651685
cat("📌 Precision:", precision, "\n")
## 📌 Precision: 0.9142857
cat("📌 Recall:", recall, "\n")
## 📌 Recall: 0.7804878
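pROC is attached above but never used; the predicted probabilities also support a threshold-free evaluation. A sketch of the ROC curve and AUC on the test set:
# ROC/AUC: pred_prob is P(class = "sick"), so "sick" is the case level
roc_obj <- roc(response = test_data$class, predictor = pred_prob,
               levels = c("healthy", "sick"))
auc(roc_obj)   # area under the ROC curve
plot(roc_obj)  # ROC curve for the test set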