bkn3

# Question 1: multiple linear regression on the built-in iris data set.
# Petal.Length is the response; Sepal.Length and Sepal.Width are the
# predictors.
data("iris")
model <- lm(
  Petal.Length ~ Sepal.Length + Sepal.Width,
  data = iris
)

# Show the coefficient table, residual quantiles and goodness-of-fit
# statistics for the fitted model.
print(summary(model))

## 
## Call:
## lm(formula = Petal.Length ~ Sepal.Length + Sepal.Width, data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.25582 -0.46922 -0.05741  0.45530  1.75599 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.52476    0.56344  -4.481 1.48e-05 ***
## Sepal.Length  1.77559    0.06441  27.569  < 2e-16 ***
## Sepal.Width  -1.33862    0.12236 -10.940  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6465 on 147 degrees of freedom
## Multiple R-squared:  0.8677, Adjusted R-squared:  0.8659 
## F-statistic:   482 on 2 and 147 DF,  p-value: < 2.2e-16

# Check the model assumptions with a residual plot: the residuals of `model`
# are drawn against its fitted values.
residuals <- resid(model)
plot(
  x = fitted(model),
  y = residuals,
  main = "Residual Plot",
  xlab = "Fitted Values (Predicted Petal Length)",
  ylab = "Residuals",
  pch = 19,
  col = "blue"
)
# Add the horizontal reference line y = 0.
abline(h = 0, col = "red", lwd = 2)

# Linear regression on the Boston data set
# Install MASS only when it is not already available. An unconditional
# install.packages() call downloads the package again and needs network
# access every time the script is run.
if (!requireNamespace("MASS", quietly = TRUE)) {
  install.packages("MASS", repos = "https://cloud.r-project.org")
}

## package 'MASS' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\admin\AppData\Local\Temp\Rtmpi82sEx\downloaded_packages

library(MASS)

## Warning: package 'MASS' was built under R version 4.4.3

# Load the Boston data set.
data(Boston)

# Fit a linear regression of medv on crim and indus.
tinh_model <- lm(
  medv ~ crim + indus,
  data = Boston
)

# Inspect the significance of the regression coefficients.
print(summary(tinh_model))

## 
## Call:
## lm(formula = medv ~ crim + indus, data = Boston)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.011  -4.876  -1.683   3.024  32.491 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 29.24829    0.67046  43.624  < 2e-16 ***
## crim        -0.24548    0.04434  -5.536 4.99e-08 ***
## indus       -0.52335    0.05559  -9.414  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.83 on 503 degrees of freedom
## Multiple R-squared:  0.278,  Adjusted R-squared:  0.2751 
## F-statistic: 96.83 on 2 and 503 DF,  p-value: < 2.2e-16

# Logistic regression on the PimaIndiansDiabetes data set
# Install mlbench only when it is not already available. An unconditional
# install.packages() call downloads the package again and needs network
# access every time the script is run.
if (!requireNamespace("mlbench", quietly = TRUE)) {
  install.packages("mlbench", repos = "https://cloud.r-project.org")
}

## package 'mlbench' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\admin\AppData\Local\Temp\Rtmpi82sEx\downloaded_packages

library(mlbench)

## Warning: package 'mlbench' was built under R version 4.4.3

# caret is used for model evaluation. Install it only when it is not already
# available, instead of downloading it again on every run.
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret", repos = "https://cloud.r-project.org")
}

## package 'caret' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\admin\AppData\Local\Temp\Rtmpi82sEx\downloaded_packages

library(caret)

## Warning: package 'caret' was built under R version 4.4.3

## Loading required package: ggplot2

## Loading required package: lattice

# Load the data.
data(PimaIndiansDiabetes)

# Split the data into a training set and a test set. The seed is fixed so the
# partition is reproducible; p = 0.7 sends 70% of the rows to training.
set.seed(123)
index <- createDataPartition(
  y = PimaIndiansDiabetes[["diabetes"]],
  p = 0.7,
  list = FALSE
)
train_data <- PimaIndiansDiabetes[index, ]
test_data <- PimaIndiansDiabetes[-index, ]

# Fit the logistic regression model of diabetes on all other columns.
logistic_model <- glm(
  diabetes ~ .,
  data = train_data,
  family = binomial
)

# Predict on the test set: a row is labelled "pos" when its predicted
# probability exceeds 0.5, and "neg" otherwise.
prob_pred <- predict(
  logistic_model,
  newdata = test_data,
  type = "response"
)
pred_class <- ifelse(prob_pred > 0.5, "pos", "neg")

# Build the confusion matrix (predicted classes first, reference labels
# second). positive = "pos" is passed explicitly: with two factor levels
# confusionMatrix() otherwise uses the first level ("neg") as the positive
# class, so Precision and Recall would describe the "neg" class.
confusion_mat <- confusionMatrix(
  data = factor(pred_class, levels = c("neg", "pos")),
  reference = factor(test_data$diabetes, levels = c("neg", "pos")),
  positive = "pos"
)

# Compute precision, recall and F1-score for the "pos" class.
# [[ ]] extracts bare numbers; with [ ] each value keeps its element name and
# c() then produces labels such as "Precision.Precision" and
# "F1_Score.Precision".
precision <- confusion_mat$byClass[["Precision"]]
recall <- confusion_mat$byClass[["Recall"]]
f1_score <- 2 * (precision * recall) / (precision + recall)

# Print the results.
print(c(Precision = precision, Recall = recall, F1_Score = f1_score))

## Precision    Recall  F1_Score 
## 0.7014925 0.5875000 0.6394558

bkn3

ngô thị hồng ngọc

2025-03-13