Câu 1:

# Bring the built-in iris data set into the workspace.
data("iris")

# Linear regression of petal length on the two sepal measurements,
# followed by the usual coefficient / fit summary.
model_lm <- lm(
  formula = Petal.Length ~ Sepal.Length + Sepal.Width,
  data = iris
)
summary(model_lm)
## 
## Call:
## lm(formula = Petal.Length ~ Sepal.Length + Sepal.Width, data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.25582 -0.46922 -0.05741  0.45530  1.75599 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.52476    0.56344  -4.481 1.48e-05 ***
## Sepal.Length  1.77559    0.06441  27.569  < 2e-16 ***
## Sepal.Width  -1.33862    0.12236 -10.940  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6465 on 147 degrees of freedom
## Multiple R-squared:  0.8677, Adjusted R-squared:  0.8659 
## F-statistic:   482 on 2 and 147 DF,  p-value: < 2.2e-16
# Draw the lm diagnostic plots together on one 2 x 2 page.
# par() returns the previous settings; they are restored afterwards so the
# layout change does not leak into later plots on the same device.
old_par <- par(mfrow = c(2, 2))
plot(model_lm)
par(old_par)

Câu 2:

library(mlbench)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Load the Pima Indians diabetes data set.
data("PimaIndiansDiabetes")

# Logistic regression of diabetes status on every other column.
model_logit <- glm(
  diabetes ~ .,
  family = binomial,
  data = PimaIndiansDiabetes
)

# Fitted probabilities for the rows the model was trained on, cut at 0.5
# into class labels; both levels are fixed so the table below is always 2 x 2.
prob <- predict(model_logit, type = "response")
pred <- factor(
  ifelse(prob > 0.5, "pos", "neg"),
  levels = c("neg", "pos")
)

# Confusion matrix: rows are predicted labels, columns are observed labels.
cm <- table(Predicted = pred, Actual = PimaIndiansDiabetes$diabetes)

# Cell counts used by the positive-class metrics.
TP <- cm["pos", "pos"]
FP <- cm["pos", "neg"]
FN <- cm["neg", "pos"]

# Precision, recall, and their harmonic mean (F1) for the "pos" class.
precision <- TP / (TP + FP)
recall <- TP / (TP + FN)
f1 <- 2 * precision * recall / (precision + recall)

# Show the three positive-class metrics; the `##` lines are the recorded output.
print(precision)
## [1] 0.7393365
print(recall)
## [1] 0.5820896
print(f1)
## [1] 0.651357