# Question 1: linear regression on the airquality data, scored with MAE.
# Load the data set
data("airquality")

# Keep only the rows that contain no missing values
air <- na.omit(airquality)

# Fit a linear model of Ozone on every other column
model <- lm(formula = Ozone ~ ., data = air)

# Predict on the same rows the model was fitted on
pred <- predict(model, newdata = air)

# Mean absolute error between predicted and observed Ozone (in-sample)
mae <- mean(abs(pred - air[["Ozone"]]))

# Print the result
print(mae)
## [1] 14.78897
# Show the model summary
summary(model)
## 
## Call:
## lm(formula = Ozone ~ ., data = air)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.014 -12.284  -3.302   8.454  95.348 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -64.11632   23.48249  -2.730  0.00742 ** 
## Solar.R       0.05027    0.02342   2.147  0.03411 *  
## Wind         -3.31844    0.64451  -5.149 1.23e-06 ***
## Temp          1.89579    0.27389   6.922 3.66e-10 ***
## Month        -3.03996    1.51346  -2.009  0.04714 *  
## Day           0.27388    0.22967   1.192  0.23576    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.86 on 105 degrees of freedom
## Multiple R-squared:  0.6249, Adjusted R-squared:  0.6071 
## F-statistic: 34.99 on 5 and 105 DF,  p-value: < 2.2e-16
# Question 2: logistic regression on PimaIndiansDiabetes, evaluated by ROC/AUC.
# Load libraries
library(mlbench)
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
# Load the data
data(PimaIndiansDiabetes)

df <- PimaIndiansDiabetes

# Split into train (70%) and test (remaining rows); the seed makes the split
# reproducible.
set.seed(123)
# seq_len() stays correct for a zero-row data frame (1:0 would yield c(1, 0)),
# and floor() makes explicit the truncation that sample() otherwise applies
# silently to a fractional size. The drawn indices are unchanged.
index <- sample(seq_len(nrow(df)), floor(0.7 * nrow(df)))

train <- df[index, ]
test <- df[-index, ]

# Fit a logistic regression of diabetes on all other columns
model <- glm(diabetes ~ ., data = train, family = binomial)

# Predicted probabilities on the test set
prob <- predict(model, newdata = test, type = "response")

# Compute the ROC curve.
# levels and direction are stated explicitly instead of letting pROC guess
# them from the data: automatic direction selection can flip a
# worse-than-chance predictor and report an optimistic AUC. These are the same
# settings pROC previously auto-selected (control = neg, case = pos;
# controls < cases), so the result is unchanged and the "Setting levels" /
# "Setting direction" messages are no longer emitted.
roc_curve <- roc(
  response = test$diabetes,
  predictor = prob,
  levels = c("neg", "pos"),
  direction = "<"
)
# Compute the AUC
auc_value <- auc(roc_curve)

# Print the result
print(auc_value)
## Area under the curve: 0.8445
# Plot the ROC curve
plot(roc_curve, col = "blue", main = "ROC Curve")