# Question 1: linear regression on the built-in airquality data ----
# Make the dataset available in the workspace.
data("airquality")
# Keep only complete observations (rows containing any NA are dropped).
air <- na.omit(object = airquality)
# Fit a linear model of Ozone on every other column of the data.
model <- lm(formula = Ozone ~ ., data = air)
# Predict on the same rows the model was fitted on (in-sample predictions).
pred <- predict(object = model, newdata = air)
# Mean absolute error between the observed Ozone values and the predictions.
mae <- with(air, mean(abs(Ozone - pred)))
# Show the result.
print(mae)
## [1] 14.78897
# Inspect the fitted model: coefficients, residuals and goodness of fit.
summary(model)
##
## Call:
## lm(formula = Ozone ~ ., data = air)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.014 -12.284 -3.302 8.454 95.348
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -64.11632 23.48249 -2.730 0.00742 **
## Solar.R 0.05027 0.02342 2.147 0.03411 *
## Wind -3.31844 0.64451 -5.149 1.23e-06 ***
## Temp 1.89579 0.27389 6.922 3.66e-10 ***
## Month -3.03996 1.51346 -2.009 0.04714 *
## Day 0.27388 0.22967 1.192 0.23576
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.86 on 105 degrees of freedom
## Multiple R-squared: 0.6249, Adjusted R-squared: 0.6071
## F-statistic: 34.99 on 5 and 105 DF, p-value: < 2.2e-16
# Question 2: logistic regression evaluated with a ROC curve and AUC ----
# Load the libraries (mlbench provides the dataset, pROC the ROC tools).
library(mlbench)
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
# Load the data.
data(PimaIndiansDiabetes)
df <- PimaIndiansDiabetes
# Split the rows into a 70% training set and a 30% test set.
# The seed makes the random split reproducible.
set.seed(123)
# seq_len() is safe even for a zero-row data frame (1:0 would yield c(1, 0)),
# and floor() makes explicit the truncation that sample() otherwise applies
# silently to a fractional size.
index <- sample(seq_len(nrow(df)), size = floor(0.7 * nrow(df)))
train <- df[index, ]
test <- df[-index, ]
# Fit a logistic regression of diabetes on all other columns.
# Note: this rebinds `model`, replacing the linear model from question 1.
model <- glm(diabetes ~ ., data = train, family = binomial)
# Predicted probabilities on the held-out test set.
prob <- predict(model, newdata = test, type = "response")
# Build the ROC curve. The class order (control = "neg", case = "pos") and
# the direction (controls score lower than cases) are stated explicitly so
# pROC does not have to guess them from the data.
roc_curve <- roc(
  response = test$diabetes,
  predictor = prob,
  levels = c("neg", "pos"),
  direction = "<"
)
# Area under the ROC curve.
auc_value <- auc(roc_curve)
# Show the result.
print(auc_value)
## Area under the curve: 0.8445
# Plot the ROC curve.
plot(roc_curve, col = "blue", main = "ROC Curve")
