# Question 1 (Cau_1): linear regression on iris with residual diagnostics
library(ggplot2)
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
# Load the built-in iris data and regress sepal length on sepal width and
# petal length by ordinary least squares.
data(iris)
model_lm <- lm(
  Sepal.Length ~ Sepal.Width + Petal.Length,
  data = iris
)
# Coefficient table, residual quantiles and overall fit statistics.
summary(model_lm)
##
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width + Petal.Length, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.96159 -0.23489 0.00077 0.21453 0.78557
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.24914 0.24797 9.07 7.04e-16 ***
## Sepal.Width 0.59552 0.06933 8.59 1.16e-14 ***
## Petal.Length 0.47192 0.01712 27.57 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3333 on 147 degrees of freedom
## Multiple R-squared: 0.8402, Adjusted R-squared: 0.838
## F-statistic: 386.4 on 2 and 147 DF, p-value: < 2.2e-16
# Residual diagnostics for model_lm: histogram and Q-Q plot side by side.
# requireNamespace() only checks that car is installed (it does not attach the
# package); library() then attaches it and stops with an error if it is still
# unavailable, instead of continuing with a FALSE return value.
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car", dependencies = TRUE)
}
library(car)
# par() returns the previous settings, so the layout that was active before
# this section can be restored exactly rather than forced to a single panel.
old_par <- par(mfrow = c(1, 2))
hist(resid(model_lm),
  main = "Histogram of Residuals",
  xlab = "Residuals", col = "lightblue", breaks = 20
)
# qqPlot() is provided by car; its printed return value is shown below.
qqPlot(resid(model_lm), main = "Q-Q Plot of Residuals")

## [1] 107 136
par(old_par)
# Question 2 (Cau_2): logistic regression on Titanic survival with a held-out test set
# Install titanic and caret only when they are missing. requireNamespace()
# checks availability without attaching; the library() calls below attach the
# packages and fail loudly if installation did not succeed.
if (!requireNamespace("titanic", quietly = TRUE)) {
  install.packages("titanic", dependencies = TRUE)
}
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret", dependencies = TRUE)
}
library(titanic)
## Warning: package 'titanic' was built under R version 4.4.3
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
# Load the training split shipped with the titanic package and preview it.
data("titanic_train")
head(titanic_train)
## PassengerId Survived Pclass
## 1 1 0 3
## 2 2 1 1
## 3 3 1 3
## 4 4 1 1
## 5 5 0 3
## 6 6 0 3
## Name Sex Age SibSp Parch
## 1 Braund, Mr. Owen Harris male 22 1 0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female 38 1 0
## 3 Heikkinen, Miss. Laina female 26 0 0
## 4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 0
## 5 Allen, Mr. William Henry male 35 0 0
## 6 Moran, Mr. James male NA 0 0
## Ticket Fare Cabin Embarked
## 1 A/5 21171 7.2500 S
## 2 PC 17599 71.2833 C85 C
## 3 STON/O2. 3101282 7.9250 S
## 4 113803 53.1000 C123 S
## 5 373450 8.0500 S
## 6 330877 8.4583 Q
# Modelling frame: the outcome plus four predictors, complete rows only.
df <- na.omit(
  titanic_train[, c("Survived", "Pclass", "Sex", "Age", "Fare")]
)
# Encode sex as a numeric 0/1 indicator (1 = male); make the outcome a factor.
df$Sex <- as.numeric(df$Sex == "male")
df$Survived <- factor(df$Survived)
# Fixed seed so the partition below is reproducible; p = 0.8 of the rows go to
# the training set and the remainder is held out for testing.
set.seed(123)
train_index <- createDataPartition(df$Survived, p = 0.8, list = FALSE)
train_data <- df[train_index, ]
test_data <- df[-train_index, ]
# Logistic regression of survival on class, sex, age and fare.
model_logit <- glm(
  Survived ~ Pclass + Sex + Age + Fare,
  family = binomial,
  data = train_data
)
summary(model_logit)
##
## Call:
## glm(formula = Survived ~ Pclass + Sex + Age + Fare, family = binomial,
## data = train_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.261337 0.619074 6.883 5.84e-12 ***
## Pclass -1.124291 0.173174 -6.492 8.46e-11 ***
## Sex -2.422365 0.224278 -10.801 < 2e-16 ***
## Age -0.025361 0.008367 -3.031 0.00244 **
## Fare 0.001491 0.002390 0.624 0.53278
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 772.45 on 571 degrees of freedom
## Residual deviance: 534.66 on 567 degrees of freedom
## AIC: 544.66
##
## Number of Fisher Scoring iterations: 4
# Score the held-out rows and classify at a 0.5 probability cut-off.
predicted_probs <- predict(model_logit, newdata = test_data, type = "response")
# Build the predicted factor on the reference's own levels so both classes are
# always present, even if every prediction falls on one side of the cut-off
# (as.factor() would drop the unused level and the comparison would break).
predicted_classes <- factor(
  ifelse(predicted_probs > 0.5, 1, 0),
  levels = levels(test_data$Survived)
)
# Evaluate with survival ("1") as the positive class. Without `positive`,
# confusionMatrix() uses the first factor level ("0"), so the precision,
# recall and F1 reported below would describe non-survivors instead.
conf_matrix <- confusionMatrix(predicted_classes, test_data$Survived,
  positive = "1"
)
print(conf_matrix)
# NOTE(review): the transcript below was recomputed by hand from the
# confusion-matrix counts for positive = "1"; re-knit to confirm.
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 73 16
## 1 11 42
##
## Accuracy : 0.8099
## 95% CI : (0.7355, 0.8708)
## No Information Rate : 0.5915
## P-Value [Acc > NIR] : 2.405e-08
##
## Kappa : 0.6012
##
## Mcnemar's Test P-Value : 0.4414
##
## Sensitivity : 0.7241
## Specificity : 0.8690
## Pos Pred Value : 0.7925
## Neg Pred Value : 0.8202
## Prevalence : 0.4085
## Detection Rate : 0.2958
## Detection Prevalence : 0.3732
## Balanced Accuracy : 0.7966
##
## 'Positive' Class : 1
##
# Precision and recall for the positive class, and their harmonic mean (F1).
precision <- conf_matrix$byClass["Precision"]
recall <- conf_matrix$byClass["Recall"]
f1_score <- 2 * (precision * recall) / (precision + recall)
cat("Precision:", precision, "\n")
## Precision: 0.7924528
cat("Recall:", recall, "\n")
## Recall: 0.7241379
cat("F1-score:", f1_score, "\n")
## F1-score: 0.7567568