# Load the data set from a semicolon-delimited CSV in the working directory.
wine <- read.csv("winequality-white.csv", sep = ";")

# Quick look at column types and per-column summary statistics.
str(wine)
summary(wine)

# Scatter plot of the quality score against alcohol content.
plot(
  wine$alcohol, wine$quality,
  xlab = "Alcohol (%)",
  ylab = "Quality",
  main = "Alcohol vs Wine Quality"
)
# Linear regression of quality on alcohol, density, pH and volatile acidity,
# fitted on the full `wine` data frame.
model_lm <- lm(
  quality ~ alcohol + density + pH + volatile.acidity,
  data = wine
)
summary(model_lm)

# Goodness of fit: sum of squared residuals and R-squared of the fitted model.
SSE_lm <- sum(resid(model_lm)^2)
R2_lm <- summary(model_lm)$r.squared
SSE_lm
R2_lm

# In-sample predictions (the model is evaluated on the data it was fitted to).
pred_lm <- predict(model_lm, newdata = wine)
head(pred_lm)
# Install the required packages only when they are missing, so that re-running
# the script does not reinstall them every time, then attach them.
if (!requireNamespace("caTools", quietly = TRUE)) {
  install.packages("caTools")
}
if (!requireNamespace("ROCR", quietly = TRUE)) {
  install.packages("ROCR")
}
library(caTools) # sample.split()
library(ROCR)    # prediction(), performance()
# Fresh copy of the data for the classification task.
wine_log <- read.csv("winequality-white.csv", sep = ";")

# Binary target: "yes" when quality >= 7, otherwise "no". The level order is
# fixed so that "no" is the reference level and "yes" is the modelled event.
wine_log$y <- factor(
  ifelse(wine_log$quality >= 7, "yes", "no"),
  levels = c("no", "yes")
)
# Drop the raw score so it cannot be used as a predictor of its own threshold.
wine_log$quality <- NULL

# Class balance of the new target.
table(wine_log$y)

# Reproducible 65/35 train/test split on the target via caTools::sample.split.
# NOTE: the name `split` masks base::split() for the rest of the script.
set.seed(937)
split <- sample.split(wine_log$y, SplitRatio = 0.65)

train <- subset(wine_log, split)
test <- subset(wine_log, !split)

nrow(train)
nrow(test)
# Logistic regression of the binary target on all remaining columns,
# fitted on the training partition only.
model_log <- glm(y ~ ., data = train, family = binomial())
summary(model_log)

# Predicted probabilities on the held-out test partition.
predictTest <- predict(model_log, newdata = test, type = "response")
head(predictTest)

# Classify at a 0.5 probability cut-off, keeping the same level order as the
# target so the confusion matrix rows and columns line up.
pred_class <- factor(
  ifelse(predictTest >= 0.5, "yes", "no"),
  levels = c("no", "yes")
)

# Confusion matrix: predicted class (rows) vs actual class (columns).
table(Predicted = pred_class, Actual = test$y)
# ROC curve on the test partition: true-positive rate against
# false-positive rate, built with ROCR.
pred_rocr <- prediction(predictTest, test$y)
perf_roc <- performance(pred_rocr, "tpr", "fpr")
plot(perf_roc, main = "ROC Curve – Logistic Regression")

# Area under the ROC curve; the value is stored in the `y.values` slot
# of the performance object.
auc_val <- performance(pred_rocr, "auc")
as.numeric(auc_val@y.values)