R Markdown

# Read the semicolon-separated red wine quality CSV and keep the result in
# `wine`; without the assignment the data frame is only printed and the
# preview below has nothing to refer to.
wine <- read.csv(
  "https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/wine/winequality-red.csv",
  sep = ";"
)

# Preview the first rows.
head(wine)

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print per-column summary statistics for `cars`. The `##` lines that follow
# appear to be the output of this call pasted in from an earlier run.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Red wine dataset
# The file is semicolon-separated, hence the explicit `sep`.
wine <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv",
  sep = ";"
)

# Check first few rows
head(wine)

# Get summary statistics
summary(wine$alcohol)
mean(wine$quality)
median(wine$quality)
sd(wine$quality)

# For multiple columns at once
# dplyr provides select() and the %>% pipe used here, so it is attached
# before the pipeline runs.
library(dplyr)

wine %>%
  select(alcohol, quality, pH) %>%
  summary()

# Attached separately: piping a data frame into library() is an error.
library(ggplot2)

# Histogram of the quality ratings using one-unit-wide bins.
ggplot(wine, aes(x = quality)) +
  geom_histogram(binwidth = 1, fill = "#876cd4ff", color = "white") +
  labs(
    title = "Distribution of Wine Quality Ratings",
    x = "Quality Score",
    y = "Count"
  ) +
  theme_minimal()

# Load libraries
library(ggplot2)
library(dplyr)

# Load the dataset (semicolon-separated CSV)
wine <- read.csv(
  "https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/wine/winequality-red.csv",
  sep = ";"
)

# First few rows
head(wine)

# Box plot of alcohol content, one box per quality score.
#
# scale_fill_manual() fails when it receives fewer colours than there are
# fill levels. The four base colours are therefore interpolated to exactly
# one colour per distinct quality score instead of being passed as-is.
base_colours <- c("#876cd4ff", "#D783D8", "#FF90A5", "#FFB071")
n_scores <- length(unique(wine$quality))
score_colours <- grDevices::colorRampPalette(base_colours)(n_scores)

ggplot(
  wine,
  aes(x = as.factor(quality), y = alcohol, fill = as.factor(quality))
) +
  geom_boxplot() +
  scale_fill_manual(values = score_colours) +
  labs(
    title = "Alcohol Content by Wine Quality",
    x = "Quality Score",
    y = "Alcohol (%)"
  ) +
  theme_minimal() +
  # The x axis already names each score, so the fill legend is redundant.
  theme(legend.position = "none")

# Correlation matrix
# Pairwise correlations of all columns, rounded to two decimals so the
# tile labels stay short.
cor_mat <- round(cor(wine), 2)

# Reshape to long format for ggplot: one row per (Var1, Var2) pair with the
# correlation in `value`. Base R's table conversion is used so this step does
# not depend on a reshaping package that provides melt() being attached.
melted <- as.data.frame(as.table(cor_mat), responseName = "value")

ggplot(melted, aes(Var1, Var2, fill = value)) +
  geom_tile() +
  geom_text(aes(label = value), size = 3) +
  scale_fill_gradient2(
    low = "#FF90A5", high = "#876cd4ff", mid = "white", midpoint = 0
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Correlation Heatmap of Wine Features")

# Load essential libraries
library(ggplot2)
library(dplyr)

# Load the dataset directly from GitHub
# The file is semicolon-separated, hence the explicit `sep`.
wine <- read.csv(
  "https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/wine/winequality-red.csv",
  sep = ";"
)

# Display the first few rows to confirm loading
head(wine)

library(caret)

# Bar chart counting how many wines received each quality score.
ggplot(wine, aes(x = quality)) +
  geom_bar(fill = "steelblue") +
  labs(title = "Wine Quality Distribution", x = "Quality", y = "Count")

# Box plot of alcohol content for each quality score.
# `quality` is converted to a factor: with a numeric x, geom_boxplot() does
# not split the data into groups and draws a single box for all wines.
ggplot(
  wine,
  aes(x = as.factor(quality), y = alcohol, fill = as.factor(quality))
) +
  geom_boxplot() +
  labs(
    title = "Alcohol Content vs Wine Quality",
    x = "Quality",
    y = "Alcohol",
    # Keep the legend title readable instead of "as.factor(quality)".
    fill = "Quality"
  )

# Correlations among the predictors only: the target column (quality) is
# excluded, by name rather than by position so the step still works if the
# column order changes.
M <- cor(wine[, setdiff(names(wine), "quality"), drop = FALSE])

# Namespace-qualified so the call does not rely on corrplot being attached.
corrplot::corrplot(M, method = "color")

# Split the data into training and test sets.
# The seed is fixed so the random partition is reproducible.
set.seed(123)

# Row indices for the training set (p = 0.7); list = FALSE returns them as a
# matrix that can be used directly for row subsetting.
trainIndex <- createDataPartition(wine$quality, p = 0.7, list = FALSE)

trainData <- wine[trainIndex, ]
testData <- wine[-trainIndex, ]

# rpart() comes from the rpart package, so it is attached here before use.
library(rpart)

# Fit a classification tree predicting quality from all other columns.
model_tree <- rpart(quality ~ ., data = trainData, method = "class")

# Predicted class label for every test row (returned as a factor).
pred <- predict(model_tree, testData, type = "class")

# confusionMatrix() needs both arguments to be factors with the same levels.
# The observed scores are stored as numbers, so they are converted using the
# levels of the predictions.
actual <- factor(testData$quality, levels = levels(pred))
confusionMatrix(pred, actual)