```{r}
# Load the red wine quality data (semicolon-delimited) and keep the result as
# `wine`; without the assignment the following head(wine) has nothing to show.
wine <- read.csv(
  "https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/wine/winequality-red.csv",
  sep = ";"
)
head(wine)
```
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```r
summary(cars)
```

```
##      speed           dist
##  Min.   : 4.0   Min.   :  2.00
##  1st Qu.:12.0   1st Qu.: 26.00
##  Median :15.0   Median : 36.00
##  Mean   :15.4   Mean   : 42.98
##  3rd Qu.:19.0   3rd Qu.: 56.00
##  Max.   :25.0   Max.   :120.00
```
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the plot.

```{r}
# Red wine dataset
wine <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv",
  sep = ";"
)

# Check first few rows
head(wine)
```
```{r}
# Descriptive statistics: distribution of alcohol content, then the centre and
# spread of the quality score.
summary(wine$alcohol)
mean(wine$quality)
median(wine$quality)
sd(wine$quality)
```
```{r}
# dplyr supplies %>% and select(); ggplot2 supplies the plotting functions.
library(dplyr)
library(ggplot2)

# NOTE(review): the original pipeline ended in a dangling `%>%` that ran into
# library(ggplot2). summary() is assumed as the intended final step - confirm.
wine %>%
  select(alcohol, quality, pH) %>%
  summary()

# Histogram of quality scores, one bar per integer score.
ggplot(wine, aes(x = quality)) +
  geom_histogram(binwidth = 1, fill = "#876cd4ff", color = "white") +
  labs(
    title = "Distribution of Wine Quality Ratings",
    x = "Quality Score",
    y = "Count"
  ) +
  theme_minimal()
```
```{r}
# Load libraries
library(ggplot2)
library(dplyr)

# Red wine quality data; the file is semicolon-delimited.
wine <- read.csv(
  "https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/wine/winequality-red.csv",
  sep = ";"
)

head(wine)
```
```{r}
# scale_fill_manual() needs at least one colour per factor level, and the
# quality score can take more than four distinct values. Interpolate the four
# base colours to exactly as many colours as there are observed scores (when
# there are four scores the base colours are used unchanged).
quality_levels <- sort(unique(wine$quality))
quality_palette <- colorRampPalette(
  c("#876cd4ff", "#D783D8", "#FF90A5", "#FFB071")
)(length(quality_levels))

# One box per quality score; the legend is dropped because the x axis already
# carries the same information as the fill.
ggplot(
  wine,
  aes(x = as.factor(quality), y = alcohol, fill = as.factor(quality))
) +
  geom_boxplot() +
  scale_fill_manual(values = quality_palette) +
  labs(
    title = "Alcohol Content by Wine Quality",
    x = "Quality Score",
    y = "Alcohol (%)"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
```
```{r}
# Pairwise correlations of every column, rounded for readable tile labels.
cor_mat <- round(cor(wine), 2)

# Reshape to long form for ggplot: one row per (Var1, Var2) pair with the
# correlation in `value`. Base R is used because no package providing melt()
# is loaded anywhere in this document.
melted <- as.data.frame(as.table(cor_mat), responseName = "value")

ggplot(melted, aes(Var1, Var2, fill = value)) +
  geom_tile() +
  geom_text(aes(label = value), size = 3) +
  scale_fill_gradient2(
    low = "#FF90A5", high = "#876cd4ff", mid = "white", midpoint = 0
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Correlation Heatmap of Wine Features")
```

```{r}
# Load essential libraries
library(ggplot2)
library(dplyr)
```
```{r}
# Red wine quality data; the file is semicolon-delimited.
wine <- read.csv(
  "https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/wine/winequality-red.csv",
  sep = ";"
)

head(wine)

# caret provides createDataPartition() and confusionMatrix() used further down.
library(caret)
```
```{r}
# Number of wines at each quality score.
ggplot(wine, aes(x = quality)) +
  geom_bar(fill = "steelblue") +
  labs(title = "Wine Quality Distribution", x = "Quality", y = "Count")

# quality is numeric, so it is converted to a factor here: with a continuous x
# geom_boxplot() draws a single box instead of one box per score, and a
# continuous fill cannot colour the boxes individually.
ggplot(wine, aes(x = factor(quality), y = alcohol, fill = factor(quality))) +
  geom_boxplot() +
  labs(
    title = "Alcohol Content vs Wine Quality",
    x = "Quality",
    y = "Alcohol",
    fill = "Quality"
  )
```
```{r}
# corrplot() comes from the corrplot package, which was never loaded.
library(corrplot)

# Correlations among the predictors only: the target column (quality) is
# excluded. It is dropped by name rather than by position so the selection
# does not depend on column order.
M <- cor(wine[, names(wine) != "quality"])
corrplot(M, method = "color")
```
```{r}
# rpart() comes from the rpart package, which was never loaded.
library(rpart)

# Work on a copy whose target is a factor. confusionMatrix() requires the
# predictions and the reference to be factors with the same levels, and
# predict(type = "class") returns a factor, so an integer quality column
# makes the final call fail. Converting before the split keeps the full set
# of levels in both the training and the test data.
model_data <- wine
model_data$quality <- factor(model_data$quality)

# Reproducible 70/30 train/test split on the quality score.
set.seed(123)
trainIndex <- createDataPartition(wine$quality, p = 0.7, list = FALSE)
trainData <- model_data[trainIndex, ]
testData <- model_data[-trainIndex, ]

# Classification tree on all predictors, evaluated on the held-out rows.
model_tree <- rpart(quality ~ ., data = trainData, method = "class")
pred <- predict(model_tree, testData, type = "class")
confusionMatrix(pred, testData$quality)
```