=============== PREPROCESSING ==============
# Load the red-wine quality data; the file uses ";" as its field separator.
wine_red <- read.csv(
  file = "C:/Users/ASUS/Documents/UNY/MySta/SEM 4/Komputasi Statistika/UTS KOMSTAT/winequality/winequality-red.csv",
  sep = ";"
)
# Show the column names and types of the loaded data frame.
str(object = wine_red)
## 'data.frame': 1599 obs. of 12 variables:
## $ fixed.acidity : num 7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
## $ volatile.acidity : num 0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
## $ citric.acid : num 0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
## $ residual.sugar : num 1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
## $ chlorides : num 0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
## $ free.sulfur.dioxide : num 11 25 15 17 11 13 15 15 9 17 ...
## $ total.sulfur.dioxide: num 34 67 54 60 34 40 59 21 18 102 ...
## $ density : num 0.998 0.997 0.997 0.998 0.998 ...
## $ pH : num 3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
## $ sulphates : num 0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
## $ alcohol : num 9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
## $ quality : int 5 5 5 6 5 5 5 7 7 5 ...
# Load the white-wine quality data (also ";"-separated).
wine_white <- read.csv(
  file = "C:/Users/ASUS/Documents/UNY/MySta/SEM 4/Komputasi Statistika/UTS KOMSTAT/winequality/winequality-white.csv",
  sep = ";"
)
# Show the column names and types of the loaded data frame.
str(object = wine_white)
## 'data.frame': 4898 obs. of 12 variables:
## $ fixed.acidity : num 7 6.3 8.1 7.2 7.2 8.1 6.2 7 6.3 8.1 ...
## $ volatile.acidity : num 0.27 0.3 0.28 0.23 0.23 0.28 0.32 0.27 0.3 0.22 ...
## $ citric.acid : num 0.36 0.34 0.4 0.32 0.32 0.4 0.16 0.36 0.34 0.43 ...
## $ residual.sugar : num 20.7 1.6 6.9 8.5 8.5 6.9 7 20.7 1.6 1.5 ...
## $ chlorides : num 0.045 0.049 0.05 0.058 0.058 0.05 0.045 0.045 0.049 0.044 ...
## $ free.sulfur.dioxide : num 45 14 30 47 47 30 30 45 14 28 ...
## $ total.sulfur.dioxide: num 170 132 97 186 186 97 136 170 132 129 ...
## $ density : num 1.001 0.994 0.995 0.996 0.996 ...
## $ pH : num 3 3.3 3.26 3.19 3.19 3.26 3.18 3 3.3 3.22 ...
## $ sulphates : num 0.45 0.49 0.44 0.4 0.4 0.44 0.47 0.45 0.49 0.45 ...
## $ alcohol : num 8.8 9.5 10.1 9.9 9.9 10.1 9.6 8.8 9.5 11 ...
## $ quality : int 6 6 6 6 6 6 6 6 6 6 ...
1a. Buat statistik ringkasan dari variabel-variabel dari data wine quality untuk warna yang berbeda (red vs white).
# Red wine: per-column summary statistics
summary(object = wine_red)
## fixed.acidity volatile.acidity citric.acid residual.sugar
## Min. : 4.60 Min. :0.1200 Min. :0.000 Min. : 0.900
## 1st Qu.: 7.10 1st Qu.:0.3900 1st Qu.:0.090 1st Qu.: 1.900
## Median : 7.90 Median :0.5200 Median :0.260 Median : 2.200
## Mean : 8.32 Mean :0.5278 Mean :0.271 Mean : 2.539
## 3rd Qu.: 9.20 3rd Qu.:0.6400 3rd Qu.:0.420 3rd Qu.: 2.600
## Max. :15.90 Max. :1.5800 Max. :1.000 Max. :15.500
## chlorides free.sulfur.dioxide total.sulfur.dioxide density
## Min. :0.01200 Min. : 1.00 Min. : 6.00 Min. :0.9901
## 1st Qu.:0.07000 1st Qu.: 7.00 1st Qu.: 22.00 1st Qu.:0.9956
## Median :0.07900 Median :14.00 Median : 38.00 Median :0.9968
## Mean :0.08747 Mean :15.87 Mean : 46.47 Mean :0.9967
## 3rd Qu.:0.09000 3rd Qu.:21.00 3rd Qu.: 62.00 3rd Qu.:0.9978
## Max. :0.61100 Max. :72.00 Max. :289.00 Max. :1.0037
## pH sulphates alcohol quality
## Min. :2.740 Min. :0.3300 Min. : 8.40 Min. :3.000
## 1st Qu.:3.210 1st Qu.:0.5500 1st Qu.: 9.50 1st Qu.:5.000
## Median :3.310 Median :0.6200 Median :10.20 Median :6.000
## Mean :3.311 Mean :0.6581 Mean :10.42 Mean :5.636
## 3rd Qu.:3.400 3rd Qu.:0.7300 3rd Qu.:11.10 3rd Qu.:6.000
## Max. :4.010 Max. :2.0000 Max. :14.90 Max. :8.000
# White wine: per-column summary statistics
summary(object = wine_white)
## fixed.acidity volatile.acidity citric.acid residual.sugar
## Min. : 3.800 Min. :0.0800 Min. :0.0000 Min. : 0.600
## 1st Qu.: 6.300 1st Qu.:0.2100 1st Qu.:0.2700 1st Qu.: 1.700
## Median : 6.800 Median :0.2600 Median :0.3200 Median : 5.200
## Mean : 6.855 Mean :0.2782 Mean :0.3342 Mean : 6.391
## 3rd Qu.: 7.300 3rd Qu.:0.3200 3rd Qu.:0.3900 3rd Qu.: 9.900
## Max. :14.200 Max. :1.1000 Max. :1.6600 Max. :65.800
## chlorides free.sulfur.dioxide total.sulfur.dioxide density
## Min. :0.00900 Min. : 2.00 Min. : 9.0 Min. :0.9871
## 1st Qu.:0.03600 1st Qu.: 23.00 1st Qu.:108.0 1st Qu.:0.9917
## Median :0.04300 Median : 34.00 Median :134.0 Median :0.9937
## Mean :0.04577 Mean : 35.31 Mean :138.4 Mean :0.9940
## 3rd Qu.:0.05000 3rd Qu.: 46.00 3rd Qu.:167.0 3rd Qu.:0.9961
## Max. :0.34600 Max. :289.00 Max. :440.0 Max. :1.0390
## pH sulphates alcohol quality
## Min. :2.720 Min. :0.2200 Min. : 8.00 Min. :3.000
## 1st Qu.:3.090 1st Qu.:0.4100 1st Qu.: 9.50 1st Qu.:5.000
## Median :3.180 Median :0.4700 Median :10.40 Median :6.000
## Mean :3.188 Mean :0.4898 Mean :10.51 Mean :5.878
## 3rd Qu.:3.280 3rd Qu.:0.5500 3rd Qu.:11.40 3rd Qu.:6.000
## Max. :3.820 Max. :1.0800 Max. :14.20 Max. :9.000
# Direct comparison of the two wine types:
# tag every row with the data set it came from.
wine_red[["type"]] <- "red"
wine_white[["type"]] <- "white"
# Stack both data sets into a single data frame.
wine_all <- rbind(wine_red, wine_white)
# dplyr supplies the grouping/summarising verbs used on wine_all.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Mean, standard deviation, minimum and maximum of every numeric column,
# computed separately for each wine type. Result columns are named
# "<column>_<statistic>".
summary_stats <- wine_all |>
  group_by(type) |>
  summarise(
    across(
      .cols = where(is.numeric),
      .fns = list(mean = mean, sd = sd, min = min, max = max),
      .names = "{.col}_{.fn}"
    )
  )
# Print the resulting table.
summary_stats
## # A tibble: 2 × 49
## type fixed.acidity_mean fixed.acidity_sd fixed.acidity_min fixed.acidity_max
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 red 8.32 1.74 4.6 15.9
## 2 white 6.85 0.844 3.8 14.2
## # ℹ 44 more variables: volatile.acidity_mean <dbl>, volatile.acidity_sd <dbl>,
## # volatile.acidity_min <dbl>, volatile.acidity_max <dbl>,
## # citric.acid_mean <dbl>, citric.acid_sd <dbl>, citric.acid_min <dbl>,
## # citric.acid_max <dbl>, residual.sugar_mean <dbl>, residual.sugar_sd <dbl>,
## # residual.sugar_min <dbl>, residual.sugar_max <dbl>, chlorides_mean <dbl>,
## # chlorides_sd <dbl>, chlorides_min <dbl>, chlorides_max <dbl>,
## # free.sulfur.dioxide_mean <dbl>, free.sulfur.dioxide_sd <dbl>, …
1b. Adakah pengaruh kandungan residu gula (residual_sugar) dan asam sitrat (citric acid) terhadap kualitas wine? Tunjukkan dengan diagram/visualisasi dan pengujian hipotesis yang sesuai. Langkah-langkah pengujian hipotesis: #i) tuliskan hipotesis nol dan hipotesis alternatif H₀: Tidak ada pengaruh signifikan dari residu gula (residual_sugar) dan asam sitrat (citric.acid) terhadap kualitas wine (quality). H₁: Ada pengaruh signifikan dari residu gula (residual_sugar) dan asam sitrat (citric.acid) terhadap kualitas wine (quality).
#ii) gunakan taraf signifikansi α=0.05,
tentukan statistik uji,
tentukan kriteria keputusan,
melakukan perhitungan,
menarik kesimpulan.
library(ggplot2)
# White wine: jittered scatter of quality against residual sugar,
# overlaid with a fitted linear trend line (no confidence band).
ggplot(data = wine_white, mapping = aes(x = residual.sugar, y = quality)) +
  ggtitle("Pengaruh Residual Sugar terhadap Quality (White Wine)") +
  xlab("Residual Sugar") +
  ylab("Quality") +
  geom_jitter(colour = "steelblue", alpha = 0.3) +
  geom_smooth(method = "lm", colour = "red", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
library(ggplot2)
# White wine: jittered scatter of quality against citric acid,
# overlaid with a fitted linear trend line (no confidence band).
ggplot(data = wine_white, mapping = aes(x = citric.acid, y = quality)) +
  ggtitle("Pengaruh Citric Acid terhadap Quality (White Wine)") +
  xlab("Citric Acid") +
  ylab("Quality") +
  geom_jitter(colour = "darkgreen", alpha = 0.3) +
  geom_smooth(method = "lm", colour = "red", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
# Attach ggplot2 for the plots that use wine_data.
library(ggplot2)
# Read both ";"-separated CSV files again, overwriting any existing
# wine_red / wine_white objects.
wine_red <- read.csv(
  file = "C:/Users/ASUS/Documents/UNY/MySta/SEM 4/Komputasi Statistika/UTS KOMSTAT/winequality/winequality-red.csv",
  sep = ";"
)
wine_white <- read.csv(
  file = "C:/Users/ASUS/Documents/UNY/MySta/SEM 4/Komputasi Statistika/UTS KOMSTAT/winequality/winequality-white.csv",
  sep = ";"
)
# Label each row with its wine colour so the two sets can be told apart.
wine_red[["color"]] <- "red"
wine_white[["color"]] <- "white"
# Stack red and white into one data frame.
wine_data <- rbind(wine_red, wine_white)
# Quality against residual sugar for all wines, points coloured by wine colour.
ggplot(
  data = wine_data,
  mapping = aes(x = residual.sugar, y = quality, color = color)
) +
  theme_minimal() +
  ggtitle("Residual Sugar vs Quality") +
  xlab("Residual Sugar (g/dm³)") +
  ylab("Quality") +
  geom_point(alpha = 0.6)
# Quality against citric acid for all wines, points coloured by wine colour.
ggplot(
  data = wine_data,
  mapping = aes(x = citric.acid, y = quality, color = color)
) +
  theme_minimal() +
  ggtitle("Citric Acid vs Quality") +
  xlab("Citric Acid (g/dm³)") +
  ylab("Quality") +
  geom_point(alpha = 0.6)
# Boxplots of residual sugar at each quality score (quality treated as a
# category), with boxes filled by wine colour.
ggplot(
  data = wine_data,
  mapping = aes(x = factor(quality), y = residual.sugar, fill = color)
) +
  theme_minimal() +
  ggtitle("Residual Sugar vs Quality") +
  xlab("Quality") +
  ylab("Residual Sugar (g/dm³)") +
  geom_boxplot()
# Boxplots of citric acid at each quality score, with boxes filled by
# wine colour.
ggplot(
  data = wine_data,
  mapping = aes(x = factor(quality), y = citric.acid, fill = color)
) +
  theme_minimal() +
  ggtitle("Citric Acid vs Quality") +
  xlab("Quality") +
  ylab("Citric Acid (g/dm³)") +
  geom_boxplot()
library(ggplot2)
# White wine only: boxplots of residual sugar at each quality score.
ggplot(
  data = wine_white,
  mapping = aes(x = factor(quality), y = residual.sugar)
) +
  ggtitle("Boxplot: Residual Sugar vs Quality (White Wine)") +
  xlab("Wine Quality") +
  ylab("Residual Sugar") +
  geom_boxplot(fill = "lightblue")
# White wine only: boxplots of citric acid at each quality score.
ggplot(
  data = wine_white,
  mapping = aes(x = factor(quality), y = citric.acid)
) +
  ggtitle("Boxplot: Citric Acid vs Quality (White Wine)") +
  xlab("Wine Quality") +
  ylab("Citric Acid") +
  geom_boxplot(fill = "lightgreen")
library(plotly)
## Warning: package 'plotly' was built under R version 4.4.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Interactive 3D scatter of white wines: residual sugar (x), citric acid (y)
# and quality (z); markers are coloured by quality using the "Reds" palette.
plot_ly(
  data = wine_white,
  type = "scatter3d",
  mode = "markers",
  x = ~residual.sugar,
  y = ~citric.acid,
  z = ~quality,
  color = ~quality,
  colors = "Reds"
) |>
  layout(title = "3D Scatter: Residual Sugar vs Citric Acid vs Quality")
# Density of residual sugar with one overlaid, semi-transparent curve per
# quality score (wine_data holds red and white wines combined).
ggplot(wine_data, aes(x = residual.sugar, fill = factor(quality))) +
  geom_density(alpha = 0.6) +
  # Name the fill legend explicitly; otherwise it is titled with the raw
  # expression "factor(quality)".
  labs(
    title = "Density of Residual Sugar by Quality",
    x = "Residual Sugar (g/dm³)",
    y = "Density",
    fill = "Quality"
  ) +
  theme_minimal()
# Density of citric acid with one overlaid, semi-transparent curve per
# quality score.
ggplot(wine_data, aes(x = citric.acid, fill = factor(quality))) +
  geom_density(alpha = 0.6) +
  # Name the fill legend explicitly; otherwise it is titled with the raw
  # expression "factor(quality)".
  labs(
    title = "Density of Citric Acid by Quality",
    x = "Citric Acid (g/dm³)",
    y = "Density",
    fill = "Quality"
  ) +
  theme_minimal()