=============== PREPROCESSING ==============

# Load the red-wine data set. The file is semicolon-delimited, so `sep` is
# overridden from read.csv()'s default comma.
wine_red <- read.csv(
  file = "C:/Users/ASUS/Documents/UNY/MySta/SEM 4/Komputasi Statistika/UTS KOMSTAT/winequality/winequality-red.csv",
  sep = ";"
)

# Show the column names, their types and the first few values.
str(object = wine_red)
## 'data.frame':    1599 obs. of  12 variables:
##  $ fixed.acidity       : num  7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
##  $ volatile.acidity    : num  0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
##  $ citric.acid         : num  0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
##  $ residual.sugar      : num  1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
##  $ chlorides           : num  0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
##  $ free.sulfur.dioxide : num  11 25 15 17 11 13 15 15 9 17 ...
##  $ total.sulfur.dioxide: num  34 67 54 60 34 40 59 21 18 102 ...
##  $ density             : num  0.998 0.997 0.997 0.998 0.998 ...
##  $ pH                  : num  3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
##  $ sulphates           : num  0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
##  $ alcohol             : num  9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
##  $ quality             : int  5 5 5 6 5 5 5 7 7 5 ...
# Load the white-wine data set (semicolon-delimited, same layout as the red
# file) and show its structure.
wine_white <- read.csv(
  file = "C:/Users/ASUS/Documents/UNY/MySta/SEM 4/Komputasi Statistika/UTS KOMSTAT/winequality/winequality-white.csv",
  sep = ";"
)
str(object = wine_white)
## 'data.frame':    4898 obs. of  12 variables:
##  $ fixed.acidity       : num  7 6.3 8.1 7.2 7.2 8.1 6.2 7 6.3 8.1 ...
##  $ volatile.acidity    : num  0.27 0.3 0.28 0.23 0.23 0.28 0.32 0.27 0.3 0.22 ...
##  $ citric.acid         : num  0.36 0.34 0.4 0.32 0.32 0.4 0.16 0.36 0.34 0.43 ...
##  $ residual.sugar      : num  20.7 1.6 6.9 8.5 8.5 6.9 7 20.7 1.6 1.5 ...
##  $ chlorides           : num  0.045 0.049 0.05 0.058 0.058 0.05 0.045 0.045 0.049 0.044 ...
##  $ free.sulfur.dioxide : num  45 14 30 47 47 30 30 45 14 28 ...
##  $ total.sulfur.dioxide: num  170 132 97 186 186 97 136 170 132 129 ...
##  $ density             : num  1.001 0.994 0.995 0.996 0.996 ...
##  $ pH                  : num  3 3.3 3.26 3.19 3.19 3.26 3.18 3 3.3 3.22 ...
##  $ sulphates           : num  0.45 0.49 0.44 0.4 0.4 0.44 0.47 0.45 0.49 0.45 ...
##  $ alcohol             : num  8.8 9.5 10.1 9.9 9.9 10.1 9.6 8.8 9.5 11 ...
##  $ quality             : int  6 6 6 6 6 6 6 6 6 6 ...

1a. Buat statistik ringkasan dari variabel-variabel dari data wine quality untuk warna yang berbeda (red vs white).

# Red wine: per-column minimum, quartiles, median, mean and maximum
summary(object = wine_red)
##  fixed.acidity   volatile.acidity  citric.acid    residual.sugar  
##  Min.   : 4.60   Min.   :0.1200   Min.   :0.000   Min.   : 0.900  
##  1st Qu.: 7.10   1st Qu.:0.3900   1st Qu.:0.090   1st Qu.: 1.900  
##  Median : 7.90   Median :0.5200   Median :0.260   Median : 2.200  
##  Mean   : 8.32   Mean   :0.5278   Mean   :0.271   Mean   : 2.539  
##  3rd Qu.: 9.20   3rd Qu.:0.6400   3rd Qu.:0.420   3rd Qu.: 2.600  
##  Max.   :15.90   Max.   :1.5800   Max.   :1.000   Max.   :15.500  
##    chlorides       free.sulfur.dioxide total.sulfur.dioxide    density      
##  Min.   :0.01200   Min.   : 1.00       Min.   :  6.00       Min.   :0.9901  
##  1st Qu.:0.07000   1st Qu.: 7.00       1st Qu.: 22.00       1st Qu.:0.9956  
##  Median :0.07900   Median :14.00       Median : 38.00       Median :0.9968  
##  Mean   :0.08747   Mean   :15.87       Mean   : 46.47       Mean   :0.9967  
##  3rd Qu.:0.09000   3rd Qu.:21.00       3rd Qu.: 62.00       3rd Qu.:0.9978  
##  Max.   :0.61100   Max.   :72.00       Max.   :289.00       Max.   :1.0037  
##        pH          sulphates         alcohol         quality     
##  Min.   :2.740   Min.   :0.3300   Min.   : 8.40   Min.   :3.000  
##  1st Qu.:3.210   1st Qu.:0.5500   1st Qu.: 9.50   1st Qu.:5.000  
##  Median :3.310   Median :0.6200   Median :10.20   Median :6.000  
##  Mean   :3.311   Mean   :0.6581   Mean   :10.42   Mean   :5.636  
##  3rd Qu.:3.400   3rd Qu.:0.7300   3rd Qu.:11.10   3rd Qu.:6.000  
##  Max.   :4.010   Max.   :2.0000   Max.   :14.90   Max.   :8.000
# White wine: per-column minimum, quartiles, median, mean and maximum
summary(object = wine_white)
##  fixed.acidity    volatile.acidity  citric.acid     residual.sugar  
##  Min.   : 3.800   Min.   :0.0800   Min.   :0.0000   Min.   : 0.600  
##  1st Qu.: 6.300   1st Qu.:0.2100   1st Qu.:0.2700   1st Qu.: 1.700  
##  Median : 6.800   Median :0.2600   Median :0.3200   Median : 5.200  
##  Mean   : 6.855   Mean   :0.2782   Mean   :0.3342   Mean   : 6.391  
##  3rd Qu.: 7.300   3rd Qu.:0.3200   3rd Qu.:0.3900   3rd Qu.: 9.900  
##  Max.   :14.200   Max.   :1.1000   Max.   :1.6600   Max.   :65.800  
##    chlorides       free.sulfur.dioxide total.sulfur.dioxide    density      
##  Min.   :0.00900   Min.   :  2.00      Min.   :  9.0        Min.   :0.9871  
##  1st Qu.:0.03600   1st Qu.: 23.00      1st Qu.:108.0        1st Qu.:0.9917  
##  Median :0.04300   Median : 34.00      Median :134.0        Median :0.9937  
##  Mean   :0.04577   Mean   : 35.31      Mean   :138.4        Mean   :0.9940  
##  3rd Qu.:0.05000   3rd Qu.: 46.00      3rd Qu.:167.0        3rd Qu.:0.9961  
##  Max.   :0.34600   Max.   :289.00      Max.   :440.0        Max.   :1.0390  
##        pH          sulphates         alcohol         quality     
##  Min.   :2.720   Min.   :0.2200   Min.   : 8.00   Min.   :3.000  
##  1st Qu.:3.090   1st Qu.:0.4100   1st Qu.: 9.50   1st Qu.:5.000  
##  Median :3.180   Median :0.4700   Median :10.40   Median :6.000  
##  Mean   :3.188   Mean   :0.4898   Mean   :10.51   Mean   :5.878  
##  3rd Qu.:3.280   3rd Qu.:0.5500   3rd Qu.:11.40   3rd Qu.:6.000  
##  Max.   :3.820   Max.   :1.0800   Max.   :14.20   Max.   :9.000
# Direct comparison of the two data sets: first tag every row with its wine
# type so the rows stay distinguishable after stacking.
wine_red[["type"]] <- "red"
wine_white[["type"]] <- "white"

# Stack the red and white data frames into a single one
wine_all <- rbind(wine_red, wine_white)

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Summary statistics per wine type: mean, standard deviation, minimum and
# maximum of every numeric column. Result columns are named
# "<column>_<statistic>", e.g. fixed.acidity_mean.
summary_stats <- wine_all |>
  group_by(type) |>
  summarise(
    across(
      .cols = where(is.numeric),
      .fns = list(mean = mean, sd = sd, min = min, max = max),
      .names = "{.col}_{.fn}"
    )
  )
summary_stats
## # A tibble: 2 × 49
##   type  fixed.acidity_mean fixed.acidity_sd fixed.acidity_min fixed.acidity_max
##   <chr>              <dbl>            <dbl>             <dbl>             <dbl>
## 1 red                 8.32            1.74                4.6              15.9
## 2 white               6.85            0.844               3.8              14.2
## # ℹ 44 more variables: volatile.acidity_mean <dbl>, volatile.acidity_sd <dbl>,
## #   volatile.acidity_min <dbl>, volatile.acidity_max <dbl>,
## #   citric.acid_mean <dbl>, citric.acid_sd <dbl>, citric.acid_min <dbl>,
## #   citric.acid_max <dbl>, residual.sugar_mean <dbl>, residual.sugar_sd <dbl>,
## #   residual.sugar_min <dbl>, residual.sugar_max <dbl>, chlorides_mean <dbl>,
## #   chlorides_sd <dbl>, chlorides_min <dbl>, chlorides_max <dbl>,
## #   free.sulfur.dioxide_mean <dbl>, free.sulfur.dioxide_sd <dbl>, …

1b. Adakah pengaruh kandungan residu gula (residual_sugar) dan asam sitrat (citric acid) terhadap kualitas wine? Tunjukkan dengan diagram/visualisasi dan pengujian hipotesis yang sesuai. Langkah-langkah pengujian hipotesis: #i) tuliskan hipotesis nol dan hipotesis alternatif: H₀: Tidak ada pengaruh signifikan dari residu gula (residual.sugar) dan asam sitrat (citric.acid) terhadap kualitas wine (quality). H₁: Ada pengaruh signifikan dari residu gula (residual.sugar) dan asam sitrat (citric.acid) terhadap kualitas wine (quality).

#ii) gunakan taraf signifikansi α=0.05,

  1. tentukan statistik uji,

  2. tentukan kriteria keputusan,

  3. melakukan perhitungan,

  4. menarik kesimpulan.

library(ggplot2)

# Scatter plot: residual sugar vs quality (white wine).
# Points are jittered and semi-transparent to reduce overplotting; the red
# line is a straight-line (lm) fit drawn without a confidence band.
ggplot(wine_white, aes(x = residual.sugar, y = quality)) +
  geom_jitter(alpha = 0.3, color = "steelblue") +
  # The formula is stated explicitly so the fitted model is visible in the
  # code and ggplot2 does not emit its "using formula" message.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "red") +
  labs(
    title = "Pengaruh Residual Sugar terhadap Quality (White Wine)",
    x = "Residual Sugar",
    y = "Quality"
  )
## `geom_smooth()` using formula = 'y ~ x'

library(ggplot2)

# Scatter plot: citric acid vs quality (white wine).
# Points are jittered and semi-transparent to reduce overplotting; the red
# line is a straight-line (lm) fit drawn without a confidence band.
ggplot(wine_white, aes(x = citric.acid, y = quality)) +
  geom_jitter(alpha = 0.3, color = "darkgreen") +
  # The formula is stated explicitly so the fitted model is visible in the
  # code and ggplot2 does not emit its "using formula" message.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "red") +
  labs(
    title = "Pengaruh Citric Acid terhadap Quality (White Wine)",
    x = "Citric Acid",
    y = "Quality"
  )
## `geom_smooth()` using formula = 'y ~ x'

# ggplot2 is needed for the plots that follow
library(ggplot2)

# Read both data sets again from their semicolon-delimited CSV files.
# This replaces the existing `wine_red` / `wine_white` objects, so a column
# added to them earlier in the script (such as `type`) is no longer present.
wine_red <- read.csv(
  file = "C:/Users/ASUS/Documents/UNY/MySta/SEM 4/Komputasi Statistika/UTS KOMSTAT/winequality/winequality-red.csv",
  sep = ";"
)
wine_white <- read.csv(
  file = "C:/Users/ASUS/Documents/UNY/MySta/SEM 4/Komputasi Statistika/UTS KOMSTAT/winequality/winequality-white.csv",
  sep = ";"
)

# Label every row with its wine colour so the two sets can be told apart
wine_red[["color"]] <- "red"
wine_white[["color"]] <- "white"

# Stack the two data frames into one
wine_data <- rbind(wine_red, wine_white)

# Relationship between residual sugar and quality, one point per wine,
# coloured by wine colour
ggplot(
  data = wine_data,
  mapping = aes(x = residual.sugar, y = quality, color = color)
) +
  geom_point(alpha = 0.6) +
  labs(
    title = "Residual Sugar vs Quality",
    x = "Residual Sugar (g/dm³)",
    y = "Quality"
  ) +
  theme_minimal()

# Relationship between citric acid and quality, one point per wine,
# coloured by wine colour
ggplot(
  data = wine_data,
  mapping = aes(x = citric.acid, y = quality, color = color)
) +
  geom_point(alpha = 0.6) +
  labs(
    title = "Citric Acid vs Quality",
    x = "Citric Acid (g/dm³)",
    y = "Quality"
  ) +
  theme_minimal()

# Distribution of residual sugar within each quality score, as boxplots;
# quality is converted to a factor so every score gets its own box, and the
# fill separates red from white wine.
ggplot(
  data = wine_data,
  mapping = aes(x = factor(quality), y = residual.sugar, fill = color)
) +
  geom_boxplot() +
  labs(
    title = "Residual Sugar vs Quality",
    x = "Quality",
    y = "Residual Sugar (g/dm³)"
  ) +
  theme_minimal()

# Distribution of citric acid within each quality score, as boxplots with
# the same factor / fill treatment
ggplot(
  data = wine_data,
  mapping = aes(x = factor(quality), y = citric.acid, fill = color)
) +
  geom_boxplot() +
  labs(
    title = "Citric Acid vs Quality",
    x = "Quality",
    y = "Citric Acid (g/dm³)"
  ) +
  theme_minimal()

library(ggplot2)

# White wine only: residual sugar per quality score (quality as a factor so
# each score gets its own box)
ggplot(
  data = wine_white,
  mapping = aes(x = factor(quality), y = residual.sugar)
) +
  geom_boxplot(fill = "lightblue") +
  labs(
    title = "Boxplot: Residual Sugar vs Quality (White Wine)",
    x = "Wine Quality",
    y = "Residual Sugar"
  )

# White wine only: citric acid per quality score
ggplot(
  data = wine_white,
  mapping = aes(x = factor(quality), y = citric.acid)
) +
  geom_boxplot(fill = "lightgreen") +
  labs(
    title = "Boxplot: Citric Acid vs Quality (White Wine)",
    x = "Wine Quality",
    y = "Citric Acid"
  )

library(plotly)
## Warning: package 'plotly' was built under R version 4.4.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Interactive 3D scatter of the white wines: residual sugar on x, citric acid
# on y and quality on z. Marker colour is also mapped to quality, using the
# "Reds" palette.
plot_ly(
  data = wine_white,
  x = ~residual.sugar,
  y = ~citric.acid,
  z = ~quality,
  color = ~quality,
  colors = "Reds",
  type = "scatter3d",
  mode = "markers"
) |>
  layout(title = "3D Scatter: Residual Sugar vs Citric Acid vs Quality")
# Density of residual sugar, one semi-transparent curve per quality score
# (quality is converted to a factor to get a separate fill per score)
ggplot(
  data = wine_data,
  mapping = aes(x = residual.sugar, fill = factor(quality))
) +
  geom_density(alpha = 0.6) +
  labs(
    title = "Density of Residual Sugar by Quality",
    x = "Residual Sugar (g/dm³)",
    y = "Density"
  ) +
  theme_minimal()

# Density of citric acid, one semi-transparent curve per quality score
ggplot(
  data = wine_data,
  mapping = aes(x = citric.acid, fill = factor(quality))
) +
  geom_density(alpha = 0.6) +
  labs(
    title = "Density of Citric Acid by Quality",
    x = "Citric Acid (g/dm³)",
    y = "Density"
  ) +
  theme_minimal()