options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages(c("tidyverse", "car", "rstatix", "psych", "MVN"))
## package 'tidyverse' successfully unpacked and MD5 sums checked
## package 'car' successfully unpacked and MD5 sums checked
## package 'rstatix' successfully unpacked and MD5 sums checked
## package 'psych' successfully unpacked and MD5 sums checked
## package 'MVN' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Lenovo\AppData\Local\Temp\RtmpuK7QzT\downloaded_packages
library(tidyverse)
library(car)
library(rstatix)
library(psych)
library(MVN)
wine_data <- read.csv("WineQT.csv")
Dataset yang digunakan dalam penelitian ini adalah WineQT.csv (https://www.kaggle.com/datasets/yasserh/wine-quality-dataset), yaitu data kualitas wine yang memuat informasi kimia-fisik wine dan skor kualitasnya.
df_clean <- wine_data %>%
distinct()
colSums(is.na(df_clean))
## fixed.acidity volatile.acidity citric.acid
## 0 0 0
## residual.sugar chlorides free.sulfur.dioxide
## 0 0 0
## total.sulfur.dioxide density pH
## 0 0 0
## sulphates alcohol quality
## 0 0 0
## Id
## 0
df_clean <- df_clean %>% select(-Id)
df_clean %>% count(quality)
## quality n
## 1 3 6
## 2 4 33
## 3 5 483
## 4 6 462
## 5 7 143
## 6 8 16
Dari hasil yang didapatkan maka hanya kelas 5, 6, dan 7 saja yang digunakan dan mengubahnya menjadi faktor, karena kelas 3, 4, 8 itu kelompok minoritas ekstrem (<40 baris)
df_clean <- wine_data %>%
filter(quality %in% c(5, 6, 7)) %>%
mutate(quality = as.factor(quality))
str(df_clean[, c("quality", "pH", "sulphates", "citric.acid")])
## 'data.frame': 1088 obs. of 4 variables:
## $ quality : Factor w/ 3 levels "5","6","7": 1 1 1 2 1 1 1 3 3 1 ...
## $ pH : num 3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.28 ...
## $ sulphates : num 0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.54 ...
## $ citric.acid: num 0 0 0.04 0.56 0 0 0.06 0 0.02 0.08 ...
table(df_clean$quality)
##
## 5 6 7
## 483 462 143
summary(df_clean)
## fixed.acidity volatile.acidity citric.acid residual.sugar
## Min. : 4.900 Min. :0.120 Min. :0.0000 Min. : 0.900
## 1st Qu.: 7.100 1st Qu.:0.390 1st Qu.:0.1000 1st Qu.: 1.900
## Median : 7.900 Median :0.520 Median :0.2500 Median : 2.200
## Mean : 8.318 Mean :0.526 Mean :0.2694 Mean : 2.529
## 3rd Qu.: 9.100 3rd Qu.:0.640 3rd Qu.:0.4200 3rd Qu.: 2.600
## Max. :15.900 Max. :1.330 Max. :0.7900 Max. :15.500
## chlorides free.sulfur.dioxide total.sulfur.dioxide density
## Min. :0.01200 Min. : 1.00 Min. : 6.00 Min. :0.9901
## 1st Qu.:0.07000 1st Qu.: 7.00 1st Qu.: 22.00 1st Qu.:0.9956
## Median :0.07900 Median :14.00 Median : 38.00 Median :0.9967
## Mean :0.08684 Mean :15.75 Mean : 46.44 Mean :0.9967
## 3rd Qu.:0.09000 3rd Qu.:21.00 3rd Qu.: 62.00 3rd Qu.:0.9979
## Max. :0.61100 Max. :68.00 Max. :289.00 Max. :1.0037
## pH sulphates alcohol quality Id
## Min. :2.860 Min. :0.3900 Min. : 8.40 5:483 Min. : 0.0
## 1st Qu.:3.200 1st Qu.:0.5500 1st Qu.: 9.50 6:462 1st Qu.: 413.8
## Median :3.310 Median :0.6200 Median :10.20 7:143 Median : 795.5
## Mean :3.309 Mean :0.6573 Mean :10.43 Mean : 806.7
## 3rd Qu.:3.400 3rd Qu.:0.7300 3rd Qu.:11.10 3rd Qu.:1208.2
## Max. :4.010 Max. :1.9500 Max. :14.90 Max. :1597.0
ggplot(df_clean, aes(x = pH)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
ggplot(df_clean, aes(x = sulphates)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
ggplot(df_clean, aes(x = citric.acid, y = pH)) +
geom_point() +
geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(df_clean, aes(x = citric.acid, y = sulphates)) +
geom_point() +
geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
cor(df_clean[, c("pH", "sulphates", "citric.acid")])
## pH sulphates citric.acid
## pH 1.0000000 -0.1554168 -0.5328652
## sulphates -0.1554168 1.0000000 0.3130925
## citric.acid -0.5328652 0.3130925 1.0000000
# Outlier Univariat
df_clean %>% group_by(quality) %>% identify_outliers(pH)
## # A tibble: 18 × 15
## quality fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 5 5 1.04 0.24 1.6 0.05
## 2 5 10.7 0.43 0.39 2.2 0.106
## 3 5 10.7 0.43 0.39 2.2 0.106
## 4 5 6.6 0.61 0.01 1.9 0.08
## 5 5 5.6 0.915 0 2.1 0.041
## 6 5 10 0.69 0.11 1.4 0.084
## 7 5 5.6 0.54 0.04 1.7 0.049
## 8 6 14.3 0.31 0.74 1.8 0.075
## 9 6 5.1 0.47 0.02 1.3 0.034
## 10 6 8 0.18 0.37 0.9 0.049
## 11 6 5.2 0.645 0 2.15 0.08
## 12 6 5.4 0.74 0 1.2 0.041
## 13 6 9.1 0.76 0.68 1.7 0.414
## 14 6 5 0.74 0 1.2 0.041
## 15 7 12 0.5 0.59 1.4 0.073
## 16 7 4.9 0.42 0 2.1 0.048
## 17 7 5.4 0.42 0.27 2 0.092
## 18 7 5.1 0.42 0 1.8 0.044
## # ℹ 9 more variables: free.sulfur.dioxide <dbl>, total.sulfur.dioxide <dbl>,
## # density <dbl>, pH <dbl>, sulphates <dbl>, alcohol <dbl>, Id <int>,
## # is.outlier <lgl>, is.extreme <lgl>
df_clean %>% group_by(quality) %>% identify_outliers(sulphates)
## # A tibble: 52 × 15
## quality fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 5 7.8 0.61 0.29 1.6 0.114
## 2 5 7.9 0.43 0.21 1.6 0.106
## 3 5 7.3 0.45 0.36 5.9 0.074
## 4 5 8.1 0.66 0.22 2.2 0.069
## 5 5 9.3 0.39 0.44 2.1 0.107
## 6 5 7.8 0.41 0.68 1.7 0.467
## 7 5 7.8 0.56 0.19 1.8 0.104
## 8 5 7.8 0.56 0.19 1.8 0.104
## 9 5 8.9 0.61 0.49 2 0.27
## 10 5 8.9 0.635 0.37 1.7 0.263
## # ℹ 42 more rows
## # ℹ 9 more variables: free.sulfur.dioxide <dbl>, total.sulfur.dioxide <dbl>,
## # density <dbl>, pH <dbl>, sulphates <dbl>, alcohol <dbl>, Id <int>,
## # is.outlier <lgl>, is.extreme <lgl>
sqrt(df_clean$sulphates)
## [1] 0.7483315 0.8246211 0.8062258 0.7615773 0.7483315 0.7483315 0.6782330
## [8] 0.6855655 0.7549834 0.7348469 0.7211103 1.2489996 0.8660254 1.0392305
## [15] 0.8062258 0.9539392 0.7280110 0.7937254 0.7483315 0.7681146 0.7416198
## [22] 0.7681146 0.7348469 0.8124038 0.7416198 0.7416198 0.7745967 0.8544004
## [29] 0.9110434 0.9486833 1.0954451 0.8544004 0.8602325 0.7937254 0.8000000
## [36] 0.7483315 0.7937254 0.7937254 0.7681146 0.7348469 0.8000000 0.8426150
## [43] 0.7549834 0.6244998 0.6244998 0.7615773 0.9746794 0.6928203 0.8124038
## [50] 0.7416198 0.7211103 0.7483315 0.7549834 0.8831761 0.7681146 1.3964240
## [57] 0.8000000 1.1045361 0.7280110 0.7348469 1.3964240 0.8000000 0.7483315
## [64] 0.7483315 0.7681146 0.7810250 0.7681146 0.7141428 0.6928203 0.7141428
## [71] 1.1445523 0.8888194 0.9643651 0.8124038 0.8185353 0.9643651 0.7937254
## [78] 0.7810250 0.7549834 0.7745967 0.7211103 0.6928203 0.7000000 0.6928203
## [85] 0.9591663 0.8124038 0.7937254 0.7416198 0.9055385 0.8000000 0.9055385
## [92] 0.8888194 0.7615773 0.8888194 0.7874008 0.7211103 0.7615773 0.7280110
## [99] 0.7348469 0.7348469 0.8426150 0.8485281 0.8426150 0.7071068 0.7549834
## [106] 0.7483315 0.7483315 0.7000000 0.7000000 0.7874008 0.7211103 0.7416198
## [113] 0.7810250 0.7549834 0.8831761 0.6855655 0.7810250 0.7483315 0.7681146
## [120] 0.7681146 1.0099505 0.7211103 0.7615773 0.7615773 0.7280110 0.7416198
## [127] 0.7416198 0.7874008 0.7874008 0.7416198 0.7416198 0.7681146 0.9848858
## [134] 0.9055385 0.7483315 0.8774964 0.8774964 0.8185353 0.9273618 0.7483315
## [141] 0.6557439 0.8426150 0.8717798 0.7549834 0.7416198 0.7348469 0.7681146
## [148] 0.7549834 0.7745967 0.7745967 0.7615773 1.2688578 0.7874008 0.7211103
## [155] 0.8366600 0.7416198 0.7615773 1.0440307 0.8426150 0.7000000 0.9165151
## [162] 0.9165151 0.9797959 0.7211103 0.9797959 0.8831761 0.8366600 0.7280110
## [169] 0.7681146 0.7549834 0.7874008 0.8185353 1.1224972 0.8944272 0.8062258
## [176] 0.7549834 0.8185353 0.9327379 0.9539392 0.9848858 0.8185353 0.8185353
## [183] 0.9539392 0.9848858 0.8774964 0.9055385 1.0392305 0.8774964 0.8426150
## [190] 0.8426150 0.8717798 0.8366600 0.9273618 0.9746794 0.9273618 0.8000000
## [197] 0.7745967 0.9000000 0.8000000 0.7211103 0.8544004 0.8944272 0.7681146
## [204] 0.8000000 0.8485281 0.7211103 0.7211103 0.7874008 0.7937254 0.7348469
## [211] 0.7937254 0.7280110 0.8246211 0.8426150 0.8000000 0.7874008 0.8306624
## [218] 0.8306624 0.8602325 0.9110434 0.8185353 0.7211103 0.8602325 0.9165151
## [225] 0.8366600 0.8000000 1.1661904 0.8660254 0.8660254 0.8831761 0.9643651
## [232] 0.9591663 0.9110434 0.8544004 0.8774964 0.7681146 0.7810250 0.9380832
## [239] 0.8660254 0.9219544 0.8246211 0.8366600 0.9539392 0.7483315 0.9327379
## [246] 0.8544004 0.8544004 0.8306624 1.0630146 0.9327379 1.0198039 1.0535654
## [253] 1.0630146 0.9949874 0.8944272 0.8602325 0.8602325 0.8944272 0.7141428
## [260] 0.7745967 0.7280110 0.8062258 0.7416198 0.7280110 0.7810250 0.7937254
## [267] 0.7810250 0.8062258 0.7937254 0.7874008 0.8000000 0.9055385 0.7810250
## [274] 1.0344080 0.8124038 0.6928203 0.8717798 0.8246211 0.7483315 0.6633250
## [281] 0.9486833 0.7483315 0.7000000 0.7483315 0.7745967 0.7745967 0.9486833
## [288] 0.7483315 0.7000000 0.9433981 0.7000000 0.7483315 0.8124038 0.7745967
## [295] 0.8246211 0.9165151 0.8544004 0.8485281 0.7483315 0.7810250 0.7810250
## [302] 0.7937254 0.8602325 0.7937254 0.7615773 0.7615773 0.7810250 0.7810250
## [309] 0.8366600 0.8485281 0.8888194 0.7874008 0.7615773 0.7810250 0.8426150
## [316] 0.9055385 0.7745967 0.8366600 1.0295630 0.8366600 0.8124038 1.0295630
## [323] 0.6855655 0.6480741 0.8660254 0.8888194 0.9486833 0.8602325 0.7416198
## [330] 0.8062258 0.8888194 0.8602325 0.8062258 0.9219544 0.9219544 1.0246951
## [337] 0.8831761 1.0099505 0.8426150 0.8246211 0.7937254 0.9000000 0.8124038
## [344] 0.8124038 1.0677078 0.8602325 0.9486833 0.7416198 0.8000000 0.8774964
## [351] 0.7549834 0.9327379 0.7810250 0.7810250 0.8124038 0.7483315 0.9327379
## [358] 0.7549834 0.8774964 0.8124038 0.7874008 0.8185353 0.8544004 0.7681146
## [365] 0.8485281 0.8888194 0.7681146 0.8831761 0.9273618 0.8660254 0.7483315
## [372] 0.8544004 0.8602325 0.7874008 0.8602325 0.7483315 0.8602325 0.8246211
## [379] 0.7549834 0.7810250 0.7810250 0.8246211 0.7549834 0.7745967 0.8888194
## [386] 0.7549834 0.9539392 0.7810250 0.7745967 0.7745967 0.7141428 0.6633250
## [393] 0.6633250 0.8124038 0.7681146 0.7549834 0.7071068 0.7549834 0.7615773
## [400] 0.7549834 0.7745967 0.6928203 0.7211103 0.7874008 0.7280110 0.8000000
## [407] 0.7211103 0.7348469 0.7348469 0.7745967 0.7549834 0.9643651 0.8944272
## [414] 0.7810250 0.7874008 0.7416198 0.8485281 0.7000000 0.7000000 0.7745967
## [421] 0.7810250 0.7745967 0.7937254 0.7549834 0.7615773 0.7416198 0.7280110
## [428] 0.7280110 1.1661904 0.7874008 0.7874008 0.7874008 0.8185353 0.7549834
## [435] 0.7071068 0.9165151 0.7348469 0.6782330 0.7810250 0.7071068 0.8246211
## [442] 0.9000000 0.9000000 0.8000000 0.8366600 0.7483315 0.7745967 0.8124038
## [449] 0.8124038 0.7211103 0.7211103 0.8185353 0.6928203 0.7280110 0.7348469
## [456] 0.7000000 0.7348469 0.7483315 0.6928203 0.7483315 0.6855655 1.0246951
## [463] 0.7348469 1.0816654 0.7874008 0.7810250 0.7874008 0.8185353 0.7280110
## [470] 0.7874008 0.7549834 0.8185353 0.8185353 0.7000000 0.6855655 0.7141428
## [477] 0.7810250 0.7549834 0.7810250 0.7141428 0.7549834 0.6633250 0.7348469
## [484] 0.6633250 0.7280110 1.2727922 0.7416198 0.8485281 0.8246211 0.8246211
## [491] 0.7000000 0.8185353 0.7211103 0.6855655 0.7745967 0.7874008 0.7071068
## [498] 0.8062258 0.7745967 0.8246211 0.7549834 0.7141428 0.8544004 0.6855655
## [505] 0.8485281 0.8544004 0.7416198 0.7810250 0.7416198 1.0295630 0.6557439
## [512] 0.6782330 0.7071068 0.7071068 0.7211103 0.7549834 0.6480741 0.7280110
## [519] 0.7416198 0.7348469 0.7348469 0.7280110 0.7348469 0.7280110 0.7416198
## [526] 0.7416198 0.9591663 0.6782330 0.7681146 0.7810250 0.7745967 0.7745967
## [533] 0.8062258 0.7681146 0.7071068 0.7483315 0.7483315 0.7211103 0.8246211
## [540] 0.7348469 0.7745967 0.7483315 0.7280110 1.0862780 0.8602325 0.9000000
## [547] 0.7211103 0.6633250 0.9695360 0.8124038 0.7549834 0.8426150 0.9273618
## [554] 0.7416198 0.6855655 0.8246211 0.8717798 0.7348469 0.7745967 0.8602325
## [561] 0.8000000 0.7280110 0.8000000 0.7000000 0.8774964 0.6244998 0.6244998
## [568] 0.9110434 0.7937254 0.8485281 0.7874008 0.8124038 0.8366600 0.8124038
## [575] 0.7416198 0.7416198 0.8544004 0.8544004 0.8544004 0.8831761 0.8185353
## [582] 0.7348469 0.6633250 0.7280110 0.7348469 0.7549834 0.8888194 0.7549834
## [589] 0.8062258 0.8831761 0.7071068 0.8000000 0.8062258 0.7483315 0.8246211
## [596] 0.7549834 0.8062258 0.7549834 0.7615773 0.7615773 0.8774964 0.7615773
## [603] 0.8774964 0.8306624 0.8306624 0.8306624 0.7681146 0.8426150 0.7348469
## [610] 0.7681146 0.7549834 0.7745967 0.7874008 0.8185353 0.7810250 0.8000000
## [617] 0.8485281 0.9433981 0.7483315 0.9433981 0.8944272 0.9327379 0.9486833
## [624] 0.8944272 0.8544004 0.8062258 0.7681146 0.8544004 0.8485281 0.7348469
## [631] 0.7211103 0.8000000 0.8000000 0.9949874 0.7874008 0.8717798 0.8717798
## [638] 0.7874008 0.8246211 0.8306624 0.8185353 0.7874008 0.8426150 0.9219544
## [645] 0.7874008 0.8306624 0.9219544 0.6928203 0.7745967 0.9327379 0.7874008
## [652] 0.7211103 0.7071068 0.7000000 0.8426150 0.7874008 0.9055385 0.8426150
## [659] 0.7615773 0.7071068 0.7000000 0.7141428 0.7000000 0.6928203 0.6928203
## [666] 0.7874008 0.6928203 0.7280110 0.8485281 0.7348469 0.7416198 0.9165151
## [673] 0.9165151 0.8366600 0.7416198 0.7416198 0.6928203 0.7615773 0.7615773
## [680] 0.8660254 0.6633250 0.7810250 0.7280110 0.7280110 0.7348469 0.7416198
## [687] 0.7483315 0.7745967 0.7483315 0.7211103 0.7483315 0.7280110 0.6855655
## [694] 0.8062258 0.7681146 0.8774964 0.9110434 0.8000000 0.7874008 0.7615773
## [701] 0.7874008 0.8000000 0.8831761 0.8774964 1.1575837 0.7416198 0.8774964
## [708] 0.7874008 0.7681146 0.7549834 0.8717798 0.7681146 0.8366600 0.8000000
## [715] 0.7937254 0.7549834 0.7745967 0.8485281 0.9110434 0.7874008 0.9433981
## [722] 0.7615773 0.7615773 0.8544004 0.7615773 0.7141428 0.7141428 0.7681146
## [729] 0.8774964 0.7000000 0.8124038 0.8306624 0.8717798 0.8717798 0.7745967
## [736] 0.7681146 0.9273618 0.7000000 0.7000000 0.7211103 0.8831761 0.7937254
## [743] 0.7937254 0.8124038 0.7745967 0.9219544 0.7000000 0.8717798 0.8000000
## [750] 0.8000000 0.8426150 0.8124038 0.7745967 0.7745967 0.7745967 0.7280110
## [757] 0.7615773 0.6928203 0.7681146 0.7745967 0.8246211 0.7937254 0.9380832
## [764] 0.8246211 0.8602325 0.7745967 0.8717798 0.7615773 0.8000000 0.8062258
## [771] 0.9219544 0.8062258 0.9055385 0.7874008 0.9000000 0.9327379 0.7000000
## [778] 0.8124038 0.7211103 0.8062258 0.7280110 0.8124038 0.7280110 0.8717798
## [785] 0.9327379 0.8426150 0.8185353 0.9380832 0.7483315 0.7874008 0.8185353
## [792] 0.8602325 0.7874008 0.7874008 0.8366600 0.8544004 0.9000000 0.6782330
## [799] 0.7745967 0.8366600 0.8366600 0.7549834 0.7141428 0.8944272 0.7681146
## [806] 0.7745967 0.7348469 0.8888194 0.7615773 0.7348469 0.7348469 0.7416198
## [813] 0.9219544 0.9219544 0.9695360 0.9219544 0.9327379 0.7874008 0.7874008
## [820] 0.6928203 0.8306624 0.9110434 0.8774964 0.8774964 0.9055385 0.7141428
## [827] 0.6855655 0.8485281 0.8544004 0.8000000 0.8831761 0.7874008 0.8000000
## [834] 0.6324555 0.8000000 0.6324555 0.6324555 0.7874008 0.8246211 0.6928203
## [841] 0.7810250 0.7681146 0.7615773 0.7615773 0.8306624 0.6480741 0.8366600
## [848] 0.7937254 1.0723805 0.7615773 0.8485281 0.7745967 0.7745967 0.9327379
## [855] 0.7416198 0.8660254 0.7549834 0.7280110 0.6557439 0.7280110 0.7348469
## [862] 0.7745967 0.7348469 0.7549834 0.8000000 0.8000000 0.8246211 0.7937254
## [869] 0.7483315 0.7549834 1.0816654 1.0816654 0.7681146 0.8660254 0.8185353
## [876] 0.6708204 0.7810250 0.7483315 0.7810250 0.8602325 0.8944272 0.6633250
## [883] 0.7280110 0.7874008 0.7810250 0.7681146 0.6855655 0.7615773 0.7681146
## [890] 1.1532563 0.7681146 0.7416198 0.7745967 0.7745967 0.7745967 0.7745967
## [897] 0.7211103 0.7483315 0.7483315 0.6782330 0.6557439 0.7141428 0.7211103
## [904] 0.8124038 0.6708204 0.6708204 0.6708204 0.7348469 0.7348469 0.7348469
## [911] 0.7348469 0.7280110 0.7280110 0.7483315 0.6244998 0.7141428 0.6782330
## [918] 0.6633250 0.6633250 0.7681146 0.7071068 0.8062258 0.7348469 0.8062258
## [925] 0.7280110 0.7071068 1.0862780 0.7071068 1.0816654 1.0148892 1.0816654
## [932] 0.6782330 0.7141428 0.7874008 0.7874008 0.7000000 0.7000000 0.7071068
## [939] 0.6480741 0.7615773 0.8246211 0.7681146 0.7615773 0.6928203 0.6928203
## [946] 0.8062258 0.8062258 0.7211103 0.8485281 0.7141428 0.9055385 0.7874008
## [953] 0.9695360 0.9643651 1.0049876 0.9643651 0.6633250 0.8544004 0.9695360
## [960] 0.9486833 0.8831761 0.9165151 0.8831761 0.8774964 0.6782330 0.6708204
## [967] 0.6782330 0.7615773 0.9165151 0.7615773 0.7937254 0.9643651 0.7874008
## [974] 0.7348469 0.8602325 0.8774964 0.8062258 0.7348469 0.6855655 0.8246211
## [981] 0.7000000 0.7211103 0.8602325 0.8544004 0.8831761 0.8717798 0.8306624
## [988] 0.7937254 0.7615773 0.7745967 0.8944272 0.8246211 0.7280110 0.8185353
## [995] 0.8062258 0.8062258 0.6855655 0.7211103 0.8888194 0.7348469 0.8660254
## [1002] 0.9380832 0.8660254 0.9380832 0.8246211 0.8246211 0.7810250 0.7416198
## [1009] 0.7745967 0.8831761 0.7615773 0.7483315 0.7348469 0.7681146 0.8366600
## [1016] 0.9000000 0.7937254 0.7937254 0.7280110 0.8306624 0.7416198 0.8124038
## [1023] 0.7416198 0.8485281 0.8366600 0.8774964 0.8185353 0.8485281 0.8485281
## [1030] 0.9110434 0.7937254 0.7615773 0.9848858 0.7745967 0.8000000 0.8062258
## [1037] 0.8185353 0.7745967 0.8306624 0.8124038 0.8246211 0.7810250 0.7874008
## [1044] 0.7681146 0.7937254 0.7937254 0.9000000 0.8000000 0.7810250 0.7615773
## [1051] 0.7348469 0.8124038 0.7745967 0.8246211 0.7483315 0.7745967 0.7483315
## [1058] 0.7211103 0.7211103 0.7211103 0.7348469 0.7348469 0.9055385 0.8426150
## [1065] 0.7348469 0.7681146 0.7549834 0.9643651 0.8062258 0.7141428 0.8185353
## [1072] 0.8000000 0.8831761 0.7745967 0.8185353 0.7745967 0.7071068 0.7874008
## [1079] 0.8944272 0.9219544 0.8124038 0.9055385 0.7483315 0.8660254 0.9055385
## [1086] 0.7615773 0.8717798 0.8426150
# Outlier Multivariat
dv_data <- df_clean %>% select(pH, sulphates)
df_clean$mahalanobis <- mahalanobis(
dv_data,
colMeans(dv_data),
cov(dv_data)
)
cutoff <- qchisq(0.999, df = 2)
df_clean %>% filter(mahalanobis > cutoff)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 7.8 0.61 0.29 1.6 0.114
## 2 8.6 0.49 0.28 1.9 0.110
## 3 8.6 0.49 0.28 1.9 0.110
## 4 7.8 0.41 0.68 1.7 0.467
## 5 8.9 0.59 0.50 2.0 0.337
## 6 7.7 0.41 0.76 1.8 0.611
## 7 12.5 0.28 0.54 2.3 0.082
## 8 8.9 0.29 0.35 1.9 0.067
## 9 5.1 0.47 0.02 1.3 0.034
## 10 7.1 0.31 0.30 2.2 0.053
## 11 8.5 0.46 0.59 1.4 0.414
## 12 5.4 0.74 0.00 1.2 0.041
## 13 9.1 0.76 0.68 1.7 0.414
## 14 5.0 0.74 0.00 1.2 0.041
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1 9 29 0.99740 3.26 1.56 9.1
## 2 20 136 0.99720 2.93 1.95 9.9
## 3 20 136 0.99720 2.93 1.95 9.9
## 4 18 69 0.99730 3.08 1.31 9.3
## 5 27 81 0.99640 3.04 1.61 9.5
## 6 8 45 0.99680 3.06 1.26 9.4
## 7 12 29 0.99970 3.11 1.36 9.8
## 8 25 57 0.99700 3.18 1.36 10.3
## 9 18 44 0.99210 3.90 0.62 12.8
## 10 36 127 0.99650 2.94 1.62 9.5
## 11 16 45 0.99702 3.03 1.34 9.2
## 12 16 46 0.99258 4.01 0.59 12.5
## 13 18 64 0.99652 2.90 1.33 9.1
## 14 16 46 0.99258 4.01 0.59 12.5
## quality Id mahalanobis
## 1 5 13 29.91473
## 2 6 86 62.39609
## 3 6 91 62.39609
## 4 5 106 16.29024
## 5 6 226 33.76780
## 6 5 258 14.36022
## 7 7 339 18.37658
## 8 6 639 17.99130
## 9 6 695 14.95343
## 10 5 723 36.02146
## 11 5 1051 18.37630
## 12 6 1316 20.93535
## 13 6 1319 20.70642
## 14 6 1321 20.93535
df_clean <- df_clean %>%
mutate(outlier_multi = mahalanobis > cutoff)
table(df_clean$outlier_multi)
##
## FALSE TRUE
## 1074 14
df_clean <- df_clean %>%
filter(mahalanobis <= cutoff)
ggplot(df_clean, aes(x = quality, y = pH, fill = quality)) +
geom_boxplot(alpha = 0.7) +
theme_minimal() +
labs(title = "Distribusi pH Berdasarkan Kualitas Anggur",
x = "Kualitas Anggur (Quality)",
y = "Tingkat pH") +
theme(legend.position = "none")
ggplot(df_clean, aes(x = quality, y = sulphates, fill = quality)) +
geom_boxplot(alpha = 0.7) +
theme_minimal() +
labs(title = "Distribusi Kandungan Sulfat Berdasarkan Kualitas Anggur",
x = "Kualitas Anggur (Quality)",
y = "Kandungan Sulfat (Sulphates)") +
theme(legend.position = "none")
# Normalitas pH untuk masing-masing kelas quality
df_clean %>%
group_by(quality) %>%
shapiro_test(pH)
## # A tibble: 3 × 4
## quality variable statistic p
## <fct> <chr> <dbl> <dbl>
## 1 5 pH 0.997 0.512
## 2 6 pH 0.997 0.594
## 3 7 pH 0.986 0.165
# Normalitas sulphates untuk masing-masing kelas quality
df_clean %>%
group_by(quality) %>%
shapiro_test(sulphates)
## # A tibble: 3 × 4
## quality variable statistic p
## <fct> <chr> <dbl> <dbl>
## 1 5 sulphates 0.817 6.41e-23
## 2 6 sulphates 0.946 9.05e-12
## 3 7 sulphates 0.986 1.76e- 1
# Normalitas Multivariat
mvn_results <- lapply(
split(df_clean[, c("pH", "sulphates")], df_clean$quality),
function(x) MVN::mvn(data = x, mvn_test = "hz", tidy = TRUE)
)
lapply(mvn_results, function(x) x$multivariate_normality)
## $`5`
## Test Statistic p.value Method MVN
## 1 Henze-Zirkler 10.433 <0.001 asymptotic ✗ Not normal
##
## $`6`
## Test Statistic p.value Method MVN
## 1 Henze-Zirkler 5.601 <0.001 asymptotic ✗ Not normal
##
## $`7`
## Test Statistic p.value Method MVN
## 1 Henze-Zirkler 0.641 0.379 asymptotic ✓ Normal
catatan:
- p > 0.05 → tidak ada bukti melanggar normalitas multivariat
- p < 0.05 → indikasi tidak normal multivariat
# Levene's Test (Homogenitas Varians Univariat)
leveneTest(pH ~ quality, data = df_clean)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 0.2333 0.792
## 1071
leveneTest(sulphates ~ quality, data = df_clean)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 1.1705 0.3106
## 1071
catatan: - p > 0.05 → homogen
# Box's M Test (Homogenitas Matriks Kovarians Multivariat)
box_m(df_clean[, c("pH", "sulphates")], df_clean$quality)
## # A tibble: 1 × 4
## statistic p.value parameter method
## <dbl> <dbl> <dbl> <chr>
## 1 7.20 0.302 6 Box's M-test for Homogeneity of Covariance Matric…
catatan:
- p > 0.001 biasanya dianggap aman
- kalau signifikan, Pillai’s Trace biasanya tetap paling robust
# DV tidak boleh saling bebas; harus ada korelasi (Signifikan p < 0.05)
cor(df_clean[, c("pH", "sulphates")], use = "complete.obs")
## pH sulphates
## pH 1.00000000 -0.06937676
## sulphates -0.06937676 1.00000000
cor.test(df_clean$pH, df_clean$sulphates, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: df_clean$pH and df_clean$sulphates
## t = -2.277, df = 1072, p-value = 0.02298
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.128661178 -0.009598222
## sample estimates:
## cor
## -0.06937676
psych::cortest.bartlett(cor(df_clean[, c("pH", "sulphates")]), n = nrow(df_clean))
## $chisq
## [1] 5.169725
##
## $p.value
## [1] 0.02298386
##
## $df
## [1] 1
catatan:
- p-value < 0.05 -> ada korelasi
- DV sebaiknya berkorelasi sedang
- jangan terlalu rendah (MANOVA jadi kurang berguna)
- jangan terlalu tinggi (indikasi multikolinearitas)
# Kovariat harus memiliki hubungan linier dengan Dependen (Signifikan p < 0.05)
# Korelasi Asam Sitrat terhadap pH
cor.test(df_clean$citric.acid, df_clean$pH, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: df_clean$citric.acid and df_clean$pH
## t = -20.33, df = 1072, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5693624 -0.4829305
## sample estimates:
## cor
## -0.5275101
ggplot(df_clean, aes(x = citric.acid, y = pH, color = quality)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
# Korelasi Asam Sitrat terhadap Sulphates
cor.test(df_clean$citric.acid, df_clean$sulphates, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: df_clean$citric.acid and df_clean$sulphates
## t = 11.067, df = 1072, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2654850 0.3728937
## sample estimates:
## cor
## 0.3202181
ggplot(df_clean, aes(x = citric.acid, y = sulphates, color = quality)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
catatan:
- pola hubungan harus terlihat linear
- p-value < 0.05, maka hubungan linear
# Membuat model linear sementara untuk mengambil nilai residualnya
model_pH <- lm(pH ~ citric.acid + quality, data = df_clean)
model_sulphates <- lm(sulphates ~ citric.acid + quality, data = df_clean)
# Uji Durbin-Watson (Nilai ideal berkisar di angka 2.0)
durbinWatsonTest(model_pH)
## lag Autocorrelation D-W Statistic p-value
## 1 0.1894929 1.61751 0
## Alternative hypothesis: rho != 0
durbinWatsonTest(model_sulphates)
## lag Autocorrelation D-W Statistic p-value
## 1 0.2098177 1.579438 0
## Alternative hypothesis: rho != 0
Catatan:
- Jika hasil sekitar 2, maka dianggap lolos atau tidak ada autokorelasi
slope_pH <- lm(pH ~ citric.acid * quality, data = df_clean)
slope_sulphates <- lm(sulphates ~ citric.acid * quality, data = df_clean)
Anova(slope_pH, type = 2)
## Anova Table (Type II tests)
##
## Response: pH
## Sum Sq Df F value Pr(>F)
## citric.acid 6.6590 1 432.7123 < 2.2e-16 ***
## quality 0.3182 2 10.3379 3.575e-05 ***
## citric.acid:quality 0.1045 2 3.3947 0.03391 *
## Residuals 16.4354 1068
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(slope_sulphates, type = 2)
## Anova Table (Type II tests)
##
## Response: sulphates
## Sum Sq Df F value Pr(>F)
## citric.acid 1.3747 1 84.1861 < 2e-16 ***
## quality 1.4307 2 43.8069 < 2e-16 ***
## citric.acid:quality 0.0785 2 2.4046 0.09079 .
## Residuals 17.4397 1068
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
slope_mancova <- manova(cbind(pH, sulphates) ~ citric.acid * quality, data = df_clean)
summary(slope_mancova, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## citric.acid 1 0.35978 299.804 2 1067 < 2e-16 ***
## quality 2 0.08705 24.301 4 2136 < 2e-16 ***
## citric.acid:quality 2 0.00999 2.682 4 2136 0.03007 *
## Residuals 1068
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
catatan:
- interaksi citric.acid:quality harus tidak signifikan
- kalau signifikan, slope antar grup berbeda → ANCOVA/MANCOVA klasik tidak terpenuhi
- p-value > 0.05
model_ancova_sul <- lm(sulphates ~ citric.acid + quality, data = df_clean)
# cek slope
model_slope_sul <- lm(sulphates ~ citric.acid * quality, data = df_clean)
Anova(model_slope_sul, type = 2)
## Anova Table (Type II tests)
##
## Response: sulphates
## Sum Sq Df F value Pr(>F)
## citric.acid 1.3747 1 84.1861 < 2e-16 ***
## quality 1.4307 2 43.8069 < 2e-16 ***
## citric.acid:quality 0.0785 2 2.4046 0.09079 .
## Residuals 17.4397 1068
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# model final
Anova(model_ancova_sul, type = 2)
## Anova Table (Type II tests)
##
## Response: sulphates
## Sum Sq Df F value Pr(>F)
## citric.acid 1.3747 1 83.966 < 2.2e-16 ***
## quality 1.4307 2 43.692 < 2.2e-16 ***
## Residuals 17.5183 1070
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Berdasarkan hasil uji ANCOVA, variabel citric acid dan quality berpengaruh signifikan terhadap sulphates (p < 0.05). Sementara itu, interaksi antara citric acid dan quality tidak signifikan (p > 0.05), sehingga model tanpa interaksi digunakan . Hal ini menunjukkan bahwa pengaruh citric acid terhadap sulphates konsisten pada setiap kategori kualitas wine.
model_manova <- manova(cbind(pH, sulphates) ~ quality, data = df_clean)
summary(model_manova, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## quality 2 0.11218 31.82 4 2142 < 2.2e-16 ***
## Residuals 1071
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.aov(model_manova)
## Response pH :
## Df Sum Sq Mean Sq F value Pr(>F)
## quality 2 0.1589 0.079439 3.6674 0.02586 *
## Residuals 1071 23.1989 0.021661
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response sulphates :
## Df Sum Sq Mean Sq F value Pr(>F)
## quality 2 2.221 1.11050 62.952 < 2.2e-16 ***
## Residuals 1071 18.893 0.01764
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Secara keseluruhan, hasil MANOVA membuktikan bahwa terdapat perbedaan profil kimia wine berdasarkan kategori kualitas (Pillai’s Trace = 0.11218 dengan F = 31.82 dan p-value < 0.001, sehingga H0 ditolak). Perbedaan tersebut terutama tampak kuat pada variabel sulphates ((F = 62.952; p < 0.001)), sementara pH juga memberikan kontribusi yang signifikan (F = 3.6674; p = 0.02586). ## UJI UTAMA: MANCOVA
# Membangun model MANCOVA menggunakan Pillai's Trace
model_mancova <- manova(cbind(pH, sulphates) ~ citric.acid + quality, data = df_clean)
# Menampilkan hasil multivariat
summary(model_mancova, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## citric.acid 1 0.35942 299.899 2 1069 < 2.2e-16 ***
## quality 2 0.08644 24.168 4 2140 < 2.2e-16 ***
## Residuals 1070
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Menampilkan hasil univariat (ANCOVA terpisah jika multivariatnya signifikan)
summary.aov(model_mancova)
## Response pH :
## Df Sum Sq Mean Sq F value Pr(>F)
## citric.acid 1 6.4997 6.4997 420.479 < 2.2e-16 ***
## quality 2 0.3182 0.1591 10.292 3.739e-05 ***
## Residuals 1070 16.5399 0.0155
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response sulphates :
## Df Sum Sq Mean Sq F value Pr(>F)
## citric.acid 1 2.1650 2.16502 132.237 < 2.2e-16 ***
## quality 2 1.4307 0.71534 43.692 < 2.2e-16 ***
## Residuals 1070 17.5183 0.01637
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1