myData <- read_xlsx("../00_data/myData.xlsx")
## New names:
## • `` -> `...7`
myData_long <- myData %>%
pivot_longer(cols = c('pH' , 'quality') ,
names_to = "pH" ,
values_to = "level")
myData_long
## # A tibble: 3,198 × 12
## fixed…¹ volat…² citri…³ resid…⁴ chlor…⁵ free_…⁶ ...7 density sulph…⁷ alcohol
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 7.4 0.7 0 1.9 0.076 11 34 0.998 0.56 9.4
## 2 7.4 0.7 0 1.9 0.076 11 34 0.998 0.56 9.4
## 3 7.8 0.88 0 2.6 0.098 25 67 0.997 0.68 9.8
## 4 7.8 0.88 0 2.6 0.098 25 67 0.997 0.68 9.8
## 5 7.8 0.76 0.04 2.3 0.092 15 54 0.997 0.65 9.8
## 6 7.8 0.76 0.04 2.3 0.092 15 54 0.997 0.65 9.8
## 7 11.2 0.28 0.56 1.9 0.075 17 60 0.998 0.58 9.8
## 8 11.2 0.28 0.56 1.9 0.075 17 60 0.998 0.58 9.8
## 9 7.4 0.7 0 1.9 0.076 11 34 0.998 0.56 9.4
## 10 7.4 0.7 0 1.9 0.076 11 34 0.998 0.56 9.4
## # … with 3,188 more rows, 2 more variables: pH <chr>, level <dbl>, and
## # abbreviated variable names ¹fixed_acidity, ²volatile_acidity, ³citric_acid,
## # ⁴residual_sugar, ⁵chlorides, ⁶free_sulfur_dioxide, ⁷sulphates
myData_long %>%
pivot_wider(names_from = pH,
values_from = level)
## Warning: Values from `level` are not uniquely identified; output will contain list-cols.
## * Use `values_fn = list` to suppress this warning.
## * Use `values_fn = {summary_fun}` to summarise duplicates.
## * Use the following dplyr code to identify duplicates.
## {data} %>%
## dplyr::group_by(fixed_acidity, volatile_acidity, citric_acid, residual_sugar, chlorides, free_sulfur_dioxide, ...7, density, sulphates, alcohol, pH) %>%
## dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
## dplyr::filter(n > 1L)
## # A tibble: 1,359 × 12
## fixed…¹ volat…² citri…³ resid…⁴ chlor…⁵ free_…⁶ ...7 density sulph…⁷ alcohol
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 7.4 0.7 0 1.9 0.076 11 34 0.998 0.56 9.4
## 2 7.8 0.88 0 2.6 0.098 25 67 0.997 0.68 9.8
## 3 7.8 0.76 0.04 2.3 0.092 15 54 0.997 0.65 9.8
## 4 11.2 0.28 0.56 1.9 0.075 17 60 0.998 0.58 9.8
## 5 7.4 0.66 0 1.8 0.075 13 40 0.998 0.56 9.4
## 6 7.9 0.6 0.06 1.6 0.069 15 59 0.996 0.46 9.4
## 7 7.3 0.65 0 1.2 0.065 15 21 0.995 0.47 10
## 8 7.8 0.58 0.02 2 0.073 9 18 0.997 0.57 9.5
## 9 7.5 0.5 0.36 6.1 0.071 17 102 0.998 0.8 10.5
## 10 6.7 0.58 0.08 1.8 0.097 15 65 0.996 0.54 9.2
## # … with 1,349 more rows, 2 more variables: pH <list>, quality <list>, and
## # abbreviated variable names ¹fixed_acidity, ²volatile_acidity, ³citric_acid,
## # ⁴residual_sugar, ⁵chlorides, ⁶free_sulfur_dioxide, ⁷sulphates
myData_seperated <- myData%>%
separate(col = pH, into = c("quality" , "fixed_acidity"))
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 6 rows [170, 241,
## 433, 600, 1371, 1373].
myData_seperated
## # A tibble: 1,599 × 11
## volat…¹ citri…² resid…³ chlor…⁴ free_…⁵ ...7 density sulph…⁶ quality fixed…⁷
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 0.7 0 1.9 0.076 11 34 0.998 0.56 3 51
## 2 0.88 0 2.6 0.098 25 67 0.997 0.68 3 2
## 3 0.76 0.04 2.3 0.092 15 54 0.997 0.65 3 26
## 4 0.28 0.56 1.9 0.075 17 60 0.998 0.58 3 16
## 5 0.7 0 1.9 0.076 11 34 0.998 0.56 3 51
## 6 0.66 0 1.8 0.075 13 40 0.998 0.56 3 51
## 7 0.6 0.06 1.6 0.069 15 59 0.996 0.46 3 3
## 8 0.65 0 1.2 0.065 15 21 0.995 0.47 3 39
## 9 0.58 0.02 2 0.073 9 18 0.997 0.57 3 36
## 10 0.5 0.36 6.1 0.071 17 102 0.998 0.8 3 35
## # … with 1,589 more rows, 1 more variable: alcohol <dbl>, and abbreviated
## # variable names ¹volatile_acidity, ²citric_acid, ³residual_sugar,
## # ⁴chlorides, ⁵free_sulfur_dioxide, ⁶sulphates, ⁷fixed_acidity
myData_seperated <- myData %>%
separate(col = pH, into = c("quality" , "fixed_acidity"))
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 6 rows [170, 241,
## 433, 600, 1371, 1373].
myData_seperated %>%
unite(col = "pH", c(fixed_acidity:citric_acid), sep = "/")
## # A tibble: 1,599 × 3
## volatile_acidity pH alcohol
## <dbl> <chr> <dbl>
## 1 0.7 51/3/0.56/0.9978/34/11/0.076/1.9/0 9.4
## 2 0.88 2/3/0.68/0.9968/67/25/0.098/2.6/0 9.8
## 3 0.76 26/3/0.65/0.997/54/15/0.092/2.3/0.04 9.8
## 4 0.28 16/3/0.58/0.998/60/17/0.075/1.9/0.56 9.8
## 5 0.7 51/3/0.56/0.9978/34/11/0.076/1.9/0 9.4
## 6 0.66 51/3/0.56/0.9978/40/13/0.075/1.8/0 9.4
## 7 0.6 3/3/0.46/0.9964/59/15/0.069/1.6/0.06 9.4
## 8 0.65 39/3/0.47/0.9946/21/15/0.065/1.2/0 10
## 9 0.58 36/3/0.57/0.9968/18/9/0.073/2/0.02 9.5
## 10 0.5 35/3/0.8/0.9978/102/17/0.071/6.1/0.36 10.5
## # … with 1,589 more rows