Week 8: Apply it to your data 7

Import your data

# Read the workbook into a tibble. readxl reports one column with an empty
# header and auto-names it `...7`.
myData <- read_xlsx(path = "../00_data/myData.xlsx")

## New names:
## • `` -> `...7`

Pivoting

wide to long form

# Stack the `pH` and `quality` columns into label/value pairs: each input row
# becomes two rows, with the source column name stored in `pH` and the
# measurement stored in `level`.
# NOTE(review): `names_to = "pH"` reuses the name of one of the pivoted
# columns, so the resulting `pH` column holds labels (character), not pH
# readings. The later pivot_wider() call relies on this name.
myData_long <- pivot_longer(
    data      = myData,
    cols      = c("pH", "quality"),
    names_to  = "pH",
    values_to = "level"
)

myData_long

## # A tibble: 3,198 × 12
##    fixed…¹ volat…² citri…³ resid…⁴ chlor…⁵ free_…⁶  ...7 density sulph…⁷ alcohol
##      <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>
##  1     7.4    0.7     0        1.9   0.076      11    34   0.998    0.56     9.4
##  2     7.4    0.7     0        1.9   0.076      11    34   0.998    0.56     9.4
##  3     7.8    0.88    0        2.6   0.098      25    67   0.997    0.68     9.8
##  4     7.8    0.88    0        2.6   0.098      25    67   0.997    0.68     9.8
##  5     7.8    0.76    0.04     2.3   0.092      15    54   0.997    0.65     9.8
##  6     7.8    0.76    0.04     2.3   0.092      15    54   0.997    0.65     9.8
##  7    11.2    0.28    0.56     1.9   0.075      17    60   0.998    0.58     9.8
##  8    11.2    0.28    0.56     1.9   0.075      17    60   0.998    0.58     9.8
##  9     7.4    0.7     0        1.9   0.076      11    34   0.998    0.56     9.4
## 10     7.4    0.7     0        1.9   0.076      11    34   0.998    0.56     9.4
## # … with 3,188 more rows, 2 more variables: pH <chr>, level <dbl>, and
## #   abbreviated variable names ¹fixed_acidity, ²volatile_acidity, ³citric_acid,
## #   ⁴residual_sugar, ⁵chlorides, ⁶free_sulfur_dioxide, ⁷sulphates

long to wide form

# Spread the label/value pairs back into one column per label.
# The remaining columns do not uniquely identify rows, so tidyr warns and
# returns `pH` and `quality` as list-columns rather than plain numerics.
pivot_wider(
    data        = myData_long,
    names_from  = pH,
    values_from = level
)

## Warning: Values from `level` are not uniquely identified; output will contain list-cols.
## * Use `values_fn = list` to suppress this warning.
## * Use `values_fn = {summary_fun}` to summarise duplicates.
## * Use the following dplyr code to identify duplicates.
##   {data} %>%
##     dplyr::group_by(fixed_acidity, volatile_acidity, citric_acid, residual_sugar, chlorides, free_sulfur_dioxide, ...7, density, sulphates, alcohol, pH) %>%
##     dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
##     dplyr::filter(n > 1L)

## # A tibble: 1,359 × 12
##    fixed…¹ volat…² citri…³ resid…⁴ chlor…⁵ free_…⁶  ...7 density sulph…⁷ alcohol
##      <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>
##  1     7.4    0.7     0        1.9   0.076      11    34   0.998    0.56     9.4
##  2     7.8    0.88    0        2.6   0.098      25    67   0.997    0.68     9.8
##  3     7.8    0.76    0.04     2.3   0.092      15    54   0.997    0.65     9.8
##  4    11.2    0.28    0.56     1.9   0.075      17    60   0.998    0.58     9.8
##  5     7.4    0.66    0        1.8   0.075      13    40   0.998    0.56     9.4
##  6     7.9    0.6     0.06     1.6   0.069      15    59   0.996    0.46     9.4
##  7     7.3    0.65    0        1.2   0.065      15    21   0.995    0.47    10  
##  8     7.8    0.58    0.02     2     0.073       9    18   0.997    0.57     9.5
##  9     7.5    0.5     0.36     6.1   0.071      17   102   0.998    0.8     10.5
## 10     6.7    0.58    0.08     1.8   0.097      15    65   0.996    0.54     9.2
## # … with 1,349 more rows, 2 more variables: pH <list>, quality <list>, and
## #   abbreviated variable names ¹fixed_acidity, ²volatile_acidity, ³citric_acid,
## #   ⁴residual_sugar, ⁵chlorides, ⁶free_sulfur_dioxide, ⁷sulphates

Separating and Uniting

# Split `pH` into two character pieces using separate()'s default separator.
# NOTE(review): the pieces are named `quality` and `fixed_acidity`, which
# replaces the original columns of those names (the printed result has 11
# columns and shows both as <chr>). Rows where `pH` yields only one piece get
# NA in the second column, which triggers the warning.
myData_seperated <- separate(
    data = myData,
    col  = pH,
    into = c("quality", "fixed_acidity")
)

## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 6 rows [170, 241,
## 433, 600, 1371, 1373].

# Print the separated tibble to inspect the two new character columns.
myData_seperated

## # A tibble: 1,599 × 11
##    volat…¹ citri…² resid…³ chlor…⁴ free_…⁵  ...7 density sulph…⁶ quality fixed…⁷
##      <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>   <dbl> <chr>   <chr>  
##  1    0.7     0        1.9   0.076      11    34   0.998    0.56 3       51     
##  2    0.88    0        2.6   0.098      25    67   0.997    0.68 3       2      
##  3    0.76    0.04     2.3   0.092      15    54   0.997    0.65 3       26     
##  4    0.28    0.56     1.9   0.075      17    60   0.998    0.58 3       16     
##  5    0.7     0        1.9   0.076      11    34   0.998    0.56 3       51     
##  6    0.66    0        1.8   0.075      13    40   0.998    0.56 3       51     
##  7    0.6     0.06     1.6   0.069      15    59   0.996    0.46 3       3      
##  8    0.65    0        1.2   0.065      15    21   0.995    0.47 3       39     
##  9    0.58    0.02     2     0.073       9    18   0.997    0.57 3       36     
## 10    0.5     0.36     6.1   0.071      17   102   0.998    0.8  3       35     
## # … with 1,589 more rows, 1 more variable: alcohol <dbl>, and abbreviated
## #   variable names ¹volatile_acidity, ²citric_acid, ³residual_sugar,
## #   ⁴chlorides, ⁵free_sulfur_dioxide, ⁶sulphates, ⁷fixed_acidity

Separate a column

# Split `pH` into two character columns. This repeats the separate() call made
# under "Separating and Uniting" and produces the same object.
# NOTE(review): the target names `quality` and `fixed_acidity` overwrite the
# original columns with those names.
myData_seperated <- separate(
    data = myData,
    col  = pH,
    into = c("quality", "fixed_acidity")
)

## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 6 rows [170, 241,
## 433, 600, 1371, 1373].

Unite two columns

# Paste every column in the range `fixed_acidity:citric_acid` into a single
# "/"-delimited character column named `pH`.
# In `myData_seperated`, `fixed_acidity` sits to the right of `citric_acid`,
# so the range is taken right-to-left; only `volatile_acidity` and `alcohol`
# remain as separate columns.
unite(
    data = myData_seperated,
    col  = "pH",
    fixed_acidity:citric_acid,
    sep  = "/"
)

## # A tibble: 1,599 × 3
##    volatile_acidity pH                                    alcohol
##               <dbl> <chr>                                   <dbl>
##  1             0.7  51/3/0.56/0.9978/34/11/0.076/1.9/0        9.4
##  2             0.88 2/3/0.68/0.9968/67/25/0.098/2.6/0         9.8
##  3             0.76 26/3/0.65/0.997/54/15/0.092/2.3/0.04      9.8
##  4             0.28 16/3/0.58/0.998/60/17/0.075/1.9/0.56      9.8
##  5             0.7  51/3/0.56/0.9978/34/11/0.076/1.9/0        9.4
##  6             0.66 51/3/0.56/0.9978/40/13/0.075/1.8/0        9.4
##  7             0.6  3/3/0.46/0.9964/59/15/0.069/1.6/0.06      9.4
##  8             0.65 39/3/0.47/0.9946/21/15/0.065/1.2/0       10  
##  9             0.58 36/3/0.57/0.9968/18/9/0.073/2/0.02        9.5
## 10             0.5  35/3/0.8/0.9978/102/17/0.071/6.1/0.36    10.5
## # … with 1,589 more rows

Week 8: Apply it to your data 7

Daniel Lee

2022-10-05

Import your data

Pivoting

wide to long form

long to wide form

Separating and Uniting

Separate a column

Unite two columns

Missing Values