Import data

# Load the Excel workbook into a tibble (path is relative to the working dir).
# NOTE(review): the import message reports a blank header for the 7th column,
# which is auto-named `...7` -- consider giving it a real name in the workbook.
data <- read_excel(path = "../00_data/myData.xlsx")
## New names:
## • `` -> `...7`
# Auto-print the imported tibble to preview its first rows and column types.
data
## # A tibble: 1,599 × 12
##    fixed_a…¹ volat…² citri…³ resid…⁴ chlor…⁵ free_…⁶  ...7 density    pH sulph…⁷
##        <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl> <dbl>   <dbl>
##  1       7.4    0.7     0        1.9   0.076      11    34   0.998  3.51    0.56
##  2       7.8    0.88    0        2.6   0.098      25    67   0.997  3.2     0.68
##  3       7.8    0.76    0.04     2.3   0.092      15    54   0.997  3.26    0.65
##  4      11.2    0.28    0.56     1.9   0.075      17    60   0.998  3.16    0.58
##  5       7.4    0.7     0        1.9   0.076      11    34   0.998  3.51    0.56
##  6       7.4    0.66    0        1.8   0.075      13    40   0.998  3.51    0.56
##  7       7.9    0.6     0.06     1.6   0.069      15    59   0.996  3.3     0.46
##  8       7.3    0.65    0        1.2   0.065      15    21   0.995  3.39    0.47
##  9       7.8    0.58    0.02     2     0.073       9    18   0.997  3.36    0.57
## 10       7.5    0.5     0.36     6.1   0.071      17   102   0.998  3.35    0.8 
## # … with 1,589 more rows, 2 more variables: alcohol <dbl>, quality <dbl>, and
## #   abbreviated variable names ¹​fixed_acidity, ²​volatile_acidity, ³​citric_acid,
## #   ⁴​residual_sugar, ⁵​chlorides, ⁶​free_sulfur_dioxide, ⁷​sulphates

Filter

# Keep only the rows whose quality score equals 8.
data %>%
  filter(quality == 8)
## # A tibble: 18 × 12
##    fixed_a…¹ volat…² citri…³ resid…⁴ chlor…⁵ free_…⁶  ...7 density    pH sulph…⁷
##        <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl> <dbl>   <dbl>
##  1       7.9    0.35    0.46     3.6   0.078      15    37   0.997  3.35    0.86
##  2      10.3    0.32    0.45     6.4   0.073       5    13   0.998  3.23    0.82
##  3       5.6    0.85    0.05     1.4   0.045      12    88   0.992  3.56    0.82
##  4      12.6    0.31    0.72     2.2   0.072       6    29   0.999  2.88    0.82
##  5      11.3    0.62    0.67     5.2   0.086       6    19   0.999  3.22    0.69
##  6       9.4    0.3     0.56     2.8   0.08        6    17   0.996  3.15    0.92
##  7      10.7    0.35    0.53     2.6   0.07        5    16   0.997  3.15    0.65
##  8      10.7    0.35    0.53     2.6   0.07        5    16   0.997  3.15    0.65
##  9       5      0.42    0.24     2     0.06       19    50   0.992  3.72    0.74
## 10       7.8    0.57    0.09     2.3   0.065      34    45   0.994  3.46    0.74
## 11       9.1    0.4     0.5      1.8   0.071       7    16   0.995  3.21    0.69
## 12      10      0.26    0.54     1.9   0.083      42    74   0.995  2.98    0.63
## 13       7.9    0.54    0.34     2.5   0.076       8    17   0.992  3.2     0.72
## 14       8.6    0.42    0.39     1.8   0.068       6    12   0.995  3.35    0.69
## 15       5.5    0.49    0.03     1.8   0.044      28    87   0.991  3.5     0.82
## 16       7.2    0.33    0.33     1.7   0.061       3    13   0.996  3.23    1.1 
## 17       7.2    0.38    0.31     2     0.056      15    29   0.995  3.23    0.76
## 18       7.4    0.36    0.3      1.8   0.074      17    24   0.994  3.24    0.7 
## # … with 2 more variables: alcohol <dbl>, quality <dbl>, and abbreviated
## #   variable names ¹​fixed_acidity, ²​volatile_acidity, ³​citric_acid,
## #   ⁴​residual_sugar, ⁵​chlorides, ⁶​free_sulfur_dioxide, ⁷​sulphates

Arrange

# Sort the rows by alcohol, highest values first.
data %>%
  arrange(desc(alcohol))
## # A tibble: 1,599 × 12
##    fixed_a…¹ volat…² citri…³ resid…⁴ chlor…⁵ free_…⁶  ...7 density    pH sulph…⁷
##        <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl> <dbl>   <dbl>
##  1      15.9    0.36    0.65     7.5   0.096      22    71   0.998  2.98    0.84
##  2       5.2    0.34    0        1.8   0.05       27    63   0.992  3.68    0.79
##  3       5.2    0.34    0        1.8   0.05       27    63   0.992  3.68    0.79
##  4       8.8    0.46    0.45     2.6   0.065       7    18   0.995  3.32    0.79
##  5       5      0.42    0.24     2     0.06       19    50   0.992  3.72    0.74
##  6       4.9    0.42    0        2.1   0.048      16    42   0.992  3.71    0.74
##  7       5.5    0.49    0.03     1.8   0.044      28    87   0.991  3.5     0.82
##  8       5      0.38    0.01     1.6   0.048      26    60   0.991  3.7     0.75
##  9       5      0.4     0.5      4.3   0.046      29    80   0.990  3.49    0.66
## 10       7.4    0.36    0.34     1.8   0.075      18    38   0.993  3.38    0.88
## # … with 1,589 more rows, 2 more variables: alcohol <dbl>, quality <dbl>, and
## #   abbreviated variable names ¹​fixed_acidity, ²​volatile_acidity, ³​citric_acid,
## #   ⁴​residual_sugar, ⁵​chlorides, ⁶​free_sulfur_dioxide, ⁷​sulphates

Select

Add a new column

# Append a derived column: fixed_acidity minus pH, stored as `gain`.
# NOTE(review): the name `gain` does not describe this difference -- confirm
# it is the intended column name.
data %>%
  mutate(gain = fixed_acidity - pH)
## # A tibble: 1,599 × 13
##    fixed_a…¹ volat…² citri…³ resid…⁴ chlor…⁵ free_…⁶  ...7 density    pH sulph…⁷
##        <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl> <dbl>   <dbl>
##  1       7.4    0.7     0        1.9   0.076      11    34   0.998  3.51    0.56
##  2       7.8    0.88    0        2.6   0.098      25    67   0.997  3.2     0.68
##  3       7.8    0.76    0.04     2.3   0.092      15    54   0.997  3.26    0.65
##  4      11.2    0.28    0.56     1.9   0.075      17    60   0.998  3.16    0.58
##  5       7.4    0.7     0        1.9   0.076      11    34   0.998  3.51    0.56
##  6       7.4    0.66    0        1.8   0.075      13    40   0.998  3.51    0.56
##  7       7.9    0.6     0.06     1.6   0.069      15    59   0.996  3.3     0.46
##  8       7.3    0.65    0        1.2   0.065      15    21   0.995  3.39    0.47
##  9       7.8    0.58    0.02     2     0.073       9    18   0.997  3.36    0.57
## 10       7.5    0.5     0.36     6.1   0.071      17   102   0.998  3.35    0.8 
## # … with 1,589 more rows, 3 more variables: alcohol <dbl>, quality <dbl>,
## #   gain <dbl>, and abbreviated variable names ¹​fixed_acidity,
## #   ²​volatile_acidity, ³​citric_acid, ⁴​residual_sugar, ⁵​chlorides,
## #   ⁶​free_sulfur_dioxide, ⁷​sulphates

Summarize