Import data

# Read the Excel workbook (relative path) into `data`.
data <- read_excel(path = "../01_module4/data/myData.xlsx")

Apply the following dplyr verbs to your data

Filter rows

# Keep only the rows where series_winner equals 1.
data %>%
    filter(series_winner == 1)
## # A tibble: 10 × 25
##    Column1 series baker   star_baker technical_winner technical_top3
##      <dbl>  <dbl> <chr>        <dbl>            <dbl>          <dbl>
##  1       3      1 Edd              0                2              4
##  2      16      2 Joanne           0                3              4
##  3      27      3 John             0                1              6
##  4      39      4 Frances          0                1              7
##  5      57      5 Nancy            0                3              7
##  6      66      6 Nadiya           0                4              4
##  7      74      7 Candice          0                1              7
##  8      91      8 Sophie           0                2              6
##  9     105      9 Rahul            0                1              5
## 10     111     10 David            0                2              8
## # ℹ 19 more variables: technical_bottom <dbl>, technical_highest <chr>,
## #   technical_lowest <chr>, technical_median <chr>, series_winner <dbl>,
## #   series_runner_up <dbl>, total_episodes_appeared <dbl>,
## #   first_date_appeared <chr>, last_date_appeared <chr>, first_date_us <chr>,
## #   last_date_us <chr>, percent_episodes_appeared <dbl>,
## #   percent_technical_top3 <dbl>, baker_full <chr>, age <dbl>,
## #   occupation <chr>, hometown <chr>, baker_last <chr>, baker_first <chr>

Arrange rows

# Sort by technical_winner (highest first); ties are ordered by baker and
# then by series_winner.
#
# The earlier version piped into `arrange(data, desc(technical_winner))`.
# Because the pipe already supplies the first argument, the stray `data`
# was treated as a sort key and the descending sort never took effect.
# NOTE: the printed output below predates this fix; re-knit to refresh it.
data %>%
    arrange(desc(technical_winner), baker, series_winner)
## # A tibble: 120 × 25
##    Column1 series baker     star_baker technical_winner technical_top3
##      <dbl>  <dbl> <chr>          <dbl>            <dbl>          <dbl>
##  1      35      4 Ali                0                0              0
##  2     108     10 Alice              0                2              4
##  3      60      6 Alvin              0                0              1
##  4     109     10 Amelia             0                0              0
##  5      72      7 Andrew             0                2              5
##  6       1      1 Annetha            0                0              1
##  7      96      9 Antony             0                0              0
##  8      36      4 Beca               0                1              3
##  9      11      2 Ben                0                1              3
## 10      73      7 Benjamina          0                1              4
## # ℹ 110 more rows
## # ℹ 19 more variables: technical_bottom <dbl>, technical_highest <chr>,
## #   technical_lowest <chr>, technical_median <chr>, series_winner <dbl>,
## #   series_runner_up <dbl>, total_episodes_appeared <dbl>,
## #   first_date_appeared <chr>, last_date_appeared <chr>, first_date_us <chr>,
## #   last_date_us <chr>, percent_episodes_appeared <dbl>,
## #   percent_technical_top3 <dbl>, baker_full <chr>, age <dbl>, …

Select columns

# Keep three columns: baker, series and total_episodes_appeared.
# (`series` was previously listed twice; the duplicate had no effect.)
data %>%
    select(baker, series, total_episodes_appeared)
## # A tibble: 120 × 3
##    baker     series total_episodes_appeared
##    <chr>      <dbl>                   <dbl>
##  1 Annetha        1                       2
##  2 David          1                       4
##  3 Edd            1                       6
##  4 Jasminder      1                       5
##  5 Jonathan       1                       3
##  6 Lea            1                       1
##  7 Louise         1                       2
##  8 Mark           1                       1
##  9 Miranda        1                       6
## 10 Ruth           1                       6
## # ℹ 110 more rows

Add columns

# Pull out the percent_episodes_appeared column on its own.
percent_appeared <- data %>%
    select(percent_episodes_appeared)

# Add a column holding the same value divided by 100.
percent_appeared %>%
    mutate(percent_appeared = percent_episodes_appeared / 100)
## # A tibble: 120 × 2
##    percent_episodes_appeared percent_appeared
##                        <dbl>            <dbl>
##  1                      33.3            0.333
##  2                      66.7            0.667
##  3                     100              1    
##  4                      83.3            0.833
##  5                      50              0.5  
##  6                      16.7            0.167
##  7                      33.3            0.333
##  8                      16.7            0.167
##  9                     100              1    
## 10                     100              1    
## # ℹ 110 more rows

Summarize by groups

# Mean of technical_winner for each value of series_winner.
#
# The earlier version also grouped by technical_winner, so every group
# held a single technical_winner value and its "mean" merely repeated the
# grouping key. Grouping by series_winner alone gives a real summary.
# NOTE: the printed output below predates this fix; re-knit to refresh it.
data %>%
    group_by(series_winner) %>%
    summarise(mean = mean(technical_winner))
## # A tibble: 9 × 3
## # Groups:   series_winner [2]
##   series_winner technical_winner  mean
##           <dbl>            <dbl> <dbl>
## 1             0                0     0
## 2             0                1     1
## 3             0                2     2
## 4             0                3     3
## 5             0                5     5
## 6             1                1     1
## 7             1                2     2
## 8             1                3     3
## 9             1                4     4