Apply the following dplyr verbs to your data
Filter rows
# Keep only the rows where series_winner equals 1.
data %>%
  filter(series_winner == 1)
## # A tibble: 10 × 25
## Column1 series baker star_baker technical_winner technical_top3
## <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 3 1 Edd 0 2 4
## 2 16 2 Joanne 0 3 4
## 3 27 3 John 0 1 6
## 4 39 4 Frances 0 1 7
## 5 57 5 Nancy 0 3 7
## 6 66 6 Nadiya 0 4 4
## 7 74 7 Candice 0 1 7
## 8 91 8 Sophie 0 2 6
## 9 105 9 Rahul 0 1 5
## 10 111 10 David 0 2 8
## # ℹ 19 more variables: technical_bottom <dbl>, technical_highest <chr>,
## # technical_lowest <chr>, technical_median <chr>, series_winner <dbl>,
## # series_runner_up <dbl>, total_episodes_appeared <dbl>,
## # first_date_appeared <chr>, last_date_appeared <chr>, first_date_us <chr>,
## # last_date_us <chr>, percent_episodes_appeared <dbl>,
## # percent_technical_top3 <dbl>, baker_full <chr>, age <dbl>,
## # occupation <chr>, hometown <chr>, baker_last <chr>, baker_first <chr>
Arrange rows
# Sort rows by baker, breaking ties by technical_winner and then by
# series_winner (all ascending).
#
# This call used to be piped into `arrange(data, desc(technical_winner))`.
# `%>%` already passes the piped tibble as the first argument, so `data` was
# treated as a sort key rather than as the input, and the rows stayed in
# baker order -- which is what the printed result shows. The ineffective
# second step has been dropped. To sort by most technical wins instead, use:
#   arrange(data, desc(technical_winner))
arrange(data, baker, technical_winner, series_winner)
## # A tibble: 120 × 25
## Column1 series baker star_baker technical_winner technical_top3
## <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 35 4 Ali 0 0 0
## 2 108 10 Alice 0 2 4
## 3 60 6 Alvin 0 0 1
## 4 109 10 Amelia 0 0 0
## 5 72 7 Andrew 0 2 5
## 6 1 1 Annetha 0 0 1
## 7 96 9 Antony 0 0 0
## 8 36 4 Beca 0 1 3
## 9 11 2 Ben 0 1 3
## 10 73 7 Benjamina 0 1 4
## # ℹ 110 more rows
## # ℹ 19 more variables: technical_bottom <dbl>, technical_highest <chr>,
## # technical_lowest <chr>, technical_median <chr>, series_winner <dbl>,
## # series_runner_up <dbl>, total_episodes_appeared <dbl>,
## # first_date_appeared <chr>, last_date_appeared <chr>, first_date_us <chr>,
## # last_date_us <chr>, percent_episodes_appeared <dbl>,
## # percent_technical_top3 <dbl>, baker_full <chr>, age <dbl>, …
Select columns
# Keep three columns, in this order. `series` was previously listed twice;
# select() returns each column once, so the repeat had no effect on the
# three-column result.
select(data, baker, series, total_episodes_appeared)
## # A tibble: 120 × 3
## baker series total_episodes_appeared
## <chr> <dbl> <dbl>
## 1 Annetha 1 2
## 2 David 1 4
## 3 Edd 1 6
## 4 Jasminder 1 5
## 5 Jonathan 1 3
## 6 Lea 1 1
## 7 Louise 1 2
## 8 Mark 1 1
## 9 Miranda 1 6
## 10 Ruth 1 6
## # ℹ 110 more rows
Add columns
# Pull the percent_episodes_appeared column out into its own one-column table.
percent_appeared <- data %>%
  select(percent_episodes_appeared)

# Print that table with an extra column holding the same value divided by 100.
# The result is only displayed; the `percent_appeared` object is not reassigned.
percent_appeared %>%
  mutate(percent_appeared = percent_episodes_appeared / 100)
## # A tibble: 120 × 2
## percent_episodes_appeared percent_appeared
## <dbl> <dbl>
## 1 33.3 0.333
## 2 66.7 0.667
## 3 100 1
## 4 83.3 0.833
## 5 50 0.5
## 6 16.7 0.167
## 7 33.3 0.333
## 8 16.7 0.167
## 9 100 1
## 10 100 1
## # ℹ 110 more rows
Summarize by groups
# Summarise technical_winner within each (series_winner, technical_winner)
# group.
# NOTE(review): technical_winner is both a grouping key and the variable being
# averaged, so every group holds a single distinct value and `mean` always
# equals the key. If the goal is the average number of technical wins for
# series winners vs. everyone else, group by series_winner alone -- confirm
# the intent before changing it, since that alters the result.
data %>%
  group_by(series_winner, technical_winner) %>%
  # `.groups = "drop_last"` spells out dplyr's default regrouping (the result
  # stays grouped by series_winner) and silences the informational message.
  summarise(mean = mean(technical_winner), .groups = "drop_last")
## # A tibble: 9 × 3
## # Groups: series_winner [2]
## series_winner technical_winner mean
## <dbl> <dbl> <dbl>
## 1 0 0 0
## 2 0 1 1
## 3 0 2 2
## 4 0 3 3
## 5 0 5 5
## 6 1 1 1
## 7 1 2 2
## 8 1 3 3
## 9 1 4 4