Import data

olympics <- read_excel("myData.xlsx")
olympics
## # A tibble: 271,116 × 15
##       id name     sex   age   height weight team  noc   games  year season city 
##    <dbl> <chr>    <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl> <chr>  <chr>
##  1     1 A Dijia… M     24    180    80     China CHN   1992…  1992 Summer Barc…
##  2     2 A Lamusi M     23    170    60     China CHN   2012…  2012 Summer Lond…
##  3     3 Gunnar … M     24    NA     NA     Denm… DEN   1920…  1920 Summer Antw…
##  4     4 Edgar L… M     34    NA     NA     Denm… DEN   1900…  1900 Summer Paris
##  5     5 Christi… F     21    185    82     Neth… NED   1988…  1988 Winter Calg…
##  6     5 Christi… F     21    185    82     Neth… NED   1988…  1988 Winter Calg…
##  7     5 Christi… F     25    185    82     Neth… NED   1992…  1992 Winter Albe…
##  8     5 Christi… F     25    185    82     Neth… NED   1992…  1992 Winter Albe…
##  9     5 Christi… F     27    185    82     Neth… NED   1994…  1994 Winter Lill…
## 10     5 Christi… F     27    185    82     Neth… NED   1994…  1994 Winter Lill…
## # ℹ 271,106 more rows
## # ℹ 3 more variables: sport <chr>, event <chr>, medal <chr>

Apply the following dplyr verbs to your data

Filter rows

filter(olympics, sex == "F")
## # A tibble: 74,522 × 15
##       id name     sex   age   height weight team  noc   games  year season city 
##    <dbl> <chr>    <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl> <chr>  <chr>
##  1     5 "Christ… F     21    185    82     Neth… NED   1988…  1988 Winter Calg…
##  2     5 "Christ… F     21    185    82     Neth… NED   1988…  1988 Winter Calg…
##  3     5 "Christ… F     25    185    82     Neth… NED   1992…  1992 Winter Albe…
##  4     5 "Christ… F     25    185    82     Neth… NED   1992…  1992 Winter Albe…
##  5     5 "Christ… F     27    185    82     Neth… NED   1994…  1994 Winter Lill…
##  6     5 "Christ… F     27    185    82     Neth… NED   1994…  1994 Winter Lill…
##  7     8 "Cornel… F     18    168    NA     Neth… NED   1932…  1932 Summer Los …
##  8     8 "Cornel… F     18    168    NA     Neth… NED   1932…  1932 Summer Los …
##  9    13 "Minna … F     30    159    55.5   Finl… FIN   1996…  1996 Summer Atla…
## 10    13 "Minna … F     34    159    55.5   Finl… FIN   2000…  2000 Summer Sydn…
## # ℹ 74,512 more rows
## # ℹ 3 more variables: sport <chr>, event <chr>, medal <chr>
# How many medals were won by females?

Arrange rows

arrange(olympics, age)
## # A tibble: 271,116 × 15
##        id name    sex   age   height weight team  noc   games  year season city 
##     <dbl> <chr>   <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl> <chr>  <chr>
##  1  71691 Dimitr… M     10    NA     NA     Ethn… GRE   1896…  1896 Summer Athi…
##  2  22411 Magdal… F     11    152    NA     Grea… GBR   1932…  1932 Winter Lake…
##  3  37333 Carlos… M     11    NA     NA     Spain ESP   1992…  1992 Summer Barc…
##  4  40129 Luigin… F     11    NA     NA     Italy ITA   1928…  1928 Summer Amst…
##  5  47618 Sonja … F     11    155    45     Norw… NOR   1924…  1924 Winter Cham…
##  6  51268 Beatri… F     11    151    38     Roma… ROU   1968…  1968 Winter Gren…
##  7  52070 Etsuko… F     11    NA     NA     Japan JPN   1936…  1936 Winter Garm…
##  8  70616 Liu Lu… F     11    NA     NA     China CHN   1988…  1988 Winter Calg…
##  9  76675 Marcel… F     11    NA     NA     Sout… RSA   1960…  1960 Winter Squa…
## 10 118925 Megan … F     11    157    NA     Grea… GBR   1932…  1932 Winter Lake…
## # ℹ 271,106 more rows
## # ℹ 3 more variables: sport <chr>, event <chr>, medal <chr>

Select columns

select(olympics, name, sport, medal)
## # A tibble: 271,116 × 3
##    name                     sport         medal
##    <chr>                    <chr>         <chr>
##  1 A Dijiang                Basketball    NA   
##  2 A Lamusi                 Judo          NA   
##  3 Gunnar Nielsen Aaby      Football      NA   
##  4 Edgar Lindenau Aabye     Tug-Of-War    Gold 
##  5 Christine Jacoba Aaftink Speed Skating NA   
##  6 Christine Jacoba Aaftink Speed Skating NA   
##  7 Christine Jacoba Aaftink Speed Skating NA   
##  8 Christine Jacoba Aaftink Speed Skating NA   
##  9 Christine Jacoba Aaftink Speed Skating NA   
## 10 Christine Jacoba Aaftink Speed Skating NA   
## # ℹ 271,106 more rows

Add columns

mutate(olympics, 
       height_num = as.numeric(height),
       weight_num = as.numeric(weight),
       bmi = weight_num / (height_num/100)^2)
## # A tibble: 271,116 × 18
##       id name     sex   age   height weight team  noc   games  year season city 
##    <dbl> <chr>    <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl> <chr>  <chr>
##  1     1 A Dijia… M     24    180    80     China CHN   1992…  1992 Summer Barc…
##  2     2 A Lamusi M     23    170    60     China CHN   2012…  2012 Summer Lond…
##  3     3 Gunnar … M     24    NA     NA     Denm… DEN   1920…  1920 Summer Antw…
##  4     4 Edgar L… M     34    NA     NA     Denm… DEN   1900…  1900 Summer Paris
##  5     5 Christi… F     21    185    82     Neth… NED   1988…  1988 Winter Calg…
##  6     5 Christi… F     21    185    82     Neth… NED   1988…  1988 Winter Calg…
##  7     5 Christi… F     25    185    82     Neth… NED   1992…  1992 Winter Albe…
##  8     5 Christi… F     25    185    82     Neth… NED   1992…  1992 Winter Albe…
##  9     5 Christi… F     27    185    82     Neth… NED   1994…  1994 Winter Lill…
## 10     5 Christi… F     27    185    82     Neth… NED   1994…  1994 Winter Lill…
## # ℹ 271,106 more rows
## # ℹ 6 more variables: sport <chr>, event <chr>, medal <chr>, height_num <dbl>,
## #   weight_num <dbl>, bmi <dbl>

Summarize by groups

olympics %>%
  group_by(sport) %>%
  summarise(count = n())
## # A tibble: 66 × 2
##    sport            count
##    <chr>            <int>
##  1 Aeronautics          1
##  2 Alpine Skiing     8829
##  3 Alpinism            25
##  4 Archery           2334
##  5 Art Competitions  3578
##  6 Athletics        38624
##  7 Badminton         1457
##  8 Baseball           894
##  9 Basketball        4536
## 10 Basque Pelota        2
## # ℹ 56 more rows