Import data

# excel file
data <- read_excel("../00_data/myData_apply2.xlsx")
data
## # A tibble: 271,116 × 16
##    Column1     id name  sex   age   height weight team  noc   games  year season
##      <dbl>  <dbl> <chr> <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl> <chr> 
##  1  270479 135289 Zzim… M     20    NA     NA     Braz… BRA   1952…  1952 Summer
##  2  207677 104222 zzet… M     26    178    74     Turk… TUR   2016…  2016 Summer
##  3  102956  52087 zzet… M     23    172    85     Turk… TUR   2004…  2004 Summer
##  4  102957  52087 zzet… M     27    172    85     Turk… TUR   2008…  2008 Summer
##  5  106601  53910 Zyta… F     26    187    85     Pola… POL   1988…  1988 Summer
##  6  219493 110259 Zygm… M     25    185    82     Pola… POL   1932…  1932 Summer
##  7  183779  92370 Zygm… M     21    179    72     Pola… POL   1952…  1952 Summer
##  8  183780  92370 Zygm… M     26    179    72     Pola… POL   1956…  1956 Summer
##  9  152047  76313 Zygm… M     27    175    71     Pola… POL   1972…  1972 Summer
## 10  152048  76313 Zygm… M     31    175    71     Pola… POL   1976…  1976 Summer
## # ℹ 271,106 more rows
## # ℹ 4 more variables: city <chr>, sport <chr>, event <chr>, medal <chr>

Apply the following dplyr verbs to your data

Filter rows

filter(data, sex == "F")
## # A tibble: 74,522 × 16
##    Column1     id name  sex   age   height weight team  noc   games  year season
##      <dbl>  <dbl> <chr> <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl> <chr> 
##  1  106601  53910 Zyta… F     26    187    85     Pola… POL   1988…  1988 Summer
##  2  234595 117643 Zuza… F     14    155    48     Pola… POL   1992…  1992 Winter
##  3  153676  77119 Zuza… F     17    NA     NA     Pola… POL   2008…  2008 Summer
##  4  153677  77119 Zuza… F     17    NA     NA     Pola… POL   2008…  2008 Summer
##  5  195453  98118 Zuza… F     25    172    58     Pola… POL   2000…  2000 Summer
##  6  195454  98118 Zuza… F     25    172    58     Pola… POL   2000…  2000 Summer
##  7  195455  98118 Zuza… F     29    172    58     Pola… POL   2004…  2004 Summer
##  8  241407 120919 Zuza… F     21    176    73     Slov… SVK   2010…  2010 Winter
##  9  241332 120882 Zuza… F     31    NA     NA     Slov… SVK   2008…  2008 Summer
## 10  228356 114694 Zuza… F     24    166    65     Slov… SVK   2008…  2008 Summer
## # ℹ 74,512 more rows
## # ℹ 4 more variables: city <chr>, sport <chr>, event <chr>, medal <chr>

Arrange rows

arrange(data, desc(games))
## # A tibble: 271,116 × 16
##    Column1     id name  sex   age   height weight team  noc   games  year season
##      <dbl>  <dbl> <chr> <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl> <chr> 
##  1  207677 104222 "zze… M     26    178    74     Turk… TUR   2016…  2016 Summer
##  2   93560  47358 "Zuz… F     29    173    63     Czec… CZE   2016…  2016 Summer
##  3   92831  47033 "Zur… F     20    164    58     Cuba  CUB   2016…  2016 Summer
##  4  101798  51481 "Zur… M     24    171    68     Geor… GEO   2016…  2016 Summer
##  5   49482  25486 "Zur… M     25    183    75     Geor… GEO   2016…  2016 Summer
##  6  270560 135318 "Zu … M     26    188    87     China CHN   2016…  2016 Summer
##  7  105526  53373 "Zsu… F     27    185    70     Hung… HUN   2016…  2016 Summer
##  8  105527  53373 "Zsu… F     27    185    70     Hung… HUN   2016…  2016 Summer
##  9  105528  53373 "Zsu… F     27    185    70     Hung… HUN   2016…  2016 Summer
## 10   19556  10326 "Zso… M     30    195    95     Hung… HUN   2016…  2016 Summer
## # ℹ 271,106 more rows
## # ℹ 4 more variables: city <chr>, sport <chr>, event <chr>, medal <chr>

Select columns

select(data, sex:season)
## # A tibble: 271,116 × 9
##    sex   age   height weight team   noc   games        year season
##    <chr> <chr> <chr>  <chr>  <chr>  <chr> <chr>       <dbl> <chr> 
##  1 M     20    NA     NA     Brazil BRA   1952 Summer  1952 Summer
##  2 M     26    178    74     Turkey TUR   2016 Summer  2016 Summer
##  3 M     23    172    85     Turkey TUR   2004 Summer  2004 Summer
##  4 M     27    172    85     Turkey TUR   2008 Summer  2008 Summer
##  5 F     26    187    85     Poland POL   1988 Summer  1988 Summer
##  6 M     25    185    82     Poland POL   1932 Summer  1932 Summer
##  7 M     21    179    72     Poland POL   1952 Summer  1952 Summer
##  8 M     26    179    72     Poland POL   1956 Summer  1956 Summer
##  9 M     27    175    71     Poland POL   1972 Summer  1972 Summer
## 10 M     31    175    71     Poland POL   1976 Summer  1976 Summer
## # ℹ 271,106 more rows
select(data, sex, age, team, year, season)
## # A tibble: 271,116 × 5
##    sex   age   team    year season
##    <chr> <chr> <chr>  <dbl> <chr> 
##  1 M     20    Brazil  1952 Summer
##  2 M     26    Turkey  2016 Summer
##  3 M     23    Turkey  2004 Summer
##  4 M     27    Turkey  2008 Summer
##  5 F     26    Poland  1988 Summer
##  6 M     25    Poland  1932 Summer
##  7 M     21    Poland  1952 Summer
##  8 M     26    Poland  1956 Summer
##  9 M     27    Poland  1972 Summer
## 10 M     31    Poland  1976 Summer
## # ℹ 271,106 more rows

Add columns

Summarize by groups

data %>%
  group_by(team) %>%
    
  summarise(count = n()) %>%
    
  arrange(desc(count))
## # A tibble: 1,184 × 2
##    team          count
##    <chr>         <int>
##  1 United States 17847
##  2 France        11988
##  3 Great Britain 11404
##  4 Italy         10260
##  5 Germany        9326
##  6 Canada         9279
##  7 Japan          8289
##  8 Sweden         8052
##  9 Australia      7513
## 10 Hungary        6547
## # ℹ 1,174 more rows