Import data
# excel file
data <- read_excel("../00_data/myData_apply2.xlsx")
data
## # A tibble: 271,116 × 16
## Column1 id name sex age height weight team noc games year season
## <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 270479 135289 Zzim… M 20 NA NA Braz… BRA 1952… 1952 Summer
## 2 207677 104222 zzet… M 26 178 74 Turk… TUR 2016… 2016 Summer
## 3 102956 52087 zzet… M 23 172 85 Turk… TUR 2004… 2004 Summer
## 4 102957 52087 zzet… M 27 172 85 Turk… TUR 2008… 2008 Summer
## 5 106601 53910 Zyta… F 26 187 85 Pola… POL 1988… 1988 Summer
## 6 219493 110259 Zygm… M 25 185 82 Pola… POL 1932… 1932 Summer
## 7 183779 92370 Zygm… M 21 179 72 Pola… POL 1952… 1952 Summer
## 8 183780 92370 Zygm… M 26 179 72 Pola… POL 1956… 1956 Summer
## 9 152047 76313 Zygm… M 27 175 71 Pola… POL 1972… 1972 Summer
## 10 152048 76313 Zygm… M 31 175 71 Pola… POL 1976… 1976 Summer
## # ℹ 271,106 more rows
## # ℹ 4 more variables: city <chr>, sport <chr>, event <chr>, medal <chr>
Apply the following dplyr verbs to your data
Filter rows
filter(data, sex == "F")
## # A tibble: 74,522 × 16
## Column1 id name sex age height weight team noc games year season
## <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 106601 53910 Zyta… F 26 187 85 Pola… POL 1988… 1988 Summer
## 2 234595 117643 Zuza… F 14 155 48 Pola… POL 1992… 1992 Winter
## 3 153676 77119 Zuza… F 17 NA NA Pola… POL 2008… 2008 Summer
## 4 153677 77119 Zuza… F 17 NA NA Pola… POL 2008… 2008 Summer
## 5 195453 98118 Zuza… F 25 172 58 Pola… POL 2000… 2000 Summer
## 6 195454 98118 Zuza… F 25 172 58 Pola… POL 2000… 2000 Summer
## 7 195455 98118 Zuza… F 29 172 58 Pola… POL 2004… 2004 Summer
## 8 241407 120919 Zuza… F 21 176 73 Slov… SVK 2010… 2010 Winter
## 9 241332 120882 Zuza… F 31 NA NA Slov… SVK 2008… 2008 Summer
## 10 228356 114694 Zuza… F 24 166 65 Slov… SVK 2008… 2008 Summer
## # ℹ 74,512 more rows
## # ℹ 4 more variables: city <chr>, sport <chr>, event <chr>, medal <chr>
Arrange rows
arrange(data, desc(games))
## # A tibble: 271,116 × 16
## Column1 id name sex age height weight team noc games year season
## <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 207677 104222 "zze… M 26 178 74 Turk… TUR 2016… 2016 Summer
## 2 93560 47358 "Zuz… F 29 173 63 Czec… CZE 2016… 2016 Summer
## 3 92831 47033 "Zur… F 20 164 58 Cuba CUB 2016… 2016 Summer
## 4 101798 51481 "Zur… M 24 171 68 Geor… GEO 2016… 2016 Summer
## 5 49482 25486 "Zur… M 25 183 75 Geor… GEO 2016… 2016 Summer
## 6 270560 135318 "Zu … M 26 188 87 China CHN 2016… 2016 Summer
## 7 105526 53373 "Zsu… F 27 185 70 Hung… HUN 2016… 2016 Summer
## 8 105527 53373 "Zsu… F 27 185 70 Hung… HUN 2016… 2016 Summer
## 9 105528 53373 "Zsu… F 27 185 70 Hung… HUN 2016… 2016 Summer
## 10 19556 10326 "Zso… M 30 195 95 Hung… HUN 2016… 2016 Summer
## # ℹ 271,106 more rows
## # ℹ 4 more variables: city <chr>, sport <chr>, event <chr>, medal <chr>
Select columns
select(data, sex:season)
## # A tibble: 271,116 × 9
## sex age height weight team noc games year season
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 M 20 NA NA Brazil BRA 1952 Summer 1952 Summer
## 2 M 26 178 74 Turkey TUR 2016 Summer 2016 Summer
## 3 M 23 172 85 Turkey TUR 2004 Summer 2004 Summer
## 4 M 27 172 85 Turkey TUR 2008 Summer 2008 Summer
## 5 F 26 187 85 Poland POL 1988 Summer 1988 Summer
## 6 M 25 185 82 Poland POL 1932 Summer 1932 Summer
## 7 M 21 179 72 Poland POL 1952 Summer 1952 Summer
## 8 M 26 179 72 Poland POL 1956 Summer 1956 Summer
## 9 M 27 175 71 Poland POL 1972 Summer 1972 Summer
## 10 M 31 175 71 Poland POL 1976 Summer 1976 Summer
## # ℹ 271,106 more rows
select(data, sex, age, team, year, season)
## # A tibble: 271,116 × 5
## sex age team year season
## <chr> <chr> <chr> <dbl> <chr>
## 1 M 20 Brazil 1952 Summer
## 2 M 26 Turkey 2016 Summer
## 3 M 23 Turkey 2004 Summer
## 4 M 27 Turkey 2008 Summer
## 5 F 26 Poland 1988 Summer
## 6 M 25 Poland 1932 Summer
## 7 M 21 Poland 1952 Summer
## 8 M 26 Poland 1956 Summer
## 9 M 27 Poland 1972 Summer
## 10 M 31 Poland 1976 Summer
## # ℹ 271,106 more rows
Add columns
Summarize by groups
data %>%
group_by(team) %>%
summarise(count = n()) %>%
arrange(desc(count))
## # A tibble: 1,184 × 2
## team count
## <chr> <int>
## 1 United States 17847
## 2 France 11988
## 3 Great Britain 11404
## 4 Italy 10260
## 5 Germany 9326
## 6 Canada 9279
## 7 Japan 8289
## 8 Sweden 8052
## 9 Australia 7513
## 10 Hungary 6547
## # ℹ 1,174 more rows