# Import the Tour de France winners data as a tibble.
Mydata <- read_csv(file = "../00_data/tdf_winners.csv")
## Rows: 106 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): winner_name, winner_team, full_name, nickname, birth_town, birth_c...
## dbl (9): edition, distance, time_overall, time_margin, stage_wins, stages_l...
## date (3): start_date, born, died
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the imported tibble (auto-prints the first rows and column types).
Mydata
## # A tibble: 106 × 19
## edition start_date winner_name winner_team distance time_overall time_margin
## <dbl> <date> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1 1903-07-01 Maurice Gar… La Françai… 2428 94.6 2.99
## 2 2 1904-07-02 Henri Cornet Conte 2428 96.1 2.27
## 3 3 1905-07-09 Louis Trous… Peugeot–Wo… 2994 NA NA
## 4 4 1906-07-04 René Pottier Peugeot–Wo… 4637 NA NA
## 5 5 1907-07-08 Lucien Peti… Peugeot–Wo… 4488 NA NA
## 6 6 1908-07-13 Lucien Peti… Peugeot–Wo… 4497 NA NA
## 7 7 1909-07-05 François Fa… Alcyon–Dun… 4498 NA NA
## 8 8 1910-07-01 Octave Lapi… Alcyon–Dun… 4734 NA NA
## 9 9 1911-07-02 Gustave Gar… Alcyon–Dun… 5343 NA NA
## 10 10 1912-06-30 Odile Defra… Alcyon–Dun… 5289 NA NA
## # ℹ 96 more rows
## # ℹ 12 more variables: stage_wins <dbl>, stages_led <dbl>, height <dbl>,
## # weight <dbl>, age <dbl>, born <date>, died <date>, full_name <chr>,
## # nickname <chr>, birth_town <chr>, birth_country <chr>, nationality <chr>
# Tally winners by birth country; the column is still character here,
# so the rows come back in alphabetical order.
count(Mydata, birth_country)
## # A tibble: 15 × 2
## birth_country n
## <chr> <int>
## 1 Australia 1
## 2 Belgium 19
## 3 Columbia 1
## 4 Denmark 1
## 5 France 36
## 6 Germany 1
## 7 Ireland 1
## 8 Italy 11
## 9 Kenya 4
## 10 Luxembourg 4
## 11 Netherlands 2
## 12 Spain 12
## 13 Switzerland 2
## 14 USA 10
## 15 Wales 1
# Countries listed from most to fewest wins; this is the order the factor
# levels (and anything printed or plotted from them) will follow.
birth_country_levels <- c(
  "France", "Belgium", "Spain", "Italy", "USA",
  "Luxembourg", "Kenya", "Switzerland", "Netherlands", "Ireland",
  "Denmark", "Germany", "Australia", "Wales", "Columbia"
)

# Copy of the data with birth_country stored as a factor using those levels.
Mydata_rev <- mutate(
  Mydata,
  birth_country = factor(birth_country, levels = birth_country_levels)
)
# Same tally on the factor version; rows now follow the level order.
count(Mydata_rev, birth_country)
## # A tibble: 15 × 2
## birth_country n
## <fct> <int>
## 1 France 36
## 2 Belgium 19
## 3 Spain 12
## 4 Italy 11
## 5 USA 10
## 6 Luxembourg 4
## 7 Kenya 4
## 8 Switzerland 2
## 9 Netherlands 2
## 10 Ireland 1
## 11 Denmark 1
## 12 Germany 1
## 13 Australia 1
## 14 Wales 1
## 15 Columbia 1
# Draw a bar chart of the number of wins per birth country.
#
# `data` must contain a `birth_country` column. Bars follow that column's
# ordering: alphabetical for a character vector, level order for a factor.
# Returns the ggplot object, which auto-prints when called at top level.
plot_wins_per_country <- function(data) {
  data %>%
    count(birth_country) %>%
    ggplot(aes(x = birth_country, y = n)) +
    # geom_col() takes bar heights straight from `n`; it is the idiomatic
    # replacement for geom_bar(stat = "identity").
    geom_col() +
    labs(x = "Birth Country", y = "Wins", title = "Wins per country") +
    # Slant the country labels so they do not overlap.
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# Unordered: birth_country is character, so bars appear alphabetically.
plot_wins_per_country(Mydata)

# Ordered: birth_country is a factor, so bars follow the manual level order.
plot_wins_per_country(Mydata_rev)
Show examples of three forcats functions for modifying factor levels: recoding, collapsing, and lumping.
# fct_recode(): replace each full country name with a three-letter code.
# The syntax is "new" = "old". Every country in the data is listed so the
# result does not mix codes with full names; "USA" is already a code and is
# left untouched. "Columbia" is spelled the way it appears in the data.
Mydata %>%
  mutate(birth_country = fct_recode(birth_country,
    "AUS" = "Australia",
    "BEL" = "Belgium",
    "COL" = "Columbia",
    "DEN" = "Denmark",
    "FRA" = "France",
    "GER" = "Germany",
    "IRL" = "Ireland",
    "ITA" = "Italy",
    "KEN" = "Kenya",
    "LUX" = "Luxembourg",
    "NED" = "Netherlands",
    "ESP" = "Spain",
    "SUI" = "Switzerland",
    "WAL" = "Wales"
  )) %>%
  count(birth_country)
## # A tibble: 15 × 2
## birth_country n
## <fct> <int>
## 1 AUS 1
## 2 BEL 19
## 3 COL 1
## 4 DEN 1
## 5 FRA 36
## 6 GER 1
## 7 IRL 1
## 8 ITA 11
## 9 KEN 4
## 10 LUX 4
## 11 NED 2
## 12 ESP 12
## 13 SUI 2
## 14 USA 10
## 15 WAL 1
# fct_collapse(): group birth countries into continents.
# "Columbia" is the data's spelling of Colombia, which is in South America,
# so it gets its own group instead of being counted under Europe.
Mydata %>%
  mutate(birth_country = fct_collapse(birth_country,
    Europe = c(
      "France", "Belgium", "Italy", "Luxembourg", "Netherlands",
      "Switzerland", "Denmark", "Germany", "Ireland", "Spain", "Wales"
    ),
    North_America = "USA",
    South_America = "Columbia",
    Africa = "Kenya",
    Oceania = "Australia"
  )) %>%
  count(birth_country)
## # A tibble: 5 × 2
## birth_country n
## <fct> <int>
## 1 Oceania 1
## 2 Europe 90
## 3 South_America 1
## 4 Africa 4
## 5 North_America 10
# fct_lump_lowfreq(): lump the rarest levels into "Other" while keeping
# "Other" the smallest level. A bare fct_lump() call dispatches to this
# function; forcats recommends calling the specific variant directly.
# With this data nothing is lumped, so all 15 countries remain in the output.
Mydata %>%
  mutate(birth_country = fct_lump_lowfreq(birth_country)) %>%
  count(birth_country)
## # A tibble: 15 × 2
## birth_country n
## <fct> <int>
## 1 Australia 1
## 2 Belgium 19
## 3 Columbia 1
## 4 Denmark 1
## 5 France 36
## 6 Germany 1
## 7 Ireland 1
## 8 Italy 11
## 9 Kenya 4
## 10 Luxembourg 4
## 11 Netherlands 2
## 12 Spain 12
## 13 Switzerland 2
## 14 USA 10
## 15 Wales 1
No need to do anything here.