Uma breve análise de perfil dos atletas da Olimpíadas 2024, sob os seguintes critérios:
Quantidade
Gênero
Modalidade
Idade
Altura
DF <- read.csv("athletes new.csv")
country <- DF |>
group_by(country) |>
summarise(count = n()) |>
ungroup() |>
arrange(desc(count)) |>
mutate(posicao = row_number(), .before = country) |>
filter(count >= 100)
tabela_country <- country |>
slice(1:5) |>
rbind(country |> filter(country == "Brazil"))
plot_country <- ggplot(country, aes(x = count, y = country, fill = country))+
geom_bar(stat = "identity", fill = "#69b3a2", color = "black")+
scale_fill_manual(values = c("Brazil" = "skyblue"))+
labs(title = "Número de Atletas por País",
x = "Quantidade", y = "")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
| posicao | country | count |
|---|---|---|
| 1 | United States | 620 |
| 2 | France | 600 |
| 3 | Australia | 476 |
| 4 | Germany | 457 |
| 5 | Japan | 432 |
| 11 | Brazil | 290 |
# Percentual total
percent_gender_tot <- DF |>
group_by(gender) |>
summarise(count = n()) |>
mutate(percent = count / sum(count) * 100)
plot_gender_tot <- ggplot(percent_gender_tot, aes(x = gender, y = count, fill = gender)) +
geom_bar(stat = "identity") +
geom_text(aes(label = count),
vjust = 0.5, size = 4) +
geom_text(aes(label = paste0(round(percent, 1), "%")),
y = percent_gender_tot$count/2, vjust = 0.5,
color = "white", size = 5) +
scale_fill_manual(values = c("Female" = "#FFAFCC", "Male" = "#A2D2FF")) +
scale_x_discrete(labels = c("Female" = "Feminino", "Male" = "Masculino")) +
labs(title = "Proporção de Gênero - Geral",
x = "",
y = "") +
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5),
legend.position = "none",
axis.text.y = element_blank())
# Razão
razao_gender <- DF |>
select("gender", "country") |>
group_by(gender, country) |>
summarise(count = n()) |>
pivot_wider(names_from = "gender",
values_from = "count") |>
mutate(Female = replace_na(Female, 0),
Male = replace_na(Male, 0),
razao = Male/Female *100) |>
arrange(razao)
tabela_gender_razao <- razao_gender |>
slice(c(1, 201:206))
plot_gender_razao <- ggplot(razao_gender, aes(x = razao)) +
geom_boxplot(fill = "#69b3a2", color = "black") +
scale_x_continuous(breaks = c(0, 100, 200, 500, 1000),
labels = c(0, 100, 200, 500, 1000))+
labs(title = "Razão de Gênero por País",
x = "Razão = (Homens/Mulheres) * 100", y = "") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5),
axis.text.y = element_blank())+
annotate("segment", x = 100, xend = 100, y = -Inf, yend = Inf,
color = "red", linetype = "dashed", size = 0.8)
| country | Female | Male | razao |
|---|---|---|---|
| Solomon Islands | 2 | 0 | 0 |
| Belize | 0 | 1 | Inf |
| Guinea-Bissau | 0 | 6 | Inf |
| Iraq | 0 | 23 | Inf |
| Liechtenstein | 0 | 1 | Inf |
| Nauru | 0 | 1 | Inf |
| Somalia | 0 | 1 | Inf |
disciplines <- DF |>
mutate(disciplines = gsub("\\[|\\]|'", "", disciplines))
duplicados <- disciplines |>
filter(grepl(",", disciplines)) |>
mutate(disciplines = str_replace(disciplines, "^.*?,\\s*", ""))
disciplines <- mutate(disciplines, disciplines = str_replace(disciplines, "\\s*,.*", "")) |>
bind_rows(duplicados) |>
arrange(code)
tabela_disciplines <- disciplines |>
group_by(disciplines) |>
summarise(count = n()) |>
ungroup() |>
arrange(count)
plot_disciplines <- ggplot(disciplines, aes(y = disciplines))+
geom_bar( fill = "#69b3a2", color = "black")+
scale_x_continuous(breaks = c(0, 100, 250, 500, 1000, 2000),
labels = c(0, 100, 250, 500, 1000, 2000))+
labs(title = "Modalidade",
x = "Quantidade", y = "")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5),
axis.text.y = element_text(size = 6.5))
| disciplines | count |
|---|---|
| Cycling BMX Freestyle | 24 |
| Trampoline Gymnastics | 32 |
| Breaking | 33 |
| Surfing | 48 |
| Marathon Swimming | 55 |
| 3x3 Basketball | 65 |
| Cycling BMX Racing | 67 |
| Sport Climbing | 68 |
| Cycling Mountain Bike | 70 |
| Modern Pentathlon | 72 |
| Canoe Slalom | 84 |
| Skateboarding | 88 |
| Rhythmic Gymnastics | 94 |
| Beach Volleyball | 96 |
| Artistic Swimming | 106 |
| Triathlon | 112 |
| Golf | 120 |
| Weightlifting | 122 |
| Archery | 128 |
| Taekwondo | 134 |
| Diving | 135 |
| Badminton | 175 |
| Table Tennis | 175 |
| Tennis | 176 |
| Cycling Road | 188 |
| Artistic Gymnastics | 190 |
| Cycling Track | 235 |
| Canoe Sprint | 239 |
| Equestrian | 242 |
| Boxing | 248 |
| Fencing | 260 |
| Water Polo | 286 |
| Basketball | 288 |
| Wrestling | 291 |
| Volleyball | 311 |
| Rugby Sevens | 317 |
| Sailing | 330 |
| Shooting | 342 |
| Judo | 378 |
| Handball | 386 |
| Hockey | 415 |
| Rowing | 493 |
| Football | 553 |
| Swimming | 853 |
| Athletics | 2023 |
# Idade Geral
DF_olimpiadas <- ymd("2024-08-01")
idade <- DF |>
mutate(birth_date = ymd(birth_date),
idade = floor(interval(birth_date, DF_olimpiadas) / years(1)))
tabela_idade <- idade |>
arrange(idade) |>
select("name", "gender","country", "disciplines", "idade") |>
slice(c(1:5, 11111:11115))
plot_idades_geral <- ggplot(idade, aes(x = idade)) +
geom_histogram(binwidth = 1, fill = "#69b3a2", color = "black")+
scale_x_continuous(breaks = seq(from = 5, to = 69, by = 5),
labels = seq(from = 5, to = 69, by = 5)) +
labs(title = "Histograma de Idade",
x = "Idade (anos)", y = "") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
annotate("segment", x = 24, xend = 24, y = -Inf, yend = Inf,
color = "red", linetype = "dashed", size = 0.8)
# Por modalidade
idade_discplines <- disciplines |>
mutate(birth_date = ymd(birth_date),
idade = floor(interval(birth_date, DF_olimpiadas) / years(1))) |>
group_by(disciplines) |>
summarise(mean_idade = floor(mean(idade))) |>
arrange(mean_idade)
plot_idades_modalidade <- ggplot(idade_discplines, aes(x = mean_idade, y = disciplines)) +
geom_bar(stat = "identity", fill = "#69b3a2", color = "black")+
coord_cartesian(xlim = c(15, 40))+
labs(title = "Média de Idade por Esporte",
x = "Idade (anos)", y = "")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5),
axis.text.y = element_text(size = 6.5),
panel.grid.major = element_line(color = "gray75"))
| name | gender | country | disciplines | idade |
|---|---|---|---|---|
| ZHENG Haohao | Female | China | [‘Skateboarding’] | 11 |
| SUKASEM Vareeraya | Female | Thailand | [‘Skateboarding’] | 12 |
| ZHU Yuanling | Female | China | [‘Skateboarding’] | 13 |
| SIRVIO Heili | Female | Finland | [‘Skateboarding’] | 13 |
| AMR HOSSNY Sara | Female | Egypt | [‘Fencing’] | 14 |
| SCHWIZER Pius | Male | Switzerland | [‘Equestrian’] | 61 |
| NI Xia Lian | Female | Luxembourg | [‘Table Tennis’] | 61 |
| BENGTSSON Rolf-Goran | Male | Sweden | [‘Equestrian’] | 62 |
| JIMENEZ COBO Juan Antonio | Male | Spain | [‘Equestrian’] | 65 |
| HANNA Mary | Female | Australia | [‘Equestrian’] | 69 |
| disciplines | mean_idade |
|---|---|
| Rhythmic Gymnastics | 19 |
| Skateboarding | 21 |
| Artistic Swimming | 22 |
| Swimming | 22 |
| Artistic Gymnastics | 23 |
| Diving | 23 |
| Sport Climbing | 23 |
| Archery | 24 |
| Cycling BMX Freestyle | 24 |
| Football | 24 |
| Marathon Swimming | 24 |
| Taekwondo | 24 |
| Trampoline Gymnastics | 24 |
| Cycling BMX Racing | 25 |
| Cycling Track | 25 |
| Surfing | 25 |
| Athletics | 26 |
| Badminton | 26 |
| Boxing | 26 |
| Breaking | 26 |
| Canoe Sprint | 26 |
| Cycling Mountain Bike | 26 |
| Hockey | 26 |
| Judo | 26 |
| Modern Pentathlon | 26 |
| Rugby Sevens | 26 |
| Water Polo | 26 |
| Weightlifting | 26 |
| Basketball | 27 |
| Cycling Road | 27 |
| Fencing | 27 |
| Rowing | 27 |
| Sailing | 27 |
| Volleyball | 27 |
| Wrestling | 27 |
| Canoe Slalom | 28 |
| Handball | 28 |
| Table Tennis | 28 |
| Tennis | 28 |
| Triathlon | 28 |
| 3x3 Basketball | 29 |
| Beach Volleyball | 29 |
| Golf | 29 |
| Shooting | 29 |
| Equestrian | 38 |
height <- disciplines |>
filter(height != 0) |>
group_by(disciplines) |>
summarise(mean_height = mean(height)) |>
arrange(mean_height)
plot_height <- ggplot(height, aes(x = mean_height, y = disciplines)) +
geom_bar(stat = "identity", fill = "#69b3a2", color = "black")+
coord_cartesian(xlim = c(165, 195))+
scale_x_continuous(breaks = seq(from = 165, to = 195, by = 2),
labels = seq(from = 165, to = 195, by = 2)) +
labs(title = "Altura Média por Esporte",
x = "Altura (cm)", y = "")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5),
panel.grid.major = element_line(color = "gray75"))
| disciplines | mean_height |
|---|---|
| Sport Climbing | 168.8088 |
| Boxing | 173.0685 |
| Badminton | 173.9029 |
| Football | 175.0851 |
| Golf | 175.2000 |
| Rugby Sevens | 175.2500 |
| Athletics | 175.9168 |
| Judo | 180.0000 |
| Tennis | 180.5852 |
| Handball | 183.6891 |
| Water Polo | 185.0769 |
| Beach Volleyball | 188.3125 |
| 3x3 Basketball | 189.9385 |
| Volleyball | 190.9453 |
| Basketball | 191.5174 |
Quantidade:
Estiveram presentes 206 Delegações e 11.115 atletas.
O Estados Unidos foi o país que levou mais atletas, 620.
O Brasil levou 290, ficando na posição 11.
Gênero:
A proporção de gênero foi de 50,9% de Homens e 49,1% de Mulheres.
A media foi de 112.5.
Tiveram 7 delegações unissexuais.
Modalidade:
O esporte com mais atletas é o Atletismo, com 2023 atletas.
Idade:
A idade mais frequente é a de 24 anos.
O esporte com a menor média é a Ginástica Rítmica com 19 anos; e o com a maior é o Hipismo, 38 anos.
Altura:
O esporte com a maior média é o Basquete com 191,5 cm.