Descrição:
A Maratona de Boston é uma corrida anual que acontece na cidade de Boston, Massachusetts. Atraindo milhares de corredores de diferentes níveis de habilidade e experiência, ano após ano, participantes de todo o mundo se reúnem para competir por recordes mundiais, recordes de divisão e melhores marcas pessoais. Analisar os resultados desses atletas oferece uma visão interessante sobre a distribuição e a variação dos tempos de todos os corredores que completaram com sucesso essa corrida histórica.
Instalação dos pacotes utilizados
# Packages this analysis depends on.
required_packages <- c(
  "here", "skimr", "janitor", "dplyr",
  "rmarkdown", "lubridate", "tidyverse", "gridExtra"
)
# Install only what is missing, so re-running the script does not reinstall
# every package each time. system.file() returns "" for a package that cannot
# be found in the library paths.
is_installed <- vapply(
  required_packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}
Coleta dos Dados
Analisando o data set
## 'data.frame': 26598 obs. of 15 variables:
## $ age_group : chr "18-39" "18-39" "18-39" "18-39" ...
## $ place_overall : int 1 2 3 4 5 6 7 8 9 10 ...
## $ place_gender : int 1 2 3 4 5 6 7 8 9 10 ...
## $ place_division : int 1 2 3 4 5 6 7 8 9 10 ...
## $ name : chr "Chebet, Evans" "Geay, Gabriel" "Kipruto, Benson" "Korir, Albert" ...
## $ team : chr "Team–" "Team–" "Team–" "Team–" ...
## $ bib_number : chr "1" "3" "5" "19" ...
## $ half_time : chr "1H 2M 20S" "1H 2M 20S" "1H 2M 19S" "1H 2M 20S" ...
## $ finish_net : chr "2H 5M 54S" "2H 6M 4S" "2H 6M 6S" "2H 8M 1S" ...
## $ finish_gun : chr "2H 5M 54S" "2H 6M 4S" "2H 6M 6S" "2H 8M 1S" ...
## $ gender : chr "M" "M" "M" "M" ...
## $ half_time_sec : int 3740 3740 3739 3740 3740 3739 3839 3839 3739 3839 ...
## $ finish_net_sec : int 7554 7564 7566 7681 7715 7763 7784 7786 7804 7817 ...
## $ finish_gun_sec : int 7554 7564 7566 7681 7715 7763 7784 7786 7804 7817 ...
## $ finish_net_minutes: num 126 126 126 128 129 ...
# Missing (NA) values per column
# vapply() guarantees exactly one integer per column; sapply() would silently
# change its return type depending on the input.
vapply(ds, function(x) sum(is.na(x)), integer(1)) # count the NAs in each column
## age_group place_overall place_gender place_division
## 0 0 0 0
## name team bib_number half_time
## 0 0 0 72
## finish_net finish_gun gender half_time_sec
## 0 0 0 72
## finish_net_sec finish_gun_sec finish_net_minutes
## 0 0 0
# Longest marathon time
# Keep the row(s) whose net finish time equals the overall maximum. The column
# is referenced bare (data masking) instead of through `ds_run$...`, so the
# condition always refers to the rows flowing through the pipe; na.rm = TRUE
# keeps a single missing time from turning the whole comparison into NA.
ds_run_max <- ds_run %>%
  filter(finish_net_sec == max(finish_net_sec, na.rm = TRUE))
# Turn the result into a table grob (gridExtra)
ds_run_max_table <- tableGrob(ds_run_max)
# Draw the table on the current graphics device
grid.arrange(ds_run_max_table)
# Shortest marathon time
# Keep the row(s) whose net finish time equals the overall minimum. The column
# is referenced bare (data masking) instead of through `ds_run$...`, so the
# condition always refers to the rows flowing through the pipe; na.rm = TRUE
# keeps a single missing time from turning the whole comparison into NA.
ds_run_min <- ds_run %>%
  filter(finish_net_sec == min(finish_net_sec, na.rm = TRUE))
# Turn the result into a table grob (gridExtra)
ds_run_min_table <- tableGrob(ds_run_min)
# Draw the table on the current graphics device
grid.arrange(ds_run_min_table)
# Half-marathon time vs. marathon time
# Scatter plot of half-marathon seconds (x) against net finish seconds (y),
# drawn as one panel per age group.
ggplot(ds_run, aes(x = half_time_sec, y = finish_net_sec)) +
  geom_point(colour = "#008DD5") +
  facet_wrap(vars(age_group)) +
  theme_light()
# Half-marathon time vs. marathon time (by gender)
# Scatter plot of half-marathon seconds (x) against net finish seconds (y),
# drawn as one panel per gender.
ggplot(ds_run, aes(x = half_time_sec, y = finish_net_sec)) +
  geom_point(colour = "#008DD5") +
  facet_wrap(vars(gender)) +
  theme_light()
# Participants by gender
# Bar chart of the number of participants per gender, with the count printed
# above each bar. The label uses after_stat(count): the older `..count..`
# notation is deprecated since ggplot2 3.4.0 and emits a warning.
ggplot(data = ds_run, aes(x = gender, fill = gender)) +
  geom_bar() +
  scale_fill_manual(values = c("M" = "#008DD5", "W" = "#F56476")) +
  theme_minimal() +
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5)
# Fastest finisher in the men's category
# For rows with gender "M": smallest net finish time (seconds) per age group,
# then converted to a lubridate period for display.
faster_gender_M <- ds_run %>%
  filter(gender == "M") %>%
  group_by(age_group) %>%
  summarise(best_result_M = min(finish_net_sec)) %>%
  mutate(best_result_M = seconds_to_period(best_result_M))
# Turn the summary into a table grob (gridExtra)
faster_gender_M_table <- tableGrob(faster_gender_M)
# Draw the table on the current graphics device
grid.arrange(faster_gender_M_table)
# Fastest finisher in the women's category
# For rows with gender "W": smallest net finish time (seconds) per age group,
# then converted to a lubridate period for display.
faster_gender_W <- ds_run %>%
  filter(gender == "W") %>%
  group_by(age_group) %>%
  summarise(best_result_W = min(finish_net_sec)) %>%
  mutate(best_result_W = seconds_to_period(best_result_W))
# Turn the summary into a table grob (gridExtra)
faster_gender_W_table <- tableGrob(faster_gender_W)
# Draw the table on the current graphics device
grid.arrange(faster_gender_W_table)
# Faster gender within each age group
# Put the best men's and women's results side by side, one row per age group
# present in both tables, and record which gender was faster in WIN.
# Each direction is tested explicitly so that identical best times are
# reported as "Tie" rather than being credited to one gender.
faster_gender <- inner_join(faster_gender_M, faster_gender_W, by = "age_group") %>%
  mutate(
    WIN = case_when(
      best_result_M < best_result_W ~ "M",
      best_result_W < best_result_M ~ "W",
      TRUE ~ "Tie"
    )
  )
# Turn the comparison into a table grob (gridExtra)
faster_gender_table <- tableGrob(faster_gender)
# Draw the table on the current graphics device
grid.arrange(faster_gender_table)
# Fastest categories
# Count finishers per age group whose net finish time (seconds) lies in the
# half-open interval (lower, upper].
#
# data:        data frame with `finish_net_sec` and `age_group` columns.
# lower/upper: interval bounds in seconds; use -Inf / Inf for an open end.
# column_name: name given to the count column in the result.
# Returns an ungrouped tibble with `age_group` and the count column.
count_finishers_by_age_group <- function(data, lower, upper, column_name) {
  data %>%
    filter(finish_net_sec > lower, finish_net_sec <= upper) %>%
    count(age_group, name = column_name)
}

# Finish-time bins: 9000 s = 2h30m, 10800 s = 3h, 12600 s = 3h30m, 14400 s = 4h.
time_2h30m <- count_finishers_by_age_group(ds_run, -Inf, 9000, "time_2h30m")
time_2h30m_3h <- count_finishers_by_age_group(ds_run, 9000, 10800, "time_2h30m_3h")
time_3h_3h30m <- count_finishers_by_age_group(ds_run, 10800, 12600, "time_3h_3h30m")
time_3h30m_4h <- count_finishers_by_age_group(ds_run, 12600, 14400, "time_3h30m_4h")
time_4h <- count_finishers_by_age_group(ds_run, 14400, Inf, "time_4h")

# One row per age group, one column per bin. An age group with nobody in a bin
# has no row in that bin's table, so the join yields NA there; those cells are
# really counts of zero.
faster_category <- time_2h30m %>%
  full_join(time_2h30m_3h, by = "age_group") %>%
  full_join(time_3h_3h30m, by = "age_group") %>%
  full_join(time_3h30m_4h, by = "age_group") %>%
  full_join(time_4h, by = "age_group") %>%
  mutate(across(-age_group, ~ coalesce(.x, 0L)))
# The data viewer only makes sense in an interactive session; skip it when the
# script is rendered or run in batch mode.
if (interactive()) {
  View(faster_category)
}
# Turn the result into a table grob (gridExtra)
faster_category_table <- tableGrob(faster_category)
# Draw the table on the current graphics device
grid.arrange(faster_category_table)
# Compute the totals
# Total number of finishers in each time bin, summed over all age groups.
total_time_2h30m <- sum(time_2h30m[["time_2h30m"]])
total_time_2h30m_3h <- sum(time_2h30m_3h[["time_2h30m_3h"]])
total_time_3h_3h30m <- sum(time_3h_3h30m[["time_3h_3h30m"]])
total_time_3h30m_4h <- sum(time_3h30m_4h[["time_3h30m_4h"]])
total_time_4h <- sum(time_4h[["time_4h"]])
# Collect the totals in a two-column data frame: bin label and total
faster_category_total <- data.frame(
  Category = c(
    "time_2h30m",
    "time_2h30m_3h",
    "time_3h_3h30m",
    "time_3h30m_4h",
    "time_4h"
  ),
  Total = c(
    total_time_2h30m,
    total_time_2h30m_3h,
    total_time_3h_3h30m,
    total_time_3h30m_4h,
    total_time_4h
  )
)
# Turn the totals into a table grob (gridExtra)
faster_category_total_table <- tableGrob(faster_category_total)
# Draw the table on the current graphics device
grid.arrange(faster_category_total_table)
# Chart of the total per category
# Column chart of the total finishers per time bin, with the value printed
# above each column.
# Category is a character column, so ggplot would order the x axis by
# locale-dependent collation of the labels; explicit factor levels keep the
# bins in the order they appear in the data frame (fastest to slowest).
ggplot(
  data = faster_category_total,
  mapping = aes(x = factor(Category, levels = unique(Category)), y = Total)
) +
  geom_col(fill = "#F56476") +
  # Adjust vjust and size as needed
  geom_text(aes(label = Total), vjust = -0.5, size = 5, color = "black") +
  labs(title = "Total por Categoria", x = "Categoria", y = "Total") +
  theme_minimal()
# Find outliers
# Work on the net finish time as a double vector.
ds_run$finish_net_sec <- as.numeric(ds_run$finish_net_sec)
# Minimum, quartiles and maximum of the net finish time (seconds)
quartis <- quantile(ds_run$finish_net_sec, probs = c(0, 0.25, 0.5, 0.75, 1), na.rm = TRUE)
quartis
## 0% 25% 50% 75% 100%
## 7554.00 11464.25 12835.50 14666.75 26431.00
# Interquartile range (IQR); na.rm = TRUE matches the quantile() call above,
# since IQR() would otherwise stop with an error on missing values.
iqr <- IQR(ds_run$finish_net_sec, na.rm = TRUE)
q1 <- quartis[["25%"]] # First quartile (Q1)
q3 <- quartis[["75%"]] # Third quartile (Q3)
# Lower and upper fences: 1.5 * IQR beyond the quartiles
limite_inferior <- q1 - 1.5 * iqr
limite_superior <- q3 + 1.5 * iqr
# which() drops NA comparisons, so missing times are not reported as outliers
outliers <- ds_run$finish_net_sec[
  which(ds_run$finish_net_sec < limite_inferior | ds_run$finish_net_sec > limite_superior)
]
print(outliers)
## [1] 19495 19509 19539 19593 19598 19609 19612 19635 19640 19650 19681 19683
## [13] 19694 19694 19713 19718 19726 19731 19761 19773 19774 19784 19789 19789
## [25] 19798 19808 19837 19854 19893 19898 19924 19926 19934 19953 19961 19961
## [37] 20000 20030 20055 20059 20099 20102 20139 20202 20239 20322 20371 20398
## [49] 20439 20445 20460 20460 20470 20511 20523 20546 20577 20597 20647 20648
## [61] 20706 20745 20817 20827 20839 20882 20920 20955 20956 20963 20986 21068
## [73] 21102 21144 21231 21380 21477 21482 21535 21537 21572 21582 21641 21764
## [85] 21881 22003 19502 19511 19576 19580 19693 19715 19725 19733 19777 19780
## [97] 19814 19831 19854 19985 20082 20086 20128 20220 20235 20236 20247 20379
## [109] 20457 20501 20503 20543 20646 20707 20760 20762 20864 20890 20918 21100
## [121] 21226 21238 21239 21329 21493 21705 24188 19516 19527 19540 19583 19652
## [133] 19814 19821 19826 19868 19911 20051 20104 20125 20137 20256 20266 20361
## [145] 20469 20564 20599 20601 20728 20809 20812 20813 20916 21016 21161 21210
## [157] 21290 21314 21349 21363 21386 21393 21502 21504 21513 21605 21618 21889
## [169] 22109 22146 22616 19490 19517 19535 19593 19595 19653 19721 19742 19757
## [181] 19802 19847 19874 19892 19928 19928 20004 20010 20049 20239 20240 20279
## [193] 20294 20355 20385 20506 20507 20507 20727 20764 20824 20861 20878 20905
## [205] 20917 20918 20972 21003 21011 21021 21075 21142 21152 21190 21260 21269
## [217] 21288 21332 21571 21755 21801 21827 21903 23049 24056 25460 26019 19580
## [229] 19582 19640 19659 19663 19805 19910 19929 20123 20147 20173 20279 20281
## [241] 20283 20300 20339 20360 20362 20436 20455 20467 20512 20572 20727 20741
## [253] 20781 20990 21004 21016 21023 21054 21134 21184 21270 21291 21372 21419
## [265] 21582 21625 21648 21752 21803 21850 21936 23865 19499 19525 19566 19590
## [277] 19598 19714 19716 19727 19750 19762 19845 19846 19924 19945 20045 20148
## [289] 20157 20236 20415 20468 20510 20545 20749 20790 20816 20822 20827 20994
## [301] 21015 21032 21082 21157 21314 21365 21577 21584 21601 21674 21709 21724
## [313] 21752 22130 22702 22871 23939 19662 19700 19763 19769 19774 19774 19779
## [325] 19816 20010 20186 20377 20388 20422 20443 20523 20533 20583 20617 20626
## [337] 20632 20686 20822 20828 20837 21268 21280 21396 21606 21702 21731 21805
## [349] 21883 22120 23514 19608 19631 19740 19864 20216 20237 20295 20414 20596
## [361] 20652 20671 20733 21059 21115 21141 21214 21457 21490 21826 21916 22414
## [373] 22712 23306 23641 19493 19641 19976 20213 20286 20502 20736 21031 21157
## [385] 22101 22187 22318 20175 19479 19493 19493 19497 19507 19528 19530 19549
## [397] 19563 19586 19598 19605 19612 19631 19633 19634 19634 19636 19641 19659
## [409] 19667 19680 19695 19709 19716 19721 19732 19733 19768 19770 19771 19784
## [421] 19807 19817 19831 19836 19845 19849 19851 19851 19881 19897 19902 19906
## [433] 19920 19923 19935 19938 19956 19957 19962 19962 19970 19979 19980 20003
## [445] 20005 20005 20009 20010 20031 20044 20053 20054 20068 20074 20084 20091
## [457] 20108 20108 20109 20114 20131 20133 20133 20147 20156 20188 20208 20218
## [469] 20232 20237 20261 20280 20297 20311 20321 20328 20330 20359 20370 20376
## [481] 20388 20397 20412 20412 20412 20435 20446 20452 20454 20462 20474 20506
## [493] 20526 20533 20539 20543 20543 20557 20560 20564 20571 20575 20581 20598
## [505] 20611 20624 20631 20637 20651 20669 20678 20686 20701 20706 20738 20743
## [517] 20761 20822 20823 20834 20856 20872 20872 20901 20918 20937 20943 20964
## [529] 20983 20987 20994 21001 21049 21071 21073 21121 21144 21173 21191 21192
## [541] 21206 21227 21244 21252 21270 21279 21287 21355 21383 21395 21396 21421
## [553] 21476 21477 21482 21508 21514 21541 21543 21551 21570 21572 21585 21624
## [565] 21652 21655 21715 21744 21760 21765 21795 21797 21829 21910 21911 21938
## [577] 21949 21957 22007 22102 22739 23169 23407 26431 19472 19475 19478 19490
## [589] 19496 19580 19639 19639 19639 19704 19720 19725 19732 19747 19779 19797
## [601] 19845 19858 19875 19881 19894 19910 19961 20040 20118 20135 20139 20250
## [613] 20268 20337 20427 20475 20479 20528 20529 20531 20554 20586 20597 20625
## [625] 20631 20673 20700 20704 20708 20714 20741 20773 20781 20832 20836 20879
## [637] 20902 20945 20991 21055 21056 21160 21166 21200 21280 21309 21339 21403
## [649] 21440 21441 21499 21500 21500 21511 21520 21543 21607 21614 21741 21762
## [661] 21857 19582 19620 19634 19648 19669 19673 19795 19812 19821 19868 19895
## [673] 19962 19967 20010 20063 20097 20097 20141 20153 20156 20212 20229 20233
## [685] 20255 20281 20341 20365 20446 20451 20534 20611 20662 20695 20719 20719
## [697] 20746 20773 20906 20906 20957 21003 21037 21061 21141 21215 21275 21286
## [709] 21341 21444 21505 21650 21735 21825 21886 19475 19564 19567 19588 19612
## [721] 19620 19643 19658 19790 19818 19819 19840 19861 19871 19878 19933 19955
## [733] 19974 20058 20059 20121 20190 20203 20216 20232 20232 20254 20286 20323
## [745] 20405 20565 20588 20611 20652 20706 20775 20779 20827 20891 20892 20893
## [757] 20900 20904 21005 21060 21081 21133 21184 21210 21287 21309 21324 21340
## [769] 21448 21451 21527 21631 21698 22053 22145 22187 22225 19510 19581 19646
## [781] 19735 19774 19875 19887 19889 19985 20010 20227 20270 20343 20345 20456
## [793] 20592 20612 20701 20740 20780 20874 20922 20955 21005 21075 21089 21093
## [805] 21129 21239 21299 21312 21417 21456 21469 21554 21620 21746 21747 21836
## [817] 21916 21936 22262 24994 19700 19866 19923 19977 20021 20041 20062 20117
## [829] 20166 20218 20316 20322 20331 20394 20448 20597 21058 21139 21169 21193
## [841] 21531 21615 21693 21865 21935 23134 19991 20183 20233 20465 20780 20867
## [853] 20906 21064 21065 21352 21359 21382 21483 19537 20075 20229 20643 20916
## [865] 21003 21155 21417 21799 22101 22606 19728 20437 20579
# Box plot of the net finish time (seconds), including the extreme values
boxplot(
  ds_run$finish_net_sec,
  col = "lightblue",
  main = "Boxplot com Outliers",
  ylab = "Valores"
)
# Annotate the plot with the values stored in `quartis`, rounded to two
# decimals and written in red to the right (pos = 4) of x = 1
text(
  x = 1,
  y = quartis,
  labels = round(quartis, digits = 2),
  col = "red",
  cex = 0.8,
  pos = 4
)