Boxplot
# ------------------------
# Pit stop duration: histogram
# ------------------------
# Cut-off at the 95th percentile of duration_ms (NAs ignored); rows above it
# are left out of the plot below.
limit_95 <- quantile(pit_stops$duration_ms, probs = 0.95, na.rm = TRUE)

# Histogram of the durations at or below the cut-off, in 200 ms wide bins.
pit_stops %>%
  dplyr::filter(duration_ms <= limit_95) %>%
  ggplot(aes(x = duration_ms)) +
  geom_histogram(binwidth = 200, fill = "lightblue", color = "darkblue") +
  ggtitle("Rozdelenie trvania pitstopov F1") +
  xlab("Dĺžka pit stopu (ms)") +
  ylab("Počet pit stopov") +
  theme_minimal()

# Histogram of the lap on which each pit stop was made; bins are two laps
# wide and bin edges are aligned to 0.
ggplot(data = pit_stops, mapping = aes(x = lap)) +
  theme_minimal() +
  geom_histogram(
    binwidth = 2,
    boundary = 0,
    colour = "darkblue",
    fill = "lightblue"
  ) +
  ggtitle("Rozdelenie pit stopov podľa kola") +
  xlab("Kolo") +
  ylab("Počet pit stopov")

# Cut-off at the 95th percentile of all pit stop durations (NAs ignored).
limit_95 <- quantile(pit_stops$duration_ms, probs = 0.95, na.rm = TRUE)

# Boxplot of duration per stop order, restricted to stops 1-4 and to
# durations at or below the cut-off.
pit_stops %>%
  dplyr::filter((stop_number %in% 1:4) & (duration_ms <= limit_95)) %>%
  ggplot(aes(x = factor(stop_number), y = duration_ms)) +
  geom_boxplot(fill = "lightblue", color = "darkblue") +
  ggtitle("Dĺžka pit stopov podľa poradia (bez extrémov)") +
  xlab("Poradie pit stopu") +
  ylab("Trvanie (ms)") +
  theme_minimal()

# ------------------------
# Summary statistics as a table
# ------------------------
# One row per stop_number: number of rows plus mean, standard deviation,
# minimum, quartiles and maximum of duration_ms (NAs ignored).
pit_stats <- summarise(
  group_by(pit_stops, stop_number),
  n = n(),
  mean = mean(duration_ms, na.rm = TRUE),
  sd = sd(duration_ms, na.rm = TRUE),
  min = min(duration_ms, na.rm = TRUE),
  q25 = quantile(duration_ms, probs = 0.25, na.rm = TRUE),
  median = median(duration_ms, na.rm = TRUE),
  q75 = quantile(duration_ms, probs = 0.75, na.rm = TRUE),
  max = max(duration_ms, na.rm = TRUE),
  .groups = "drop"
)

# Render the summary as a styled table: bold first column, shaded bold header
# row, and a grouping header spanning the eight statistic columns.
kable(
  pit_stats,
  caption = "Základné štatistiky dĺžky pit stopov podľa poradia zastávky",
  digits = 2
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  ) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#f2f2f2") %>%
  add_header_above(c(" " = 1, "Štatistiky dĺžky pit stopu" = 8))
Základné štatistiky dĺžky pit stopov podľa poradia zastávky

Štatistiky dĺžky pit stopu

| stop_number | n | mean | sd | min | q25 | median | q75 | max |
|---|---|---|---|---|---|---|---|---|
| 1 | 5573 | 74778.35 | 300436.67 | 14160 | 22087.00 | 23744.0 | 26879.00 | 3069017 |
| 2 | 3716 | 81413.58 | 270539.34 | 12897 | 21892.75 | 23467.5 | 25788.50 | 2048175 |
| 3 | 1496 | 96718.27 | 325293.53 | 12959 | 21472.00 | 23350.0 | 26238.25 | 2485913 |
| 4 | 427 | 106832.92 | 374243.56 | 13925 | 21041.00 | 23132.0 | 27150.50 | 2434653 |
| 5 | 120 | 391127.17 | 757595.35 | 14493 | 22481.25 | 25681.5 | 42762.25 | 2461612 |
| 6 | 29 | 426872.79 | 822788.18 | 13173 | 22979.00 | 31494.0 | 74200.00 | 2483382 |
| 7 | 3 | 29079.33 | 5910.42 | 22379 | 26842.50 | 31306.0 | 32429.50 | 33553 |
| 15 | 1 | 24239.00 | NA | 24239 | 24239.00 | 24239.0 | 24239.00 | 24239 |
| 42 | 1 | 24132.00 | NA | 24132 | 24132.00 | 24132.0 | 24132.00 | 24132 |
| 48 | 1 | 28211.00 | NA | 28211 | 28211.00 | 28211.0 | 28211.00 | 28211 |
| 51 | 1 | 24232.00 | NA | 24232 | 24232.00 | 24232.0 | 24232.00 | 24232 |
| 52 | 1 | 23813.00 | NA | 23813 | 23813.00 | 23813.0 | 23813.00 | 23813 |
| 57 | 1 | 24384.00 | NA | 24384 | 24384.00 | 24384.0 | 24384.00 | 24384 |
| 70 | 1 | 24367.00 | NA | 24367 | 24367.00 | 24367.0 | 24367.00 | 24367 |
# One row per stop_number: number of rows plus mean, median, minimum and
# maximum of the lap on which the stop was made (NAs ignored).
lap_stats <- summarise(
  group_by(pit_stops, stop_number),
  n = n(),
  mean_lap = mean(lap, na.rm = TRUE),
  median_lap = median(lap, na.rm = TRUE),
  min_lap = min(lap, na.rm = TRUE),
  max_lap = max(lap, na.rm = TRUE),
  .groups = "drop"
)

# Render the summary as a styled table with a bold first column and a shaded
# bold header row.
kable(
  lap_stats,
  caption = "Základné štatistiky kola pit stopu podľa poradia zastávky",
  digits = 2
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  ) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#f2f2f2")
Základné štatistiky kola pit stopu podľa poradia zastávky

| stop_number | n | mean_lap | median_lap | min_lap | max_lap |
|---|---|---|---|---|---|
| 1 | 5573 | 16.39 | 15 | 1 | 64 |
| 2 | 3716 | 30.95 | 31 | 2 | 74 |
| 3 | 1496 | 37.99 | 39 | 3 | 73 |
| 4 | 427 | 42.34 | 44 | 6 | 78 |
| 5 | 120 | 47.41 | 46 | 20 | 70 |
| 6 | 29 | 52.21 | 54 | 31 | 64 |
| 7 | 3 | 65.00 | 64 | 64 | 67 |
| 15 | 1 | 2.00 | 2 | 2 | 2 |
| 42 | 1 | 2.00 | 2 | 2 | 2 |
| 48 | 1 | 3.00 | 3 | 3 | 3 |
| 51 | 1 | 2.00 | 2 | 2 | 2 |
| 52 | 1 | 2.00 | 2 | 2 | 2 |
| 57 | 1 | 2.00 | 2 | 2 | 2 |
| 70 | 1 | 2.00 | 2 | 2 | 2 |
# Per race: total number of pit stop rows, number of distinct driver_id values
# among them, and the ratio of the two.
stops_per_race <- pit_stops %>%
  group_by(race_id) %>%
  summarise(
    total_stops = n(),
    drivers = n_distinct(driver_id),
    .groups = "drop"
  ) %>%
  mutate(avg_stops = total_stops / drivers)

# Histogram of the per-race ratio, in bins 0.2 wide.
ggplot(data = stops_per_race, mapping = aes(x = avg_stops)) +
  theme_minimal() +
  geom_histogram(binwidth = 0.2, colour = "darkblue", fill = "lightblue") +
  ggtitle("Priemerný počet pit stopov na jazdca v pretekoch") +
  xlab("Pit stopy na jazdca") +
  ylab("Počet pretekov")

# ------------------------
# t-test: compare mean duration of the first and the second pit stop
# ------------------------
# Two-sample t-test (reported as Welch in the output below) on duration_ms of
# rows with stop_number == 1 versus rows with stop_number == 2.
# The argument expressions are echoed verbatim in the printed "data:" line.
# NOTE(review): the summary table above shows means far above the medians
# (e.g. ~74.8k vs ~23.7k ms for stop 1), so the compared means are driven by
# extreme durations - consider a trimmed or rank-based comparison.
t_test_result <- t.test(
pit_stops$duration_ms[pit_stops$stop_number == 1],
pit_stops$duration_ms[pit_stops$stop_number == 2]
)
# Print the test result.
t_test_result
Welch Two Sample t-test
data: pit_stops$duration_ms[pit_stops$stop_number == 1] and pit_stops$duration_ms[pit_stops$stop_number == 2]
t = -1.1075, df = 8503.2, p-value = 0.2681
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-18379.140 5108.681
sample estimates:
mean of x mean of y
74778.35 81413.58
# ------------------------
# ANOVA: compare duration across all pit stop numbers
# ------------------------
# One-way ANOVA of duration_ms with stop_number converted to a factor, so each
# distinct stop number is its own group.
# NOTE(review): per the summary table above, several stop numbers occur only
# once (n = 1) - confirm these rows are valid before relying on the F test.
anova_result <- aov(duration_ms ~ factor(stop_number), data = pit_stops)
# Print the ANOVA table.
summary(anova_result)
                       Df    Sum Sq   Mean Sq F value Pr(>F)
factor(stop_number)    13 1.571e+13 1.208e+12   12.71 <2e-16 ***
Residuals           11357 1.080e+15 9.509e+10
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# ------------------------
# Linear regression: predict pit stop duration from lap and stop number
# ------------------------
# Ordinary least-squares fit of duration_ms on lap and stop_number; both enter
# as plain (non-factor) terms, giving one slope each in the output below.
# NOTE(review): the reported R-squared is below 0.01, so the fit explains very
# little of the variation in duration_ms despite the small p-values.
model <- lm(duration_ms ~ lap + stop_number, data = pit_stops)
# Print coefficients and fit statistics.
summary(model)
Call:
lm(formula = duration_ms ~ lap + stop_number, data = pit_stops)
Residuals:
     Min       1Q   Median       3Q      Max
-1125645   -74757   -59495   -42449  2948258
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept) 107753.5     6026.5  17.880  < 2e-16 ***
lap          -1939.7      207.9  -9.331  < 2e-16 ***
stop_number  14944.8     2026.5   7.375 1.76e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 309000 on 11368 degrees of freedom
Multiple R-squared: 0.009317, Adjusted R-squared: 0.009143
F-statistic: 53.46 on 2 and 11368 DF, p-value: < 2.2e-16