# Odhad modelu
# * lap (-25.63): Štatisticky významný. Každým kolom sa pit stop v priemere skráti o 25,6
# * ms.stop (1.57): Štatisticky nevýznamný ($p = 0.959$). Poradie zastávky dĺžku času nemení.
# * $R^2$ (0.0065): Model vysvetľuje len 0,65 % zmien v čase. Je takmer nepoužiteľný na predpovede, lebo dĺžku pit stopu ovplyvňujú iné faktory (chyby, počasie).
# * Rezíduá: Obrovský rozptyl (chyby až do +35 sekúnd) potvrdzuje prítomnosť extrémnych zlyhaní v boxoch, ktoré model nevie zachytiť.
# Diagnostické grafy
# Reset par nastavení pre istotu
model <- lm(milliseconds ~ lap + stop, data = pit_stops)
summary(model)
##
## Call:
## lm(formula = milliseconds ~ lap + stop, data = pit_stops)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11804 -2564 -993 1222 35777
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25102.854 93.112 269.598 < 2e-16 ***
## lap -25.630 3.197 -8.017 1.2e-15 ***
## stop 1.576 30.431 0.052 0.959
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4591 on 10851 degrees of freedom
## Multiple R-squared: 0.006565, Adjusted R-squared: 0.006382
## F-statistic: 35.85 on 2 and 10851 DF, p-value: 3.022e-16
par(mfrow = c(2, 2), mar = c(4.5, 4.5, 3, 1))
plot(model, which = 1,
col = "steelblue", pch = 19, cex = 0.7)
plot(model, which = 2,
col = "steelblue", pch = 19, cex = 0.7)
plot(model, which = 3, ,
col = "steelblue", pch = 19, cex = 0.7)
plot(model, which = 5,
col = "steelblue", pch = 19, cex = 0.7)

par(mfrow = c(1, 1))
# Reset na predvolené nastavenie
par(mfrow = c(1, 1))
# Normality - Shapiro-Wilk (max 5000 pozorovaní) → použijeme náhodný výber
set.seed(123)
resid_sample <- sample(residuals(model), min(5000, length(residuals(model))))
cat("Shapiro-Wilk test (vzorka 5000 rezíduí):\n")
## Shapiro-Wilk test (vzorka 5000 rezíduí):
print(shapiro.test(resid_sample))
##
## Shapiro-Wilk normality test
##
## data: resid_sample
## W = 0.85022, p-value < 2.2e-16
cat("\nJarque-Bera test:\n")
##
## Jarque-Bera test:
print(jarque.bera.test(residuals(model)))
##
## Jarque Bera Test
##
## data: residuals(model)
## X-squared = 34872, df = 2, p-value < 2.2e-16
cat("\nBreusch-Pagan test:\n")
##
## Breusch-Pagan test:
print(bptest(model))
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 49.874, df = 2, p-value = 1.479e-11
cat("\nDurbin-Watson test:\n")
##
## Durbin-Watson test:
print(dwtest(model))
##
## Durbin-Watson test
##
## data: model
## DW = 0.9287, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
cat("\nOutlier Test:\n")
##
## Outlier Test:
print(outlierTest(model))
## rstudent unadjusted p-value Bonferroni p
## 4183 7.816374 5.9396e-15 6.4468e-11
## 10724 7.549229 4.7297e-14 5.1336e-10
## 7041 7.471668 8.5276e-14 9.2558e-10
## 4613 7.311887 2.8198e-13 3.0606e-09
## 7904 7.279347 3.5865e-13 3.8928e-09
## 4703 7.214757 5.7636e-13 6.2558e-09
## 7905 7.089481 1.4299e-12 1.5520e-08
## 188 6.966937 3.4269e-12 3.7195e-08
## 7906 6.946501 3.9590e-12 4.2971e-08
## 6205 6.905420 5.2854e-12 5.7368e-08
cat("\n=== Top 10 Cookovej vzdialenosti ===\n")
##
## === Top 10 Cookovej vzdialenosti ===
print(head(sort(cooks.distance(model), decreasing = TRUE), 10))
## 10354 4183 7807 7905 9443 9754
## 0.018274838 0.012751728 0.009697134 0.008411823 0.007555129 0.007541992
## 7041 4613 7907 5926
## 0.006576089 0.006299202 0.006157171 0.005838120
# Test heteroskedasticity
bptest(model)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 49.874, df = 2, p-value = 1.479e-11
bptest(model, ~ fitted(model) + I(fitted(model)^2))
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 160.25, df = 2, p-value < 2.2e-16
# Test autokorelácie
dwtest(model)
##
## Durbin-Watson test
##
## data: model
## DW = 0.9287, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
bgtest(model, order = 1)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: model
## LM test = 3114.3, df = 1, p-value < 2.2e-16
# Odľahlé a vplyvné pozorovania
outlierTest(model)
## rstudent unadjusted p-value Bonferroni p
## 4183 7.816374 5.9396e-15 6.4468e-11
## 10724 7.549229 4.7297e-14 5.1336e-10
## 7041 7.471668 8.5276e-14 9.2558e-10
## 4613 7.311887 2.8198e-13 3.0606e-09
## 7904 7.279347 3.5865e-13 3.8928e-09
## 4703 7.214757 5.7636e-13 6.2558e-09
## 7905 7.089481 1.4299e-12 1.5520e-08
## 188 6.966937 3.4269e-12 3.7195e-08
## 7906 6.946501 3.9590e-12 4.2971e-08
## 6205 6.905420 5.2854e-12 5.7368e-08
cat("\n=== Top 10 najvplyvnejších pozorovaní (Cookova vzdialenosť) ===\n")
##
## === Top 10 najvplyvnejších pozorovaní (Cookova vzdialenosť) ===
head(sort(cooks.distance(model), decreasing = TRUE), 10)
## 10354 4183 7807 7905 9443 9754
## 0.018274838 0.012751728 0.009697134 0.008411823 0.007555129 0.007541992
## 7041 4613 7907 5926
## 0.006576089 0.006299202 0.006157171 0.005838120