# Global knitr chunk options: echo the code, hide messages and warnings.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(dplyr)
# Read the data set from the CSV file.
nhl <- read_csv(file = "nhlplayoffs.csv")

# Preview the first rows and list the column names.
head(x = nhl)
names(nhl)
## [1] "rank" "team" "year"
## [4] "games" "wins" "losses"
## [7] "ties" "shootout_wins" "shootout_losses"
## [10] "win_loss_percentage" "goals_scored" "goals_against"
## [13] "goal_differential"
# Rename the columns used most often below, then drop rows without a year.
# Only these five columns are renamed; the remaining ones keep their original
# snake_case names (e.g. goals_scored, win_loss_percentage), and the code
# further down refers to exactly this mix of names.
nhl <- nhl %>%
  rename(
    Year = year,
    Team = team,
    Wins = wins,
    Losses = losses,
    GoalsAgainst = goals_against
  ) %>%
  filter(!is.na(Year))

# Per-column summary of the cleaned data.
summary(nhl)
## rank Team Year games
## Min. : 1.000 Length:1009 Min. :1918 Min. : 2.000
## 1st Qu.: 3.000 Class :character 1st Qu.:1972 1st Qu.: 5.000
## Median : 6.000 Mode :character Median :1990 Median : 7.000
## Mean : 7.067 Mean :1986 Mean : 9.364
## 3rd Qu.:11.000 3rd Qu.:2007 3rd Qu.:12.000
## Max. :24.000 Max. :2022 Max. :27.000
## Wins Losses ties shootout_wins
## Min. : 0.000 Min. : 0.000 Min. :0.00000 Min. : 0.0000
## 1st Qu.: 1.000 1st Qu.: 4.000 1st Qu.:0.00000 1st Qu.: 0.0000
## Median : 3.000 Median : 4.000 Median :0.00000 Median : 1.0000
## Mean : 4.657 Mean : 4.657 Mean :0.04955 Mean : 0.9326
## 3rd Qu.: 7.000 3rd Qu.: 6.000 3rd Qu.:0.00000 3rd Qu.: 1.0000
## Max. :18.000 Max. :12.000 Max. :4.00000 Max. :10.0000
## shootout_losses win_loss_percentage goals_scored GoalsAgainst
## Min. :0.0000 Min. :0.0000 Min. : 0.00 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.:0.3330 1st Qu.:11.00 1st Qu.:16.00
## Median :1.0000 Median :0.4290 Median :20.00 Median :22.00
## Mean :0.9326 Mean :0.4112 Mean :26.63 Mean :26.63
## 3rd Qu.:1.0000 3rd Qu.:0.5450 3rd Qu.:37.00 3rd Qu.:35.00
## Max. :4.0000 Max. :1.0000 Max. :98.00 Max. :91.00
## goal_differential
## Min. :-27
## 1st Qu.: -6
## Median : -2
## Mean : 0
## 3rd Qu.: 3
## Max. : 49
library(ggplot2)
# Scatter plot of goals scored against win share, with a linear trend line.
ggplot(nhl, aes(x = goals_scored, y = win_loss_percentage)) +
  geom_point(alpha = 0.6, color = "steelblue") +
  # Spell out the model formula instead of relying on the implicit default.
  geom_smooth(method = "lm", formula = y ~ x, color = "red", se = FALSE) +
  # win_loss_percentage is stored as a 0-1 proportion; print the ticks as
  # percentages so they agree with the "(%)" unit in the axis title.
  scale_y_continuous(labels = function(share) share * 100) +
  theme_minimal() +
  labs(
    title = "Vzťah medzi počtom gólov a výherným percentom",
    x = "Počet strelených gólov",
    y = "Percento výhier (%)"
  )

# Box plot of playoff wins, one box per season.
ggplot(nhl, aes(x = factor(Year), y = Wins)) +
  geom_boxplot(fill = "lightblue", color = "darkblue") +
  # Year is discrete here, so every season gets its own tick label and the
  # labels overlap once there are many seasons. Keep every box but label
  # only the years divisible by ten.
  scale_x_discrete(
    breaks = function(years) years[as.integer(years) %% 10 == 0]
  ) +
  theme_minimal() +
  labs(
    title = "Rozdelenie počtu výhier v play-off podľa rokov",
    x = "Rok",
    y = "Počet výhier"
  )

# Mean win share per season (one row per Year).
nhl.trend <- nhl %>%
  group_by(Year) %>%
  summarise(AvgWinPercent = mean(win_loss_percentage, na.rm = TRUE))

# Line chart of the seasonal mean.
# NOTE(review): the title hard-codes 2006–2022, but nothing in this block
# restricts Year to that range — confirm the data really covers only those
# seasons, otherwise filter the data or retitle the plot.
ggplot(nhl.trend, aes(x = Year, y = AvgWinPercent)) +
  # `linewidth` replaces the deprecated `size` argument for line geoms.
  geom_line(color = "darkred", linewidth = 1) +
  geom_point(color = "black") +
  # The averaged column is a 0-1 proportion; print the ticks as percentages
  # to match the "%" in the axis title.
  scale_y_continuous(labels = function(share) share * 100) +
  theme_minimal() +
  labs(
    title = "Priemerné výherné percento tímov NHL (2006–2022)",
    x = "Rok",
    y = "Priemerné % výhier"
  )

library(knitr)
library(kableExtra)
# One-row table of overall averages computed over every row of `nhl`.
# NOTE(review): `Teams` is n(), i.e. the number of rows, not the number of
# distinct teams — confirm that this is the intended meaning.
nhl.stats <- summarise(
  nhl,
  Teams = n(),
  AvgWins = mean(Wins, na.rm = TRUE),
  AvgGF = mean(goals_scored, na.rm = TRUE),
  AvgGA = mean(GoalsAgainst, na.rm = TRUE),
  AvgDiff = mean(goal_differential, na.rm = TRUE),
  AvgWinPct = mean(win_loss_percentage, na.rm = TRUE)
)

# Render the table rounded to two decimals.
# NOTE(review): the caption hard-codes 2006–2022 although no Year filter is
# applied in this block — verify against the data.
nhl.stats %>%
  kable(digits = 2, caption = "Základné štatistiky výkonu tímov NHL (2006–2022)") %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover"))
## Základné štatistiky výkonu tímov NHL (2006–2022)
## | Teams | AvgWins | AvgGF | AvgGA | AvgDiff | AvgWinPct |
## |------:|--------:|------:|------:|--------:|----------:|
## |  1009 |    4.66 | 26.63 | 26.63 |       0 |      0.41 |
# Average number of wins per team name, keeping the ten highest averages.
top10 <- nhl %>%
  group_by(Team) %>%
  summarise(AvgWins = mean(Wins, na.rm = TRUE)) %>%
  arrange(desc(AvgWins)) %>%
  slice_head(n = 10)

# Render the ranking as a styled table.
top10 %>%
  kable(caption = "TOP 10 najúspešnejších tímov podľa priemerného počtu výhier") %>%
  kable_styling(full_width = FALSE)
## TOP 10 najúspešnejších tímov podľa priemerného počtu výhier
## | Team                    |  AvgWins |
## |:------------------------|---------:|
## | Vegas Golden Knights    | 9.500000 |
## | Tampa Bay Lightning     | 8.642857 |
## | Colorado Avalanche      | 7.777778 |
## | Carolina Hurricanes     | 7.222222 |
## | Edmonton Oilers         | 7.000000 |
## | Mighty Ducks of Anaheim | 7.000000 |
## | New York Islanders      | 6.370370 |
## | Dallas Stars            | 6.352941 |
## | New Jersey Devils       | 6.227273 |
## | Anaheim Ducks           | 6.100000 |
# Split the win shares at the 2015 season and compare the two groups with
# a two-sample t-test on their means.
before2015 <- pull(filter(nhl, Year < 2015), win_loss_percentage)
after2015 <- pull(filter(nhl, Year >= 2015), win_loss_percentage)
t.test(x = before2015, y = after2015)
##
## Welch Two Sample t-test
##
## data: before2015 and after2015
## t = -0.19841, df = 197.91, p-value = 0.8429
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03746614 0.03061629
## sample estimates:
## mean of x mean of y
## 0.4107148 0.4141397
# Interaktívne grafy --------------------------------------------------------
library(plotly)
## Interactive trend of the mean win share by year
# NOTE(review): the title hard-codes 2006–2022, but nothing in this block
# restricts Year to that range — confirm against the data in nhl.trend.
p1 <- ggplot(nhl.trend, aes(x = Year, y = AvgWinPercent)) +
  # `linewidth` replaces the deprecated `size` argument for line geoms.
  geom_line(color = "darkred", linewidth = 1) +
  geom_point(color = "black") +
  # AvgWinPercent is a 0-1 proportion; print the ticks as percentages to
  # match the "%" in the axis title.
  scale_y_continuous(labels = function(share) share * 100) +
  theme_minimal() +
  labs(
    title = "Priemerné výherné percento tímov NHL (2006–2022)",
    x = "Rok",
    y = "Priemerné % výhier"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p1)
## Interactive scatter plot – goals scored vs. win share
p2 <- ggplot(nhl, aes(x = goals_scored, y = win_loss_percentage)) +
  # The hover text is mapped on the point layer only. Mapped in the global
  # aes(), the character `text` variable would take part in ggplot2's default
  # grouping (interaction of all discrete variables), splitting the data
  # into one group per distinct label and leaving geom_smooth() without
  # enough points per group to fit the line.
  geom_point(
    aes(text = paste("Tím:", Team, "<br>Rok:", Year)),
    alpha = 0.6,
    color = "steelblue"
  ) +
  # Spell out the model formula instead of relying on the implicit default.
  geom_smooth(method = "lm", formula = y ~ x, color = "red", se = FALSE) +
  # win_loss_percentage is a 0-1 proportion; print the ticks as percentages
  # so they agree with the "(%)" unit in the axis title.
  scale_y_continuous(labels = function(share) share * 100) +
  theme_minimal() +
  labs(
    title = "Vzťah medzi počtom gólov a výherným percentom (interaktívne)",
    x = "Počet strelených gólov",
    y = "Percento výhier (%)"
  )

# Show only the custom team/year label in the hover tooltip.
ggplotly(p2, tooltip = "text")