library(tidyverse)
library(readxl)
library(tidytext)
# Read the match workbook and tidy it in a single pipeline:
#   1. parse Date from its yyyymmdd representation,
#   2. drop rows that are exact duplicates of another row,
#   3. derive a numeric calendar Year from the parsed Date.
rugby_data <- read_excel("data.xlsx") %>%
  mutate(Date = as.Date(as.character(Date), format = "%Y%m%d")) %>%
  distinct() %>%
  mutate(Year = as.numeric(format(Date, "%Y")))
Rugby Data Visualisation Report
1 Introduction
This report uses an Excel dataset of rugby matches to create 10 outputs (tables and visualisations).
The aim is to summarise scoring, performance trends, player contribution, and home/away effects in a way that supports analysis and decision-making.
2 Visualisations
2.1 Total points scored by each team per year.
This table shows the total points per team per year, an important metric to track attacking performance.
# Total points per team per year.
# Rows sharing Team/Opposition/Date collapse to the first such row, so each
# team's match score is counted once before summing.
team_year_table <- rugby_data %>%
  distinct(Team, Opposition, Date, .keep_all = TRUE) %>%
  group_by(Team, Year) %>%
  summarise(
    Total_Points = sum(.data[["Points For"]], na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Teams in reverse alphabetical order, years ascending within each team.
  arrange(desc(Team), Year)

team_year_table
# A tibble: 72 × 3
Team Year Total_Points
<chr> <dbl> <dbl>
1 WALES 2015 336
2 WALES 2016 304
3 WALES 2017 221
4 WALES 2018 295
5 USA 2015 16
6 USA 2017 19
7 USA 2018 30
8 URUG 2015 12
9 TONGA 2016 19
10 TONGA 2017 6
# ℹ 62 more rows
2.2 Top 5 Teams Bar Chart.
This bar chart shows the five highest-scoring teams in each year.
# Keep the five largest yearly totals within each year. slice_max() keeps
# ties by default, so a year can contribute more than five rows.
top5_data <- team_year_table %>%
  group_by(Year) %>%
  slice_max(order_by = Total_Points, n = 5) %>%
  ungroup()

# One panel per year, bars ordered by total within each panel.
ggplot(
  data = top5_data,
  mapping = aes(
    # reorder_within() orders teams separately inside every facet.
    x = tidytext::reorder_within(Team, Total_Points, Year),
    y = Total_Points
  )
) +
  geom_col(fill = "steelblue") +
  # scale_x_reordered() removes the suffix reorder_within() adds to labels.
  tidytext::scale_x_reordered() +
  facet_wrap(~Year, scales = "free_y") +
  coord_flip() +
  labs(
    title = "Top 5 Teams by Total Points (2015–2018)",
    x = "Team",
    y = "Total Points"
  )
2.3 Irelands Total Points
A graph of Ireland's total points lets us track their scoring over a number of years and see whether they are improving, regressing or performing consistently. 2015 was their highest tally and 2018 their lowest, which is potentially a sign of declining performance.
# Ireland's yearly totals, sorted chronologically for the line chart.
IRE_year <- team_year_table %>%
  filter(Team == "IRE") %>%
  arrange(Year)

# Line with point markers: one point per year present in IRE_year.
ggplot(data = IRE_year, mapping = aes(x = Year, y = Total_Points)) +
  theme_minimal() +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  labs(
    title = "Ireland Total Points by Year (2015–2018)",
    x = "Year",
    y = "Total Points"
  )
2.4 A table of the mean score of each team
This table shows the average number of points each team scores per match in a given year. Performance indicators like this allow us to compare one team against another.
# Matches played, total score and mean score per team per year.
# Rows sharing Team/Opposition/Date collapse to the first such row first.
match_level <- rugby_data %>%
  distinct(Team, Opposition, Date, .keep_all = TRUE) %>%
  group_by(Team, Year) %>%
  summarise(
    # n() counts every match row, including any with a missing score,
    # whereas the sum and mean below skip missing scores.
    Total_Matches = n(),
    Total_Score = sum(.data[["Points For"]], na.rm = TRUE),
    Mean_Score = mean(.data[["Points For"]], na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Highest average first.
  arrange(desc(Mean_Score))

match_level
# A tibble: 72 × 5
Team Year Total_Matches Total_Score Mean_Score
<chr> <dbl> <int> <dbl> <dbl>
1 NZL 2015 1 62 62
2 ARG 2015 1 43 43
3 NZL 2016 7 263 37.6
4 NZL 2018 6 218 36.3
5 SAMOA 2015 1 33 33
6 ENG 2016 13 424 32.6
7 IRE 2017 11 355 32.3
8 ENG 2017 10 318 31.8
9 HURRI 2017 1 31 31
10 USA 2018 1 30 30
# ℹ 62 more rows
2.5 Top 10 Teams by Average Points Per Match
Using the table above, we can now create a bar chart that ranks the top 10 teams. It is a great way to see which nation is setting the standard and who needs to catch up, with New Zealand being the pace setters here.
# Per-team figures across all years: matches, total score and mean score,
# sorted with the highest mean first.
total_team <- rugby_data %>%
  distinct(Team, Opposition, Date, .keep_all = TRUE) %>%
  group_by(Team) %>%
  summarise(
    Total_Matches = n(),
    Total_Score = sum(.data[["Points For"]], na.rm = TRUE),
    Mean_Score = mean(.data[["Points For"]], na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(Mean_Score))

# Ten highest means (slice_max() keeps ties by default).
top10 <- slice_max(total_team, order_by = Mean_Score, n = 10)

# Horizontal bars; reorder() sorts teams by their mean score.
ggplot(
  data = top10,
  mapping = aes(x = reorder(Team, Mean_Score), y = Mean_Score)
) +
  theme_minimal() +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 Teams by Average Points per Match",
    x = "Team",
    y = "Average Points per Match"
  )
2.6 Average Points Performance Curve
This performance curve is a valuable visual because it helps us benchmark trends over time and see whether a team's performance is improving, stable, or declining relative to other teams. New Zealand were by far the strongest performers in 2015, although this may be down to a single outlying result, and Scotland were the most consistent performers.
# Yearly mean points per match for five selected teams.
# Filtering on Team before de-duplicating gives the same rows as the reverse
# order, because Team is part of the de-duplication key.
performance_curve <- rugby_data %>%
  filter(Team %in% c("NZL", "WALES", "ENG", "IRE", "SCOT")) %>%
  distinct(Team, Opposition, Date, .keep_all = TRUE) %>%
  group_by(Team, Year) %>%
  summarise(
    Total_Matches = n(),
    Total_Score = sum(.data[["Points For"]], na.rm = TRUE),
    Mean_Score = mean(.data[["Points For"]], na.rm = TRUE),
    .groups = "drop"
  )

# One coloured line per team, with a marker at each year.
ggplot(
  data = performance_curve,
  mapping = aes(x = Year, y = Mean_Score, group = Team, color = Team)
) +
  theme_minimal() +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(
    title = "Performance Curve: Mean Points per Match",
    x = "Year",
    y = "Average Points per Match",
    color = "Team"
  )
2.7 Player attacking contributions across all matches
Having looked at attacking metrics on a team basis, we now look at them on an individual basis. This gives a real insight into who is performing really well for their team.
# Attacking totals per player (keyed by Name and Team) over every row in
# rugby_data, sorted with the highest points scorer first.
player_level <- rugby_data %>%
  group_by(Name, Team) %>%
  summarise(
    Total_Try = sum(Try, na.rm = TRUE),
    Total_Try_Assist = sum(.data[["Try Assist"]], na.rm = TRUE),
    Total_Conversion = sum(Conversion, na.rm = TRUE),
    Total_Penalty = sum(Penalty, na.rm = TRUE),
    Total_Drop_Goal = sum(.data[["Drop Goal"]], na.rm = TRUE),
    Total_Points = sum(Points, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(Total_Points))

player_level
# A tibble: 1,436 × 8
Name Team Total_Try Total_Try_Assist Total_Conversion Total_Penalty
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 O Farrell ENG 7 11 70 81
2 G Laidlaw SCOT 1 15 54 72
3 J Sexton IRE 1 8 44 67
4 L Halfpenny WALES 2 5 31 70
5 B Foley AUS 7 11 36 33
6 B Barrett NZL 8 10 41 23
7 D Biggar WALES 4 11 28 34
8 T Allan ITALY 5 7 23 24
9 H Pollard SA 2 2 18 29
10 P Jackson IRE 1 4 37 17
# ℹ 1,426 more rows
# ℹ 2 more variables: Total_Drop_Goal <dbl>, Total_Points <dbl>
2.8 Player defensive contributions across all matches
Having looked at the attacking metrics of individual players, let's now analyse their defensive performances. To get a fair and accurate picture, only players who attempted a minimum of 100 tackles are included. Richie Gray from Scotland leads with a very impressive 99% tackle success rate.
# Tackle success per player, restricted to players with at least 100
# attempted tackles and sorted with the best success rate first.
# NOTE(review): rows are grouped by Name only, so different players who share
# a name would be merged into one row; confirm this is intended.
tackle_summary <- rugby_data %>%
  group_by(Name) %>%
  summarise(
    Total_Tackles = sum(Tackles, na.rm = TRUE),
    Total_Missed = sum(.data[["Missed Tackles"]], na.rm = TRUE),
    # Attempts = tackles made + tackles missed.
    Tackles_Attempted = Total_Tackles + Total_Missed,
    Tackle_Success_Percent = 100 * (Total_Tackles / Tackles_Attempted),
    .groups = "drop"
  ) %>%
  filter(Tackles_Attempted >= 100) %>%
  arrange(desc(Tackle_Success_Percent))

tackle_summary
# A tibble: 171 × 5
Name Total_Tackles Total_Missed Tackles_Attempted Tackle_Success_Percent
<chr> <dbl> <dbl> <dbl> <dbl>
1 R Gray 193 2 195 99.0
2 J Ball 112 2 114 98.2
3 G Biagi 105 2 107 98.1
4 J Tipuric 295 8 303 97.4
5 J Gray 458 13 471 97.2
6 L Charte… 193 6 199 97.0
7 A Zanni 188 6 194 96.9
8 N Smith 111 4 115 96.5
9 C Hill 165 6 171 96.5
10 E Etzebe… 136 5 141 96.5
# ℹ 161 more rows
2.9 Home and away win rates by team
This table examines the distribution of wins achieved at home and away for each team.
It is important because home advantage is a well established phenomenon in sport, and comparing win rates provides insight into whether teams perform differently depending on match location.
# Home and away wins and win rates per team.
#
# Each rate is the team's wins at a venue divided by the matches it played at
# that venue. The rates are deliberately not divided by Total_Wins: that
# would give the venue share of a team's wins, which depends on how many
# home/away fixtures the team had and is NaN for a team with no wins.
win_summary <- rugby_data %>%
  # One row per team per match (first row for each Team/Opposition/Date).
  distinct(Team, Opposition, Date, .keep_all = TRUE) %>%
  mutate(
    Win = `Points For` > `Points Against`,
    # Only matches with a known result count as played, so the denominators
    # agree with the na.rm = TRUE win counts. %in% never yields NA.
    Played_Home = !is.na(Win) & `Home/Away` %in% "Home",
    Played_Away = !is.na(Win) & `Home/Away` %in% "Away"
  ) %>%
  group_by(Team) %>%
  summarise(
    Total_Wins = sum(Win, na.rm = TRUE),
    Home_Wins = sum(Win & Played_Home, na.rm = TRUE),
    Away_Wins = sum(Win & Played_Away, na.rm = TRUE),
    # NA rather than NaN when the team played no matches at that venue.
    Home_Win_Rate = if (any(Played_Home)) {
      Home_Wins / sum(Played_Home)
    } else {
      NA_real_
    },
    Away_Win_Rate = if (any(Played_Away)) {
      Away_Wins / sum(Played_Away)
    } else {
      NA_real_
    },
    .groups = "drop"
  )

win_summary
# A tibble: 27 × 6
Team Total_Wins Home_Wins Away_Wins Home_Win_Rate Away_Win_Rate
<chr> <int> <int> <int> <dbl> <dbl>
1 ARG 2 0 2 0 1
2 AUS 10 2 8 0.2 0.8
3 BLUES 1 1 0 1 0
4 CAN 0 0 0 NaN NaN
5 CFS 0 0 0 NaN NaN
6 CRUS 0 0 0 NaN NaN
7 ENG 36 24 12 0.667 0.333
8 FIJI 1 1 0 1 0
9 FRA 15 13 2 0.867 0.133
10 GEORG 0 0 0 NaN NaN
# ℹ 17 more rows
2.10 Does Home or Away impact a result
This table examines all games and gives an excellent insight into the role home and away plays in the match outcome. 60% of 199 games were won by the home team with 40% won by the away team.
# Overall split of wins by venue across all teams.
# Home_Win_Rate and Away_Win_Rate are the shares of all recorded wins that
# came at home and away respectively.
win_totals <- rugby_data %>%
  # One row per team per match (first row for each Team/Opposition/Date).
  distinct(Team, Opposition, Date, .keep_all = TRUE) %>%
  mutate(
    Win = .data[["Points For"]] > .data[["Points Against"]],
    Venue = .data[["Home/Away"]]
  ) %>%
  summarise(
    Total_Wins = sum(Win, na.rm = TRUE),
    Total_Home_Wins = sum(Win & Venue == "Home", na.rm = TRUE),
    Total_Away_Wins = sum(Win & Venue == "Away", na.rm = TRUE),
    Home_Win_Rate = Total_Home_Wins / Total_Wins,
    Away_Win_Rate = Total_Away_Wins / Total_Wins
  )

win_totals
# A tibble: 1 × 5
Total_Wins Total_Home_Wins Total_Away_Wins Home_Win_Rate Away_Win_Rate
<int> <int> <int> <dbl> <dbl>
1 199 120 79 0.603 0.397