Approach:
- Created structured player data with round-by-round results (W/L/D)
and opponent matchups
- Calculated total points from game outcomes and average opponent
ratings
- Generated visualizations to compare player performance against
pre-tournament ratings
- Analyzed discrepancies between expected and actual performance based
on rating tiers
Step 1: Load Required Libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(stringr)
library(ggplot2)
library(forcats)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
Step 2: Create Sample Data with Round Results and Opponents
# Sample tournament data: one row per player, holding the pre-tournament
# rating, the result (W/L/D) and opponent name for each of five rounds, and the
# opponents' ratings as one comma-separated string per player.
# NOTE(review): some pairings are self-matches ("Diana Davis" is her own
# opponent in rounds 1 and 2, "Bob Johnson" his own in round 5) -- confirm
# whether the sample data is meant to be mutually consistent.
players_data <- data.frame(
  Player = c(
    "Gary Hua", "Alice Smith", "Bob Johnson", "Charlie Brown", "Diana Davis"
  ),
  State = c("ON", "CA", "TX", "NY", "FL"),
  PreRating = c(1794, 1650, 1550, 1700, 1600),
  # Round 1
  R1_Result = c("W", "L", "D", "W", "L"),
  R1_Opponent = c(
    "Alice Smith", "Gary Hua", "Charlie Brown", "Bob Johnson", "Diana Davis"
  ),
  # Round 2
  R2_Result = c("W", "W", "L", "D", "W"),
  R2_Opponent = c(
    "Bob Johnson", "Charlie Brown", "Gary Hua", "Alice Smith", "Diana Davis"
  ),
  # Round 3
  R3_Result = c("D", "W", "W", "L", "L"),
  R3_Opponent = c(
    "Charlie Brown", "Diana Davis", "Alice Smith", "Gary Hua", "Bob Johnson"
  ),
  # Round 4
  R4_Result = c("W", "D", "L", "W", "D"),
  R4_Opponent = c(
    "Diana Davis", "Bob Johnson", "Alice Smith", "Diana Davis", "Gary Hua"
  ),
  # Round 5
  R5_Result = c("W", "W", "D", "L", "W"),
  R5_Opponent = c(
    "Charlie Brown", "Diana Davis", "Bob Johnson", "Alice Smith",
    "Charlie Brown"
  ),
  # Opponent ratings, one comma-separated string per player (parsed later).
  OpponentRatings = c(
    "1650, 1550, 1700, 1600, 1700",
    "1794, 1700, 1600, 1550, 1600",
    "1794, 1794, 1650, 1650, 1550",
    "1550, 1650, 1794, 1600, 1650",
    "1600, 1600, 1550, 1794, 1700"
  )
)
# Render the sample data as an HTML table. escape = FALSE passes cell contents
# through without HTML-escaping them.
kable_styling(
  kable(players_data, format = "html", escape = FALSE),
  # Bordered, striped rows with compact spacing.
  bootstrap_options = c("bordered", "striped", "condensed"),
  position = "center",
  font_size = 12
)
|
Player
|
State
|
PreRating
|
R1_Result
|
R1_Opponent
|
R2_Result
|
R2_Opponent
|
R3_Result
|
R3_Opponent
|
R4_Result
|
R4_Opponent
|
R5_Result
|
R5_Opponent
|
OpponentRatings
|
|
Gary Hua
|
ON
|
1794
|
W
|
Alice Smith
|
W
|
Bob Johnson
|
D
|
Charlie Brown
|
W
|
Diana Davis
|
W
|
Charlie Brown
|
1650, 1550, 1700, 1600, 1700
|
|
Alice Smith
|
CA
|
1650
|
L
|
Gary Hua
|
W
|
Charlie Brown
|
W
|
Diana Davis
|
D
|
Bob Johnson
|
W
|
Diana Davis
|
1794, 1700, 1600, 1550, 1600
|
|
Bob Johnson
|
TX
|
1550
|
D
|
Charlie Brown
|
L
|
Gary Hua
|
W
|
Alice Smith
|
L
|
Alice Smith
|
D
|
Bob Johnson
|
1794, 1794, 1650, 1650, 1550
|
|
Charlie Brown
|
NY
|
1700
|
W
|
Bob Johnson
|
D
|
Alice Smith
|
L
|
Gary Hua
|
W
|
Diana Davis
|
L
|
Alice Smith
|
1550, 1650, 1794, 1600, 1650
|
|
Diana Davis
|
FL
|
1600
|
L
|
Diana Davis
|
W
|
Diana Davis
|
L
|
Bob Johnson
|
D
|
Gary Hua
|
W
|
Charlie Brown
|
1600, 1600, 1550, 1794, 1700
|
# row_spec(0, bold = TRUE, background = "#f8f9fa") # Style header row: bold text and light background
Step 3: Calculate Total Points from Round Results:
Scoring: each win (W) is worth 1 point, each draw (D) 0.5 points, and each loss (L) 0 points.
A player's total is the sum of these points, added up over all rounds.
# Score every player: a win ("W") is worth 1 point, a draw ("D") 0.5 and a
# loss ("L") 0. The score is summed over every column whose name ends in
# "_Result", so adding further rounds to the data needs no change here.
processed_data <- players_data %>%
  mutate(
    TotalPoints = rowSums(
      # Each comparison is logical; arithmetic turns TRUE/FALSE into 1/0.
      across(ends_with("_Result"), ~ (.x == "W") + 0.5 * (.x == "D"))
    )
  )
# Render the data, now including TotalPoints, as an HTML table.
# escape = FALSE passes cell contents through without HTML-escaping them.
kable_styling(
  kable(processed_data, format = "html", escape = FALSE),
  # Bordered, striped rows with compact spacing.
  bootstrap_options = c("bordered", "striped", "condensed"),
  position = "center",
  font_size = 12
)
|
Player
|
State
|
PreRating
|
R1_Result
|
R1_Opponent
|
R2_Result
|
R2_Opponent
|
R3_Result
|
R3_Opponent
|
R4_Result
|
R4_Opponent
|
R5_Result
|
R5_Opponent
|
OpponentRatings
|
TotalPoints
|
|
Gary Hua
|
ON
|
1794
|
W
|
Alice Smith
|
W
|
Bob Johnson
|
D
|
Charlie Brown
|
W
|
Diana Davis
|
W
|
Charlie Brown
|
1650, 1550, 1700, 1600, 1700
|
4.5
|
|
Alice Smith
|
CA
|
1650
|
L
|
Gary Hua
|
W
|
Charlie Brown
|
W
|
Diana Davis
|
D
|
Bob Johnson
|
W
|
Diana Davis
|
1794, 1700, 1600, 1550, 1600
|
3.5
|
|
Bob Johnson
|
TX
|
1550
|
D
|
Charlie Brown
|
L
|
Gary Hua
|
W
|
Alice Smith
|
L
|
Alice Smith
|
D
|
Bob Johnson
|
1794, 1794, 1650, 1650, 1550
|
2.0
|
|
Charlie Brown
|
NY
|
1700
|
W
|
Bob Johnson
|
D
|
Alice Smith
|
L
|
Gary Hua
|
W
|
Diana Davis
|
L
|
Alice Smith
|
1550, 1650, 1794, 1600, 1650
|
2.5
|
|
Diana Davis
|
FL
|
1600
|
L
|
Diana Davis
|
W
|
Diana Davis
|
L
|
Bob Johnson
|
D
|
Gary Hua
|
W
|
Charlie Brown
|
1600, 1600, 1550, 1794, 1700
|
2.5
|
Step 4: Calculate Average Opponent Rating:
AvgOpponentRating = sum(OpponentRatings) / total_number_of_rounds, i.e. the mean rating of the opponents a player faced.
# Average opponent rating per player.
# rowwise() makes mutate() evaluate once per player, so strsplit() only ever
# sees that player's comma-separated ratings; within the row-wise mutate() the
# list-column OpponentList is exposed as that row's character vector.
processed_data <- processed_data %>%
  rowwise() %>%
  mutate(
    OpponentList = strsplit(OpponentRatings, ","),
    # trimws() strips the space after each comma before numeric conversion.
    AvgOpponentRating = mean(as.numeric(trimws(OpponentList)), na.rm = TRUE)
  ) %>%
  # Drop the row-wise grouping so later verbs work on whole columns again.
  ungroup()
# Keep only the identifying columns, the total score, and the two
# opponent-rating columns for display.
processed_data_display <- select(
  processed_data,
  Player, State, PreRating, TotalPoints, OpponentList, AvgOpponentRating
)

# Render the reduced table as HTML. escape = FALSE passes cell contents
# through without HTML-escaping them.
kable_styling(
  kable(processed_data_display, format = "html", escape = FALSE),
  # Bordered, striped rows with compact spacing.
  bootstrap_options = c("bordered", "striped", "condensed"),
  position = "center",
  font_size = 12
)
|
Player
|
State
|
PreRating
|
TotalPoints
|
OpponentList
|
AvgOpponentRating
|
|
Gary Hua
|
ON
|
1794
|
4.5
|
1650 , 1550, 1700, 1600, 1700
|
1640.0
|
|
Alice Smith
|
CA
|
1650
|
3.5
|
1794 , 1700, 1600, 1550, 1600
|
1648.8
|
|
Bob Johnson
|
TX
|
1550
|
2.0
|
1794 , 1794, 1650, 1650, 1550
|
1687.6
|
|
Charlie Brown
|
NY
|
1700
|
2.5
|
1550 , 1650, 1794, 1600, 1650
|
1648.8
|
|
Diana Davis
|
FL
|
1600
|
2.5
|
1600 , 1600, 1550, 1794, 1700
|
1648.8
|
Step 5: Select Final Columns and Save to CSV
# Final export layout: summary columns first, then each round's result and
# opponent. The helper columns OpponentRatings and OpponentList are left out.
final_data <- select(
  processed_data,
  Player, State, TotalPoints, PreRating, AvgOpponentRating,
  R1_Result, R1_Opponent,
  R2_Result, R2_Opponent,
  R3_Result, R3_Opponent,
  R4_Result, R4_Opponent,
  R5_Result, R5_Opponent
)

# Write the table to disk without a row-name column.
write.csv(
  final_data,
  file = "player_stats_with_rounds_and_opponents.csv",
  row.names = FALSE
)
Step 6: Read and Verify the CSV File
# Read the exported CSV back in.
saved_data <- read.csv("player_stats_with_rounds_and_opponents.csv")

# Verify the round trip: the file must have the same number of rows and the
# same column names as the table that was written. Stops with an error if not.
stopifnot(
  nrow(saved_data) == nrow(final_data),
  identical(names(saved_data), names(final_data))
)
Step 7: Create a Summary of Results by Player
# Per-player overview: identity, pre-tournament rating, total score, and
# average opponent rating, taken from the re-read CSV.
player_summary <- select(
  saved_data,
  Player, State, PreRating, TotalPoints, AvgOpponentRating
)

# Render the overview as an HTML table. escape = FALSE passes cell contents
# through without HTML-escaping them.
kable_styling(
  kable(player_summary, format = "html", escape = FALSE),
  # Bordered, striped rows with compact spacing.
  bootstrap_options = c("bordered", "striped", "condensed"),
  position = "center",
  font_size = 12
)
|
Player
|
State
|
PreRating
|
TotalPoints
|
AvgOpponentRating
|
|
Gary Hua
|
ON
|
1794
|
4.5
|
1640.0
|
|
Alice Smith
|
CA
|
1650
|
3.5
|
1648.8
|
|
Bob Johnson
|
TX
|
1550
|
2.0
|
1687.6
|
|
Charlie Brown
|
NY
|
1700
|
2.5
|
1648.8
|
|
Diana Davis
|
FL
|
1600
|
2.5
|
1648.8
|
Step 8: Create a Detailed Round-by-Round View
# Reshape to one row per player per round, with Result and Opponent columns.
round_details <- saved_data %>%
  # Select only the round columns ("R<number>_..."). The pattern is anchored
  # and case-sensitive so unrelated columns that merely start with r/R are not
  # picked up (starts_with() ignores case by default).
  select(Player, matches("^R[0-9]+_", ignore.case = FALSE)) %>%
  pivot_longer(
    cols = -Player,
    # "R1_Result" splits into Round = "R1" and a value column named "Result".
    names_to = c("Round", ".value"),
    names_sep = "_"
  ) %>%
  # Sort rounds by their number so that "R10" would follow "R9", not "R1".
  arrange(Player, as.integer(sub("^R", "", Round)))
# Render the round-by-round view as an HTML table. escape = FALSE passes cell
# contents through without HTML-escaping them.
kable_styling(
  kable(round_details, format = "html", escape = FALSE),
  # Bordered, striped rows with compact spacing.
  bootstrap_options = c("bordered", "striped", "condensed"),
  position = "center",
  font_size = 12
)
|
Player
|
Round
|
Result
|
Opponent
|
|
Alice Smith
|
R1
|
L
|
Gary Hua
|
|
Alice Smith
|
R2
|
W
|
Charlie Brown
|
|
Alice Smith
|
R3
|
W
|
Diana Davis
|
|
Alice Smith
|
R4
|
D
|
Bob Johnson
|
|
Alice Smith
|
R5
|
W
|
Diana Davis
|
|
Bob Johnson
|
R1
|
D
|
Charlie Brown
|
|
Bob Johnson
|
R2
|
L
|
Gary Hua
|
|
Bob Johnson
|
R3
|
W
|
Alice Smith
|
|
Bob Johnson
|
R4
|
L
|
Alice Smith
|
|
Bob Johnson
|
R5
|
D
|
Bob Johnson
|
|
Charlie Brown
|
R1
|
W
|
Bob Johnson
|
|
Charlie Brown
|
R2
|
D
|
Alice Smith
|
|
Charlie Brown
|
R3
|
L
|
Gary Hua
|
|
Charlie Brown
|
R4
|
W
|
Diana Davis
|
|
Charlie Brown
|
R5
|
L
|
Alice Smith
|
|
Diana Davis
|
R1
|
L
|
Diana Davis
|
|
Diana Davis
|
R2
|
W
|
Diana Davis
|
|
Diana Davis
|
R3
|
L
|
Bob Johnson
|
|
Diana Davis
|
R4
|
D
|
Gary Hua
|
|
Diana Davis
|
R5
|
W
|
Charlie Brown
|
|
Gary Hua
|
R1
|
W
|
Alice Smith
|
|
Gary Hua
|
R2
|
W
|
Bob Johnson
|
|
Gary Hua
|
R3
|
D
|
Charlie Brown
|
|
Gary Hua
|
R4
|
W
|
Diana Davis
|
|
Gary Hua
|
R5
|
W
|
Charlie Brown
|
Plot Players by Total Points (Highest to Lowest)
# Read the exported player statistics.
player_data <- read.csv("player_stats_with_rounds_and_opponents.csv")

# Order players by total points, highest first.
# ggplot2 places discrete x values in factor-level order, not row order, so the
# factor levels themselves must be descending: .desc = TRUE does that. Without
# it the bars run lowest to highest, contradicting the subtitle.
player_data_ordered <- player_data %>%
  arrange(desc(TotalPoints)) %>%
  mutate(Player = fct_reorder(Player, TotalPoints, .desc = TRUE))

# Bar chart of total points per player, shaded by score.
ggplot(
  player_data_ordered,
  aes(x = Player, y = TotalPoints, fill = TotalPoints)
) +
  geom_col() +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  labs(
    title = "Player Performance by Total Points",
    subtitle = "Ordered from highest to lowest points",
    x = "Player",
    y = "Total Points",
    fill = "Points"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Print each score just above its bar.
  geom_text(aes(label = TotalPoints), vjust = -0.3, size = 3.5)

Plot Comparing Player Pre-Rating vs Average Opponent Rating
# Load the exported player statistics.
player_data <- read.csv("player_stats_with_rounds_and_opponents.csv")

# Player names sorted by pre-tournament rating, highest first; used as the
# factor level order for the x axis.
player_order <- pull(arrange(player_data, desc(PreRating)), Player)

# Long format: one row per player per rating type, with readable labels for
# the legend and players ordered by their own rating.
rating_comparison <- player_data %>%
  select(Player, PreRating, AvgOpponentRating) %>%
  pivot_longer(
    cols = c(PreRating, AvgOpponentRating),
    names_to = "RatingType",
    values_to = "Rating"
  ) %>%
  mutate(
    RatingType = factor(
      RatingType,
      levels = c("PreRating", "AvgOpponentRating"),
      labels = c("Player's Rating", "Avg Opponent Rating")
    ),
    Player = factor(Player, levels = player_order)
  )

# Side-by-side bars: each player's own rating next to the average rating of
# their opponents.
ggplot(rating_comparison, aes(x = Player, y = Rating, fill = RatingType)) +
  geom_col(position = "dodge", alpha = 0.8) +
  scale_fill_manual(
    values = c(
      "Player's Rating" = "steelblue",
      "Avg Opponent Rating" = "darkorange"
    )
  ) +
  labs(
    title = "Comparison of Player Ratings vs Average Opponent Ratings",
    subtitle = "Ordered by player rating (highest to lowest)",
    x = "Player",
    y = "Rating",
    fill = "Rating Type"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "top"
  ) +
  # Value labels, dodged by the same width as the bars so they sit above them.
  geom_text(
    aes(label = Rating, group = RatingType),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  )
