Loading required libraries
library(stringr)
library(dplyr)
library(readr)
library(knitr)
Load Required Libraries
library(stringr) # For string manipulation
library(dplyr) # For data manipulation
library(readr) # For reading/writing files
library(knitr) # For nice table display
Read and Parse the Chess Data
# Read the text file
chess_text <- readLines("chess_data.txt", warn = FALSE)
# Remove header lines and separators (lines with dashes)
clean_data <- chess_text[!grepl("^-+", chess_text) & chess_text != ""]
# Remove any remaining empty lines
clean_data <- clean_data[nzchar(trimws(clean_data))]
cat("Total lines in file:", length(chess_text), "\n")
## Total lines in file: 196
cat("Clean data lines:", length(clean_data), "\n")
## Clean data lines: 130
Calculate Average Opponent Ratings
# Calculate average opponent ratings
avg_opponent_ratings <- c()
for (i in 1:length(player_names)) {
opponents <- all_opponents[[i]]
if (length(opponents) > 0) {
# Look up opponent ratings
opponent_ratings <- c()
for (opp_num in opponents) {
if (opp_num <= length(pre_ratings) && !is.na(pre_ratings[opp_num])) {
opponent_ratings <- c(opponent_ratings, pre_ratings[opp_num])
}
}
# Calculate average
if (length(opponent_ratings) > 0) {
avg_opponent_ratings <- c(avg_opponent_ratings, round(mean(opponent_ratings), 0))
} else {
avg_opponent_ratings <- c(avg_opponent_ratings, NA)
}
} else {
avg_opponent_ratings <- c(avg_opponent_ratings, NA)
}
}
Making the Final Dataset
# Create dataframe with extracted data
chess_results <- data.frame(
Player_Name = player_names,
State = states,
Total_Points = total_points,
Pre_Rating = pre_ratings,
Avg_Opponent_Rating = avg_opponent_ratings,
stringsAsFactors = FALSE
)
# Display results
kable(head(chess_results, 10), caption = "First 10 Players")
First 10 Players
Player Name |
Num |
NA |
NA |
NA |
GARY HUA |
ON |
6.0 |
1794 |
1585 |
DAKSHESH DARURI |
MI |
6.0 |
1553 |
1440 |
ADITYA BAJAJ |
MI |
6.0 |
1384 |
1454 |
PATRICK H SCHILLING |
MI |
5.5 |
1716 |
1660 |
HANSHI ZUO |
MI |
5.5 |
1655 |
1512 |
HANSEN SONG |
OH |
5.0 |
1686 |
1472 |
GARY DEE SWATHELL |
MI |
5.0 |
1649 |
1476 |
EZEKIEL HOUGHTON |
MI |
5.0 |
1641 |
1412 |
STEFANO LEE |
ON |
5.0 |
1411 |
1488 |
# Show summary statistics
cat("\nSummary Statistics:\n")
##
## Summary Statistics:
cat("Number of players:", nrow(chess_results), "\n")
## Number of players: 65
cat("States represented:", length(unique(chess_results$State)), "\n")
## States represented: 4
cat("Average pre-rating:", round(mean(chess_results$Pre_Rating, na.rm = TRUE), 0), "\n")
## Average pre-rating: 1378
cat("Average opponent rating:", round(mean(chess_results$Avg_Opponent_Rating, na.rm = TRUE), 0), "\n")
## Average opponent rating: 1396
Transferring the Data to a .csv
# Write to CSV file
write.csv(chess_results, "chess_tournament_results.csv", row.names = FALSE)
# Verify it worked
if (file.exists("chess_tournament_results.csv")) {
cat("✓ Successfully created chess_tournament_results.csv\n")
# Read back the CSV to verify
test_data <- read.csv("chess_tournament_results.csv")
cat("CSV file contains", nrow(test_data), "rows and", ncol(test_data), "columns\n")
# Show first few rows of CSV
kable(head(test_data, 5), caption = "Verification: First 5 rows of CSV file")
} else {
cat("✗ Error: CSV file was not created\n")
}
## ✓ Successfully created chess_tournament_results.csv
## CSV file contains 65 rows and 5 columns
Verification: First 5 rows of CSV file
Player Name |
Num |
NA |
NA |
NA |
GARY HUA |
ON |
6.0 |
1794 |
1585 |
DAKSHESH DARURI |
MI |
6.0 |
1553 |
1440 |
ADITYA BAJAJ |
MI |
6.0 |
1384 |
1454 |
PATRICK H SCHILLING |
MI |
5.5 |
1716 |
1660 |