In this project, you’re given a text file with chess tournament
results where the information has some structure. Your job is to create
an R Markdown file that generates a .CSV file (that could for example be
imported into a SQL database).
library(tidyverse)
# Location of the raw tournament results text file.
url <- "https://raw.githubusercontent.com/D-hartog/DATA607/main/PROJECT1/chess.txt"

# Read the file from `url` rather than repeating the literal, so the source
# location is defined in exactly one place. read.csv() is called with its
# defaults, exactly as before.
chess_txt <- read.csv(url)

# Preview the first nine rows of the raw import.
head(chess_txt, 9)
Use separate_wider_delim() to split the top_row column on “|”,
creating new columns with the player’s name and total points.
# Split top_row on "|" into its first three pieces. too_many = "debug" keeps
# the original column and adds the top_row_ok / top_row_pieces /
# top_row_remainder helper columns; top_row_remainder is kept for later use,
# while the player number and the other helper columns are dropped.
data <- data %>%
  separate_wider_delim(
    cols = top_row,
    delim = "|",
    names = c("num", "player_name", "total_points"),
    too_many = "debug"
  ) %>%
  select(-all_of(c("num", "top_row", "top_row_ok", "top_row_pieces")))
Repeat the same steps on the bottom_row column to get the state and
pre-rank for each player. Remove unnecessary columns.
# Split bottom_row on "|" into its first two pieces (state and the raw
# pre-rank text). too_many = "debug" keeps the original column and adds helper
# columns, all of which are dropped here because nothing else from bottom_row
# is used.
data <- data %>%
  separate_wider_delim(
    cols = bottom_row,
    delim = "|",
    names = c("state", "pre_rank"),
    too_many = "debug"
  ) %>%
  select(
    -all_of(
      c(
        "bottom_row",
        "bottom_row_ok",
        "bottom_row_remainder",
        "bottom_row_pieces"
      )
    )
  )

head(data)
Extract the pre-rank for each player from the pre_rank column: a
regular expression pulls out the numbers, and an iteration over the
matches then grabs the pre-rank number for each player.
# Collect every run of 3-4 digits found in each player's raw pre_rank text;
# the result is a list with one character matrix of matches per player.
rank_match_list <- data$pre_rank %>%
  str_match_all(pattern = "\\d{3,4}")

# Inspect the matches for the first player.
rank_match_list[[1]]
## [,1]
## [1,] "1544"
## [2,] "5895"
## [3,] "1794"
## [4,] "1817"
# The player's pre-rank is the third match in each player's match matrix
# (see the matches printed for player 1 above). Extract it for every player in
# one pass: vapply() sizes itself from rank_match_list instead of assuming
# exactly 64 players, avoids growing a vector inside a loop, and guarantees a
# character vector with one entry per player.
p_rank <- vapply(
  rank_match_list,
  function(matches) matches[3, 1],
  character(1)
)

# Assign the p_rank vector back into the pre_rank column
data$pre_rank <- p_rank
head(data)
Find the average pre-rank of each player’s opponents
# Parse out the opponents from the "top_row_remainder" column: every run of
# 1-2 digits is treated as an opponent's row number. The result is a list with
# one character matrix of matches per player.
opponents_list <- str_match_all(data$top_row_remainder, "\\d{1,2}")

# Convert the pre-ranks to numbers once, outside the per-player work.
pre_rank_num <- as.numeric(data$pre_rank)

# For each player, look up the opponents' pre-ranks by row position and
# average them.
# - Opponent numbers are converted to integers explicitly instead of relying
#   on character row-name matching.
# - Taking the whole first column (rather than 1:length(...)) means a player
#   with no recorded opponents yields NaN instead of a subscript error.
# - vapply() sizes itself from opponents_list, so no player count is
#   hard-coded.
opp_pre_rank <- vapply(
  opponents_list,
  function(matches) mean(pre_rank_num[as.integer(matches[, 1])]),
  numeric(1)
)

# Round the mean opponent rankings, store them in a new column, and drop
# top_row_remainder now that it is no longer needed.
data$opponent_avgpr <- round(opp_pre_rank)
data$top_row_remainder <- NULL
head(data)
Re-order the columns, clean them, and convert the data types to
numeric where appropriate.
# Swap the second and third columns (selected by position), then tidy the
# values: strip surrounding whitespace from the text columns and convert
# total_points and pre_rank to numeric.
data <- data[, c(1, 3, 2, 4, 5)] %>%
  mutate(
    player_name = str_trim(player_name),
    state = str_trim(state),
    total_points = as.numeric(str_trim(total_points)),
    pre_rank = as.numeric(pre_rank)
  )

# Print the final table.
data