#load required libraries
library(RCurl)
library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents
chesstxt <- getURL("https://raw.githubusercontent.com/Shayaeng/Data607/main/Project1/chess.txt")
chess_raw <- readLines(textConnection(chesstxt))
chess_filtered <- c()
#First, let's get rid of those '---' lines
for (line in chess_raw) {
if (!grepl("^-+$", line)) {
chess_filtered <- c(chess_filtered, line)
}
}
#Now we'll remove the first two lines
chess_filtered <- chess_filtered[-(1:2)]
#Next, let's combine each of the two lines per record
chess_combined <- c()
for (i in seq(1, length(chess_filtered), by = 2)) {
bottom_part <- chess_filtered[i]
top_part <- chess_filtered[i + 1]
combined_line <- paste(bottom_part, top_part)
chess_combined <- c(chess_combined, combined_line)
}
#for (line in chess_combined) {
# print(line)
#}
#split the lines by their records
chess_split <- c()
for (line in chess_combined) {
split_line <- strsplit(line, split = '[/]|->|\\|')
trimmed_line <- lapply(split_line, str_trim)
chess_split <- c(chess_split, trimmed_line)
}
#convert to data frame
chess_df <- data.frame(do.call(rbind, chess_split))
colnames(chess_df) <- c('PairId','PlayerName', 'Points', 'Round1', 'Round2', 'Round3', 'Round4', 'Round5', 'Round6', 'Round7', 'State', 'ChessId', 'PreRating', 'PostRating', '?', 'Result1', 'Result2', 'Result3', 'Result4', 'Result5', 'Result6', 'Result7')
chess_df$PreRating <- sub("^[^[:space:]]*\\s*", "", chess_df$PreRating)
#subset relevant columns
subset_df <- chess_df[, c('PairId','PlayerName', 'State', 'Points', 'Round1', 'Round2', 'Round3', 'Round4', 'Round5', 'Round6', 'Round7', 'PreRating')]
#trim the extra characters
subset_df$PreRating <- sub("P.*", "", subset_df$PreRating)
subset_df[c('Round1', 'Round2','Round3', 'Round4', 'Round5', 'Round6', 'Round7')] <- lapply(subset_df[c('Round1', 'Round2','Round3', 'Round4', 'Round5', 'Round6', 'Round7')], function(x) sub("^[^[:space:]]*\\s*", "", x))
#convert the columns
subset_df[c('PairId', 'Round1', 'Round2', 'Round3', 'Round4', 'Round5', 'Round6', 'Round7')] <- lapply(subset_df[c('PairId', 'Round1', 'Round2', 'Round3', 'Round4', 'Round5', 'Round6', 'Round7')], as.integer)
subset_df[c('Points', 'PreRating')] <- lapply(subset_df[c('Points', 'PreRating')], as.numeric)
#add opponents rating
subset_df <- subset_df %>%
left_join(subset_df[, c('PairId', 'PreRating')], by = c('Round1' = 'PairId')) %>%
rename(OppRound1 = PreRating.y, PreRating = PreRating.x) %>%
left_join(subset_df[, c('PairId', 'PreRating')], by = c('Round2' = 'PairId')) %>%
rename(OppRound2 = PreRating.y, PreRating = PreRating.x) %>%
left_join(subset_df[, c('PairId', 'PreRating')], by = c('Round3' = 'PairId')) %>%
rename(OppRound3 = PreRating.y, PreRating = PreRating.x) %>%
left_join(subset_df[, c('PairId', 'PreRating')], by = c('Round4' = 'PairId')) %>%
rename(OppRound4 = PreRating.y, PreRating = PreRating.x) %>%
left_join(subset_df[, c('PairId', 'PreRating')], by = c('Round5' = 'PairId')) %>%
rename(OppRound5 = PreRating.y, PreRating = PreRating.x) %>%
left_join(subset_df[, c('PairId', 'PreRating')], by = c('Round6' = 'PairId')) %>%
rename(OppRound6 = PreRating.y, PreRating = PreRating.x) %>%
left_join(subset_df[, c('PairId', 'PreRating')], by = c('Round7' = 'PairId')) %>%
rename(OppRound7 = PreRating.y, PreRating = PreRating.x)
#add average rating
subset_df <- subset_df %>%
rowwise() %>%
mutate(OppAverage = round(mean(c(`OppRound1`, `OppRound2`, `OppRound3`, `OppRound4`, `OppRound5`, `OppRound6`, `OppRound7`), na.rm = TRUE)))
#select and reorder columns
chess_final <- subset_df[, c('PlayerName', 'State', 'Points', 'PreRating', 'OppAverage')]
#create the .csv file
write.csv(chess_final, file = "chess_final.csv", row.names = FALSE)