In this project, you're given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could, for example, be imported into a SQL database) with the following information for all of the players: Player's Name, Player's State, Total Number of Points, Player's Pre-Rating, and Average Pre Chess Rating of Opponents. For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605. The value 1605 was calculated by summing the pre-tournament opponents' ratings of 1436, 1563, 1600, 1610, 1649, 1663, and 1716, and dividing by the total number of games played.
Reading data from file
library(stringr)
# The tournament report is fixed-width text, not CSV, so read it line by line.
# Parsing it with read.csv() only works while no line contains a comma or a
# double quote; a single one would split or merge rows and break the
# "three lines per player" layout the later steps index by.
tournament_lines <- readLines(
  "https://raw.githubusercontent.com/chirag-vithlani/607/master/week5/tournamentinfo.txt",
  warn = FALSE
)
# The previous read.csv(skip = 3) call discarded three lines and consumed a
# fourth as the column header, so drop the same four leading lines here
# (presumably the table's header banner).
tournament_lines <- tournament_lines[-seq_len(4)]
# read.csv() also skipped blank lines; keep doing so, so a stray empty line
# cannot shift the three-line grouping.
tournament_lines <- tournament_lines[nzchar(trimws(tournament_lines))]
# Keep `data` as a one-column data frame of raw lines, as before.
data <- data.frame(V1 = tournament_lines, stringsAsFactors = FALSE)
# Flatten to a plain character vector: one element per line of the report.
flattenData <- unlist(data, use.names = FALSE)
Extract required data
# Every player occupies three consecutive lines of `flattenData`: a line with
# pair number / name / points / round results, a line with state / ID / ratings,
# and a separator line. Work on each player's own lines so that every extracted
# vector has exactly one entry per player and the columns stay aligned (a
# file-wide str_extract_all() silently drops or adds entries on a miss or a
# stray match).
line_in_record <- (seq_along(flattenData) - 1L) %% 3L + 1L
player_rows <- as.character(flattenData[line_in_record == 1L])
rating_rows <- as.character(flattenData[line_in_record == 2L])

# Split on the "|" cell delimiter; only the leading cells are needed here.
player_fields <- str_split_fixed(player_rows, "\\|", n = 4)
rating_fields <- str_split_fixed(rating_rows, "\\|", n = 3)

# Player names: the whole second cell, so names with three or more words
# (or hyphens) are kept in full rather than cut after the second word.
PlayerNames <- str_trim(player_fields[, 2])

# State / province: the first cell of the second line, whatever its value
# (not restricted to ON, MI and OH).
stateData <- str_trim(rating_fields[, 1])

# Total points: the third cell of the first line (e.g. "6.0"); kept as text.
points <- str_trim(player_fields[, 3])

# Player's pre-rating: the digits that follow "R:" in the second cell of the
# second line. Any suffix after the digits (such as a provisional marker) is
# not captured. NA when no rating is present.
Pre_performanceRatingPrecededWithR <- str_extract(
  rating_fields[, 2],
  "R: *[[:digit:]]+"
)
# Remove the "R:" prefix and surrounding blanks so no value keeps a leading
# space (ratings may be printed with extra padding, e.g. "R:  955").
Pre_performanceRating <- str_trim(
  str_replace(Pre_performanceRatingPrecededWithR, "^R:", "")
)
Getting the “Average Pre Chess Rating of Opponents”
# To find each player's opponents we need the first line of every player
# record, which contains the per-round result cells.
# Characters 48-89 of a line are taken as the block of round cells.
rows_that_contains_opp_player_data <- str_sub(flattenData, start = 48, end = 89)
# Player records repeat every three lines; keep lines 1, 4, 7, ...
filtered_opp_player_rows <-
  rows_that_contains_opp_player_data[seq_along(flattenData) %% 3 == 1]

# Opponent pair numbers, one character vector per player. Rounds without an
# opponent number contribute nothing, so players can have different numbers
# of entries.
opp_rating_String_raw_wise <- str_extract_all(
  filtered_opp_player_rows,
  "[[:digit:]]+"
)
games_played <- lengths(opp_rating_String_raw_wise)
n_players <- length(opp_rating_String_raw_wise)

opp_rating_as_continuous_numeric <-
  as.numeric(unlist(opp_rating_String_raw_wise))
# Look up each opponent's pre-rating by position.
# NOTE(review): assumes players appear in pair-number order, so that pair
# number k is the k-th entry of Pre_performanceRating -- confirm against data.
opp_player_pre_rating <-
  as.numeric(Pre_performanceRating[opp_rating_as_continuous_numeric])

# Build a rounds x players matrix, padding unplayed rounds with NA.
# Pouring the flat vector straight into a fixed 7 x 64 matrix is wrong as soon
# as one player has fewer games than the rest: every later column is shifted
# and R recycles values to fill the gap (the "data length is not a
# sub-multiple" warning). Instead, place each rating at (game number, player).
n_rounds <- max(games_played, 1L)
opp_rating_matrix <- matrix(NA_real_, nrow = n_rounds, ncol = n_players)
player_of_game <- rep(seq_len(n_players), times = games_played)
game_number <- sequence(games_played)
opp_rating_matrix[cbind(game_number, player_of_game)] <- opp_player_pre_rating

# Column means ignore the NA padding, i.e. divide by the games actually played.
# Rounded to a whole rating, matching the expected output (e.g. 1605).
opp_rating_avg <- round(colMeans(opp_rating_matrix, na.rm = TRUE))
# A player with no rated opponents yields NaN; report it as missing instead.
opp_rating_avg[is.nan(opp_rating_avg)] <- NA_real_
Write to output file
# Assemble one row per player from the vectors extracted above; the column
# names are the same as the variable names.
output <- data.frame(
  PlayerNames = PlayerNames,
  stateData = stateData,
  points = points,
  Pre_performanceRating = Pre_performanceRating,
  opp_rating_avg = opp_rating_avg
)
# Write the table next to the document, without the row-number column.
write.csv(
  output,
  file = "./PlayerData.csv",
  row.names = FALSE
)