Project overview

In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could, for example, be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre-Tournament Chess Rating of Opponents. For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605. The value 1605 was calculated by summing the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663, and 1716, and dividing by the total number of games played.

# Load the raw tournament report as a character vector, one element per line
tournament <- readLines(con = "tournamentinfo.txt")

After reading the file, we parse it to extract the data we need. Each player’s record spans two lines, so I run two regular expressions, one per line type, which produces two data frames that are combined at the end to get the required data.

# Parse the two kinds of lines that make up each player's record.
# str_match() returns a character matrix with one row per input line: column 1
# holds the full match and the remaining columns hold the capture groups. Lines
# that do not match the pattern come back as a row of NA.

# 1st line of a record (starts with the player number): player number, name,
# total points and the opponent's player number for each of the 7 rounds. The
# opponent group is empty when the round result is not followed by a number.
t <- str_match(tournament, "(\\d+)\\s\\|\\s*(\\w+\\s\\w+\\s?\\w*?\\W?\\s?\\w*?)\\s*\\|(\\d\\.\\d)\\s+\\|[A-Z]\\s+(\\d*)\\|[A-Z]\\s+(\\d*)\\|[A-Z]\\s+(\\d*)\\|[A-Z]\\s+(\\d*)\\|[A-Z]\\s+(\\d*)\\|[A-Z]\\s+(\\d*)\\|[A-Z]\\s+(\\d*)\\|")
# 2nd line of a record (starts with the state): state and pre-rating.
t1 <- str_match(tournament, "([A-Z]{2}).*\\:\\s+(\\d*)\\w?.*\\s")

# Keep only the lines that matched. drop = FALSE keeps the result a matrix even
# if a single line matched, so the column handling below stays valid.
t <- t[complete.cases(t), , drop = FALSE]
t1 <- t1[complete.cases(t1), , drop = FALSE]

# The two tables are later combined by row position, so both patterns must have
# matched exactly once per player. Fail loudly instead of silently pairing a
# player with somebody else's state and rating.
stopifnot(nrow(t) > 0, nrow(t) == nrow(t1))

# Drop column 1 (the full regex match) and keep the capture groups as text
# columns; stringsAsFactors = FALSE keeps them character on R < 4.0 as well.
t <- data.frame(t[, -1, drop = FALSE], stringsAsFactors = FALSE)
t1 <- data.frame(t1[, -1, drop = FALSE], stringsAsFactors = FALSE)

# Name the data frame columns
names(t) <- c(
  "pnum", "name", "points",
  "game1", "game2", "game3", "game4", "game5", "game6", "game7"
)
names(t1) <- c("state", "prescore")
head(t)
##   pnum                name points game1 game2 game3 game4 game5 game6
## 1    1         GARY HUA       6.0    39    21    18    14     7    12
## 2    2  DAKSHESH DARURI       6.0    63    58     4    17    16    20
## 3    3     ADITYA BAJAJ       6.0     8    61    25    21    11    13
## 4    4 PATRICK H SCHILLING    5.5    23    28     2    26     5    19
## 5    5       HANSHI ZUO       5.5    45    37    12    13     4    14
## 6    6      HANSEN SONG       5.0    34    29    11    35    10    27
##   game7
## 1     4
## 2     7
## 3    12
## 4     1
## 5    17
## 6    21
# Preview the first rows of the state / pre-rating data frame
head(t1)
##   state prescore
## 1    ON     1794
## 2    MI     1553
## 3    MI     1384
## 4    MI     1716
## 5    MI     1655
## 6    OH     1686

Next we convert the data to a numeric type so that we can perform calculations, and then, for each player, calculate the average pre-tournament rating of their opponents.

# Convert the opponent player numbers and the pre-ratings from text to numbers.
# An empty opponent field becomes NA.
game_cols <- paste0("game", 1:7)
t[game_cols] <- lapply(
  t[game_cols],
  function(col) as.numeric(as.character(col))
)
t1$prescore <- as.numeric(as.character(t1$prescore))

# Average pre-rating of each player's opponents.
# The opponent numbers are used as row positions into t1, so t1 must be in
# player-number order. The lookup is done for all players and rounds at once:
# the result is reshaped to one row per player and one column per round, and
# rounds without an opponent (NA) are left out of the mean. The size comes from
# the data rather than a fixed player count.
opponent_ids <- as.vector(as.matrix(t[game_cols]))
opponent_ratings <- matrix(
  t1$prescore[opponent_ids],
  nrow = nrow(t),
  ncol = length(game_cols)
)
t$mean <- round(rowMeans(opponent_ratings, na.rm = TRUE), digits = 0)

Next we create the final data frame, which holds all the required columns from the two data frames. We then save it to a file called tournament_scores.csv, read the file back in, and display it.

# Assemble the output table from the player and rating data frames, then give
# the columns their final names
final <- data.frame(
  t$name, t1$state, t$points, t1$prescore, t$mean,
  check.names = FALSE
)
colnames(final) <- c(
  "Player_Name", "State", "Points", "Pre_score", "AvgPnt_score"
)

# Strip the padding whitespace captured around the player names
final[["Player_Name"]] <- trimws(final[["Player_Name"]])

# Save the table as CSV, then load it back and show the first rows
write.csv(final, file = "tournament_scores.csv", row.names = FALSE)
readfile <- read.csv(file = "tournament_scores.csv")
head(readfile)
##           Player_Name State Points Pre_score AvgPnt_score
## 1            GARY HUA    ON    6.0      1794         1605
## 2     DAKSHESH DARURI    MI    6.0      1553         1469
## 3        ADITYA BAJAJ    MI    6.0      1384         1564
## 4 PATRICK H SCHILLING    MI    5.5      1716         1574
## 5          HANSHI ZUO    MI    5.5      1655         1501
## 6         HANSEN SONG    OH    5.0      1686         1519