Project 1

In this project, I parse a text file of chess tournament results in which the information follows a regular structure. My job is to create an R Markdown file that generates a .CSV file.

Load libraries

library(readr)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v dplyr   1.0.4
## v tibble  3.0.6     v stringr 1.4.0
## v tidyr   1.1.2     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Read file into memory

# Download the tournament report as a character vector with one element per
# line. n = -1L reads to the end of the input; warn = FALSE silences
# readLines() warnings such as a missing final newline.
tournament_url <- "https://raw.githubusercontent.com/dburtsev/CUNYR/master/tournamentinfo.txt"
strLines <- readLines(tournament_url, n = -1L, warn = FALSE)

Parse text into data frame

# Build `df` with one row per player. The final report needs: Player's Name,
# Player's State, Total Number of Points, Player's Pre-Rating, and Average Pre
# Chess Rating of Opponents, e.g.
# Gary Hua, ON, 6.0, 1794, 1605
#
# Input layout this parser relies on: the first four lines are skipped, lines
# starting with "-" are separators, and every player occupies two consecutive
# text lines whose fields are delimited by "|".
#
# Columns are built as typed vectors. Appending rows with c(...) would coerce
# every value (and therefore every column) to character.

# Keep only the record lines; they are expected to alternate between the first
# and the second line of a player record.
record_lines <- tail(strLines, -4)
record_lines <- record_lines[!startsWith(record_lines, "-")]

# Join each pair of lines into one record; an unpaired trailing line is ignored.
n_players <- length(record_lines) %/% 2L
first_line <- 2L * seq_len(n_players) - 1L
records <- paste(record_lines[first_line], record_lines[first_line + 1L])

# Split each record on "|". A field that is absent from a record reads as NA.
pieces <- strsplit(records, "|", fixed = TRUE)
field <- function(k) {
  vapply(pieces, function(p) p[k], character(1))
}
trimmed_int <- function(text) {
  as.integer(trimws(text))
}
# Round cells hold the opponent's number in characters 4-5; rounds without an
# opponent leave that slot blank, which parses as NA.
opponent <- function(k) {
  trimmed_int(substring(field(k), 4, 5))
}
# Field 12 carries both ratings at fixed character offsets.
rating_field <- field(12)

df <- data.frame(
  Num = trimmed_int(field(1)),
  PlayerName = trimws(field(2)),
  PlayerState = trimws(field(11)),
  Points = as.numeric(trimws(field(3))),
  PreRating = trimmed_int(substring(rating_field, 15, 19)),
  AverageRating = integer(n_players), # filled in by a later step
  opp1 = opponent(4),
  opp2 = opponent(5),
  opp3 = opponent(6),
  opp4 = opponent(7),
  opp5 = opponent(8),
  opp6 = opponent(9),
  opp7 = opponent(10),
  Rating = trimmed_int(substring(rating_field, 25, 28)),
  stringsAsFactors = FALSE
)

# Missing numbers become 0; in particular an opponent number of 0 marks a round
# without an opponent. Text columns are left untouched.
numeric_cols <- vapply(df, is.numeric, logical(1))
df[numeric_cols] <- lapply(df[numeric_cols], function(v) {
  v[is.na(v)] <- 0L
  v
})

Calculate Average Pre Chess Rating of Opponents

# For every player, average the PreRating of the opponents actually faced.
# An opponent number of 0 means "no opponent in that round": such rounds are
# left out of both the sum and the divisor.
opp_cols <- paste0("opp", 1:7)
n_rows <- nrow(df)

# as.integer() keeps this working whether the columns hold numbers or digit
# strings. unlist() concatenates column by column, matching matrix() fill order.
opp_num <- matrix(
  as.integer(unlist(df[opp_cols], use.names = FALSE)),
  nrow = n_rows,
  ncol = length(opp_cols)
)
played <- !is.na(opp_num) & opp_num != 0L

# Look opponents up by player number (Num) instead of relying on the row
# position happening to equal the player number.
opp_rating <- matrix(
  as.integer(df$PreRating)[match(opp_num, as.integer(df$Num))],
  nrow = n_rows,
  ncol = length(opp_cols)
)
opp_rating[!played] <- 0L

# %/% is integer division, so the average is truncated (1605.29 -> 1605).
# A player with no opponents at all gets NaN (0 %/% 0).
df$AverageRating <- rowSums(opp_rating) %/% rowSums(played)

Which player scored the most points relative to his or her expected result?

# MP is the difference between the Rating and PreRating columns.
df$MP <- as.integer(df$Rating) - as.integer(df$PreRating)
# Largest difference across all players.
x <- max(df$MP)
print(x)
## [1] 699
# Name(s) of the player(s) whose difference equals that maximum.
df$PlayerName[which(df$MP == x)]
## [1] "JACOB ALEXANDER LAVALLEY"

Save data as csv file

# Show the working directory; the relative path "output.csv" below is resolved
# against it.
print(getwd())
## [1] "C:/CUNY/DATA607/Project1"
# Keep only the report columns and write them out without row names.
report_cols <- c(
  "PlayerName", "PlayerState", "Points", "PreRating", "AverageRating"
)
retval <- df[report_cols]
write.csv(retval, file = "output.csv", row.names = FALSE)