Load Package

  1. Read all lines
  2. Remove lines that aren't required, such as the "----" separator lines
  3. Split the lines into two lists. One for the rounds info and another with pregame and state info
  4. Using regular expressions, parse all required items and store them in individual lists. All individual lists must have 64 items, one per player. I show the length of each list to make sure all lists are the same size.
  5. Use the individual lists to create a data frame and write it to a file.
require(stringr)
## Loading required package: stringr

Load data and clean up

# Load every line of the tournament file. warn = FALSE suppresses the
# warnings readLines() may emit about the file itself (spelled out as FALSE
# because F is an ordinary variable that can be reassigned).
raw_file_lines <- readLines('tournamentinfo.txt', warn = FALSE)

# Keep only the lines that contain at least one letter or digit. This drops
# lines made up solely of other characters, like the "----" separator lines.
file_lines <- raw_file_lines[str_detect(raw_file_lines, '[[:alnum:]]')]

Data Summary

# Summary of the original file. It contains 196 lines, including the "----"
# separator lines.
summary(raw_file_lines)
##    Length     Class      Mode 
##       196 character character
# Summary of the lines that remain after removing the "----" separator lines.
summary(file_lines)
##    Length     Class      Mode 
##       130 character character

Parse lines

# Split the cleaned lines into two lists of "|"-separated fields. The first
# two lines are skipped; from line 3 onwards the lines are taken alternately:
# player rows start at line 3 and ratings rows start at line 4.
# Player rows contain the rounds information.
player_row <- str_split(
  file_lines[seq(from = 3, to = length(file_lines), by = 2)],
  pattern = '[|]'
)
head(player_row, 1)
## [[1]]
##  [1] "    1 "                           
##  [2] " GARY HUA                        "
##  [3] "6.0  "                            
##  [4] "W  39"                            
##  [5] "W  21"                            
##  [6] "W  18"                            
##  [7] "W  14"                            
##  [8] "W   7"                            
##  [9] "D  12"                            
## [10] "D   4"                            
## [11] ""
# Ratings rows (every second line, starting at line 4) contain each player's
# state and pre-game rating. Each row is split into fields on "|".
ratings_row <- str_split(
  file_lines[seq(from = 4, to = length(file_lines), by = 2)],
  pattern = '[|]'
)
head(ratings_row, 1)
## [[1]]
##  [1] "   ON "                           
##  [2] " 15445895 / R: 1794   ->1817     "
##  [3] "N:2  "                            
##  [4] "W    "                            
##  [5] "B    "                            
##  [6] "W    "                            
##  [7] "B    "                            
##  [8] "W    "                            
##  [9] "B    "                            
## [10] "W    "                            
## [11] ""
# Sanity check: both lists should have the same length, one entry per player.
length(ratings_row)
## [1] 64
length(player_row)
## [1] 64

Player names

# Get the player names from the second column of each player row.
# vapply() requires exactly one string per row, so the result is always a
# character vector; unlist(lapply()) would collapse to NULL for an empty list.
# str_trim() is vectorised, so the padding is removed in a single call.
# NOTE: this variable has the same name as base::names(); function calls such
# as names(x) still resolve to the function.
names <- str_trim(vapply(player_row, function(x) x[2], character(1)))
head(names)
## [1] "GARY HUA"            "DAKSHESH DARURI"     "ADITYA BAJAJ"       
## [4] "PATRICK H SCHILLING" "HANSHI ZUO"          "HANSEN SONG"
# Number of players
length(names)
## [1] 64

Player total points

# Total points for each player: take the third field of every player row and
# convert the text to a number.
player_points <- as.numeric(
  vapply(player_row, function(fields) fields[3], character(1))
)

head(player_points)
## [1] 6.0 6.0 6.0 5.5 5.5 5.0
length(player_points)
## [1] 64

Player states

# Get each player's state from the first column of the ratings row.
# vapply() requires exactly one string per row, so the result is always a
# character vector; unlist(lapply()) would collapse to NULL for an empty list.
state <- str_trim(vapply(ratings_row, function(x) x[1], character(1)))

head(state)
## [1] "ON" "MI" "MI" "MI" "MI" "OH"
length(state)
## [1] 64

Player pre ratings

# The second column of each ratings row contains the pre-rating text
# (e.g. " 15445895 / R: 1794   ->1817     ").
# vapply() requires exactly one string per row, so the result is always a
# character vector; unlist(lapply()) would collapse to NULL for an empty list.
ratings_col <- vapply(ratings_row, function(x) x[2], character(1))

head(ratings_col)
## [1] " 15445895 / R: 1794   ->1817     " " 14598900 / R: 1553   ->1663     "
## [3] " 14959604 / R: 1384   ->1640     " " 12616049 / R: 1716   ->1744     "
## [5] " 14601533 / R: 1655   ->1690     " " 15055204 / R: 1686   ->1687     "
length(ratings_col)
## [1] 64
# Get the pre-rating using regular expressions.
# str_extract() is vectorised, so the whole column is processed in one call
# rather than element by element, and the result is always a numeric vector.
# Step 1 takes the first ":" followed by optional spaces and digits
# (e.g. ": 1794"); step 2 keeps only the digits from that match.
player_ratings <- as.numeric(
  str_extract(str_extract(ratings_col, ":\\s*\\d{1,}"), "\\d{1,}")
)

head(player_ratings)
## [1] 1794 1553 1384 1716 1655 1686
length(player_ratings)
## [1] 64

Opponents rating

# Round results for each player: fields 4 through 10 of the player row
# (one field per round, e.g. "W  39").
player_rounds <- lapply(player_row, function(fields) fields[4:10])
head(player_rounds, 1)
## [[1]]
## [1] "W  39" "W  21" "W  18" "W  14" "W   7" "D  12" "D   4"
length(player_rounds)
## [1] 64
# Get the opponent ids using a regular expression: pull every run of digits
# out of each round entry and flatten them into one character vector per
# player. Round entries without digits contribute no id.
opponents <- lapply(
  player_rounds,
  function(rounds) unlist(str_extract_all(rounds, pattern = '\\d{1,}'))
)
head(opponents, 1)
## [[1]]
## [1] "39" "21" "18" "14" "7"  "12" "4"
length(opponents)
## [1] 64
# Calculate each player's average opponent pre-rating, rounded to the nearest
# whole number.
# Opponent ids are pair numbers (the first field of each player row), so each
# id is looked up with match() against those pair numbers instead of being
# used directly as a row position. This stays correct even if the rows are
# not in pair-number order; an id with no matching row yields NA.
player_ids <- vapply(player_row, function(x) as.numeric(x[1]), numeric(1))
opponents_ratings <- vapply(
  opponents,
  function(x) {
    opponent_rows <- match(as.numeric(x), player_ids)
    # mean() of an empty vector is NaN, the same value sum() / length()
    # gives for a player with no recorded opponents.
    round(mean(player_ratings[opponent_rows]))
  },
  FUN.VALUE = numeric(1)
)

head(opponents_ratings, n = 1)
## [1] 1605
length(opponents_ratings)
## [1] 64

Create a data frame and view summary

# Assemble one row per player. The final column headers are given directly;
# check.names = FALSE keeps the spaces and hyphens in them as written.
df <- data.frame(
  `Name` = names,
  `State` = state,
  `Total Points` = player_points,
  `Pre-Rating` = player_ratings,
  `Average Pre-Rating of Opponents` = opponents_ratings,
  check.names = FALSE
)
nrow(df)
## [1] 64
head(df)
##                  Name State Total Points Pre-Rating
## 1            GARY HUA    ON          6.0       1794
## 2     DAKSHESH DARURI    MI          6.0       1553
## 3        ADITYA BAJAJ    MI          6.0       1384
## 4 PATRICK H SCHILLING    MI          5.5       1716
## 5          HANSHI ZUO    MI          5.5       1655
## 6         HANSEN SONG    OH          5.0       1686
##   Average Pre-Rating of Opponents
## 1                            1605
## 2                            1469
## 3                            1564
## 4                            1574
## 5                            1501
## 6                            1519

Write to file

# Write the data frame as comma-separated text: a header line, no row names,
# and no quoting around values.
write.table(
  x = df,
  file = 'output.txt',
  sep = ',',
  quote = FALSE,
  row.names = FALSE
)

Read the file

# Read the written file back to verify it. read.csv() makes the column names
# syntactically valid by default, which is why spaces and hyphens in the
# header show up as dots below.
data <- read.csv(file = 'output.txt')

head(data, 10)
##                   Name State Total.Points Pre.Rating
## 1             GARY HUA    ON          6.0       1794
## 2      DAKSHESH DARURI    MI          6.0       1553
## 3         ADITYA BAJAJ    MI          6.0       1384
## 4  PATRICK H SCHILLING    MI          5.5       1716
## 5           HANSHI ZUO    MI          5.5       1655
## 6          HANSEN SONG    OH          5.0       1686
## 7    GARY DEE SWATHELL    MI          5.0       1649
## 8     EZEKIEL HOUGHTON    MI          5.0       1641
## 9          STEFANO LEE    ON          5.0       1411
## 10           ANVIT RAO    MI          5.0       1365
##    Average.Pre.Rating.of.Opponents
## 1                             1605
## 2                             1469
## 3                             1564
## 4                             1574
## 5                             1501
## 6                             1519
## 7                             1372
## 8                             1468
## 9                             1523
## 10                            1554
nrow(data)
## [1] 64