In this first section I will convert our txt file to an easier to manipulate data frame. The below image is the current structure of the txt file and the below code will convert it to the desired data frame.

image

library(stringr)
library(sqldf)
library(knitr)
library(tcltk)

tinfo <- file(github.url, open="r" )
chess.data <- readLines(tinfo) # read lines 
chess.data <- data.frame(chess.data, do.call(rbind, strsplit(chess.data, split = "|", fixed = TRUE)))
chess.data <- data.frame(lapply(chess.data, as.character), stringsAsFactors=FALSE)
chess.data <- chess.data[!grepl(".+--.+", chess.data$X1),] # removes "------" lines
chess.data <- chess.data[,-c(1,12)]
combine_rows <- function(dataframe){
  df <- dataframe[0,]  ##create empty dataframe copy
  for (i in colnames(dataframe)){  ## for all columns in data frame 
      for (j in seq(1:length(dataframe[[i]]))){ ## for all rows in dataframe
         df[j,i] <- gsub("\\s+"," ",(paste(dataframe[[i]][j:(j+1)], collapse = ""))) 
         ## gsub removes extra spaces, then collapses two rows 
         }
        } 
return(df) # return the dataframe
        }

chess.data <- (combine_rows(chess.data)) # run function on data frame
colnames(chess.data) <- chess.data[1,] # create column names from first row
chess.data <- chess.data[-(1:2),] # remove header information
index <- !is.na(as.numeric(gsub( " .*$", "",  str_trim(chess.data[,1])))) #gets the part of string, if a numeric value first it converts to True
chess.data <- chess.data[index,] 
chess.data$ID <- str_trim(str_extract(chess.data[,1], "\\s[[0-9]]{1,}"))
chess.data$State <- str_trim(str_extract(chess.data[,1], "\\s[^[0-9]].+"))
chess.data$Name <- str_extract(chess.data[,2], ".[^[0-9]]{1,}")
chess.data$Pre_score <- str_extract(str_extract(chess.data[,2], ":.[[0-9]]{1,}.[^[-]]"), "[[0-9]]{1,}")
chess.data$Total_points <- str_extract(chess.data[,3], "[[0-9]].[[0-9]]\\s")

split.round.columns <- function(dataframe){
                  df.colname <- colnames(dataframe[!is.na(str_extract(colnames(dataframe), "Round.+"))])
                  df <- dataframe[!is.na(str_extract(colnames(dataframe), "Round.+"))]
                  df_prescore <- dataframe[!is.na(str_extract(colnames(dataframe), "^[[ID]]|Pre_score"))]
                  df.1 <- df.2 <- df.3 <- df[0,]  #empty dataframe copies
                  for (i in df.colname){ 
                      for (j in seq(1:length(dataframe[[i]]))){
                        df.1[j,i]  <- str_extract(dataframe[[i]][j], "[[:alpha:]]")
                        df.2[j,i] <- str_extract(dataframe[[i]][j], "[[0-9]]{1,}")
                        df.3[j,i] <- as.numeric(ifelse(!is.na(df.2[j,i]), 
                                                       dataframe$Pre_score[dataframe$ID ==  df.2[j,i]], 
                                                       NA))
                         }
                       } 
                  colnames(df.1) <- str_c(str_trim(df.colname), " Results")
                  colnames(df.2) <- str_c(str_trim(df.colname), " Opponent")
                  colnames(df.3) <- str_c(str_trim(df.colname), " Opponent Prescore")
                 for (t in (seq(1:ncol(df)))){
                      dataframe <- cbind(dataframe, df.1[, t, drop = FALSE])
                      dataframe <- cbind(dataframe, df.2[, t, drop = FALSE])
                      dataframe <- cbind(dataframe, df.3[, t, drop = FALSE])
                      }
return(dataframe) 
}

chess.data <- split.round.columns(chess.data)
chess.data <- chess.data[,-c(1:10)] # remove original columns

x <- t(head(chess.data, n = 4))
colnames(x) <-  x[1, ]
x = x[-1,]
kable(x, align = "c", padding = 4) #check dataframe
1 2 3 4
State ON MI MI MI
Name GARY HUA DAKSHESH DARURI ADITYA BAJAJ PATRICK H SCHILLING
Pre_score 1794 1553 1384 1716
Total_points 6.0 6.0 6.0 5.5
Round 1 Results W W L W
Round 1 Opponent 39 63 8 23
Round 1 Opponent Prescore 1436 1175 1641 1363
Round 2 Results W W W D
Round 2 Opponent 21 58 61 28
Round 2 Opponent Prescore 1563 917 955 1507
Round 3 Results W L W W
Round 3 Opponent 18 4 25 2
Round 3 Opponent Prescore 1600 1716 1745 1553
Round 4 Results W W W W
Round 4 Opponent 14 17 21 26
Round 4 Opponent Prescore 1610 1629 1563 1579
Round 5 Results W W W D
Round 5 Opponent 7 16 11 5
Round 5 Opponent Prescore 1649 1604 1712 1655
Round 6 Results D W W W
Round 6 Opponent 12 20 13 19
Round 6 Opponent Prescore 1663 1595 1666 1564
Round 7 Results D W W D
Round 7 Opponent 4 7 12 1
Round 7 Opponent Prescore 1716 1649 1663 1794

Now we have the data frame in a more appropriate format, we can find the mean of the opponents prescores that each player played in the chess tournament.

get.average.opp.prescore <- function(dataframe){
  df_prescore <- dataframe[!is.na(str_extract(colnames(dataframe), "[[ID]]|Prescore"))]
  df_prescore[] <- lapply(df_prescore, function(columns) as.numeric(columns))
  df_prescore$avg_prescore <- rowMeans(df_prescore[c(2:(ncol(df_prescore)))], na.rm = TRUE)
  dataframe <- df_prescore[c(1, (ncol(df_prescore)))]
  return(dataframe)
}

ID.AverOPscore <- get.average.opp.prescore(chess.data)
kable(head(ID.AverOPscore),  row.names = FALSE, align = "c")
ID avg_prescore
1 1605.286
2 1469.286
3 1563.571
4 1573.571
5 1500.857
6 1518.714

We now have a reference table with the IDs and the average prescore of the opponents played. We just need to look at our converted dataframe and use the reference table to produce the results in the desired format Player's Name, Player's State, Total Number of Points, Player's Pre-Rating, and Average Pre Chess Rating of Opponents.

results <- sqldf("SELECT Name `Player's Name`, State `Player's State`, 
                  Total_points `Total Number of Points`, Pre_score `Player's Pre-Rating`, 
                  avg_prescore `Average Pre Chess Rating of Opponents`
              FROM 'chess.data' C LEFT OUTER JOIN 'ID.AverOPscore' I on C.ID = I.ID")
kable(results, align = "c")
Player’s Name Player’s State Total Number of Points Player’s Pre-Rating Average Pre Chess Rating of Opponents
GARY HUA ON 6.0 1794 1605.286
DAKSHESH DARURI MI 6.0 1553 1469.286
ADITYA BAJAJ MI 6.0 1384 1563.571
PATRICK H SCHILLING MI 5.5 1716 1573.571
HANSHI ZUO MI 5.5 1655 1500.857
HANSEN SONG OH 5.0 1686 1518.714
GARY DEE SWATHELL MI 5.0 1649 1372.143
EZEKIEL HOUGHTON MI 5.0 1641 1468.429
STEFANO LEE ON 5.0 1411 1523.143
ANVIT RAO MI 5.0 1365 1554.143
CAMERON WILLIAM MC LEMAN MI 4.5 1712 1467.571
KENNETH J TACK MI 4.5 1663 1506.167
TORRANCE HENRY JR MI 4.5 1666 1497.857
BRADLEY SHAW MI 4.5 1610 1515.000
ZACHARY JAMES HOUGHTON MI 4.5 1220 1483.857
MIKE NIKITIN MI 4.0 1604 1385.800
RONALD GRZEGORCZYK MI 4.0 1629 1498.571
DAVID SUNDEEN MI 4.0 1600 1480.000
DIPANKAR ROY MI 4.0 1564 1426.286
JASON ZHENG MI 4.0 1595 1410.857
DINH DANG BUI ON 4.0 1563 1470.429
EUGENE L MCCLURE MI 4.0 1555 1300.333
ALAN BUI ON 4.0 1363 1213.857
MICHAEL R ALDRICH MI 4.0 1229 1357.000
LOREN SCHWIEBERT MI 3.5 1745 1363.286
MAX ZHU ON 3.5 1579 1506.857
GAURAV GIDWANI MI 3.5 1552 1221.667
SOFIA ADINA STANESCU-BELLU MI 3.5 1507 1522.143
CHIEDOZIE OKORIE MI 3.5 1602 1313.500
GEORGE AVERY JONES ON 3.5 1522 1144.143
RISHI SHETTY MI 3.5 1494 1259.857
JOSHUA PHILIP MATHEWS ON 3.5 1441 1378.714
JADE GE MI 3.5 1449 1276.857
MICHAEL JEFFERY THOMAS MI 3.5 1399 1375.286
JOSHUA DAVID LEE MI 3.5 1438 1149.714
SIDDHARTH JHA MI 3.5 1355 1388.167
AMIYATOSH PWNANANDAM MI 3.5 980 1384.800
BRIAN LIU MI 3.0 1423 1539.167
JOEL R HENDON MI 3.0 1436 1429.571
FOREST ZHANG MI 3.0 1348 1390.571
KYLE WILLIAM MURPHY MI 3.0 1403 1248.500
JARED GE MI 3.0 1332 1149.857
ROBERT GLEN VASEY MI 3.0 1283 1106.571
JUSTIN D SCHILLING MI 3.0 1199 1327.000
DEREK YAN MI 3.0 1242 1152.000
JACOB ALEXANDER LAVALLEY MI 3.0 377 1357.714
ERIC WRIGHT MI 2.5 1362 1392.000
DANIEL KHAIN MI 2.5 1382 1355.800
MICHAEL J MARTIN MI 2.5 1291 1285.800
SHIVAM JHA MI 2.5 1056 1296.000
TEJAS AYYAGARI MI 2.5 1011 1356.143
ETHAN GUO MI 2.5 935 1494.571
JOSE C YBARRA MI 2.0 1393 1345.333
LARRY HODGE MI 2.0 1270 1206.167
ALEX KONG MI 2.0 1186 1406.000
MARISA RICCI MI 2.0 1153 1414.400
MICHAEL LU MI 2.0 1092 1363.000
VIRAJ MOHILE MI 2.0 917 1391.000
SEAN M MC CORMICK MI 2.0 853 1319.000
JULIA SHEN MI 1.5 967 1330.200
JEZZEL FARKAS ON 1.5 955 1327.286
ASHWIN BALAJI MI 1.0 1530 1186.000
THOMAS JOSEPH HOSMER MI 1.0 1175 1350.200
BEN LI MI 1.0 1163 1263.000
write.csv(results, paste(as.character(Sys.Date()), "CHUNT Chess Data results.csv", collapse = ""))