In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents
For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605
1605 was calculated by using the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663, 1716, and dividing by the total number of games played.
library(stringr)
filename <- "tournamentinfo.txt"
# Create a function to read in and structure the data
readTournamentFile <- function(filename, skip = 4, warn = FALSE) {
lines <- readLines(filename, warn = warn)
# Make one record by line
recordPerLine <- c()
for(i in seq(from=skip+1, to=length(lines), by=3)) {
recordPerLine <- c(recordPerLine,paste(lines[i],lines[i+1]) )
}
# Split the records up based on the | delimiter
records <- str_split(recordPerLine, "\\|")
# Make a data frame from the table, one record at a time
dd <- as.data.frame( t( unlist(records[1]) ) )
for(i in 2:length(records)) {
dd_t <- as.data.frame( t( unlist(records[i]) ) )
dd <- rbind(dd, dd_t)
}
# Add column names
colnames( dd ) <- c("Player Number", "Player Name", "Total",
"Rnd 1 Result", "Rnd 2 Result", "Rnd 3 Result", "Rnd 4 Result", "Rnd 5 Result", "Rnd 6 Result", "Rnd 7 Result",
"Player State", "USCF ID / Rtg (Pre->Post)", "DONTKNOW",
"Rnd 1 Color", "Rnd 2 Color", "Rnd 3 Color", "Rnd 4 Color", "Rnd 5 Color", "Rnd 6 Color", "Rnd 7 Color",
"DONTCARE")
# Get rid of whitespace
dd2 <- as.data.frame(apply(dd,2,function(x)gsub('\\s+', '',x)))
# Fix player names (too much white space removed)
dd2$`Player Name` <- trimws( as.character(dd$`Player Name`) )
# Do extractions
dd2 <- cbind( dd2, `UCSF ID`=str_extract( dd2$`USCF ID / Rtg (Pre->Post)`, "[[:digit:]]+") )
# Pre tournament rating
dd2 <- cbind( dd2, `Pre Rating`=str_extract( dd2$`USCF ID / Rtg (Pre->Post)`, "(?<=\\:)[[:digit:]]+") )
# Round results
dd2 <- cbind( dd2, `Round 1 Opponent`=str_extract(dd2$`Rnd 1 Result`, "[[:digit:]]+") )
dd2 <- cbind( dd2, `Round 2 Opponent`=str_extract(dd2$`Rnd 2 Result`, "[[:digit:]]+") )
dd2 <- cbind( dd2, `Round 3 Opponent`=str_extract(dd2$`Rnd 3 Result`, "[[:digit:]]+") )
dd2 <- cbind( dd2, `Round 4 Opponent`=str_extract(dd2$`Rnd 4 Result`, "[[:digit:]]+") )
dd2 <- cbind( dd2, `Round 5 Opponent`=str_extract(dd2$`Rnd 5 Result`, "[[:digit:]]+") )
dd2 <- cbind( dd2, `Round 6 Opponent`=str_extract(dd2$`Rnd 6 Result`, "[[:digit:]]+") )
dd2 <- cbind( dd2, `Round 7 Opponent`=str_extract(dd2$`Rnd 7 Result`, "[[:digit:]]+") )
# Fix types
dd2$`Pre Rating` <- as.numeric(as.character(dd2$`Pre Rating`))
dd2$`Total` <- as.numeric(as.character(dd2$`Total`))
dd2$`Player Number` <- as.character(dd2$`Player Number`)
dd2$`Round 1 Opponent` <- as.character(dd2$`Round 1 Opponent`)
dd2$`Round 2 Opponent` <- as.character(dd2$`Round 2 Opponent`)
dd2$`Round 3 Opponent` <- as.character(dd2$`Round 3 Opponent`)
dd2$`Round 4 Opponent` <- as.character(dd2$`Round 4 Opponent`)
dd2$`Round 5 Opponent` <- as.character(dd2$`Round 5 Opponent`)
dd2$`Round 6 Opponent` <- as.character(dd2$`Round 6 Opponent`)
dd2$`Round 7 Opponent` <- as.character(dd2$`Round 7 Opponent`)
# Remove unneeded columns
dd2$DONTCARE <- NULL
dd2$DONTKNOW <- NULL
dd2$`USCF ID / Rtg (Pre->Post)` <- NULL
dd2$`UCSF ID`<- NULL
dd2$`Rnd 1 Result` <- NULL
dd2$`Rnd 2 Result` <- NULL
dd2$`Rnd 3 Result` <- NULL
dd2$`Rnd 4 Result` <- NULL
dd2$`Rnd 5 Result` <- NULL
dd2$`Rnd 6 Result` <- NULL
dd2$`Rnd 7 Result` <- NULL
dd2$`Rnd 1 Color` <- NULL
dd2$`Rnd 2 Color` <- NULL
dd2$`Rnd 3 Color` <- NULL
dd2$`Rnd 4 Color` <- NULL
dd2$`Rnd 5 Color` <- NULL
dd2$`Rnd 6 Color` <- NULL
dd2$`Rnd 7 Color` <- NULL
dd2
}
#Run function on text file
data_chess <- readTournamentFile(filename)
head(data_chess)
## Player Number Player Name Total Player State Pre Rating
## 1 1 GARY HUA 6.0 ON 1794
## 2 2 DAKSHESH DARURI 6.0 MI 1553
## 3 3 ADITYA BAJAJ 6.0 MI 1384
## 4 4 PATRICK H SCHILLING 5.5 MI 1716
## 5 5 HANSHI ZUO 5.5 MI 1655
## 6 6 HANSEN SONG 5.0 OH 1686
## Round 1 Opponent Round 2 Opponent Round 3 Opponent Round 4 Opponent
## 1 39 21 18 14
## 2 63 58 4 17
## 3 8 61 25 21
## 4 23 28 2 26
## 5 45 37 12 13
## 6 34 29 11 35
## Round 5 Opponent Round 6 Opponent Round 7 Opponent
## 1 7 12 4
## 2 16 20 7
## 3 11 13 12
## 4 5 19 1
## 5 4 14 17
## 6 10 27 21
#Collect pre-rating scores and take the average
preavgscores <- c()
for(i in data_chess$`Player Number`) {
row <- data_chess[ data_chess$`Player Number` == i, ]
opp1pre <- data_chess[ data_chess$`Player Number` == row$`Round 1 Opponent`, ]$`Pre Rating`
opp2pre <- data_chess[ data_chess$`Player Number` == row$`Round 2 Opponent`, ]$`Pre Rating`
opp3pre <- data_chess[ data_chess$`Player Number` == row$`Round 3 Opponent`, ]$`Pre Rating`
opp4pre <- data_chess[ data_chess$`Player Number` == row$`Round 4 Opponent`, ]$`Pre Rating`
opp5pre <- data_chess[ data_chess$`Player Number` == row$`Round 5 Opponent`, ]$`Pre Rating`
opp6pre <- data_chess[ data_chess$`Player Number` == row$`Round 6 Opponent`, ]$`Pre Rating`
opp7pre <- data_chess[ data_chess$`Player Number` == row$`Round 7 Opponent`, ]$`Pre Rating`
avgpre <- round(mean(c(opp1pre,opp2pre,opp3pre,opp4pre,opp5pre,opp6pre,opp7pre),na.rm = TRUE),0)
preavgscores <- c(preavgscores,avgpre)
}
data_chess <- cbind(data_chess,`Avg Opp Rating`=preavgscores)
#Remove unneeded columns
data_chess$`Round 1 Opponent` <- NULL
data_chess$`Round 2 Opponent` <- NULL
data_chess$`Round 3 Opponent` <- NULL
data_chess$`Round 4 Opponent` <- NULL
data_chess$`Round 5 Opponent` <- NULL
data_chess$`Round 6 Opponent` <- NULL
data_chess$`Round 7 Opponent` <- NULL
data_chess$`Player Number` <- NULL
#Final result
data_chess
## Player Name Total Player State Pre Rating Avg Opp Rating
## 1 GARY HUA 6.0 ON 1794 1605
## 2 DAKSHESH DARURI 6.0 MI 1553 1469
## 3 ADITYA BAJAJ 6.0 MI 1384 1564
## 4 PATRICK H SCHILLING 5.5 MI 1716 1574
## 5 HANSHI ZUO 5.5 MI 1655 1501
## 6 HANSEN SONG 5.0 OH 1686 1519
## 7 GARY DEE SWATHELL 5.0 MI 1649 1372
## 8 EZEKIEL HOUGHTON 5.0 MI 1641 1468
## 9 STEFANO LEE 5.0 ON 1411 1523
## 10 ANVIT RAO 5.0 MI 1365 1554
## 11 CAMERON WILLIAM MC LEMAN 4.5 MI 1712 1468
## 12 KENNETH J TACK 4.5 MI 1663 1506
## 13 TORRANCE HENRY JR 4.5 MI 1666 1498
## 14 BRADLEY SHAW 4.5 MI 1610 1515
## 15 ZACHARY JAMES HOUGHTON 4.5 MI 1220 1484
## 16 MIKE NIKITIN 4.0 MI 1604 1386
## 17 RONALD GRZEGORCZYK 4.0 MI 1629 1499
## 18 DAVID SUNDEEN 4.0 MI 1600 1480
## 19 DIPANKAR ROY 4.0 MI 1564 1426
## 20 JASON ZHENG 4.0 MI 1595 1411
## 21 DINH DANG BUI 4.0 ON 1563 1470
## 22 EUGENE L MCCLURE 4.0 MI 1555 1300
## 23 ALAN BUI 4.0 ON 1363 1214
## 24 MICHAEL R ALDRICH 4.0 MI 1229 1357
## 25 LOREN SCHWIEBERT 3.5 MI 1745 1363
## 26 MAX ZHU 3.5 ON 1579 1507
## 27 GAURAV GIDWANI 3.5 MI 1552 1222
## 28 SOFIA ADINA STANESCU-BELLU 3.5 MI 1507 1522
## 29 CHIEDOZIE OKORIE 3.5 MI 1602 1314
## 30 GEORGE AVERY JONES 3.5 ON 1522 1144
## 31 RISHI SHETTY 3.5 MI 1494 1260
## 32 JOSHUA PHILIP MATHEWS 3.5 ON 1441 1379
## 33 JADE GE 3.5 MI 1449 1277
## 34 MICHAEL JEFFERY THOMAS 3.5 MI 1399 1375
## 35 JOSHUA DAVID LEE 3.5 MI 1438 1150
## 36 SIDDHARTH JHA 3.5 MI 1355 1388
## 37 AMIYATOSH PWNANANDAM 3.5 MI 980 1385
## 38 BRIAN LIU 3.0 MI 1423 1539
## 39 JOEL R HENDON 3.0 MI 1436 1430
## 40 FOREST ZHANG 3.0 MI 1348 1391
## 41 KYLE WILLIAM MURPHY 3.0 MI 1403 1248
## 42 JARED GE 3.0 MI 1332 1150
## 43 ROBERT GLEN VASEY 3.0 MI 1283 1107
## 44 JUSTIN D SCHILLING 3.0 MI 1199 1327
## 45 DEREK YAN 3.0 MI 1242 1152
## 46 JACOB ALEXANDER LAVALLEY 3.0 MI 377 1358
## 47 ERIC WRIGHT 2.5 MI 1362 1392
## 48 DANIEL KHAIN 2.5 MI 1382 1356
## 49 MICHAEL J MARTIN 2.5 MI 1291 1286
## 50 SHIVAM JHA 2.5 MI 1056 1296
## 51 TEJAS AYYAGARI 2.5 MI 1011 1356
## 52 ETHAN GUO 2.5 MI 935 1495
## 53 JOSE C YBARRA 2.0 MI 1393 1345
## 54 LARRY HODGE 2.0 MI 1270 1206
## 55 ALEX KONG 2.0 MI 1186 1406
## 56 MARISA RICCI 2.0 MI 1153 1414
## 57 MICHAEL LU 2.0 MI 1092 1363
## 58 VIRAJ MOHILE 2.0 MI 917 1391
## 59 SEAN M MC CORMICK 2.0 MI 853 1319
## 60 JULIA SHEN 1.5 MI 967 1330
## 61 JEZZEL FARKAS 1.5 ON 955 1327
## 62 ASHWIN BALAJI 1.0 MI 1530 1186
## 63 THOMAS JOSEPH HOSMER 1.0 MI 1175 1350
## 64 BEN LI 1.0 MI 1163 1263
# Create a .csv file
write.csv(data_chess, "ChessPlayerData.csv", row.names=FALSE)