library(tidyverse)
library(knitr)
chess_data <- as.data.frame(read.delim("https://raw.githubusercontent.com/cassandra-coste/CUNY607/main/tournamentinfo.txt", header = FALSE, stringsAsFactors = FALSE, sep = "|"))
Remove junk rows (code developed by classmate Gabriel Campos and shared via Slack)
toDelete <- seq(1, length(chess_data$V1), 3)
toDelete
## [1] 1 4 7 10 13 16 19 22 25 28 31 34 37 40 43 46 49 52 55
## [20] 58 61 64 67 70 73 76 79 82 85 88 91 94 97 100 103 106 109 112
## [39] 115 118 121 124 127 130 133 136 139 142 145 148 151 154 157 160 163 166 169
## [58] 172 175 178 181 184 187 190 193 196
chess_data <- chess_data[-toDelete ,]
Pull data for player pre-tournament rating by extracting the digit code following ‘R:’
chess_data_clean <- extract(
chess_data,
col=V2,
into="Player_Rating",
regex = "(R: +\\d+)",
remove = FALSE
)
Remove extraneous characters and convert to numeric for player rating column as there is no other pertinent information there
chess_data_clean <- extract(
chess_data_clean,
col=Player_Rating,
into="Player_Rating",
regex = "(\\d+)",
remove = FALSE,
convert = TRUE
)
Extract round data for all 7 rounds removing the original data column
chess_data_clean <- extract(
chess_data_clean,
col = V4,
into = c("Round_1"),
regex = "(\\d+)",
remove = TRUE,
convert = TRUE
)
chess_data_clean <- extract(
chess_data_clean,
col = V5,
into = c("Round_2"),
regex = "(\\d+)",
remove = TRUE,
convert = TRUE
)
chess_data_clean <- extract(
chess_data_clean,
col = V6,
into = c("Round_3"),
regex = "(\\d+)",
remove = TRUE,
convert = TRUE
)
chess_data_clean <- extract(
chess_data_clean,
col = V7,
into = c("Round_4"),
regex = "(\\d+)",
remove = TRUE,
convert = TRUE
)
chess_data_clean <- extract(
chess_data_clean,
col = V8,
into = c("Round_5"),
regex = "(\\d+)",
remove = TRUE,
convert = TRUE
)
chess_data_clean <- extract(
chess_data_clean,
col = V9,
into = c("Round_6"),
regex = "(\\d+)",
remove = TRUE,
convert = TRUE
)
chess_data_clean <- extract(
chess_data_clean,
col = V10,
into = c("Round_7"),
regex = "(\\d+)",
remove = TRUE,
convert = TRUE
)
The following steps were employed:
Create indexes of odd and even rows to separate the data into two dataframes containing the pertinent data
Remove first rows with messy data
Create ‘Pair’ column in the even dataframe for joining them back together later
Renames columns as necessary
Convert the odd dataframe ‘Pair’ column to numeric for join
Select which columns from each dataframe have relevant data
even_indexes<-seq(2,130,2)
odd_indexes<-seq(1,129,2)
chess_data_even <- chess_data_clean[-odd_indexes ,]
chess_data_even = chess_data_even[-1,]
chess_data_even$Pair <- seq.int(nrow(chess_data_even))
chess_data_odd <- chess_data_clean[-even_indexes ,]
chess_data_odd = chess_data_odd[-1,]
chess_data_odd <- rename(chess_data_odd, Pair = V1)
chess_data_odd$Pair <- as.integer(chess_data_odd$Pair)
chess_data_odd <- select(chess_data_odd, -c(Player_Rating, V11)) %>% rename(Player_Name = V2) %>% rename(Total_Score = V3)
chess_data_even <- select(chess_data_even, c(V1, Player_Rating, Pair)) %>% rename(Player_State = V1)
Using a full join, the even and odd dataframes are joined via the ‘Pair’ column
chess_data_final <- full_join(chess_data_even, chess_data_odd, by = "Pair")
Create a loop that replaces players Pair ID with their player rating pre-match
for (i in 1:nrow(chess_data_final)) {
for (j in 6:12) {
if (!is.na(chess_data_final[i,j])) {
chess_data_final[i,j] <- as.numeric(chess_data_final[chess_data_final[i,j],2])
}
}
}
chess_data_final$Average_Opponent <- rowMeans(chess_data_final[6:12], na.rm = TRUE)
chess_data_output <- chess_data_final %>%select(Player_Name, Player_State, Total_Score, Player_Rating, Average_Opponent) %>% mutate_if(is.numeric, format, digits=4)
kable(chess_data_output, col.names = c('Player Name', 'Player State', 'Total Number of Points', 'Player Pre-Rating', 'Average Rating of Opponents'), align = "ccrrr", caption = "Chess Tournament Scorecard")
| Player Name | Player State | Total Number of Points | Player Pre-Rating | Average Rating of Opponents |
|---|---|---|---|---|
| GARY HUA | ON | 6.0 | 1794 | 1605 |
| DAKSHESH DARURI | MI | 6.0 | 1553 | 1469 |
| ADITYA BAJAJ | MI | 6.0 | 1384 | 1564 |
| PATRICK H SCHILLING | MI | 5.5 | 1716 | 1574 |
| HANSHI ZUO | MI | 5.5 | 1655 | 1501 |
| HANSEN SONG | OH | 5.0 | 1686 | 1519 |
| GARY DEE SWATHELL | MI | 5.0 | 1649 | 1372 |
| EZEKIEL HOUGHTON | MI | 5.0 | 1641 | 1468 |
| STEFANO LEE | ON | 5.0 | 1411 | 1523 |
| ANVIT RAO | MI | 5.0 | 1365 | 1554 |
| CAMERON WILLIAM MC LEMAN | MI | 4.5 | 1712 | 1468 |
| KENNETH J TACK | MI | 4.5 | 1663 | 1506 |
| TORRANCE HENRY JR | MI | 4.5 | 1666 | 1498 |
| BRADLEY SHAW | MI | 4.5 | 1610 | 1515 |
| ZACHARY JAMES HOUGHTON | MI | 4.5 | 1220 | 1484 |
| MIKE NIKITIN | MI | 4.0 | 1604 | 1386 |
| RONALD GRZEGORCZYK | MI | 4.0 | 1629 | 1499 |
| DAVID SUNDEEN | MI | 4.0 | 1600 | 1480 |
| DIPANKAR ROY | MI | 4.0 | 1564 | 1426 |
| JASON ZHENG | MI | 4.0 | 1595 | 1411 |
| DINH DANG BUI | ON | 4.0 | 1563 | 1470 |
| EUGENE L MCCLURE | MI | 4.0 | 1555 | 1300 |
| ALAN BUI | ON | 4.0 | 1363 | 1214 |
| MICHAEL R ALDRICH | MI | 4.0 | 1229 | 1357 |
| LOREN SCHWIEBERT | MI | 3.5 | 1745 | 1363 |
| MAX ZHU | ON | 3.5 | 1579 | 1507 |
| GAURAV GIDWANI | MI | 3.5 | 1552 | 1222 |
| SOFIA ADINA STANESCU-BELLU | MI | 3.5 | 1507 | 1522 |
| CHIEDOZIE OKORIE | MI | 3.5 | 1602 | 1314 |
| GEORGE AVERY JONES | ON | 3.5 | 1522 | 1144 |
| RISHI SHETTY | MI | 3.5 | 1494 | 1260 |
| JOSHUA PHILIP MATHEWS | ON | 3.5 | 1441 | 1379 |
| JADE GE | MI | 3.5 | 1449 | 1277 |
| MICHAEL JEFFERY THOMAS | MI | 3.5 | 1399 | 1375 |
| JOSHUA DAVID LEE | MI | 3.5 | 1438 | 1150 |
| SIDDHARTH JHA | MI | 3.5 | 1355 | 1388 |
| AMIYATOSH PWNANANDAM | MI | 3.5 | 980 | 1385 |
| BRIAN LIU | MI | 3.0 | 1423 | 1539 |
| JOEL R HENDON | MI | 3.0 | 1436 | 1430 |
| FOREST ZHANG | MI | 3.0 | 1348 | 1391 |
| KYLE WILLIAM MURPHY | MI | 3.0 | 1403 | 1248 |
| JARED GE | MI | 3.0 | 1332 | 1150 |
| ROBERT GLEN VASEY | MI | 3.0 | 1283 | 1107 |
| JUSTIN D SCHILLING | MI | 3.0 | 1199 | 1327 |
| DEREK YAN | MI | 3.0 | 1242 | 1152 |
| JACOB ALEXANDER LAVALLEY | MI | 3.0 | 377 | 1358 |
| ERIC WRIGHT | MI | 2.5 | 1362 | 1392 |
| DANIEL KHAIN | MI | 2.5 | 1382 | 1356 |
| MICHAEL J MARTIN | MI | 2.5 | 1291 | 1286 |
| SHIVAM JHA | MI | 2.5 | 1056 | 1296 |
| TEJAS AYYAGARI | MI | 2.5 | 1011 | 1356 |
| ETHAN GUO | MI | 2.5 | 935 | 1495 |
| JOSE C YBARRA | MI | 2.0 | 1393 | 1345 |
| LARRY HODGE | MI | 2.0 | 1270 | 1206 |
| ALEX KONG | MI | 2.0 | 1186 | 1406 |
| MARISA RICCI | MI | 2.0 | 1153 | 1414 |
| MICHAEL LU | MI | 2.0 | 1092 | 1363 |
| VIRAJ MOHILE | MI | 2.0 | 917 | 1391 |
| SEAN M MC CORMICK | MI | 2.0 | 853 | 1319 |
| JULIA SHEN | MI | 1.5 | 967 | 1330 |
| JEZZEL FARKAS | ON | 1.5 | 955 | 1327 |
| ASHWIN BALAJI | MI | 1.0 | 1530 | 1186 |
| THOMAS JOSEPH HOSMER | MI | 1.0 | 1175 | 1350 |
| BEN LI | MI | 1.0 | 1163 | 1263 |
Code to export final dataframe to CSV
write.csv(chess_data_output,"chess_data_output.csv", row.names = FALSE)