DATA607_Project1

Import and cleanup

Steps

We import the textfile from github where its been stored, remove the dotted lines, and then combine the two rows per player into 1 row

raw_data <- readLines("https://raw.githubusercontent.com/jerryjerald27/Data-607/refs/heads/main/Week4Assignment/tournamentinfo.txt")[-(1:3)]
#remove the dotted lines 
raw_data <- raw_data[str_detect(raw_data, '^\\-+$') == FALSE]
# #combine two lines to one row 
combined_data <- raw_data %>%
  str_trim() %>%
  # .[str_detect(., "^\\-S")] %>%  
  enframe(name = NULL) %>%
  mutate(row_num = rep(1:(n()/2), each = 2)) %>%
  group_by(row_num) %>%
  summarise(combined = paste(value, collapse = " ")) %>%
  pull(combined)

knitr::kable((head(combined_data)),"simple")

x
1 \| GARY HUA \|6.0 \|W 39\|W 21\|W 18\|W 14\|W 7\|D 12\|D 4\| ON \| 15445895 / R: 1794 ->1817 \|N:2 \|W \|B \|W \|B \|W \|B \|W \|
2 \| DAKSHESH DARURI \|6.0 \|W 63\|W 58\|L 4\|W 17\|W 16\|W 20\|W 7\| MI \| 14598900 / R: 1553 ->1663 \|N:2 \|B \|W \|B \|W \|B \|W \|B \|
3 \| ADITYA BAJAJ \|6.0 \|L 8\|W 61\|W 25\|W 21\|W 11\|W 13\|W 12\| MI \| 14959604 / R: 1384 ->1640 \|N:2 \|W \|B \|W \|B \|W \|B \|W \|
4 \| PATRICK H SCHILLING \|5.5 \|W 23\|D 28\|W 2\|W 26\|D 5\|W 19\|D 1\| MI \| 12616049 / R: 1716 ->1744 \|N:2 \|W \|B \|W \|B \|W \|B \|B \|
5 \| HANSHI ZUO \|5.5 \|W 45\|W 37\|D 12\|D 13\|D 4\|W 14\|W 17\| MI \| 14601533 / R: 1655 ->1690 \|N:2 \|B \|W \|B \|W \|B \|W \|B \|
6 \| HANSEN SONG \|5.0 \|W 34\|D 29\|L 11\|W 35\|D 10\|W 27\|W 21\| OH \| 15055204 / R: 1686 ->1687 \|N:3 \|W \|B \|W \|B \|B \|W \|B \|

We then map this combined long observation into each of their specific columns

# Split combined data by "|" and create a dataframe
raw_data_frame <- combined_data %>%
  str_split(pattern = "\\|") %>%
  map_dfr(~tibble(Player = .[1], Name = .[2], State =.[11], Total = .[3], Pre_Rating = .[12], 
                  Round1 = .[4], Round2 = .[5], Round3 = .[6], 
                  Round4 = .[7], Round5 = .[8], Round6 = .[9], Round7 = .[10]))

knitr::kable(head(raw_data_frame),"simple")

Player	Name	State	Total	Pre_Rating	Round1	Round2	Round3	Round4	Round5	Round6	Round7
1	GARY HUA	ON	6.0	15445895 / R: 1794 ->1817	W 39	W 21	W 18	W 14	W 7	D 12	D 4
2	DAKSHESH DARURI	MI	6.0	14598900 / R: 1553 ->1663	W 63	W 58	L 4	W 17	W 16	W 20	W 7
3	ADITYA BAJAJ	MI	6.0	14959604 / R: 1384 ->1640	L 8	W 61	W 25	W 21	W 11	W 13	W 12
4	PATRICK H SCHILLING	MI	5.5	12616049 / R: 1716 ->1744	W 23	D 28	W 2	W 26	D 5	W 19	D 1
5	HANSHI ZUO	MI	5.5	14601533 / R: 1655 ->1690	W 45	W 37	D 12	D 13	D 4	W 14	W 17
6	HANSEN SONG	OH	5.0	15055204 / R: 1686 ->1687	W 34	D 29	L 11	W 35	D 10	W 27	W 21

We can now mutate the columns to further clean up the Pre rating column to only include the rating, and the Rounds column to only include the Opponent ID and not the game result

raw_data_frame <- raw_data_frame %>%
  mutate(
    Pre_Rating = as.numeric(str_extract(Pre_Rating, "(?<=R:\\s{1,2})\\d{3,4}")),   # Gets just the prerating from the prerating column
    across(starts_with("Round"), ~ gsub("[^0-9]", "", .)),    #mutates all columns starting with rounds to only include digits
    Player = as.numeric(Player)
  )

knitr::kable(head(raw_data_frame),"simple")

Player	Name	State	Total	Pre_Rating	Round1	Round2	Round3	Round4	Round5	Round6	Round7
1	GARY HUA	ON	6.0	1794	39	21	18	14	7	12	4
2	DAKSHESH DARURI	MI	6.0	1553	63	58	4	17	16	20	7
3	ADITYA BAJAJ	MI	6.0	1384	8	61	25	21	11	13	12
4	PATRICK H SCHILLING	MI	5.5	1716	23	28	2	26	5	19	1
5	HANSHI ZUO	MI	5.5	1655	45	37	12	13	4	14	17
6	HANSEN SONG	OH	5.0	1686	34	29	11	35	10	27	21

Calculating average Opponent ratings

This is done in 2 steps

For each player we replace the opponent Ids for each of the rounds with the Pre_ratings of the opponents

round_columns <- paste0("Round", 1:7)

for (col in round_columns) {
  raw_data_frame[[col]] <- sapply(raw_data_frame[[col]], function(x) {
    rating <- raw_data_frame$Pre_Rating[raw_data_frame$Player == x]
    if (length(rating) > 0) rating else NA
  })
}

knitr::kable(head(raw_data_frame),"simple")

Player	Name	State	Total	Pre_Rating	Round1	Round2	Round3	Round4	Round5	Round6	Round7
1	GARY HUA	ON	6.0	1794	1436	1563	1600	1610	1649	1663	1716
2	DAKSHESH DARURI	MI	6.0	1553	1175	917	1716	1629	1604	1595	1649
3	ADITYA BAJAJ	MI	6.0	1384	1641	955	1745	1563	1712	1666	1663
4	PATRICK H SCHILLING	MI	5.5	1716	1363	1507	1553	1579	1655	1564	1794
5	HANSHI ZUO	MI	5.5	1655	1242	980	1663	1666	1716	1610	1629
6	HANSEN SONG	OH	5.0	1686	1399	1602	1712	1438	1365	1552	1563

We then Average out the ratings across the rounds

# Calculate average opponent pre-rating
raw_data_frame$Avg_Opp_Pre_rating <- round(rowMeans(raw_data_frame[round_columns], na.rm = TRUE))
Average_chess_rating <- raw_data_frame %>% select(Name, State, Total, Pre_Rating, Avg_Opp_Pre_rating)
 write.csv(Average_chess_rating, "cleaned_chess.csv", row.names = TRUE)

Conclusion

We can now view the completed table and export it into a .csv file

knitr::kable((Average_chess_rating), "simple")

Name	State	Total	Pre_Rating	Avg_Opp_Pre_rating
GARY HUA	ON	6.0	1794	1605
DAKSHESH DARURI	MI	6.0	1553	1469
ADITYA BAJAJ	MI	6.0	1384	1564
PATRICK H SCHILLING	MI	5.5	1716	1574
HANSHI ZUO	MI	5.5	1655	1501
HANSEN SONG	OH	5.0	1686	1519
GARY DEE SWATHELL	MI	5.0	1649	1372
EZEKIEL HOUGHTON	MI	5.0	1641	1468
STEFANO LEE	ON	5.0	1411	1523
ANVIT RAO	MI	5.0	1365	1554
CAMERON WILLIAM MC LEMAN	MI	4.5	1712	1468
KENNETH J TACK	MI	4.5	1663	1506
TORRANCE HENRY JR	MI	4.5	1666	1498
BRADLEY SHAW	MI	4.5	1610	1515
ZACHARY JAMES HOUGHTON	MI	4.5	1220	1484
MIKE NIKITIN	MI	4.0	1604	1386
RONALD GRZEGORCZYK	MI	4.0	1629	1499
DAVID SUNDEEN	MI	4.0	1600	1480
DIPANKAR ROY	MI	4.0	1564	1426
JASON ZHENG	MI	4.0	1595	1411
DINH DANG BUI	ON	4.0	1563	1470
EUGENE L MCCLURE	MI	4.0	1555	1300
ALAN BUI	ON	4.0	1363	1214
MICHAEL R ALDRICH	MI	4.0	1229	1357
LOREN SCHWIEBERT	MI	3.5	1745	1363
MAX ZHU	ON	3.5	1579	1507
GAURAV GIDWANI	MI	3.5	1552	1222
SOFIA ADINA STANESCU-BELLU	MI	3.5	1507	1522
CHIEDOZIE OKORIE	MI	3.5	1602	1314
GEORGE AVERY JONES	ON	3.5	1522	1144
RISHI SHETTY	MI	3.5	1494	1260
JOSHUA PHILIP MATHEWS	ON	3.5	1441	1379
JADE GE	MI	3.5	1449	1277
MICHAEL JEFFERY THOMAS	MI	3.5	1399	1375
JOSHUA DAVID LEE	MI	3.5	1438	1150
SIDDHARTH JHA	MI	3.5	1355	1388
AMIYATOSH PWNANANDAM	MI	3.5	980	1385
BRIAN LIU	MI	3.0	1423	1539
JOEL R HENDON	MI	3.0	1436	1430
FOREST ZHANG	MI	3.0	1348	1391
KYLE WILLIAM MURPHY	MI	3.0	1403	1248
JARED GE	MI	3.0	1332	1150
ROBERT GLEN VASEY	MI	3.0	1283	1107
JUSTIN D SCHILLING	MI	3.0	1199	1327
DEREK YAN	MI	3.0	1242	1152
JACOB ALEXANDER LAVALLEY	MI	3.0	377	1358
ERIC WRIGHT	MI	2.5	1362	1392
DANIEL KHAIN	MI	2.5	1382	1356
MICHAEL J MARTIN	MI	2.5	1291	1286
SHIVAM JHA	MI	2.5	1056	1296
TEJAS AYYAGARI	MI	2.5	1011	1356
ETHAN GUO	MI	2.5	935	1495
JOSE C YBARRA	MI	2.0	1393	1345
LARRY HODGE	MI	2.0	1270	1206
ALEX KONG	MI	2.0	1186	1406
MARISA RICCI	MI	2.0	1153	1414
MICHAEL LU	MI	2.0	1092	1363
VIRAJ MOHILE	MI	2.0	917	1391
SEAN M MC CORMICK	MI	2.0	853	1319
JULIA SHEN	MI	1.5	967	1330
JEZZEL FARKAS	ON	1.5	955	1327
ASHWIN BALAJI	MI	1.0	1530	1186
THOMAS JOSEPH HOSMER	MI	1.0	1175	1350
BEN LI	MI	1.0	1163	1263

DATA607_Project1

Jerald Melukkaran

23/2/2025

Introduction

Import and cleanup

Calculating average Opponent ratings

Conclusion