Strategy

Two lines of the text file will be used to extract the data needed. The first line with the player name has the “Player Name” and “Total Pts.” The second line with “state” has the “state and pre_rtg that are required. The first line has the pair numbers for each of the pairs for each player. These numbers will be extracted and used as the index to get the pre_rtg for the pairs of each player. The pre_rtg’s will be summed along each row and divided by the number of pairs to find the mean.

Load required libraries and read the text file

The top 4 lines in the text file will be eliminated and every third line after that with dashes will be ignored.

#Load the required library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
# Read the text file into a character vector
text_lines <- read_lines("https://raw.githubusercontent.com/hawa1983/DATA607/main/Chess%20Tournament.txt")

# Skip the first four lines
text_lines <- tail(text_lines, -4)

Create the initial data frame, df:

we created an empty data frame, extracted the values needed, and added a row at a time.

line_list = list()
# Create an empty data frame. Data will be added to this data frame later
df <- data.frame(player_no = character(0), name = character(0), state = character(0), score = character(0), current_rating = character(0), next_round_rating = character(0), pair_ids = list())

# Loop through the text_lines vector three lines at a time
for (i in seq(1, length(text_lines), by = 3)){
 
 
    # Save the first two lines as input strings. Ignore the third line with the dashes
    input_string1 <- text_lines[i]
    input_string2 <- text_lines[i + 1]
    
    line_list <- append(line_list, str_flatten(c(input_string1, input_string2)))
    
    # Define regular expressions to extract the information
    # Positive look behind and positive look ahead extension 
    # notations were used to isolate the values need. In all but pre_rtg and post_rtg, these 
    # notations are really not necessary since each value is the first occurrence of it's character type combination 
    # and str_extract only extract the first occurrence.
    
    player_no <- str_extract(input_string1, r"((?<=\s{3})(\d+)(?=\s\|))") 
    player_name <- str_extract(input_string1, r"((?<=\|\s)(\b[A-Z,\-,\s]+\b)(?=\s+\|\d+\.\d+\s+\|))") 
    state <- str_extract(input_string2, r"((\b[A-Z]{2}\b))") 
    rating <- str_extract(input_string1, r"((\b\d*\.\d*\b))") 
    current_rating <- str_extract(input_string2, r"((?<=R:\s{0,5})\d{3,4})")
    next_round_rating <- str_extract(input_string2, r"((?<=->\s{0,5})\d+)")
    pair_ids <- unlist(str_extract_all(input_string1, "(?<=[A-Z]\\s{1,4})(\\d*)(?=\\|)"))
      
    # Add rows one at a time to df data frame.
    new_row <- c(player_no, player_name, state, rating, current_rating, next_round_rating, pair_ids)
    df <- rbind(df, new_row)

}

# Rename the columns of df
names(df) <- c("id", "name", "state", "total_pts", "pre_rtg", "post_rtg", "pair_1", "pair_2", "pair_3", "pair_4", "pair_5", "pair_6", "pair_7")





# Display the data frame
options(kableExtra.latex.load_packages = FALSE)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
kable(df, longtable = T, booktabs = T, caption = "Chess Game Data Extraction") |>
  kable_styling(latex_options = c("striped", "scale_down", "repeat_header")) |> 
  row_spec(0, background = "grey") |> 
  landscape()
Chess Game Data Extraction
id name state total_pts pre_rtg post_rtg pair_1 pair_2 pair_3 pair_4 pair_5 pair_6 pair_7
1 GARY HUA ON 6.0 1794 1817 39 21 18 14 7 12 4
2 DAKSHESH DARURI MI 6.0 1553 1663 63 58 4 17 16 20 7
3 ADITYA BAJAJ MI 6.0 1384 1640 8 61 25 21 11 13 12
4 PATRICK H SCHILLING MI 5.5 1716 1744 23 28 2 26 5 19 1
5 HANSHI ZUO MI 5.5 1655 1690 45 37 12 13 4 14 17
6 HANSEN SONG OH 5.0 1686 1687 34 29 11 35 10 27 21
7 GARY DEE SWATHELL MI 5.0 1649 1673 57 46 13 11 1 9 2
8 EZEKIEL HOUGHTON MI 5.0 1641 1657 3 32 14 9 47 28 19
9 STEFANO LEE ON 5.0 1411 1564 25 18 59 8 26 7 20
10 ANVIT RAO MI 5.0 1365 1544 16 19 55 31 6 25 18
11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1696 38 56 6 7 3 34 26
12 KENNETH J TACK MI 4.5 1663 1670 42 33 5 38 1 3
13 TORRANCE HENRY JR MI 4.5 1666 1662 36 27 7 5 33 3 32
14 BRADLEY SHAW MI 4.5 1610 1618 54 44 8 1 27 5 31
15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1416 19 16 30 22 54 33 38
16 MIKE NIKITIN MI 4.0 1604 1613 10 15 39 2 36
17 RONALD GRZEGORCZYK MI 4.0 1629 1610 48 41 26 2 23 22 5
18 DAVID SUNDEEN MI 4.0 1600 1600 47 9 1 32 19 38 10
19 DIPANKAR ROY MI 4.0 1564 1570 15 10 52 28 18 4 8
20 JASON ZHENG MI 4.0 1595 1569 40 49 23 41 28 2 9
21 DINH DANG BUI ON 4.0 1563 1562 43 1 47 3 40 39 6
22 EUGENE L MCCLURE MI 4.0 1555 1529 64 52 28 15 17 40
23 ALAN BUI ON 4.0 1363 1371 4 43 20 58 17 37 46
24 MICHAEL R ALDRICH MI 4.0 1229 1300 28 47 43 25 60 44 39
25 LOREN SCHWIEBERT MI 3.5 1745 1681 9 53 3 24 34 10 47
26 MAX ZHU ON 3.5 1579 1564 49 40 17 4 9 32 11
27 GAURAV GIDWANI MI 3.5 1552 1539 51 13 46 37 14 6
28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507 1513 24 4 22 19 20 8 36
29 CHIEDOZIE OKORIE MI 3.5 1602 1508 50 6 38 34 52 48
30 GEORGE AVERY JONES ON 3.5 1522 1444 52 64 15 55 31 61 50
31 RISHI SHETTY MI 3.5 1494 1444 58 55 64 10 30 50 14
32 JOSHUA PHILIP MATHEWS ON 3.5 1441 1433 61 8 44 18 51 26 13
33 JADE GE MI 3.5 1449 1421 60 12 50 36 13 15 51
34 MICHAEL JEFFERY THOMAS MI 3.5 1399 1400 6 60 37 29 25 11 52
35 JOSHUA DAVID LEE MI 3.5 1438 1392 46 38 56 6 57 52 48
36 SIDDHARTH JHA MI 3.5 1355 1367 13 57 51 33 16 28
37 AMIYATOSH PWNANANDAM MI 3.5 980 1077 5 34 27 23 61
38 BRIAN LIU MI 3.0 1423 1439 11 35 29 12 18 15
39 JOEL R HENDON MI 3.0 1436 1413 1 54 40 16 44 21 24
40 FOREST ZHANG MI 3.0 1348 1346 20 26 39 59 21 56 22
41 KYLE WILLIAM MURPHY MI 3.0 1403 1341 59 17 58 20
42 JARED GE MI 3.0 1332 1256 12 50 57 60 61 64 56
43 ROBERT GLEN VASEY MI 3.0 1283 1244 21 23 24 63 59 46 55
44 JUSTIN D SCHILLING MI 3.0 1199 1199 14 32 53 39 24 59
45 DEREK YAN MI 3.0 1242 1191 5 51 60 56 63 55 58
46 JACOB ALEXANDER LAVALLEY MI 3.0 377 1076 35 7 27 50 64 43 23
47 ERIC WRIGHT MI 2.5 1362 1341 18 24 21 61 8 51 25
48 DANIEL KHAIN MI 2.5 1382 1335 17 63 52 29 35
49 MICHAEL J MARTIN MI 2.5 1291 1259 26 20 63 64 58
50 SHIVAM JHA MI 2.5 1056 1111 29 42 33 46 31 30
51 TEJAS AYYAGARI MI 2.5 1011 1097 27 45 36 57 32 47 33
52 ETHAN GUO MI 2.5 935 1092 30 22 19 48 29 35 34
53 JOSE C YBARRA MI 2.0 1393 1359 25 44 57
54 LARRY HODGE MI 2.0 1270 1200 14 39 61 15 59 64
55 ALEX KONG MI 2.0 1186 1163 62 31 10 30 45 43
56 MARISA RICCI MI 2.0 1153 1140 11 35 45 40 42
57 MICHAEL LU MI 2.0 1092 1079 7 36 42 51 35 53
58 VIRAJ MOHILE MI 2.0 917 941 31 2 41 23 49 45
59 SEAN M MC CORMICK MI 2.0 853 878 41 9 40 43 54 44
60 JULIA SHEN MI 1.5 967 984 33 34 45 42 24
61 JEZZEL FARKAS ON 1.5 955 979 32 3 54 47 42 30 37
62 ASHWIN BALAJI MI 1.0 1530 1535 55
63 THOMAS JOSEPH HOSMER MI 1.0 1175 1125 2 48 49 43 45
64 BEN LI MI 1.0 1163 1112 22 30 31 49 46 42 54

Replace the columns pair_1 to pair_7 with the pre_rtg value that match the values of the column

In strategy 1, we substituted the values of

# Replace the values of each of the columns that start with pair_ with the pre_rtg value using the column value as the index to select the pre_rtg value. Also change their data type from character to numeric.
# Change the data type of the columns 4 to 6.
df <- df %>%
   mutate_at(vars(starts_with("pair_")), ~ as.double(pre_rtg[as.integer(.)])
             ) |> mutate_at(
             vars(4:6), ~ as.double(.)
             )

options(kableExtra.latex.load_packages = FALSE)
library(kableExtra)

kable(df, longtable = T, booktabs = T, caption = "Chess Game Data Extraction") |>
  kable_styling(latex_options = c("striped", "scale_down", "repeat_header")) |> 
  row_spec(0, background = "grey") |> 
  landscape()
Chess Game Data Extraction
id name state total_pts pre_rtg post_rtg pair_1 pair_2 pair_3 pair_4 pair_5 pair_6 pair_7
1 GARY HUA ON 6.0 1794 1817 1436 1563 1600 1610 1649 1663 1716
2 DAKSHESH DARURI MI 6.0 1553 1663 1175 917 1716 1629 1604 1595 1649
3 ADITYA BAJAJ MI 6.0 1384 1640 1641 955 1745 1563 1712 1666 1663
4 PATRICK H SCHILLING MI 5.5 1716 1744 1363 1507 1553 1579 1655 1564 1794
5 HANSHI ZUO MI 5.5 1655 1690 1242 980 1663 1666 1716 1610 1629
6 HANSEN SONG OH 5.0 1686 1687 1399 1602 1712 1438 1365 1552 1563
7 GARY DEE SWATHELL MI 5.0 1649 1673 1092 377 1666 1712 1794 1411 1553
8 EZEKIEL HOUGHTON MI 5.0 1641 1657 1384 1441 1610 1411 1362 1507 1564
9 STEFANO LEE ON 5.0 1411 1564 1745 1600 853 1641 1579 1649 1595
10 ANVIT RAO MI 5.0 1365 1544 1604 1564 1186 1494 1686 1745 1600
11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1696 1423 1153 1686 1649 1384 1399 1579
12 KENNETH J TACK MI 4.5 1663 1670 1332 1449 1655 1423 NA 1794 1384
13 TORRANCE HENRY JR MI 4.5 1666 1662 1355 1552 1649 1655 1449 1384 1441
14 BRADLEY SHAW MI 4.5 1610 1618 1270 1199 1641 1794 1552 1655 1494
15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1416 1564 1604 1522 1555 1270 1449 1423
16 MIKE NIKITIN MI 4.0 1604 1613 1365 1220 NA 1436 1553 1355 NA
17 RONALD GRZEGORCZYK MI 4.0 1629 1610 1382 1403 1579 1553 1363 1555 1655
18 DAVID SUNDEEN MI 4.0 1600 1600 1362 1411 1794 1441 1564 1423 1365
19 DIPANKAR ROY MI 4.0 1564 1570 1220 1365 935 1507 1600 1716 1641
20 JASON ZHENG MI 4.0 1595 1569 1348 1291 1363 1403 1507 1553 1411
21 DINH DANG BUI ON 4.0 1563 1562 1283 1794 1362 1384 1348 1436 1686
22 EUGENE L MCCLURE MI 4.0 1555 1529 1163 935 1507 1220 NA 1629 1348
23 ALAN BUI ON 4.0 1363 1371 1716 1283 1595 917 1629 980 377
24 MICHAEL R ALDRICH MI 4.0 1229 1300 1507 1362 1283 1745 967 1199 1436
25 LOREN SCHWIEBERT MI 3.5 1745 1681 1411 1393 1384 1229 1399 1365 1362
26 MAX ZHU ON 3.5 1579 1564 1291 1348 1629 1716 1411 1441 1712
27 GAURAV GIDWANI MI 3.5 1552 1539 1011 1666 377 980 1610 1686 NA
28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507 1513 1229 1716 1555 1564 1595 1641 1355
29 CHIEDOZIE OKORIE MI 3.5 1602 1508 1056 1686 1423 1399 935 1382 NA
30 GEORGE AVERY JONES ON 3.5 1522 1444 935 1163 1220 1186 1494 955 1056
31 RISHI SHETTY MI 3.5 1494 1444 917 1186 1163 1365 1522 1056 1610
32 JOSHUA PHILIP MATHEWS ON 3.5 1441 1433 955 1641 1199 1600 1011 1579 1666
33 JADE GE MI 3.5 1449 1421 967 1663 1056 1355 1666 1220 1011
34 MICHAEL JEFFERY THOMAS MI 3.5 1399 1400 1686 967 980 1602 1745 1712 935
35 JOSHUA DAVID LEE MI 3.5 1438 1392 377 1423 1153 1686 1092 935 1382
36 SIDDHARTH JHA MI 3.5 1355 1367 1666 1092 1011 1449 NA 1604 1507
37 AMIYATOSH PWNANANDAM MI 3.5 980 1077 NA 1655 1399 1552 NA 1363 955
38 BRIAN LIU MI 3.0 1423 1439 1712 1438 1602 1663 NA 1600 1220
39 JOEL R HENDON MI 3.0 1436 1413 1794 1270 1348 1604 1199 1563 1229
40 FOREST ZHANG MI 3.0 1348 1346 1595 1579 1436 853 1563 1153 1555
41 KYLE WILLIAM MURPHY MI 3.0 1403 1341 853 1629 917 1595 NA NA NA
42 JARED GE MI 3.0 1332 1256 1663 1056 1092 967 955 1163 1153
43 ROBERT GLEN VASEY MI 3.0 1283 1244 1563 1363 1229 1175 853 377 1186
44 JUSTIN D SCHILLING MI 3.0 1199 1199 NA 1610 1441 1393 1436 1229 853
45 DEREK YAN MI 3.0 1242 1191 1655 1011 967 1153 1175 1186 917
46 JACOB ALEXANDER LAVALLEY MI 3.0 377 1076 1438 1649 1552 1056 1163 1283 1363
47 ERIC WRIGHT MI 2.5 1362 1341 1600 1229 1563 955 1641 1011 1745
48 DANIEL KHAIN MI 2.5 1382 1335 1629 1175 NA 935 NA 1602 1438
49 MICHAEL J MARTIN MI 2.5 1291 1259 1579 1595 1175 1163 917 NA NA
50 SHIVAM JHA MI 2.5 1056 1111 1602 1332 1449 377 NA 1494 1522
51 TEJAS AYYAGARI MI 2.5 1011 1097 1552 1242 1355 1092 1441 1362 1449
52 ETHAN GUO MI 2.5 935 1092 1522 1555 1564 1382 1602 1438 1399
53 JOSE C YBARRA MI 2.0 1393 1359 NA 1745 NA 1199 NA 1092 NA
54 LARRY HODGE MI 2.0 1270 1200 1610 1436 955 NA 1220 853 1163
55 ALEX KONG MI 2.0 1186 1163 1530 1494 1365 1522 NA 1242 1283
56 MARISA RICCI MI 2.0 1153 1140 NA 1712 1438 1242 NA 1348 1332
57 MICHAEL LU MI 2.0 1092 1079 1649 1355 1332 1011 1438 1393 NA
58 VIRAJ MOHILE MI 2.0 917 941 1494 1553 1403 1363 1291 NA 1242
59 SEAN M MC CORMICK MI 2.0 853 878 1403 NA 1411 1348 1283 1270 1199
60 JULIA SHEN MI 1.5 967 984 1449 1399 1242 1332 1229 NA NA
61 JEZZEL FARKAS ON 1.5 955 979 1441 1384 1270 1362 1332 1522 980
62 ASHWIN BALAJI MI 1.0 1530 1535 1186 NA NA NA NA NA NA
63 THOMAS JOSEPH HOSMER MI 1.0 1175 1125 1553 1382 1291 1283 1242 NA NA
64 BEN LI MI 1.0 1163 1112 1555 1522 1494 1291 377 1332 1270

Calculate the average pre_rtg for each player

Specify the range of columns whose values I want to average. Find the average of columns pair_1 to pair_7 across each row. Ignore columns with NA. Select and display the required columns

df_final <- df |>
  rowwise() |>
  mutate(mean_rtg = round(mean(c_across(starts_with("pair_")), na.rm = TRUE))) |> 
  select("Player Name" = name, 
         "Player State" = state, 
         "Total Points" = total_pts, 
         "Pre-Rating" = pre_rtg, 
         "Average Pre-Rating" = mean_rtg
         )

#write the dataframe to a csv file
write_csv(df_final, "ratings.csv")

options(kableExtra.latex.load_packages = FALSE)
library(kableExtra)

kable(df_final, longtable = T, booktabs = T, caption = "Chess Game Average Pre-game Rating") |>
  kable_styling(latex_options = c("striped", "scale_down", "repeat_header")) |> 
  row_spec(0, background = "grey") |> 
  landscape()
Chess Game Average Pre-game Rating
Player Name Player State Total Points Pre-Rating Average Pre-Rating
GARY HUA ON 6.0 1794 1605
DAKSHESH DARURI MI 6.0 1553 1469
ADITYA BAJAJ MI 6.0 1384 1564
PATRICK H SCHILLING MI 5.5 1716 1574
HANSHI ZUO MI 5.5 1655 1501
HANSEN SONG OH 5.0 1686 1519
GARY DEE SWATHELL MI 5.0 1649 1372
EZEKIEL HOUGHTON MI 5.0 1641 1468
STEFANO LEE ON 5.0 1411 1523
ANVIT RAO MI 5.0 1365 1554
CAMERON WILLIAM MC LEMAN MI 4.5 1712 1468
KENNETH J TACK MI 4.5 1663 1506
TORRANCE HENRY JR MI 4.5 1666 1498
BRADLEY SHAW MI 4.5 1610 1515
ZACHARY JAMES HOUGHTON MI 4.5 1220 1484
MIKE NIKITIN MI 4.0 1604 1386
RONALD GRZEGORCZYK MI 4.0 1629 1499
DAVID SUNDEEN MI 4.0 1600 1480
DIPANKAR ROY MI 4.0 1564 1426
JASON ZHENG MI 4.0 1595 1411
DINH DANG BUI ON 4.0 1563 1470
EUGENE L MCCLURE MI 4.0 1555 1300
ALAN BUI ON 4.0 1363 1214
MICHAEL R ALDRICH MI 4.0 1229 1357
LOREN SCHWIEBERT MI 3.5 1745 1363
MAX ZHU ON 3.5 1579 1507
GAURAV GIDWANI MI 3.5 1552 1222
SOFIA ADINA STANESCU-BELLU MI 3.5 1507 1522
CHIEDOZIE OKORIE MI 3.5 1602 1314
GEORGE AVERY JONES ON 3.5 1522 1144
RISHI SHETTY MI 3.5 1494 1260
JOSHUA PHILIP MATHEWS ON 3.5 1441 1379
JADE GE MI 3.5 1449 1277
MICHAEL JEFFERY THOMAS MI 3.5 1399 1375
JOSHUA DAVID LEE MI 3.5 1438 1150
SIDDHARTH JHA MI 3.5 1355 1388
AMIYATOSH PWNANANDAM MI 3.5 980 1385
BRIAN LIU MI 3.0 1423 1539
JOEL R HENDON MI 3.0 1436 1430
FOREST ZHANG MI 3.0 1348 1391
KYLE WILLIAM MURPHY MI 3.0 1403 1248
JARED GE MI 3.0 1332 1150
ROBERT GLEN VASEY MI 3.0 1283 1107
JUSTIN D SCHILLING MI 3.0 1199 1327
DEREK YAN MI 3.0 1242 1152
JACOB ALEXANDER LAVALLEY MI 3.0 377 1358
ERIC WRIGHT MI 2.5 1362 1392
DANIEL KHAIN MI 2.5 1382 1356
MICHAEL J MARTIN MI 2.5 1291 1286
SHIVAM JHA MI 2.5 1056 1296
TEJAS AYYAGARI MI 2.5 1011 1356
ETHAN GUO MI 2.5 935 1495
JOSE C YBARRA MI 2.0 1393 1345
LARRY HODGE MI 2.0 1270 1206
ALEX KONG MI 2.0 1186 1406
MARISA RICCI MI 2.0 1153 1414
MICHAEL LU MI 2.0 1092 1363
VIRAJ MOHILE MI 2.0 917 1391
SEAN M MC CORMICK MI 2.0 853 1319
JULIA SHEN MI 1.5 967 1330
JEZZEL FARKAS ON 1.5 955 1327
ASHWIN BALAJI MI 1.0 1530 1186
THOMAS JOSEPH HOSMER MI 1.0 1175 1350
BEN LI MI 1.0 1163 1263