library(tidyverse)
## Warning: package 'tidyr' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (readr)
library(stringr)
# Location of the raw tournament text file.
urlfile <- "https://raw.githubusercontent.com/ursulapodosenin/DAT-607/main/tournament_info.txt"

# Read the file line by line and keep the lines in a one-column data frame.
# The native pipe is rewritten by the parser into
# data.frame(readLines(urlfile)), so the auto-generated column name is the
# same as with the nested call.
raw_tournament_data <- readLines(urlfile) |> data.frame()
## Warning in readLines(urlfile): incomplete final line found on
## 'https://raw.githubusercontent.com/ursulapodosenin/DAT-607/main/tournament_info.txt'
# Inspect the raw import: structure first, then dimensions. The recorded
# output shows a single character column with one element per line of the
# source file (196 rows, 1 column).
str(raw_tournament_data)
## 'data.frame':    196 obs. of  1 variable:
##  $ readLines.urlfile.: chr  "-----------------------------------------------------------------------------------------" " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| " " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | " "-----------------------------------------------------------------------------------------" ...
dim(raw_tournament_data)
## [1] 196   1
# Field extraction ----
#
# str_extract_all() expects an atomic character vector; passing the
# one-column data frame itself triggers an "argument is not an atomic vector;
# coercing" warning on every call. The patterns are therefore applied to the
# underlying vector of raw lines, and the per-line matches are flattened.
raw_lines <- unlist(raw_tournament_data, use.names = FALSE)

# Return every match of `pattern` across all raw lines as one flat vector.
extract_matches <- function(pattern) {
  unlist(str_extract_all(raw_lines, pattern))
}

# Pair number: 1-2 digits preceded by 3-4 whitespace characters.
player_number <- as.numeric(extract_matches("(?<=\\s{3,4})\\d{1,2}(?=\\s)"))

# Name: the field that follows "<digit> | ". The match keeps the field's
# trailing padding. `[A-Za-z]` is used rather than `[A-z]`, which would also
# accept the punctuation characters that sit between "Z" and "a".
player_name <- extract_matches(
  "(?<=\\d\\s\\|\\s)([A-Za-z, -]*\\s){1,}[[:alpha:]]*(?=\\s*\\|)"
)

# State: two upper-case letters directly followed by " |".
player_state <- extract_matches("[[:upper:]]{2}(?=\\s\\|)")

# Total points: "<digit>.<digit>" immediately after a field separator.
total_points <- as.numeric(extract_matches("(?<=\\|)\\d\\.\\d"))

# Pre-rating: 3-4 digits after "R:" followed by whitespace, or 3-4 digits
# followed by a "P<n>" suffix and then "-".
player_pre_rating <- as.numeric(extract_matches(
  "(?<=R:\\s{1,2})(\\d{3,4}(?=\\s))|(\\d{3,4}(?=P\\d{1,2}\\s*-))"
))

# data.frame() silently recycles vectors whose lengths divide evenly, so make
# sure every pattern found the same number of players before combining them.
field_lengths <- lengths(list(
  player_number, player_name, player_state, total_points, player_pre_rating
))
stopifnot(length(unique(field_lengths)) == 1L)

tournament_data <- data.frame(
  player_number, player_name, player_state, total_points, player_pre_rating
)
# Auto-print the assembled per-player table. The `##` lines that follow are
# the console output recorded for it (one row per player, with the
# player_pre_rating column wrapped underneath).
tournament_data
##    player_number                      player_name player_state total_points
## 1              1 GARY HUA                                   ON          6.0
## 2              2 DAKSHESH DARURI                            MI          6.0
## 3              3 ADITYA BAJAJ                               MI          6.0
## 4              4 PATRICK H SCHILLING                        MI          5.5
## 5              5 HANSHI ZUO                                 MI          5.5
## 6              6 HANSEN SONG                                OH          5.0
## 7              7 GARY DEE SWATHELL                          MI          5.0
## 8              8 EZEKIEL HOUGHTON                           MI          5.0
## 9              9 STEFANO LEE                                ON          5.0
## 10            10 ANVIT RAO                                  MI          5.0
## 11            11 CAMERON WILLIAM MC LEMAN                   MI          4.5
## 12            12 KENNETH J TACK                             MI          4.5
## 13            13 TORRANCE HENRY JR                          MI          4.5
## 14            14 BRADLEY SHAW                               MI          4.5
## 15            15 ZACHARY JAMES HOUGHTON                     MI          4.5
## 16            16 MIKE NIKITIN                               MI          4.0
## 17            17 RONALD GRZEGORCZYK                         MI          4.0
## 18            18 DAVID SUNDEEN                              MI          4.0
## 19            19 DIPANKAR ROY                               MI          4.0
## 20            20 JASON ZHENG                                MI          4.0
## 21            21 DINH DANG BUI                              ON          4.0
## 22            22 EUGENE L MCCLURE                           MI          4.0
## 23            23 ALAN BUI                                   ON          4.0
## 24            24 MICHAEL R ALDRICH                          MI          4.0
## 25            25 LOREN SCHWIEBERT                           MI          3.5
## 26            26 MAX ZHU                                    ON          3.5
## 27            27 GAURAV GIDWANI                             MI          3.5
## 28            28 SOFIA ADINA STANESCU-BELLU                 MI          3.5
## 29            29 CHIEDOZIE OKORIE                           MI          3.5
## 30            30 GEORGE AVERY JONES                         ON          3.5
## 31            31 RISHI SHETTY                               MI          3.5
## 32            32 JOSHUA PHILIP MATHEWS                      ON          3.5
## 33            33 JADE GE                                    MI          3.5
## 34            34 MICHAEL JEFFERY THOMAS                     MI          3.5
## 35            35 JOSHUA DAVID LEE                           MI          3.5
## 36            36 SIDDHARTH JHA                              MI          3.5
## 37            37 AMIYATOSH PWNANANDAM                       MI          3.5
## 38            38 BRIAN LIU                                  MI          3.0
## 39            39 JOEL R HENDON                              MI          3.0
## 40            40 FOREST ZHANG                               MI          3.0
## 41            41 KYLE WILLIAM MURPHY                        MI          3.0
## 42            42 JARED GE                                   MI          3.0
## 43            43 ROBERT GLEN VASEY                          MI          3.0
## 44            44 JUSTIN D SCHILLING                         MI          3.0
## 45            45 DEREK YAN                                  MI          3.0
## 46            46 JACOB ALEXANDER LAVALLEY                   MI          3.0
## 47            47 ERIC WRIGHT                                MI          2.5
## 48            48 DANIEL KHAIN                               MI          2.5
## 49            49 MICHAEL J MARTIN                           MI          2.5
## 50            50 SHIVAM JHA                                 MI          2.5
## 51            51 TEJAS AYYAGARI                             MI          2.5
## 52            52 ETHAN GUO                                  MI          2.5
## 53            53 JOSE C YBARRA                              MI          2.0
## 54            54 LARRY HODGE                                MI          2.0
## 55            55 ALEX KONG                                  MI          2.0
## 56            56 MARISA RICCI                               MI          2.0
## 57            57 MICHAEL LU                                 MI          2.0
## 58            58 VIRAJ MOHILE                               MI          2.0
## 59            59 SEAN M MC CORMICK                          MI          2.0
## 60            60 JULIA SHEN                                 MI          1.5
## 61            61 JEZZEL FARKAS                              ON          1.5
## 62            62 ASHWIN BALAJI                              MI          1.0
## 63            63 THOMAS JOSEPH HOSMER                       MI          1.0
## 64            64 BEN LI                                     MI          1.0
##    player_pre_rating
## 1               1794
## 2               1553
## 3               1384
## 4               1716
## 5               1655
## 6               1686
## 7               1649
## 8               1641
## 9               1411
## 10              1365
## 11              1712
## 12              1663
## 13              1666
## 14              1610
## 15              1220
## 16              1604
## 17              1629
## 18              1600
## 19              1564
## 20              1595
## 21              1563
## 22              1555
## 23              1363
## 24              1229
## 25              1745
## 26              1579
## 27              1552
## 28              1507
## 29              1602
## 30              1522
## 31              1494
## 32              1441
## 33              1449
## 34              1399
## 35              1438
## 36              1355
## 37               980
## 38              1423
## 39              1436
## 40              1348
## 41              1403
## 42              1332
## 43              1283
## 44              1199
## 45              1242
## 46               377
## 47              1362
## 48              1382
## 49              1291
## 50              1056
## 51              1011
## 52               935
## 53              1393
## 54              1270
## 55              1186
## 56              1153
## 57              1092
## 58               917
## 59               853
## 60               967
## 61               955
## 62              1530
## 63              1175
## 64              1163
# Discard the first five rows of the table, then preview what remains.
# NOTE(review): in the table printed earlier in this script, rows 1-5 are
# ordinary player rows (pair numbers 1-5), not header residue -- confirm that
# dropping them is intended. The CSV written at the end of the script is
# built from the extracted vectors, not from this data frame.
tournament_data <- tournament_data[-seq_len(5L), ]
head(tournament_data)
##    player_number                      player_name player_state total_points
## 6              6 HANSEN SONG                                OH          5.0
## 7              7 GARY DEE SWATHELL                          MI          5.0
## 8              8 EZEKIEL HOUGHTON                           MI          5.0
## 9              9 STEFANO LEE                                ON          5.0
## 10            10 ANVIT RAO                                  MI          5.0
## 11            11 CAMERON WILLIAM MC LEMAN                   MI          4.5
##    player_pre_rating
## 6               1686
## 7               1649
## 8               1641
## 9               1411
## 10              1365
## 11              1712
# Average pre-tournament rating of each player's opponents ----
#
# Opponents are read from the per-round columns of each player's record line
# and looked up by pair number. (Indexing `player_number` by itself would only
# ever return the player's own rating.)
# NOTE(review): assumes each round field holds a result code followed by the
# opponent's pair number, and no number when no game was played -- confirm
# against the raw file.
raw_lines <- unlist(raw_tournament_data, use.names = FALSE)

# A player's record line is the one that opens with the pair number.
record_lines <- str_subset(raw_lines, "^\\s*\\d+\\s*\\|")
record_ids <- as.numeric(str_extract(record_lines, "\\d+"))

# Strip the first three fields (pair number, name, total points, per the file
# header); every number left in the remaining round fields is an opponent.
round_fields <- str_remove(record_lines, "^([^|]*\\|){3}")
opponents_by_record <- lapply(str_extract_all(round_fields, "\\d+"), as.numeric)

# Align the parsed records with the order of the extracted player vectors.
record_of_player <- match(player_number, record_ids)
stopifnot(!anyNA(record_of_player))

opponent_rating <- vapply(
  record_of_player,
  function(record) {
    opponent_ids <- opponents_by_record[[record]]
    # A player with no recorded opponents has no average to report.
    if (length(opponent_ids) == 0L) {
      return(NA_real_)
    }
    opponent_ratings <- player_pre_rating[match(opponent_ids, player_number)]
    round(mean(opponent_ratings), digits = 0)
  },
  numeric(1)
)

# Export table. Names are trimmed here because the extraction pattern keeps
# the fixed-width padding after each name; `opponent_rating` is appended as
# the last column so the existing columns keep their names and positions.
chess_tournament_data <- data.frame(
  player_number,
  player_name = str_trim(player_name),
  player_state,
  total_points,
  player_pre_rating,
  opponent_rating
)
write.csv(chess_tournament_data, file = "chess_tournament_data.csv")