Load Data / Import Data into R

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(dplyr)
library (stringr)



# Read the raw tournament report, one text line per row. The file is a
# "|"-delimited fixed-layout report rather than a true CSV; read.csv() is used
# only to get each line into a single column (V1), as the preview output
# below shows.
# NOTE(review): the path is absolute and machine-specific; point it at the
# local copy of chess.txt before running elsewhere.
chess_tournament <- read.csv(
  "/Users/blessinga/Desktop/chess.txt",
  header = FALSE,            # the file has no column-header record
  stringsAsFactors = FALSE   # keep V1 as character on every R version
)

# Preview the first lines to confirm the layout.
head(chess_tournament)
##                                                                                           V1
## 1  -----------------------------------------------------------------------------------------
## 2  Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| 
## 3  Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | 
## 4  -----------------------------------------------------------------------------------------
## 5      1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|
## 6     ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |
# Sanity-check the import: report the length, class and mode of each column.
summary(chess_tournament)
##       V1           
##  Length:196        
##  Class :character  
##  Mode  :character

Part 2: Remove the Header Rows and Split the Player Records into Their Two Lines

# Remove unneeded rows (heading): drop the first three lines of the file (the
# top rule and the two column-header lines visible in the head() preview).
# The single text column is extracted explicitly so the result is always a
# plain character vector, instead of relying on `[ , ]` silently dropping a
# one-column data frame.
chess_tournament2 <- as.character(chess_tournament[[1]])[-(1:3)]

# Pull out rows (3n + 2) and (3n + 3): once the heading is gone the lines
# repeat in groups of three (separator rule, player line 1, player line 2).
n <- length(chess_tournament2)
if (n < 3L) {
  stop("Expected at least one complete player record after the heading.",
       call. = FALSE)
}
row1 <- chess_tournament2[seq(2, n, 3)]  # pair number, name, points, rounds
row2 <- chess_tournament2[seq(3, n, 3)]  # state, USCF ID, ratings

Extract Data with Regular Expressions

library(stringr)
# Parse the per-player fields. `row1` is the first text line of each player
# record (pair number | name | total points | one cell per round) and `row2`
# is the second (state | USCF ID / ratings | ...).

# Pair number: the first run of digits on the first line.
Plr_Num <- as.integer(str_extract(row1, "\\d+"))

# Name: the whole second "|"-delimited field, trimmed. Taking the field
# itself (rather than matching two or three words) keeps names with four or
# more words intact instead of cutting them off after the third word.
Plr_Name <- str_trim(str_split_fixed(row1, "\\|", 3)[, 2])

# State / province: the first word on the second line.
Plr_State <- str_extract(row2, "\\w+")

# Total points: the first decimal number on the first line.
Plr_Points <- as.numeric(str_extract(row1, "\\d+\\.\\d+"))

# Pre-tournament rating: the digits that follow the "R:" label. Anchoring on
# the label avoids picking up digits from the USCF ID or the post-rating, and
# stops before any non-digit suffix attached to the rating.
Plr_PreRating <- as.integer(str_match(row2, "R:\\s*(\\d+)")[, 2])

# Opponent pair numbers: every run of digits that sits directly before a "|"
# (the right-hand edge of a round cell). The lookahead keeps the "|" out of
# the match, so one pass yields a list with one character vector per player
# and no list-to-string coercion warning.
Opponents <- str_extract_all(row1, "\\d+(?=\\|)")
## Warning in stri_extract_all_regex(string, pattern, simplify = simplify, :
## argument is not an atomic vector; coercing

Calculate the Average Pre-Rating of Each Player's Opponents

# Average pre-tournament rating of the opponents each player actually faced.
# Iterates over the parsed players directly (not over the fractional n / 3),
# and looks opponents up by pair number with match(), so the result does not
# depend on pair numbers happening to equal row positions.
# A player with no recorded opponents gets NaN (mean of an empty vector); an
# opponent number that is not found yields NA.
O_PreRating <- vapply(
  seq_along(Plr_Num),
  function(i) {
    opponent_nums <- as.integer(Opponents[[i]])
    mean(Plr_PreRating[match(opponent_nums, Plr_Num)])
  },
  numeric(1)
)

Create Data Frame

# Assemble the per-player summary table, one row per player, then print it.
csv <- data.frame(
  Plr_Name = Plr_Name,
  Plr_State = Plr_State,
  Plr_Points = Plr_Points,
  Plr_PreRating = Plr_PreRating,
  O_PreRating = O_PreRating
)
csv
##                    Plr_Name Plr_State Plr_Points Plr_PreRating O_PreRating
## 1                  GARY HUA        ON        6.0          1794    1605.286
## 2           DAKSHESH DARURI        MI        6.0          1553    1469.286
## 3              ADITYA BAJAJ        MI        6.0          1384    1563.571
## 4       PATRICK H SCHILLING        MI        5.5          1716    1573.571
## 5                HANSHI ZUO        MI        5.5          1655    1500.857
## 6               HANSEN SONG        OH        5.0          1686    1518.714
## 7         GARY DEE SWATHELL        MI        5.0          1649    1372.143
## 8          EZEKIEL HOUGHTON        MI        5.0          1641    1468.429
## 9               STEFANO LEE        ON        5.0          1411    1523.143
## 10                ANVIT RAO        MI        5.0          1365    1554.143
## 11       CAMERON WILLIAM MC        MI        4.5          1712    1467.571
## 12           KENNETH J TACK        MI        4.5          1663    1506.167
## 13        TORRANCE HENRY JR        MI        4.5          1666    1497.857
## 14             BRADLEY SHAW        MI        4.5          1610    1515.000
## 15   ZACHARY JAMES HOUGHTON        MI        4.5          1220    1483.857
## 16             MIKE NIKITIN        MI        4.0          1604    1385.800
## 17       RONALD GRZEGORCZYK        MI        4.0          1629    1498.571
## 18            DAVID SUNDEEN        MI        4.0          1600    1480.000
## 19             DIPANKAR ROY        MI        4.0          1564    1426.286
## 20              JASON ZHENG        MI        4.0          1595    1410.857
## 21            DINH DANG BUI        ON        4.0          1563    1470.429
## 22         EUGENE L MCCLURE        MI        4.0          1555    1300.333
## 23                 ALAN BUI        ON        4.0          1363    1213.857
## 24        MICHAEL R ALDRICH        MI        4.0          1229    1357.000
## 25         LOREN SCHWIEBERT        MI        3.5          1745    1363.286
## 26                  MAX ZHU        ON        3.5          1579    1506.857
## 27           GAURAV GIDWANI        MI        3.5          1552    1221.667
## 28              SOFIA ADINA        MI        3.5          1507    1522.143
## 29         CHIEDOZIE OKORIE        MI        3.5          1602    1313.500
## 30       GEORGE AVERY JONES        ON        3.5          1522    1144.143
## 31             RISHI SHETTY        MI        3.5          1494    1259.857
## 32    JOSHUA PHILIP MATHEWS        ON        3.5          1441    1378.714
## 33                  JADE GE        MI        3.5          1449    1276.857
## 34   MICHAEL JEFFERY THOMAS        MI        3.5          1399    1375.286
## 35         JOSHUA DAVID LEE        MI        3.5          1438    1149.714
## 36            SIDDHARTH JHA        MI        3.5          1355    1388.167
## 37     AMIYATOSH PWNANANDAM        MI        3.5           980    1384.800
## 38                BRIAN LIU        MI        3.0          1423    1539.167
## 39            JOEL R HENDON        MI        3.0          1436    1429.571
## 40             FOREST ZHANG        MI        3.0          1348    1390.571
## 41      KYLE WILLIAM MURPHY        MI        3.0          1403    1248.500
## 42                 JARED GE        MI        3.0          1332    1149.857
## 43        ROBERT GLEN VASEY        MI        3.0          1283    1106.571
## 44       JUSTIN D SCHILLING        MI        3.0          1199    1327.000
## 45                DEREK YAN        MI        3.0          1242    1152.000
## 46 JACOB ALEXANDER LAVALLEY        MI        3.0           377    1357.714
## 47              ERIC WRIGHT        MI        2.5          1362    1392.000
## 48             DANIEL KHAIN        MI        2.5          1382    1355.800
## 49         MICHAEL J MARTIN        MI        2.5          1291    1285.800
## 50               SHIVAM JHA        MI        2.5          1056    1296.000
## 51           TEJAS AYYAGARI        MI        2.5          1011    1356.143
## 52                ETHAN GUO        MI        2.5           935    1494.571
## 53            JOSE C YBARRA        MI        2.0          1393    1345.333
## 54              LARRY HODGE        MI        2.0          1270    1206.167
## 55                ALEX KONG        MI        2.0          1186    1406.000
## 56             MARISA RICCI        MI        2.0          1153    1414.400
## 57               MICHAEL LU        MI        2.0          1092    1363.000
## 58             VIRAJ MOHILE        MI        2.0           917    1391.000
## 59                SEAN M MC        MI        2.0           853    1319.000
## 60               JULIA SHEN        MI        1.5           967    1330.200
## 61            JEZZEL FARKAS        ON        1.5           955    1327.286
## 62            ASHWIN BALAJI        MI        1.0          1530    1186.000
## 63     THOMAS JOSEPH HOSMER        MI        1.0          1175    1350.200
## 64                   BEN LI        MI        1.0          1163    1263.000

Writing the CSV file

# Write the player summary as a standard CSV file in the working directory.
# row.names = FALSE omits the automatic 1..N row-name column: with
# write.table()'s defaults that column is written without a matching header
# cell, leaving the header line one field shorter than every data line.
write.csv(csv, file = "Project1_607_chessinfo.csv", row.names = FALSE)