library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
library(readr)
library(knitr)
# Location of the raw tournament cross-table text file.
link <- 'https://raw.githubusercontent.com/jonburns2454/DATA607/main/project%201%20data.txt'
# Load the file as a data frame holding one text line per row.
# `col.names = FALSE` is what gives the single column its "FALSE." name, and
# because read.csv() defaults to `header = TRUE`, the first line of the file is
# consumed as a header instead of being kept as a data row. The row offsets
# used further down rely on that.
chess_df <- read.csv(link, col.names = FALSE)
# Preview the first rows.
head(x = chess_df)
## FALSE.
## 1 Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round|
## 2 Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
## 3 -----------------------------------------------------------------------------------------
## 4 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## 5 ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## 6 -----------------------------------------------------------------------------------------
# Flatten the data frame into a one-column matrix of text lines.
tournament_data <- chess_df %>%
  unlist() %>%
  matrix(byrow = TRUE)
# Starting at row 4, every third row is the first line of a player's record
# (pair number, name, total points, per-round results).
player_data <- tournament_data[seq(from = 4, to = length(tournament_data), by = 3)]
head(x = player_data)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [3] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [4] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [5] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [6] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
# Starting at row 5, every third row is the second line of a player's record
# (state, USCF ID, pre- and post-tournament rating).
game_data <- tournament_data[seq(from = 5, to = length(tournament_data), by = 3)]
head(x = game_data)
## [1] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [2] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [3] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [4] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [5] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [6] " OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
The following fields are extracted with regular expressions so that they can be combined into a data frame.
# ID: the pair number is the first run of digits on each player line.
ID <- player_data %>%
  str_extract('\\d+') %>%
  as.numeric()
head(x = ID)
## [1] 1 2 3 4 5 6
# Names
# Capture the run of non-digit characters plus the two or more
# whitespace-separated words that form the player name on each line.
# With simplify = TRUE the result is a character matrix, one row per line.
Names <- str_extract_all(
  player_data,
  "\\D+\\w+[[:space:]]\\w+([[:space:]]\\w+)*",
  simplify = TRUE
)
# Strip the "|" separator in front of the name, then trim the padding that is
# left around it so the values stored in the data frame and the CSV carry no
# leading or trailing blanks. Both gsub() and trimws() keep the matrix shape.
Names <- trimws(gsub('[^[:alnum:] ]', '', Names))
# Drop rows in which nothing was matched.
Names <- Names[!apply(Names == "", 1, all), ]
head(Names)
## [1] "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ"
## [4] "PATRICK H SCHILLING" "HANSHI ZUO" "HANSEN SONG"
# State data: the two capital letters that sit immediately before a
# whitespace character and "|" on each rating line.
States <- game_data %>%
  str_extract_all("[[:upper:]]{2}(?=\\s\\|)") %>%
  unlist()
head(x = States)
## [1] "ON" "MI" "MI" "MI" "MI" "OH"
# Total points: every digit-dot-digit token found on the player lines,
# kept as text.
Total_Points <- player_data %>%
  str_extract_all("\\d\\.\\d") %>%
  unlist()
head(x = Total_Points)
## [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0"
# Pre-rating
# Grab an "R" or ":" followed by whitespace and digits, then strip every
# character that is neither alphanumeric nor a space (this removes the ":").
# The values stay as text here; they are converted to numbers later on.
Pre_Ratings <- game_data %>%
  str_extract_all("[R:]([[:space:]]+\\d+)") %>%
  unlist() %>%
  gsub(pattern = '[^[:alnum:] ]', replacement = '', x = .)
# Rounds
# Pull every "<result letter><2+ whitespace><opponent number>" token from the
# player lines. With simplify = TRUE the result is a character matrix with one
# row per player line, padded with "" where a player has fewer matches.
# The class is written [WLD]: inside [] a "|" is a literal pipe, not
# alternation, so it must not be part of the class.
Round <- str_extract_all(player_data, "[WLD]\\s{2,}\\d+", simplify = TRUE)
# Take the row count from the match matrix itself instead of assuming that the
# widest row always holds exactly seven games.
num_rows <- nrow(Round)
# Remove the result letter, leaving only the (space-padded) opponent number.
Round <- str_remove_all(Round, "[WLD]")
# Rebuild the players-by-games matrix column-wise, in case the previous step
# returned a flat vector.
opponent_matrix <- matrix(Round, nrow = num_rows, byrow = FALSE)
head(opponent_matrix)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] " 39" " 21" " 18" " 14" " 7" " 12" " 4"
## [2,] " 63" " 58" " 4" " 17" " 16" " 20" " 7"
## [3,] " 8" " 61" " 25" " 21" " 11" " 13" " 12"
## [4,] " 23" " 28" " 2" " 26" " 5" " 19" " 1"
## [5,] " 45" " 37" " 12" " 13" " 4" " 14" " 17"
## [6,] " 34" " 29" " 11" " 35" " 10" " 27" " 21"
Average opponent pre-rating calculation
# Average pre-rating of each player's opponents, one row per player.
# Computed on the whole matrix at once rather than by growing a data frame
# with rbind() inside a loop.
Pre_Ratings <- as.numeric(Pre_Ratings)
# A cell holds an opponent's pair number as text, or "" when there is none.
played <- opponent_matrix != ""
# Look up every opponent's pre-rating by pair number. Vector indexing drops
# the matrix shape, so it is restored afterwards.
opp_ratings <- Pre_Ratings[as.numeric(opponent_matrix)]
dim(opp_ratings) <- dim(opponent_matrix)
# Empty cells must not contribute to the sum.
opp_ratings[!played] <- 0
nOpps <- rowSums(played)
sumOpps <- rowSums(opp_ratings)
# Players without any recorded opponent get NA instead of a division by zero.
OppRating <- data.frame(OppAvg = ifelse(nOpps > 0, sumOpps / nOpps, NA_real_))
print(OppRating)
## OppAvg
## 1 1605.286
## 2 1469.286
## 3 1563.571
## 4 1573.571
## 5 1500.857
## 6 1518.714
## 7 1372.143
## 8 1468.429
## 9 1523.143
## 10 1554.143
## 11 1467.571
## 12 1506.167
## 13 1497.857
## 14 1515.000
## 15 1483.857
## 16 1385.800
## 17 1498.571
## 18 1480.000
## 19 1426.286
## 20 1410.857
## 21 1470.429
## 22 1300.333
## 23 1213.857
## 24 1357.000
## 25 1363.286
## 26 1506.857
## 27 1221.667
## 28 1522.143
## 29 1313.500
## 30 1144.143
## 31 1259.857
## 32 1378.714
## 33 1276.857
## 34 1375.286
## 35 1149.714
## 36 1388.167
## 37 1384.800
## 38 1539.167
## 39 1429.571
## 40 1390.571
## 41 1248.500
## 42 1149.857
## 43 1106.571
## 44 1327.000
## 45 1152.000
## 46 1357.714
## 47 1392.000
## 48 1355.800
## 49 1285.800
## 50 1296.000
## 51 1356.143
## 52 1494.571
## 53 1345.333
## 54 1206.167
## 55 1406.000
## 56 1414.400
## 57 1363.000
## 58 1391.000
## 59 1319.000
## 60 1330.200
## 61 1327.286
## 62 1186.000
## 63 1350.200
## 64 1263.000
Create the final dataframe and table for presentation
# Assemble the per-player summary: name, state, total points, pre-rating and
# average opponent pre-rating.
Tournament_Results <- data.frame(Names, States, Total_Points, Pre_Ratings, OppRating)
# write.csv() ignores `append` and only warns about it, so the argument is
# left out.
write.csv(Tournament_Results, 'tournament.csv')
# Render the summary as an HTML table with numeric columns rounded to whole
# numbers.
# NOTE(review): `align` has six letters while the table has five columns —
# confirm the intended alignment string.
kable(Tournament_Results, format = "html", align = "clcccc", digits = 0)
| Names | States | Total_Points | Pre_Ratings | OppAvg |
|---|---|---|---|---|
| GARY HUA | ON | 6.0 | 1794 | 1605 |
| DAKSHESH DARURI | MI | 6.0 | 1553 | 1469 |
| ADITYA BAJAJ | MI | 6.0 | 1384 | 1564 |
| PATRICK H SCHILLING | MI | 5.5 | 1716 | 1574 |
| HANSHI ZUO | MI | 5.5 | 1655 | 1501 |
| HANSEN SONG | OH | 5.0 | 1686 | 1519 |
| GARY DEE SWATHELL | MI | 5.0 | 1649 | 1372 |
| EZEKIEL HOUGHTON | MI | 5.0 | 1641 | 1468 |
| STEFANO LEE | ON | 5.0 | 1411 | 1523 |
| ANVIT RAO | MI | 5.0 | 1365 | 1554 |
| CAMERON WILLIAM MC LEMAN | MI | 4.5 | 1712 | 1468 |
| KENNETH J TACK | MI | 4.5 | 1663 | 1506 |
| TORRANCE HENRY JR | MI | 4.5 | 1666 | 1498 |
| BRADLEY SHAW | MI | 4.5 | 1610 | 1515 |
| ZACHARY JAMES HOUGHTON | MI | 4.5 | 1220 | 1484 |
| MIKE NIKITIN | MI | 4.0 | 1604 | 1386 |
| RONALD GRZEGORCZYK | MI | 4.0 | 1629 | 1499 |
| DAVID SUNDEEN | MI | 4.0 | 1600 | 1480 |
| DIPANKAR ROY | MI | 4.0 | 1564 | 1426 |
| JASON ZHENG | MI | 4.0 | 1595 | 1411 |
| DINH DANG BUI | ON | 4.0 | 1563 | 1470 |
| EUGENE L MCCLURE | MI | 4.0 | 1555 | 1300 |
| ALAN BUI | ON | 4.0 | 1363 | 1214 |
| MICHAEL R ALDRICH | MI | 4.0 | 1229 | 1357 |
| LOREN SCHWIEBERT | MI | 3.5 | 1745 | 1363 |
| MAX ZHU | ON | 3.5 | 1579 | 1507 |
| GAURAV GIDWANI | MI | 3.5 | 1552 | 1222 |
| SOFIA ADINA STANESCU | MI | 3.5 | 1507 | 1522 |
| CHIEDOZIE OKORIE | MI | 3.5 | 1602 | 1314 |
| GEORGE AVERY JONES | ON | 3.5 | 1522 | 1144 |
| RISHI SHETTY | MI | 3.5 | 1494 | 1260 |
| JOSHUA PHILIP MATHEWS | ON | 3.5 | 1441 | 1379 |
| JADE GE | MI | 3.5 | 1449 | 1277 |
| MICHAEL JEFFERY THOMAS | MI | 3.5 | 1399 | 1375 |
| JOSHUA DAVID LEE | MI | 3.5 | 1438 | 1150 |
| SIDDHARTH JHA | MI | 3.5 | 1355 | 1388 |
| AMIYATOSH PWNANANDAM | MI | 3.5 | 980 | 1385 |
| BRIAN LIU | MI | 3.0 | 1423 | 1539 |
| JOEL R HENDON | MI | 3.0 | 1436 | 1430 |
| FOREST ZHANG | MI | 3.0 | 1348 | 1391 |
| KYLE WILLIAM MURPHY | MI | 3.0 | 1403 | 1248 |
| JARED GE | MI | 3.0 | 1332 | 1150 |
| ROBERT GLEN VASEY | MI | 3.0 | 1283 | 1107 |
| JUSTIN D SCHILLING | MI | 3.0 | 1199 | 1327 |
| DEREK YAN | MI | 3.0 | 1242 | 1152 |
| JACOB ALEXANDER LAVALLEY | MI | 3.0 | 377 | 1358 |
| ERIC WRIGHT | MI | 2.5 | 1362 | 1392 |
| DANIEL KHAIN | MI | 2.5 | 1382 | 1356 |
| MICHAEL J MARTIN | MI | 2.5 | 1291 | 1286 |
| SHIVAM JHA | MI | 2.5 | 1056 | 1296 |
| TEJAS AYYAGARI | MI | 2.5 | 1011 | 1356 |
| ETHAN GUO | MI | 2.5 | 935 | 1495 |
| JOSE C YBARRA | MI | 2.0 | 1393 | 1345 |
| LARRY HODGE | MI | 2.0 | 1270 | 1206 |
| ALEX KONG | MI | 2.0 | 1186 | 1406 |
| MARISA RICCI | MI | 2.0 | 1153 | 1414 |
| MICHAEL LU | MI | 2.0 | 1092 | 1363 |
| VIRAJ MOHILE | MI | 2.0 | 917 | 1391 |
| SEAN M MC CORMICK | MI | 2.0 | 853 | 1319 |
| JULIA SHEN | MI | 1.5 | 967 | 1330 |
| JEZZEL FARKAS | ON | 1.5 | 955 | 1327 |
| ASHWIN BALAJI | MI | 1.0 | 1530 | 1186 |
| THOMAS JOSEPH HOSMER | MI | 1.0 | 1175 | 1350 |
| BEN LI | MI | 1.0 | 1163 | 1263 |