In this project, we’re given a text file with chess tournament results where the information has some structure. Our job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players:
Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents
For the first player, the information would be:
Gary Hua, ON, 6.0, 1794, 1605
1605 was calculated by using the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663, 1716, and dividing by the total number of games played.
##Load libraries used
library(tidyverse)
library(stringr)
library(ggplot2)
library(readr)
library(kableExtra)
##Read file from Github
chess_tbl = readLines("https://raw.githubusercontent.com/carolc57/Data607-Fall23/main/chess_scores.txt")
## Warning in
## readLines("https://raw.githubusercontent.com/carolc57/Data607-Fall23/main/chess_scores.txt"):
## incomplete final line found on
## 'https://raw.githubusercontent.com/carolc57/Data607-Fall23/main/chess_scores.txt'
head(chess_tbl, 15)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
## [11] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [12] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [13] "-----------------------------------------------------------------------------------------"
## [14] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [15] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
##Remove rows w/ dashes
#remove rows w/ dashes
chess_tbl_mod <- str_replace_all(string = chess_tbl, pattern = "^-+$", "")
head (chess_tbl_mod)
## [1] ""
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] ""
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
#Remove empty vectors
chess_tbl_mod <- chess_tbl[sapply(chess_tbl_mod, nchar) > 0]
head(chess_tbl_mod)
## [1] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [2] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [3] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [4] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [5] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [6] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
##Remove Header rows
chess_tbl_mod <- chess_tbl_mod[-c(1:2)] ##remove rows 1 and 2 because interfere w/ later operations
head(chess_tbl_mod, 16)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [3] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [4] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [5] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [6] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [7] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [8] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [9] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [10] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [11] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
## [12] " OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
## [13] " 7 | GARY DEE SWATHELL |5.0 |W 57|W 46|W 13|W 11|L 1|W 9|L 2|"
## [14] " MI | 11146376 / R: 1649 ->1673 |N:3 |W |B |W |B |B |W |W |"
## [15] " 8 | EZEKIEL HOUGHTON |5.0 |W 3|W 32|L 14|L 9|W 47|W 28|W 19|"
## [16] " MI | 15142253 / R: 1641P17->1657P24 |N:3 |B |W |B |W |B |W |W |"
##Since the data for one competitor is spread between two rows, I’ll need to create two separate dataframe and paste them together
chess_tbl_mod_odd = chess_tbl_mod[seq(1,128,2)]
chess_tbl_mod_even = chess_tbl_mod[seq(2,128,2)]
#The combined data frame
chess_tbl_mod_combined <- paste(chess_tbl_mod_odd, chess_tbl_mod_even)
head(chess_tbl_mod_combined, 16)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4| ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [2] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7| MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [3] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12| MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [4] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1| MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [5] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17| MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [6] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21| OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
## [7] " 7 | GARY DEE SWATHELL |5.0 |W 57|W 46|W 13|W 11|L 1|W 9|L 2| MI | 11146376 / R: 1649 ->1673 |N:3 |W |B |W |B |B |W |W |"
## [8] " 8 | EZEKIEL HOUGHTON |5.0 |W 3|W 32|L 14|L 9|W 47|W 28|W 19| MI | 15142253 / R: 1641P17->1657P24 |N:3 |B |W |B |W |B |W |W |"
## [9] " 9 | STEFANO LEE |5.0 |W 25|L 18|W 59|W 8|W 26|L 7|W 20| ON | 14954524 / R: 1411 ->1564 |N:2 |W |B |W |B |W |B |B |"
## [10] " 10 | ANVIT RAO |5.0 |D 16|L 19|W 55|W 31|D 6|W 25|W 18| MI | 14150362 / R: 1365 ->1544 |N:3 |W |W |B |B |W |B |W |"
## [11] " 11 | CAMERON WILLIAM MC LEMAN |4.5 |D 38|W 56|W 6|L 7|L 3|W 34|W 26| MI | 12581589 / R: 1712 ->1696 |N:3 |B |W |B |W |B |W |B |"
## [12] " 12 | KENNETH J TACK |4.5 |W 42|W 33|D 5|W 38|H |D 1|L 3| MI | 12681257 / R: 1663 ->1670 |N:3 |W |B |W |B | |W |B |"
## [13] " 13 | TORRANCE HENRY JR |4.5 |W 36|W 27|L 7|D 5|W 33|L 3|W 32| MI | 15082995 / R: 1666 ->1662 |N:3 |B |W |B |B |W |W |B |"
## [14] " 14 | BRADLEY SHAW |4.5 |W 54|W 44|W 8|L 1|D 27|L 5|W 31| MI | 10131499 / R: 1610 ->1618 |N:3 |W |B |W |W |B |B |W |"
## [15] " 15 | ZACHARY JAMES HOUGHTON |4.5 |D 19|L 16|W 30|L 22|W 54|W 33|W 38| MI | 15619130 / R: 1220P13->1416P20 |N:3 |B |B |W |W |B |B |W |"
## [16] " 16 | MIKE NIKITIN |4.0 |D 10|W 15|H |W 39|L 2|W 36|U | MI | 10295068 / R: 1604 ->1613 |N:3 |B |W | |B |W |B | |"
#Tidy Player data. Use various str and regex functions to parse data
chess_final <- chess_tbl_mod_combined #change file name to preserve data prior to manipulation
head(chess_final)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4| ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [2] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7| MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [3] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12| MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [4] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1| MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [5] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17| MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [6] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21| OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
PlayerIDTemp <- as.integer(str_extract(chess_final, "\\d+"))
PlayerID <- subset(c(PlayerIDTemp), c(PlayerIDTemp)!="0",64)
Name <- str_extract(string = chess_final, pattern = "\\s([[:alpha:] ]{5,})\\b\\s") #gets name
Player_Points <- str_extract(string = chess_final, pattern = "[0-9]\\.[0-9]") #gets total points for each player
State <- unlist(str_extract_all(chess_final, "\\|[[:space:]]{1,}[[A-Z]]{2} \\|"))
State <- str_replace_all(State, pattern = "(\\|[[:space:]]{1,})|([[:space:]]{1,}\\|)", replacement = "")
Player_rating <- str_extract(string = chess_final, pattern = "\\s\\d{3,4}[^\\d]")
#remove "P" at end of rating for some players
Player_rating <- as.integer(str_extract(Player_rating, "\\d+"))
USCF_id <- str_extract(string = chess_final, pattern ="[[:digit:]]{8}")
#Opponent Data - parse player’s opponets per round
Opp_Id <-str_extract_all(str_extract_all(chess_final, "\\d+\\|"), "\\d+")
## Warning in stri_extract_all_regex(string, pattern, simplify = simplify, :
## argument is not an atomic vector; coercing
Opp_Id <-subset(c(Opp_Id), c(Opp_Id)!="0")
head(Opp_Id)
## [[1]]
## [1] "39" "21" "18" "14" "7" "12" "4"
##
## [[2]]
## [1] "63" "58" "4" "17" "16" "20" "7"
##
## [[3]]
## [1] "8" "61" "25" "21" "11" "13" "12"
##
## [[4]]
## [1] "23" "28" "2" "26" "5" "19" "1"
##
## [[5]]
## [1] "45" "37" "12" "13" "4" "14" "17"
##
## [[6]]
## [1] "34" "29" "11" "35" "10" "27" "21"
##Function to calculate opponent average ratings
x<-length(chess_tbl_mod)
OppAvgRtg <-numeric(x/2)
for (i in 1:(x/2)) {
OppAvgRtg[i] <- mean(Player_rating[as.numeric(unlist(Opp_Id[PlayerID[i]]))])
}
OppAvgRtg <- round((OppAvgRtg),0)
##chess_ratings dataframe
chess_ratings <- data.frame(Name,State,Player_Points,Player_rating, OppAvgRtg)
##Write output to .csv file
write.csv(chess_ratings, file = "Carols Chess Ratings.csv");
##Chess Ratings Table
kable(chess_ratings) %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "center")
| Name | State | Player_Points | Player_rating | OppAvgRtg |
|---|---|---|---|---|
| GARY HUA | ON | 6.0 | 1794 | 1605 |
| DAKSHESH DARURI | MI | 6.0 | 1553 | 1469 |
| ADITYA BAJAJ | MI | 6.0 | 1384 | 1564 |
| PATRICK H SCHILLING | MI | 5.5 | 1716 | 1574 |
| HANSHI ZUO | MI | 5.5 | 1655 | 1501 |
| HANSEN SONG | OH | 5.0 | 1686 | 1519 |
| GARY DEE SWATHELL | MI | 5.0 | 1649 | 1372 |
| EZEKIEL HOUGHTON | MI | 5.0 | 1641 | 1468 |
| STEFANO LEE | ON | 5.0 | 1411 | 1523 |
| ANVIT RAO | MI | 5.0 | 1365 | 1554 |
| CAMERON WILLIAM MC LEMAN | MI | 4.5 | 1712 | 1468 |
| KENNETH J TACK | MI | 4.5 | 1663 | 1506 |
| TORRANCE HENRY JR | MI | 4.5 | 1666 | 1498 |
| BRADLEY SHAW | MI | 4.5 | 1610 | 1515 |
| ZACHARY JAMES HOUGHTON | MI | 4.5 | 1220 | 1484 |
| MIKE NIKITIN | MI | 4.0 | 1604 | 1386 |
| RONALD GRZEGORCZYK | MI | 4.0 | 1629 | 1499 |
| DAVID SUNDEEN | MI | 4.0 | 1600 | 1480 |
| DIPANKAR ROY | MI | 4.0 | 1564 | 1426 |
| JASON ZHENG | MI | 4.0 | 1595 | 1411 |
| DINH DANG BUI | ON | 4.0 | 1563 | 1470 |
| EUGENE L MCCLURE | MI | 4.0 | 1555 | 1300 |
| ALAN BUI | ON | 4.0 | 1363 | 1214 |
| MICHAEL R ALDRICH | MI | 4.0 | 1229 | 1357 |
| LOREN SCHWIEBERT | MI | 3.5 | 1745 | 1363 |
| MAX ZHU | ON | 3.5 | 1579 | 1507 |
| GAURAV GIDWANI | MI | 3.5 | 1552 | 1222 |
| SOFIA ADINA | MI | 3.5 | 1507 | 1522 |
| CHIEDOZIE OKORIE | MI | 3.5 | 1602 | 1314 |
| GEORGE AVERY JONES | ON | 3.5 | 1522 | 1144 |
| RISHI SHETTY | MI | 3.5 | 1494 | 1260 |
| JOSHUA PHILIP MATHEWS | ON | 3.5 | 1441 | 1379 |
| JADE GE | MI | 3.5 | 1449 | 1277 |
| MICHAEL JEFFERY THOMAS | MI | 3.5 | 1399 | 1375 |
| JOSHUA DAVID LEE | MI | 3.5 | 1438 | 1150 |
| SIDDHARTH JHA | MI | 3.5 | 1355 | 1388 |
| AMIYATOSH PWNANANDAM | MI | 3.5 | 980 | 1385 |
| BRIAN LIU | MI | 3.0 | 1423 | 1539 |
| JOEL R HENDON | MI | 3.0 | 1436 | 1430 |
| FOREST ZHANG | MI | 3.0 | 1348 | 1391 |
| KYLE WILLIAM MURPHY | MI | 3.0 | 1403 | 1248 |
| JARED GE | MI | 3.0 | 1332 | 1150 |
| ROBERT GLEN VASEY | MI | 3.0 | 1283 | 1107 |
| JUSTIN D SCHILLING | MI | 3.0 | 1199 | 1327 |
| DEREK YAN | MI | 3.0 | 1242 | 1152 |
| JACOB ALEXANDER LAVALLEY | MI | 3.0 | 377 | 1358 |
| ERIC WRIGHT | MI | 2.5 | 1362 | 1392 |
| DANIEL KHAIN | MI | 2.5 | 1382 | 1356 |
| MICHAEL J MARTIN | MI | 2.5 | 1291 | 1286 |
| SHIVAM JHA | MI | 2.5 | 1056 | 1296 |
| TEJAS AYYAGARI | MI | 2.5 | 1011 | 1356 |
| ETHAN GUO | MI | 2.5 | 935 | 1495 |
| JOSE C YBARRA | MI | 2.0 | 1393 | 1345 |
| LARRY HODGE | MI | 2.0 | 1270 | 1206 |
| ALEX KONG | MI | 2.0 | 1186 | 1406 |
| MARISA RICCI | MI | 2.0 | 1153 | 1414 |
| MICHAEL LU | MI | 2.0 | 1092 | 1363 |
| VIRAJ MOHILE | MI | 2.0 | 917 | 1391 |
| SEAN M MC CORMICK | MI | 2.0 | 853 | 1319 |
| JULIA SHEN | MI | 1.5 | 967 | 1330 |
| JEZZEL FARKAS | ON | 1.5 | 955 | 1327 |
| ASHWIN BALAJI | MI | 1.0 | 1530 | 1186 |
| THOMAS JOSEPH HOSMER | MI | 1.0 | 1175 | 1350 |
| BEN LI | MI | 1.0 | 1163 | 1263 |