library(stringr)
# Read the raw tournament report as plain text, one vector element per line.
# readLines() is used instead of read.table(sep = ","): the input is not CSV,
# and a table parser would split a line on any comma and could mangle lines
# containing quote or "#" characters.
chess_data <- readLines("https://raw.githubusercontent.com/Sizzlo/chessratings/master/tournamentinfo.txt")
# Drop the first four lines (presumably the report header) so that the data
# starts at line 5; tail() with a negative count is safe on short input.
chess_data <- tail(chess_data, -4)
# Extract the id of each player: the digits that sit directly before a
# whitespace character and a "|".
# "\\d+" replaces the earlier "\\d{1,2}", which silently truncated ids of
# three or more digits (e.g. "100 |" produced "00"). The set of matching
# positions is unchanged, so existing data yields the same ids.
id_pattern <- "\\d+(?=\\s\\|)"
chess_id <- unlist(str_extract_all(chess_data, id_pattern))
chess_id
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14"
## [15] "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28"
## [29] "29" "30" "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42"
## [43] "43" "44" "45" "46" "47" "48" "49" "50" "51" "52" "53" "54" "55" "56"
## [57] "57" "58" "59" "60" "61" "62" "63" "64"
# Player names: a run of two or more upper-case words, each followed by one
# whitespace character (so every match keeps a single trailing whitespace).
name_pattern <- "([[:upper:]]+\\s){2,}"
chess_names <- unlist(
  str_extract_all(string = chess_data, pattern = name_pattern)
)
chess_names
## [1] "GARY HUA " "DAKSHESH DARURI "
## [3] "ADITYA BAJAJ " "PATRICK H SCHILLING "
## [5] "HANSHI ZUO " "HANSEN SONG "
## [7] "GARY DEE SWATHELL " "EZEKIEL HOUGHTON "
## [9] "STEFANO LEE " "ANVIT RAO "
## [11] "CAMERON WILLIAM MC LEMAN " "KENNETH J TACK "
## [13] "TORRANCE HENRY JR " "BRADLEY SHAW "
## [15] "ZACHARY JAMES HOUGHTON " "MIKE NIKITIN "
## [17] "RONALD GRZEGORCZYK " "DAVID SUNDEEN "
## [19] "DIPANKAR ROY " "JASON ZHENG "
## [21] "DINH DANG BUI " "EUGENE L MCCLURE "
## [23] "ALAN BUI " "MICHAEL R ALDRICH "
## [25] "LOREN SCHWIEBERT " "MAX ZHU "
## [27] "GAURAV GIDWANI " "SOFIA ADINA "
## [29] "CHIEDOZIE OKORIE " "GEORGE AVERY JONES "
## [31] "RISHI SHETTY " "JOSHUA PHILIP MATHEWS "
## [33] "JADE GE " "MICHAEL JEFFERY THOMAS "
## [35] "JOSHUA DAVID LEE " "SIDDHARTH JHA "
## [37] "AMIYATOSH PWNANANDAM " "BRIAN LIU "
## [39] "JOEL R HENDON " "FOREST ZHANG "
## [41] "KYLE WILLIAM MURPHY " "JARED GE "
## [43] "ROBERT GLEN VASEY " "JUSTIN D SCHILLING "
## [45] "DEREK YAN " "JACOB ALEXANDER LAVALLEY "
## [47] "ERIC WRIGHT " "DANIEL KHAIN "
## [49] "MICHAEL J MARTIN " "SHIVAM JHA "
## [51] "TEJAS AYYAGARI " "ETHAN GUO "
## [53] "JOSE C YBARRA " "LARRY HODGE "
## [55] "ALEX KONG " "MARISA RICCI "
## [57] "MICHAEL LU " "VIRAJ MOHILE "
## [59] "SEAN M MC CORMICK " "JULIA SHEN "
## [61] "JEZZEL FARKAS " "ASHWIN BALAJI "
## [63] "THOMAS JOSEPH HOSMER " "BEN LI "
# Player states: two upper-case letters plus the whitespace character that
# directly precedes a "|" (each match therefore ends with that whitespace).
state_pattern <- "([[:upper:]]){2}\\s(?=\\|)"
chess_states <- unlist(
  str_extract_all(string = chess_data, pattern = state_pattern)
)
chess_states
## [1] "ON " "MI " "MI " "MI " "MI " "OH " "MI " "MI " "ON " "MI " "MI "
## [12] "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "ON " "MI "
## [23] "ON " "MI " "MI " "ON " "MI " "MI " "MI " "ON " "MI " "ON " "MI "
## [34] "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI "
## [45] "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI "
## [56] "MI " "MI " "MI " "MI " "MI " "ON " "MI " "MI " "MI "
# Extract each player's total points: digits, a decimal point, one digit.
# The pattern lives in its own variable (matching id_pattern, name_pattern and
# state_pattern) instead of being stored in chess_points and then overwritten.
# "\\d+" allows totals of 10 or more; a single "\\d" would have turned "10.5"
# into "0.5". Values are returned as character strings.
points_pattern <- "\\d+\\.\\d"
chess_points <- unlist(str_extract_all(chess_data, points_pattern))
chess_points
## [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" "5.0" "5.0" "5.0" "4.5"
## [12] "4.5" "4.5" "4.5" "4.5" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0"
## [23] "4.0" "4.0" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5"
## [34] "3.5" "3.5" "3.5" "3.5" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0"
## [45] "3.0" "3.0" "2.5" "2.5" "2.5" "2.5" "2.5" "2.5" "2.0" "2.0" "2.0"
## [56] "2.0" "2.0" "2.0" "2.0" "1.5" "1.5" "1.0" "1.0" "1.0"
# Locate each player's pre-rating: the literal "/ R: ", up to two further
# whitespace characters, then one to four digits. The whole matched text
# (prefix included) is kept here.
pre_ratings <- "(/ R: ([:space:]{0,2})([[:digit:]]{1,4}))"
chess_preratings <- unlist(
  str_extract_all(string = chess_data, pattern = pre_ratings)
)
chess_preratings
## [1] "/ R: 1794" "/ R: 1553" "/ R: 1384" "/ R: 1716" "/ R: 1655"
## [6] "/ R: 1686" "/ R: 1649" "/ R: 1641" "/ R: 1411" "/ R: 1365"
## [11] "/ R: 1712" "/ R: 1663" "/ R: 1666" "/ R: 1610" "/ R: 1220"
## [16] "/ R: 1604" "/ R: 1629" "/ R: 1600" "/ R: 1564" "/ R: 1595"
## [21] "/ R: 1563" "/ R: 1555" "/ R: 1363" "/ R: 1229" "/ R: 1745"
## [26] "/ R: 1579" "/ R: 1552" "/ R: 1507" "/ R: 1602" "/ R: 1522"
## [31] "/ R: 1494" "/ R: 1441" "/ R: 1449" "/ R: 1399" "/ R: 1438"
## [36] "/ R: 1355" "/ R: 980" "/ R: 1423" "/ R: 1436" "/ R: 1348"
## [41] "/ R: 1403" "/ R: 1332" "/ R: 1283" "/ R: 1199" "/ R: 1242"
## [46] "/ R: 377" "/ R: 1362" "/ R: 1382" "/ R: 1291" "/ R: 1056"
## [51] "/ R: 1011" "/ R: 935" "/ R: 1393" "/ R: 1270" "/ R: 1186"
## [56] "/ R: 1153" "/ R: 1092" "/ R: 917" "/ R: 853" "/ R: 967"
## [61] "/ R: 955" "/ R: 1530" "/ R: 1175" "/ R: 1163"
# Keep only the digits of each "/ R: ..." match; the values stay character
# strings at this point.
chess_preratings2 <- unlist(
  str_extract_all(string = chess_preratings, pattern = "([[:digit:]]{1,4})")
)
chess_preratings2
## [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980" "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377" "1362" "1382" "1291" "1056"
## [51] "1011" "935" "1393" "1270" "1186" "1153" "1092" "917" "853" "967"
## [61] "955" "1530" "1175" "1163"
# Extract the opponents each player faced, as a numeric matrix with one row
# per player and one column per round (NA where the field holds no number).
#
# For every "|" in the text the pattern captures what directly precedes it:
# either a run of digits or a single blank. The code below assumes that the
# matches come in groups of 10 per line, that each player occupies two such
# lines, and that fields 4 to 10 of the first line are the round results.
opponents_pattern <- "(\\d{1,}|[[:blank:]]{1})(?=\\|)"
field_ends <- unlist(str_extract_all(unlist(chess_data), opponents_pattern))
field_ends[field_ends == " "] <- NA

fields_per_line <- 10L
lines_per_player <- 2L
round_fields <- 4:10

# Fail loudly if the layout assumption is violated, rather than silently
# recycling or misaligning values.
if (length(field_ends) %% (fields_per_line * lines_per_player) != 0) {
  stop(
    "Unexpected tournament layout: number of '|'-terminated fields is not a ",
    "multiple of ", fields_per_line * lines_per_player, ".",
    call. = FALSE
  )
}

# One row per line of fields; keep only the first line of every player.
field_matrix <- matrix(field_ends, ncol = fields_per_line, byrow = TRUE)
is_first_line <- seq_len(nrow(field_matrix)) %% lines_per_player == 1
round_strings <- field_matrix[is_first_line, round_fields, drop = FALSE]

# as.numeric() drops the dimensions, so rebuild the matrix afterwards. The
# number of players is taken from the data instead of being hard-coded.
player_opponents <- matrix(
  as.numeric(round_strings),
  nrow = nrow(round_strings),
  ncol = ncol(round_strings)
)
# Average pre-rating of each player's opponents.
# Opponent numbers are looked up in chess_id (rather than used as raw vector
# positions), so the result stays correct even if ids are not exactly 1..n.
# Unplayed rounds (NA) and unknown ids yield NA and are ignored by the mean.
opponent_rows <- match(as.vector(player_opponents), as.numeric(chess_id))
opponent_ratings <- matrix(
  as.numeric(chess_preratings2)[opponent_rows],
  nrow = nrow(player_opponents),
  ncol = ncol(player_opponents)
)
chess_avg <- rowMeans(opponent_ratings, na.rm = TRUE)
# Assemble the final table, one row per player.
# The regex matches for names and states carry padding whitespace, and points
# and pre-ratings were extracted as text; trim and convert them here so the
# table (and any file written from it) holds clean strings and real numbers.
# Column names are kept identical to the source variable names.
chess_table <- data.frame(
  chess_names = trimws(chess_names),
  chess_states = trimws(chess_states),
  chess_points = as.numeric(chess_points),
  chess_preratings2 = as.numeric(chess_preratings2),
  chess_avg = chess_avg,
  stringsAsFactors = FALSE
)
chess_table
## chess_names chess_states chess_points chess_preratings2
## 1 GARY HUA ON 6.0 1794
## 2 DAKSHESH DARURI MI 6.0 1553
## 3 ADITYA BAJAJ MI 6.0 1384
## 4 PATRICK H SCHILLING MI 5.5 1716
## 5 HANSHI ZUO MI 5.5 1655
## 6 HANSEN SONG OH 5.0 1686
## 7 GARY DEE SWATHELL MI 5.0 1649
## 8 EZEKIEL HOUGHTON MI 5.0 1641
## 9 STEFANO LEE ON 5.0 1411
## 10 ANVIT RAO MI 5.0 1365
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712
## 12 KENNETH J TACK MI 4.5 1663
## 13 TORRANCE HENRY JR MI 4.5 1666
## 14 BRADLEY SHAW MI 4.5 1610
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220
## 16 MIKE NIKITIN MI 4.0 1604
## 17 RONALD GRZEGORCZYK MI 4.0 1629
## 18 DAVID SUNDEEN MI 4.0 1600
## 19 DIPANKAR ROY MI 4.0 1564
## 20 JASON ZHENG MI 4.0 1595
## 21 DINH DANG BUI ON 4.0 1563
## 22 EUGENE L MCCLURE MI 4.0 1555
## 23 ALAN BUI ON 4.0 1363
## 24 MICHAEL R ALDRICH MI 4.0 1229
## 25 LOREN SCHWIEBERT MI 3.5 1745
## 26 MAX ZHU ON 3.5 1579
## 27 GAURAV GIDWANI MI 3.5 1552
## 28 SOFIA ADINA MI 3.5 1507
## 29 CHIEDOZIE OKORIE MI 3.5 1602
## 30 GEORGE AVERY JONES ON 3.5 1522
## 31 RISHI SHETTY MI 3.5 1494
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441
## 33 JADE GE MI 3.5 1449
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399
## 35 JOSHUA DAVID LEE MI 3.5 1438
## 36 SIDDHARTH JHA MI 3.5 1355
## 37 AMIYATOSH PWNANANDAM MI 3.5 980
## 38 BRIAN LIU MI 3.0 1423
## 39 JOEL R HENDON MI 3.0 1436
## 40 FOREST ZHANG MI 3.0 1348
## 41 KYLE WILLIAM MURPHY MI 3.0 1403
## 42 JARED GE MI 3.0 1332
## 43 ROBERT GLEN VASEY MI 3.0 1283
## 44 JUSTIN D SCHILLING MI 3.0 1199
## 45 DEREK YAN MI 3.0 1242
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377
## 47 ERIC WRIGHT MI 2.5 1362
## 48 DANIEL KHAIN MI 2.5 1382
## 49 MICHAEL J MARTIN MI 2.5 1291
## 50 SHIVAM JHA MI 2.5 1056
## 51 TEJAS AYYAGARI MI 2.5 1011
## 52 ETHAN GUO MI 2.5 935
## 53 JOSE C YBARRA MI 2.0 1393
## 54 LARRY HODGE MI 2.0 1270
## 55 ALEX KONG MI 2.0 1186
## 56 MARISA RICCI MI 2.0 1153
## 57 MICHAEL LU MI 2.0 1092
## 58 VIRAJ MOHILE MI 2.0 917
## 59 SEAN M MC CORMICK MI 2.0 853
## 60 JULIA SHEN MI 1.5 967
## 61 JEZZEL FARKAS ON 1.5 955
## 62 ASHWIN BALAJI MI 1.0 1530
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175
## 64 BEN LI MI 1.0 1163
## chess_avg
## 1 1605.286
## 2 1469.286
## 3 1563.571
## 4 1573.571
## 5 1500.857
## 6 1518.714
## 7 1372.143
## 8 1468.429
## 9 1523.143
## 10 1554.143
## 11 1467.571
## 12 1506.167
## 13 1497.857
## 14 1515.000
## 15 1483.857
## 16 1385.800
## 17 1498.571
## 18 1480.000
## 19 1426.286
## 20 1410.857
## 21 1470.429
## 22 1300.333
## 23 1213.857
## 24 1357.000
## 25 1363.286
## 26 1506.857
## 27 1221.667
## 28 1522.143
## 29 1313.500
## 30 1144.143
## 31 1259.857
## 32 1378.714
## 33 1276.857
## 34 1375.286
## 35 1149.714
## 36 1388.167
## 37 1384.800
## 38 1539.167
## 39 1429.571
## 40 1390.571
## 41 1248.500
## 42 1149.857
## 43 1106.571
## 44 1327.000
## 45 1152.000
## 46 1357.714
## 47 1392.000
## 48 1355.800
## 49 1285.800
## 50 1296.000
## 51 1356.143
## 52 1494.571
## 53 1345.333
## 54 1206.167
## 55 1406.000
## 56 1414.400
## 57 1363.000
## 58 1391.000
## 59 1319.000
## 60 1330.200
## 61 1327.286
## 62 1186.000
## 63 1350.200
## 64 1263.000
# Save the finished table as "chessrankings.csv" (relative path). With
# write.csv's defaults the row names are written as a leading unnamed column.
write.csv(x = chess_table, file = "chessrankings.csv")