library(stringr)
# Read the tournament report as raw text lines. The later regexes treat the
# report as "|"-delimited text, not as a CSV, so readLines() is used instead
# of read.table(sep = ","): it keeps every line intact even if it contains a
# quote, an apostrophe, a "#" or a comma.
chess_data <- readLines(
  "https://raw.githubusercontent.com/Sizzlo/chessratings/master/tournamentinfo.txt",
  warn = FALSE
)

# Drop the first four lines (the data kept starts at line 5). tail() with a
# negative n also copes with input shorter than five lines, where
# 5:length(x) would count backwards.
chess_data <- tail(chess_data, -4)

# Player IDs: one or two digits that sit directly before a whitespace
# character and a "|" (the lookahead keeps both out of the match).
id_pattern <- "\\d{1,2}(?=\\s\\|)"
chess_id <- unlist(
  str_extract_all(string = chess_data, pattern = id_pattern)
)
chess_id
##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14"
## [15] "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28"
## [29] "29" "30" "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42"
## [43] "43" "44" "45" "46" "47" "48" "49" "50" "51" "52" "53" "54" "55" "56"
## [57] "57" "58" "59" "60" "61" "62" "63" "64"
# Player names: two or more runs of upper-case letters, each followed by one
# whitespace character. The whitespace after the last word is part of the
# match, so every extracted name ends with a single trailing space.
name_pattern <- "([[:upper:]]+\\s){2,}"
chess_names <- unlist(
  str_extract_all(string = chess_data, pattern = name_pattern)
)
chess_names
##  [1] "GARY HUA "                 "DAKSHESH DARURI "         
##  [3] "ADITYA BAJAJ "             "PATRICK H SCHILLING "     
##  [5] "HANSHI ZUO "               "HANSEN SONG "             
##  [7] "GARY DEE SWATHELL "        "EZEKIEL HOUGHTON "        
##  [9] "STEFANO LEE "              "ANVIT RAO "               
## [11] "CAMERON WILLIAM MC LEMAN " "KENNETH J TACK "          
## [13] "TORRANCE HENRY JR "        "BRADLEY SHAW "            
## [15] "ZACHARY JAMES HOUGHTON "   "MIKE NIKITIN "            
## [17] "RONALD GRZEGORCZYK "       "DAVID SUNDEEN "           
## [19] "DIPANKAR ROY "             "JASON ZHENG "             
## [21] "DINH DANG BUI "            "EUGENE L MCCLURE "        
## [23] "ALAN BUI "                 "MICHAEL R ALDRICH "       
## [25] "LOREN SCHWIEBERT "         "MAX ZHU "                 
## [27] "GAURAV GIDWANI "           "SOFIA ADINA "             
## [29] "CHIEDOZIE OKORIE "         "GEORGE AVERY JONES "      
## [31] "RISHI SHETTY "             "JOSHUA PHILIP MATHEWS "   
## [33] "JADE GE "                  "MICHAEL JEFFERY THOMAS "  
## [35] "JOSHUA DAVID LEE "         "SIDDHARTH JHA "           
## [37] "AMIYATOSH PWNANANDAM "     "BRIAN LIU "               
## [39] "JOEL R HENDON "            "FOREST ZHANG "            
## [41] "KYLE WILLIAM MURPHY "      "JARED GE "                
## [43] "ROBERT GLEN VASEY "        "JUSTIN D SCHILLING "      
## [45] "DEREK YAN "                "JACOB ALEXANDER LAVALLEY "
## [47] "ERIC WRIGHT "              "DANIEL KHAIN "            
## [49] "MICHAEL J MARTIN "         "SHIVAM JHA "              
## [51] "TEJAS AYYAGARI "           "ETHAN GUO "               
## [53] "JOSE C YBARRA "            "LARRY HODGE "             
## [55] "ALEX KONG "                "MARISA RICCI "            
## [57] "MICHAEL LU "               "VIRAJ MOHILE "            
## [59] "SEAN M MC CORMICK "        "JULIA SHEN "              
## [61] "JEZZEL FARKAS "            "ASHWIN BALAJI "           
## [63] "THOMAS JOSEPH HOSMER "     "BEN LI "
# Player states: exactly two upper-case letters plus one whitespace character
# that is immediately followed by "|". The whitespace is inside the match
# (only the "|" is a lookahead), so each value keeps a trailing space.
state_pattern <- "([[:upper:]]){2}\\s(?=\\|)"
chess_states <- unlist(
  str_extract_all(string = chess_data, pattern = state_pattern)
)
chess_states
##  [1] "ON " "MI " "MI " "MI " "MI " "OH " "MI " "MI " "ON " "MI " "MI "
## [12] "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "ON " "MI "
## [23] "ON " "MI " "MI " "ON " "MI " "MI " "MI " "ON " "MI " "ON " "MI "
## [34] "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI "
## [45] "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI "
## [56] "MI " "MI " "MI " "MI " "MI " "ON " "MI " "MI " "MI "
# Total points: a digit, a literal dot and a digit (e.g. "6.0").
# The regex gets its own name so that chess_points only ever holds the
# extracted values, matching the *_pattern naming used for IDs, names and
# states.
points_pattern <- "(\\d\\.\\d)"
chess_points <- unlist(str_extract_all(chess_data, points_pattern))
chess_points
##  [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" "5.0" "5.0" "5.0" "4.5"
## [12] "4.5" "4.5" "4.5" "4.5" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0"
## [23] "4.0" "4.0" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5"
## [34] "3.5" "3.5" "3.5" "3.5" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0"
## [45] "3.0" "3.0" "2.5" "2.5" "2.5" "2.5" "2.5" "2.5" "2.0" "2.0" "2.0"
## [56] "2.0" "2.0" "2.0" "2.0" "1.5" "1.5" "1.0" "1.0" "1.0"
# Pre-tournament rating fields: the literal "/ R: " followed by up to two
# extra whitespace characters (short ratings are padded) and one to four
# digits. The POSIX class is written as [[:space:]] -- a class name is only
# portable inside a bracket expression; the bare [:space:] form used before
# depended on how the regex engine happens to read it.
pre_ratings <- "(/ R: ([[:space:]]{0,2})([[:digit:]]{1,4}))"
chess_preratings <- unlist(str_extract_all(chess_data, pre_ratings))
chess_preratings
##  [1] "/ R: 1794" "/ R: 1553" "/ R: 1384" "/ R: 1716" "/ R: 1655"
##  [6] "/ R: 1686" "/ R: 1649" "/ R: 1641" "/ R: 1411" "/ R: 1365"
## [11] "/ R: 1712" "/ R: 1663" "/ R: 1666" "/ R: 1610" "/ R: 1220"
## [16] "/ R: 1604" "/ R: 1629" "/ R: 1600" "/ R: 1564" "/ R: 1595"
## [21] "/ R: 1563" "/ R: 1555" "/ R: 1363" "/ R: 1229" "/ R: 1745"
## [26] "/ R: 1579" "/ R: 1552" "/ R: 1507" "/ R: 1602" "/ R: 1522"
## [31] "/ R: 1494" "/ R: 1441" "/ R: 1449" "/ R: 1399" "/ R: 1438"
## [36] "/ R: 1355" "/ R:  980" "/ R: 1423" "/ R: 1436" "/ R: 1348"
## [41] "/ R: 1403" "/ R: 1332" "/ R: 1283" "/ R: 1199" "/ R: 1242"
## [46] "/ R:  377" "/ R: 1362" "/ R: 1382" "/ R: 1291" "/ R: 1056"
## [51] "/ R: 1011" "/ R:  935" "/ R: 1393" "/ R: 1270" "/ R: 1186"
## [56] "/ R: 1153" "/ R: 1092" "/ R:  917" "/ R:  853" "/ R:  967"
## [61] "/ R:  955" "/ R: 1530" "/ R: 1175" "/ R: 1163"
# Reduce each "/ R: nnnn" match to its run of one to four digits. The values
# remain character strings at this point.
chess_preratings2 <- unlist(
  str_extract_all(string = chess_preratings, pattern = "([[:digit:]]{1,4})")
)
chess_preratings2
##  [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980"  "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377"  "1362" "1382" "1291" "1056"
## [51] "1011" "935"  "1393" "1270" "1186" "1153" "1092" "917"  "853"  "967" 
## [61] "955"  "1530" "1175" "1163"
# Build the opponent matrix: one row per player, one column per round, each
# cell holding the opponent's ID (NA when the field before "|" is blank).

# Capture whatever sits directly before every "|": a run of digits or a
# single blank.
opponents_pattern <- "(\\d{1,}|[[:blank:]]{1})(?=\\|)"
player_opponents <- unlist(
  str_extract_all(unlist(chess_data), opponents_pattern)
)
# A blank before the "|" means there is no opponent ID in that field.
player_opponents[player_opponents == " "] <- NA

# The matches come in blocks of 20 per player (presumably two text lines of
# 10 "|"-terminated fields each -- this is the stride the data layout
# implies). Positions 4 to 10 of each block are the seven round fields.
# Reshaping to 20 rows puts one player in each column, so the player count
# is derived from the data instead of being hard-coded.
player_opponents <- local({
  matches_per_player <- 20L
  round_positions <- 4:10
  stopifnot(length(player_opponents) %% matches_per_player == 0)
  by_player <- matrix(as.numeric(player_opponents), nrow = matches_per_player)
  # Transpose so rows are players and columns are rounds.
  t(by_player[round_positions, , drop = FALSE])
})

# Average pre-rating of each player's opponents.
# Each opponent ID in player_opponents is used directly as an index into
# chess_preratings2 (this assumes ratings are stored in ID order, 1..n).
# NA opponents index to NA and are dropped by na.rm = TRUE.
chess_avg <- local({
  ratings <- as.numeric(chess_preratings2)
  vapply(
    seq_along(chess_id),
    function(i) mean(ratings[player_opponents[i, ]], na.rm = TRUE),
    numeric(1)
  )
})

# Assemble the per-player result table. Column names are spelled out
# explicitly and are the same names data.frame() would derive from the
# variable names.
chess_table <- data.frame(
  chess_names = chess_names,
  chess_states = chess_states,
  chess_points = chess_points,
  chess_preratings2 = chess_preratings2,
  chess_avg = chess_avg
)
chess_table
##                  chess_names chess_states chess_points chess_preratings2
## 1                  GARY HUA           ON           6.0              1794
## 2           DAKSHESH DARURI           MI           6.0              1553
## 3              ADITYA BAJAJ           MI           6.0              1384
## 4       PATRICK H SCHILLING           MI           5.5              1716
## 5                HANSHI ZUO           MI           5.5              1655
## 6               HANSEN SONG           OH           5.0              1686
## 7         GARY DEE SWATHELL           MI           5.0              1649
## 8          EZEKIEL HOUGHTON           MI           5.0              1641
## 9               STEFANO LEE           ON           5.0              1411
## 10                ANVIT RAO           MI           5.0              1365
## 11 CAMERON WILLIAM MC LEMAN           MI           4.5              1712
## 12           KENNETH J TACK           MI           4.5              1663
## 13        TORRANCE HENRY JR           MI           4.5              1666
## 14             BRADLEY SHAW           MI           4.5              1610
## 15   ZACHARY JAMES HOUGHTON           MI           4.5              1220
## 16             MIKE NIKITIN           MI           4.0              1604
## 17       RONALD GRZEGORCZYK           MI           4.0              1629
## 18            DAVID SUNDEEN           MI           4.0              1600
## 19             DIPANKAR ROY           MI           4.0              1564
## 20              JASON ZHENG           MI           4.0              1595
## 21            DINH DANG BUI           ON           4.0              1563
## 22         EUGENE L MCCLURE           MI           4.0              1555
## 23                 ALAN BUI           ON           4.0              1363
## 24        MICHAEL R ALDRICH           MI           4.0              1229
## 25         LOREN SCHWIEBERT           MI           3.5              1745
## 26                  MAX ZHU           ON           3.5              1579
## 27           GAURAV GIDWANI           MI           3.5              1552
## 28              SOFIA ADINA           MI           3.5              1507
## 29         CHIEDOZIE OKORIE           MI           3.5              1602
## 30       GEORGE AVERY JONES           ON           3.5              1522
## 31             RISHI SHETTY           MI           3.5              1494
## 32    JOSHUA PHILIP MATHEWS           ON           3.5              1441
## 33                  JADE GE           MI           3.5              1449
## 34   MICHAEL JEFFERY THOMAS           MI           3.5              1399
## 35         JOSHUA DAVID LEE           MI           3.5              1438
## 36            SIDDHARTH JHA           MI           3.5              1355
## 37     AMIYATOSH PWNANANDAM           MI           3.5               980
## 38                BRIAN LIU           MI           3.0              1423
## 39            JOEL R HENDON           MI           3.0              1436
## 40             FOREST ZHANG           MI           3.0              1348
## 41      KYLE WILLIAM MURPHY           MI           3.0              1403
## 42                 JARED GE           MI           3.0              1332
## 43        ROBERT GLEN VASEY           MI           3.0              1283
## 44       JUSTIN D SCHILLING           MI           3.0              1199
## 45                DEREK YAN           MI           3.0              1242
## 46 JACOB ALEXANDER LAVALLEY           MI           3.0               377
## 47              ERIC WRIGHT           MI           2.5              1362
## 48             DANIEL KHAIN           MI           2.5              1382
## 49         MICHAEL J MARTIN           MI           2.5              1291
## 50               SHIVAM JHA           MI           2.5              1056
## 51           TEJAS AYYAGARI           MI           2.5              1011
## 52                ETHAN GUO           MI           2.5               935
## 53            JOSE C YBARRA           MI           2.0              1393
## 54              LARRY HODGE           MI           2.0              1270
## 55                ALEX KONG           MI           2.0              1186
## 56             MARISA RICCI           MI           2.0              1153
## 57               MICHAEL LU           MI           2.0              1092
## 58             VIRAJ MOHILE           MI           2.0               917
## 59        SEAN M MC CORMICK           MI           2.0               853
## 60               JULIA SHEN           MI           1.5               967
## 61            JEZZEL FARKAS           ON           1.5               955
## 62            ASHWIN BALAJI           MI           1.0              1530
## 63     THOMAS JOSEPH HOSMER           MI           1.0              1175
## 64                   BEN LI           MI           1.0              1163
##    chess_avg
## 1   1605.286
## 2   1469.286
## 3   1563.571
## 4   1573.571
## 5   1500.857
## 6   1518.714
## 7   1372.143
## 8   1468.429
## 9   1523.143
## 10  1554.143
## 11  1467.571
## 12  1506.167
## 13  1497.857
## 14  1515.000
## 15  1483.857
## 16  1385.800
## 17  1498.571
## 18  1480.000
## 19  1426.286
## 20  1410.857
## 21  1470.429
## 22  1300.333
## 23  1213.857
## 24  1357.000
## 25  1363.286
## 26  1506.857
## 27  1221.667
## 28  1522.143
## 29  1313.500
## 30  1144.143
## 31  1259.857
## 32  1378.714
## 33  1276.857
## 34  1375.286
## 35  1149.714
## 36  1388.167
## 37  1384.800
## 38  1539.167
## 39  1429.571
## 40  1390.571
## 41  1248.500
## 42  1149.857
## 43  1106.571
## 44  1327.000
## 45  1152.000
## 46  1357.714
## 47  1392.000
## 48  1355.800
## 49  1285.800
## 50  1296.000
## 51  1356.143
## 52  1494.571
## 53  1345.333
## 54  1206.167
## 55  1406.000
## 56  1414.400
## 57  1363.000
## 58  1391.000
## 59  1319.000
## 60  1330.200
## 61  1327.286
## 62  1186.000
## 63  1350.200
## 64  1263.000
# Save the assembled table as "chessrankings.csv", resolved relative to the
# current working directory.
write.csv(x = chess_table, file = "chessrankings.csv")