CUNY SPS DATA607 Project 1

Author: Chinedu Onyeka

Date: September 18th, 2021

Summary

In this project, I used regular expressions to extract each player’s name, state, total points, and pre-rating. I also extracted each player’s opponents, computed the average pre-rating of those opponents, and wrote the output to a CSV file called tournament.csv.

Load the required libraries

library(tidyverse)
library(stringr)

Read the file

# Location of the raw tournament cross table.
url <- "https://raw.githubusercontent.com/chinedu2301/DATA607-Data-Acquisition-and-Management/main/tournamentinfo.txt"
# Skip the first 4 lines and keep every remaining line as one string taken
# from column V1. Presumably the file contains no commas, so sep = "," never
# splits a line (the printed output below shows whole lines).
text <- read.table(url, sep = ",", skip = 4)$V1

head(text, n = 11)
##  [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
##  [2] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
##  [3] "-----------------------------------------------------------------------------------------"
##  [4] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
##  [5] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
##  [6] "-----------------------------------------------------------------------------------------"
##  [7] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
##  [8] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
##  [9] "-----------------------------------------------------------------------------------------"
## [10] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|"
## [11] "   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |"

Remove all dashes

# Delete every "-" character: the all-dash separator rows become empty strings
# and the "->" arrows on the rating rows are reduced to ">".
text <- text %>% str_remove_all("-")
text %>% head(n = 11)
##  [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
##  [2] "   ON | 15445895 / R: 1794   >1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [3] ""                                                                                         
##  [4] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
##  [5] "   MI | 14598900 / R: 1553   >1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
##  [6] ""                                                                                         
##  [7] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
##  [8] "   MI | 14959604 / R: 1384   >1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [9] ""                                                                                         
## [10] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|"
## [11] "   MI | 12616049 / R: 1716   >1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |"

Collapse each run of whitespace into a single space

# Squeeze each run of whitespace down to exactly one space so that the later
# patterns can rely on single-space separators.
text <- text %>% str_replace_all("\\s+", " ")
text %>% head(n = 11)
##  [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"          
##  [2] " ON | 15445895 / R: 1794 >1817 |N:2 |W |B |W |B |W |B |W |"     
##  [3] ""                                                               
##  [4] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"   
##  [5] " MI | 14598900 / R: 1553 >1663 |N:2 |B |W |B |W |B |W |B |"     
##  [6] ""                                                               
##  [7] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"     
##  [8] " MI | 14959604 / R: 1384 >1640 |N:2 |W |B |W |B |W |B |W |"     
##  [9] ""                                                               
## [10] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [11] " MI | 12616049 / R: 1716 >1744 |N:2 |W |B |W |B |W |B |B |"

Extract the Player Names

# A name is matched as "| " followed by roughly two to four space-separated
# words; in this file that yields one match per player row (see output).
# str_extract_all() returns one list element per input line, hence unlist().
p_name <- text %>%
  str_extract_all("\\| [:alpha:]+\\s*[:alpha:]+ (\\w+)?\\s*(\\w+)?") %>%
  unlist()
p_name
##  [1] "| GARY HUA "                  "| DAKSHESH DARURI "          
##  [3] "| ADITYA BAJAJ "              "| PATRICK H SCHILLING "      
##  [5] "| HANSHI ZUO "                "| HANSEN SONG "              
##  [7] "| GARY DEE SWATHELL "         "| EZEKIEL HOUGHTON "         
##  [9] "| STEFANO LEE "               "| ANVIT RAO "                
## [11] "| CAMERON WILLIAM MC LEMAN"   "| KENNETH J TACK "           
## [13] "| TORRANCE HENRY JR "         "| BRADLEY SHAW "             
## [15] "| ZACHARY JAMES HOUGHTON "    "| MIKE NIKITIN "             
## [17] "| RONALD GRZEGORCZYK "        "| DAVID SUNDEEN "            
## [19] "| DIPANKAR ROY "              "| JASON ZHENG "              
## [21] "| DINH DANG BUI "             "| EUGENE L MCCLURE "         
## [23] "| ALAN BUI "                  "| MICHAEL R ALDRICH "        
## [25] "| LOREN SCHWIEBERT "          "| MAX ZHU "                  
## [27] "| GAURAV GIDWANI "            "| SOFIA ADINA STANESCUBELLU "
## [29] "| CHIEDOZIE OKORIE "          "| GEORGE AVERY JONES "       
## [31] "| RISHI SHETTY "              "| JOSHUA PHILIP MATHEWS "    
## [33] "| JADE GE "                   "| MICHAEL JEFFERY THOMAS "   
## [35] "| JOSHUA DAVID LEE "          "| SIDDHARTH JHA "            
## [37] "| AMIYATOSH PWNANANDAM "      "| BRIAN LIU "                
## [39] "| JOEL R HENDON "             "| FOREST ZHANG "             
## [41] "| KYLE WILLIAM MURPHY "       "| JARED GE "                 
## [43] "| ROBERT GLEN VASEY "         "| JUSTIN D SCHILLING "       
## [45] "| DEREK YAN "                 "| JACOB ALEXANDER LAVALLEY " 
## [47] "| ERIC WRIGHT "               "| DANIEL KHAIN "             
## [49] "| MICHAEL J MARTIN "          "| SHIVAM JHA "               
## [51] "| TEJAS AYYAGARI "            "| ETHAN GUO "                
## [53] "| JOSE C YBARRA "             "| LARRY HODGE "              
## [55] "| ALEX KONG "                 "| MARISA RICCI "             
## [57] "| MICHAEL LU "                "| VIRAJ MOHILE "             
## [59] "| SEAN M MC CORMICK"          "| JULIA SHEN "               
## [61] "| JEZZEL FARKAS "             "| ASHWIN BALAJI "            
## [63] "| THOMAS JOSEPH HOSMER "      "| BEN LI "

Remove the pipe “|” and the space before the names

# Drop the leading "| " left over from the match, then trim the trailing space
# that the name pattern captures, so names compare and export cleanly
# ("GARY HUA" rather than "GARY HUA ").
pl_name <- p_name %>%
  str_remove_all("\\| ") %>%
  str_trim()
pl_name
##  [1] "GARY HUA"                  "DAKSHESH DARURI"
##  [3] "ADITYA BAJAJ"              "PATRICK H SCHILLING"
##  [5] "HANSHI ZUO"                "HANSEN SONG"
##  [7] "GARY DEE SWATHELL"         "EZEKIEL HOUGHTON"
##  [9] "STEFANO LEE"               "ANVIT RAO"
## [11] "CAMERON WILLIAM MC LEMAN"  "KENNETH J TACK"
## [13] "TORRANCE HENRY JR"         "BRADLEY SHAW"
## [15] "ZACHARY JAMES HOUGHTON"    "MIKE NIKITIN"
## [17] "RONALD GRZEGORCZYK"        "DAVID SUNDEEN"
## [19] "DIPANKAR ROY"              "JASON ZHENG"
## [21] "DINH DANG BUI"             "EUGENE L MCCLURE"
## [23] "ALAN BUI"                  "MICHAEL R ALDRICH"
## [25] "LOREN SCHWIEBERT"          "MAX ZHU"
## [27] "GAURAV GIDWANI"            "SOFIA ADINA STANESCUBELLU"
## [29] "CHIEDOZIE OKORIE"          "GEORGE AVERY JONES"
## [31] "RISHI SHETTY"              "JOSHUA PHILIP MATHEWS"
## [33] "JADE GE"                   "MICHAEL JEFFERY THOMAS"
## [35] "JOSHUA DAVID LEE"          "SIDDHARTH JHA"
## [37] "AMIYATOSH PWNANANDAM"      "BRIAN LIU"
## [39] "JOEL R HENDON"             "FOREST ZHANG"
## [41] "KYLE WILLIAM MURPHY"       "JARED GE"
## [43] "ROBERT GLEN VASEY"         "JUSTIN D SCHILLING"
## [45] "DEREK YAN"                 "JACOB ALEXANDER LAVALLEY"
## [47] "ERIC WRIGHT"               "DANIEL KHAIN"
## [49] "MICHAEL J MARTIN"          "SHIVAM JHA"
## [51] "TEJAS AYYAGARI"            "ETHAN GUO"
## [53] "JOSE C YBARRA"             "LARRY HODGE"
## [55] "ALEX KONG"                 "MARISA RICCI"
## [57] "MICHAEL LU"                "VIRAJ MOHILE"
## [59] "SEAN M MC CORMICK"         "JULIA SHEN"
## [61] "JEZZEL FARKAS"             "ASHWIN BALAJI"
## [63] "THOMAS JOSEPH HOSMER"      "BEN LI"

Extract Player’s State

# State codes appear on the second row of each record as " XX | " (two letters
# between a space and a pipe); match that shape and flatten the result.
player_state <- text %>%
  str_extract_all(" [:alpha:]{2}\\s\\| ") %>%
  unlist()
player_state
##  [1] " ON | " " MI | " " MI | " " MI | " " MI | " " OH | " " MI | " " MI | "
##  [9] " ON | " " MI | " " MI | " " MI | " " MI | " " MI | " " MI | " " MI | "
## [17] " MI | " " MI | " " MI | " " MI | " " ON | " " MI | " " ON | " " MI | "
## [25] " MI | " " ON | " " MI | " " MI | " " MI | " " ON | " " MI | " " ON | "
## [33] " MI | " " MI | " " MI | " " MI | " " MI | " " MI | " " MI | " " MI | "
## [41] " MI | " " MI | " " MI | " " MI | " " MI | " " MI | " " MI | " " MI | "
## [49] " MI | " " MI | " " MI | " " MI | " " MI | " " MI | " " MI | " " MI | "
## [57] " MI | " " MI | " " MI | " " MI | " " ON | " " MI | " " MI | " " MI | "

Remove the “|” and spaces

# Strip the trailing " | " first, then the remaining leading space, leaving
# only the two-letter code.
player_state <- player_state %>%
  str_remove_all("\\s\\| ") %>%
  str_remove_all(" ")
player_state
##  [1] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" "ON" "MI" "MI" "MI" "MI" "MI" "MI"
## [16] "MI" "MI" "MI" "MI" "MI" "ON" "MI" "ON" "MI" "MI" "ON" "MI" "MI" "MI" "ON"
## [31] "MI" "ON" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [46] "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [61] "ON" "MI" "MI" "MI"

Extract Player’s Total Number of Points

# Total points are written as digit.digit (e.g. "6.0"); in this file that
# shape yields one match per player (see output).
total_points <- text %>%
  str_extract_all("[:digit:]\\.[:digit:]") %>%
  unlist()
total_points
##  [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" "5.0" "5.0" "5.0" "4.5" "4.5"
## [13] "4.5" "4.5" "4.5" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0"
## [25] "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5"
## [37] "3.5" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "2.5" "2.5"
## [49] "2.5" "2.5" "2.5" "2.5" "2.0" "2.0" "2.0" "2.0" "2.0" "2.0" "2.0" "1.5"
## [61] "1.5" "1.0" "1.0" "1.0"

Change the Player’s Total Points to numeric

# as.numeric() is already vectorised, so it converts the whole character
# vector in one call; no element-wise map is needed.
player_total_points <- as.numeric(total_points)
player_total_points
##  [1] 6.0 6.0 6.0 5.5 5.5 5.0 5.0 5.0 5.0 5.0 4.5 4.5 4.5 4.5 4.5 4.0 4.0 4.0 4.0
## [20] 4.0 4.0 4.0 4.0 4.0 3.5 3.5 3.5 3.5 3.5 3.5 3.5 3.5 3.5 3.5 3.5 3.5 3.5 3.0
## [39] 3.0 3.0 3.0 3.0 3.0 3.0 3.0 3.0 2.5 2.5 2.5 2.5 2.5 2.5 2.0 2.0 2.0 2.0 2.0
## [58] 2.0 2.0 1.5 1.5 1.0 1.0 1.0

Extract Player’s Pre-Rating

# The pre-rating is the run of digits right after "R:"; any suffix that
# follows the digits is not part of the match.
players_prerating <- text %>%
  str_extract_all("R:\\s+[:digit:]+") %>%
  unlist()
players_prerating
##  [1] "R: 1794" "R: 1553" "R: 1384" "R: 1716" "R: 1655" "R: 1686" "R: 1649"
##  [8] "R: 1641" "R: 1411" "R: 1365" "R: 1712" "R: 1663" "R: 1666" "R: 1610"
## [15] "R: 1220" "R: 1604" "R: 1629" "R: 1600" "R: 1564" "R: 1595" "R: 1563"
## [22] "R: 1555" "R: 1363" "R: 1229" "R: 1745" "R: 1579" "R: 1552" "R: 1507"
## [29] "R: 1602" "R: 1522" "R: 1494" "R: 1441" "R: 1449" "R: 1399" "R: 1438"
## [36] "R: 1355" "R: 980"  "R: 1423" "R: 1436" "R: 1348" "R: 1403" "R: 1332"
## [43] "R: 1283" "R: 1199" "R: 1242" "R: 377"  "R: 1362" "R: 1382" "R: 1291"
## [50] "R: 1056" "R: 1011" "R: 935"  "R: 1393" "R: 1270" "R: 1186" "R: 1153"
## [57] "R: 1092" "R: 917"  "R: 853"  "R: 967"  "R: 955"  "R: 1530" "R: 1175"
## [64] "R: 1163"

Remove the “R:”

# Drop the "R: " label so only the digits remain (still as character strings).
players_prerating <- players_prerating %>% str_remove_all("R: ")
players_prerating
##  [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980"  "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377"  "1362" "1382" "1291" "1056"
## [51] "1011" "935"  "1393" "1270" "1186" "1153" "1092" "917"  "853"  "967" 
## [61] "955"  "1530" "1175" "1163"

Change players_prerating type to numeric

# as.numeric() is already vectorised, so it converts the whole character
# vector in one call; no element-wise map is needed.
player_prerating <- as.numeric(players_prerating)
player_prerating
##  [1] 1794 1553 1384 1716 1655 1686 1649 1641 1411 1365 1712 1663 1666 1610 1220
## [16] 1604 1629 1600 1564 1595 1563 1555 1363 1229 1745 1579 1552 1507 1602 1522
## [31] 1494 1441 1449 1399 1438 1355  980 1423 1436 1348 1403 1332 1283 1199 1242
## [46]  377 1362 1382 1291 1056 1011  935 1393 1270 1186 1153 1092  917  853  967
## [61]  955 1530 1175 1163

Extract Average Opponent Pre-Rating

# Keep everything from the first "|<digit>" to the end of the line; on player
# rows that is the total-points field followed by the per-round results.
opponent <- text %>%
  str_extract_all("\\|[0-9].*") %>%
  unlist()
opponent
##  [1] "|6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"  
##  [2] "|6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"  
##  [3] "|6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|" 
##  [4] "|5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"   
##  [5] "|5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|" 
##  [6] "|5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
##  [7] "|5.0 |W 57|W 46|W 13|W 11|L 1|W 9|L 2|"   
##  [8] "|5.0 |W 3|W 32|L 14|L 9|W 47|W 28|W 19|"  
##  [9] "|5.0 |W 25|L 18|W 59|W 8|W 26|L 7|W 20|"  
## [10] "|5.0 |D 16|L 19|W 55|W 31|D 6|W 25|W 18|" 
## [11] "|4.5 |D 38|W 56|W 6|L 7|L 3|W 34|W 26|"   
## [12] "|4.5 |W 42|W 33|D 5|W 38|H |D 1|L 3|"     
## [13] "|4.5 |W 36|W 27|L 7|D 5|W 33|L 3|W 32|"   
## [14] "|4.5 |W 54|W 44|W 8|L 1|D 27|L 5|W 31|"   
## [15] "|4.5 |D 19|L 16|W 30|L 22|W 54|W 33|W 38|"
## [16] "|4.0 |D 10|W 15|H |W 39|L 2|W 36|U |"     
## [17] "|4.0 |W 48|W 41|L 26|L 2|W 23|W 22|L 5|"  
## [18] "|4.0 |W 47|W 9|L 1|W 32|L 19|W 38|L 10|"  
## [19] "|4.0 |D 15|W 10|W 52|D 28|W 18|L 4|L 8|"  
## [20] "|4.0 |L 40|W 49|W 23|W 41|W 28|L 2|L 9|"  
## [21] "|4.0 |W 43|L 1|W 47|L 3|W 40|W 39|L 6|"   
## [22] "|4.0 |W 64|D 52|L 28|W 15|H |L 17|W 40|"  
## [23] "|4.0 |L 4|W 43|L 20|W 58|L 17|W 37|W 46|" 
## [24] "|4.0 |L 28|L 47|W 43|L 25|W 60|W 44|W 39|"
## [25] "|3.5 |L 9|W 53|L 3|W 24|D 34|L 10|W 47|"  
## [26] "|3.5 |W 49|W 40|W 17|L 4|L 9|D 32|L 11|"  
## [27] "|3.5 |W 51|L 13|W 46|W 37|D 14|L 6|U |"   
## [28] "|3.5 |W 24|D 4|W 22|D 19|L 20|L 8|D 36|"  
## [29] "|3.5 |W 50|D 6|L 38|L 34|W 52|W 48|U |"   
## [30] "|3.5 |L 52|D 64|L 15|W 55|L 31|W 61|W 50|"
## [31] "|3.5 |L 58|D 55|W 64|L 10|W 30|W 50|L 14|"
## [32] "|3.5 |W 61|L 8|W 44|L 18|W 51|D 26|L 13|" 
## [33] "|3.5 |W 60|L 12|W 50|D 36|L 13|L 15|W 51|"
## [34] "|3.5 |L 6|W 60|L 37|W 29|D 25|L 11|W 52|" 
## [35] "|3.5 |L 46|L 38|W 56|L 6|W 57|D 52|W 48|" 
## [36] "|3.5 |L 13|W 57|W 51|D 33|H |L 16|D 28|"  
## [37] "|3.5 |B |L 5|W 34|L 27|H |L 23|W 61|"     
## [38] "|3.0 |D 11|W 35|W 29|L 12|H |L 18|L 15|"  
## [39] "|3.0 |L 1|W 54|W 40|L 16|W 44|L 21|L 24|" 
## [40] "|3.0 |W 20|L 26|L 39|W 59|L 21|W 56|L 22|"
## [41] "|3.0 |W 59|L 17|W 58|L 20|X |U |U |"      
## [42] "|3.0 |L 12|L 50|L 57|D 60|D 61|W 64|W 56|"
## [43] "|3.0 |L 21|L 23|L 24|W 63|W 59|L 46|W 55|"
## [44] "|3.0 |B |L 14|L 32|W 53|L 39|L 24|W 59|"  
## [45] "|3.0 |L 5|L 51|D 60|L 56|W 63|D 55|W 58|" 
## [46] "|3.0 |W 35|L 7|L 27|L 50|W 64|W 43|L 23|" 
## [47] "|2.5 |L 18|W 24|L 21|W 61|L 8|D 51|L 25|" 
## [48] "|2.5 |L 17|W 63|H |D 52|H |L 29|L 35|"    
## [49] "|2.5 |L 26|L 20|D 63|D 64|W 58|H |U |"    
## [50] "|2.5 |L 29|W 42|L 33|W 46|H |L 31|L 30|"  
## [51] "|2.5 |L 27|W 45|L 36|W 57|L 32|D 47|L 33|"
## [52] "|2.5 |W 30|D 22|L 19|D 48|L 29|D 35|L 34|"
## [53] "|2.0 |H |L 25|H |L 44|U |W 57|U |"        
## [54] "|2.0 |L 14|L 39|L 61|B |L 15|L 59|W 64|"  
## [55] "|2.0 |L 62|D 31|L 10|L 30|B |D 45|L 43|"  
## [56] "|2.0 |H |L 11|L 35|W 45|H |L 40|L 42|"    
## [57] "|2.0 |L 7|L 36|W 42|L 51|L 35|L 53|B |"   
## [58] "|2.0 |W 31|L 2|L 41|L 23|L 49|B |L 45|"   
## [59] "|2.0 |L 41|B |L 9|L 40|L 43|W 54|L 44|"   
## [60] "|1.5 |L 33|L 34|D 45|D 42|L 24|H |U |"    
## [61] "|1.5 |L 32|L 3|W 54|L 47|D 42|L 30|L 37|" 
## [62] "|1.0 |W 55|U |U |U |U |U |U |"            
## [63] "|1.0 |L 2|L 48|D 49|L 43|L 45|H |U |"     
## [64] "|1.0 |L 22|D 30|L 31|D 49|L 46|L 42|L 54|"

Looking at this result, we see that some players do not have an opponent in all 7 rounds. To make extracting the opponent numbers easier, we replace |B |, |U |, |X |, and |H | with |R 0| so that every round has the same format as the others.

# Rounds without an opponent are coded as a lone B, U, X or H; rewrite them as
# "R 0" so every round has the "<letter> <number>" shape, with 0 as placeholder.
oppo <- opponent %>%
  str_replace_all("\\|[BUXH] ", "\\|R 0")
oppo
##  [1] "|6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"  
##  [2] "|6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"  
##  [3] "|6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|" 
##  [4] "|5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"   
##  [5] "|5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|" 
##  [6] "|5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
##  [7] "|5.0 |W 57|W 46|W 13|W 11|L 1|W 9|L 2|"   
##  [8] "|5.0 |W 3|W 32|L 14|L 9|W 47|W 28|W 19|"  
##  [9] "|5.0 |W 25|L 18|W 59|W 8|W 26|L 7|W 20|"  
## [10] "|5.0 |D 16|L 19|W 55|W 31|D 6|W 25|W 18|" 
## [11] "|4.5 |D 38|W 56|W 6|L 7|L 3|W 34|W 26|"   
## [12] "|4.5 |W 42|W 33|D 5|W 38|R 0|D 1|L 3|"    
## [13] "|4.5 |W 36|W 27|L 7|D 5|W 33|L 3|W 32|"   
## [14] "|4.5 |W 54|W 44|W 8|L 1|D 27|L 5|W 31|"   
## [15] "|4.5 |D 19|L 16|W 30|L 22|W 54|W 33|W 38|"
## [16] "|4.0 |D 10|W 15|R 0|W 39|L 2|W 36|R 0|"   
## [17] "|4.0 |W 48|W 41|L 26|L 2|W 23|W 22|L 5|"  
## [18] "|4.0 |W 47|W 9|L 1|W 32|L 19|W 38|L 10|"  
## [19] "|4.0 |D 15|W 10|W 52|D 28|W 18|L 4|L 8|"  
## [20] "|4.0 |L 40|W 49|W 23|W 41|W 28|L 2|L 9|"  
## [21] "|4.0 |W 43|L 1|W 47|L 3|W 40|W 39|L 6|"   
## [22] "|4.0 |W 64|D 52|L 28|W 15|R 0|L 17|W 40|" 
## [23] "|4.0 |L 4|W 43|L 20|W 58|L 17|W 37|W 46|" 
## [24] "|4.0 |L 28|L 47|W 43|L 25|W 60|W 44|W 39|"
## [25] "|3.5 |L 9|W 53|L 3|W 24|D 34|L 10|W 47|"  
## [26] "|3.5 |W 49|W 40|W 17|L 4|L 9|D 32|L 11|"  
## [27] "|3.5 |W 51|L 13|W 46|W 37|D 14|L 6|R 0|"  
## [28] "|3.5 |W 24|D 4|W 22|D 19|L 20|L 8|D 36|"  
## [29] "|3.5 |W 50|D 6|L 38|L 34|W 52|W 48|R 0|"  
## [30] "|3.5 |L 52|D 64|L 15|W 55|L 31|W 61|W 50|"
## [31] "|3.5 |L 58|D 55|W 64|L 10|W 30|W 50|L 14|"
## [32] "|3.5 |W 61|L 8|W 44|L 18|W 51|D 26|L 13|" 
## [33] "|3.5 |W 60|L 12|W 50|D 36|L 13|L 15|W 51|"
## [34] "|3.5 |L 6|W 60|L 37|W 29|D 25|L 11|W 52|" 
## [35] "|3.5 |L 46|L 38|W 56|L 6|W 57|D 52|W 48|" 
## [36] "|3.5 |L 13|W 57|W 51|D 33|R 0|L 16|D 28|" 
## [37] "|3.5 |R 0|L 5|W 34|L 27|R 0|L 23|W 61|"   
## [38] "|3.0 |D 11|W 35|W 29|L 12|R 0|L 18|L 15|" 
## [39] "|3.0 |L 1|W 54|W 40|L 16|W 44|L 21|L 24|" 
## [40] "|3.0 |W 20|L 26|L 39|W 59|L 21|W 56|L 22|"
## [41] "|3.0 |W 59|L 17|W 58|L 20|R 0|R 0|R 0|"   
## [42] "|3.0 |L 12|L 50|L 57|D 60|D 61|W 64|W 56|"
## [43] "|3.0 |L 21|L 23|L 24|W 63|W 59|L 46|W 55|"
## [44] "|3.0 |R 0|L 14|L 32|W 53|L 39|L 24|W 59|" 
## [45] "|3.0 |L 5|L 51|D 60|L 56|W 63|D 55|W 58|" 
## [46] "|3.0 |W 35|L 7|L 27|L 50|W 64|W 43|L 23|" 
## [47] "|2.5 |L 18|W 24|L 21|W 61|L 8|D 51|L 25|" 
## [48] "|2.5 |L 17|W 63|R 0|D 52|R 0|L 29|L 35|"  
## [49] "|2.5 |L 26|L 20|D 63|D 64|W 58|R 0|R 0|"  
## [50] "|2.5 |L 29|W 42|L 33|W 46|R 0|L 31|L 30|" 
## [51] "|2.5 |L 27|W 45|L 36|W 57|L 32|D 47|L 33|"
## [52] "|2.5 |W 30|D 22|L 19|D 48|L 29|D 35|L 34|"
## [53] "|2.0 |R 0|L 25|R 0|L 44|R 0|W 57|R 0|"    
## [54] "|2.0 |L 14|L 39|L 61|R 0|L 15|L 59|W 64|" 
## [55] "|2.0 |L 62|D 31|L 10|L 30|R 0|D 45|L 43|" 
## [56] "|2.0 |R 0|L 11|L 35|W 45|R 0|L 40|L 42|"  
## [57] "|2.0 |L 7|L 36|W 42|L 51|L 35|L 53|R 0|"  
## [58] "|2.0 |W 31|L 2|L 41|L 23|L 49|R 0|L 45|"  
## [59] "|2.0 |L 41|R 0|L 9|L 40|L 43|W 54|L 44|"  
## [60] "|1.5 |L 33|L 34|D 45|D 42|L 24|R 0|R 0|"  
## [61] "|1.5 |L 32|L 3|W 54|L 47|D 42|L 30|L 37|" 
## [62] "|1.0 |W 55|R 0|R 0|R 0|R 0|R 0|R 0|"      
## [63] "|1.0 |L 2|L 48|D 49|L 43|L 45|R 0|R 0|"   
## [64] "|1.0 |L 22|D 30|L 31|D 49|L 46|L 42|L 54|"

Remove the first part containing the total points as well as the letters and the pipe symbol

# Remove the leading total-points field ("|6.0 ")
oppon <- oppo %>% str_remove_all("\\|\\d\\.\\d\\s")

# Remove the result letters, then the pipes, leaving space-separated
# opponent numbers
oppone <- oppon %>%
  str_remove_all("[:alpha:]") %>%
  str_remove_all("\\|")
oppone
##  [1] " 39 21 18 14 7 12 4"   " 63 58 4 17 16 20 7"   " 8 61 25 21 11 13 12" 
##  [4] " 23 28 2 26 5 19 1"    " 45 37 12 13 4 14 17"  " 34 29 11 35 10 27 21"
##  [7] " 57 46 13 11 1 9 2"    " 3 32 14 9 47 28 19"   " 25 18 59 8 26 7 20"  
## [10] " 16 19 55 31 6 25 18"  " 38 56 6 7 3 34 26"    " 42 33 5 38 0 1 3"    
## [13] " 36 27 7 5 33 3 32"    " 54 44 8 1 27 5 31"    " 19 16 30 22 54 33 38"
## [16] " 10 15 0 39 2 36 0"    " 48 41 26 2 23 22 5"   " 47 9 1 32 19 38 10"  
## [19] " 15 10 52 28 18 4 8"   " 40 49 23 41 28 2 9"   " 43 1 47 3 40 39 6"   
## [22] " 64 52 28 15 0 17 40"  " 4 43 20 58 17 37 46"  " 28 47 43 25 60 44 39"
## [25] " 9 53 3 24 34 10 47"   " 49 40 17 4 9 32 11"   " 51 13 46 37 14 6 0"  
## [28] " 24 4 22 19 20 8 36"   " 50 6 38 34 52 48 0"   " 52 64 15 55 31 61 50"
## [31] " 58 55 64 10 30 50 14" " 61 8 44 18 51 26 13"  " 60 12 50 36 13 15 51"
## [34] " 6 60 37 29 25 11 52"  " 46 38 56 6 57 52 48"  " 13 57 51 33 0 16 28" 
## [37] " 0 5 34 27 0 23 61"    " 11 35 29 12 0 18 15"  " 1 54 40 16 44 21 24" 
## [40] " 20 26 39 59 21 56 22" " 59 17 58 20 0 0 0"    " 12 50 57 60 61 64 56"
## [43] " 21 23 24 63 59 46 55" " 0 14 32 53 39 24 59"  " 5 51 60 56 63 55 58" 
## [46] " 35 7 27 50 64 43 23"  " 18 24 21 61 8 51 25"  " 17 63 0 52 0 29 35"  
## [49] " 26 20 63 64 58 0 0"   " 29 42 33 46 0 31 30"  " 27 45 36 57 32 47 33"
## [52] " 30 22 19 48 29 35 34" " 0 25 0 44 0 57 0"     " 14 39 61 0 15 59 64" 
## [55] " 62 31 10 30 0 45 43"  " 0 11 35 45 0 40 42"   " 7 36 42 51 35 53 0"  
## [58] " 31 2 41 23 49 0 45"   " 41 0 9 40 43 54 44"   " 33 34 45 42 24 0 0"  
## [61] " 32 3 54 47 42 30 37"  " 55 0 0 0 0 0 0"       " 2 48 49 43 45 0 0"   
## [64] " 22 30 31 49 46 42 54"
# Split each row into its individual opponent numbers (a space followed by one
# or two digits) and flatten all rows into a single vector.
opponen <- oppone %>%
  str_extract_all(" [:digit:]{1,2}") %>%
  unlist()
opponen
##   [1] " 39" " 21" " 18" " 14" " 7"  " 12" " 4"  " 63" " 58" " 4"  " 17" " 16"
##  [13] " 20" " 7"  " 8"  " 61" " 25" " 21" " 11" " 13" " 12" " 23" " 28" " 2" 
##  [25] " 26" " 5"  " 19" " 1"  " 45" " 37" " 12" " 13" " 4"  " 14" " 17" " 34"
##  [37] " 29" " 11" " 35" " 10" " 27" " 21" " 57" " 46" " 13" " 11" " 1"  " 9" 
##  [49] " 2"  " 3"  " 32" " 14" " 9"  " 47" " 28" " 19" " 25" " 18" " 59" " 8" 
##  [61] " 26" " 7"  " 20" " 16" " 19" " 55" " 31" " 6"  " 25" " 18" " 38" " 56"
##  [73] " 6"  " 7"  " 3"  " 34" " 26" " 42" " 33" " 5"  " 38" " 0"  " 1"  " 3" 
##  [85] " 36" " 27" " 7"  " 5"  " 33" " 3"  " 32" " 54" " 44" " 8"  " 1"  " 27"
##  [97] " 5"  " 31" " 19" " 16" " 30" " 22" " 54" " 33" " 38" " 10" " 15" " 0" 
## [109] " 39" " 2"  " 36" " 0"  " 48" " 41" " 26" " 2"  " 23" " 22" " 5"  " 47"
## [121] " 9"  " 1"  " 32" " 19" " 38" " 10" " 15" " 10" " 52" " 28" " 18" " 4" 
## [133] " 8"  " 40" " 49" " 23" " 41" " 28" " 2"  " 9"  " 43" " 1"  " 47" " 3" 
## [145] " 40" " 39" " 6"  " 64" " 52" " 28" " 15" " 0"  " 17" " 40" " 4"  " 43"
## [157] " 20" " 58" " 17" " 37" " 46" " 28" " 47" " 43" " 25" " 60" " 44" " 39"
## [169] " 9"  " 53" " 3"  " 24" " 34" " 10" " 47" " 49" " 40" " 17" " 4"  " 9" 
## [181] " 32" " 11" " 51" " 13" " 46" " 37" " 14" " 6"  " 0"  " 24" " 4"  " 22"
## [193] " 19" " 20" " 8"  " 36" " 50" " 6"  " 38" " 34" " 52" " 48" " 0"  " 52"
## [205] " 64" " 15" " 55" " 31" " 61" " 50" " 58" " 55" " 64" " 10" " 30" " 50"
## [217] " 14" " 61" " 8"  " 44" " 18" " 51" " 26" " 13" " 60" " 12" " 50" " 36"
## [229] " 13" " 15" " 51" " 6"  " 60" " 37" " 29" " 25" " 11" " 52" " 46" " 38"
## [241] " 56" " 6"  " 57" " 52" " 48" " 13" " 57" " 51" " 33" " 0"  " 16" " 28"
## [253] " 0"  " 5"  " 34" " 27" " 0"  " 23" " 61" " 11" " 35" " 29" " 12" " 0" 
## [265] " 18" " 15" " 1"  " 54" " 40" " 16" " 44" " 21" " 24" " 20" " 26" " 39"
## [277] " 59" " 21" " 56" " 22" " 59" " 17" " 58" " 20" " 0"  " 0"  " 0"  " 12"
## [289] " 50" " 57" " 60" " 61" " 64" " 56" " 21" " 23" " 24" " 63" " 59" " 46"
## [301] " 55" " 0"  " 14" " 32" " 53" " 39" " 24" " 59" " 5"  " 51" " 60" " 56"
## [313] " 63" " 55" " 58" " 35" " 7"  " 27" " 50" " 64" " 43" " 23" " 18" " 24"
## [325] " 21" " 61" " 8"  " 51" " 25" " 17" " 63" " 0"  " 52" " 0"  " 29" " 35"
## [337] " 26" " 20" " 63" " 64" " 58" " 0"  " 0"  " 29" " 42" " 33" " 46" " 0" 
## [349] " 31" " 30" " 27" " 45" " 36" " 57" " 32" " 47" " 33" " 30" " 22" " 19"
## [361] " 48" " 29" " 35" " 34" " 0"  " 25" " 0"  " 44" " 0"  " 57" " 0"  " 14"
## [373] " 39" " 61" " 0"  " 15" " 59" " 64" " 62" " 31" " 10" " 30" " 0"  " 45"
## [385] " 43" " 0"  " 11" " 35" " 45" " 0"  " 40" " 42" " 7"  " 36" " 42" " 51"
## [397] " 35" " 53" " 0"  " 31" " 2"  " 41" " 23" " 49" " 0"  " 45" " 41" " 0" 
## [409] " 9"  " 40" " 43" " 54" " 44" " 33" " 34" " 45" " 42" " 24" " 0"  " 0" 
## [421] " 32" " 3"  " 54" " 47" " 42" " 30" " 37" " 55" " 0"  " 0"  " 0"  " 0" 
## [433] " 0"  " 0"  " 2"  " 48" " 49" " 43" " 45" " 0"  " 0"  " 22" " 30" " 31"
## [445] " 49" " 46" " 42" " 54"

Remove the spaces before the numbers

# Strip the leading space from every token so only the digits remain.
opponen_char <- opponen %>% str_remove_all(" ")
opponen_char
##   [1] "39" "21" "18" "14" "7"  "12" "4"  "63" "58" "4"  "17" "16" "20" "7"  "8" 
##  [16] "61" "25" "21" "11" "13" "12" "23" "28" "2"  "26" "5"  "19" "1"  "45" "37"
##  [31] "12" "13" "4"  "14" "17" "34" "29" "11" "35" "10" "27" "21" "57" "46" "13"
##  [46] "11" "1"  "9"  "2"  "3"  "32" "14" "9"  "47" "28" "19" "25" "18" "59" "8" 
##  [61] "26" "7"  "20" "16" "19" "55" "31" "6"  "25" "18" "38" "56" "6"  "7"  "3" 
##  [76] "34" "26" "42" "33" "5"  "38" "0"  "1"  "3"  "36" "27" "7"  "5"  "33" "3" 
##  [91] "32" "54" "44" "8"  "1"  "27" "5"  "31" "19" "16" "30" "22" "54" "33" "38"
## [106] "10" "15" "0"  "39" "2"  "36" "0"  "48" "41" "26" "2"  "23" "22" "5"  "47"
## [121] "9"  "1"  "32" "19" "38" "10" "15" "10" "52" "28" "18" "4"  "8"  "40" "49"
## [136] "23" "41" "28" "2"  "9"  "43" "1"  "47" "3"  "40" "39" "6"  "64" "52" "28"
## [151] "15" "0"  "17" "40" "4"  "43" "20" "58" "17" "37" "46" "28" "47" "43" "25"
## [166] "60" "44" "39" "9"  "53" "3"  "24" "34" "10" "47" "49" "40" "17" "4"  "9" 
## [181] "32" "11" "51" "13" "46" "37" "14" "6"  "0"  "24" "4"  "22" "19" "20" "8" 
## [196] "36" "50" "6"  "38" "34" "52" "48" "0"  "52" "64" "15" "55" "31" "61" "50"
## [211] "58" "55" "64" "10" "30" "50" "14" "61" "8"  "44" "18" "51" "26" "13" "60"
## [226] "12" "50" "36" "13" "15" "51" "6"  "60" "37" "29" "25" "11" "52" "46" "38"
## [241] "56" "6"  "57" "52" "48" "13" "57" "51" "33" "0"  "16" "28" "0"  "5"  "34"
## [256] "27" "0"  "23" "61" "11" "35" "29" "12" "0"  "18" "15" "1"  "54" "40" "16"
## [271] "44" "21" "24" "20" "26" "39" "59" "21" "56" "22" "59" "17" "58" "20" "0" 
## [286] "0"  "0"  "12" "50" "57" "60" "61" "64" "56" "21" "23" "24" "63" "59" "46"
## [301] "55" "0"  "14" "32" "53" "39" "24" "59" "5"  "51" "60" "56" "63" "55" "58"
## [316] "35" "7"  "27" "50" "64" "43" "23" "18" "24" "21" "61" "8"  "51" "25" "17"
## [331] "63" "0"  "52" "0"  "29" "35" "26" "20" "63" "64" "58" "0"  "0"  "29" "42"
## [346] "33" "46" "0"  "31" "30" "27" "45" "36" "57" "32" "47" "33" "30" "22" "19"
## [361] "48" "29" "35" "34" "0"  "25" "0"  "44" "0"  "57" "0"  "14" "39" "61" "0" 
## [376] "15" "59" "64" "62" "31" "10" "30" "0"  "45" "43" "0"  "11" "35" "45" "0" 
## [391] "40" "42" "7"  "36" "42" "51" "35" "53" "0"  "31" "2"  "41" "23" "49" "0" 
## [406] "45" "41" "0"  "9"  "40" "43" "54" "44" "33" "34" "45" "42" "24" "0"  "0" 
## [421] "32" "3"  "54" "47" "42" "30" "37" "55" "0"  "0"  "0"  "0"  "0"  "0"  "2" 
## [436] "48" "49" "43" "45" "0"  "0"  "22" "30" "31" "49" "46" "42" "54"

Convert these numbers to numeric

# as.numeric() is already vectorised, so it converts the whole character
# vector in one call; no element-wise map is needed.
opponen_dig <- as.numeric(opponen_char)
opponen_dig
##   [1] 39 21 18 14  7 12  4 63 58  4 17 16 20  7  8 61 25 21 11 13 12 23 28  2 26
##  [26]  5 19  1 45 37 12 13  4 14 17 34 29 11 35 10 27 21 57 46 13 11  1  9  2  3
##  [51] 32 14  9 47 28 19 25 18 59  8 26  7 20 16 19 55 31  6 25 18 38 56  6  7  3
##  [76] 34 26 42 33  5 38  0  1  3 36 27  7  5 33  3 32 54 44  8  1 27  5 31 19 16
## [101] 30 22 54 33 38 10 15  0 39  2 36  0 48 41 26  2 23 22  5 47  9  1 32 19 38
## [126] 10 15 10 52 28 18  4  8 40 49 23 41 28  2  9 43  1 47  3 40 39  6 64 52 28
## [151] 15  0 17 40  4 43 20 58 17 37 46 28 47 43 25 60 44 39  9 53  3 24 34 10 47
## [176] 49 40 17  4  9 32 11 51 13 46 37 14  6  0 24  4 22 19 20  8 36 50  6 38 34
## [201] 52 48  0 52 64 15 55 31 61 50 58 55 64 10 30 50 14 61  8 44 18 51 26 13 60
## [226] 12 50 36 13 15 51  6 60 37 29 25 11 52 46 38 56  6 57 52 48 13 57 51 33  0
## [251] 16 28  0  5 34 27  0 23 61 11 35 29 12  0 18 15  1 54 40 16 44 21 24 20 26
## [276] 39 59 21 56 22 59 17 58 20  0  0  0 12 50 57 60 61 64 56 21 23 24 63 59 46
## [301] 55  0 14 32 53 39 24 59  5 51 60 56 63 55 58 35  7 27 50 64 43 23 18 24 21
## [326] 61  8 51 25 17 63  0 52  0 29 35 26 20 63 64 58  0  0 29 42 33 46  0 31 30
## [351] 27 45 36 57 32 47 33 30 22 19 48 29 35 34  0 25  0 44  0 57  0 14 39 61  0
## [376] 15 59 64 62 31 10 30  0 45 43  0 11 35 45  0 40 42  7 36 42 51 35 53  0 31
## [401]  2 41 23 49  0 45 41  0  9 40 43 54 44 33 34 45 42 24  0  0 32  3 54 47 42
## [426] 30 37 55  0  0  0  0  0  0  2 48 49 43 45  0  0 22 30 31 49 46 42 54

The next task is to replace the opponent numbers in this vector with their corresponding pre-ratings.

# Confirm both vectors are numeric before one is used to index the other:
# opponen_dig holds the opponent player numbers, player_prerating the ratings.
str(opponen_dig)
##  num [1:448] 39 21 18 14 7 12 4 63 58 4 ...
str(player_prerating)
##  num [1:64] 1794 1553 1384 1716 1655 ...

Transform opponen_dig into a matrix and then into a data frame

# Fill row by row with 7 values per row, so each row holds one player's
# opponents across the seven rounds.
opponent_matrix <- opponen_dig %>% matrix(ncol = 7, byrow = TRUE)
opponent_matrix
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7]
##  [1,]   39   21   18   14    7   12    4
##  [2,]   63   58    4   17   16   20    7
##  [3,]    8   61   25   21   11   13   12
##  [4,]   23   28    2   26    5   19    1
##  [5,]   45   37   12   13    4   14   17
##  [6,]   34   29   11   35   10   27   21
##  [7,]   57   46   13   11    1    9    2
##  [8,]    3   32   14    9   47   28   19
##  [9,]   25   18   59    8   26    7   20
## [10,]   16   19   55   31    6   25   18
## [11,]   38   56    6    7    3   34   26
## [12,]   42   33    5   38    0    1    3
## [13,]   36   27    7    5   33    3   32
## [14,]   54   44    8    1   27    5   31
## [15,]   19   16   30   22   54   33   38
## [16,]   10   15    0   39    2   36    0
## [17,]   48   41   26    2   23   22    5
## [18,]   47    9    1   32   19   38   10
## [19,]   15   10   52   28   18    4    8
## [20,]   40   49   23   41   28    2    9
## [21,]   43    1   47    3   40   39    6
## [22,]   64   52   28   15    0   17   40
## [23,]    4   43   20   58   17   37   46
## [24,]   28   47   43   25   60   44   39
## [25,]    9   53    3   24   34   10   47
## [26,]   49   40   17    4    9   32   11
## [27,]   51   13   46   37   14    6    0
## [28,]   24    4   22   19   20    8   36
## [29,]   50    6   38   34   52   48    0
## [30,]   52   64   15   55   31   61   50
## [31,]   58   55   64   10   30   50   14
## [32,]   61    8   44   18   51   26   13
## [33,]   60   12   50   36   13   15   51
## [34,]    6   60   37   29   25   11   52
## [35,]   46   38   56    6   57   52   48
## [36,]   13   57   51   33    0   16   28
## [37,]    0    5   34   27    0   23   61
## [38,]   11   35   29   12    0   18   15
## [39,]    1   54   40   16   44   21   24
## [40,]   20   26   39   59   21   56   22
## [41,]   59   17   58   20    0    0    0
## [42,]   12   50   57   60   61   64   56
## [43,]   21   23   24   63   59   46   55
## [44,]    0   14   32   53   39   24   59
## [45,]    5   51   60   56   63   55   58
## [46,]   35    7   27   50   64   43   23
## [47,]   18   24   21   61    8   51   25
## [48,]   17   63    0   52    0   29   35
## [49,]   26   20   63   64   58    0    0
## [50,]   29   42   33   46    0   31   30
## [51,]   27   45   36   57   32   47   33
## [52,]   30   22   19   48   29   35   34
## [53,]    0   25    0   44    0   57    0
## [54,]   14   39   61    0   15   59   64
## [55,]   62   31   10   30    0   45   43
## [56,]    0   11   35   45    0   40   42
## [57,]    7   36   42   51   35   53    0
## [58,]   31    2   41   23   49    0   45
## [59,]   41    0    9   40   43   54   44
## [60,]   33   34   45   42   24    0    0
## [61,]   32    3   54   47   42   30   37
## [62,]   55    0    0    0    0    0    0
## [63,]    2   48   49   43   45    0    0
## [64,]   22   30   31   49   46   42   54
# One column per round (O1..O7), one row per player.
opponent_headers <- c("O1", "O2", "O3", "O4", "O5", "O6", "O7")
opponent_df <- opponent_matrix %>% as.data.frame()
colnames(opponent_df) <- opponent_headers
opponent_df %>% head(n = 10)
##    O1 O2 O3 O4 O5 O6 O7
## 1  39 21 18 14  7 12  4
## 2  63 58  4 17 16 20  7
## 3   8 61 25 21 11 13 12
## 4  23 28  2 26  5 19  1
## 5  45 37 12 13  4 14 17
## 6  34 29 11 35 10 27 21
## 7  57 46 13 11  1  9  2
## 8   3 32 14  9 47 28 19
## 9  25 18 59  8 26  7 20
## 10 16 19 55 31  6 25 18

Transform the player pre rating to a matrix and then to a dataframe

# Transform to matrix: with no dimensions given, matrix() lays the pre-rating
# vector out as a single column, one row per entry.
player_pre_rating_matrix <- matrix(player_prerating, byrow = TRUE)

# Transform to data frame with a single "rating" column.
# seq_len(nrow(...)) yields the same 1..n row names as 1:length(...) did,
# but it never counts backwards (1:0) when there are no rows.
pre_rating_header <- "rating"
pre_rating <- as.data.frame(
  player_pre_rating_matrix,
  row.names = seq_len(nrow(player_pre_rating_matrix))
)
colnames(pre_rating) <- pre_rating_header
head(pre_rating, n = 10)
##    rating
## 1    1794
## 2    1553
## 3    1384
## 4    1716
## 5    1655
## 6    1686
## 7    1649
## 8    1641
## 9    1411
## 10   1365

Add another column called rating_index for the player number corresponding to each prerating

# Number the rows of pre_rating 1..n and prepend that as a rating_index
# column. The count comes from pre_rating itself rather than a hard-coded 64,
# so the index always has exactly one entry per rating.
rating_index <- data.frame(rating_index = seq_len(nrow(pre_rating)))
pre_rating <- cbind(rating_index, pre_rating)
head(pre_rating, n = 10)
##    rating_index rating
## 1             1   1794
## 2             2   1553
## 3             3   1384
## 4             4   1716
## 5             5   1655
## 6             6   1686
## 7             7   1649
## 8             8   1641
## 9             9   1411
## 10           10   1365

Replace the opponent numbers in the opponent_df data frame with the corresponding players' pre-ratings; entries of 0 become NA.

# Replace every opponent number with that opponent's pre-rating, one whole
# column at a time instead of cell by cell.
# match() looks each number up in pre_rating$rating_index; a 0 has no matching
# index, so those cells become NA (presumably rounds with no opponent --
# confirm against the raw tournament file). The NAs are skipped later by the
# na.rm = TRUE row summaries.
opponent_df[] <- lapply(opponent_df, function(opponent_ids) {
  pre_rating$rating[match(opponent_ids, pre_rating$rating_index)]
})
head(opponent_df, n = 10)
##      O1   O2   O3   O4   O5   O6   O7
## 1  1436 1563 1600 1610 1649 1663 1716
## 2  1175  917 1716 1629 1604 1595 1649
## 3  1641  955 1745 1563 1712 1666 1663
## 4  1363 1507 1553 1579 1655 1564 1794
## 5  1242  980 1663 1666 1716 1610 1629
## 6  1399 1602 1712 1438 1365 1552 1563
## 7  1092  377 1666 1712 1794 1411 1553
## 8  1384 1441 1610 1411 1362 1507 1564
## 9  1745 1600  853 1641 1579 1649 1595
## 10 1604 1564 1186 1494 1686 1745 1600
# Append each row's total of opponent pre-ratings, skipping NA cells.
# The new column is not given a name, so it is labelled with the text of the
# expression itself.
sum_opponent_df <- mutate(opponent_df, rowSums(opponent_df, na.rm = TRUE))
head(sum_opponent_df, n = 10)
##      O1   O2   O3   O4   O5   O6   O7 rowSums(opponent_df, na.rm = TRUE)
## 1  1436 1563 1600 1610 1649 1663 1716                              11237
## 2  1175  917 1716 1629 1604 1595 1649                              10285
## 3  1641  955 1745 1563 1712 1666 1663                              10945
## 4  1363 1507 1553 1579 1655 1564 1794                              11015
## 5  1242  980 1663 1666 1716 1610 1629                              10506
## 6  1399 1602 1712 1438 1365 1552 1563                              10631
## 7  1092  377 1666 1712 1794 1411 1553                               9605
## 8  1384 1441 1610 1411 1362 1507 1564                              10279
## 9  1745 1600  853 1641 1579 1649 1595                              10662
## 10 1604 1564 1186 1494 1686 1745 1600                              10879

Compute the average opponent pre-rating for each row.

# Append each row's mean opponent pre-rating, rounded to a whole number and
# ignoring NA cells. The new column is not given a name here, so it is
# labelled with the text of the expression.
avg_opponent <- mutate(
  opponent_df,
  round(rowMeans(opponent_df, na.rm = TRUE), 0)
)
head(avg_opponent, n = 10)
##      O1   O2   O3   O4   O5   O6   O7
## 1  1436 1563 1600 1610 1649 1663 1716
## 2  1175  917 1716 1629 1604 1595 1649
## 3  1641  955 1745 1563 1712 1666 1663
## 4  1363 1507 1553 1579 1655 1564 1794
## 5  1242  980 1663 1666 1716 1610 1629
## 6  1399 1602 1712 1438 1365 1552 1563
## 7  1092  377 1666 1712 1794 1411 1553
## 8  1384 1441 1610 1411 1362 1507 1564
## 9  1745 1600  853 1641 1579 1649 1595
## 10 1604 1564 1186 1494 1686 1745 1600
##    round(rowMeans(opponent_df, na.rm = TRUE), 0)
## 1                                           1605
## 2                                           1469
## 3                                           1564
## 4                                           1574
## 5                                           1501
## 6                                           1519
## 7                                           1372
## 8                                           1468
## 9                                           1523
## 10                                          1554

Rename the columns so that the computed average has a descriptive name.

# Column labels: the seven opponent slots O1..O7 followed by the average.
opponent_headers_withAvg <- c(paste0("O", 1:7), "Average_opponent_rating")
# Overwrite all eight column names of avg_opponent in one assignment.
names(avg_opponent) <- opponent_headers_withAvg
head(avg_opponent, n = 4)
##     O1   O2   O3   O4   O5   O6   O7 Average_opponent_rating
## 1 1436 1563 1600 1610 1649 1663 1716                    1605
## 2 1175  917 1716 1629 1604 1595 1649                    1469
## 3 1641  955 1745 1563 1712 1666 1663                    1564
## 4 1363 1507 1553 1579 1655 1564 1794                    1574

Extract the Average_opponent_rating from this dataframe into a standalone vector.

# Pull the average column out of the data frame as a plain vector and show it.
opponent_avg_prerating <- avg_opponent[["Average_opponent_rating"]]
print(opponent_avg_prerating)
##  [1] 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 1468 1506 1498 1515 1484
## [16] 1386 1499 1480 1426 1411 1470 1300 1214 1357 1363 1507 1222 1522 1314 1144
## [31] 1260 1379 1277 1375 1150 1388 1385 1539 1430 1391 1248 1150 1107 1327 1152
## [46] 1358 1392 1356 1286 1296 1356 1495 1345 1206 1406 1414 1363 1391 1319 1330
## [61] 1327 1186 1350 1263

Merge pl_name, player_state, player_total_points, player_prerating, and opponent_avg_prerating into a single data frame.

# Assemble the final per-player table: name, state, total points, pre-rating
# and average opponent pre-rating, one row per player.
# The extracted names carry inconsistent trailing whitespace (visible as
# ragged alignment when the table is printed), so trim them before they end
# up in the data frame and the exported file.
name <- trimws(pl_name)
state <- player_state
points <- player_total_points
prerating <- player_prerating
avg_opponent_prerating <- opponent_avg_prerating
tournament <- data.frame(name, state, points, prerating, avg_opponent_prerating)
tournament
##                          name state points prerating avg_opponent_prerating
## 1                   GARY HUA     ON    6.0      1794                   1605
## 2            DAKSHESH DARURI     MI    6.0      1553                   1469
## 3               ADITYA BAJAJ     MI    6.0      1384                   1564
## 4        PATRICK H SCHILLING     MI    5.5      1716                   1574
## 5                 HANSHI ZUO     MI    5.5      1655                   1501
## 6                HANSEN SONG     OH    5.0      1686                   1519
## 7          GARY DEE SWATHELL     MI    5.0      1649                   1372
## 8           EZEKIEL HOUGHTON     MI    5.0      1641                   1468
## 9                STEFANO LEE     ON    5.0      1411                   1523
## 10                 ANVIT RAO     MI    5.0      1365                   1554
## 11   CAMERON WILLIAM MC LEMAN    MI    4.5      1712                   1468
## 12            KENNETH J TACK     MI    4.5      1663                   1506
## 13         TORRANCE HENRY JR     MI    4.5      1666                   1498
## 14              BRADLEY SHAW     MI    4.5      1610                   1515
## 15    ZACHARY JAMES HOUGHTON     MI    4.5      1220                   1484
## 16              MIKE NIKITIN     MI    4.0      1604                   1386
## 17        RONALD GRZEGORCZYK     MI    4.0      1629                   1499
## 18             DAVID SUNDEEN     MI    4.0      1600                   1480
## 19              DIPANKAR ROY     MI    4.0      1564                   1426
## 20               JASON ZHENG     MI    4.0      1595                   1411
## 21             DINH DANG BUI     ON    4.0      1563                   1470
## 22          EUGENE L MCCLURE     MI    4.0      1555                   1300
## 23                  ALAN BUI     ON    4.0      1363                   1214
## 24         MICHAEL R ALDRICH     MI    4.0      1229                   1357
## 25          LOREN SCHWIEBERT     MI    3.5      1745                   1363
## 26                   MAX ZHU     ON    3.5      1579                   1507
## 27            GAURAV GIDWANI     MI    3.5      1552                   1222
## 28 SOFIA ADINA STANESCUBELLU     MI    3.5      1507                   1522
## 29          CHIEDOZIE OKORIE     MI    3.5      1602                   1314
## 30        GEORGE AVERY JONES     ON    3.5      1522                   1144
## 31              RISHI SHETTY     MI    3.5      1494                   1260
## 32     JOSHUA PHILIP MATHEWS     ON    3.5      1441                   1379
## 33                   JADE GE     MI    3.5      1449                   1277
## 34    MICHAEL JEFFERY THOMAS     MI    3.5      1399                   1375
## 35          JOSHUA DAVID LEE     MI    3.5      1438                   1150
## 36             SIDDHARTH JHA     MI    3.5      1355                   1388
## 37      AMIYATOSH PWNANANDAM     MI    3.5       980                   1385
## 38                 BRIAN LIU     MI    3.0      1423                   1539
## 39             JOEL R HENDON     MI    3.0      1436                   1430
## 40              FOREST ZHANG     MI    3.0      1348                   1391
## 41       KYLE WILLIAM MURPHY     MI    3.0      1403                   1248
## 42                  JARED GE     MI    3.0      1332                   1150
## 43         ROBERT GLEN VASEY     MI    3.0      1283                   1107
## 44        JUSTIN D SCHILLING     MI    3.0      1199                   1327
## 45                 DEREK YAN     MI    3.0      1242                   1152
## 46  JACOB ALEXANDER LAVALLEY     MI    3.0       377                   1358
## 47               ERIC WRIGHT     MI    2.5      1362                   1392
## 48              DANIEL KHAIN     MI    2.5      1382                   1356
## 49          MICHAEL J MARTIN     MI    2.5      1291                   1286
## 50                SHIVAM JHA     MI    2.5      1056                   1296
## 51            TEJAS AYYAGARI     MI    2.5      1011                   1356
## 52                 ETHAN GUO     MI    2.5       935                   1495
## 53             JOSE C YBARRA     MI    2.0      1393                   1345
## 54               LARRY HODGE     MI    2.0      1270                   1206
## 55                 ALEX KONG     MI    2.0      1186                   1406
## 56              MARISA RICCI     MI    2.0      1153                   1414
## 57                MICHAEL LU     MI    2.0      1092                   1363
## 58              VIRAJ MOHILE     MI    2.0       917                   1391
## 59          SEAN M MC CORMICK    MI    2.0       853                   1319
## 60                JULIA SHEN     MI    1.5       967                   1330
## 61             JEZZEL FARKAS     ON    1.5       955                   1327
## 62             ASHWIN BALAJI     MI    1.0      1530                   1186
## 63      THOMAS JOSEPH HOSMER     MI    1.0      1175                   1350
## 64                    BEN LI     MI    1.0      1163                   1263

Write the tournament data frame to a csv file.

# Export the table to tournament.csv in the working directory.
# row.names = FALSE keeps the automatic 1..n row labels out of the file;
# otherwise they are written as an extra, unnamed first column.
write.csv(tournament, file = "tournament.csv", row.names = FALSE)