In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could, for example, be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents. For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605. The value 1605 was calculated by summing the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663, and 1716, and dividing by the total number of games played. If you have questions about the meaning of the data or the results, please post them on the discussion forum. Data science, like chess, is a game of back and forth… The chess rating system (invented by a Minnesota statistician named Arpad Elo) has been used in many other contexts, including assessing the relative strength of employment candidates by human resource departments. You may substitute another text file (or set of text files, or data scraped from web pages) of similar or greater complexity, and create your own assignment and solution. You may work in a small team. All of your code should be in an R Markdown file (and published to rpubs.com), with your data accessible to the person running the script.

1) Load Packages

library(stringr)
library(DT)

2) Import Data into RStudio

# Read the raw tournament report straight from GitHub. header = FALSE keeps the
# first line of the file as data instead of promoting it to column names.
# NOTE(review): read.csv() splits on commas, so the single-column (V1) layout
# only holds while no line of the report contains a comma.
Data_S <- read.csv(
  "https://raw.githubusercontent.com/Fyoun123/Data607/master/Project%201/tournamentinfo.txt",
  header = FALSE
)

Test of Data

# Preview the first ten raw rows to confirm the import worked.
head(Data_S, 10)
##                                                                                            V1
## 1   -----------------------------------------------------------------------------------------
## 2   Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| 
## 3   Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | 
## 4   -----------------------------------------------------------------------------------------
## 5       1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|
## 6      ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |
## 7   -----------------------------------------------------------------------------------------
## 8       2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|
## 9      MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |
## 10  -----------------------------------------------------------------------------------------

3) Removal of headers

# Drop the first three rows (separator plus the two column-heading lines).
# Data_S has a single column, so the result collapses to a plain vector.
Data_S_2 <- Data_S[-seq_len(3), ]

Test of step

# Preview the first five remaining lines to confirm the header is gone.
head(Data_S_2, 5)
## [1] -----------------------------------------------------------------------------------------
## [2]     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|
## [3]    ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |
## [4] -----------------------------------------------------------------------------------------
## [5]     2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|
## 131 Levels:     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4| ...

4) Extracting rows with the appropriate data and extraction of said data.

# Parse the per-player records. With the header removed, the report repeats in
# groups of three lines: a dashed separator, a "name" line (pair number, name,
# total points, one cell per round) and a "state" line (state, USCF ID,
# pre- and post-tournament rating).
R_L <- length(Data_S_2)
# as.character() guarantees plain text for the regex helpers even when the
# lines were read in as a factor.
R_1 <- as.character(Data_S_2[seq(2, R_L, 3)])
R_2 <- as.character(Data_S_2[seq(3, R_L, 3)])

# Pair number: the first run of digits on the name line.
Player_number <- as.integer(str_extract(R_1, "\\d+"))
# Name: everything between the first and second "|". Counting words instead
# (two or three words only) cut names with more than three words short.
Player_name <- str_trim(str_match(R_1, "\\|([^|]+)")[, 2])
# State / province: the first word on the state line.
Player_state <- str_extract(R_2, "\\w+")
# Total points: the only decimal number on the name line.
PLayers_points <- as.numeric(str_extract(R_1, "\\d+\\.\\d+"))
# Pre-rating: the digits that follow the "R:" label. Anchoring on the label
# avoids depending on how many digits the USCF ID or the rating happen to have,
# and ignores any provisional suffix such as "P17".
Player_rating <- as.integer(str_match(R_2, "R:\\s*(\\d+)")[, 2])
# Opponent pair numbers: digits sitting directly before a "|" (round cells such
# as "W  39|"). Extracting them in one pass returns a list with one character
# vector per player and avoids feeding a list back into str_extract_all(),
# which only worked through an implicit coercion and raised a warning.
Opponents <- str_extract_all(R_1, "\\d+(?=\\|)")
## Warning in stri_extract_all_regex(string, pattern, simplify = simplify, :
## argument is not an atomic vector; coercing

5) Average of Opponents' Pre-Ratings

# Average pre-tournament rating of the opponents each player actually faced.
# Iterating over Opponents itself keeps the result the same length as the other
# player vectors (the former bound, R_L / 3, is not an integer in general).
# Opponent pair numbers are translated to positions with match(), so the lookup
# does not rely on players being listed in pair-number order. A player with no
# recorded opponents yields NaN; an unknown pair number yields NA.
Opponent_OPRa <- vapply(
  Opponents,
  function(opponent_ids) {
    opponent_rows <- match(as.integer(opponent_ids), Player_number)
    mean(Player_rating[opponent_rows])
  },
  numeric(1)
)

6) Data Frame of Data

# Assemble the final per-player table; column names mirror the vector names.
Project_table <- data.frame(
  Player_name = Player_name,
  Player_state = Player_state,
  PLayers_points = PLayers_points,
  Player_rating = Player_rating,
  Opponent_OPRa = Opponent_OPRa
)

Test

# Auto-print the assembled table so the result can be inspected by eye.
Project_table
##                 Player_name Player_state PLayers_points Player_rating
## 1                  GARY HUA           ON            6.0          1794
## 2           DAKSHESH DARURI           MI            6.0          1553
## 3              ADITYA BAJAJ           MI            6.0          1384
## 4       PATRICK H SCHILLING           MI            5.5          1716
## 5                HANSHI ZUO           MI            5.5          1655
## 6               HANSEN SONG           OH            5.0          1686
## 7         GARY DEE SWATHELL           MI            5.0          1649
## 8          EZEKIEL HOUGHTON           MI            5.0          1641
## 9               STEFANO LEE           ON            5.0          1411
## 10                ANVIT RAO           MI            5.0          1365
## 11       CAMERON WILLIAM MC           MI            4.5          1712
## 12           KENNETH J TACK           MI            4.5          1663
## 13        TORRANCE HENRY JR           MI            4.5          1666
## 14             BRADLEY SHAW           MI            4.5          1610
## 15   ZACHARY JAMES HOUGHTON           MI            4.5          1220
## 16             MIKE NIKITIN           MI            4.0          1604
## 17       RONALD GRZEGORCZYK           MI            4.0          1629
## 18            DAVID SUNDEEN           MI            4.0          1600
## 19             DIPANKAR ROY           MI            4.0          1564
## 20              JASON ZHENG           MI            4.0          1595
## 21            DINH DANG BUI           ON            4.0          1563
## 22         EUGENE L MCCLURE           MI            4.0          1555
## 23                 ALAN BUI           ON            4.0          1363
## 24        MICHAEL R ALDRICH           MI            4.0          1229
## 25         LOREN SCHWIEBERT           MI            3.5          1745
## 26                  MAX ZHU           ON            3.5          1579
## 27           GAURAV GIDWANI           MI            3.5          1552
## 28              SOFIA ADINA           MI            3.5          1507
## 29         CHIEDOZIE OKORIE           MI            3.5          1602
## 30       GEORGE AVERY JONES           ON            3.5          1522
## 31             RISHI SHETTY           MI            3.5          1494
## 32    JOSHUA PHILIP MATHEWS           ON            3.5          1441
## 33                  JADE GE           MI            3.5          1449
## 34   MICHAEL JEFFERY THOMAS           MI            3.5          1399
## 35         JOSHUA DAVID LEE           MI            3.5          1438
## 36            SIDDHARTH JHA           MI            3.5          1355
## 37     AMIYATOSH PWNANANDAM           MI            3.5           980
## 38                BRIAN LIU           MI            3.0          1423
## 39            JOEL R HENDON           MI            3.0          1436
## 40             FOREST ZHANG           MI            3.0          1348
## 41      KYLE WILLIAM MURPHY           MI            3.0          1403
## 42                 JARED GE           MI            3.0          1332
## 43        ROBERT GLEN VASEY           MI            3.0          1283
## 44       JUSTIN D SCHILLING           MI            3.0          1199
## 45                DEREK YAN           MI            3.0          1242
## 46 JACOB ALEXANDER LAVALLEY           MI            3.0           377
## 47              ERIC WRIGHT           MI            2.5          1362
## 48             DANIEL KHAIN           MI            2.5          1382
## 49         MICHAEL J MARTIN           MI            2.5          1291
## 50               SHIVAM JHA           MI            2.5          1056
## 51           TEJAS AYYAGARI           MI            2.5          1011
## 52                ETHAN GUO           MI            2.5           935
## 53            JOSE C YBARRA           MI            2.0          1393
## 54              LARRY HODGE           MI            2.0          1270
## 55                ALEX KONG           MI            2.0          1186
## 56             MARISA RICCI           MI            2.0          1153
## 57               MICHAEL LU           MI            2.0          1092
## 58             VIRAJ MOHILE           MI            2.0           917
## 59                SEAN M MC           MI            2.0           853
## 60               JULIA SHEN           MI            1.5           967
## 61            JEZZEL FARKAS           ON            1.5           955
## 62            ASHWIN BALAJI           MI            1.0          1530
## 63     THOMAS JOSEPH HOSMER           MI            1.0          1175
## 64                   BEN LI           MI            1.0          1163
##    Opponent_OPRa
## 1       1605.286
## 2       1469.286
## 3       1563.571
## 4       1573.571
## 5       1500.857
## 6       1518.714
## 7       1372.143
## 8       1468.429
## 9       1523.143
## 10      1554.143
## 11      1467.571
## 12      1506.167
## 13      1497.857
## 14      1515.000
## 15      1483.857
## 16      1385.800
## 17      1498.571
## 18      1480.000
## 19      1426.286
## 20      1410.857
## 21      1470.429
## 22      1300.333
## 23      1213.857
## 24      1357.000
## 25      1363.286
## 26      1506.857
## 27      1221.667
## 28      1522.143
## 29      1313.500
## 30      1144.143
## 31      1259.857
## 32      1378.714
## 33      1276.857
## 34      1375.286
## 35      1149.714
## 36      1388.167
## 37      1384.800
## 38      1539.167
## 39      1429.571
## 40      1390.571
## 41      1248.500
## 42      1149.857
## 43      1106.571
## 44      1327.000
## 45      1152.000
## 46      1357.714
## 47      1392.000
## 48      1355.800
## 49      1285.800
## 50      1296.000
## 51      1356.143
## 52      1494.571
## 53      1345.333
## 54      1206.167
## 55      1406.000
## 56      1414.400
## 57      1363.000
## 58      1391.000
## 59      1319.000
## 60      1330.200
## 61      1327.286
## 62      1186.000
## 63      1350.200
## 64      1263.000

7) Export to CSV

# Write the table as a CSV in the working directory. row.names = FALSE keeps
# the header and the data rows at the same number of fields; write.table()
# with its default row names emitted an unlabeled leading column, leaving the
# header one field short and misaligning the columns on import.
write.csv(Project_table, file = "Project1.csv", row.names = FALSE)