Project 1

In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players:

Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents

For the first player, the information would be:

Gary Hua, ON, 6.0, 1794, 1605

1605 was calculated by summing the pre-tournament ratings of his opponents (1436, 1563, 1600, 1610, 1649, 1663, and 1716) and dividing by the total number of games played.

If you have questions about the meaning of the data or the results, please post them on the discussion forum. Data science, like chess, is a game of back and forth…

The chess rating system (invented by a Minnesota statistician named Arpad Elo) has been used in many other contexts, including assessing relative strength of employment candidates by human resource departments.

You may substitute another text file (or set of text files, or data scraped from web pages) of similar or greater complexity, and create your own assignment and solution. You may work in a small team. All of your code should be in an R markdown file (and published to rpubs.com); with your data accessible for the person running the script.

# Load the required libraries and read the tournament data from GitHub, skipping the first 3 lines and treating no line as a column header
library(stringr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.1 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.3.0      ✔ forcats 0.5.2 
## ✔ readr   2.1.3      
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Read the raw tournament text file. skip = 3 drops the first three lines and
# header = FALSE stops the next line from being consumed as column names.
# The code further down treats the result as one text line per row.
ChessTournamentData <- read.csv(
  "https://raw.githubusercontent.com/BeshkiaKvarnstrom/MSDS-DATA607/main/File-tournamentinfo.txt",
  skip = 3,
  header = FALSE
)


# Data cleaning: split the raw text into each player's two record lines and
# extract the fields needed for the report.

# Take the first column of the data frame as a plain character vector holding
# one element per line that was read.
ChessTournament_DT <- ChessTournamentData[[1]]

# The lines repeat in groups of three. The 2nd line of each group carries the
# pair number, name, points and round results; the 3rd carries the state and
# ratings. The 1st line of each group is not used (presumably a separator).
nrows <- length(ChessTournament_DT)
row_dt_even <- ChessTournament_DT[seq(2, nrows, 3)]
row_dt_odd <- ChessTournament_DT[seq(3, nrows, 3)]

# Pair number: the first run of digits on the player line.
Player_ID <- as.integer(str_extract(row_dt_even, "\\d+"))

# Name: the whole second "|"-delimited field, trimmed. Taking the full field
# keeps names of any length intact; matching only two or three words
# truncated four-word names.
Player_Name <- str_trim(str_split_fixed(row_dt_even, fixed("|"), 3)[, 2])

# State: the first word on the second line of the record.
Player_State <- str_extract(row_dt_odd, "\\w+")

# Total points: the first decimal number on the player line.
Player_Points <- as.numeric(str_extract(row_dt_even, "\\d+\\.\\d+"))

# Pre-rating: the first stand-alone 3- or 4-digit number on the second line
# (longer digit runs are skipped by the look-arounds).
Player_PreRating <- as.integer(
  str_extract(row_dt_odd, "(?<!\\d)\\d{3,4}(?!\\d)")
)

# Opponent pair numbers: every run of digits that sits directly before a "|"
# on the player line. The result is a list with one integer vector per player
# (empty when the player has no numbered opponents).
Opponents <- lapply(
  str_extract_all(row_dt_even, "\\d+(?=\\|)"),
  as.integer
)

# Average pre-rating of each player's opponents, rounded to a whole number as
# in the assignment's example (1605). Opponents are looked up by pair number
# via match(), so the result does not depend on pair numbers being equal to
# row positions.
Opponent_PreRating <- vapply(
  Opponents,
  function(opponent_ids) {
    round(mean(Player_PreRating[match(opponent_ids, Player_ID)]))
  },
  numeric(1)
)

ChessTournament_Results <- data.frame(
  Player_Name,
  Player_State,
  Player_Points,
  Player_PreRating,
  Opponent_PreRating
)
ChessTournament_Results
##                 Player_Name Player_State Player_Points Player_PreRating
## 1                  GARY HUA           ON           6.0             1794
## 2           DAKSHESH DARURI           MI           6.0             1553
## 3              ADITYA BAJAJ           MI           6.0             1384
## 4       PATRICK H SCHILLING           MI           5.5             1716
## 5                HANSHI ZUO           MI           5.5             1655
## 6               HANSEN SONG           OH           5.0             1686
## 7         GARY DEE SWATHELL           MI           5.0             1649
## 8          EZEKIEL HOUGHTON           MI           5.0             1641
## 9               STEFANO LEE           ON           5.0             1411
## 10                ANVIT RAO           MI           5.0             1365
## 11       CAMERON WILLIAM MC           MI           4.5             1712
## 12           KENNETH J TACK           MI           4.5             1663
## 13        TORRANCE HENRY JR           MI           4.5             1666
## 14             BRADLEY SHAW           MI           4.5             1610
## 15   ZACHARY JAMES HOUGHTON           MI           4.5             1220
## 16             MIKE NIKITIN           MI           4.0             1604
## 17       RONALD GRZEGORCZYK           MI           4.0             1629
## 18            DAVID SUNDEEN           MI           4.0             1600
## 19             DIPANKAR ROY           MI           4.0             1564
## 20              JASON ZHENG           MI           4.0             1595
## 21            DINH DANG BUI           ON           4.0             1563
## 22         EUGENE L MCCLURE           MI           4.0             1555
## 23                 ALAN BUI           ON           4.0             1363
## 24        MICHAEL R ALDRICH           MI           4.0             1229
## 25         LOREN SCHWIEBERT           MI           3.5             1745
## 26                  MAX ZHU           ON           3.5             1579
## 27           GAURAV GIDWANI           MI           3.5             1552
## 28              SOFIA ADINA           MI           3.5             1507
## 29         CHIEDOZIE OKORIE           MI           3.5             1602
## 30       GEORGE AVERY JONES           ON           3.5             1522
## 31             RISHI SHETTY           MI           3.5             1494
## 32    JOSHUA PHILIP MATHEWS           ON           3.5             1441
## 33                  JADE GE           MI           3.5             1449
## 34   MICHAEL JEFFERY THOMAS           MI           3.5             1399
## 35         JOSHUA DAVID LEE           MI           3.5             1438
## 36            SIDDHARTH JHA           MI           3.5             1355
## 37     AMIYATOSH PWNANANDAM           MI           3.5              980
## 38                BRIAN LIU           MI           3.0             1423
## 39            JOEL R HENDON           MI           3.0             1436
## 40             FOREST ZHANG           MI           3.0             1348
## 41      KYLE WILLIAM MURPHY           MI           3.0             1403
## 42                 JARED GE           MI           3.0             1332
## 43        ROBERT GLEN VASEY           MI           3.0             1283
## 44       JUSTIN D SCHILLING           MI           3.0             1199
## 45                DEREK YAN           MI           3.0             1242
## 46 JACOB ALEXANDER LAVALLEY           MI           3.0              377
## 47              ERIC WRIGHT           MI           2.5             1362
## 48             DANIEL KHAIN           MI           2.5             1382
## 49         MICHAEL J MARTIN           MI           2.5             1291
## 50               SHIVAM JHA           MI           2.5             1056
## 51           TEJAS AYYAGARI           MI           2.5             1011
## 52                ETHAN GUO           MI           2.5              935
## 53            JOSE C YBARRA           MI           2.0             1393
## 54              LARRY HODGE           MI           2.0             1270
## 55                ALEX KONG           MI           2.0             1186
## 56             MARISA RICCI           MI           2.0             1153
## 57               MICHAEL LU           MI           2.0             1092
## 58             VIRAJ MOHILE           MI           2.0              917
## 59                SEAN M MC           MI           2.0              853
## 60               JULIA SHEN           MI           1.5              967
## 61            JEZZEL FARKAS           ON           1.5              955
## 62            ASHWIN BALAJI           MI           1.0             1530
## 63     THOMAS JOSEPH HOSMER           MI           1.0             1175
## 64                   BEN LI           MI           1.0             1163
##    Opponent_PreRating
## 1            1605.286
## 2            1469.286
## 3            1563.571
## 4            1573.571
## 5            1500.857
## 6            1518.714
## 7            1372.143
## 8            1468.429
## 9            1523.143
## 10           1554.143
## 11           1467.571
## 12           1506.167
## 13           1497.857
## 14           1515.000
## 15           1483.857
## 16           1385.800
## 17           1498.571
## 18           1480.000
## 19           1426.286
## 20           1410.857
## 21           1470.429
## 22           1300.333
## 23           1213.857
## 24           1357.000
## 25           1363.286
## 26           1506.857
## 27           1221.667
## 28           1522.143
## 29           1313.500
## 30           1144.143
## 31           1259.857
## 32           1378.714
## 33           1276.857
## 34           1375.286
## 35           1149.714
## 36           1388.167
## 37           1384.800
## 38           1539.167
## 39           1429.571
## 40           1390.571
## 41           1248.500
## 42           1149.857
## 43           1106.571
## 44           1327.000
## 45           1152.000
## 46           1357.714
## 47           1392.000
## 48           1355.800
## 49           1285.800
## 50           1296.000
## 51           1356.143
## 52           1494.571
## 53           1345.333
## 54           1206.167
## 55           1406.000
## 56           1414.400
## 57           1363.000
## 58           1391.000
## 59           1319.000
## 60           1330.200
## 61           1327.286
## 62           1186.000
## 63           1350.200
## 64           1263.000
# Export the results data frame to a CSV file. row.names = FALSE keeps R's
# automatic row index out of the file, so it contains only the five result
# columns.
write.csv(
  ChessTournament_Results,
  file = "Chess_Tournament_Results.csv",
  row.names = FALSE
)