Setup

The following R packages are required for this project:

  1. downloader
  2. stringr
library(downloader)
library(stringr)

Project Description

In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could, for example, be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents. For the first player, the information would be:

Gary Hua, ON, 6.0, 1794, 1605

1605 was calculated by summing the pre-tournament ratings of the opponents (1436, 1563, 1600, 1610, 1649, 1663, and 1716) and dividing by the total number of games played (7).

The goal of this project is to generate a .CSV file (which could, for example, be imported into a SQL database) with the following information for all of the players contained in the chess tournament cross table: name, state, total number of points, pre-rating, and the average pre-rating of the opponents.

Importing the dataset from the text file

Download the file, called `tournamentinfo.txt`, from the GitHub URL below. This file will be downloaded to your local working directory.

https://raw.githubusercontent.com/ravi-kothari/DATA-607/master/tournamentinfo.txt?token=AQGrklbb_mgZp2MfY-1Ixw30fsRR2reTks5X8gcVwA%3D%3D

# Raw GitHub location of the tournament cross table.
# NOTE(review): the "token" query parameter looks like a temporary access
# token for the raw link -- confirm whether it is still valid before relying
# on a fresh download.
url <- "https://raw.githubusercontent.com/ravi-kothari/DATA-607/master/tournamentinfo.txt?token=AQGrklbb_mgZp2MfY-1Ixw30fsRR2reTks5X8gcVwA%3D%3D"

# Name of the local copy, relative to the working directory.
download_file <- "tournamentinfo.txt"

# Download only when there is no local copy yet, so the report can be re-run
# offline (or after the link above stops working) without failing here.
if (!file.exists(download_file)) {
  downloader::download(url, download_file)
}

# Show where the file was saved.
getwd()
## [1] "C:/Users/rkothari/Documents/MSDA/DATA 607"

Extraction of the columns using regular expressions

library(stringr)
results <- readLines("tournamentinfo.txt")
## Warning in readLines("tournamentinfo.txt"): incomplete final line found on
## 'tournamentinfo.txt'

# Each player occupies two rows of the cross table, for example:
#   "    1 | GARY HUA                |6.0  |W  39|W  21|..."
#   "   ON | 15445895 / R: 1794 ...  |N:2  |W    |B    |..."
# The extractions below anchor on that cell structure instead of guessing at
# the shape of the values, which keeps the header rows out and does not
# depend on how many words a name has or which states appear in the file.

# Player name: the second cell of every row whose first cell is a pair number.
# The lazy capture followed by "\\s*\\|" drops the padding, so names of any
# length (including four-word names) are returned in full.
name_match <- str_match(results, "^\\s*\\d+\\s*\\|\\s*([^|]+?)\\s*\\|")
player_name <- name_match[!is.na(name_match[, 2]), 2]

# Player state: the first cell of the second row, a two-letter upper-case
# code. Only the letters are captured, so the values carry no whitespace.
state_match <- str_match(results, "^\\s*([A-Z]{2})\\s*\\|")
player_state <- state_match[!is.na(state_match[, 2]), 2]

# Total points: a cell that holds nothing but a decimal number such as "6.0".
# Kept as text so the one-decimal formatting survives into the CSV file.
pts_match <- str_match(results, "\\|(\\d+\\.\\d)\\s*\\|")
player_pts <- pts_match[!is.na(pts_match[, 2]), 2]

# Pre-tournament rating of every player, obtained in two passes.

# Pass 1: pick up the "R: <digits>" token that introduces each pre-rating.
pre_rating_step1 <- unlist(
  str_extract_all(string = results, pattern = "(R:\\s*)(\\d+)")
)
pre_rating_step1[1]
## [1] "R: 1794"
# Pass 2: keep only the digits of each token.
pre_rating <- unlist(
  str_extract_all(string = pre_rating_step1, pattern = "(\\d+)")
)
is.vector(pre_rating)
## [1] TRUE
# The digits are still text at this point; turn them into numbers.
pre_rating <- as.numeric(pre_rating)
pre_rating[1]
## [1] 1794
# The average opponent rating needs the pair number of every opponent a
# player met. These are pulled out of the round cells in three steps.

# Step 1: keep each line from the first "|" that is directly followed by a
# digit, i.e. from the total-points cell through the round cells.
avg_calc_step1 <- unlist(str_extract_all(results, "\\|[0-9].*"))

avg_calc_step1[16]
## [1] "|4.0  |D  10|W  15|H    |W  39|L   2|W  36|U    |"
# Step 2: a cell that ends in blanks has no opponent (bye, half-point bye,
# unplayed). Writing "00" into it gives every round cell a number, so each
# player ends up with the same count of values. The points cell is changed
# too ("4.0  " becomes "4.000") but is never picked up in step 3 because its
# digits are not preceded by whitespace.
avg_calc_step2 <- str_replace_all(avg_calc_step1, "\\s{1,2}\\|", "00|")

avg_calc_step2[16]
## [1] "|4.000|D  10|W  15|H  00|W  39|L   2|W  36|U  00|"
# Step 3: extract the opponent pair number of each round. "\\d+" (rather than
# a fixed one or two digits) keeps pair numbers above 99 intact.
avg_calc_step3 <- str_extract_all(avg_calc_step2, "\\s\\d+")

avg_calc_step3[16]
## [[1]]
## [1] " 10" " 15" " 00" " 39" " 2"  " 36" " 00"
# Calculation of the Average Pre Chess Rating of Opponents

# Every player must contribute the same number of round cells; otherwise
# matrix() would recycle values and silently misalign the opponents.
stopifnot(length(unique(lengths(avg_calc_step3))) == 1)

# One row per player, one column per round, holding opponent pair numbers.
matrix_calc <- matrix(
  unlist(avg_calc_step3),
  byrow = TRUE,
  nrow = length(avg_calc_step3)
)
dim(matrix_calc)
## [1] 64  7
# Convert the pair numbers from text to numbers. Rebuilding the matrix with
# an explicit row count keeps its shape even with a single row or column.
new_matrix <- matrix(as.numeric(matrix_calc), nrow = nrow(matrix_calc))
dim(new_matrix)
## [1] 64  7
# A pair number of 0 marks a round without an opponent: make it NA so it is
# left out of the average instead of being used as an index.
new_matrix[new_matrix == 0] <- NA

# Replace every pair number with that opponent's pre-rating in one vectorised
# lookup; NA entries stay NA. This relies on pre_rating being ordered by pair
# number.
new_matrix[] <- pre_rating[new_matrix]

# Mean of each row over the rounds actually played, rounded to a whole rating
# as in the required output (e.g. 1605 for the first player).
avg_opponents <- round(rowMeans(new_matrix, na.rm = TRUE))

# Assemble the final table, one row per player, naming the columns as they
# should appear in the exported file.
df <- data.frame(
  "Players Name" = player_name,
  "Players State" = player_state,
  "Total Points" = player_pts,
  "Players Pre-Rating" = pre_rating,
  "Opponents Ratings" = avg_opponents,
  check.names = FALSE
)

# Save the table as a CSV file in the working directory, without row numbers.
write.csv(df, file = "Chess.csv", row.names = FALSE)

Output

# Display the complete result table.
print(df)
##                Players Name Players State Total Points Players Pre-Rating
## 1                  GARY HUA           ON           6.0               1794
## 2           DAKSHESH DARURI           MI           6.0               1553
## 3              ADITYA BAJAJ           MI           6.0               1384
## 4       PATRICK H SCHILLING           MI           5.5               1716
## 5                HANSHI ZUO           MI           5.5               1655
## 6               HANSEN SONG           OH           5.0               1686
## 7         GARY DEE SWATHELL           MI           5.0               1649
## 8          EZEKIEL HOUGHTON           MI           5.0               1641
## 9               STEFANO LEE           ON           5.0               1411
## 10                ANVIT RAO           MI           5.0               1365
## 11       CAMERON WILLIAM MC           MI           4.5               1712
## 12           KENNETH J TACK           MI           4.5               1663
## 13        TORRANCE HENRY JR           MI           4.5               1666
## 14             BRADLEY SHAW           MI           4.5               1610
## 15   ZACHARY JAMES HOUGHTON           MI           4.5               1220
## 16             MIKE NIKITIN           MI           4.0               1604
## 17       RONALD GRZEGORCZYK           MI           4.0               1629
## 18            DAVID SUNDEEN           MI           4.0               1600
## 19             DIPANKAR ROY           MI           4.0               1564
## 20              JASON ZHENG           MI           4.0               1595
## 21            DINH DANG BUI           ON           4.0               1563
## 22         EUGENE L MCCLURE           MI           4.0               1555
## 23                 ALAN BUI           ON           4.0               1363
## 24        MICHAEL R ALDRICH           MI           4.0               1229
## 25         LOREN SCHWIEBERT           MI           3.5               1745
## 26                  MAX ZHU           ON           3.5               1579
## 27           GAURAV GIDWANI           MI           3.5               1552
## 28     SOFIA ADINA STANESCU           MI           3.5               1507
## 29         CHIEDOZIE OKORIE           MI           3.5               1602
## 30       GEORGE AVERY JONES           ON           3.5               1522
## 31             RISHI SHETTY           MI           3.5               1494
## 32    JOSHUA PHILIP MATHEWS           ON           3.5               1441
## 33                  JADE GE           MI           3.5               1449
## 34   MICHAEL JEFFERY THOMAS           MI           3.5               1399
## 35         JOSHUA DAVID LEE           MI           3.5               1438
## 36            SIDDHARTH JHA           MI           3.5               1355
## 37     AMIYATOSH PWNANANDAM           MI           3.5                980
## 38                BRIAN LIU           MI           3.0               1423
## 39            JOEL R HENDON           MI           3.0               1436
## 40             FOREST ZHANG           MI           3.0               1348
## 41      KYLE WILLIAM MURPHY           MI           3.0               1403
## 42                 JARED GE           MI           3.0               1332
## 43        ROBERT GLEN VASEY           MI           3.0               1283
## 44       JUSTIN D SCHILLING           MI           3.0               1199
## 45                DEREK YAN           MI           3.0               1242
## 46 JACOB ALEXANDER LAVALLEY           MI           3.0                377
## 47              ERIC WRIGHT           MI           2.5               1362
## 48             DANIEL KHAIN           MI           2.5               1382
## 49         MICHAEL J MARTIN           MI           2.5               1291
## 50               SHIVAM JHA           MI           2.5               1056
## 51           TEJAS AYYAGARI           MI           2.5               1011
## 52                ETHAN GUO           MI           2.5                935
## 53            JOSE C YBARRA           MI           2.0               1393
## 54              LARRY HODGE           MI           2.0               1270
## 55                ALEX KONG           MI           2.0               1186
## 56             MARISA RICCI           MI           2.0               1153
## 57               MICHAEL LU           MI           2.0               1092
## 58             VIRAJ MOHILE           MI           2.0                917
## 59                SEAN M MC           MI           2.0                853
## 60               JULIA SHEN           MI           1.5                967
## 61            JEZZEL FARKAS           ON           1.5                955
## 62            ASHWIN BALAJI           MI           1.0               1530
## 63     THOMAS JOSEPH HOSMER           MI           1.0               1175
## 64                   BEN LI           MI           1.0               1163
##    Opponents Ratings
## 1           1605.286
## 2           1469.286
## 3           1563.571
## 4           1573.571
## 5           1500.857
## 6           1518.714
## 7           1372.143
## 8           1468.429
## 9           1523.143
## 10          1554.143
## 11          1467.571
## 12          1506.167
## 13          1497.857
## 14          1515.000
## 15          1483.857
## 16          1385.800
## 17          1498.571
## 18          1480.000
## 19          1426.286
## 20          1410.857
## 21          1470.429
## 22          1300.333
## 23          1213.857
## 24          1357.000
## 25          1363.286
## 26          1506.857
## 27          1221.667
## 28          1522.143
## 29          1313.500
## 30          1144.143
## 31          1259.857
## 32          1378.714
## 33          1276.857
## 34          1375.286
## 35          1149.714
## 36          1388.167
## 37          1384.800
## 38          1539.167
## 39          1429.571
## 40          1390.571
## 41          1248.500
## 42          1149.857
## 43          1106.571
## 44          1327.000
## 45          1152.000
## 46          1357.714
## 47          1392.000
## 48          1355.800
## 49          1285.800
## 50          1296.000
## 51          1356.143
## 52          1494.571
## 53          1345.333
## 54          1206.167
## 55          1406.000
## 56          1414.400
## 57          1363.000
## 58          1391.000
## 59          1319.000
## 60          1330.200
## 61          1327.286
## 62          1186.000
## 63          1350.200
## 64          1263.000