The following R packages are required for this project:
library(downloader)
library(stringr)
In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could, for example, be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents. For the first player, the information would be:
Gary Hua, ON, 6.0, 1794, 1605
1605 was calculated by summing the pre-tournament ratings of the player’s opponents (1436, 1563, 1600, 1610, 1649, 1663, and 1716) and dividing by the total number of games played.
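The goal of this project is to generate a .CSV file (which could, for example, be imported into a SQL database) with the following information for all of the players contained in the chess tournament cross table: the player’s name, state, total number of points, pre-rating, and the average pre-rating of their opponents.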
Download the file, called `tournamentinfo.txt`, from the GitHub URL. The file will be saved to your local working directory.
# Remote location of the raw cross table and the local name it is saved under.
# NOTE(review): the URL carries a `token` query parameter -- presumably a
# temporary access token; confirm it is still valid before re-running.
url <- "https://raw.githubusercontent.com/ravi-kothari/DATA-607/master/tournamentinfo.txt?token=AQGrklbb_mgZp2MfY-1Ixw30fsRR2reTks5X8gcVwA%3D%3D"
download_file <- "tournamentinfo.txt"
# Fetch the file; the relative destination resolves against the working
# directory, which is printed right after for reference.
downloader::download(url = url, destfile = download_file)
getwd()
## [1] "C:/Users/rkothari/Documents/MSDA/DATA 607"
library(stringr)
# Load the cross table as a character vector, one element per line of the file.
results <- readLines(con = "tournamentinfo.txt")
## Warning in readLines("tournamentinfo.txt"): incomplete final line found on
## 'tournamentinfo.txt'
# Extract one value per player from the cross table.
# Assumed layout (confirm against tournamentinfo.txt): every player occupies two
# rows -- the first begins with the pair number and holds the name and total
# points ("    1 | GARY HUA        |6.0  |W  39|..."), the second begins with
# the two-letter state/province code ("   ON | 15445895 / R: 1794 ...").

# Player name: the whole second cell of each row that starts with a pair
# number. Capturing the full cell (lazily, so the trailing padding is dropped)
# keeps names of any length instead of cutting them off after three words.
name_rows <- str_match(results, "^\\s*\\d+\\s*\\|\\s*([^|]+?)\\s*\\|")
player_name <- name_rows[!is.na(name_rows[, 2]), 2]
# Player state: the two capital letters that open the second row of a player.
# Matching the position rather than a fixed list (ON/MI/OH) accepts any code and
# returns it without surrounding whitespace.
state_rows <- str_match(results, "^\\s*([A-Z]{2})\\s*\\|")
player_state <- state_rows[!is.na(state_rows[, 2]), 2]
# Total number of points for the player, kept as text so "6.0" is written
# to the CSV exactly as it appears in the cross table.
player_pts <- unlist(str_extract_all(results, "\\d\\.\\d"))
# Fail early if the three extractions disagree on the number of players,
# rather than letting a misaligned table be built later.
stopifnot(
  length(player_name) == length(player_state),
  length(player_name) == length(player_pts)
)
# Pre-tournament rating of every player, obtained in two passes.
# Pass 1: collect each "R:" marker together with the digits that follow it.
rating_tokens <- str_extract_all(results, "(R:\\s*)(\\d+)")
pre_rating_step1 <- unlist(rating_tokens)
pre_rating_step1[1]
## [1] "R: 1794"
# Pass 2: keep only the digits of each token (every token holds one digit run).
pre_rating <- str_extract(pre_rating_step1, "(\\d+)")
is.vector(pre_rating)
## [1] TRUE
# The ratings are still text at this point; turn them into numbers.
pre_rating <- as.numeric(pre_rating)
pre_rating[1]
## [1] 1794
# The opponent numbers needed for the average rating are isolated in three passes.
# Pass 1: from every line that has one, keep the text running from the first
# "|" that is immediately followed by a digit up to the end of the line.
round_text <- str_extract(results, "\\|[0-9].*")
avg_calc_step1 <- round_text[!is.na(round_text)]
avg_calc_step1[16]
## [1] "|4.0 |D 10|W 15|H |W 39|L 2|W 36|U |"
# Pass 2: swap the one or two whitespace characters that sit directly before a
# "|" for "00", so round cells without an opponent number end in "00".
avg_calc_step2 <- str_replace_all(
  string = avg_calc_step1,
  pattern = "\\s{1,2}\\|",
  replacement = "00|"
)
avg_calc_step2[16]
## [1] "|4.000|D 10|W 15|H 00|W 39|L 2|W 36|U 00|"
# Pass 3: per player, pull out every whitespace character followed by one or
# two digits; the result is a list with one character vector per player.
avg_calc_step3 <- str_extract_all(avg_calc_step2, "\\s\\d{1,2}")
avg_calc_step3[16]
## [[1]]
## [1] " 10" " 15" " 00" " 39" " 2" " 36" " 00"
# Average pre-tournament rating of each player's opponents.
# Lay the per-player opponent numbers out as a matrix: one row per player,
# one column per round.
matrix_calc <- matrix(
  unlist(avg_calc_step3),
  nrow = length(avg_calc_step3),
  byrow = TRUE
)
dim(matrix_calc)
## [1] 64 7
# Same layout, with the text entries converted to numbers.
new_matrix <- matrix(as.numeric(matrix_calc), nrow = nrow(matrix_calc))
dim(new_matrix)
## [1] 64 7
# A 0 marks a round without an opponent: blank it out so it is left out of the
# mean. Every remaining entry is an opponent number, which is replaced by that
# opponent's pre-rating (looking up an NA position yields NA again).
new_matrix[new_matrix == 0] <- NA
new_matrix[] <- pre_rating[as.vector(new_matrix)]
# Row-wise mean over the rounds that were actually played.
avg_opponents <- c(rowMeans(new_matrix, na.rm = TRUE))
# Assemble the per-player vectors into one table, naming the columns up front;
# check.names = FALSE keeps the spaces and hyphen in the headers.
df <- data.frame(
  "Players Name" = player_name,
  "Players State" = player_state,
  "Total Points" = player_pts,
  "Players Pre-Rating" = pre_rating,
  "Opponents Ratings" = avg_opponents,
  check.names = FALSE
)
# Save the table as a CSV file without the row-number column, then display it.
write.csv(df, file = "Chess.csv", row.names = FALSE)
df
## Players Name Players State Total Points Players Pre-Rating
## 1 GARY HUA ON 6.0 1794
## 2 DAKSHESH DARURI MI 6.0 1553
## 3 ADITYA BAJAJ MI 6.0 1384
## 4 PATRICK H SCHILLING MI 5.5 1716
## 5 HANSHI ZUO MI 5.5 1655
## 6 HANSEN SONG OH 5.0 1686
## 7 GARY DEE SWATHELL MI 5.0 1649
## 8 EZEKIEL HOUGHTON MI 5.0 1641
## 9 STEFANO LEE ON 5.0 1411
## 10 ANVIT RAO MI 5.0 1365
## 11 CAMERON WILLIAM MC MI 4.5 1712
## 12 KENNETH J TACK MI 4.5 1663
## 13 TORRANCE HENRY JR MI 4.5 1666
## 14 BRADLEY SHAW MI 4.5 1610
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220
## 16 MIKE NIKITIN MI 4.0 1604
## 17 RONALD GRZEGORCZYK MI 4.0 1629
## 18 DAVID SUNDEEN MI 4.0 1600
## 19 DIPANKAR ROY MI 4.0 1564
## 20 JASON ZHENG MI 4.0 1595
## 21 DINH DANG BUI ON 4.0 1563
## 22 EUGENE L MCCLURE MI 4.0 1555
## 23 ALAN BUI ON 4.0 1363
## 24 MICHAEL R ALDRICH MI 4.0 1229
## 25 LOREN SCHWIEBERT MI 3.5 1745
## 26 MAX ZHU ON 3.5 1579
## 27 GAURAV GIDWANI MI 3.5 1552
## 28 SOFIA ADINA STANESCU MI 3.5 1507
## 29 CHIEDOZIE OKORIE MI 3.5 1602
## 30 GEORGE AVERY JONES ON 3.5 1522
## 31 RISHI SHETTY MI 3.5 1494
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441
## 33 JADE GE MI 3.5 1449
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399
## 35 JOSHUA DAVID LEE MI 3.5 1438
## 36 SIDDHARTH JHA MI 3.5 1355
## 37 AMIYATOSH PWNANANDAM MI 3.5 980
## 38 BRIAN LIU MI 3.0 1423
## 39 JOEL R HENDON MI 3.0 1436
## 40 FOREST ZHANG MI 3.0 1348
## 41 KYLE WILLIAM MURPHY MI 3.0 1403
## 42 JARED GE MI 3.0 1332
## 43 ROBERT GLEN VASEY MI 3.0 1283
## 44 JUSTIN D SCHILLING MI 3.0 1199
## 45 DEREK YAN MI 3.0 1242
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377
## 47 ERIC WRIGHT MI 2.5 1362
## 48 DANIEL KHAIN MI 2.5 1382
## 49 MICHAEL J MARTIN MI 2.5 1291
## 50 SHIVAM JHA MI 2.5 1056
## 51 TEJAS AYYAGARI MI 2.5 1011
## 52 ETHAN GUO MI 2.5 935
## 53 JOSE C YBARRA MI 2.0 1393
## 54 LARRY HODGE MI 2.0 1270
## 55 ALEX KONG MI 2.0 1186
## 56 MARISA RICCI MI 2.0 1153
## 57 MICHAEL LU MI 2.0 1092
## 58 VIRAJ MOHILE MI 2.0 917
## 59 SEAN M MC MI 2.0 853
## 60 JULIA SHEN MI 1.5 967
## 61 JEZZEL FARKAS ON 1.5 955
## 62 ASHWIN BALAJI MI 1.0 1530
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175
## 64 BEN LI MI 1.0 1163
## Opponents Ratings
## 1 1605.286
## 2 1469.286
## 3 1563.571
## 4 1573.571
## 5 1500.857
## 6 1518.714
## 7 1372.143
## 8 1468.429
## 9 1523.143
## 10 1554.143
## 11 1467.571
## 12 1506.167
## 13 1497.857
## 14 1515.000
## 15 1483.857
## 16 1385.800
## 17 1498.571
## 18 1480.000
## 19 1426.286
## 20 1410.857
## 21 1470.429
## 22 1300.333
## 23 1213.857
## 24 1357.000
## 25 1363.286
## 26 1506.857
## 27 1221.667
## 28 1522.143
## 29 1313.500
## 30 1144.143
## 31 1259.857
## 32 1378.714
## 33 1276.857
## 34 1375.286
## 35 1149.714
## 36 1388.167
## 37 1384.800
## 38 1539.167
## 39 1429.571
## 40 1390.571
## 41 1248.500
## 42 1149.857
## 43 1106.571
## 44 1327.000
## 45 1152.000
## 46 1357.714
## 47 1392.000
## 48 1355.800
## 49 1285.800
## 50 1296.000
## 51 1356.143
## 52 1494.571
## 53 1345.333
## 54 1206.167
## 55 1406.000
## 56 1414.400
## 57 1363.000
## 58 1391.000
## 59 1319.000
## 60 1330.200
## 61 1327.286
## 62 1186.000
## 63 1350.200
## 64 1263.000