In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could, for example, be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents. For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605
library(knitr)
library(stringr)
library(plyr)
# Make the project folder the working directory so the relative file names
# used in this script resolve against it.
setwd("~/Desktop/MSDA/proj1")
# Read the raw tournament file and drop its first three (header) lines.
textdata <- readLines("tournamentinfo1.txt")[-seq_len(3)]
head(textdata)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [3] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [6] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
Note: Always check the dimensions (or row names) of each extracted object, as they must match throughout the project.
# Player names live on the rows that start with the pair number
# (e.g. " 1 | GARY HUA |6.0 |..."). Taking the whole second "|"-delimited
# field keeps names of any length intact (a fixed three-word pattern cuts
# longer names short), and trimming removes the padding around the name.
player_rows <- grep("^\\s*\\d+\\s*\\|", textdata, value = TRUE)
Player_name <- str_trim(str_split_fixed(player_rows, "\\|", 3)[, 2])
Player_name
## [1] "GARY HUA " "DAKSHESH DARURI "
## [3] "ADITYA BAJAJ " "PATRICK H SCHILLING "
## [5] "HANSHI ZUO " "HANSEN SONG "
## [7] "GARY DEE SWATHELL " "EZEKIEL HOUGHTON "
## [9] "STEFANO LEE " "ANVIT RAO "
## [11] "CAMERON WILLIAM MC " "KENNETH J TACK "
## [13] "TORRANCE HENRY JR " "BRADLEY SHAW "
## [15] "ZACHARY JAMES HOUGHTON " "MIKE NIKITIN "
## [17] "RONALD GRZEGORCZYK " "DAVID SUNDEEN "
## [19] "DIPANKAR ROY " "JASON ZHENG "
## [21] "DINH DANG BUI " "EUGENE L MCCLURE "
## [23] "ALAN BUI " "MICHAEL R ALDRICH "
## [25] "LOREN SCHWIEBERT " "MAX ZHU "
## [27] "GAURAV GIDWANI " "SOFIA ADINA "
## [29] "CHIEDOZIE OKORIE " "GEORGE AVERY JONES "
## [31] "RISHI SHETTY " "JOSHUA PHILIP MATHEWS "
## [33] "JADE GE " "MICHAEL JEFFERY THOMAS "
## [35] "JOSHUA DAVID LEE " "SIDDHARTH JHA "
## [37] "AMIYATOSH PWNANANDAM " "BRIAN LIU "
## [39] "JOEL R HENDON " "FOREST ZHANG "
## [41] "KYLE WILLIAM MURPHY " "JARED GE "
## [43] "ROBERT GLEN VASEY " "JUSTIN D SCHILLING "
## [45] "DEREK YAN " "JACOB ALEXANDER LAVALLEY "
## [47] "ERIC WRIGHT " "DANIEL KHAIN "
## [49] "MICHAEL J MARTIN " "SHIVAM JHA "
## [51] "TEJAS AYYAGARI " "ETHAN GUO "
## [53] "JOSE C YBARRA " "LARRY HODGE "
## [55] "ALEX KONG " "MARISA RICCI "
## [57] "MICHAEL LU " "VIRAJ MOHILE "
## [59] "SEAN M MC " "JULIA SHEN "
## [61] "JEZZEL FARKAS " "ASHWIN BALAJI "
## [63] "THOMAS JOSEPH HOSMER " "BEN LI "
# The state/province code is the two-letter field that opens the second row
# of each player record (e.g. " ON | 15445895 / R: 1794 ->1817 |...").
# Selecting rows by that shape, rather than by a fixed list of codes, picks
# up any state and cannot match text in the middle of a line.
state_rows <- grep("^\\s*[A-Z]{2}\\s*\\|", textdata, value = TRUE)
# The first run of two capital letters on such a row is the code itself,
# so the result carries no surrounding whitespace.
Player_state <- str_extract(state_rows, "[A-Z]{2}")
Player_state
## [1] " ON " " MI " " MI " " MI " " MI " " OH " " MI " " MI " " ON " " MI "
## [11] " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI "
## [21] " ON " " MI " " ON " " MI " " MI " " ON " " MI " " MI " " MI " " ON "
## [31] " MI " " ON " " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI "
## [41] " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI "
## [51] " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI "
## [61] " ON " " MI " " MI " " MI "
# Collect every "digit.digit" token found in the file, line by line, and
# flatten the per-line matches into one character vector of point totals.
points_by_line <- str_extract_all(textdata, "\\d\\.\\d")
Total_number_points <- unlist(points_by_line)
Total_number_points
## [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" "5.0" "5.0" "5.0" "4.5"
## [12] "4.5" "4.5" "4.5" "4.5" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0"
## [23] "4.0" "4.0" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5"
## [34] "3.5" "3.5" "3.5" "3.5" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0"
## [45] "3.0" "3.0" "2.5" "2.5" "2.5" "2.5" "2.5" "2.5" "2.0" "2.0" "2.0"
## [56] "2.0" "2.0" "2.0" "2.0" "1.5" "1.5" "1.0" "1.0" "1.0"
# Grab the "R: <digits>" fragment of every line that has one; the digits
# that follow the label are the player's pre-tournament rating.
Pre_rating_points <- unlist(
  str_extract_all(textdata, "[[O-R]][[:punct:]]\\s* (\\d+)")
)
# Blank out the "R: " label and convert what is left to a number
# (as.numeric ignores the leading whitespace).
Pre_rating_points1 <- as.numeric(str_replace(Pre_rating_points, "R: ", " "))
Pre_rating_points1
## [1] 1794 1553 1384 1716 1655 1686 1649 1641 1411 1365 1712 1663 1666 1610
## [15] 1220 1604 1629 1600 1564 1595 1563 1555 1363 1229 1745 1579 1552 1507
## [29] 1602 1522 1494 1441 1449 1399 1438 1355 980 1423 1436 1348 1403 1332
## [43] 1283 1199 1242 377 1362 1382 1291 1056 1011 935 1393 1270 1186 1153
## [57] 1092 917 853 967 955 1530 1175 1163
# Build, for every player, the list of opponent pair numbers per round.

# Keep, from each line, the text starting at the first "|" that is directly
# followed by a numeric character and running to the end of the line. In the
# sample shown by head(textdata) this is the "|6.0 |W 39|W 21|..." part of a
# player row; lines without such a "|<number>" sequence contribute nothing.
output3<- unlist(str_extract_all(textdata,"\\|[[:number:]].*"))
# Replace every run of one or two whitespace characters that sits directly
# in front of a "|" with the literal "00".
# NOTE(review): presumably this makes a round cell without an opponent number
# produce a " 00" token, which the later `== 0` -> NA step relies on; confirm
# against the raw file.
output2 <- unlist(str_replace_all(output3,"\\s{1,2}\\|","00"))
# For each element of output2, collect every token made of one whitespace
# character followed by one or two digits. The result is a list with one
# character vector per element of output2; the {1,2} quantifier means at most
# two digits of any number are captured.
output1 <- (str_extract_all(output2,"\\s\\d{1,2}"))
head(output1)
## [[1]]
## [1] " 39" " 21" " 18" " 14" " 7" " 12" " 4"
##
## [[2]]
## [1] " 63" " 58" " 4" " 17" " 16" " 20" " 7"
##
## [[3]]
## [1] " 8" " 61" " 25" " 21" " 11" " 13" " 12"
##
## [[4]]
## [1] " 23" " 28" " 2" " 26" " 5" " 19" " 1"
##
## [[5]]
## [1] " 45" " 37" " 12" " 13" " 4" " 14" " 17"
##
## [[6]]
## [1] " 34" " 29" " 11" " 35" " 10" " 27" " 21"
# Arrange the per-player opponent tokens as a players x rounds table and put
# each player's own pre-rating in front of it.

# matrix() silently recycles values when the rows are ragged, which would
# shift opponents between players; stop instead if the counts differ.
round_counts <- unique(vapply(output1, length, integer(1)))
if (length(round_counts) != 1) {
  stop("Players do not all have the same number of round entries.",
       call. = FALSE)
}

# One row per player, one column per round (still character at this point).
matrix1 <- matrix(unlist(output1), byrow = TRUE, nrow = length(output1))
# Convert to numbers while keeping the shape; data.frame() names the columns
# X1, X2, ... automatically.
matrix2 <- data.frame(matrix(as.numeric(matrix1), nrow = nrow(matrix1)))
# A 0 marks a round without an opponent; treat it as missing.
matrix2[matrix2 == 0] <- NA
sub_dataset <- data.frame(Pre_rating_points1, matrix2)
head(sub_dataset)
## Pre_rating_points1 X1 X2 X3 X4 X5 X6 X7
## 1 1794 39 21 18 14 7 12 4
## 2 1553 63 58 4 17 16 20 7
## 3 1384 8 61 25 21 11 13 12
## 4 1716 23 28 2 26 5 19 1
## 5 1655 45 37 12 13 4 14 17
## 6 1686 34 29 11 35 10 27 21
I first tried an if-else construct, but it got stuck and the output was NULL. A "for" loop worked.
# Replace each opponent pair number with that opponent's pre-rating.
# Looping over the round columns (found by name) and indexing the rating
# vector with a whole column at once works for any number of players and
# rounds. The rating column is addressed by its full name rather than through
# `$` partial matching. An NA opponent stays NA.
round_cols <- grep("^X[0-9]+$", names(sub_dataset), value = TRUE)
pre_ratings <- sub_dataset[["Pre_rating_points1"]]
for (round_col in round_cols) {
  sub_dataset[[round_col]] <- pre_ratings[sub_dataset[[round_col]]]
}
head(sub_dataset)
## Pre_rating_points1 X1 X2 X3 X4 X5 X6 X7
## 1 1794 1436 1563 1600 1610 1649 1663 1716
## 2 1553 1175 917 1716 1629 1604 1595 1649
## 3 1384 1641 955 1745 1563 1712 1666 1663
## 4 1716 1363 1507 1553 1579 1655 1564 1794
## 5 1655 1242 980 1663 1666 1716 1610 1629
## 6 1686 1399 1602 1712 1438 1365 1552 1563
# Average the opponents' pre-ratings for every player in a single call.
# rowMeans() is already vectorised over rows, so no per-row loop or fixed
# player count is needed; the round columns are located by name instead of by
# position. Missing opponents (NA) are left out of the mean.
rating_cols <- grep("^X[0-9]+$", names(sub_dataset))
sub_dataset$Average <- unname(
  rowMeans(sub_dataset[, rating_cols, drop = FALSE], na.rm = TRUE)
)
head(sub_dataset)
## Pre_rating_points1 X1 X2 X3 X4 X5 X6 X7 Average
## 1 1794 1436 1563 1600 1610 1649 1663 1716 1605.286
## 2 1553 1175 917 1716 1629 1604 1595 1649 1469.286
## 3 1384 1641 955 1745 1563 1712 1666 1663 1563.571
## 4 1716 1363 1507 1553 1579 1655 1564 1794 1573.571
## 5 1655 1242 980 1663 1666 1716 1610 1629 1500.857
## 6 1686 1399 1602 1712 1438 1365 1552 1563 1518.714
# Assemble the final table: name, state, total points, pre-rating and the
# average opponent pre-rating. The rating column is fetched by its full name
# instead of through `$` partial matching, which would stop working if
# another column starting with "Pre_rating" were ever added. The two computed
# columns are named explicitly so the output keeps the same headers.
dataset <- data.frame(
  Player_name,
  Player_state,
  Total_number_points,
  sub_dataset.Pre_rating = sub_dataset[["Pre_rating_points1"]],
  sub_dataset.Average = sub_dataset[["Average"]]
)
head(dataset)
## Player_name Player_state Total_number_points
## 1 GARY HUA ON 6.0
## 2 DAKSHESH DARURI MI 6.0
## 3 ADITYA BAJAJ MI 6.0
## 4 PATRICK H SCHILLING MI 5.5
## 5 HANSHI ZUO MI 5.5
## 6 HANSEN SONG OH 5.0
## sub_dataset.Pre_rating sub_dataset.Average
## 1 1794 1605.286
## 2 1553 1469.286
## 3 1384 1563.571
## 4 1716 1573.571
## 5 1655 1500.857
## 6 1686 1518.714
# Write the result for import elsewhere (e.g. a SQL database). Row names are
# left out so the file holds only the five requested columns, without an
# unnamed leading index column.
write.csv(dataset, "Tournament.csv", row.names = FALSE)