In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents. For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605 1605 was calculated by using the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663, 1716, and dividing by the total number of games played.
chess<- readLines("C:\\Users\\ambra\\Desktop\\Data 607\\W4\\tournamentinfo.txt")
## Warning in readLines("C:\\Users\\ambra\\Desktop\\Data 607\\W4\
## \tournamentinfo.txt"): incomplete final line found on 'C:\Users\ambra
## \Desktop\Data 607\W4\tournamentinfo.txt'
class(chess)
## [1] "character"
library(stringr)
##Remove headings
chess<- chess[c(-1:-4)]
##Extract player's name
NAME<-str_trim(unlist(str_extract_all(chess, "[[:upper:]]{2,}\\s\\w+(\\s\\w+)?\\s?\\w+?\\s?[[:punct:]]?\\w*")))
##Extract state
state<- unlist(str_extract_all(chess, "(ON|MI|OH)\\s\\|"))
STATE<- str_trim(unlist(str_replace_all(state, "\\|", "")))
##Extract total number of points
points<- unlist(str_extract_all(chess, "\\|\\d\\.\\d\\s"))
POINTS<- str_trim(unlist(str_replace_all(points,"\\|", "")))
##Extract pre-rating
prerating<- unlist(str_extract_all(chess,"R:\\s?\\s\\d{3,}P?"))
prerating2<-str_trim(str_replace_all(prerating, "R:\\s?\\s", ""))
PRE_RATING2<-str_trim(str_replace_all(prerating2, "P$", ""))
PRE_RATING<- sapply(PRE_RATING2, as.numeric)
y<- as.data.frame(chess)
##Extract opponents for each player
opponents<- apply (y, 1, function(x) (unlist(str_extract_all(x,"(W|D|L)( )+\\d{1,3}"))))
##Remove 0 characters from vector
opponents1<- opponents[lapply(opponents,length)>0]
##remove W|D|L
opponentsindex<- str_extract_all(opponents1,"\\d{1,3}")
library(plyr)
opponentsdf<- ldply(opponentsindex, rbind)
opponentsdf1<- sapply(opponentsdf, function(x) as.numeric(as.character(x)))
chessdf<- data.frame(cbind(NAME, STATE, POINTS, PRE_RATING))
chessdf["X1"]<-opponentsdf1[,1]
chessdf["X2"]<-opponentsdf1[,2]
chessdf["X3"]<-opponentsdf1[,3]
chessdf["X4"]<-opponentsdf1[,4]
chessdf["X5"]<-opponentsdf1[,5]
chessdf["X6"]<-opponentsdf1[,6]
chessdf["X7"]<-opponentsdf1[,7]
rownames(chessdf)<- seq_len(nrow(chessdf))
for (i in 1:nrow(chessdf)){
chessdf$X1[i]<-as.numeric(as.character(chessdf$PRE_RATING[chessdf$X1[i]]))
chessdf$X2[i]<-as.numeric(as.character(chessdf$PRE_RATING[chessdf$X2[i]]))
chessdf$X3[i]<-as.numeric(as.character(chessdf$PRE_RATING[chessdf$X3[i]]))
chessdf$X4[i]<-as.numeric(as.character(chessdf$PRE_RATING[chessdf$X4[i]]))
chessdf$X5[i]<-as.numeric(as.character(chessdf$PRE_RATING[chessdf$X5[i]]))
chessdf$X6[i]<-as.numeric(as.character(chessdf$PRE_RATING[chessdf$X6[i]]))
chessdf$X7[i]<-as.numeric(as.character(chessdf$PRE_RATING[chessdf$X7[i]]))
}
chessdf["AVERAGE_PRERATING"]<- round(rowMeans(chessdf[, 5:11], na.rm = TRUE ), digits = 0)
library(htmlTable)
htmlTable(chessdf)
NAME | STATE | POINTS | PRE_RATING | X1 | X2 | X3 | X4 | X5 | X6 | X7 | AVERAGE_PRERATING | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | GARY HUA | ON | 6.0 | 1794 | 1436 | 1563 | 1600 | 1610 | 1649 | 1663 | 1716 | 1605 |
2 | DAKSHESH DARURI | MI | 6.0 | 1553 | 1175 | 917 | 1716 | 1629 | 1604 | 1595 | 1649 | 1469 |
3 | ADITYA BAJAJ | MI | 6.0 | 1384 | 1641 | 955 | 1745 | 1563 | 1712 | 1666 | 1663 | 1564 |
4 | PATRICK H SCHILLING | MI | 5.5 | 1716 | 1363 | 1507 | 1553 | 1579 | 1655 | 1564 | 1794 | 1574 |
5 | HANSHI ZUO | MI | 5.5 | 1655 | 1242 | 980 | 1663 | 1666 | 1716 | 1610 | 1629 | 1501 |
6 | HANSEN SONG | OH | 5.0 | 1686 | 1399 | 1602 | 1712 | 1438 | 1365 | 1552 | 1563 | 1519 |
7 | GARY DEE SWATHELL | MI | 5.0 | 1649 | 1092 | 377 | 1666 | 1712 | 1794 | 1411 | 1553 | 1372 |
8 | EZEKIEL HOUGHTON | MI | 5.0 | 1641 | 1384 | 1441 | 1610 | 1411 | 1362 | 1507 | 1564 | 1468 |
9 | STEFANO LEE | ON | 5.0 | 1411 | 1745 | 1600 | 853 | 1641 | 1579 | 1649 | 1595 | 1523 |
10 | ANVIT RAO | MI | 5.0 | 1365 | 1604 | 1564 | 1186 | 1494 | 1686 | 1745 | 1600 | 1554 |
11 | CAMERON WILLIAM MC LEMAN | MI | 4.5 | 1712 | 1423 | 1153 | 1686 | 1649 | 1384 | 1399 | 1579 | 1468 |
12 | KENNETH J TACK | MI | 4.5 | 1663 | 1332 | 1449 | 1655 | 1423 | 1794 | 1384 | 1506 | |
13 | TORRANCE HENRY JR | MI | 4.5 | 1666 | 1355 | 1552 | 1649 | 1655 | 1449 | 1384 | 1441 | 1498 |
14 | BRADLEY SHAW | MI | 4.5 | 1610 | 1270 | 1199 | 1641 | 1794 | 1552 | 1655 | 1494 | 1515 |
15 | ZACHARY JAMES HOUGHTON | MI | 4.5 | 1220 | 1564 | 1604 | 1522 | 1555 | 1270 | 1449 | 1423 | 1484 |
16 | MIKE NIKITIN | MI | 4.0 | 1604 | 1365 | 1220 | 1436 | 1553 | 1355 | 1386 | ||
17 | RONALD GRZEGORCZYK | MI | 4.0 | 1629 | 1382 | 1403 | 1579 | 1553 | 1363 | 1555 | 1655 | 1499 |
18 | DAVID SUNDEEN | MI | 4.0 | 1600 | 1362 | 1411 | 1794 | 1441 | 1564 | 1423 | 1365 | 1480 |
19 | DIPANKAR ROY | MI | 4.0 | 1564 | 1220 | 1365 | 935 | 1507 | 1600 | 1716 | 1641 | 1426 |
20 | JASON ZHENG | MI | 4.0 | 1595 | 1348 | 1291 | 1363 | 1403 | 1507 | 1553 | 1411 | 1411 |
21 | DINH DANG BUI | ON | 4.0 | 1563 | 1283 | 1794 | 1362 | 1384 | 1348 | 1436 | 1686 | 1470 |
22 | EUGENE L MCCLURE | MI | 4.0 | 1555 | 1163 | 935 | 1507 | 1220 | 1629 | 1348 | 1300 | |
23 | ALAN BUI | ON | 4.0 | 1363 | 1716 | 1283 | 1595 | 917 | 1629 | 980 | 377 | 1214 |
24 | MICHAEL R ALDRICH | MI | 4.0 | 1229 | 1507 | 1362 | 1283 | 1745 | 967 | 1199 | 1436 | 1357 |
25 | LOREN SCHWIEBERT | MI | 3.5 | 1745 | 1411 | 1393 | 1384 | 1229 | 1399 | 1365 | 1362 | 1363 |
26 | MAX ZHU | ON | 3.5 | 1579 | 1291 | 1348 | 1629 | 1716 | 1411 | 1441 | 1712 | 1507 |
27 | GAURAV GIDWANI | MI | 3.5 | 1552 | 1011 | 1666 | 377 | 980 | 1610 | 1686 | 1222 | |
28 | SOFIA ADINA STANESCU-BELLU | MI | 3.5 | 1507 | 1229 | 1716 | 1555 | 1564 | 1595 | 1641 | 1355 | 1522 |
29 | CHIEDOZIE OKORIE | MI | 3.5 | 1602 | 1056 | 1686 | 1423 | 1399 | 935 | 1382 | 1314 | |
30 | GEORGE AVERY JONES | ON | 3.5 | 1522 | 935 | 1163 | 1220 | 1186 | 1494 | 955 | 1056 | 1144 |
31 | RISHI SHETTY | MI | 3.5 | 1494 | 917 | 1186 | 1163 | 1365 | 1522 | 1056 | 1610 | 1260 |
32 | JOSHUA PHILIP MATHEWS | ON | 3.5 | 1441 | 955 | 1641 | 1199 | 1600 | 1011 | 1579 | 1666 | 1379 |
33 | JADE GE | MI | 3.5 | 1449 | 967 | 1663 | 1056 | 1355 | 1666 | 1220 | 1011 | 1277 |
34 | MICHAEL JEFFERY THOMAS | MI | 3.5 | 1399 | 1686 | 967 | 980 | 1602 | 1745 | 1712 | 935 | 1375 |
35 | JOSHUA DAVID LEE | MI | 3.5 | 1438 | 377 | 1423 | 1153 | 1686 | 1092 | 935 | 1382 | 1150 |
36 | SIDDHARTH JHA | MI | 3.5 | 1355 | 1666 | 1092 | 1011 | 1449 | 1604 | 1507 | 1388 | |
37 | AMIYATOSH PWNANANDAM | MI | 3.5 | 980 | 1655 | 1399 | 1552 | 1363 | 955 | 1385 | ||
38 | BRIAN LIU | MI | 3.0 | 1423 | 1712 | 1438 | 1602 | 1663 | 1600 | 1220 | 1539 | |
39 | JOEL R HENDON | MI | 3.0 | 1436 | 1794 | 1270 | 1348 | 1604 | 1199 | 1563 | 1229 | 1430 |
40 | FOREST ZHANG | MI | 3.0 | 1348 | 1595 | 1579 | 1436 | 853 | 1563 | 1153 | 1555 | 1391 |
41 | KYLE WILLIAM MURPHY | MI | 3.0 | 1403 | 853 | 1629 | 917 | 1595 | 1248 | |||
42 | JARED GE | MI | 3.0 | 1332 | 1663 | 1056 | 1092 | 967 | 955 | 1163 | 1153 | 1150 |
43 | ROBERT GLEN VASEY | MI | 3.0 | 1283 | 1563 | 1363 | 1229 | 1175 | 853 | 377 | 1186 | 1107 |
44 | JUSTIN D SCHILLING | MI | 3.0 | 1199 | 1610 | 1441 | 1393 | 1436 | 1229 | 853 | 1327 | |
45 | DEREK YAN | MI | 3.0 | 1242 | 1655 | 1011 | 967 | 1153 | 1175 | 1186 | 917 | 1152 |
46 | JACOB ALEXANDER LAVALLEY | MI | 3.0 | 377 | 1438 | 1649 | 1552 | 1056 | 1163 | 1283 | 1363 | 1358 |
47 | ERIC WRIGHT | MI | 2.5 | 1362 | 1600 | 1229 | 1563 | 955 | 1641 | 1011 | 1745 | 1392 |
48 | DANIEL KHAIN | MI | 2.5 | 1382 | 1629 | 1175 | 935 | 1602 | 1438 | 1356 | ||
49 | MICHAEL J MARTIN | MI | 2.5 | 1291 | 1579 | 1595 | 1175 | 1163 | 917 | 1286 | ||
50 | SHIVAM JHA | MI | 2.5 | 1056 | 1602 | 1332 | 1449 | 377 | 1494 | 1522 | 1296 | |
51 | TEJAS AYYAGARI | MI | 2.5 | 1011 | 1552 | 1242 | 1355 | 1092 | 1441 | 1362 | 1449 | 1356 |
52 | ETHAN GUO | MI | 2.5 | 935 | 1522 | 1555 | 1564 | 1382 | 1602 | 1438 | 1399 | 1495 |
53 | JOSE C YBARRA | MI | 2.0 | 1393 | 1745 | 1199 | 1092 | 1345 | ||||
54 | LARRY HODGE | MI | 2.0 | 1270 | 1610 | 1436 | 955 | 1220 | 853 | 1163 | 1206 | |
55 | ALEX KONG | MI | 2.0 | 1186 | 1530 | 1494 | 1365 | 1522 | 1242 | 1283 | 1406 | |
56 | MARISA RICCI | MI | 2.0 | 1153 | 1712 | 1438 | 1242 | 1348 | 1332 | 1414 | ||
57 | MICHAEL LU | MI | 2.0 | 1092 | 1649 | 1355 | 1332 | 1011 | 1438 | 1393 | 1363 | |
58 | VIRAJ MOHILE | MI | 2.0 | 917 | 1494 | 1553 | 1403 | 1363 | 1291 | 1242 | 1391 | |
59 | SEAN M MC CORMICK | MI | 2.0 | 853 | 1403 | 1411 | 1348 | 1283 | 1270 | 1199 | 1319 | |
60 | JULIA SHEN | MI | 1.5 | 967 | 1449 | 1399 | 1242 | 1332 | 1229 | 1330 | ||
61 | JEZZEL FARKAS | ON | 1.5 | 955 | 1441 | 1384 | 1270 | 1362 | 1332 | 1522 | 980 | 1327 |
62 | ASHWIN BALAJI | MI | 1.0 | 1530 | 1186 | 1186 | ||||||
63 | THOMAS JOSEPH HOSMER | MI | 1.0 | 1175 | 1553 | 1382 | 1291 | 1283 | 1242 | 1350 | ||
64 | BEN LI | MI | 1.0 | 1163 | 1555 | 1522 | 1494 | 1291 | 377 | 1332 | 1270 | 1263 |
finalchessdf<- chessdf[-(5:11)]
write.csv(finalchessdf, file="C:/Users/ambra/Desktop/Data 607/W4/TournamentInfo.csv")