Load text file into R and removing the —- on the file
chess <- read.delim("https://raw.githubusercontent.com/AnnaMoy/Data-607/main/Tournament.txt", sep = "|" ,header = FALSE)[-c(1:4,7,10, 13,16,19,22,25,28,31,34,37,40,43,46,49,52,55,58,61,64,67,70,73,76,79,82,85,88,91,94,97,100,103,106,109,112,115,118,121,124,127,130,133,136,139,142,145,148,151,154,157,160,163,166,169,172,175,178,181,184,187,190,193,196),-c(11)]
Load library
library(tidyverse)
Transform the data
# extract the pair number, name, state, total_points, rating line information
pair_num <- str_subset(chess$V1, pattern = "\\d+")
name <- str_subset(chess$V2, pattern = "[A-Za-z]+ [A-Za-z]+ ")
state <- str_subset(chess$V1, pattern = "[A-Za-z]+")
total_point <- str_subset(chess$V3, pattern ="\\d+.\\d+" )
rating <- str_subset(chess$V2, pattern ="\\d+")
# remove blanks space from state
state<- str_remove_all(state, " ")
# extract only the player's pre-rating and post-rating
pre_rating <- str_sub(rating, 16,19)
post_rating <-str_sub(rating, 25,29)
# Take every other row and extract the round 1 to round 7 numbers
row1_data<-chess %>%
slice(which(row_number() %% 2 == 1))
round1<-str_sub(row1_data$V4, 4,5)
round2<-str_sub(row1_data$V5, 4,5)
round3<-str_sub(row1_data$V6, 4,5)
round4<-str_sub(row1_data$V7, 4,5)
round5<-str_sub(row1_data$V8, 4,5)
round6<-str_sub(row1_data$V9, 4,5)
round7<-str_sub(row1_data$V10, 4,5)
#transfer to numeric
round1 = as.numeric(round1)
round2 = as.numeric(round2)
round3 = as.numeric(round3)
round4 = as.numeric(round4)
round5 = as.numeric(round5)
round6 = as.numeric(round6)
round7 = as.numeric(round7)
# Convert information into a data frame
df <- data.frame(pair_num, name, state, total_point, pre_rating, post_rating)
# Transform the data to numeric for particular columns
df2<-transform(df,pair_num = as.integer(pair_num),total_point = as.numeric(total_point),pre_rating = as.numeric(pre_rating),post_rating = as.numeric(post_rating))
## Warning in eval(substitute(list(...)), `_data`, parent.frame()): NAs introduced
## by coercion
# Get the post-rating for each round
Round1 <-df2$pre_rating[round1]
Round2 <-df2$pre_rating[round2]
Round3 <-df2$pre_rating[round3]
Round4 <-df2$pre_rating[round4]
Round5 <-df2$pre_rating[round5]
Round6 <-df2$pre_rating[round6]
Round7 <-df2$pre_rating[round7]
#combine all the rounds in a data frame
together <- data.frame(Round1, Round2, Round3, Round4, Round5, Round6, Round7)
# avg of all the rounds
avg <- rowMeans(together, na.rm =T)
#round avg to the nearest whole number
avg <-round(avg,digits=0)
#Convert into a data frame
chess_tournament <- data.frame(pair_num, name, state, total_point, pre_rating, post_rating, Round1, Round2, Round3, Round4, Round5, Round6, Round7, avg)
# Rename the column names
colnames(chess_tournament) <- c("Pair Num", "Player's Name", "Player's State", "Total Number of Points"," Player's Pre-Rating", "Player's Post-Rating", "Round 1", "Round 2", "Round 3", "Round 4", "Round 5", "Round 6", "Round 7", "Average Pre Class Rating of Opponents")
chess_tournament
## Pair Num Player's Name Player's State
## 1 1 GARY HUA ON
## 2 2 DAKSHESH DARURI MI
## 3 3 ADITYA BAJAJ MI
## 4 4 PATRICK H SCHILLING MI
## 5 5 HANSHI ZUO MI
## 6 6 HANSEN SONG OH
## 7 7 GARY DEE SWATHELL MI
## 8 8 EZEKIEL HOUGHTON MI
## 9 9 STEFANO LEE ON
## 10 10 ANVIT RAO MI
## 11 11 CAMERON WILLIAM MC LEMAN MI
## 12 12 KENNETH J TACK MI
## 13 13 TORRANCE HENRY JR MI
## 14 14 BRADLEY SHAW MI
## 15 15 ZACHARY JAMES HOUGHTON MI
## 16 16 MIKE NIKITIN MI
## 17 17 RONALD GRZEGORCZYK MI
## 18 18 DAVID SUNDEEN MI
## 19 19 DIPANKAR ROY MI
## 20 20 JASON ZHENG MI
## 21 21 DINH DANG BUI ON
## 22 22 EUGENE L MCCLURE MI
## 23 23 ALAN BUI ON
## 24 24 MICHAEL R ALDRICH MI
## 25 25 LOREN SCHWIEBERT MI
## 26 26 MAX ZHU ON
## 27 27 GAURAV GIDWANI MI
## 28 28 SOFIA ADINA STANESCU-BELLU MI
## 29 29 CHIEDOZIE OKORIE MI
## 30 30 GEORGE AVERY JONES ON
## 31 31 RISHI SHETTY MI
## 32 32 JOSHUA PHILIP MATHEWS ON
## 33 33 JADE GE MI
## 34 34 MICHAEL JEFFERY THOMAS MI
## 35 35 JOSHUA DAVID LEE MI
## 36 36 SIDDHARTH JHA MI
## 37 37 AMIYATOSH PWNANANDAM MI
## 38 38 BRIAN LIU MI
## 39 39 JOEL R HENDON MI
## 40 40 FOREST ZHANG MI
## 41 41 KYLE WILLIAM MURPHY MI
## 42 42 JARED GE MI
## 43 43 ROBERT GLEN VASEY MI
## 44 44 JUSTIN D SCHILLING MI
## 45 45 DEREK YAN MI
## 46 46 JACOB ALEXANDER LAVALLEY MI
## 47 47 ERIC WRIGHT MI
## 48 48 DANIEL KHAIN MI
## 49 49 MICHAEL J MARTIN MI
## 50 50 SHIVAM JHA MI
## 51 51 TEJAS AYYAGARI MI
## 52 52 ETHAN GUO MI
## 53 53 JOSE C YBARRA MI
## 54 54 LARRY HODGE MI
## 55 55 ALEX KONG MI
## 56 56 MARISA RICCI MI
## 57 57 MICHAEL LU MI
## 58 58 VIRAJ MOHILE MI
## 59 59 SEAN M MC CORMICK MI
## 60 60 JULIA SHEN MI
## 61 61 JEZZEL FARKAS ON
## 62 62 ASHWIN BALAJI MI
## 63 63 THOMAS JOSEPH HOSMER MI
## 64 64 BEN LI MI
## Total Number of Points Player's Pre-Rating Player's Post-Rating Round 1
## 1 6.0 1794 1817 1436
## 2 6.0 1553 1663 1175
## 3 6.0 1384 1640 1641
## 4 5.5 1716 1744 1363
## 5 5.5 1655 1690 1242
## 6 5.0 1686 1687 1399
## 7 5.0 1649 1673 1092
## 8 5.0 1641 1657P 1384
## 9 5.0 1411 1564 1745
## 10 5.0 1365 1544 1604
## 11 4.5 1712 1696 1423
## 12 4.5 1663 1670 1332
## 13 4.5 1666 1662 1355
## 14 4.5 1610 1618 1270
## 15 4.5 1220 1416P 1564
## 16 4.0 1604 1613 1365
## 17 4.0 1629 1610 1382
## 18 4.0 1600 1600 1362
## 19 4.0 1564 1570 1220
## 20 4.0 1595 1569 1348
## 21 4.0 1563 1562 1283
## 22 4.0 1555 1529 1163
## 23 4.0 1363 1371 1716
## 24 4.0 1229 1300 1507
## 25 3.5 1745 1681 1411
## 26 3.5 1579 1564 1291
## 27 3.5 1552 1539 1011
## 28 3.5 1507 1513 1229
## 29 3.5 1602 1508P 1056
## 30 3.5 1522 1444 935
## 31 3.5 1494 1444 917
## 32 3.5 1441 1433 955
## 33 3.5 1449 1421 967
## 34 3.5 1399 1400 1686
## 35 3.5 1438 1392 377
## 36 3.5 1355 1367 1666
## 37 3.5 980 1077P NA
## 38 3.0 1423 1439 1712
## 39 3.0 1436 1413 1794
## 40 3.0 1348 1346 1595
## 41 3.0 1403 1341P 853
## 42 3.0 1332 1256 1663
## 43 3.0 1283 1244 1563
## 44 3.0 1199 1199 NA
## 45 3.0 1242 1191 1655
## 46 3.0 377 1076P 1438
## 47 2.5 1362 1341 1600
## 48 2.5 1382 1335 1629
## 49 2.5 1291 1259P 1579
## 50 2.5 1056 1111 1602
## 51 2.5 1011 1097 1552
## 52 2.5 935 1092 1522
## 53 2.0 1393 1359 NA
## 54 2.0 1270 1200 1610
## 55 2.0 1186 1163 1530
## 56 2.0 1153 1140 NA
## 57 2.0 1092 1079 1649
## 58 2.0 917 941 1494
## 59 2.0 853 878 1403
## 60 1.5 967 984 1449
## 61 1.5 955 979P 1441
## 62 1.0 1530 1535 1186
## 63 1.0 1175 1125 1553
## 64 1.0 1163 1112 1555
## Round 2 Round 3 Round 4 Round 5 Round 6 Round 7
## 1 1563 1600 1610 1649 1663 1716
## 2 917 1716 1629 1604 1595 1649
## 3 955 1745 1563 1712 1666 1663
## 4 1507 1553 1579 1655 1564 1794
## 5 980 1663 1666 1716 1610 1629
## 6 1602 1712 1438 1365 1552 1563
## 7 377 1666 1712 1794 1411 1553
## 8 1441 1610 1411 1362 1507 1564
## 9 1600 853 1641 1579 1649 1595
## 10 1564 1186 1494 1686 1745 1600
## 11 1153 1686 1649 1384 1399 1579
## 12 1449 1655 1423 NA 1794 1384
## 13 1552 1649 1655 1449 1384 1441
## 14 1199 1641 1794 1552 1655 1494
## 15 1604 1522 1555 1270 1449 1423
## 16 1220 NA 1436 1553 1355 NA
## 17 1403 1579 1553 1363 1555 1655
## 18 1411 1794 1441 1564 1423 1365
## 19 1365 935 1507 1600 1716 1641
## 20 1291 1363 1403 1507 1553 1411
## 21 1794 1362 1384 1348 1436 1686
## 22 935 1507 1220 NA 1629 1348
## 23 1283 1595 917 1629 980 377
## 24 1362 1283 1745 967 1199 1436
## 25 1393 1384 1229 1399 1365 1362
## 26 1348 1629 1716 1411 1441 1712
## 27 1666 377 980 1610 1686 NA
## 28 1716 1555 1564 1595 1641 1355
## 29 1686 1423 1399 935 1382 NA
## 30 1163 1220 1186 1494 955 1056
## 31 1186 1163 1365 1522 1056 1610
## 32 1641 1199 1600 1011 1579 1666
## 33 1663 1056 1355 1666 1220 1011
## 34 967 980 1602 1745 1712 935
## 35 1423 1153 1686 1092 935 1382
## 36 1092 1011 1449 NA 1604 1507
## 37 1655 1399 1552 NA 1363 955
## 38 1438 1602 1663 NA 1600 1220
## 39 1270 1348 1604 1199 1563 1229
## 40 1579 1436 853 1563 1153 1555
## 41 1629 917 1595 NA NA NA
## 42 1056 1092 967 955 1163 1153
## 43 1363 1229 1175 853 377 1186
## 44 1610 1441 1393 1436 1229 853
## 45 1011 967 1153 1175 1186 917
## 46 1649 1552 1056 1163 1283 1363
## 47 1229 1563 955 1641 1011 1745
## 48 1175 NA 935 NA 1602 1438
## 49 1595 1175 1163 917 NA NA
## 50 1332 1449 377 NA 1494 1522
## 51 1242 1355 1092 1441 1362 1449
## 52 1555 1564 1382 1602 1438 1399
## 53 1745 NA 1199 NA 1092 NA
## 54 1436 955 NA 1220 853 1163
## 55 1494 1365 1522 NA 1242 1283
## 56 1712 1438 1242 NA 1348 1332
## 57 1355 1332 1011 1438 1393 NA
## 58 1553 1403 1363 1291 NA 1242
## 59 NA 1411 1348 1283 1270 1199
## 60 1399 1242 1332 1229 NA NA
## 61 1384 1270 1362 1332 1522 980
## 62 NA NA NA NA NA NA
## 63 1382 1291 1283 1242 NA NA
## 64 1522 1494 1291 377 1332 1270
## Average Pre Class Rating of Opponents
## 1 1605
## 2 1469
## 3 1564
## 4 1574
## 5 1501
## 6 1519
## 7 1372
## 8 1468
## 9 1523
## 10 1554
## 11 1468
## 12 1506
## 13 1498
## 14 1515
## 15 1484
## 16 1386
## 17 1499
## 18 1480
## 19 1426
## 20 1411
## 21 1470
## 22 1300
## 23 1214
## 24 1357
## 25 1363
## 26 1507
## 27 1222
## 28 1522
## 29 1314
## 30 1144
## 31 1260
## 32 1379
## 33 1277
## 34 1375
## 35 1150
## 36 1388
## 37 1385
## 38 1539
## 39 1430
## 40 1391
## 41 1248
## 42 1150
## 43 1107
## 44 1327
## 45 1152
## 46 1358
## 47 1392
## 48 1356
## 49 1286
## 50 1296
## 51 1356
## 52 1495
## 53 1345
## 54 1206
## 55 1406
## 56 1414
## 57 1363
## 58 1391
## 59 1319
## 60 1330
## 61 1327
## 62 1186
## 63 1350
## 64 1263
Write to CSV
write.csv(chess_tournament, "chess_tournament.csv")