Load packages.

library(knitr)
library(stringr)
library(tidyr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Import text file, identify delimiters, skip first 4 rows.

chess<-read.table("tournamentinfo.txt", sep="|", fill=TRUE, stringsAsFactors = FALSE, skip=4)
#str(chess) #testing
#head(chess) #testing

Combine observations across two rows.

namesrow <- chess[seq(1,nrow(chess),by=3),]
#head(namesrow) #testing
staterow <- chess[seq(2,nrow(chess),by=3),]
#head(staterow) #testing
chess <- cbind(namesrow, staterow)
#head(chess) #testing

Discard unneeded columns.

chess <- subset(chess[, c(1:10,12,13)])
#head(chess) #testing

Use regular expressions to extract player ID and pre-game rating.

chess <- extract(chess, V2.1, c('PID', 'PreRating'), '(.[0-9]{8}) / R: (.[0-9]{3,4})')
#str(chess) #testing

Use regular expressions to split each round into round number and opponent number.

[REVISED to adjust syntax of regular expression.]

chess <- extract (chess, V4, c('Game1', 'Opp1'), '([A-Z]{0,1})* +([0-9]{0,2})')
chess <- extract (chess, V5, c('Game2', 'Opp2'), '([A-Z]{0,1})* +([0-9]{0,2})')
chess <- extract (chess, V6, c('Game3', 'Opp3'), '([A-Z]{0,1})* +([0-9]{0,2})')
chess <- extract (chess, V7, c('Game4', 'Opp4'), '([A-Z]{0,1})* +([0-9]{0,2})')
chess <- extract (chess, V8, c('Game5', 'Opp5'), '([A-Z]{0,1})* +([0-9]{0,2})')
chess <- extract (chess, V9, c('Game6', 'Opp6'), '([A-Z]{0,1})* +([0-9]{0,2})')
chess <- extract (chess, V10, c('Game7', 'Opp7'), '([A-Z]{0,1})* +([0-9]{0,2})')

View result and structure.

knitr::kable(head(chess))

	V1	V2	V3	Opp1	Opp2	Opp3	Opp4	Opp5	Opp6	Opp7	V1.1	PID	PreRating
1	1	GARY HUA	6.0	39	21	18	14	7	12	4	ON	15445895	1794
4	2	DAKSHESH DARURI	6.0	63	58	4	17	16	20	7	MI	14598900	1553
7	3	ADITYA BAJAJ	6.0	8	61	25	21	11	13	12	MI	14959604	1384
10	4	PATRICK H SCHILLING	5.5	23	28	2	26	5	19	1	MI	12616049	1716
13	5	HANSHI ZUO	5.5	45	37	12	13	4	14	17	MI	14601533	1655
16	6	HANSEN SONG	5.0	34	29	11	35	10	27	21	OH	15055204	1686

str(chess)

## 'data.frame':    64 obs. of  20 variables:
##  $ V1       : chr  "    1 " "    2 " "    3 " "    4 " ...
##  $ V2       : chr  " GARY HUA                        " " DAKSHESH DARURI                 " " ADITYA BAJAJ                    " " PATRICK H SCHILLING             " ...
##  $ V3       : chr  "6.0  " "6.0  " "6.0  " "5.5  " ...
##  $ Game1    : chr  "" "" "" "" ...
##  $ Opp1     : chr  "39" "63" "8" "23" ...
##  $ Game2    : chr  "" "" "" "" ...
##  $ Opp2     : chr  "21" "58" "61" "28" ...
##  $ Game3    : chr  "" "" "" "" ...
##  $ Opp3     : chr  "18" "4" "25" "2" ...
##  $ Game4    : chr  "" "" "" "" ...
##  $ Opp4     : chr  "14" "17" "21" "26" ...
##  $ Game5    : chr  "" "" "" "" ...
##  $ Opp5     : chr  "7" "16" "11" "5" ...
##  $ Game6    : chr  "" "" "" "" ...
##  $ Opp6     : chr  "12" "20" "13" "19" ...
##  $ Game7    : chr  "" "" "" "" ...
##  $ Opp7     : chr  "4" "7" "12" "1" ...
##  $ V1.1     : chr  "   ON " "   MI " "   MI " "   MI " ...
##  $ PID      : chr  " 15445895" " 14598900" " 14959604" " 12616049" ...
##  $ PreRating: chr  "1794" "1553" "1384" "1716" ...

Change required character columns to numeric, view structure.

chess[, c(1,3,5,7,9,11,13,15,17,19,20)] <- sapply(chess[, c(1,3,5,7,9,11,13,15,17,19,20)], as.numeric)
str(chess)

## 'data.frame':    64 obs. of  20 variables:
##  $ V1       : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ V2       : chr  " GARY HUA                        " " DAKSHESH DARURI                 " " ADITYA BAJAJ                    " " PATRICK H SCHILLING             " ...
##  $ V3       : num  6 6 6 5.5 5.5 5 5 5 5 5 ...
##  $ Game1    : chr  "" "" "" "" ...
##  $ Opp1     : num  39 63 8 23 45 34 57 3 25 16 ...
##  $ Game2    : chr  "" "" "" "" ...
##  $ Opp2     : num  21 58 61 28 37 29 46 32 18 19 ...
##  $ Game3    : chr  "" "" "" "" ...
##  $ Opp3     : num  18 4 25 2 12 11 13 14 59 55 ...
##  $ Game4    : chr  "" "" "" "" ...
##  $ Opp4     : num  14 17 21 26 13 35 11 9 8 31 ...
##  $ Game5    : chr  "" "" "" "" ...
##  $ Opp5     : num  7 16 11 5 4 10 1 47 26 6 ...
##  $ Game6    : chr  "" "" "" "" ...
##  $ Opp6     : num  12 20 13 19 14 27 9 28 7 25 ...
##  $ Game7    : chr  "" "" "" "" ...
##  $ Opp7     : num  4 7 12 1 17 21 2 19 20 18 ...
##  $ V1.1     : chr  "   ON " "   MI " "   MI " "   MI " ...
##  $ PID      : num  15445895 14598900 14959604 12616049 14601533 ...
##  $ PreRating: num  1794 1553 1384 1716 1655 ...

Create lookup table of all players and their pre-game rating, view structure.

tblookup <- subset(chess[, c(1,20)])
str(tblookup)

## 'data.frame':    64 obs. of  2 variables:
##  $ V1       : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ PreRating: num  1794 1553 1384 1716 1655 ...

head(tblookup)

##    V1 PreRating
## 1   1      1794
## 4   2      1553
## 7   3      1384
## 10  4      1716
## 13  5      1655
## 16  6      1686

Perform lookup for each opponent by their opponent number.

combo<- merge(chess, tblookup, by.x="Opp1",by.y="V1", sort=FALSE, all.x=TRUE)
combo<- merge(combo, tblookup, by.x="Opp2",by.y="V1", sort=FALSE, all.x=TRUE)
combo<- merge(combo, tblookup, by.x="Opp3",by.y="V1", sort=FALSE, all.x=TRUE)

## Warning in merge.data.frame(combo, tblookup, by.x = "Opp3", by.y = "V1", :
## column names 'PreRating.x', 'PreRating.y' are duplicated in the result

combo<- merge(combo, tblookup, by.x="Opp4",by.y="V1", sort=FALSE, all.x=TRUE)

## Warning in merge.data.frame(combo, tblookup, by.x = "Opp4", by.y = "V1", :
## column names 'PreRating.x', 'PreRating.y' are duplicated in the result

combo<- merge(combo, tblookup, by.x="Opp5",by.y="V1", sort=FALSE, all.x=TRUE)

## Warning in merge.data.frame(combo, tblookup, by.x = "Opp5", by.y = "V1", :
## column names 'PreRating.x', 'PreRating.y', 'PreRating.x', 'PreRating.y' are
## duplicated in the result

combo<- merge(combo, tblookup, by.x="Opp6",by.y="V1", sort=FALSE, all.x=TRUE)

## Warning in merge.data.frame(combo, tblookup, by.x = "Opp6", by.y = "V1", :
## column names 'PreRating.x', 'PreRating.y', 'PreRating.x', 'PreRating.y' are
## duplicated in the result

combo<- merge(combo, tblookup, by.x="Opp7",by.y="V1", sort=FALSE, all.x=TRUE)

## Warning in merge.data.frame(combo, tblookup, by.x = "Opp7", by.y = "V1", :
## column names 'PreRating.x', 'PreRating.y', 'PreRating.x', 'PreRating.y',
## 'PreRating.x', 'PreRating.y' are duplicated in the result

Discard unneeded columns, rename columns, view result.

[REVISED to sort columns with ‘order’.]

combo <- subset(combo[, c(8:10,18:27)])
colnames(combo) <- c("Order","Player'S Name","Total Number of Points","Player's State","Player ID","Player's PreRating","Opp1PR","Opp2PR","Opp3PR","Opp4PR","Opp5PR","Opp6PR","Opp7PR")

combo<-combo[ order(combo[,1]), ]
knitr::kable((head(combo)))

Order	Player’S Name	Total Number of Points	Player’s State	Player ID	Player’s PreRating	Opp1PR	Opp2PR	Opp3PR	Opp4PR	Opp5PR	Opp6PR	Opp7PR
1	GARY HUA	6.0	ON	15445895	1794	1436	1563	1600	1610	1649	1663	1716
2	DAKSHESH DARURI	6.0	MI	14598900	1553	1175	917	1716	1629	1604	1595	1649
3	ADITYA BAJAJ	6.0	MI	14959604	1384	1641	955	1745	1563	1712	1666	1663
4	PATRICK H SCHILLING	5.5	MI	12616049	1716	1363	1507	1553	1579	1655	1564	1794
5	HANSHI ZUO	5.5	MI	14601533	1655	1242	980	1663	1666	1716	1610	1629
6	HANSEN SONG	5.0	OH	15055204	1686	1399	1602	1712	1438	1365	1552	1563

knitr::kable((tail(combo)))

	Order	Player’S Name	Total Number of Points	Player’s State	Player ID	Player’s PreRating	Opp1PR	Opp2PR	Opp3PR	Opp4PR	Opp5PR	Opp6PR	Opp7PR
43	59	SEAN M MC CORMICK	2.0	MI	12841036	853	1403	NA	1411	1348	1283	1270	1199
63	60	JULIA SHEN	1.5	MI	14579262	967	1449	1399	1242	1332	1229	NA	NA
40	61	JEZZEL FARKAS	1.5	ON	15771592	955	1441	1384	1270	1362	1332	1522	980
55	62	ASHWIN BALAJI	1.0	MI	15219542	1530	1186	NA	NA	NA	NA	NA	NA
60	63	THOMAS JOSEPH HOSMER	1.0	MI	15057092	1175	1553	1382	1291	1283	1242	NA	NA
41	64	BEN LI	1.0	MI	15006561	1163	1555	1522	1494	1291	377	1332	1270

Add column for average opponent pre-game rating, view result.

[REVISED to add ‘na.rm=TRUE’ to remove NA values from mean.]

combo <- mutate(combo, "Average Pre-Chess Rating of Opponents" = round(rowMeans(combo[,7:13],na.rm = TRUE),0))
knitr::kable((head(combo)))

Order	Player’S Name	Total Number of Points	Player’s State	Player ID	Player’s PreRating	Opp1PR	Opp2PR	Opp3PR	Opp4PR	Opp5PR	Opp6PR	Opp7PR	Average Pre-Chess Rating of Opponents
1	GARY HUA	6.0	ON	15445895	1794	1436	1563	1600	1610	1649	1663	1716	1605
2	DAKSHESH DARURI	6.0	MI	14598900	1553	1175	917	1716	1629	1604	1595	1649	1469
3	ADITYA BAJAJ	6.0	MI	14959604	1384	1641	955	1745	1563	1712	1666	1663	1564
4	PATRICK H SCHILLING	5.5	MI	12616049	1716	1363	1507	1553	1579	1655	1564	1794	1574
5	HANSHI ZUO	5.5	MI	14601533	1655	1242	980	1663	1666	1716	1610	1629	1501
6	HANSEN SONG	5.0	OH	15055204	1686	1399	1602	1712	1438	1365	1552	1563	1519

knitr::kable((tail(combo)))

	Order	Player’S Name	Total Number of Points	Player’s State	Player ID	Player’s PreRating	Opp1PR	Opp2PR	Opp3PR	Opp4PR	Opp5PR	Opp6PR	Opp7PR	Average Pre-Chess Rating of Opponents
59	59	SEAN M MC CORMICK	2.0	MI	12841036	853	1403	NA	1411	1348	1283	1270	1199	1319
60	60	JULIA SHEN	1.5	MI	14579262	967	1449	1399	1242	1332	1229	NA	NA	1330
61	61	JEZZEL FARKAS	1.5	ON	15771592	955	1441	1384	1270	1362	1332	1522	980	1327
62	62	ASHWIN BALAJI	1.0	MI	15219542	1530	1186	NA	NA	NA	NA	NA	NA	1186
63	63	THOMAS JOSEPH HOSMER	1.0	MI	15057092	1175	1553	1382	1291	1283	1242	NA	NA	1350
64	64	BEN LI	1.0	MI	15006561	1163	1555	1522	1494	1291	377	1332	1270	1263

Discard unneeded columns, view result.

results <- subset(combo[,c(1:4,6,14)])
knitr::kable(results)

Order	Player’S Name	Total Number of Points	Player’s State	Player’s PreRating	Average Pre-Chess Rating of Opponents
1	GARY HUA	6.0	ON	1794	1605
2	DAKSHESH DARURI	6.0	MI	1553	1469
3	ADITYA BAJAJ	6.0	MI	1384	1564
4	PATRICK H SCHILLING	5.5	MI	1716	1574
5	HANSHI ZUO	5.5	MI	1655	1501
6	HANSEN SONG	5.0	OH	1686	1519
7	GARY DEE SWATHELL	5.0	MI	1649	1372
8	EZEKIEL HOUGHTON	5.0	MI	1641	1468
9	STEFANO LEE	5.0	ON	1411	1523
10	ANVIT RAO	5.0	MI	1365	1554
11	CAMERON WILLIAM MC LEMAN	4.5	MI	1712	1468
12	KENNETH J TACK	4.5	MI	1663	1506
13	TORRANCE HENRY JR	4.5	MI	1666	1498
14	BRADLEY SHAW	4.5	MI	1610	1515
15	ZACHARY JAMES HOUGHTON	4.5	MI	1220	1484
16	MIKE NIKITIN	4.0	MI	1604	1386
17	RONALD GRZEGORCZYK	4.0	MI	1629	1499
18	DAVID SUNDEEN	4.0	MI	1600	1480
19	DIPANKAR ROY	4.0	MI	1564	1426
20	JASON ZHENG	4.0	MI	1595	1411
21	DINH DANG BUI	4.0	ON	1563	1470
22	EUGENE L MCCLURE	4.0	MI	1555	1300
23	ALAN BUI	4.0	ON	1363	1214
24	MICHAEL R ALDRICH	4.0	MI	1229	1357
25	LOREN SCHWIEBERT	3.5	MI	1745	1363
26	MAX ZHU	3.5	ON	1579	1507
27	GAURAV GIDWANI	3.5	MI	1552	1222
28	SOFIA ADINA STANESCU-BELLU	3.5	MI	1507	1522
29	CHIEDOZIE OKORIE	3.5	MI	1602	1314
30	GEORGE AVERY JONES	3.5	ON	1522	1144
31	RISHI SHETTY	3.5	MI	1494	1260
32	JOSHUA PHILIP MATHEWS	3.5	ON	1441	1379
33	JADE GE	3.5	MI	1449	1277
34	MICHAEL JEFFERY THOMAS	3.5	MI	1399	1375
35	JOSHUA DAVID LEE	3.5	MI	1438	1150
36	SIDDHARTH JHA	3.5	MI	1355	1388
37	AMIYATOSH PWNANANDAM	3.5	MI	980	1385
38	BRIAN LIU	3.0	MI	1423	1539
39	JOEL R HENDON	3.0	MI	1436	1430
40	FOREST ZHANG	3.0	MI	1348	1391
41	KYLE WILLIAM MURPHY	3.0	MI	1403	1248
42	JARED GE	3.0	MI	1332	1150
43	ROBERT GLEN VASEY	3.0	MI	1283	1107
44	JUSTIN D SCHILLING	3.0	MI	1199	1327
45	DEREK YAN	3.0	MI	1242	1152
46	JACOB ALEXANDER LAVALLEY	3.0	MI	377	1358
47	ERIC WRIGHT	2.5	MI	1362	1392
48	DANIEL KHAIN	2.5	MI	1382	1356
49	MICHAEL J MARTIN	2.5	MI	1291	1286
50	SHIVAM JHA	2.5	MI	1056	1296
51	TEJAS AYYAGARI	2.5	MI	1011	1356
52	ETHAN GUO	2.5	MI	935	1495
53	JOSE C YBARRA	2.0	MI	1393	1345
54	LARRY HODGE	2.0	MI	1270	1206
55	ALEX KONG	2.0	MI	1186	1406
56	MARISA RICCI	2.0	MI	1153	1414
57	MICHAEL LU	2.0	MI	1092	1363
58	VIRAJ MOHILE	2.0	MI	917	1391
59	SEAN M MC CORMICK	2.0	MI	853	1319
60	JULIA SHEN	1.5	MI	967	1330
61	JEZZEL FARKAS	1.5	ON	955	1327
62	ASHWIN BALAJI	1.0	MI	1530	1186
63	THOMAS JOSEPH HOSMER	1.0	MI	1175	1350
64	BEN LI	1.0	MI	1163	1263

Export as CSV.

write.table(results, "chessresults.txt")

This can be done more easily.

Data 607 Project 1

Armenoush Aslanian-Persico

Load packages.

Import text file, identify delimiters, skip first 4 rows.

Combine observations across two rows.

Discard unneeded columns.

Use regular expressions to extract player ID and pre-game rating.

Use regular expressions to split each round into round number and opponent number.

[REVISED to adjust syntax of regular expression.]

View result and structure.

Change required character columns to numeric, view structure.

Create lookup table of all players and their pre-game rating, view structure.

Perform lookup for each opponent by their opponent number.

Discard unneeded columns, rename columns, view result.

[REVISED to sort columns with ‘order’.]

Add column for average opponent pre-game rating, view result.

[REVISED to add ‘na.rm=TRUE’ to remove NA values from mean.]

Discard unneeded columns, view result.

Export as CSV.

This can be done more easily.