average pre-chess rating of player’s opponents.
Load the data and make the columns more meaningful.
Note: I use a Ubuntu Linux environment to store my work so do change the
directory of your choice on your OS.
# library(utils)
chess_data <- read.fwf("/home/jonboy1987/Desktop/CUNYSPS/IS607/Assignments/Project1/tournamentinfo.txt"
,skip = 4, widths = c(5,3,32,6,6,6,6,6,6,6,6))
# give the chess_data dataset more better attribute names
colnames(chess_data) <- c("playerID_state","V2","PlayerName_prating","total_pts"
,"round1","round2","round3","round4","round5","round6"
,"round7")
head(chess_data)
## playerID_state V2 PlayerName_prating total_pts round1
## 1 1 | GARY HUA |6.0 |W 39
## 2 ON | 15445895 / R: 1794 ->1817 |N:2 |W
## 3 ----- --- -------------------------------- ------ ------
## 4 2 | DAKSHESH DARURI |6.0 |W 63
## 5 MI | 14598900 / R: 1553 ->1663 |N:2 |B
## 6 ----- --- -------------------------------- ------ ------
## round2 round3 round4 round5 round6 round7
## 1 |W 21 |W 18 |W 14 |W 7 |D 12 |D 4
## 2 |B |W |B |W |B |W
## 3 ------ ------ ------ ------ ------ ------
## 4 |W 58 |L 4 |W 17 |W 16 |W 20 |W 7
## 5 |W |B |W |B |W |B
## 6 ------ ------ ------ ------ ------ ------
We can extract the State using a regular expression by using column 1 of the
Chess data set by using “[A-Z]{2}” to get the states
# use grep and value = TRUE to show the strings that match the regexp
states <- grep("[A-Z]{2}", chess_data$playerID_state, value = TRUE)
# strip out the white spaces and store the Data
states <- trimws(states)
We will also extract the ID of each player; this will come in handy in
computing the average pre-rating of opponents.
ID <- grep("[0-9]+", chess_data$playerID_state, value = TRUE)
ID <- trimws(ID)
ID
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14"
## [15] "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28"
## [29] "29" "30" "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42"
## [43] "43" "44" "45" "46" "47" "48" "49" "50" "51" "52" "53" "54" "55" "56"
## [57] "57" "58" "59" "60" "61" "62" "63" "64"
The 2nd column in the chess_data set contains the name and Pre-Rating
For the hardest part; to compute the average pre chess rating of opponents
First we will want to grab all the columns that contains all 7 rounds of
the tournament and extract the player ID’s from them.
rounds <- chess_data[,c("round1", "round2", "round3", "round4", "round5",
"round6", "round7")]
# Begin stripping down the rounds dataset
rounds <- rounds[-c(seq(from = 3, to = 192, by = 3)), ] # remove the -'s
# Remove the rows below the player ID's
rounds <- rounds[-c(seq(from = 2, to = 191, by = 2)), ]
# Add NA's to the entries that are empty
# (player did not vs opponent for whatever reason)
rounds <- apply(rounds, 2, function(x) gsub("\\s{4}", NA, x))
# Now strip rest of characters so only the ID's and NA remain
rounds <- apply(rounds,2, function(x) gsub("^\\|[A-Z]\\s+", "",x))
rounds <- apply(rounds,2,as.numeric) # make the ID's numberic for indexing
head(rounds,15)
## round1 round2 round3 round4 round5 round6 round7
## [1,] 39 21 18 14 7 12 4
## [2,] 63 58 4 17 16 20 7
## [3,] 8 61 25 21 11 13 12
## [4,] 23 28 2 26 5 19 1
## [5,] 45 37 12 13 4 14 17
## [6,] 34 29 11 35 10 27 21
## [7,] 57 46 13 11 1 9 2
## [8,] 3 32 14 9 47 28 19
## [9,] 25 18 59 8 26 7 20
## [10,] 16 19 55 31 6 25 18
## [11,] 38 56 6 7 3 34 26
## [12,] 42 33 5 38 NA 1 3
## [13,] 36 27 7 5 33 3 32
## [14,] 54 44 8 1 27 5 31
## [15,] 19 16 30 22 54 33 38
With the rounds dataset now cleaned to just ID’s and NA’s we can map
the ID to the pre-ratings
# make a key-value pair so we can map
ID_rating <- list(ID, pre_rating)
rounds <- apply(rounds, 1:2, function(x) ID_rating[[2]][x])
rounds <- apply(rounds,2,as.numeric) # make numeric for computing the average
rounds
## round1 round2 round3 round4 round5 round6 round7
## [1,] 1436 1563 1600 1610 1649 1663 1716
## [2,] 1175 917 1716 1629 1604 1595 1649
## [3,] 1641 955 1745 1563 1712 1666 1663
## [4,] 1363 1507 1553 1579 1655 1564 1794
## [5,] 1242 980 1663 1666 1716 1610 1629
## [6,] 1399 1602 1712 1438 1365 1552 1563
## [7,] 1092 377 1666 1712 1794 1411 1553
## [8,] 1384 1441 1610 1411 1362 1507 1564
## [9,] 1745 1600 853 1641 1579 1649 1595
## [10,] 1604 1564 1186 1494 1686 1745 1600
## [11,] 1423 1153 1686 1649 1384 1399 1579
## [12,] 1332 1449 1655 1423 NA 1794 1384
## [13,] 1355 1552 1649 1655 1449 1384 1441
## [14,] 1270 1199 1641 1794 1552 1655 1494
## [15,] 1564 1604 1522 1555 1270 1449 1423
## [16,] 1365 1220 NA 1436 1553 1355 NA
## [17,] 1382 1403 1579 1553 1363 1555 1655
## [18,] 1362 1411 1794 1441 1564 1423 1365
## [19,] 1220 1365 935 1507 1600 1716 1641
## [20,] 1348 1291 1363 1403 1507 1553 1411
## [21,] 1283 1794 1362 1384 1348 1436 1686
## [22,] 1163 935 1507 1220 NA 1629 1348
## [23,] 1716 1283 1595 917 1629 980 377
## [24,] 1507 1362 1283 1745 967 1199 1436
## [25,] 1411 1393 1384 1229 1399 1365 1362
## [26,] 1291 1348 1629 1716 1411 1441 1712
## [27,] 1011 1666 377 980 1610 1686 NA
## [28,] 1229 1716 1555 1564 1595 1641 1355
## [29,] 1056 1686 1423 1399 935 1382 NA
## [30,] 935 1163 1220 1186 1494 955 1056
## [31,] 917 1186 1163 1365 1522 1056 1610
## [32,] 955 1641 1199 1600 1011 1579 1666
## [33,] 967 1663 1056 1355 1666 1220 1011
## [34,] 1686 967 980 1602 1745 1712 935
## [35,] 377 1423 1153 1686 1092 935 1382
## [36,] 1666 1092 1011 1449 NA 1604 1507
## [37,] NA 1655 1399 1552 NA 1363 955
## [38,] 1712 1438 1602 1663 NA 1600 1220
## [39,] 1794 1270 1348 1604 1199 1563 1229
## [40,] 1595 1579 1436 853 1563 1153 1555
## [41,] 853 1629 917 1595 NA NA NA
## [42,] 1663 1056 1092 967 955 1163 1153
## [43,] 1563 1363 1229 1175 853 377 1186
## [44,] NA 1610 1441 1393 1436 1229 853
## [45,] 1655 1011 967 1153 1175 1186 917
## [46,] 1438 1649 1552 1056 1163 1283 1363
## [47,] 1600 1229 1563 955 1641 1011 1745
## [48,] 1629 1175 NA 935 NA 1602 1438
## [49,] 1579 1595 1175 1163 917 NA NA
## [50,] 1602 1332 1449 377 NA 1494 1522
## [51,] 1552 1242 1355 1092 1441 1362 1449
## [52,] 1522 1555 1564 1382 1602 1438 1399
## [53,] NA 1745 NA 1199 NA 1092 NA
## [54,] 1610 1436 955 NA 1220 853 1163
## [55,] 1530 1494 1365 1522 NA 1242 1283
## [56,] NA 1712 1438 1242 NA 1348 1332
## [57,] 1649 1355 1332 1011 1438 1393 NA
## [58,] 1494 1553 1403 1363 1291 NA 1242
## [59,] 1403 NA 1411 1348 1283 1270 1199
## [60,] 1449 1399 1242 1332 1229 NA NA
## [61,] 1441 1384 1270 1362 1332 1522 980
## [62,] 1186 NA NA NA NA NA NA
## [63,] 1553 1382 1291 1283 1242 NA NA
## [64,] 1555 1522 1494 1291 377 1332 1270
Compute the Average Pre-Rating of opponents of each player
avg_prerating <- round(apply(rounds, 1, mean, na.rm = TRUE), 0)
avg_prerating
## [1] 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 1468 1506 1498 1515
## [15] 1484 1386 1499 1480 1426 1411 1470 1300 1214 1357 1363 1507 1222 1522
## [29] 1314 1144 1260 1379 1277 1375 1150 1388 1385 1539 1430 1391 1248 1150
## [43] 1107 1327 1152 1358 1392 1356 1286 1296 1356 1495 1345 1206 1406 1414
## [57] 1363 1391 1319 1330 1327 1186 1350 1263
chess_data_frame <- data.frame(name = names, state = states,
TotalPoints = total_points,
Pre_Rating = pre_rating,
Average_Pre_Rating = avg_prerating)
chess_data_frame
## name state TotalPoints Pre_Rating
## 1 GARY HUA ON 6.0 1794
## 2 DAKSHESH DARURI MI 6.0 1553
## 3 ADITYA BAJAJ MI 6.0 1384
## 4 PATRICK H SCHILLING MI 5.5 1716
## 5 HANSHI ZUO MI 5.5 1655
## 6 HANSEN SONG OH 5.0 1686
## 7 GARY DEE SWATHELL MI 5.0 1649
## 8 EZEKIEL HOUGHTON MI 5.0 1641
## 9 STEFANO LEE ON 5.0 1411
## 10 ANVIT RAO MI 5.0 1365
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712
## 12 KENNETH J TACK MI 4.5 1663
## 13 TORRANCE HENRY JR MI 4.5 1666
## 14 BRADLEY SHAW MI 4.5 1610
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220
## 16 MIKE NIKITIN MI 4.0 1604
## 17 RONALD GRZEGORCZYK MI 4.0 1629
## 18 DAVID SUNDEEN MI 4.0 1600
## 19 DIPANKAR ROY MI 4.0 1564
## 20 JASON ZHENG MI 4.0 1595
## 21 DINH DANG BUI ON 4.0 1563
## 22 EUGENE L MCCLURE MI 4.0 1555
## 23 ALAN BUI ON 4.0 1363
## 24 MICHAEL R ALDRICH MI 4.0 1229
## 25 LOREN SCHWIEBERT MI 3.5 1745
## 26 MAX ZHU ON 3.5 1579
## 27 GAURAV GIDWANI MI 3.5 1552
## 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507
## 29 CHIEDOZIE OKORIE MI 3.5 1602
## 30 GEORGE AVERY JONES ON 3.5 1522
## 31 RISHI SHETTY MI 3.5 1494
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441
## 33 JADE GE MI 3.5 1449
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399
## 35 JOSHUA DAVID LEE MI 3.5 1438
## 36 SIDDHARTH JHA MI 3.5 1355
## 37 AMIYATOSH PWNANANDAM MI 3.5 980
## 38 BRIAN LIU MI 3.0 1423
## 39 JOEL R HENDON MI 3.0 1436
## 40 FOREST ZHANG MI 3.0 1348
## 41 KYLE WILLIAM MURPHY MI 3.0 1403
## 42 JARED GE MI 3.0 1332
## 43 ROBERT GLEN VASEY MI 3.0 1283
## 44 JUSTIN D SCHILLING MI 3.0 1199
## 45 DEREK YAN MI 3.0 1242
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377
## 47 ERIC WRIGHT MI 2.5 1362
## 48 DANIEL KHAIN MI 2.5 1382
## 49 MICHAEL J MARTIN MI 2.5 1291
## 50 SHIVAM JHA MI 2.5 1056
## 51 TEJAS AYYAGARI MI 2.5 1011
## 52 ETHAN GUO MI 2.5 935
## 53 JOSE C YBARRA MI 2.0 1393
## 54 LARRY HODGE MI 2.0 1270
## 55 ALEX KONG MI 2.0 1186
## 56 MARISA RICCI MI 2.0 1153
## 57 MICHAEL LU MI 2.0 1092
## 58 VIRAJ MOHILE MI 2.0 917
## 59 SEAN M MC CORMICK MI 2.0 853
## 60 JULIA SHEN MI 1.5 967
## 61 JEZZEL FARKAS ON 1.5 955
## 62 ASHWIN BALAJI MI 1.0 1530
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175
## 64 BEN LI MI 1.0 1163
## Average_Pre_Rating
## 1 1605
## 2 1469
## 3 1564
## 4 1574
## 5 1501
## 6 1519
## 7 1372
## 8 1468
## 9 1523
## 10 1554
## 11 1468
## 12 1506
## 13 1498
## 14 1515
## 15 1484
## 16 1386
## 17 1499
## 18 1480
## 19 1426
## 20 1411
## 21 1470
## 22 1300
## 23 1214
## 24 1357
## 25 1363
## 26 1507
## 27 1222
## 28 1522
## 29 1314
## 30 1144
## 31 1260
## 32 1379
## 33 1277
## 34 1375
## 35 1150
## 36 1388
## 37 1385
## 38 1539
## 39 1430
## 40 1391
## 41 1248
## 42 1150
## 43 1107
## 44 1327
## 45 1152
## 46 1358
## 47 1392
## 48 1356
## 49 1286
## 50 1296
## 51 1356
## 52 1495
## 53 1345
## 54 1206
## 55 1406
## 56 1414
## 57 1363
## 58 1391
## 59 1319
## 60 1330
## 61 1327
## 62 1186
## 63 1350
## 64 1263
# Finally export the new Clean dataset into a .csv file
write.csv(chess_data_frame,
"/home/jonboy1987/Desktop/CUNYSPS/IS607/Assignments/Project1/chess_tournament_data.csv",
row.names = FALSE)