Overall it works, but the reference for opponent id to corresponding score has issue, i am still debugging the code.
library(stringr)
#https://stackoverflow.com/questions/12626637/reading-a-text-file-in-r-line-by-line
v_data <- c("")
v_id <- c("")
v_player_name <- c("")
v_player_state <- c("")
v_player_uscf_id <- c("")
v_total_point <- c("")
v_round_1_oppoID <- c("")
v_round_2_oppoID <- c("")
v_round_3_oppoID <- c("")
v_round_4_oppoID <- c("")
v_round_5_oppoID <- c("")
v_round_6_oppoID <- c("")
v_round_7_oppoID <- c("")
v_player_Pre_rating <- c("")
v_player_Post_rating <- c("")
v_opponent_list <- c("")
v_avg_pre_opponent_rating <- c("")
ix <- 1
fileName <- "tournamentinfo.txt"
conn <- file(fileName,open="r")
linn <-readLines(conn)
## Warning in readLines(conn): incomplete final line found on
## 'tournamentinfo.txt'
for (i in 1:length(linn)){
tmp <- unlist(str_extract_all(linn[i],"[^|]*"))
if(length(tmp) > 7) {
if( str_detect( tmp[1],"[[:digit:]]{1,}" )) { #first line
ix <- ix + 1
v_id[ix] <- as.numeric(tmp[1])
v_player_name[ix] <- trimws(tmp[3])
v_total_point[ix] <- trimws(tmp[5])
#if value is <NA>, it means no opponent in the specific round
v_round_1_oppoID[ix] <- as.numeric( unlist(str_extract_all(tmp[7], "[[:space:][:digit:]]{1,}")))
v_round_2_oppoID[ix] <- as.numeric(unlist(str_extract_all(tmp[9], "[[:space:][:digit:]]{1,}")))
v_round_3_oppoID[ix] <- as.numeric(unlist(str_extract_all(tmp[11], "[[:space:][:digit:]]{1,}")))
v_round_4_oppoID[ix] <- as.numeric(unlist(str_extract_all(tmp[13], "[[:space:][:digit:]]{1,}")))
v_round_5_oppoID[ix] <- as.numeric(unlist(str_extract_all(tmp[15], "[[:space:][:digit:]]{1,}")))
v_round_6_oppoID[ix] <- as.numeric(unlist(str_extract_all(tmp[17], "[[:space:][:digit:]]{1,}")))
v_round_7_oppoID[ix] <- as.numeric(unlist(str_extract_all(tmp[19], "[[:space:][:digit:]]{1,}")))
v_avg_pre_opponent_rating <- 0
}
else { #second line
v_player_state[ix] <- trimws(tmp[1])
tmp2 <- unlist(str_extract_all(tmp[3], "[[:digit:]]{1,}[[:space:]/P]"))
v_player_uscf_id[ix] <- tmp2[1]
v_player_Pre_rating[ix] <- unlist(str_extract_all(tmp2[2], "[[:digit:]]{1,}"))
v_player_Post_rating[ix] <- unlist(str_extract_all(tmp2[3], "[[:digit:]]{1,}"))
}
}
}
close(conn)
work with data table, but i encountered many issue such as referencing the table itself. i finally converted reound # opponent id to c() to look up the data, but the corronspding opponets pre-score is not correct, i still need to debug it
df_playerScore <- data.frame(
record_ID = v_id,
Name = v_player_name,
State = v_player_state,
USCF_ID = v_player_uscf_id,
Total_Point = v_total_point,
Pre_rating = v_player_Pre_rating,
Post_rating = v_player_Post_rating,
Round1_opponents_ID = v_round_1_oppoID,
Round2_opponents_ID = v_round_2_oppoID,
Round3_opponents_ID = v_round_3_oppoID,
Round4_opponents_ID = v_round_4_oppoID,
Round5_opponents_ID = v_round_5_oppoID,
Round6_opponents_ID = v_round_6_oppoID,
Round7_opponents_ID = v_round_7_oppoID,
Opponents_list = v_opponent_list,
Avg_Pre_Opponents_rating = v_avg_pre_opponent_rating
)
library(data.table)
dt <- data.table(df_playerScore)
for(x in 1:nrow(dt))
{
ref <- dt[x,c(8,9,10,11,12,13,14), with=FALSE]
#not sure what goes wrong about the lookup
ref2 <- c(ref$Round1_opponents_ID, ref$Round2_opponents_ID, ref$Round3_opponents_ID, ref$Round4_opponents_ID, ref$Round5_opponents_ID, ref$Round6_opponents_ID, ref$Round7_opponents_ID)
tmp_pre_rating <- (as.vector(dt[ref2,6,with=FALSE]))
sum_opponent_Pre_rating <- 0
valid_game_count <-0
for(t in 1:7) {
if(is.na(tmp_pre_rating[t])) {
#skip the game
}
else
{
sum_opponent_Pre_rating <- sum_opponent_Pre_rating + as.numeric(as.character( tmp_pre_rating[t]$Pre_rating))
valid_game_count <- valid_game_count + 1
}
}
#assign the avg pre opponents score
dt[x, Avg_Pre_Opponents_rating:=(sum_opponent_Pre_rating / valid_game_count) ]
}
Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents
print(dt)
## record_ID Name State USCF_ID Total_Point
## 1: Num NA
## 2: 1 GARY HUA ON 15445895 6.0
## 3: 2 DAKSHESH DARURI MI 14598900 6.0
## 4: 3 ADITYA BAJAJ MI 14959604 6.0
## 5: 4 PATRICK H SCHILLING MI 12616049 5.5
## 6: 5 HANSHI ZUO MI 14601533 5.5
## 7: 6 HANSEN SONG OH 15055204 5.0
## 8: 7 GARY DEE SWATHELL MI 11146376 5.0
## 9: 8 EZEKIEL HOUGHTON MI 15142253 5.0
## 10: 9 STEFANO LEE ON 14954524 5.0
## 11: 10 ANVIT RAO MI 14150362 5.0
## 12: 11 CAMERON WILLIAM MC LEMAN MI 12581589 4.5
## 13: 12 KENNETH J TACK MI 12681257 4.5
## 14: 13 TORRANCE HENRY JR MI 15082995 4.5
## 15: 14 BRADLEY SHAW MI 10131499 4.5
## 16: 15 ZACHARY JAMES HOUGHTON MI 15619130 4.5
## 17: 16 MIKE NIKITIN MI 10295068 4.0
## 18: 17 RONALD GRZEGORCZYK MI 10297702 4.0
## 19: 18 DAVID SUNDEEN MI 11342094 4.0
## 20: 19 DIPANKAR ROY MI 14862333 4.0
## 21: 20 JASON ZHENG MI 14529060 4.0
## 22: 21 DINH DANG BUI ON 15495066 4.0
## 23: 22 EUGENE L MCCLURE MI 12405534 4.0
## 24: 23 ALAN BUI ON 15030142 4.0
## 25: 24 MICHAEL R ALDRICH MI 13469010 4.0
## 26: 25 LOREN SCHWIEBERT MI 12486656 3.5
## 27: 26 MAX ZHU ON 15131520 3.5
## 28: 27 GAURAV GIDWANI MI 14476567 3.5
## 29: 28 SOFIA ADINA STANESCU-BELLU MI 14882954 3.5
## 30: 29 CHIEDOZIE OKORIE MI 15323285 3.5
## 31: 30 GEORGE AVERY JONES ON 12577178 3.5
## 32: 31 RISHI SHETTY MI 15131618 3.5
## 33: 32 JOSHUA PHILIP MATHEWS ON 14073750 3.5
## 34: 33 JADE GE MI 14691842 3.5
## 35: 34 MICHAEL JEFFERY THOMAS MI 15051807 3.5
## 36: 35 JOSHUA DAVID LEE MI 14601397 3.5
## 37: 36 SIDDHARTH JHA MI 14773163 3.5
## 38: 37 AMIYATOSH PWNANANDAM MI 15489571 3.5
## 39: 38 BRIAN LIU MI 15108523 3.0
## 40: 39 JOEL R HENDON MI 12923035 3.0
## 41: 40 FOREST ZHANG MI 14892710 3.0
## 42: 41 KYLE WILLIAM MURPHY MI 15761443 3.0
## 43: 42 JARED GE MI 14462326 3.0
## 44: 43 ROBERT GLEN VASEY MI 14101068 3.0
## 45: 44 JUSTIN D SCHILLING MI 15323504 3.0
## 46: 45 DEREK YAN MI 15372807 3.0
## 47: 46 JACOB ALEXANDER LAVALLEY MI 15490981 3.0
## 48: 47 ERIC WRIGHT MI 12533115 2.5
## 49: 48 DANIEL KHAIN MI 14369165 2.5
## 50: 49 MICHAEL J MARTIN MI 12531685 2.5
## 51: 50 SHIVAM JHA MI 14773178 2.5
## 52: 51 TEJAS AYYAGARI MI 15205474 2.5
## 53: 52 ETHAN GUO MI 14918803 2.5
## 54: 53 JOSE C YBARRA MI 12578849 2.0
## 55: 54 LARRY HODGE MI 12836773 2.0
## 56: 55 ALEX KONG MI 15412571 2.0
## 57: 56 MARISA RICCI MI 14679887 2.0
## 58: 57 MICHAEL LU MI 15113330 2.0
## 59: 58 VIRAJ MOHILE MI 14700365 2.0
## 60: 59 SEAN M MC CORMICK MI 12841036 2.0
## 61: 60 JULIA SHEN MI 14579262 1.5
## 62: 61 JEZZEL FARKAS ON 15771592 1.5
## 63: 62 ASHWIN BALAJI MI 15219542 1.0
## 64: 63 THOMAS JOSEPH HOSMER MI 15057092 1.0
## 65: 64 BEN LI MI 15006561 1.0
## record_ID Name State USCF_ID Total_Point
## Pre_rating Post_rating Round1_opponents_ID Round2_opponents_ID
## 1: NA NA
## 2: 1794 1817 39 21
## 3: 1553 1663 63 58
## 4: 1384 1640 8 61
## 5: 1716 1744 23 28
## 6: 1655 1690 45 37
## 7: 1686 1687 34 29
## 8: 1649 1673 57 46
## 9: 1641 1657 3 32
## 10: 1411 1564 25 18
## 11: 1365 1544 16 19
## 12: 1712 1696 38 56
## 13: 1663 1670 42 33
## 14: 1666 1662 36 27
## 15: 1610 1618 54 44
## 16: 1220 1416 19 16
## 17: 1604 1613 10 15
## 18: 1629 1610 48 41
## 19: 1600 1600 47 9
## 20: 1564 1570 15 10
## 21: 1595 1569 40 49
## 22: 1563 1562 43 1
## 23: 1555 1529 64 52
## 24: 1363 1371 4 43
## 25: 1229 1300 28 47
## 26: 1745 1681 9 53
## 27: 1579 1564 49 40
## 28: 1552 1539 51 13
## 29: 1507 1513 24 4
## 30: 1602 6 50 6
## 31: 1522 1444 52 64
## 32: 1494 1444 58 55
## 33: 1441 1433 61 8
## 34: 1449 1421 60 12
## 35: 1399 1400 6 60
## 36: 1438 1392 46 38
## 37: 1355 1367 13 57
## 38: 980 1077 NA 5
## 39: 1423 1439 11 35
## 40: 1436 1413 1 54
## 41: 1348 1346 20 26
## 42: 1403 5 59 17
## 43: 1332 1256 12 50
## 44: 1283 1244 21 23
## 45: 1199 1199 NA 14
## 46: 1242 1191 5 51
## 47: 377 3 35 7
## 48: 1362 1341 18 24
## 49: 1382 1335 17 63
## 50: 1291 1259 26 20
## 51: 1056 1111 29 42
## 52: 1011 1097 27 45
## 53: 935 1092 30 22
## 54: 1393 1359 NA 25
## 55: 1270 1200 14 39
## 56: 1186 1163 62 31
## 57: 1153 1140 NA 11
## 58: 1092 1079 7 36
## 59: 917 941 31 2
## 60: 853 878 41 NA
## 61: 967 984 33 34
## 62: 955 979 32 3
## 63: 1530 1535 55 NA
## 64: 1175 1125 2 48
## 65: 1163 1112 22 30
## Pre_rating Post_rating Round1_opponents_ID Round2_opponents_ID
## Round3_opponents_ID Round4_opponents_ID Round5_opponents_ID
## 1:
## 2: 18 14 7
## 3: 4 17 16
## 4: 25 21 11
## 5: 2 26 5
## 6: 12 13 4
## 7: 11 35 10
## 8: 13 11 1
## 9: 14 9 47
## 10: 59 8 26
## 11: 55 31 6
## 12: 6 7 3
## 13: 5 38 NA
## 14: 7 5 33
## 15: 8 1 27
## 16: 30 22 54
## 17: NA 39 2
## 18: 26 2 23
## 19: 1 32 19
## 20: 52 28 18
## 21: 23 41 28
## 22: 47 3 40
## 23: 28 15 NA
## 24: 20 58 17
## 25: 43 25 60
## 26: 3 24 34
## 27: 17 4 9
## 28: 46 37 14
## 29: 22 19 20
## 30: 38 34 52
## 31: 15 55 31
## 32: 64 10 30
## 33: 44 18 51
## 34: 50 36 13
## 35: 37 29 25
## 36: 56 6 57
## 37: 51 33 NA
## 38: 34 27 NA
## 39: 29 12 NA
## 40: 40 16 44
## 41: 39 59 21
## 42: 58 20 NA
## 43: 57 60 61
## 44: 24 63 59
## 45: 32 53 39
## 46: 60 56 63
## 47: 27 50 64
## 48: 21 61 8
## 49: NA 52 NA
## 50: 63 64 58
## 51: 33 46 NA
## 52: 36 57 32
## 53: 19 48 29
## 54: NA 44 NA
## 55: 61 NA 15
## 56: 10 30 NA
## 57: 35 45 NA
## 58: 42 51 35
## 59: 41 23 49
## 60: 9 40 43
## 61: 45 42 24
## 62: 54 47 42
## 63: NA NA NA
## 64: 49 43 45
## 65: 31 49 46
## Round3_opponents_ID Round4_opponents_ID Round5_opponents_ID
## Round6_opponents_ID Round7_opponents_ID Opponents_list
## 1:
## 2: 12 4
## 3: 20 7
## 4: 13 12
## 5: 19 1
## 6: 14 17
## 7: 27 21
## 8: 9 2
## 9: 28 19
## 10: 7 20
## 11: 25 18
## 12: 34 26
## 13: 1 3
## 14: 3 32
## 15: 5 31
## 16: 33 38
## 17: 36 NA
## 18: 22 5
## 19: 38 10
## 20: 4 8
## 21: 2 9
## 22: 39 6
## 23: 17 40
## 24: 37 46
## 25: 44 39
## 26: 10 47
## 27: 32 11
## 28: 6 NA
## 29: 8 36
## 30: 48 NA
## 31: 61 50
## 32: 50 14
## 33: 26 13
## 34: 15 51
## 35: 11 52
## 36: 52 48
## 37: 16 28
## 38: 23 61
## 39: 18 15
## 40: 21 24
## 41: 56 22
## 42: NA NA
## 43: 64 56
## 44: 46 55
## 45: 24 59
## 46: 55 58
## 47: 43 23
## 48: 51 25
## 49: 29 35
## 50: NA NA
## 51: 31 30
## 52: 47 33
## 53: 35 34
## 54: 57 NA
## 55: 59 64
## 56: 45 43
## 57: 40 42
## 58: 53 NA
## 59: NA 45
## 60: 54 44
## 61: NA NA
## 62: 30 37
## 63: NA NA
## 64: NA NA
## 65: 42 54
## Round6_opponents_ID Round7_opponents_ID Opponents_list
## Avg_Pre_Opponents_rating
## 1: NaN
## 2: 1487.714
## 3: 1361.857
## 4: 1419.857
## 5: 1624.571
## 6: 1602.429
## 7: 1551.714
## 8: 1450.857
## 9: 1491.571
## 10: 1328.571
## 11: 1533.286
## 12: 1351.286
## 13: 1503.333
## 14: 1392.429
## 15: 1411.857
## 16: 1499.000
## 17: 1577.000
## 18: 1457.000
## 19: 1515.714
## 20: 1335.000
## 21: 1381.429
## 22: 1400.286
## 23: 1428.333
## 24: 1434.714
## 25: 1256.714
## 26: 1428.143
## 27: 1386.857
## 28: 1464.500
## 29: 1522.429
## 30: 1344.333
## 31: 1138.714
## 32: 1307.429
## 33: 1381.143
## 34: 1503.857
## 35: 1348.000
## 36: 1214.143
## 37: 1507.833
## 38: 1409.800
## 39: 1546.500
## 40: 1507.571
## 41: 1467.571
## 42: 1274.750
## 43: 1032.429
## 44: 1360.571
## 45: 1540.000
## 46: 1280.143
## 47: 1143.000
## 48: 1376.286
## 49: 1368.800
## 50: 1303.800
## 51: 1470.833
## 52: 1334.429
## 53: 1434.143
## 54: 1349.000
## 55: 1325.500
## 56: 1424.667
## 57: 1425.200
## 58: 1345.000
## 59: 1464.167
## 60: 1350.667
## 61: 1339.000
## 62: 1409.429
## 63: 1382.000
## 64: 1420.000
## 65: 1267.000
## Avg_Pre_Opponents_rating