Load libraries
library(RCurl)
library(data.table)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:RCurl':
##
## complete
library(readr)
library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Read the text file
# Load the tournament cross-table as a character vector, one element per line.
lines <- read_lines(file = "F:\\CUNY masters\\project1_data_607\\7645617.txt")
# Show the first ten lines: the header plus the first two player records.
lines[seq_len(10)]
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
Create two data frames from the text file: one with player data and one
with round data
# Build two raw tables from the tournament text held in `lines`:
#   chess_player - character matrix, one row per player: pair number, name,
#                  state, total points, pre-tournament rating
#   chess_round  - data frame, one row per player per round: the raw round
#                  cell (e.g. "W  39"), the player's pair number and pre-rating
#
# Each player record is two consecutive lines and the first one begins with
# the pair number, so records are located by pattern instead of hard-coded
# line numbers. Both tables are assembled in one step rather than being grown
# with rbind() inside a loop.
record_start <- grep("^[[:space:]]*[0-9]+[[:space:]]*\\|", lines)
result_fields <- strsplit(lines[record_start], "\\|")
detail_fields <- strsplit(lines[record_start + 1], "\\|")

# First line of a record: pair number | name | total points | round cells...
pair_num <- as.numeric(
  gsub(" ", "", vapply(result_fields, `[`, character(1), 1))
)
name <- str_trim(vapply(result_fields, `[`, character(1), 2))
total <- str_trim(vapply(result_fields, `[`, character(1), 3))

# Second line of a record: state | USCF ID / R: pre -> post | ...
state <- str_trim(vapply(detail_fields, `[`, character(1), 1))
id_and_rating <- vapply(detail_fields, `[`, character(1), 2)
# The first run of digits is the USCF ID; the second is the pre-rating.
pre_rating <- vapply(
  str_extract_all(id_and_rating, "[[:digit:]]+"),
  `[`, character(1), 2
)

# Mixing numbers and strings yields a character matrix; deparse.level = 0
# keeps it free of column names.
chess_player <- cbind(
  pair_num, name, state, total, pre_rating,
  deparse.level = 0
)

# Fields 4 to 10 of the first line are the seven round cells.
round_cells <- as.character(unlist(lapply(result_fields, `[`, 4:10)))
chess_round <- data.frame(
  win_loss_opponent_id = round_cells,
  player_num = rep(pair_num, each = 7),
  player_pre_rating = rep(pre_rating, each = 7),
  stringsAsFactors = FALSE
)
Get the player data into a data frame
# Turn the player matrix into a data frame and give its five columns
# descriptive names.
df2 <- setNames(
  data.frame(chess_player),
  c(
    "player_num", "player_name", "player_state",
    "total_point", "player_pre_rating"
  )
)
# Peek at the first six players.
head(df2, n = 6)
## player_num player_name player_state total_point player_pre_rating
## 1 1 GARY HUA ON 6.0 1794
## 2 2 DAKSHESH DARURI MI 6.0 1553
## 3 3 ADITYA BAJAJ MI 6.0 1384
## 4 4 PATRICK H SCHILLING MI 5.5 1716
## 5 5 HANSHI ZUO MI 5.5 1655
## 6 6 HANSEN SONG OH 5.0 1686
Get the round data into a data frame by doing some string cleaning
and data conversion.
# Split each raw round cell (a result letter followed by the opponent's pair
# number, e.g. "W  39") into separate, correctly typed columns.
colnames(chess_round) <- c(
  "win_loss_opponent_id", "player_num", "player_pre_rating"
)
round_cell <- as.character(chess_round$win_loss_opponent_id)

# The result letter is the token before the first space. vapply() always
# returns a character vector, unlike sapply().
chess_round$win_loss <- vapply(
  strsplit(round_cell, " "), `[`, character(1), 1
)

# Collapse whitespace runs (gsub() is already vectorised) so the opponent
# number is always the second token; cells without a second token give NA.
cell_tokens <- strsplit(gsub("\\s+", " ", round_cell), " ")
chess_round$opponent_id <- as.numeric(
  vapply(cell_tokens, `[`, character(1), 2)
)
chess_round$player_pre_rating <- as.numeric(chess_round$player_pre_rating)

# Keep only the cleaned columns, in reporting order.
chess_round <- chess_round[
  , c("player_num", "win_loss", "opponent_id", "player_pre_rating")
]
head(chess_round)
## player_num win_loss opponent_id player_pre_rating
## 1 1 W 39 1794
## 2 1 W 21 1794
## 3 1 W 18 1794
## 4 1 W 14 1794
## 5 1 W 7 1794
## 6 1 D 12 1794
Calculate each player's average opponent pre-rating and merge it into the
player data frame
# Columns kept in the final report, in output order.
col_01 <- c(
  "player_name", "player_state", "total_point", "player_pre_rating", "avg"
)

# Each row of chess_round says "player_num faced opponent_id", so grouping by
# opponent_id and averaging player_pre_rating gives, for every player, the
# mean pre-rating of the people they played. Rows whose opponent_id is NA
# (a round cell with no opponent number) are dropped first. as.integer()
# truncates the mean to a whole number.
avg_pre <- chess_round %>%
  filter(!is.na(opponent_id)) %>%
  group_by(opponent_id) %>%
  summarise(avg = as.integer(mean(player_pre_rating)), .groups = "drop")

# player_num in df2 is text, and merge() sorts on the key, so a text key
# orders rows as "1", "10", "11", ..., "2". Make the key numeric so players
# come out in pair-number order.
df2$player_num <- as.numeric(df2$player_num)
df2 <- merge(df2, avg_pre, by.x = "player_num", by.y = "opponent_id")[, col_01]

# Store both numeric fields as numbers so neither is written out as text.
df2$total_point <- as.numeric(df2$total_point)
df2$player_pre_rating <- as.numeric(df2$player_pre_rating)
head(df2)
## player_name player_state total_point player_pre_rating avg
## 1 GARY HUA ON 6.0 1794 1605
## 2 ANVIT RAO MI 5.0 1365 1554
## 3 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1467
## 4 KENNETH J TACK MI 4.5 1663 1506
## 5 TORRANCE HENRY JR MI 4.5 1666 1497
## 6 BRADLEY SHAW MI 4.5 1610 1515
Save the result to a CSV file and read the file back to check it
# Write the final table to disk. row.names = FALSE keeps R's automatic row
# index out of the file; otherwise it is written as an unnamed first column
# that read.csv() brings back as a spurious "X" column.
write.csv(df2, "chess_game_file.csv", row.names = FALSE)

# Read the file back and print it to confirm what was written.
csv_file <- read.csv("chess_game_file.csv")
csv_file
## X player_name player_state total_point player_pre_rating
## 1 1 GARY HUA ON 6.0 1794
## 2 2 ANVIT RAO MI 5.0 1365
## 3 3 CAMERON WILLIAM MC LEMAN MI 4.5 1712
## 4 4 KENNETH J TACK MI 4.5 1663
## 5 5 TORRANCE HENRY JR MI 4.5 1666
## 6 6 BRADLEY SHAW MI 4.5 1610
## 7 7 ZACHARY JAMES HOUGHTON MI 4.5 1220
## 8 8 MIKE NIKITIN MI 4.0 1604
## 9 9 RONALD GRZEGORCZYK MI 4.0 1629
## 10 10 DAVID SUNDEEN MI 4.0 1600
## 11 11 DIPANKAR ROY MI 4.0 1564
## 12 12 DAKSHESH DARURI MI 6.0 1553
## 13 13 JASON ZHENG MI 4.0 1595
## 14 14 DINH DANG BUI ON 4.0 1563
## 15 15 EUGENE L MCCLURE MI 4.0 1555
## 16 16 ALAN BUI ON 4.0 1363
## 17 17 MICHAEL R ALDRICH MI 4.0 1229
## 18 18 LOREN SCHWIEBERT MI 3.5 1745
## 19 19 MAX ZHU ON 3.5 1579
## 20 20 GAURAV GIDWANI MI 3.5 1552
## 21 21 SOFIA ADINA STANESCU-BELLU MI 3.5 1507
## 22 22 CHIEDOZIE OKORIE MI 3.5 1602
## 23 23 ADITYA BAJAJ MI 6.0 1384
## 24 24 GEORGE AVERY JONES ON 3.5 1522
## 25 25 RISHI SHETTY MI 3.5 1494
## 26 26 JOSHUA PHILIP MATHEWS ON 3.5 1441
## 27 27 JADE GE MI 3.5 1449
## 28 28 MICHAEL JEFFERY THOMAS MI 3.5 1399
## 29 29 JOSHUA DAVID LEE MI 3.5 1438
## 30 30 SIDDHARTH JHA MI 3.5 1355
## 31 31 AMIYATOSH PWNANANDAM MI 3.5 980
## 32 32 BRIAN LIU MI 3.0 1423
## 33 33 JOEL R HENDON MI 3.0 1436
## 34 34 PATRICK H SCHILLING MI 5.5 1716
## 35 35 FOREST ZHANG MI 3.0 1348
## 36 36 KYLE WILLIAM MURPHY MI 3.0 1403
## 37 37 JARED GE MI 3.0 1332
## 38 38 ROBERT GLEN VASEY MI 3.0 1283
## 39 39 JUSTIN D SCHILLING MI 3.0 1199
## 40 40 DEREK YAN MI 3.0 1242
## 41 41 JACOB ALEXANDER LAVALLEY MI 3.0 377
## 42 42 ERIC WRIGHT MI 2.5 1362
## 43 43 DANIEL KHAIN MI 2.5 1382
## 44 44 MICHAEL J MARTIN MI 2.5 1291
## 45 45 HANSHI ZUO MI 5.5 1655
## 46 46 SHIVAM JHA MI 2.5 1056
## 47 47 TEJAS AYYAGARI MI 2.5 1011
## 48 48 ETHAN GUO MI 2.5 935
## 49 49 JOSE C YBARRA MI 2.0 1393
## 50 50 LARRY HODGE MI 2.0 1270
## 51 51 ALEX KONG MI 2.0 1186
## 52 52 MARISA RICCI MI 2.0 1153
## 53 53 MICHAEL LU MI 2.0 1092
## 54 54 VIRAJ MOHILE MI 2.0 917
## 55 55 SEAN M MC CORMICK MI 2.0 853
## 56 56 HANSEN SONG OH 5.0 1686
## 57 57 JULIA SHEN MI 1.5 967
## 58 58 JEZZEL FARKAS ON 1.5 955
## 59 59 ASHWIN BALAJI MI 1.0 1530
## 60 60 THOMAS JOSEPH HOSMER MI 1.0 1175
## 61 61 BEN LI MI 1.0 1163
## 62 62 GARY DEE SWATHELL MI 5.0 1649
## 63 63 EZEKIEL HOUGHTON MI 5.0 1641
## 64 64 STEFANO LEE ON 5.0 1411
## avg
## 1 1605
## 2 1554
## 3 1467
## 4 1506
## 5 1497
## 6 1515
## 7 1483
## 8 1385
## 9 1498
## 10 1480
## 11 1426
## 12 1469
## 13 1410
## 14 1470
## 15 1300
## 16 1213
## 17 1357
## 18 1363
## 19 1506
## 20 1221
## 21 1522
## 22 1313
## 23 1563
## 24 1144
## 25 1259
## 26 1378
## 27 1276
## 28 1375
## 29 1149
## 30 1388
## 31 1384
## 32 1539
## 33 1429
## 34 1573
## 35 1390
## 36 1248
## 37 1149
## 38 1106
## 39 1327
## 40 1152
## 41 1357
## 42 1392
## 43 1355
## 44 1285
## 45 1500
## 46 1296
## 47 1356
## 48 1494
## 49 1345
## 50 1206
## 51 1406
## 52 1414
## 53 1363
## 54 1391
## 55 1319
## 56 1518
## 57 1330
## 58 1327
## 59 1186
## 60 1350
## 61 1263
## 62 1372
## 63 1468
## 64 1523