# Download File
#--------------
# https://stackoverflow.com/questions/23028760/download-a-file-from-https-using-download-file
url<-"https://raw.githubusercontent.com/jameskuruvilla/DATA607/master/tournament.txt"
r_file<- "tournament.txt"
downloader::download(url, r_file)
# Read text File into vector
#---------------------------
#https://stackoverflow.com/questions/23001548/dealing-with-readlines-function-in-r
v_conn <-file(r_file,open="r")
tournament.raw <- readLines(v_conn, warn = FALSE)
close(v_conn)#use gsub() to replace all the text you don't want with an empty string.
#-----------------------------------------------------------------------
#https://stackoverflow.com/questions/13529360/replace-text-within-parenthesis-in-r
tournament <- gsub("-{3,}", "",gsub("->", ">>",tournament.raw))
# Following code also will work the same way
#tournament <- str_replace_all(str_replace_all(tournament.raw, "->", ">>"), "-{3,}", "")
tournament <- tournament[tournament != ""] # Remove the emtpy lines
tournament <- tournament[-(1:2)] # Remove the First 2 lines as those are headings
head(tournament)## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " ON | 15445895 / R: 1794 >>1817 |N:2 |W |B |W |B |W |B |W |"
## [3] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [4] " MI | 14598900 / R: 1553 >>1663 |N:2 |B |W |B |W |B |W |B |"
## [5] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [6] " MI | 14959604 / R: 1384 >>1640 |N:2 |W |B |W |B |W |B |W |"
# =====================================
# Str_sub OR substr OR substring can be used to cut the specific character set from the strng
# https://www.rdocumentation.org/packages/stringr/versions/1.1.0/topics/str_sub
# http://rfunction.com/archives/1692
# =====================================
# extract only the lines starts with a digit
tournament.fmt1<- tournament[str_detect(substr(tournament, 1, 6), "[0-9]")]
# extract only the lines starts with an alphabet
tournament.fmt2 <- tournament[str_detect(str_sub(tournament, 1, 6), "[A-Z]{2,2}")] # Extract column values from these 2 vectors.
fmt1.df <- data.frame( Player_num = as.numeric(str_sub(tournament.fmt1, 1, 6)),
Player_name = str_trim(str_sub(tournament.fmt1, 8, 40), side="both"),
Total_pts = as.numeric(str_sub(tournament.fmt1, 42, 46)),
Round1 = str_sub(tournament.fmt1, 48, 52),
Round2 = str_sub(tournament.fmt1, 54, 58),
Round3 = str_sub(tournament.fmt1, 60, 64),
Round4 = str_sub(tournament.fmt1, 66, 70),
Round5 = str_sub(tournament.fmt1, 72, 76),
Round6 = str_sub(tournament.fmt1, 78, 82),
Round7 = str_sub(tournament.fmt1, 84, 88), stringsAsFactors=FALSE)
head(fmt1.df)fmt2.df <- data.frame( Player_state = str_trim(str_sub(tournament.fmt2, 1, 6), side="both"),
Uscf_id = str_extract(str_sub(tournament.fmt2, 8, 40), "\\d+"),
Pre_rating = as.numeric(str_extract(str_sub(tournament.fmt2, 8, 40), "(?<=R: ).\\d+(?=)")),
Post_rating = as.numeric(str_extract(str_sub(tournament.fmt2, 8, 40), "(?<=>>).\\d+(?=)")),
stringsAsFactors=FALSE)
head(fmt2.df)# Combine both data frames by columns
# https://stat.ethz.ch/R-manual/R-devel/library/methods/html/cbind2.html
tournament.df <-cbind(fmt1.df, fmt2.df)
head(tournament.df)# Select all columns except round columns
player_df <- select(tournament.df, Player_num:Total_pts, Player_state:Post_rating) #Select requires the dplyr package
# Select only player number and all the rounds the player played
rounds <- tournament.df %>% select(Player_num, Round1:Round7)
head(rounds)# Following link says how melt works
# https://tgmstat.wordpress.com/2013/10/31/reshape-and-aggregate-data-with-the-r-package-reshape2/
melt_rounds <- rounds%>% melt(id.var=c("Player_num"), value.name="Result_opp")
head(melt_rounds)mut_melt_rounds <-
melt_rounds%>% #Add the columns round, Result and opp_num (opponent Number)
mutate(Round = as.numeric(gsub("Round", "",variable)), # Replace "round" with "" in the column 'variable'
Result = str_extract(Result_opp, "^\\w+"), # Extract the begginning word
Opp_num = as.numeric(str_extract(Result_opp, "\\d+$")) # Extract the ending Digits.
)
head(mut_melt_rounds)#sel_mut_melt_rounds <- mut_melt_rounds %>% select(c(Player_num,Round,Result,Opp_num)) #
sel_mut_melt_rounds <- mut_melt_rounds %>% select(-c(variable, Result_opp)) # Remove the columns 'variable' and 'Result_opp'
head(sel_mut_melt_rounds);head(player_df )# Join sel_mut_melt_rounds and player_df on op_num and Payer_num
# Following Link gives more information on Joins
# http://dplyr.tidyverse.org/reference/join.html
join_sel_mut_melt_rds <- sel_mut_melt_rounds %>%
inner_join(select(player_df, Player_num, Pre_rating,Post_rating), by = c("Opp_num" = "Player_num"))
head(join_sel_mut_melt_rds)sel_p <- join_sel_mut_melt_rds %>% select(Player_num, Round, Result, Opp_num, Pre_rating)
head(sel_p)sort_sel_p <- sel_p %>% arrange(Player_num, Round) ;head(sort_sel_p)names(sort_sel_p)[names(sort_sel_p) == "Pre_rating"] <- "Opp_pre_rating" ;head(sort_sel_p)player.opp_avg_rating <- sort_sel_p%>%group_by(Player_num) %>% summarise(Opp_avg_pre_rating = round(mean(Opp_pre_rating)))
player_df <- player.opp_avg_rating %>% inner_join(player_df, by="Player_num")
player_df_final <- player_df %>% select(Player_name, Player_state, Total_pts,Pre_rating, Opp_avg_pre_rating) htmlTable(player_df_final)| Player_name | Player_state | Total_pts | Pre_rating | Opp_avg_pre_rating | |
|---|---|---|---|---|---|
| 1 | GARY HUA | ON | 6 | 1794 | 1605 |
| 2 | DAKSHESH DARURI | MI | 6 | 1553 | 1469 |
| 3 | ADITYA BAJAJ | MI | 6 | 1384 | 1564 |
| 4 | PATRICK H SCHILLING | MI | 5.5 | 1716 | 1574 |
| 5 | HANSHI ZUO | MI | 5.5 | 1655 | 1501 |
| 6 | HANSEN SONG | OH | 5 | 1686 | 1519 |
| 7 | GARY DEE SWATHELL | MI | 5 | 1649 | 1372 |
| 8 | EZEKIEL HOUGHTON | MI | 5 | 1641 | 1468 |
| 9 | STEFANO LEE | ON | 5 | 1411 | 1523 |
| 10 | ANVIT RAO | MI | 5 | 1365 | 1554 |
| 11 | CAMERON WILLIAM MC LEMAN | MI | 4.5 | 1712 | 1468 |
| 12 | KENNETH J TACK | MI | 4.5 | 1663 | 1506 |
| 13 | TORRANCE HENRY JR | MI | 4.5 | 1666 | 1498 |
| 14 | BRADLEY SHAW | MI | 4.5 | 1610 | 1515 |
| 15 | ZACHARY JAMES HOUGHTON | MI | 4.5 | 1220 | 1484 |
| 16 | MIKE NIKITIN | MI | 4 | 1604 | 1386 |
| 17 | RONALD GRZEGORCZYK | MI | 4 | 1629 | 1499 |
| 18 | DAVID SUNDEEN | MI | 4 | 1600 | 1480 |
| 19 | DIPANKAR ROY | MI | 4 | 1564 | 1426 |
| 20 | JASON ZHENG | MI | 4 | 1595 | 1411 |
| 21 | DINH DANG BUI | ON | 4 | 1563 | 1470 |
| 22 | EUGENE L MCCLURE | MI | 4 | 1555 | 1300 |
| 23 | ALAN BUI | ON | 4 | 1363 | 1214 |
| 24 | MICHAEL R ALDRICH | MI | 4 | 1229 | 1357 |
| 25 | LOREN SCHWIEBERT | MI | 3.5 | 1745 | 1363 |
| 26 | MAX ZHU | ON | 3.5 | 1579 | 1507 |
| 27 | GAURAV GIDWANI | MI | 3.5 | 1552 | 1222 |
| 28 | SOFIA ADINA STANESCU-BELLU | MI | 3.5 | 1507 | 1522 |
| 29 | CHIEDOZIE OKORIE | MI | 3.5 | 1602 | 1314 |
| 30 | GEORGE AVERY JONES | ON | 3.5 | 1522 | 1144 |
| 31 | RISHI SHETTY | MI | 3.5 | 1494 | 1260 |
| 32 | JOSHUA PHILIP MATHEWS | ON | 3.5 | 1441 | 1379 |
| 33 | JADE GE | MI | 3.5 | 1449 | 1277 |
| 34 | MICHAEL JEFFERY THOMAS | MI | 3.5 | 1399 | 1375 |
| 35 | JOSHUA DAVID LEE | MI | 3.5 | 1438 | 1150 |
| 36 | SIDDHARTH JHA | MI | 3.5 | 1355 | 1388 |
| 37 | AMIYATOSH PWNANANDAM | MI | 3.5 | 980 | 1385 |
| 38 | BRIAN LIU | MI | 3 | 1423 | 1539 |
| 39 | JOEL R HENDON | MI | 3 | 1436 | 1430 |
| 40 | FOREST ZHANG | MI | 3 | 1348 | 1391 |
| 41 | KYLE WILLIAM MURPHY | MI | 3 | 1403 | 1248 |
| 42 | JARED GE | MI | 3 | 1332 | 1150 |
| 43 | ROBERT GLEN VASEY | MI | 3 | 1283 | 1107 |
| 44 | JUSTIN D SCHILLING | MI | 3 | 1199 | 1327 |
| 45 | DEREK YAN | MI | 3 | 1242 | 1152 |
| 46 | JACOB ALEXANDER LAVALLEY | MI | 3 | 377 | 1358 |
| 47 | ERIC WRIGHT | MI | 2.5 | 1362 | 1392 |
| 48 | DANIEL KHAIN | MI | 2.5 | 1382 | 1356 |
| 49 | MICHAEL J MARTIN | MI | 2.5 | 1291 | 1286 |
| 50 | SHIVAM JHA | MI | 2.5 | 1056 | 1296 |
| 51 | TEJAS AYYAGARI | MI | 2.5 | 1011 | 1356 |
| 52 | ETHAN GUO | MI | 2.5 | 935 | 1495 |
| 53 | JOSE C YBARRA | MI | 2 | 1393 | 1345 |
| 54 | LARRY HODGE | MI | 2 | 1270 | 1206 |
| 55 | ALEX KONG | MI | 2 | 1186 | 1406 |
| 56 | MARISA RICCI | MI | 2 | 1153 | 1414 |
| 57 | MICHAEL LU | MI | 2 | 1092 | 1363 |
| 58 | VIRAJ MOHILE | MI | 2 | 917 | 1391 |
| 59 | SEAN M MC CORMICK | MI | 2 | 853 | 1319 |
| 60 | JULIA SHEN | MI | 1.5 | 967 | 1330 |
| 61 | JEZZEL FARKAS | ON | 1.5 | 955 | 1327 |
| 62 | ASHWIN BALAJI | MI | 1 | 1530 | 1186 |
| 63 | THOMAS JOSEPH HOSMER | MI | 1 | 1175 | 1350 |
| 64 | BEN LI | MI | 1 | 1163 | 1263 |
The table is exported into a file ’CHESS PLAYERS.csv using following command into the local drive and then loaded into GitHub. The following link gives the output from GitHub.
write.csv(player_df_final, "CHESS PLAYERS.csv", row.names=FALSE)