The assignment tasks students with creating an R Markdown file that reads a text file of chess tournament results and produces a .csv file for further analysis.
# Every str_*() call in this script comes from stringr, so load it up front
# rather than relying on it already being attached in the session.
library(stringr)

# Raw tournament cross-table. The URL includes a commit hash, so the input
# does not change underneath the analysis.
chess_file <- "https://raw.githubusercontent.com/evanmclaughlin/ECM607/9e2ce75e6992b76c132019c38ce9c51eb0660570/tournamentinfo.txt"
chess_data <- readLines(chess_file, warn = FALSE)

# Next, eliminate the dashed separator lines.
chess_data <- chess_data[!str_detect(chess_data, fixed("-----"))]

# Drop the two header rows, then split each remaining row into its
# pipe-delimited fields (one character vector per row). Negative indexing is
# used instead of 3:length(x), which counts backwards when fewer than three
# rows are left.
chess_data <- str_split(chess_data[-seq_len(2)], fixed("|"))
head(chess_data)
## [[1]]
## [1] " 1 " " GARY HUA "
## [3] "6.0 " "W 39"
## [5] "W 21" "W 18"
## [7] "W 14" "W 7"
## [9] "D 12" "D 4"
## [11] ""
##
## [[2]]
## [1] " ON " " 15445895 / R: 1794 ->1817 "
## [3] "N:2 " "W "
## [5] "B " "W "
## [7] "B " "W "
## [9] "B " "W "
## [11] ""
##
## [[3]]
## [1] " 2 " " DAKSHESH DARURI "
## [3] "6.0 " "W 63"
## [5] "W 58" "L 4"
## [7] "W 17" "W 16"
## [9] "W 20" "W 7"
## [11] ""
##
## [[4]]
## [1] " MI " " 14598900 / R: 1553 ->1663 "
## [3] "N:2 " "B "
## [5] "W " "B "
## [7] "W " "B "
## [9] "W " "B "
## [11] ""
##
## [[5]]
## [1] " 3 " " ADITYA BAJAJ "
## [3] "6.0 " "L 8"
## [5] "W 61" "W 25"
## [7] "W 21" "W 11"
## [9] "W 13" "W 12"
## [11] ""
##
## [[6]]
## [1] " MI " " 14959604 / R: 1384 ->1640 "
## [3] "N:2 " "W "
## [5] "B " "W "
## [7] "B " "W "
## [9] "B " "W "
## [11] ""
# Start with names.
# chess_data is a list of already-split rows that alternate between a player
# row (pair number, name, total points, round results) and a detail row
# (state, USCF id / ratings, ...), as the printed head above shows. Picking
# fields by position avoids running a regex over the whole list, which made
# stringr coerce the list to deparsed text and emit a warning.
# The player name is field 2 of every odd-numbered row. Padding is kept here
# and trimmed when the data frame is assembled.
chess_names <- vapply(chess_data[c(TRUE, FALSE)], `[`, character(1), 2)

# Next, extract state: field 1 of every even-numbered row.
chess_state <- vapply(chess_data[c(FALSE, TRUE)], `[`, character(1), 1)
# Total points: field 3 of every player row (the odd-numbered elements of
# chess_data), e.g. "6.0 ". Reading the field directly replaces a regex scan
# over the whole list, which triggered a list-coercion warning and relied on
# no other number in the file containing a decimal point.
points <- str_trim(vapply(chess_data[c(TRUE, FALSE)], `[`, character(1), 3))
# Now for pre-rating.
# Field 2 of every detail row (the even-numbered elements of chess_data) looks
# like " 15445895 / R: 1794 ->1817 ". The pre-tournament rating is the run of
# digits that follows "R:". Capturing digits only leaves out a trailing
# provisional marker such as the "P" in "1641P17", as well as the post-rating
# after "->".
rating_field <- vapply(chess_data[c(FALSE, TRUE)], `[`, character(1), 2)

# str_match() returns a matrix: column 1 is the full match, column 2 the first
# capture group. Rows without an "R:" entry yield NA instead of being dropped,
# so the result stays aligned with the player list.
pre_rating <- str_match(rating_field, "R:\\s*(\\d+)")[, 2]
# Clean up the extracted fields and assemble one row per player.
names <- str_trim(chess_names, "both")
state <- str_trim(chess_state, "both")
pre_rating <- str_trim(pre_rating, "both")

# Number players by position instead of hard-coding the field size (was 64).
id <- seq_along(names)

# Fail loudly if any extraction step lost or gained rows; cbind() would have
# recycled the shorter vectors and silently misaligned players.
stopifnot(
  length(state) == length(names),
  length(points) == length(names),
  length(pre_rating) == length(names)
)

# data.frame() keeps each column's own type (cbind() forced everything through
# a character matrix). points and pre_rating are still text at this stage and
# are converted to numeric before the opponent averages are computed.
rankings <- data.frame(
  id = id,
  names = names,
  state = state,
  points = points,
  pre_rating = pre_rating,
  stringsAsFactors = FALSE
)
head(rankings)
## id names state points pre_rating
## 1 1 GARY HUA ON 6.0 1794
## 2 2 DAKSHESH DARURI MI 6.0 1553
## 3 3 ADITYA BAJAJ MI 6.0 1384
## 4 4 PATRICK H SCHILLING MI 5.5 1716
## 5 5 HANSHI ZUO MI 5.5 1655
## 6 6 HANSEN SONG OH 5.0 1686
# Extract opponent data now, pulling in games played.
# The three stringr calls below each work on a list, so the list is converted
# with as.character() first. That is the same coercion stringr performed
# implicitly (with a warning each time): every list element becomes its
# deparsed text, e.g. 'c("W  39", "W  21", ...)'. The resulting string format
# is kept unchanged because the averaging loop further down parses it.

# Step 1: per row, the played-game cells: a W/D/L result code, three filler
# characters, then the end of the opponent's pair number.
game_cells <- str_extract_all(as.character(chess_data), "[WDL]...\\d{1,2}")

# Step 2: keep only the opponent numbers from those cells.
opponent_ids <- str_extract_all(as.character(game_cells), "\\.?\\d{1,2}")

# Step 3: rows with no game cells (the state/rating rows) appear to collapse
# to a lone "0" at this point; rewrite that to "." so they can be filtered out
# below. NOTE(review): this also drops any player with no played games, which
# would shift later rows -- confirm the input never contains one.
games_info <- str_replace_all(as.character(opponent_ids), "\\b[0]\\b", ".")

# Discard the placeholder rows, leaving one opponent string per player.
games <- str_detect(games_info, fixed("."))
games_info <- games_info[!(games)]
head(games_info)
## [1] "c(\"39\", \"21\", \"18\", \"14\", \"7\", \"12\", \"4\")"
## [2] "c(\"63\", \"58\", \"4\", \"17\", \"16\", \"20\", \"7\")"
## [3] "c(\"8\", \"61\", \"25\", \"21\", \"11\", \"13\", \"12\")"
## [4] "c(\"23\", \"28\", \"2\", \"26\", \"5\", \"19\", \"1\")"
## [5] "c(\"45\", \"37\", \"12\", \"13\", \"4\", \"14\", \"17\")"
## [6] "c(\"34\", \"29\", \"11\", \"35\", \"10\", \"27\", \"21\")"
# Strip rating artefacts (the '>' of "->post" and the provisional marker 'P')
# from the rating column only. Running this substitution over every column
# also deleted the letter P from player names ("PATRICK" became "ATRICK").
rankings$pre_rating <- gsub("[>P]", "", rankings$pre_rating)

# Make calculation fields numeric.
rankings$points <- as.numeric(as.character(rankings$points))
rankings$pre_rating <- as.numeric(as.character(rankings$pre_rating))

# For each player, average the pre-tournament rating of their opponents.
# games_info[i] holds player i's opponent pair numbers as text; those numbers
# are used directly as row indices into rankings.
matches <- vapply(
  seq_len(nrow(rankings)),
  function(i) {
    outcomes <- as.numeric(unlist(str_extract_all(games_info[i], "\\d{1,2}")))
    mean(rankings$pre_rating[outcomes])
  },
  numeric(1)
)
rankings$avg_rating <- matches
head(rankings)
## id names state points pre_rating avg_rating
## 1 1 GARY HUA ON 6.0 1794 1605.286
## 2 2 DAKSHESH DARURI MI 6.0 1553 1469.286
## 3 3 ADITYA BAJAJ MI 6.0 1384 1563.571
## 4 4 PATRICK H SCHILLING MI 5.5 1716 1573.571
## 5 5 HANSHI ZUO MI 5.5 1655 1500.857
## 6 6 HANSEN SONG OH 5.0 1686 1518.714

# row.names = TRUE writes the data frame's row names as an extra, unnamed
# leading column in the CSV.
write.csv(rankings, "chess_final.csv", row.names = TRUE)