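# Chess Data: parse the tournament cross table and compare each player's pre-tournament rating with the average pre-tournament rating of their opponents.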
# Read the raw tournament cross table so that every text line becomes one row
# of a single-column data frame (the separator is a tab, and the preview of
# the first rows shows each line arriving as one whole string).
# Quote and comment processing are switched off: with the defaults, a line
# containing ' or " could be merged with following lines and everything after
# a # would be dropped.
project1_data <- read.table(
  "https://raw.githubusercontent.com/mikegankhuyag/607-Projects/master/tournamentinfo.txt",
  header = FALSE,
  sep = "\t",
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE
)
head(project1_data)
library(stringr)
# Show the first ten raw lines as a plain character vector: a four-line
# header, then two lines per player followed by a dashed separator.
project1_data[seq_len(10), 1]
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
# Build one record per player. After the four header lines every player takes
# two consecutive lines (name/results, then state/ratings) followed by a dashed
# separator, so the first line of each player sits at rows 5, 8, 11, ...
# The last row is derived from the data instead of being hard-coded, and the
# upper bound of nrow - 1 guarantees that the second line of the final player
# exists.
stopifnot(nrow(project1_data) >= 6)
player_rows <- seq(5, nrow(project1_data) - 1, by = 3)
Chess_Data <- paste0(
  project1_data[player_rows, 1],
  project1_data[player_rows + 1, 1]
)
head(Chess_Data)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4| ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [2] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7| MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [3] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12| MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [4] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1| MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [5] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17| MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [6] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21| OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
# Rewrite the rating part of every record so that it splits cleanly on "|".
# Step 1: replace each ":" (together with one directly following non-space
# character, if there is one) by "|". In the printed result "R: 1794" becomes
# "R| 1794", which separates the USCF ID from the pre-rating.
Chess_data1 <- str_replace_all(Chess_Data, pattern = "\\:\\S?","|")
# Step 2: overwrite characters 130-134 with "|end". In the printed result this
# is where the former "N:2" field now reads "end".
# NOTE(review): fixed character positions - this presumably relies on every
# record having the same fixed-width layout; confirm before reusing on
# another file.
str_sub(Chess_data1,130, 134) <- "|end"
# Step 3: overwrite characters 119-120 with " |". In the printed result the
# "->" between pre- and post-rating has become a field separator.
str_sub(Chess_data1,119, 120) <- " |"
head(Chess_data1)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4| ON | 15445895 / R| 1794 |1817 |end|W |B |W |B |W |B |W |"
## [2] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7| MI | 14598900 / R| 1553 |1663 |end|B |W |B |W |B |W |B |"
## [3] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12| MI | 14959604 / R| 1384 |1640 |end|W |B |W |B |W |B |W |"
## [4] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1| MI | 12616049 / R| 1716 |1744 |end|W |B |W |B |W |B |B |"
## [5] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17| MI | 14601533 / R| 1655 |1690 |end|B |W |B |W |B |W |B |"
## [6] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21| OH | 15055204 / R| 1686 |1687 |end|W |B |W |B |B |W |B |"
# Split every combined record on "|" into the columns of a data frame.
# Quote and comment processing are disabled so that a player name containing
# ', " or # is read literally instead of opening a quoted string or cutting
# the record short.
Chess_data2 <- read.table(
  text = Chess_data1,
  sep = "|",
  quote = "",
  comment.char = ""
)
head(Chess_data2)
# Keep the first 14 fields only; the fields after them (the "end" marker and
# the W/B letters visible in the combined records) are not used.
chess_data3 <- Chess_data2[, 1:14]
colnames(chess_data3) <- c(
  "ID", "Player", "Score",
  "Game1", "Game2", "Game3", "Game4", "Game5", "Game6", "Game7",
  "State", "USCF_ID", "Pre_Rating", "Post_Rating"
)
chess_data3[1:10, ]
# Reduce the "USCF_ID" field (e.g. " 15445895 / R") to the 8-digit ID.
# str_extract() returns exactly one value per row (NA when nothing matches),
# so the result always has the right length for the column.
# unlist(str_extract_all()) would change length, and make the assignment fail,
# as soon as a row had no match or more than one.
chess_data3$USCF_ID <- str_extract(chess_data3$USCF_ID, "\\(?\\d{8}?")
# The pre-rating field starts with a blank (" 1794 ..."), so five characters
# are kept. The post-rating field starts directly with the number, so four
# are enough. Whatever follows in either field is cut off.
chess_data3$Pre_Rating <- substr(chess_data3$Pre_Rating, 1, 5)
chess_data3$Post_Rating <- substr(chess_data3$Post_Rating, 1, 4)
chess_data3[1:10, ]
# Work on a copy and reduce every round entry (e.g. "W 39") to the opponent's
# pair number. Entries without any digits become NA.
Chess_data_4 <- chess_data3
round_cols <- paste0("Game", 1:7)
Chess_data_4[round_cols] <- lapply(
  Chess_data_4[round_cols],
  str_extract,
  pattern = "\\d+"
)
head(Chess_data_4)
# Lookup table from pair number to pre-tournament rating.
# It is built directly from the two columns, so the number of players is taken
# from the data rather than fixed at 64. Both columns are stored as text, as
# before, because the pair numbers are later matched against the opponent
# numbers extracted as strings.
Chess_data_Ranks <- data.frame(
  ID = as.character(Chess_data_4$ID),
  Pre_Rank = as.character(Chess_data_4$Pre_Rating)
)
head(Chess_data_Ranks)
# Replace every opponent pair number with that opponent's pre-tournament
# rating, one round column at a time. Rounds without an opponent stay NA
# because match() finds no row for them.
Chess_data_Ranks2 <- Chess_data_4
for (round_col in paste0("Game", 1:7)) {
  opponent_row <- match(Chess_data_Ranks2[[round_col]], Chess_data_Ranks$ID)
  Chess_data_Ranks2[[round_col]] <- Chess_data_Ranks$Pre_Rank[opponent_row]
}
head(Chess_data_Ranks2)
# Convert the looked-up opponent ratings to numbers. Going through
# as.character() first means a factor column yields its labels rather than
# its internal integer codes.
opponent_cols <- paste0("Game", 1:7)
Chess_data_Ranks2[opponent_cols] <- lapply(
  Chess_data_Ranks2[opponent_cols],
  function(rating) as.numeric(as.character(rating))
)
# "New_Rating" is the rounded mean pre-rating of the opponents a player
# actually faced; rounds without an opponent (NA) are left out of the mean.
opponent_mean <- rowMeans(Chess_data_Ranks2[, opponent_cols], na.rm = TRUE)
New_Rating <- round(opponent_mean)
Chess_data_Ranks2[["New_Rating"]] <- New_Rating
Chess_data_Ranks2[, c("ID", "Player", "Pre_Rating", "New_Rating")]
# Make the player's own pre-rating numeric, then record how far the average
# opponent rating lies above (positive) or below (negative) it.
Chess_data_Ranks2[["Pre_Rating"]] <- as.numeric(
  as.character(Chess_data_Ranks2[["Pre_Rating"]])
)
Ranking_differences <- with(Chess_data_Ranks2, New_Rating - Pre_Rating)
Chess_data_Ranks2[["Ranking_differences"]] <- Ranking_differences
head(Chess_data_Ranks2)
summary(Chess_data_Ranks2[["Ranking_differences"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -382.000 -172.250 -68.000 0.125 139.750 981.000
Chess_data_Ranks2