library(stringr)
# Download the raw tournament cross-table; each element is one line of text.
tournament_url <- "https://raw.githubusercontent.com/chitrarth2018/607_Project_1/master/tournamentinfo.txt"
tournament <- readLines(con = tournament_url)

head(x = tournament)
## [1] "-----------------------------------------------------------------------------------------" 
## [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
## [4] "-----------------------------------------------------------------------------------------" 
## [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
## [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
# Strip every "-" character from every line. The all-dash separator rows
# collapse to empty strings, and "->" in the rating column becomes ">".
tourn_wo_dash <- gsub(
  pattern = "-",
  replacement = "",
  x = tournament,
  fixed = TRUE
)
head(x = tourn_wo_dash)
## [1] ""                                                                                          
## [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num  | USCF ID / Rtg (Pre>Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | " 
## [4] ""                                                                                          
## [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
## [6] "   ON | 15445895 / R: 1794   >1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
# Remove the rows that became empty strings once the dashes were stripped
# (the former separator rows). nchar() is already vectorised over a character
# vector, so it is applied directly instead of through sapply().
tourn_clean <- tourn_wo_dash[nchar(tourn_wo_dash) > 0]
head(tourn_clean)
## [1] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
## [2] " Num  | USCF ID / Rtg (Pre>Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | " 
## [3] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
## [4] "   ON | 15445895 / R: 1794   >1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"  
## [5] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
## [6] "   MI | 14598900 / R: 1553   >1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
# The cleaned text alternates between two kinds of rows: odd rows carry the
# pair number, player name, total points and per-round results; even rows
# carry the state, USCF ID and ratings. The row indexes are derived from the
# actual number of lines instead of a hard-coded 130, so the split keeps
# working when the file has a different number of players.
row_ids <- seq_along(tourn_clean)
even_indexes <- row_ids[row_ids %% 2 == 0]
odd_indexes <- row_ids[row_ids %% 2 == 1]
player_det1 <- tourn_clean[even_indexes]
player_det2 <- tourn_clean[odd_indexes]
head(player_det1)
## [1] " Num  | USCF ID / Rtg (Pre>Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
## [2] "   ON | 15445895 / R: 1794   >1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
## [3] "   MI | 14598900 / R: 1553   >1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [4] "   MI | 14959604 / R: 1384   >1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
## [5] "   MI | 12616049 / R: 1716   >1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |" 
## [6] "   MI | 14601533 / R: 1655   >1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
head(player_det2)
## [1] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
## [2] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
## [3] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
## [4] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|" 
## [5] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|" 
## [6] "    5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|"
# The first element of each vector is a column-header row, not a player:
# drop it so that element i of both vectors describes player i.
player_det1 <- player_det1[-1]
player_det2 <- player_det2[-1]

# Extract the player name: it is the second "|"-delimited field of each name
# row. Taking the whole field (instead of matching letters only) keeps names
# that contain punctuation or digits, and str_trim() removes the fixed-width
# column padding that would otherwise end up in the data frame.
player_name <- str_trim(str_split_fixed(player_det2, "\\|", 3)[, 2])
head(player_name)
## [1] "GARY HUA"            "DAKSHESH DARURI"     "ADITYA BAJAJ"
## [4] "PATRICK H SCHILLING" "HANSHI ZUO"          "HANSEN SONG"
# Extract the player state: the first run of two letters on each detail row.
player_state <- str_extract(player_det1, "([a-zA-Z]){2}")
head(player_state)
## [1] "ON" "MI" "MI" "MI" "MI" "OH"
# Create the data frame, one row per player. stringsAsFactors = FALSE keeps
# both columns as plain character vectors on every R version.
chess_results <- data.frame(
  player_name,
  player_state,
  stringsAsFactors = FALSE
)

# Extract the player's total points: the decimal number that directly follows
# a "|" on the name row (e.g. "|6.0"). Stored as a number, not as text.
chess_results$player_score <- as.numeric(
  str_match(player_det2, "\\|([0-9]+\\.[0-9]+)")[, 2]
)

# Extract the player's pre-tournament rating: the digits that follow "R:" on
# the detail row (e.g. "R: 1794"). Capturing only the digits and converting to
# integer avoids the leading spaces the raw match would carry.
chess_results$pre_rating <- as.integer(
  str_match(player_det1, "R:\\s*([0-9]+)")[, 2]
)

# Count the games actually played: round cells made of a result letter
# followed by an opponent number. Cells with a letter only are not counted.
chess_results$num_games <- str_count(
  player_det2,
  "\\|([a-zA-Z])( )+([0-9]+)"
)

# Extract the per-round cells ("|" + result letter + optional opponent number)
# into a character matrix: one row per player, one column per round cell.
player_opp <- str_extract_all(
  player_det2,
  "\\|([a-zA-Z])( )*([0-9]*)",
  simplify = TRUE
)

# Strip the leading "|" and the result letter from every cell in one
# vectorised pass, leaving only the (space-padded) opponent number or blanks.
# Assigning through `[]` keeps the matrix dimensions, and no row count is
# hard-coded, so any number of players is handled.
player_opp[] <- str_replace(as.vector(player_opp), "\\|([a-zA-Z])", "")


# Look up the pre-rating of every opponent by opponent number and compute each
# player's mean opponent pre-rating. The lookup is done for the whole
# player-by-round matrix at once, so neither the number of players nor the
# number of rounds is hard-coded.
stopifnot(nrow(player_opp) == nrow(chess_results))

pre_rating_int <- as.integer(chess_results$pre_rating)

# Opponent numbers as integers; blank cells (rounds without an opponent)
# become NA.
opp_ids <- matrix(
  as.integer(player_opp),
  nrow = nrow(player_opp),
  ncol = ncol(player_opp)
)

# Indexing with NA yields NA, so rounds without an opponent stay NA here.
opp_ratings <- matrix(
  pre_rating_int[opp_ids],
  nrow = nrow(opp_ids),
  ncol = ncol(opp_ids)
)

# Keep the per-round opponent ratings as columns opp_R1, opp_R2, ...
for (round_no in seq_len(ncol(opp_ratings))) {
  chess_results[[paste0("opp_R", round_no)]] <- opp_ratings[, round_no]
}

# Mean over the rounds that had an opponent, rounded to a whole number.
chess_results$opp_rate_mean <- round(rowMeans(opp_ratings, na.rm = TRUE), 0)

# Keep only the reporting columns. Selecting by name (rather than dropping
# positions 5 to 12) stays correct if columns are added or reordered upstream.
final_columns <- c(
  "player_name",
  "player_state",
  "player_score",
  "pre_rating",
  "opp_rate_mean"
)
chess_results_final <- chess_results[, final_columns]

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(chess_results_final)
}