Step 1:-Load the text file into a data frame called chess

chess<-read.table("C:/Users/Arindam/Documents/Data Science/Cuny/Data 607/Assignments/tournamentinfo.txt",header=TRUE,sep =  "|")

str (chess)
## 'data.frame':    128 obs. of  11 variables:
##  $ Pair.Num                               : Factor w/ 67 levels "    1 ","    2 ",..: 1 67 2 65 3 65 4 65 5 65 ...
##  $ Player.Name..USCF.ID...Rtg..Pre..Post..: Factor w/ 128 levels " 10131499 / R: 1610   ->1618     ",..: 88 58 76 27 65 40 115 13 92 29 ...
##  $ Total.Pts                              : Factor w/ 15 levels "     ","1.0  ",..: 12 13 12 13 12 13 11 13 11 13 ...
##  $ Round.1                                : Factor w/ 64 levels "     ","B    ",..: 48 37 63 2 16 37 40 37 51 2 ...
##  $ Round.2                                : Factor w/ 66 levels "     ","B    ",..: 44 2 63 40 65 2 6 2 50 40 ...
##  $ Round.3                                : Factor w/ 65 levels "     ","B    ",..: 44 38 12 2 47 38 39 38 4 2 ...
##  $ Round.4                                : Factor w/ 66 levels "     ","B    ",..: 44 2 46 41 47 2 49 2 4 41 ...
##  $ Round.5                                : Factor w/ 58 levels "     ","B    ",..: 37 36 39 2 38 36 4 36 3 2 ...
##  $ Round.6                                : Factor w/ 63 levels "     ","B    ",..: 4 2 44 39 41 2 43 2 42 39 ...
##  $ Round.7                                : Factor w/ 58 levels "     ","B    ",..: 4 33 34 2 35 33 3 2 36 2 ...
##  $ X                                      : logi  NA NA NA NA NA NA ...

Step 2: split chess file int two separate files chess_det & Chess_ref

chess_det<-subset(chess[(which(1:nrow(chess) %% 2 != 0)),])

chess2<-subset(chess[(which(1:nrow(chess) %% 2 == 0)),])

colnames(chess_det)[colnames(chess_det)=="Player.Name..USCF.ID...Rtg..Pre..Post.."] <- "Player_Name"

colnames(chess2)[colnames(chess2)=="Pair.Num"]<-"Country"

chess2$Pair.Num<-1:nrow(chess2)

str (chess)
## 'data.frame':    128 obs. of  11 variables:
##  $ Pair.Num                               : Factor w/ 67 levels "    1 ","    2 ",..: 1 67 2 65 3 65 4 65 5 65 ...
##  $ Player.Name..USCF.ID...Rtg..Pre..Post..: Factor w/ 128 levels " 10131499 / R: 1610   ->1618     ",..: 88 58 76 27 65 40 115 13 92 29 ...
##  $ Total.Pts                              : Factor w/ 15 levels "     ","1.0  ",..: 12 13 12 13 12 13 11 13 11 13 ...
##  $ Round.1                                : Factor w/ 64 levels "     ","B    ",..: 48 37 63 2 16 37 40 37 51 2 ...
##  $ Round.2                                : Factor w/ 66 levels "     ","B    ",..: 44 2 63 40 65 2 6 2 50 40 ...
##  $ Round.3                                : Factor w/ 65 levels "     ","B    ",..: 44 38 12 2 47 38 39 38 4 2 ...
##  $ Round.4                                : Factor w/ 66 levels "     ","B    ",..: 44 2 46 41 47 2 49 2 4 41 ...
##  $ Round.5                                : Factor w/ 58 levels "     ","B    ",..: 37 36 39 2 38 36 4 36 3 2 ...
##  $ Round.6                                : Factor w/ 63 levels "     ","B    ",..: 4 2 44 39 41 2 43 2 42 39 ...
##  $ Round.7                                : Factor w/ 58 levels "     ","B    ",..: 4 33 34 2 35 33 3 2 36 2 ...
##  $ X                                      : logi  NA NA NA NA NA NA ...

step 3: Extact player rating in chess 2 file

library(stringr)

chess2$Rating<-str_extract(chess2$Player.Name..USCF.ID...Rtg..Pre..Post..,":\\s[:digit:]{1,}")
chess2$Rating<-str_extract(chess2$Rating,"[:digit:]{1,}")

str (chess)
## 'data.frame':    128 obs. of  11 variables:
##  $ Pair.Num                               : Factor w/ 67 levels "    1 ","    2 ",..: 1 67 2 65 3 65 4 65 5 65 ...
##  $ Player.Name..USCF.ID...Rtg..Pre..Post..: Factor w/ 128 levels " 10131499 / R: 1610   ->1618     ",..: 88 58 76 27 65 40 115 13 92 29 ...
##  $ Total.Pts                              : Factor w/ 15 levels "     ","1.0  ",..: 12 13 12 13 12 13 11 13 11 13 ...
##  $ Round.1                                : Factor w/ 64 levels "     ","B    ",..: 48 37 63 2 16 37 40 37 51 2 ...
##  $ Round.2                                : Factor w/ 66 levels "     ","B    ",..: 44 2 63 40 65 2 6 2 50 40 ...
##  $ Round.3                                : Factor w/ 65 levels "     ","B    ",..: 44 38 12 2 47 38 39 38 4 2 ...
##  $ Round.4                                : Factor w/ 66 levels "     ","B    ",..: 44 2 46 41 47 2 49 2 4 41 ...
##  $ Round.5                                : Factor w/ 58 levels "     ","B    ",..: 37 36 39 2 38 36 4 36 3 2 ...
##  $ Round.6                                : Factor w/ 63 levels "     ","B    ",..: 4 2 44 39 41 2 43 2 42 39 ...
##  $ Round.7                                : Factor w/ 58 levels "     ","B    ",..: 4 33 34 2 35 33 3 2 36 2 ...
##  $ X                                      : logi  NA NA NA NA NA NA ...

step 4: cretae a refernce file for player rating and country

Rating<-subset(chess2, select=c("Country","Rating","Pair.Num"))

step 5: extract opponents player number for each player

chess_det$Opp_R1<-str_extract_all(chess_det$Round.1,"[:digit:]{1,}")
chess_det$Opp_R2<-str_extract_all(chess_det$Round.2,"[:digit:]{1,}")
chess_det$Opp_R3<-str_extract_all(chess_det$Round.3,"[:digit:]{1,}")
chess_det$Opp_R4<-str_extract_all(chess_det$Round.4,"[:digit:]{1,}")
chess_det$Opp_R5<-str_extract_all(chess_det$Round.5,"[:digit:]{1,}")
chess_det$Opp_R6<-str_extract_all(chess_det$Round.6,"[:digit:]{1,}")

step 6: Calculate rating of opponents

chess_det$Opp1_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R1,Rating$Pair.Num)])
chess_det$Opp2_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R2,Rating$Pair.Num)])
chess_det$Opp3_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R3,Rating$Pair.Num)])
chess_det$Opp4_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R4,Rating$Pair.Num)])
chess_det$Opp5_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R5,Rating$Pair.Num)])
chess_det$Opp6_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R6,Rating$Pair.Num)])

step 7: Calculate rating of opponents

chess_det$Opp1_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R1,Rating$Pair.Num)])
chess_det$Opp2_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R2,Rating$Pair.Num)])
chess_det$Opp3_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R3,Rating$Pair.Num)])
chess_det$Opp4_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R4,Rating$Pair.Num)])
chess_det$Opp5_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R5,Rating$Pair.Num)])
chess_det$Opp6_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R6,Rating$Pair.Num)])
chess_det$Opp_total_rat<-rowSums(chess_det[,18:23],na.rm=TRUE)

step 8: Calculate no of opponents played with and avg rating

chess_det$opp_count_to<-(ifelse(!is.na(chess_det$Opp1_Rat),1,0)+ifelse(!is.na(chess_det$Opp2_Rat),1,0)+ifelse(!is.na(chess_det$Opp3_Rat),1,0)+ifelse(!is.na(chess_det$Opp4_Rat),1,0)+ifelse(!is.na(chess_det$Opp5_Rat),1,0)+ifelse(!is.na(chess_det$Opp6_Rat),1,0))

chess_det$Avg_Opp_Rat<-chess_det$Opp_total_rat/chess_det$opp_count_to

step 9: create final file with requried columns and extract .csv file

chess_det_temp<-subset(chess_det,select=c("Player_Name","Pair.Num","Total.Pts","Avg_Opp_Rat"))
chess_det_temp$Pair.Num<-as.numeric(chess_det_temp$Pair.Num)

Play_details<-merge(chess_det_temp,Rating, by.chess_det_temp="Pair.Num", by.Rating="Pair.Num")

Play_details<-Play_details[c(2,5,3,6,4)]
write.csv(Play_details,file="Players details.csv")