Step 1:-Load the text file into a data frame called chess
chess<-read.table("C:/Users/Arindam/Documents/Data Science/Cuny/Data 607/Assignments/tournamentinfo.txt",header=TRUE,sep = "|")
str (chess)## 'data.frame': 128 obs. of 11 variables:
## $ Pair.Num : Factor w/ 67 levels " 1 "," 2 ",..: 1 67 2 65 3 65 4 65 5 65 ...
## $ Player.Name..USCF.ID...Rtg..Pre..Post..: Factor w/ 128 levels " 10131499 / R: 1610 ->1618 ",..: 88 58 76 27 65 40 115 13 92 29 ...
## $ Total.Pts : Factor w/ 15 levels " ","1.0 ",..: 12 13 12 13 12 13 11 13 11 13 ...
## $ Round.1 : Factor w/ 64 levels " ","B ",..: 48 37 63 2 16 37 40 37 51 2 ...
## $ Round.2 : Factor w/ 66 levels " ","B ",..: 44 2 63 40 65 2 6 2 50 40 ...
## $ Round.3 : Factor w/ 65 levels " ","B ",..: 44 38 12 2 47 38 39 38 4 2 ...
## $ Round.4 : Factor w/ 66 levels " ","B ",..: 44 2 46 41 47 2 49 2 4 41 ...
## $ Round.5 : Factor w/ 58 levels " ","B ",..: 37 36 39 2 38 36 4 36 3 2 ...
## $ Round.6 : Factor w/ 63 levels " ","B ",..: 4 2 44 39 41 2 43 2 42 39 ...
## $ Round.7 : Factor w/ 58 levels " ","B ",..: 4 33 34 2 35 33 3 2 36 2 ...
## $ X : logi NA NA NA NA NA NA ...
Step 2: split chess file int two separate files chess_det & Chess_ref
chess_det<-subset(chess[(which(1:nrow(chess) %% 2 != 0)),])
chess2<-subset(chess[(which(1:nrow(chess) %% 2 == 0)),])
colnames(chess_det)[colnames(chess_det)=="Player.Name..USCF.ID...Rtg..Pre..Post.."] <- "Player_Name"
colnames(chess2)[colnames(chess2)=="Pair.Num"]<-"Country"
chess2$Pair.Num<-1:nrow(chess2)
str (chess)## 'data.frame': 128 obs. of 11 variables:
## $ Pair.Num : Factor w/ 67 levels " 1 "," 2 ",..: 1 67 2 65 3 65 4 65 5 65 ...
## $ Player.Name..USCF.ID...Rtg..Pre..Post..: Factor w/ 128 levels " 10131499 / R: 1610 ->1618 ",..: 88 58 76 27 65 40 115 13 92 29 ...
## $ Total.Pts : Factor w/ 15 levels " ","1.0 ",..: 12 13 12 13 12 13 11 13 11 13 ...
## $ Round.1 : Factor w/ 64 levels " ","B ",..: 48 37 63 2 16 37 40 37 51 2 ...
## $ Round.2 : Factor w/ 66 levels " ","B ",..: 44 2 63 40 65 2 6 2 50 40 ...
## $ Round.3 : Factor w/ 65 levels " ","B ",..: 44 38 12 2 47 38 39 38 4 2 ...
## $ Round.4 : Factor w/ 66 levels " ","B ",..: 44 2 46 41 47 2 49 2 4 41 ...
## $ Round.5 : Factor w/ 58 levels " ","B ",..: 37 36 39 2 38 36 4 36 3 2 ...
## $ Round.6 : Factor w/ 63 levels " ","B ",..: 4 2 44 39 41 2 43 2 42 39 ...
## $ Round.7 : Factor w/ 58 levels " ","B ",..: 4 33 34 2 35 33 3 2 36 2 ...
## $ X : logi NA NA NA NA NA NA ...
step 3: Extact player rating in chess 2 file
library(stringr)
chess2$Rating<-str_extract(chess2$Player.Name..USCF.ID...Rtg..Pre..Post..,":\\s[:digit:]{1,}")
chess2$Rating<-str_extract(chess2$Rating,"[:digit:]{1,}")
str (chess)## 'data.frame': 128 obs. of 11 variables:
## $ Pair.Num : Factor w/ 67 levels " 1 "," 2 ",..: 1 67 2 65 3 65 4 65 5 65 ...
## $ Player.Name..USCF.ID...Rtg..Pre..Post..: Factor w/ 128 levels " 10131499 / R: 1610 ->1618 ",..: 88 58 76 27 65 40 115 13 92 29 ...
## $ Total.Pts : Factor w/ 15 levels " ","1.0 ",..: 12 13 12 13 12 13 11 13 11 13 ...
## $ Round.1 : Factor w/ 64 levels " ","B ",..: 48 37 63 2 16 37 40 37 51 2 ...
## $ Round.2 : Factor w/ 66 levels " ","B ",..: 44 2 63 40 65 2 6 2 50 40 ...
## $ Round.3 : Factor w/ 65 levels " ","B ",..: 44 38 12 2 47 38 39 38 4 2 ...
## $ Round.4 : Factor w/ 66 levels " ","B ",..: 44 2 46 41 47 2 49 2 4 41 ...
## $ Round.5 : Factor w/ 58 levels " ","B ",..: 37 36 39 2 38 36 4 36 3 2 ...
## $ Round.6 : Factor w/ 63 levels " ","B ",..: 4 2 44 39 41 2 43 2 42 39 ...
## $ Round.7 : Factor w/ 58 levels " ","B ",..: 4 33 34 2 35 33 3 2 36 2 ...
## $ X : logi NA NA NA NA NA NA ...
step 4: cretae a refernce file for player rating and country
Rating<-subset(chess2, select=c("Country","Rating","Pair.Num"))step 5: extract opponents player number for each player
chess_det$Opp_R1<-str_extract_all(chess_det$Round.1,"[:digit:]{1,}")
chess_det$Opp_R2<-str_extract_all(chess_det$Round.2,"[:digit:]{1,}")
chess_det$Opp_R3<-str_extract_all(chess_det$Round.3,"[:digit:]{1,}")
chess_det$Opp_R4<-str_extract_all(chess_det$Round.4,"[:digit:]{1,}")
chess_det$Opp_R5<-str_extract_all(chess_det$Round.5,"[:digit:]{1,}")
chess_det$Opp_R6<-str_extract_all(chess_det$Round.6,"[:digit:]{1,}")step 6: Calculate rating of opponents
chess_det$Opp1_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R1,Rating$Pair.Num)])
chess_det$Opp2_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R2,Rating$Pair.Num)])
chess_det$Opp3_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R3,Rating$Pair.Num)])
chess_det$Opp4_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R4,Rating$Pair.Num)])
chess_det$Opp5_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R5,Rating$Pair.Num)])
chess_det$Opp6_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R6,Rating$Pair.Num)])step 7: Calculate rating of opponents
chess_det$Opp1_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R1,Rating$Pair.Num)])
chess_det$Opp2_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R2,Rating$Pair.Num)])
chess_det$Opp3_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R3,Rating$Pair.Num)])
chess_det$Opp4_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R4,Rating$Pair.Num)])
chess_det$Opp5_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R5,Rating$Pair.Num)])
chess_det$Opp6_Rat<-as.numeric(Rating$Rating[match(chess_det$Opp_R6,Rating$Pair.Num)])
chess_det$Opp_total_rat<-rowSums(chess_det[,18:23],na.rm=TRUE)step 8: Calculate no of opponents played with and avg rating
chess_det$opp_count_to<-(ifelse(!is.na(chess_det$Opp1_Rat),1,0)+ifelse(!is.na(chess_det$Opp2_Rat),1,0)+ifelse(!is.na(chess_det$Opp3_Rat),1,0)+ifelse(!is.na(chess_det$Opp4_Rat),1,0)+ifelse(!is.na(chess_det$Opp5_Rat),1,0)+ifelse(!is.na(chess_det$Opp6_Rat),1,0))
chess_det$Avg_Opp_Rat<-chess_det$Opp_total_rat/chess_det$opp_count_tostep 9: create final file with requried columns and extract .csv file
chess_det_temp<-subset(chess_det,select=c("Player_Name","Pair.Num","Total.Pts","Avg_Opp_Rat"))
chess_det_temp$Pair.Num<-as.numeric(chess_det_temp$Pair.Num)
Play_details<-merge(chess_det_temp,Rating, by.chess_det_temp="Pair.Num", by.Rating="Pair.Num")
Play_details<-Play_details[c(2,5,3,6,4)]
write.csv(Play_details,file="Players details.csv")