1. Read the lines of data and store them in two vectors: each record occupies three lines, of which the first and the second are kept separately.
# Download the tournament report; raw.data holds one element per line of text.
raw.data <- readLines("https://raw.githubusercontent.com/xiaoxiaogao-DD/DATA607-Project1/master/tournamentinfo.txt")
len <- length(raw.data)
# Records start on line 5 (the lines before it are skipped, presumably the
# table header) and repeat every 3 lines: two data lines plus a third line that
# is never read (presumably a separator). Stopping at len - 1 selects the same
# records as the original loop and guarantees that the second line of every
# selected record exists.
record_start <- if (len >= 6) seq(5, len - 1, by = 3) else integer(0)
first_line <- raw.data[record_start]       # first line of each record
second_line <- raw.data[record_start + 1]  # second line of each record
2. Extract the Player’s Number, Player’s Name, Player’s State, Total Number of Points and Player’s Pre-Rating.
library(stringr)
# Cut the fixed-width fields out of every record at once. stringr's functions
# are vectorised over their input, so no index loop (and no element-by-element
# growing of the result vectors) is needed.
# From the first line of each record:
player_number <- str_trim(str_sub(first_line, 4, 5), "left")    # columns 4-5
player_name <- str_trim(str_sub(first_line, 8, 40), "both")     # columns 8-40
player_point <- as.numeric(str_sub(first_line, 42, 44))         # columns 42-44
# From the second line of each record:
player_state <- str_sub(second_line, 4, 5)                      # columns 4-5
# Only the 4 characters in columns 23-26 are read as the pre-rating.
player_prerating <- as.numeric(str_trim(str_sub(second_line, 23, 26), "left"))
head(player_number)
## [1] "1" "2" "3" "4" "5" "6"
length(player_number)
## [1] 64
head(player_name)
## [1] "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ"
## [4] "PATRICK H SCHILLING" "HANSHI ZUO" "HANSEN SONG"
length(player_name)
## [1] 64
head(player_state)
## [1] "ON" "MI" "MI" "MI" "MI" "OH"
length(player_state)
## [1] 64
head(player_point)
## [1] 6.0 6.0 6.0 5.5 5.5 5.0
length(player_point)
## [1] 64
head(player_prerating)
## [1] 1794 1553 1384 1716 1655 1686
length(player_prerating)
## [1] 64
Extract the player numbers of each player’s opponents.
# Opponent numbers are read from 2-character fields on the first line of each
# record, one field per round: columns 51-52, 57-58, ..., 87-88 (7 rounds,
# 6 columns apart).
round_start <- seq(51, by = 6, length.out = 7)
# One numeric vector of length 7 per player. A blank field becomes NA.
# str_sub() is vectorised over start/end, so a single call cuts all 7 fields.
opponent_number <- lapply(first_line, function(record) {
  as.numeric(str_trim(str_sub(record, round_start, round_start + 1), "both"))
})
head(opponent_number)
## [[1]]
## [1] 39 21 18 14 7 12 4
##
## [[2]]
## [1] 63 58 4 17 16 20 7
##
## [[3]]
## [1] 8 61 25 21 11 13 12
##
## [[4]]
## [1] 23 28 2 26 5 19 1
##
## [[5]]
## [1] 45 37 12 13 4 14 17
##
## [[6]]
## [1] 34 29 11 35 10 27 21
length(opponent_number)
## [1] 64
3. Create a data frame.
# Assemble one row per player from the extracted vectors; each column keeps the
# name of the vector it comes from.
Player_raw <- data.frame(
  player_number = player_number,
  player_name = player_name,
  player_state = player_state,
  player_point = player_point,
  player_prerating = player_prerating
)
head(Player_raw)
## player_number player_name player_state player_point
## 1 1 GARY HUA ON 6.0
## 2 2 DAKSHESH DARURI MI 6.0
## 3 3 ADITYA BAJAJ MI 6.0
## 4 4 PATRICK H SCHILLING MI 5.5
## 5 5 HANSHI ZUO MI 5.5
## 6 6 HANSEN SONG OH 5.0
## player_prerating
## 1 1794
## 2 1553
## 3 1384
## 4 1716
## 5 1655
## 6 1686
summary(Player_raw)
## player_number player_name player_state player_point
## 1 : 1 ADITYA BAJAJ : 1 MI:55 Min. :1.000
## 10 : 1 ALAN BUI : 1 OH: 1 1st Qu.:2.500
## 11 : 1 ALEX KONG : 1 ON: 8 Median :3.500
## 12 : 1 AMIYATOSH PWNANANDAM: 1 Mean :3.438
## 13 : 1 ANVIT RAO : 1 3rd Qu.:4.000
## 14 : 1 ASHWIN BALAJI : 1 Max. :6.000
## (Other):58 (Other) :58
## player_prerating
## Min. : 377
## 1st Qu.:1227
## Median :1407
## Mean :1378
## 3rd Qu.:1583
## Max. :1794
##
Player_raw$player_prerating[55]
## [1] 1186
4. Create the opponent_prerating column: the average pre-rating of each player’s opponents, rounded to the nearest whole number.
# Average pre-rating of the opponents of every player, rounded to a whole
# number. opponent_number[[k]] holds the opponent player numbers of player k,
# with NA for fields that held no number (presumably rounds without an
# opponent); those entries are left out of the average.
# Convert via character so the lookup works whether the column is a factor or
# a character vector.
player_ids <- as.numeric(as.character(Player_raw$player_number))
opponent_prerating <- vapply(
  opponent_number[seq_along(player_number)],
  function(opponents) {
    played <- opponents[!is.na(opponents)]
    # A player without any recorded opponent gets an explicit NA, so the result
    # always has exactly one entry per player.
    if (length(played) == 0) {
      return(NA_real_)
    }
    # match() yields NA for an unknown player number, which makes the average
    # NA instead of breaking the computation.
    faced <- Player_raw$player_prerating[match(played, player_ids)]
    round(sum(faced) / length(faced))
  },
  numeric(1)
)
head(opponent_prerating)
## [1] 1605 1469 1564 1574 1501 1519
5. Add the opponent_prerating column to the Player data frame so that the final data frame includes (in addition to the player number):
Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents.
# Append the opponents' average pre-rating as the last column of the table.
Player <- data.frame(
  Player_raw,
  opponent_prerating = opponent_prerating
)
head(Player)
## player_number player_name player_state player_point
## 1 1 GARY HUA ON 6.0
## 2 2 DAKSHESH DARURI MI 6.0
## 3 3 ADITYA BAJAJ MI 6.0
## 4 4 PATRICK H SCHILLING MI 5.5
## 5 5 HANSHI ZUO MI 5.5
## 6 6 HANSEN SONG OH 5.0
## player_prerating opponent_prerating
## 1 1794 1605
## 2 1553 1469
## 3 1384 1564
## 4 1716 1574
## 5 1655 1501
## 6 1686 1519
6. Generate a .csv file.
# Export the final table to the working directory; row.names = FALSE keeps R's
# row index out of the file.
write.csv(
  x = Player,
  file = "607_Project1_chess_player.csv",
  row.names = FALSE
)