MSDS Spring 2018

DATA 607 Data Acquisition and Management

Jiadi Li

Week 4 Project 1: R Character Manipulation and Date Processing - Chess Tournament Cross Table

1. Read the file line by line and store the first and second line of each three-line record in two separate vectors.

# Download the cross table as a character vector, one element per text line.
raw.data <- readLines("https://raw.githubusercontent.com/xiaoxiaogao-DD/DATA607-Project1/master/tournamentinfo.txt")

# Records begin on line 5 and repeat every 3 lines: the first line of a record
# is followed directly by its second line (the third line of each stride is
# not read). A record is kept only when its first line falls before the last
# line of the file, so the matching second line always exists.
len <- length(raw.data)
record_start <- if (len > 5) seq(5, len - 1, by = 3) else integer(0)

first_line <- raw.data[record_start]       # first line of every record
second_line <- raw.data[record_start + 1]  # second line of every record

2. Extract Player’s Name, Player’s State, Total Number of Points and Player’s Pre-Rating.

library(stringr)

# Pull the fixed-width fields for every record in one vectorised pass instead
# of visiting the records one index at a time.
# Second lines are aligned to the first lines so both are read record by record.
record_bottom <- second_line[seq_along(first_line)]

# Player number: columns 4-5 of the first line, leading blanks removed.
player_number <- str_trim(str_sub(first_line, 4, 5), "left")

# Player name: columns 8-40 of the first line, padding removed on both sides.
player_name <- str_trim(str_sub(first_line, 8, 40), "both")

# State: columns 4-5 of the second line, taken as-is.
player_state <- str_sub(record_bottom, 4, 5)

# Total points: columns 42-44 of the first line, converted to a number.
player_point <- as.numeric(str_sub(first_line, 42, 44))

# Pre-rating: columns 23-26 of the second line, converted to a number.
player_prerating <- as.numeric(str_trim(str_sub(record_bottom, 23, 26), "left"))

# Spot-check each extracted vector: preview its first values, then its length.
# Lines starting with "##" are the rendered console output of the call above.
head(player_number)
## [1] "1" "2" "3" "4" "5" "6"
length(player_number)
## [1] 64
head(player_name)
## [1] "GARY HUA"            "DAKSHESH DARURI"     "ADITYA BAJAJ"       
## [4] "PATRICK H SCHILLING" "HANSHI ZUO"          "HANSEN SONG"
length(player_name)
## [1] 64
head(player_state)
## [1] "ON" "MI" "MI" "MI" "MI" "OH"
length(player_state)
## [1] 64
head(player_point)
## [1] 6.0 6.0 6.0 5.5 5.5 5.0
length(player_point)
## [1] 64
head(player_prerating)
## [1] 1794 1553 1384 1716 1655 1686
length(player_prerating)
## [1] 64

Extract the player number of each opponent.

# Build a list with one numeric vector per record holding the opponent's
# player number for each of the seven rounds.
#
# The opponent number of round 1 sits in columns 51-52 of the record's first
# line; each later round is 6 columns further right, up to columns 87-88.
round_start <- seq(51, 87, by = 6)

opponent_number <- lapply(first_line, function(record) {
  # str_sub() is vectorised over start/end, so a single call slices all seven
  # round fields. A field that does not hold a number converts to NA.
  as.numeric(str_trim(str_sub(record, round_start, round_start + 1), "both"))
})

# Preview the opponent numbers of the first records, then check how many
# entries the list holds. Lines starting with "##" are rendered console output.
head(opponent_number)
## [[1]]
## [1] 39 21 18 14  7 12  4
## 
## [[2]]
## [1] 63 58  4 17 16 20  7
## 
## [[3]]
## [1]  8 61 25 21 11 13 12
## 
## [[4]]
## [1] 23 28  2 26  5 19  1
## 
## [[5]]
## [1] 45 37 12 13  4 14 17
## 
## [[6]]
## [1] 34 29 11 35 10 27 21
length(opponent_number)
## [1] 64

3. Create a data frame

# Combine the five extracted vectors into one data frame, one column each.
Player_raw <- data.frame(player_number,player_name,player_state,player_point,player_prerating)
# Preview the first rows. Lines starting with "##" are rendered console output.
head(Player_raw)
##   player_number         player_name player_state player_point
## 1             1            GARY HUA           ON          6.0
## 2             2     DAKSHESH DARURI           MI          6.0
## 3             3        ADITYA BAJAJ           MI          6.0
## 4             4 PATRICK H SCHILLING           MI          5.5
## 5             5          HANSHI ZUO           MI          5.5
## 6             6         HANSEN SONG           OH          5.0
##   player_prerating
## 1             1794
## 2             1553
## 3             1384
## 4             1716
## 5             1655
## 6             1686
# Per-column summary of the data frame.
summary(Player_raw)
##  player_number               player_name player_state  player_point  
##  1      : 1    ADITYA BAJAJ        : 1   MI:55        Min.   :1.000  
##  10     : 1    ALAN BUI            : 1   OH: 1        1st Qu.:2.500  
##  11     : 1    ALEX KONG           : 1   ON: 8        Median :3.500  
##  12     : 1    AMIYATOSH PWNANANDAM: 1                Mean   :3.438  
##  13     : 1    ANVIT RAO           : 1                3rd Qu.:4.000  
##  14     : 1    ASHWIN BALAJI       : 1                Max.   :6.000  
##  (Other):58    (Other)             :58                               
##  player_prerating
##  Min.   : 377    
##  1st Qu.:1227    
##  Median :1407    
##  Mean   :1378    
##  3rd Qu.:1583    
##  Max.   :1794    
## 
# Look up the pre-rating stored in row 55.
Player_raw$player_prerating[55]
## [1] 1186

4. Create the opponent_prerating column

# Average pre-rating of the opponents each player faced, rounded to the
# nearest whole number. The result has exactly one entry per player.
#
# Player numbers are converted to numeric once so the numeric opponent numbers
# can be looked up with match(). as.character() comes first so the conversion
# is also correct if the column is stored as a factor.
roster_number <- as.numeric(as.character(Player_raw$player_number))

opponent_prerating <- vapply(
  seq_along(player_number),
  function(p) {
    faced <- opponent_number[[p]]
    # NA entries (rounds where no opponent number was parsed) are skipped.
    faced <- faced[!is.na(faced)]
    if (length(faced) == 0) {
      # No opponents at all: record NA explicitly so the vector keeps one
      # entry per player and can still be bound to the data frame.
      return(NA_real_)
    }
    # An opponent number that is not in the roster yields an NA rating, and
    # therefore an NA average, instead of breaking the computation.
    ratings <- Player_raw$player_prerating[match(faced, roster_number)]
    round(sum(ratings) / length(ratings))
  },
  numeric(1)
)

# Preview the first averaged opponent pre-ratings ("##" is rendered output).
head(opponent_prerating)
## [1] 1605 1469 1564 1574 1501 1519

5. Add the opponent_prerating column to the Player data frame so that the final data frame includes:
Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents

# Append opponent_prerating to the columns of Player_raw as the final column.
Player <- data.frame(Player_raw,opponent_prerating)
# Preview the first rows. Lines starting with "##" are rendered console output.
head(Player)
##   player_number         player_name player_state player_point
## 1             1            GARY HUA           ON          6.0
## 2             2     DAKSHESH DARURI           MI          6.0
## 3             3        ADITYA BAJAJ           MI          6.0
## 4             4 PATRICK H SCHILLING           MI          5.5
## 5             5          HANSHI ZUO           MI          5.5
## 6             6         HANSEN SONG           OH          5.0
##   player_prerating opponent_prerating
## 1             1794               1605
## 2             1553               1469
## 3             1384               1564
## 4             1716               1574
## 5             1655               1501
## 6             1686               1519

6. Generate a .csv file

# Write the Player data frame to a CSV file at the given relative path,
# leaving out the row names.
write.csv(Player,file = "607_Project1_chess_player.csv",row.names = FALSE)