Grando Project 1

options(width = 100)
# Standard setup: switch to the project directory for whichever machine
# this runs on (the Windows box or the Linux box), so that the relative
# paths used further down resolve the same way on both.
setwd(
    if (Sys.info()[["sysname"]] == "Windows") {
        "~/Masters/DATA607/Project1"
    } else {
        "~/Documents/Masters/DATA607/Project1"
    }
)

Load the project text file

library(stringr)
# Read the tournament report into a character vector, one element per line.
raw.data <- readLines(con = "./tournamentinfo.txt", warn = FALSE)
# Preview the first lines of the raw report.
head(x = raw.data)
## [1] "-----------------------------------------------------------------------------------------" 
## [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
## [4] "-----------------------------------------------------------------------------------------" 
## [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
## [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"

Create a regular expression to parse the text file and load the results into a dataframe

# Each player occupies two consecutive lines of the report, so the pattern
# has two alternatives (12 capture groups in total):
#   1. First line:  pair number | player name | total points, followed by up
#      to seven opponent numbers (groups 1-10).
#   2. Second line: two-character state | ... "R:" pre-rating (groups 11-12).
# The name group takes the whole field up to the next "|" with surrounding
# whitespace trimmed, so names with more than three words, hyphens or a
# single word are kept intact (the previous letter-only, three-token group
# truncated them and used the mistyped class [A-za-z]).
# NOTE(review): opponent numbers are captured in order of appearance; a round
# with no opponent number contributes no digits, so later opponents fill the
# earlier groups and the trailing groups are NA.
get_data <- str_match_all(raw.data, paste0(
    "(\\d+)\\s\\|\\s([^|]*?)\\s*\\|\\s*(\\d+\\.?\\d?)",
    "\\D+(\\d+)?\\D+(\\d+)?\\D+(\\d+)?\\D+(\\d+)?",
    "\\D+(\\d+)?\\D+(\\d+)?\\D+(\\d+)?",
    "|\\s(\\w{2})\\s\\|.*?R\\:\\s+(\\d+)?"
))
# Flattened, one player contributes 26 values: the full match plus 12 groups
# for each of the two lines.
head(unlist(get_data), n = 26)
##  [1] "1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4"
##  [2] "1"                                                                                   
##  [3] "GARY HUA "                                                                           
##  [4] "6.0"                                                                                 
##  [5] "39"                                                                                  
##  [6] "21"                                                                                  
##  [7] "18"                                                                                  
##  [8] "14"                                                                                  
##  [9] "7"                                                                                   
## [10] "12"                                                                                  
## [11] "4"                                                                                   
## [12] NA                                                                                    
## [13] NA                                                                                    
## [14] " ON | 15445895 / R: 1794"                                                            
## [15] NA                                                                                    
## [16] NA                                                                                    
## [17] NA                                                                                    
## [18] NA                                                                                    
## [19] NA                                                                                    
## [20] NA                                                                                    
## [21] NA                                                                                    
## [22] NA                                                                                    
## [23] NA                                                                                    
## [24] NA                                                                                    
## [25] "ON"                                                                                  
## [26] "1794"
# Flatten every match and lay the values out one player per row: 26 values
# per player (13 from each of the player's two report lines), filled by row.
m <- matrix(
    data = unlist(get_data),
    byrow = TRUE,
    ncol = 26
)
head(m)
##      [,1]                                                                                   [,2]
## [1,] "1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4" "1" 
## [2,] "2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7" "2" 
## [3,] "3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12" "3" 
## [4,] "4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1" "4" 
## [5,] "5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17" "5" 
## [6,] "6 | HANSEN SONG                     |5.0  |W  34|D  29|L  11|W  35|D  10|W  27|W  21" "6" 
##      [,3]                  [,4]  [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] "GARY HUA "           "6.0" "39" "21" "18" "14" "7"  "12"  "4"   NA    NA   
## [2,] "DAKSHESH DARURI "    "6.0" "63" "58" "4"  "17" "16" "20"  "7"   NA    NA   
## [3,] "ADITYA BAJAJ "       "6.0" "8"  "61" "25" "21" "11" "13"  "12"  NA    NA   
## [4,] "PATRICK H SCHILLING" "5.5" "23" "28" "2"  "26" "5"  "19"  "1"   NA    NA   
## [5,] "HANSHI ZUO "         "5.5" "45" "37" "12" "13" "4"  "14"  "17"  NA    NA   
## [6,] "HANSEN SONG "        "5.0" "34" "29" "11" "35" "10" "27"  "21"  NA    NA   
##      [,14]                      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## [1,] " ON | 15445895 / R: 1794" NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    "ON" 
## [2,] " MI | 14598900 / R: 1553" NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    "MI" 
## [3,] " MI | 14959604 / R: 1384" NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    "MI" 
## [4,] " MI | 12616049 / R: 1716" NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    "MI" 
## [5,] " MI | 14601533 / R: 1655" NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    "MI" 
## [6,] " OH | 15055204 / R: 1686" NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    "OH" 
##      [,26] 
## [1,] "1794"
## [2,] "1553"
## [3,] "1384"
## [4,] "1716"
## [5,] "1655"
## [6,] "1686"
# Keep the captured fields only: matrix columns 2-11 (pair number, name,
# total points, seven opponent numbers) and columns 25-26 (state and
# pre-rating); the remaining columns are full-match text or NA padding.
player_df <- setNames(
    data.frame(m[, 2:11], m[, 25:26]),
    c(
        "player_index", "player_name", "total_number_points",
        "round_1", "round_2", "round_3", "round_4",
        "round_5", "round_6", "round_7",
        "state", "pre_rating"
    )
)
head(player_df)
##   player_index         player_name total_number_points round_1 round_2 round_3 round_4 round_5
## 1            1           GARY HUA                  6.0      39      21      18      14       7
## 2            2    DAKSHESH DARURI                  6.0      63      58       4      17      16
## 3            3       ADITYA BAJAJ                  6.0       8      61      25      21      11
## 4            4 PATRICK H SCHILLING                 5.5      23      28       2      26       5
## 5            5         HANSHI ZUO                  5.5      45      37      12      13       4
## 6            6        HANSEN SONG                  5.0      34      29      11      35      10
##   round_6 round_7 state pre_rating
## 1      12       4    ON       1794
## 2      20       7    MI       1553
## 3      13      12    MI       1384
## 4      19       1    MI       1716
## 5      14      17    MI       1655
## 6      27      21    OH       1686

Calculate the average opponent pre-rating, create the requested table, and export it to a CSV file

# Average pre-rating of one player's opponents.
#
# r1..r7: the opponent pair numbers for rounds 1-7 (character, factor or
#         numeric; NA where no opponent was captured).
# Returns the mean of the opponents' pre-ratings, rounded to a whole number.
# Ratings are read from column 12 of the global `player_df`; opponents that
# match no row (including NA) contribute nothing to the mean.
get_avg <- function(r1, r2, r3, r4, r5, r6, r7) {
    # Pre-rating(s) of the player whose pair number equals `opponent`.
    opponent_rating <- function(opponent) {
        opponent_id <- as.numeric(as.character(opponent))
        matching_rows <- which(player_df$player_index == opponent_id)
        as.numeric(as.character(player_df[matching_rows, 12]))
    }
    ratings <- do.call(
        c,
        lapply(list(r1, r2, r3, r4, r5, r6, r7), opponent_rating)
    )
    round(mean(ratings, na.rm = TRUE), digits = 0)
}
# Apply get_avg() row by row: each call receives one player's seven
# opponent numbers and yields that player's average opponent pre-rating.
player_df$avg_opp_pre_rating <- with(
    player_df,
    mapply(
        FUN = get_avg,
        round_1, round_2, round_3, round_4, round_5, round_6, round_7
    )
)
head(player_df)
##   player_index         player_name total_number_points round_1 round_2 round_3 round_4 round_5
## 1            1           GARY HUA                  6.0      39      21      18      14       7
## 2            2    DAKSHESH DARURI                  6.0      63      58       4      17      16
## 3            3       ADITYA BAJAJ                  6.0       8      61      25      21      11
## 4            4 PATRICK H SCHILLING                 5.5      23      28       2      26       5
## 5            5         HANSHI ZUO                  5.5      45      37      12      13       4
## 6            6        HANSEN SONG                  5.0      34      29      11      35      10
##   round_6 round_7 state pre_rating avg_opp_pre_rating
## 1      12       4    ON       1794               1605
## 2      20       7    MI       1553               1469
## 3      13      12    MI       1384               1564
## 4      19       1    MI       1716               1574
## 5      14      17    MI       1655               1501
## 6      27      21    OH       1686               1519
# Assemble the requested table: name, state, total points, pre-rating and
# average opponent pre-rating. The column names come from the argument
# names, so no separate renaming step is needed.
final_player_df <- with(
    player_df,
    data.frame(
        player_name, state, total_number_points,
        pre_rating, avg_opp_pre_rating
    )
)
head(final_player_df)
##           player_name state total_number_points pre_rating avg_opp_pre_rating
## 1           GARY HUA     ON                 6.0       1794               1605
## 2    DAKSHESH DARURI     MI                 6.0       1553               1469
## 3       ADITYA BAJAJ     MI                 6.0       1384               1564
## 4 PATRICK H SCHILLING    MI                 5.5       1716               1574
## 5         HANSHI ZUO     MI                 5.5       1655               1501
## 6        HANSEN SONG     OH                 5.0       1686               1519
# Export the final table to the working directory; with write.csv()'s
# defaults the row names are written as the first (unnamed) column.
write.csv(x = final_player_df, file = "final_player.csv")