knitr::opts_chunk$set(echo = TRUE)
library (readr)
library(stringr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(dbplyr)
## 
## Attaching package: 'dbplyr'
## 
## The following objects are masked from 'package:dplyr':
## 
##     ident, sql
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
#urlfile="https://raw.githubusercontent.com/Nhodgkinson/DATA_607_project1/main/tournamentinfo.txt"




tournamentinfo <- paste(readLines("https://raw.githubusercontent.com/Nhodgkinson/DATA_607_project1/main/tournamentinfo.txt"), collapse = '\n')
## Warning in readLines("https://raw.githubusercontent.com/Nhodgkinson/
## DATA_607_project1/main/tournamentinfo.txt"): incomplete final line found
## on 'https://raw.githubusercontent.com/Nhodgkinson/DATA_607_project1/main/
## tournamentinfo.txt'
df<-str_remove_all(tournamentinfo, "(-)\\1{2,}")

mydata<-read_delim(df, delim="|")
## New names:
## Rows: 129 Columns: 11
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "|" chr
## (11): Pair , Player Name , Total, Round...4, Round...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `Round` -> `Round...4`
## • `Round` -> `Round...5`
## • `Round` -> `Round...6`
## • `Round` -> `Round...7`
## • `Round` -> `Round...8`
## • `Round` -> `Round...9`
## • `Round` -> `Round...10`
mydata<-mydata[-1,]

R Markdown

String extracting data to create new columns

#player num column returns only digit values
player_num1 <- str_extract(mydata$` Pair `, "\\d+")
mydata$player_num1<-player_num1

#player state column returns non digit values 
player_state <- str_extract(mydata$` Pair `, "\\D+")
mydata$player_state<-player_state


#player pre rating column
player_pre_rating <- str_extract(mydata$` Player Name                     `, "(\\bR: *)\\d{3,}")
#this next line removes the : from the new player_pre_rating value so I only have digits
player_pre_rating <- str_extract(player_pre_rating, "\\d{3,}")
mydata$player_pre_rating<-player_pre_rating

#player name column
player_name <- str_extract(mydata$` Player Name                     `, "\\D*")
mydata$player_name <-player_name

#player total column
player_total <- str_extract(mydata$Total, "\\d(\\b.)\\d")
mydata$player_total <- player_total


mydata<-subset(mydata, select=-c(1:3))

#Subset data

df<-mydata[!is.na(mydata$player_pre_rating),]
mydata<-subset(mydata, select=-c(8,10,11))

mydata<-mydata[!is.na(mydata$player_num1),]

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

df_new <- cbind(df, mydata)
df_new<-subset(df_new, select=-c(1:9,12,13))
df_new[,c(10:12,1:9)]
##    player_num1                       player_name player_total player_state
## 1            1  GARY HUA                                  6.0          ON 
## 2            2  DAKSHESH DARURI                           6.0          MI 
## 3            3  ADITYA BAJAJ                              6.0          MI 
## 4            4  PATRICK H SCHILLING                       5.5          MI 
## 5            5  HANSHI ZUO                                5.5          MI 
## 6            6  HANSEN SONG                               5.0          OH 
## 7            7  GARY DEE SWATHELL                         5.0          MI 
## 8            8  EZEKIEL HOUGHTON                          5.0          MI 
## 9            9  STEFANO LEE                               5.0          ON 
## 10          10  ANVIT RAO                                 5.0          MI 
## 11          11  CAMERON WILLIAM MC LEMAN                  4.5          MI 
## 12          12  KENNETH J TACK                            4.5          MI 
## 13          13  TORRANCE HENRY JR                         4.5          MI 
## 14          14  BRADLEY SHAW                              4.5          MI 
## 15          15  ZACHARY JAMES HOUGHTON                    4.5          MI 
## 16          16  MIKE NIKITIN                              4.0          MI 
## 17          17  RONALD GRZEGORCZYK                        4.0          MI 
## 18          18  DAVID SUNDEEN                             4.0          MI 
## 19          19  DIPANKAR ROY                              4.0          MI 
## 20          20  JASON ZHENG                               4.0          MI 
## 21          21  DINH DANG BUI                             4.0          ON 
## 22          22  EUGENE L MCCLURE                          4.0          MI 
## 23          23  ALAN BUI                                  4.0          ON 
## 24          24  MICHAEL R ALDRICH                         4.0          MI 
## 25          25  LOREN SCHWIEBERT                          3.5          MI 
## 26          26  MAX ZHU                                   3.5          ON 
## 27          27  GAURAV GIDWANI                            3.5          MI 
## 28          28  SOFIA ADINA STANESCU-BELLU                3.5          MI 
## 29          29  CHIEDOZIE OKORIE                          3.5          MI 
## 30          30  GEORGE AVERY JONES                        3.5          ON 
## 31          31  RISHI SHETTY                              3.5          MI 
## 32          32  JOSHUA PHILIP MATHEWS                     3.5          ON 
## 33          33  JADE GE                                   3.5          MI 
## 34          34  MICHAEL JEFFERY THOMAS                    3.5          MI 
## 35          35  JOSHUA DAVID LEE                          3.5          MI 
## 36          36  SIDDHARTH JHA                             3.5          MI 
## 37          37  AMIYATOSH PWNANANDAM                      3.5          MI 
## 38          38  BRIAN LIU                                 3.0          MI 
## 39          39  JOEL R HENDON                             3.0          MI 
## 40          40  FOREST ZHANG                              3.0          MI 
## 41          41  KYLE WILLIAM MURPHY                       3.0          MI 
## 42          42  JARED GE                                  3.0          MI 
## 43          43  ROBERT GLEN VASEY                         3.0          MI 
## 44          44  JUSTIN D SCHILLING                        3.0          MI 
## 45          45  DEREK YAN                                 3.0          MI 
## 46          46  JACOB ALEXANDER LAVALLEY                  3.0          MI 
## 47          47  ERIC WRIGHT                               2.5          MI 
## 48          48  DANIEL KHAIN                              2.5          MI 
## 49          49  MICHAEL J MARTIN                          2.5          MI 
## 50          50  SHIVAM JHA                                2.5          MI 
## 51          51  TEJAS AYYAGARI                            2.5          MI 
## 52          52  ETHAN GUO                                 2.5          MI 
## 53          53  JOSE C YBARRA                             2.0          MI 
## 54          54  LARRY HODGE                               2.0          MI 
## 55          55  ALEX KONG                                 2.0          MI 
## 56          56  MARISA RICCI                              2.0          MI 
## 57          57  MICHAEL LU                                2.0          MI 
## 58          58  VIRAJ MOHILE                              2.0          MI 
## 59          59  SEAN M MC CORMICK                         2.0          MI 
## 60          60  JULIA SHEN                                1.5          MI 
## 61          61  JEZZEL FARKAS                             1.5          ON 
## 62          62  ASHWIN BALAJI                             1.0          MI 
## 63          63  THOMAS JOSEPH HOSMER                      1.0          MI 
## 64          64  BEN LI                                    1.0          MI 
##    player_pre_rating Round...4 Round...5 Round...6 Round...7 Round...8
## 1               1794     W  39     W  21     W  18     W  14     W   7
## 2               1553     W  63     W  58     L   4     W  17     W  16
## 3               1384     L   8     W  61     W  25     W  21     W  11
## 4               1716     W  23     D  28     W   2     W  26     D   5
## 5               1655     W  45     W  37     D  12     D  13     D   4
## 6               1686     W  34     D  29     L  11     W  35     D  10
## 7               1649     W  57     W  46     W  13     W  11     L   1
## 8               1641     W   3     W  32     L  14     L   9     W  47
## 9               1411     W  25     L  18     W  59     W   8     W  26
## 10              1365     D  16     L  19     W  55     W  31     D   6
## 11              1712     D  38     W  56     W   6     L   7     L   3
## 12              1663     W  42     W  33     D   5     W  38     H    
## 13              1666     W  36     W  27     L   7     D   5     W  33
## 14              1610     W  54     W  44     W   8     L   1     D  27
## 15              1220     D  19     L  16     W  30     L  22     W  54
## 16              1604     D  10     W  15     H         W  39     L   2
## 17              1629     W  48     W  41     L  26     L   2     W  23
## 18              1600     W  47     W   9     L   1     W  32     L  19
## 19              1564     D  15     W  10     W  52     D  28     W  18
## 20              1595     L  40     W  49     W  23     W  41     W  28
## 21              1563     W  43     L   1     W  47     L   3     W  40
## 22              1555     W  64     D  52     L  28     W  15     H    
## 23              1363     L   4     W  43     L  20     W  58     L  17
## 24              1229     L  28     L  47     W  43     L  25     W  60
## 25              1745     L   9     W  53     L   3     W  24     D  34
## 26              1579     W  49     W  40     W  17     L   4     L   9
## 27              1552     W  51     L  13     W  46     W  37     D  14
## 28              1507     W  24     D   4     W  22     D  19     L  20
## 29              1602     W  50     D   6     L  38     L  34     W  52
## 30              1522     L  52     D  64     L  15     W  55     L  31
## 31              1494     L  58     D  55     W  64     L  10     W  30
## 32              1441     W  61     L   8     W  44     L  18     W  51
## 33              1449     W  60     L  12     W  50     D  36     L  13
## 34              1399     L   6     W  60     L  37     W  29     D  25
## 35              1438     L  46     L  38     W  56     L   6     W  57
## 36              1355     L  13     W  57     W  51     D  33     H    
## 37               980     B         L   5     W  34     L  27     H    
## 38              1423     D  11     W  35     W  29     L  12     H    
## 39              1436     L   1     W  54     W  40     L  16     W  44
## 40              1348     W  20     L  26     L  39     W  59     L  21
## 41              1403     W  59     L  17     W  58     L  20     X    
## 42              1332     L  12     L  50     L  57     D  60     D  61
## 43              1283     L  21     L  23     L  24     W  63     W  59
## 44              1199     B         L  14     L  32     W  53     L  39
## 45              1242     L   5     L  51     D  60     L  56     W  63
## 46               377     W  35     L   7     L  27     L  50     W  64
## 47              1362     L  18     W  24     L  21     W  61     L   8
## 48              1382     L  17     W  63     H         D  52     H    
## 49              1291     L  26     L  20     D  63     D  64     W  58
## 50              1056     L  29     W  42     L  33     W  46     H    
## 51              1011     L  27     W  45     L  36     W  57     L  32
## 52               935     W  30     D  22     L  19     D  48     L  29
## 53              1393     H         L  25     H         L  44     U    
## 54              1270     L  14     L  39     L  61     B         L  15
## 55              1186     L  62     D  31     L  10     L  30     B    
## 56              1153     H         L  11     L  35     W  45     H    
## 57              1092     L   7     L  36     W  42     L  51     L  35
## 58               917     W  31     L   2     L  41     L  23     L  49
## 59               853     L  41     B         L   9     L  40     L  43
## 60               967     L  33     L  34     D  45     D  42     L  24
## 61               955     L  32     L   3     W  54     L  47     D  42
## 62              1530     W  55     U         U         U         U    
## 63              1175     L   2     L  48     D  49     L  43     L  45
## 64              1163     L  22     D  30     L  31     D  49     L  46
##    Round...9 Round...10
## 1      D  12      D   4
## 2      W  20      W   7
## 3      W  13      W  12
## 4      W  19      D   1
## 5      W  14      W  17
## 6      W  27      W  21
## 7      W   9      L   2
## 8      W  28      W  19
## 9      L   7      W  20
## 10     W  25      W  18
## 11     W  34      W  26
## 12     D   1      L   3
## 13     L   3      W  32
## 14     L   5      W  31
## 15     W  33      W  38
## 16     W  36      U    
## 17     W  22      L   5
## 18     W  38      L  10
## 19     L   4      L   8
## 20     L   2      L   9
## 21     W  39      L   6
## 22     L  17      W  40
## 23     W  37      W  46
## 24     W  44      W  39
## 25     L  10      W  47
## 26     D  32      L  11
## 27     L   6      U    
## 28     L   8      D  36
## 29     W  48      U    
## 30     W  61      W  50
## 31     W  50      L  14
## 32     D  26      L  13
## 33     L  15      W  51
## 34     L  11      W  52
## 35     D  52      W  48
## 36     L  16      D  28
## 37     L  23      W  61
## 38     L  18      L  15
## 39     L  21      L  24
## 40     W  56      L  22
## 41     U          U    
## 42     W  64      W  56
## 43     L  46      W  55
## 44     L  24      W  59
## 45     D  55      W  58
## 46     W  43      L  23
## 47     D  51      L  25
## 48     L  29      L  35
## 49     H          U    
## 50     L  31      L  30
## 51     D  47      L  33
## 52     D  35      L  34
## 53     W  57      U    
## 54     L  59      W  64
## 55     D  45      L  43
## 56     L  40      L  42
## 57     L  53      B    
## 58     B          L  45
## 59     W  54      L  44
## 60     H          U    
## 61     L  30      L  37
## 62     U          U    
## 63     H          U    
## 64     L  42      L  54
opp_data <- df_new[,c(10,3:9)] 

#naming rows

I renamed the rows and pulled the numeric values out.

player_num <- opp_data$player_num1
opp_data$player_num<-player_num

rnd1 <- str_extract(opp_data$Round...4, "(\\b *)\\d{1,}")
opp_data$rnd1 <-rnd1

rnd2 <- str_extract(opp_data$Round...5, "(\\b *)\\d{1,}")
opp_data$rnd2 <-rnd2

rnd3 <- str_extract(opp_data$Round...6, "(\\b *)\\d{1,}")
opp_data$rnd3 <-rnd3

rnd4 <- str_extract(opp_data$Round...7, "(\\b *)\\d{1,}")
opp_data$rnd4 <-rnd4

rnd5 <- str_extract(opp_data$Round...8, "(\\b *)\\d{1,}")
opp_data$rnd5 <-rnd5

rnd6 <- str_extract(opp_data$Round...9, "(\\b *)\\d{1,}")
opp_data$rnd6 <-rnd6

rnd7 <- str_extract(opp_data$Round...10, "(\\b *)\\d{1,}")
opp_data$rnd7 <-rnd7

opp_data<-subset(opp_data, select=-c(2:9))

df_new$player_pre_rating<- as.integer(df_new$player_pre_rating)

#multiple attempts and not succeeding I looked up a few possible solutions and I’m not able to figure it out. I first start out by removing the N/A and replacing with blanks. Then I got stuck and could not figure this out.

opp_data[is.na(opp_data)] <- ""

opp_data <- opp_data %>%
  left_join(df_new, c("player_num1" = "player_num1")) %>% select (player_num1, player_pre_rating) %>%
  group_by (player_num1) %>%
  summarize(avg_opp_rating=  round(mean(player_pre_rating),0))

#Final Ran out of time and could not figure out how to avg the scores. I wanted to create a second table called opp_data and ref the play num on df_new to get an avg score.

chess_final <- df_new %>%
  left_join(opp_data, c("player_num1" = "player_num1")) %>% select (player_name, player_state, player_total, player_pre_rating,avg_opp_rating)

#csv

write.csv(chess_final,"tournmnt_data.csv",row.names = FALSE)