knitr::opts_chunk$set(echo = TRUE)
library (readr)
library(stringr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(dbplyr)
##
## Attaching package: 'dbplyr'
##
## The following objects are masked from 'package:dplyr':
##
## ident, sql
library(data.table)
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:dplyr':
##
## between, first, last
##
## The following object is masked from 'package:purrr':
##
## transpose
#urlfile="https://raw.githubusercontent.com/Nhodgkinson/DATA_607_project1/main/tournamentinfo.txt"
tournamentinfo <- paste(readLines("https://raw.githubusercontent.com/Nhodgkinson/DATA_607_project1/main/tournamentinfo.txt"), collapse = '\n')
## Warning in readLines("https://raw.githubusercontent.com/Nhodgkinson/
## DATA_607_project1/main/tournamentinfo.txt"): incomplete final line found
## on 'https://raw.githubusercontent.com/Nhodgkinson/DATA_607_project1/main/
## tournamentinfo.txt'
df<-str_remove_all(tournamentinfo, "(-)\\1{2,}")
mydata<-read_delim(df, delim="|")
## New names:
## Rows: 129 Columns: 11
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "|" chr
## (11): Pair , Player Name , Total, Round...4, Round...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `Round` -> `Round...4`
## • `Round` -> `Round...5`
## • `Round` -> `Round...6`
## • `Round` -> `Round...7`
## • `Round` -> `Round...8`
## • `Round` -> `Round...9`
## • `Round` -> `Round...10`
mydata<-mydata[-1,]
String extracting data to create new columns
#player num column returns only digit values
player_num1 <- str_extract(mydata$` Pair `, "\\d+")
mydata$player_num1<-player_num1
#player state column returns non digit values
player_state <- str_extract(mydata$` Pair `, "\\D+")
mydata$player_state<-player_state
#player pre rating column
player_pre_rating <- str_extract(mydata$` Player Name `, "(\\bR: *)\\d{3,}")
#this next line removes the : from the new player_pre_rating value so I only have digits
player_pre_rating <- str_extract(player_pre_rating, "\\d{3,}")
mydata$player_pre_rating<-player_pre_rating
#player name column
player_name <- str_extract(mydata$` Player Name `, "\\D*")
mydata$player_name <-player_name
#player total column
player_total <- str_extract(mydata$Total, "\\d(\\b.)\\d")
mydata$player_total <- player_total
mydata<-subset(mydata, select=-c(1:3))
#Subset data
df<-mydata[!is.na(mydata$player_pre_rating),]
mydata<-subset(mydata, select=-c(8,10,11))
mydata<-mydata[!is.na(mydata$player_num1),]
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
df_new <- cbind(df, mydata)
df_new<-subset(df_new, select=-c(1:9,12,13))
df_new[,c(10:12,1:9)]
## player_num1 player_name player_total player_state
## 1 1 GARY HUA 6.0 ON
## 2 2 DAKSHESH DARURI 6.0 MI
## 3 3 ADITYA BAJAJ 6.0 MI
## 4 4 PATRICK H SCHILLING 5.5 MI
## 5 5 HANSHI ZUO 5.5 MI
## 6 6 HANSEN SONG 5.0 OH
## 7 7 GARY DEE SWATHELL 5.0 MI
## 8 8 EZEKIEL HOUGHTON 5.0 MI
## 9 9 STEFANO LEE 5.0 ON
## 10 10 ANVIT RAO 5.0 MI
## 11 11 CAMERON WILLIAM MC LEMAN 4.5 MI
## 12 12 KENNETH J TACK 4.5 MI
## 13 13 TORRANCE HENRY JR 4.5 MI
## 14 14 BRADLEY SHAW 4.5 MI
## 15 15 ZACHARY JAMES HOUGHTON 4.5 MI
## 16 16 MIKE NIKITIN 4.0 MI
## 17 17 RONALD GRZEGORCZYK 4.0 MI
## 18 18 DAVID SUNDEEN 4.0 MI
## 19 19 DIPANKAR ROY 4.0 MI
## 20 20 JASON ZHENG 4.0 MI
## 21 21 DINH DANG BUI 4.0 ON
## 22 22 EUGENE L MCCLURE 4.0 MI
## 23 23 ALAN BUI 4.0 ON
## 24 24 MICHAEL R ALDRICH 4.0 MI
## 25 25 LOREN SCHWIEBERT 3.5 MI
## 26 26 MAX ZHU 3.5 ON
## 27 27 GAURAV GIDWANI 3.5 MI
## 28 28 SOFIA ADINA STANESCU-BELLU 3.5 MI
## 29 29 CHIEDOZIE OKORIE 3.5 MI
## 30 30 GEORGE AVERY JONES 3.5 ON
## 31 31 RISHI SHETTY 3.5 MI
## 32 32 JOSHUA PHILIP MATHEWS 3.5 ON
## 33 33 JADE GE 3.5 MI
## 34 34 MICHAEL JEFFERY THOMAS 3.5 MI
## 35 35 JOSHUA DAVID LEE 3.5 MI
## 36 36 SIDDHARTH JHA 3.5 MI
## 37 37 AMIYATOSH PWNANANDAM 3.5 MI
## 38 38 BRIAN LIU 3.0 MI
## 39 39 JOEL R HENDON 3.0 MI
## 40 40 FOREST ZHANG 3.0 MI
## 41 41 KYLE WILLIAM MURPHY 3.0 MI
## 42 42 JARED GE 3.0 MI
## 43 43 ROBERT GLEN VASEY 3.0 MI
## 44 44 JUSTIN D SCHILLING 3.0 MI
## 45 45 DEREK YAN 3.0 MI
## 46 46 JACOB ALEXANDER LAVALLEY 3.0 MI
## 47 47 ERIC WRIGHT 2.5 MI
## 48 48 DANIEL KHAIN 2.5 MI
## 49 49 MICHAEL J MARTIN 2.5 MI
## 50 50 SHIVAM JHA 2.5 MI
## 51 51 TEJAS AYYAGARI 2.5 MI
## 52 52 ETHAN GUO 2.5 MI
## 53 53 JOSE C YBARRA 2.0 MI
## 54 54 LARRY HODGE 2.0 MI
## 55 55 ALEX KONG 2.0 MI
## 56 56 MARISA RICCI 2.0 MI
## 57 57 MICHAEL LU 2.0 MI
## 58 58 VIRAJ MOHILE 2.0 MI
## 59 59 SEAN M MC CORMICK 2.0 MI
## 60 60 JULIA SHEN 1.5 MI
## 61 61 JEZZEL FARKAS 1.5 ON
## 62 62 ASHWIN BALAJI 1.0 MI
## 63 63 THOMAS JOSEPH HOSMER 1.0 MI
## 64 64 BEN LI 1.0 MI
## player_pre_rating Round...4 Round...5 Round...6 Round...7 Round...8
## 1 1794 W 39 W 21 W 18 W 14 W 7
## 2 1553 W 63 W 58 L 4 W 17 W 16
## 3 1384 L 8 W 61 W 25 W 21 W 11
## 4 1716 W 23 D 28 W 2 W 26 D 5
## 5 1655 W 45 W 37 D 12 D 13 D 4
## 6 1686 W 34 D 29 L 11 W 35 D 10
## 7 1649 W 57 W 46 W 13 W 11 L 1
## 8 1641 W 3 W 32 L 14 L 9 W 47
## 9 1411 W 25 L 18 W 59 W 8 W 26
## 10 1365 D 16 L 19 W 55 W 31 D 6
## 11 1712 D 38 W 56 W 6 L 7 L 3
## 12 1663 W 42 W 33 D 5 W 38 H
## 13 1666 W 36 W 27 L 7 D 5 W 33
## 14 1610 W 54 W 44 W 8 L 1 D 27
## 15 1220 D 19 L 16 W 30 L 22 W 54
## 16 1604 D 10 W 15 H W 39 L 2
## 17 1629 W 48 W 41 L 26 L 2 W 23
## 18 1600 W 47 W 9 L 1 W 32 L 19
## 19 1564 D 15 W 10 W 52 D 28 W 18
## 20 1595 L 40 W 49 W 23 W 41 W 28
## 21 1563 W 43 L 1 W 47 L 3 W 40
## 22 1555 W 64 D 52 L 28 W 15 H
## 23 1363 L 4 W 43 L 20 W 58 L 17
## 24 1229 L 28 L 47 W 43 L 25 W 60
## 25 1745 L 9 W 53 L 3 W 24 D 34
## 26 1579 W 49 W 40 W 17 L 4 L 9
## 27 1552 W 51 L 13 W 46 W 37 D 14
## 28 1507 W 24 D 4 W 22 D 19 L 20
## 29 1602 W 50 D 6 L 38 L 34 W 52
## 30 1522 L 52 D 64 L 15 W 55 L 31
## 31 1494 L 58 D 55 W 64 L 10 W 30
## 32 1441 W 61 L 8 W 44 L 18 W 51
## 33 1449 W 60 L 12 W 50 D 36 L 13
## 34 1399 L 6 W 60 L 37 W 29 D 25
## 35 1438 L 46 L 38 W 56 L 6 W 57
## 36 1355 L 13 W 57 W 51 D 33 H
## 37 980 B L 5 W 34 L 27 H
## 38 1423 D 11 W 35 W 29 L 12 H
## 39 1436 L 1 W 54 W 40 L 16 W 44
## 40 1348 W 20 L 26 L 39 W 59 L 21
## 41 1403 W 59 L 17 W 58 L 20 X
## 42 1332 L 12 L 50 L 57 D 60 D 61
## 43 1283 L 21 L 23 L 24 W 63 W 59
## 44 1199 B L 14 L 32 W 53 L 39
## 45 1242 L 5 L 51 D 60 L 56 W 63
## 46 377 W 35 L 7 L 27 L 50 W 64
## 47 1362 L 18 W 24 L 21 W 61 L 8
## 48 1382 L 17 W 63 H D 52 H
## 49 1291 L 26 L 20 D 63 D 64 W 58
## 50 1056 L 29 W 42 L 33 W 46 H
## 51 1011 L 27 W 45 L 36 W 57 L 32
## 52 935 W 30 D 22 L 19 D 48 L 29
## 53 1393 H L 25 H L 44 U
## 54 1270 L 14 L 39 L 61 B L 15
## 55 1186 L 62 D 31 L 10 L 30 B
## 56 1153 H L 11 L 35 W 45 H
## 57 1092 L 7 L 36 W 42 L 51 L 35
## 58 917 W 31 L 2 L 41 L 23 L 49
## 59 853 L 41 B L 9 L 40 L 43
## 60 967 L 33 L 34 D 45 D 42 L 24
## 61 955 L 32 L 3 W 54 L 47 D 42
## 62 1530 W 55 U U U U
## 63 1175 L 2 L 48 D 49 L 43 L 45
## 64 1163 L 22 D 30 L 31 D 49 L 46
## Round...9 Round...10
## 1 D 12 D 4
## 2 W 20 W 7
## 3 W 13 W 12
## 4 W 19 D 1
## 5 W 14 W 17
## 6 W 27 W 21
## 7 W 9 L 2
## 8 W 28 W 19
## 9 L 7 W 20
## 10 W 25 W 18
## 11 W 34 W 26
## 12 D 1 L 3
## 13 L 3 W 32
## 14 L 5 W 31
## 15 W 33 W 38
## 16 W 36 U
## 17 W 22 L 5
## 18 W 38 L 10
## 19 L 4 L 8
## 20 L 2 L 9
## 21 W 39 L 6
## 22 L 17 W 40
## 23 W 37 W 46
## 24 W 44 W 39
## 25 L 10 W 47
## 26 D 32 L 11
## 27 L 6 U
## 28 L 8 D 36
## 29 W 48 U
## 30 W 61 W 50
## 31 W 50 L 14
## 32 D 26 L 13
## 33 L 15 W 51
## 34 L 11 W 52
## 35 D 52 W 48
## 36 L 16 D 28
## 37 L 23 W 61
## 38 L 18 L 15
## 39 L 21 L 24
## 40 W 56 L 22
## 41 U U
## 42 W 64 W 56
## 43 L 46 W 55
## 44 L 24 W 59
## 45 D 55 W 58
## 46 W 43 L 23
## 47 D 51 L 25
## 48 L 29 L 35
## 49 H U
## 50 L 31 L 30
## 51 D 47 L 33
## 52 D 35 L 34
## 53 W 57 U
## 54 L 59 W 64
## 55 D 45 L 43
## 56 L 40 L 42
## 57 L 53 B
## 58 B L 45
## 59 W 54 L 44
## 60 H U
## 61 L 30 L 37
## 62 U U
## 63 H U
## 64 L 42 L 54
opp_data <- df_new[,c(10,3:9)]
#naming rows
I renamed the rows and pulled the numeric values out.
player_num <- opp_data$player_num1
opp_data$player_num<-player_num
rnd1 <- str_extract(opp_data$Round...4, "(\\b *)\\d{1,}")
opp_data$rnd1 <-rnd1
rnd2 <- str_extract(opp_data$Round...5, "(\\b *)\\d{1,}")
opp_data$rnd2 <-rnd2
rnd3 <- str_extract(opp_data$Round...6, "(\\b *)\\d{1,}")
opp_data$rnd3 <-rnd3
rnd4 <- str_extract(opp_data$Round...7, "(\\b *)\\d{1,}")
opp_data$rnd4 <-rnd4
rnd5 <- str_extract(opp_data$Round...8, "(\\b *)\\d{1,}")
opp_data$rnd5 <-rnd5
rnd6 <- str_extract(opp_data$Round...9, "(\\b *)\\d{1,}")
opp_data$rnd6 <-rnd6
rnd7 <- str_extract(opp_data$Round...10, "(\\b *)\\d{1,}")
opp_data$rnd7 <-rnd7
opp_data<-subset(opp_data, select=-c(2:9))
df_new$player_pre_rating<- as.integer(df_new$player_pre_rating)
#multiple attempts and not succeeding I looked up a few possible solutions and I’m not able to figure it out. I first start out by removing the N/A and replacing with blanks. Then I got stuck and could not figure this out.
opp_data[is.na(opp_data)] <- ""
opp_data <- opp_data %>%
left_join(df_new, c("player_num1" = "player_num1")) %>% select (player_num1, player_pre_rating) %>%
group_by (player_num1) %>%
summarize(avg_opp_rating= round(mean(player_pre_rating),0))
#Final Ran out of time and could not figure out how to avg the scores. I wanted to create a second table called opp_data and ref the play num on df_new to get an avg score.
chess_final <- df_new %>%
left_join(opp_data, c("player_num1" = "player_num1")) %>% select (player_name, player_state, player_total, player_pre_rating,avg_opp_rating)
#csv
write.csv(chess_final,"tournmnt_data.csv",row.names = FALSE)