In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could, for example, be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents. For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605. The value 1605 was calculated by summing the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663, and 1716, and dividing by the total number of games played.
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
I uploaded the text file to my GitHub account, and R reads its lines directly from GitHub.
# Read the tournament report line by line straight from GitHub. The first
# four lines are skipped so the data starts at the first player record.
chessGame <- read_lines(
  file = "https://raw.githubusercontent.com/deepasharma06/Data-607/main/ChessGame.txt",
  skip = 4
)
# Preview the first few raw lines
head(chessGame)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [3] "-----------------------------------------------------------------------------------------"
## [4] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [5] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [6] "-----------------------------------------------------------------------------------------"
# A player's name sits between "| " and the next "|" on the first line of
# every three-line record. At least six name characters are required so that
# shorter fields on the same line are not picked up.
pattern_names <- "(?<=\\| )[A-Za-z -]{6,}(?=\\|)"
# Extract directly from the name lines. str_extract() returns exactly one
# string per line (NA when nothing matches), so there is no list of match
# matrices to coerce to character and base::names() is no longer shadowed.
names2 <- str_trim(
  str_extract(chessGame[seq(1, length(chessGame), 3)], pattern_names)
)
head(names2)
## [1] "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ"
## [4] "PATRICK H SCHILLING" "HANSHI ZUO" "HANSEN SONG"
# The state is the two capital letters immediately followed by a whitespace
# character and "|" on the second line of every three-line record.
pattern_states <- "[A-Z]{2}(?=\\s\\|)"
# str_extract() yields one string per line (NA when nothing matches) instead
# of a list of match matrices. The pattern matches letters only, so there is
# no surrounding whitespace left to trim.
states2 <- str_extract(
  chessGame[seq(2, length(chessGame), 3)],
  pattern_states
)
head(states2)
## [1] "ON" "MI" "MI" "MI" "MI" "OH"
# Total points are written as digit, dot, digit (e.g. "6.0") on the first
# line of every three-line record.
pattern_points <- "\\d\\.\\d"
# str_extract() yields one string per line (NA when nothing matches) instead
# of a list of match matrices. The values stay character, as before.
points2 <- str_extract(
  chessGame[seq(1, length(chessGame), 3)],
  pattern_points
)
head(points2)
## [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0"
## Pattern Rating
# The pre-tournament rating is the run of digits that follows "R:". The
# padding between the colon and the digits may vary, so any amount of
# whitespace is accepted; whatever follows the digits is ignored.
pattern_rating <- "R:\\s*(\\d+)"
# Ratings are on the second line of every three-line record
rating_lines <- chessGame[seq(2, length(chessGame), 3)]
# Column 2 of the match matrix is the captured digit run (NA when the line
# has no rating), converted from character to numeric
rating2 <- as.numeric(str_match(rating_lines, pattern_rating)[, 2])
# Players without a parsable rating keep the placeholder value 0
rating2[is.na(rating2)] <- 0
head(rating2)
## [1] 1794 1553 1384 1716 1655 1686
# Keep the first line of every three-line record; it holds the round results
MychessGame <- chessGame[seq(1, length(chessGame), 3)]
# A played round is a field such as "|W 39|": a result letter (W, L or D),
# padding, then the opponent's pair number. At least one digit is required,
# so a field without an opponent number can never produce an empty match.
# The closing "|" is only looked ahead at because it also opens the next field.
pattern_games <- "\\|[WLD]\\s+(\\d+)(?=\\|)"
# One match matrix per player: column 1 is the whole field, column 2 the
# captured opponent number
players_played <- str_match_all(MychessGame, pattern_games)
# Keep only the opponent numbers; a player with no played rounds gets
# character(0). The capture holds digits only, so no trimming is needed.
players2 <- lapply(players_played, function(x) x[, 2])
head(players2)
## [[1]]
## [1] "39" "21" "18" "14" "7" "12" "4"
##
## [[2]]
## [1] "63" "58" "4" "17" "16" "20" "7"
##
## [[3]]
## [1] "8" "61" "25" "21" "11" "13" "12"
##
## [[4]]
## [1] "23" "28" "2" "26" "5" "19" "1"
##
## [[5]]
## [1] "45" "37" "12" "13" "4" "14" "17"
##
## [[6]]
## [1] "34" "29" "11" "35" "10" "27" "21"
# Opponent pair numbers, converted to numeric so they can index rating2
index_players <- lapply(players2, as.numeric)
rows_players <- length(index_players)
# Fail fast on an opponent number that is missing or does not point at a
# player, instead of silently averaging NA values
opponent_ids <- unlist(index_players)
stopifnot(
  !anyNA(opponent_ids),
  all(opponent_ids >= 1),
  all(opponent_ids <= length(rating2))
)
# Vectorised lookup: each player's vector of opponent pre-ratings. Unlike a
# 1:length() loop, this also works for a player with no played rounds.
player_opponents <- lapply(index_players, function(idx) rating2[idx])
# Mean opponent rating per player, rounded to a whole number; NA when the
# player has no opponents
avg_players <- round(vapply(
  player_opponents,
  function(r) if (length(r) > 0) mean(r) else NA_real_,
  numeric(1)
))
head(avg_players)
## [1] 1605 1469 1564 1574 1501 1519
# Assemble one row per player. A data frame is used instead of cbind()
# because cbind() coerces every column to character, which makes write.csv()
# quote the numeric columns. data.frame() also raises an error when the
# column lengths are incompatible instead of recycling them.
Result <- data.frame(
  names2 = names2,
  states2 = states2,
  points2 = as.numeric(points2),
  rating2 = rating2,
  avg_players = avg_players,
  stringsAsFactors = FALSE
)
# The file name is kept as-is because the read-back check expects it
write.csv(Result, "607project1.txt", row.names = FALSE)
##output
head(Result)
## names2 states2 points2 rating2 avg_players
## [1,] "GARY HUA" "ON" "6.0" "1794" "1605"
## [2,] "DAKSHESH DARURI" "MI" "6.0" "1553" "1469"
## [3,] "ADITYA BAJAJ" "MI" "6.0" "1384" "1564"
## [4,] "PATRICK H SCHILLING" "MI" "5.5" "1716" "1574"
## [5,] "HANSHI ZUO" "MI" "5.5" "1655" "1501"
## [6,] "HANSEN SONG" "OH" "5.0" "1686" "1519"
# Read the exported file back in to confirm that it parses as CSV
check_csv_file <- read.csv(file = "607project1.txt")
# Show the first rows of the re-imported table
head(check_csv_file)
## names2 states2 points2 rating2 avg_players
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519