Project Question

In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605 1605 was calculated by using the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663, 1716, and dividing by the total number of games played.

library(stringr)
library(knitr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.4
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Sourcing Data

tournamentinfo<-"https://raw.githubusercontent.com/juanellemarks/JuanelleMarks_DATA607_2018/master/tournamentinfo.text"

textFile <- read.csv(tournamentinfo, header = FALSE)

textFile[c(1:10), ]
##  [1] ----------------------------------------------------------------------------------------- 
##  [2]  Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| 
##  [3]  Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | 
##  [4] ----------------------------------------------------------------------------------------- 
##  [5]     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4| 
##  [6]    ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    | 
##  [7] ----------------------------------------------------------------------------------------- 
##  [8]     2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7| 
##  [9]    MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    | 
## [10] ----------------------------------------------------------------------------------------- 
## 131 Levels:     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4| ...

Data Cleansing

#remove first four rows as they are unecessary to the solution
textFile1<-textFile[-c(1:4),]
head(textFile1,3)
## [1]     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|
## [2]    ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |
## [3] -----------------------------------------------------------------------------------------
## 131 Levels:     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4| ...
#Convert from factor to character
textFile1 <- as.character(textFile1)
head(textFile1,3)
## [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
## [2] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [3] "-----------------------------------------------------------------------------------------"

Data Manipulation

#extract rows with player information
playerInfo<-textFile1[seq(1, length(textFile1), 3)] 
head(playerInfo,3)
## [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
## [2] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
## [3] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
#extract rows with rating information
ratingInfo<-textFile1[seq(2, length(textFile1), 3)]
head(ratingInfo,3)
## [1] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [2] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
## [3] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
#remove unwanted characters from rows
player_id <- as.integer(str_extract(playerInfo, "\\d+"))

playerName <- str_trim(str_extract(playerInfo, "(\\w+\\s){2,3}"))

player_state <- str_extract(ratingInfo, "\\w\\w")
totalPoints<- as.numeric(str_extract(playerInfo, "\\d.\\d"))

pre_Rating<- as.integer(str_extract(str_extract(ratingInfo, "\\D\\d{3,4}\\D"), "\\d+")) 

opponent <- str_extract_all(str_extract_all(playerInfo, "\\d+\\|"), "\\d+")
opponent_count<- sapply(opponent, length)
seq.max <- seq_len(max(opponent_count))
z <- t(sapply(opponent, "[", i = seq.max)) 
textFile2 <- as.data.frame(z)


y <- data.frame(player_id, pre_Rating)
textFile3 <- textFile2
textFile3[] <- y$pre_Rating[match(unlist(textFile3), y$player_id)]
kable(head(textFile3))
V1 V2 V3 V4 V5 V6 V7
1436 1563 1600 1610 1649 1663 1716
1175 917 1716 1629 1604 1595 1649
1641 955 1745 1563 1712 1666 1663
1363 1507 1553 1579 1655 1564 1794
1242 980 1663 1666 1716 1610 1629
1399 1602 1712 1438 1365 1552 1563
#  Finding average pre-Chess rating of opponents
textFile3$oppAvg <- round(apply(textFile3, 1, mean, na.rm=TRUE))
kable(head(textFile3))
V1 V2 V3 V4 V5 V6 V7 oppAvg
1436 1563 1600 1610 1649 1663 1716 1605
1175 917 1716 1629 1604 1595 1649 1469
1641 955 1745 1563 1712 1666 1663 1564
1363 1507 1553 1579 1655 1564 1794 1574
1242 980 1663 1666 1716 1610 1629 1501
1399 1602 1712 1438 1365 1552 1563 1519
final_textFile <- data_frame(player_id, playerName, player_state , totalPoints, pre_Rating, textFile3$oppAvg )

colnames(final_textFile) <-  c("ID", "Player_Name", "Player_State", "Total_Points", "Pre_Match_Rating", "Opponent's_Average")

kable(head(final_textFile, 10))
ID Player_Name Player_State Total_Points Pre_Match_Rating Opponent’s_Average
1 GARY HUA ON 6.0 1794 1605
2 DAKSHESH DARURI MI 6.0 1553 1469
3 ADITYA BAJAJ MI 6.0 1384 1564
4 PATRICK H SCHILLING MI 5.5 1716 1574
5 HANSHI ZUO MI 5.5 1655 1501
6 HANSEN SONG OH 5.0 1686 1519
7 GARY DEE SWATHELL MI 5.0 1649 1372
8 EZEKIEL HOUGHTON MI 5.0 1641 1468
9 STEFANO LEE ON 5.0 1411 1523
10 ANVIT RAO MI 5.0 1365 1554

Write to CSV File

write.csv(final_textFile,file="/Users/juanelle/Desktop/MSDS/Data607/Week Three/Project 1/DATA607_ProjectOne_Newtxtfile")