DATA607-Project 1

Project Question

In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605 1605 was calculated by using the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663, 1716, and dividing by the total number of games played.

library(stringr)
library(knitr)
library(dplyr)

## Warning: package 'dplyr' was built under R version 3.4.4

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Sourcing Data

tournamentinfo<-"https://raw.githubusercontent.com/juanellemarks/JuanelleMarks_DATA607_2018/master/tournamentinfo.text"

textFile <- read.csv(tournamentinfo, header = FALSE)

textFile[c(1:10), ]

##  [1] ----------------------------------------------------------------------------------------- 
##  [2]  Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| 
##  [3]  Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | 
##  [4] ----------------------------------------------------------------------------------------- 
##  [5]     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4| 
##  [6]    ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    | 
##  [7] ----------------------------------------------------------------------------------------- 
##  [8]     2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7| 
##  [9]    MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    | 
## [10] ----------------------------------------------------------------------------------------- 
## 131 Levels:     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4| ...

Data Cleansing

#remove first four rows as they are unecessary to the solution
textFile1<-textFile[-c(1:4),]
head(textFile1,3)

## [1]     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|
## [2]    ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |
## [3] -----------------------------------------------------------------------------------------
## 131 Levels:     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4| ...

#Convert from factor to character
textFile1 <- as.character(textFile1)
head(textFile1,3)

## [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
## [2] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [3] "-----------------------------------------------------------------------------------------"

Data Manipulation

#extract rows with player information
playerInfo<-textFile1[seq(1, length(textFile1), 3)] 
head(playerInfo,3)

## [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
## [2] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
## [3] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"

#extract rows with rating information
ratingInfo<-textFile1[seq(2, length(textFile1), 3)]
head(ratingInfo,3)

## [1] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [2] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
## [3] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"

#remove unwanted characters from rows
player_id <- as.integer(str_extract(playerInfo, "\\d+"))

playerName <- str_trim(str_extract(playerInfo, "(\\w+\\s){2,3}"))

player_state <- str_extract(ratingInfo, "\\w\\w")
totalPoints<- as.numeric(str_extract(playerInfo, "\\d.\\d"))

pre_Rating<- as.integer(str_extract(str_extract(ratingInfo, "\\D\\d{3,4}\\D"), "\\d+")) 

opponent <- str_extract_all(str_extract_all(playerInfo, "\\d+\\|"), "\\d+")

opponent_count<- sapply(opponent, length)
seq.max <- seq_len(max(opponent_count))
z <- t(sapply(opponent, "[", i = seq.max)) 
textFile2 <- as.data.frame(z)


y <- data.frame(player_id, pre_Rating)
textFile3 <- textFile2
textFile3[] <- y$pre_Rating[match(unlist(textFile3), y$player_id)]
kable(head(textFile3))

V1	V2	V3	V4	V5	V6	V7
1436	1563	1600	1610	1649	1663	1716
1175	917	1716	1629	1604	1595	1649
1641	955	1745	1563	1712	1666	1663
1363	1507	1553	1579	1655	1564	1794
1242	980	1663	1666	1716	1610	1629
1399	1602	1712	1438	1365	1552	1563

#  Finding average pre-Chess rating of opponents
textFile3$oppAvg <- round(apply(textFile3, 1, mean, na.rm=TRUE))
kable(head(textFile3))

V1	V2	V3	V4	V5	V6	V7	oppAvg
1436	1563	1600	1610	1649	1663	1716	1605
1175	917	1716	1629	1604	1595	1649	1469
1641	955	1745	1563	1712	1666	1663	1564
1363	1507	1553	1579	1655	1564	1794	1574
1242	980	1663	1666	1716	1610	1629	1501
1399	1602	1712	1438	1365	1552	1563	1519

final_textFile <- data_frame(player_id, playerName, player_state , totalPoints, pre_Rating, textFile3$oppAvg )

colnames(final_textFile) <-  c("ID", "Player_Name", "Player_State", "Total_Points", "Pre_Match_Rating", "Opponent's_Average")

kable(head(final_textFile, 10))

ID	Player_Name	Player_State	Total_Points	Pre_Match_Rating	Opponent’s_Average
1	GARY HUA	ON	6.0	1794	1605
2	DAKSHESH DARURI	MI	6.0	1553	1469
3	ADITYA BAJAJ	MI	6.0	1384	1564
4	PATRICK H SCHILLING	MI	5.5	1716	1574
5	HANSHI ZUO	MI	5.5	1655	1501
6	HANSEN SONG	OH	5.0	1686	1519
7	GARY DEE SWATHELL	MI	5.0	1649	1372
8	EZEKIEL HOUGHTON	MI	5.0	1641	1468
9	STEFANO LEE	ON	5.0	1411	1523
10	ANVIT RAO	MI	5.0	1365	1554

Write to CSV File

write.csv(final_textFile,file="/Users/juanelle/Desktop/MSDS/Data607/Week Three/Project 1/DATA607_ProjectOne_Newtxtfile")

DATA607-Project 1

Juanelle Marks

9/23/2018

Project Question

Sourcing Data

Data Cleansing

Data Manipulation

Write to CSV File