In this project, you are given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players:
Player’s Name, Players State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents
For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605
Raw Data
library("bitops")
library("RCurl")
library('stringr')
## Warning: package 'stringr' was built under R version 3.3.3
# Load text file from Github repository
url = "https://raw.githubusercontent.com/chrisestevez/DataAnalytics/master/Project1/tournamentinfo.txt"
# empty data frame to hold extracted values
finalDataFrame = data.frame()
Rdata = getURL(url)
MyData = read.csv(text = Rdata,header = FALSE,stringsAsFactors = F)
#initiate a loop and retreive row information
for (i in 1:nrow(MyData)) {
playerNum= str_extract(MyData[i,],"[[:digit:]]+")
name = str_extract(MyData[i,],"(\\w+\\s\\w+\\s*\\w*\\s*\\w*)")
State = str_extract(MyData[i+1,],"[[:alpha:]]+")
NumPoints =str_extract(MyData[i,],"(\\d\\.\\d)")
PreRating = str_sub(MyData[i+1,],23, 26)
r1 =str_sub(MyData[i,], 51, 52)
r2 = str_sub(MyData[i,], 57, 58)
r3 = str_sub(MyData[i,], 63, 64)
r4 = str_sub(MyData[i,], 69, 70)
r5 = str_sub(MyData[i,], 75, 76)
r6 = str_sub(MyData[i,], 81, 82)
r7 = str_sub(MyData[i,], 87, 88)
#variable will hold data and append to data frame
capturenum = data.frame("PlayerNum"=playerNum ,"PlayerState"=State,"PlayerName"=name,"TotalNumPoints" =NumPoints, "PlayerPreRating"=PreRating, "AvgPrerate"=0.0,"game1"=r1,"game2"=r2,"game3"=r3,"game4"=r4,"game5"=r5,"game6"=r6,"game7"=r7,stringsAsFactors = F)
finalDataFrame =rbind(finalDataFrame,capturenum)
}
# here is the result of the finalDataFrame.
head(finalDataFrame)
## PlayerNum PlayerState PlayerName TotalNumPoints
## 1 <NA> Pair <NA> <NA>
## 2 <NA> Num Player Name <NA>
## 3 1 <NA> USCF ID <NA>
## 4 <NA> GARY <NA> <NA>
## 5 1 ON GARY HUA 6.0
## 6 15445895 <NA> <NA> <NA>
## PlayerPreRating AvgPrerate game1 game2 game3 game4 game5 game6 game7
## 1 0 -- -- -- -- -- -- --
## 2 (Pre 0 nd nd nd nd nd nd nd
## 3 ---- 0
## 4 0 -- -- -- -- -- -- --
## 5 1794 0 39 21 18 14 7 12 4
## 6 ---- 0
#remove Nas from data
datanoNas = data.frame()
datanoNas =finalDataFrame[!is.na(finalDataFrame$TotalNumPoints),]
#Here is the current out put withought Nas
head(datanoNas)
## PlayerNum PlayerState PlayerName TotalNumPoints
## 5 1 ON GARY HUA 6.0
## 8 2 MI DAKSHESH DARURI 6.0
## 11 3 MI ADITYA BAJAJ 6.0
## 14 4 MI PATRICK H SCHILLING 5.5
## 17 5 MI HANSHI ZUO 5.5
## 20 6 OH HANSEN SONG 5.0
## PlayerPreRating AvgPrerate game1 game2 game3 game4 game5 game6 game7
## 5 1794 0 39 21 18 14 7 12 4
## 8 1553 0 63 58 4 17 16 20 7
## 11 1384 0 8 61 25 21 11 13 12
## 14 1716 0 23 28 2 26 5 19 1
## 17 1655 0 45 37 12 13 4 14 17
## 20 1686 0 34 29 11 35 10 27 21
# Converted values to numeric in order to do calculations.
datanoNas$game1=as.numeric(as.character(datanoNas$game1))
datanoNas$game2=as.numeric(as.character(datanoNas$game2))
datanoNas$game3=as.numeric(as.character(datanoNas$game3))
datanoNas$game4=as.numeric(as.character(datanoNas$game4))
datanoNas$game5=as.numeric(as.character(datanoNas$game5))
datanoNas$game6=as.numeric(as.character(datanoNas$game6))
datanoNas$game7=as.numeric(as.character(datanoNas$game7))
datanoNas$PlayerPreRating=as.numeric(as.character(datanoNas$PlayerPreRating))
# initial implementation
# using merge function I was matching the opponents column to the players pre rating.
#End goal was to sum the players rating and divide by non Nas
#Due to my inability to rename the column after matching to original data I abandon this method.
#please see below example
f1 =data.frame()
l1= data.frame(datanoNas$PlayerNum,datanoNas$PlayerPreRating)
head(l1)
## datanoNas.PlayerNum datanoNas.PlayerPreRating
## 1 1 1794
## 2 2 1553
## 3 3 1384
## 4 4 1716
## 5 5 1655
## 6 6 1686
f1=merge(datanoNas,l1,by.x = "game1",by.y = "datanoNas.PlayerNum",all.x = TRUE)
head(f1)
## game1 PlayerNum PlayerState PlayerName
## 1 1 39 MI JOEL R HENDON
## 2 2 63 MI THOMAS JOSEPH HOSMER
## 3 3 8 MI EZEKIEL HOUGHTON
## 4 4 23 ON ALAN BUI
## 5 5 45 MI DEREK YAN
## 6 6 34 MI MICHAEL JEFFERY THOMAS
## TotalNumPoints PlayerPreRating AvgPrerate game2 game3 game4 game5 game6
## 1 3.0 1436 0 54 40 16 44 21
## 2 1.0 1175 0 48 49 43 45 NA
## 3 5.0 1641 0 32 14 9 47 28
## 4 4.0 1363 0 43 20 58 17 37
## 5 3.0 1242 0 51 60 56 63 55
## 6 3.5 1399 0 60 37 29 25 11
## game7 datanoNas.PlayerPreRating
## 1 24 1794
## 2 NA 1553
## 3 19 1384
## 4 46 1716
## 5 58 1655
## 6 52 1686
#colnames(f1$datanoNas.PlayerPreRating) ="Round1"
l2= data.frame(datanoNas$PlayerNum,datanoNas$PlayerPreRating)
head(l2)
## datanoNas.PlayerNum datanoNas.PlayerPreRating
## 1 1 1794
## 2 2 1553
## 3 3 1384
## 4 4 1716
## 5 5 1655
## 6 6 1686
f1=merge(datanoNas,l2,by.x = "game2",by.y = "datanoNas.PlayerNum",all.x = TRUE)
head(f1)
## game2 PlayerNum PlayerState PlayerName
## 1 1 21 ON DINH DANG BUI
## 2 2 58 MI VIRAJ MOHILE
## 3 3 61 ON JEZZEL FARKAS
## 4 4 28 MI SOFIA ADINA STANESCU
## 5 5 37 MI AMIYATOSH PWNANANDAM
## 6 6 29 MI CHIEDOZIE OKORIE
## TotalNumPoints PlayerPreRating AvgPrerate game1 game3 game4 game5 game6
## 1 4.0 1563 0 43 47 3 40 39
## 2 2.0 917 0 31 41 23 49 NA
## 3 1.5 955 0 32 54 47 42 30
## 4 3.5 1507 0 24 22 19 20 8
## 5 3.5 980 0 NA 34 27 NA 23
## 6 3.5 1602 0 50 38 34 52 48
## game7 datanoNas.PlayerPreRating
## 1 6 1794
## 2 45 1553
## 3 37 1384
## 4 36 1716
## 5 61 1655
## 6 NA 1686
#Start loop
for (rowNum in 1:nrow(datanoNas)) {
#empty variable to hold opponents pregame average
favgpreRate = NULL
for (colNum in 1:7) {
#included nested loop to iterate thru each column
colName = paste("game", colNum,sep = "")
oppRowOriginal = datanoNas[rowNum, colName]
favgpreRate = c(favgpreRate,datanoNas[oppRowOriginal, "PlayerPreRating"])
}
datanoNas[rowNum, "AvgPrerate"] = round(mean(favgpreRate, na.rm = TRUE), 0)
}
#Output with pre game values
#head(datanoNas)
# here I generate CSV file to local default directory.
#http://stackoverflow.com/questions/16630085/write-csv-or-table-variables-to-file
#write.csv(datanoNas[,c("PlayerName","PlayerState","TotalNumPoints", "PlayerPreRating","AvgPrerate")], file="Project1outfile.csv",row.names=FALSE)
knitr::kable(datanoNas)
PlayerNum | PlayerState | PlayerName | TotalNumPoints | PlayerPreRating | AvgPrerate | game1 | game2 | game3 | game4 | game5 | game6 | game7 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
5 | 1 | ON | GARY HUA | 6.0 | 1794 | 1605 | 39 | 21 | 18 | 14 | 7 | 12 | 4 |
8 | 2 | MI | DAKSHESH DARURI | 6.0 | 1553 | 1469 | 63 | 58 | 4 | 17 | 16 | 20 | 7 |
11 | 3 | MI | ADITYA BAJAJ | 6.0 | 1384 | 1564 | 8 | 61 | 25 | 21 | 11 | 13 | 12 |
14 | 4 | MI | PATRICK H SCHILLING | 5.5 | 1716 | 1574 | 23 | 28 | 2 | 26 | 5 | 19 | 1 |
17 | 5 | MI | HANSHI ZUO | 5.5 | 1655 | 1501 | 45 | 37 | 12 | 13 | 4 | 14 | 17 |
20 | 6 | OH | HANSEN SONG | 5.0 | 1686 | 1519 | 34 | 29 | 11 | 35 | 10 | 27 | 21 |
23 | 7 | MI | GARY DEE SWATHELL | 5.0 | 1649 | 1372 | 57 | 46 | 13 | 11 | 1 | 9 | 2 |
26 | 8 | MI | EZEKIEL HOUGHTON | 5.0 | 1641 | 1468 | 3 | 32 | 14 | 9 | 47 | 28 | 19 |
29 | 9 | ON | STEFANO LEE | 5.0 | 1411 | 1523 | 25 | 18 | 59 | 8 | 26 | 7 | 20 |
32 | 10 | MI | ANVIT RAO | 5.0 | 1365 | 1554 | 16 | 19 | 55 | 31 | 6 | 25 | 18 |
35 | 11 | MI | CAMERON WILLIAM MC LEMAN | 4.5 | 1712 | 1468 | 38 | 56 | 6 | 7 | 3 | 34 | 26 |
38 | 12 | MI | KENNETH J TACK | 4.5 | 1663 | 1506 | 42 | 33 | 5 | 38 | NA | 1 | 3 |
41 | 13 | MI | TORRANCE HENRY JR | 4.5 | 1666 | 1498 | 36 | 27 | 7 | 5 | 33 | 3 | 32 |
44 | 14 | MI | BRADLEY SHAW | 4.5 | 1610 | 1515 | 54 | 44 | 8 | 1 | 27 | 5 | 31 |
47 | 15 | MI | ZACHARY JAMES HOUGHTON | 4.5 | 1220 | 1484 | 19 | 16 | 30 | 22 | 54 | 33 | 38 |
50 | 16 | MI | MIKE NIKITIN | 4.0 | 1604 | 1386 | 10 | 15 | NA | 39 | 2 | 36 | NA |
53 | 17 | MI | RONALD GRZEGORCZYK | 4.0 | 1629 | 1499 | 48 | 41 | 26 | 2 | 23 | 22 | 5 |
56 | 18 | MI | DAVID SUNDEEN | 4.0 | 1600 | 1480 | 47 | 9 | 1 | 32 | 19 | 38 | 10 |
59 | 19 | MI | DIPANKAR ROY | 4.0 | 1564 | 1426 | 15 | 10 | 52 | 28 | 18 | 4 | 8 |
62 | 20 | MI | JASON ZHENG | 4.0 | 1595 | 1411 | 40 | 49 | 23 | 41 | 28 | 2 | 9 |
65 | 21 | ON | DINH DANG BUI | 4.0 | 1563 | 1470 | 43 | 1 | 47 | 3 | 40 | 39 | 6 |
68 | 22 | MI | EUGENE L MCCLURE | 4.0 | 1555 | 1300 | 64 | 52 | 28 | 15 | NA | 17 | 40 |
71 | 23 | ON | ALAN BUI | 4.0 | 1363 | 1214 | 4 | 43 | 20 | 58 | 17 | 37 | 46 |
74 | 24 | MI | MICHAEL R ALDRICH | 4.0 | 1229 | 1357 | 28 | 47 | 43 | 25 | 60 | 44 | 39 |
77 | 25 | MI | LOREN SCHWIEBERT | 3.5 | 1745 | 1363 | 9 | 53 | 3 | 24 | 34 | 10 | 47 |
80 | 26 | ON | MAX ZHU | 3.5 | 1579 | 1507 | 49 | 40 | 17 | 4 | 9 | 32 | 11 |
83 | 27 | MI | GAURAV GIDWANI | 3.5 | 1552 | 1222 | 51 | 13 | 46 | 37 | 14 | 6 | NA |
86 | 28 | MI | SOFIA ADINA STANESCU | 3.5 | 1507 | 1522 | 24 | 4 | 22 | 19 | 20 | 8 | 36 |
89 | 29 | MI | CHIEDOZIE OKORIE | 3.5 | 1602 | 1314 | 50 | 6 | 38 | 34 | 52 | 48 | NA |
92 | 30 | ON | GEORGE AVERY JONES | 3.5 | 1522 | 1144 | 52 | 64 | 15 | 55 | 31 | 61 | 50 |
95 | 31 | MI | RISHI SHETTY | 3.5 | 1494 | 1260 | 58 | 55 | 64 | 10 | 30 | 50 | 14 |
98 | 32 | ON | JOSHUA PHILIP MATHEWS | 3.5 | 1441 | 1379 | 61 | 8 | 44 | 18 | 51 | 26 | 13 |
101 | 33 | MI | JADE GE | 3.5 | 1449 | 1277 | 60 | 12 | 50 | 36 | 13 | 15 | 51 |
104 | 34 | MI | MICHAEL JEFFERY THOMAS | 3.5 | 1399 | 1375 | 6 | 60 | 37 | 29 | 25 | 11 | 52 |
107 | 35 | MI | JOSHUA DAVID LEE | 3.5 | 1438 | 1150 | 46 | 38 | 56 | 6 | 57 | 52 | 48 |
110 | 36 | MI | SIDDHARTH JHA | 3.5 | 1355 | 1388 | 13 | 57 | 51 | 33 | NA | 16 | 28 |
113 | 37 | MI | AMIYATOSH PWNANANDAM | 3.5 | 980 | 1385 | NA | 5 | 34 | 27 | NA | 23 | 61 |
116 | 38 | MI | BRIAN LIU | 3.0 | 1423 | 1539 | 11 | 35 | 29 | 12 | NA | 18 | 15 |
119 | 39 | MI | JOEL R HENDON | 3.0 | 1436 | 1430 | 1 | 54 | 40 | 16 | 44 | 21 | 24 |
122 | 40 | MI | FOREST ZHANG | 3.0 | 1348 | 1391 | 20 | 26 | 39 | 59 | 21 | 56 | 22 |
125 | 41 | MI | KYLE WILLIAM MURPHY | 3.0 | 1403 | 1248 | 59 | 17 | 58 | 20 | NA | NA | NA |
128 | 42 | MI | JARED GE | 3.0 | 1332 | 1150 | 12 | 50 | 57 | 60 | 61 | 64 | 56 |
131 | 43 | MI | ROBERT GLEN VASEY | 3.0 | 1283 | 1107 | 21 | 23 | 24 | 63 | 59 | 46 | 55 |
134 | 44 | MI | JUSTIN D SCHILLING | 3.0 | 1199 | 1327 | NA | 14 | 32 | 53 | 39 | 24 | 59 |
137 | 45 | MI | DEREK YAN | 3.0 | 1242 | 1152 | 5 | 51 | 60 | 56 | 63 | 55 | 58 |
140 | 46 | MI | JACOB ALEXANDER LAVALLEY | 3.0 | 377 | 1358 | 35 | 7 | 27 | 50 | 64 | 43 | 23 |
143 | 47 | MI | ERIC WRIGHT | 2.5 | 1362 | 1392 | 18 | 24 | 21 | 61 | 8 | 51 | 25 |
146 | 48 | MI | DANIEL KHAIN | 2.5 | 1382 | 1356 | 17 | 63 | NA | 52 | NA | 29 | 35 |
149 | 49 | MI | MICHAEL J MARTIN | 2.5 | 1291 | 1286 | 26 | 20 | 63 | 64 | 58 | NA | NA |
152 | 50 | MI | SHIVAM JHA | 2.5 | 1056 | 1296 | 29 | 42 | 33 | 46 | NA | 31 | 30 |
155 | 51 | MI | TEJAS AYYAGARI | 2.5 | 1011 | 1356 | 27 | 45 | 36 | 57 | 32 | 47 | 33 |
158 | 52 | MI | ETHAN GUO | 2.5 | 935 | 1495 | 30 | 22 | 19 | 48 | 29 | 35 | 34 |
161 | 53 | MI | JOSE C YBARRA | 2.0 | 1393 | 1345 | NA | 25 | NA | 44 | NA | 57 | NA |
164 | 54 | MI | LARRY HODGE | 2.0 | 1270 | 1206 | 14 | 39 | 61 | NA | 15 | 59 | 64 |
167 | 55 | MI | ALEX KONG | 2.0 | 1186 | 1406 | 62 | 31 | 10 | 30 | NA | 45 | 43 |
170 | 56 | MI | MARISA RICCI | 2.0 | 1153 | 1414 | NA | 11 | 35 | 45 | NA | 40 | 42 |
173 | 57 | MI | MICHAEL LU | 2.0 | 1092 | 1363 | 7 | 36 | 42 | 51 | 35 | 53 | NA |
176 | 58 | MI | VIRAJ MOHILE | 2.0 | 917 | 1391 | 31 | 2 | 41 | 23 | 49 | NA | 45 |
179 | 59 | MI | SEAN M MC CORMICK | 2.0 | 853 | 1319 | 41 | NA | 9 | 40 | 43 | 54 | 44 |
182 | 60 | MI | JULIA SHEN | 1.5 | 967 | 1330 | 33 | 34 | 45 | 42 | 24 | NA | NA |
185 | 61 | ON | JEZZEL FARKAS | 1.5 | 955 | 1327 | 32 | 3 | 54 | 47 | 42 | 30 | 37 |
188 | 62 | MI | ASHWIN BALAJI | 1.0 | 1530 | 1186 | 55 | NA | NA | NA | NA | NA | NA |
191 | 63 | MI | THOMAS JOSEPH HOSMER | 1.0 | 1175 | 1350 | 2 | 48 | 49 | 43 | 45 | NA | NA |
194 | 64 | MI | BEN LI | 1.0 | 1163 | 1263 | 22 | 30 | 31 | 49 | 46 | 42 | 54 |