Project1

In this project, you are given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players:

Player’s Name, Players State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents

For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605

Raw Data

Raw Data

library("bitops")
library("RCurl")
library('stringr')
## Warning: package 'stringr' was built under R version 3.3.3
# Load text file from Github repository
url = "https://raw.githubusercontent.com/chrisestevez/DataAnalytics/master/Project1/tournamentinfo.txt"
# empty data frame to hold extracted values
finalDataFrame = data.frame()
Rdata = getURL(url)
MyData = read.csv(text = Rdata,header = FALSE,stringsAsFactors = F)
#initiate a loop and retreive row information
for (i in 1:nrow(MyData)) {
  playerNum= str_extract(MyData[i,],"[[:digit:]]+")
  name = str_extract(MyData[i,],"(\\w+\\s\\w+\\s*\\w*\\s*\\w*)")

    State = str_extract(MyData[i+1,],"[[:alpha:]]+")
    NumPoints =str_extract(MyData[i,],"(\\d\\.\\d)")
   PreRating = str_sub(MyData[i+1,],23, 26)
    r1 =str_sub(MyData[i,], 51, 52)
    r2 = str_sub(MyData[i,], 57, 58)
    r3 = str_sub(MyData[i,], 63, 64)
    r4 = str_sub(MyData[i,], 69, 70)
    r5 = str_sub(MyData[i,], 75, 76)
    r6 = str_sub(MyData[i,], 81, 82)
    r7 = str_sub(MyData[i,], 87, 88)

    #variable will hold data and append to data frame
  capturenum = data.frame("PlayerNum"=playerNum ,"PlayerState"=State,"PlayerName"=name,"TotalNumPoints" =NumPoints, "PlayerPreRating"=PreRating, "AvgPrerate"=0.0,"game1"=r1,"game2"=r2,"game3"=r3,"game4"=r4,"game5"=r5,"game6"=r6,"game7"=r7,stringsAsFactors = F)  
    finalDataFrame =rbind(finalDataFrame,capturenum)
}
# here is the result of the finalDataFrame.
head(finalDataFrame)
##   PlayerNum PlayerState                       PlayerName TotalNumPoints
## 1      <NA>        Pair                             <NA>           <NA>
## 2      <NA>         Num Player Name                                <NA>
## 3         1        <NA>                         USCF ID            <NA>
## 4      <NA>        GARY                             <NA>           <NA>
## 5         1          ON GARY HUA                                    6.0
## 6  15445895        <NA>                             <NA>           <NA>
##   PlayerPreRating AvgPrerate game1 game2 game3 game4 game5 game6 game7
## 1                          0    --    --    --    --    --    --    --
## 2            (Pre          0    nd    nd    nd    nd    nd    nd    nd
## 3            ----          0                                          
## 4                          0    --    --    --    --    --    --    --
## 5            1794          0    39    21    18    14     7    12     4
## 6            ----          0
#remove Nas from data
datanoNas = data.frame()
datanoNas =finalDataFrame[!is.na(finalDataFrame$TotalNumPoints),] 

#Here is the current out put withought Nas
head(datanoNas)
##    PlayerNum PlayerState                       PlayerName TotalNumPoints
## 5          1          ON GARY HUA                                    6.0
## 8          2          MI DAKSHESH DARURI                             6.0
## 11         3          MI ADITYA BAJAJ                                6.0
## 14         4          MI PATRICK H SCHILLING                         5.5
## 17         5          MI HANSHI ZUO                                  5.5
## 20         6          OH HANSEN SONG                                 5.0
##    PlayerPreRating AvgPrerate game1 game2 game3 game4 game5 game6 game7
## 5             1794          0    39    21    18    14     7    12     4
## 8             1553          0    63    58     4    17    16    20     7
## 11            1384          0     8    61    25    21    11    13    12
## 14            1716          0    23    28     2    26     5    19     1
## 17            1655          0    45    37    12    13     4    14    17
## 20            1686          0    34    29    11    35    10    27    21
# Converted values to numeric in order to do calculations.
datanoNas$game1=as.numeric(as.character(datanoNas$game1))
datanoNas$game2=as.numeric(as.character(datanoNas$game2))
datanoNas$game3=as.numeric(as.character(datanoNas$game3))
datanoNas$game4=as.numeric(as.character(datanoNas$game4))
datanoNas$game5=as.numeric(as.character(datanoNas$game5))
datanoNas$game6=as.numeric(as.character(datanoNas$game6))
datanoNas$game7=as.numeric(as.character(datanoNas$game7))
datanoNas$PlayerPreRating=as.numeric(as.character(datanoNas$PlayerPreRating))


# initial implementation 
# using merge function  I was matching the opponents column to the players  pre rating.
#End goal was to sum the players rating and divide by non Nas
#Due to my inability to rename the column after matching to original data I abandon this method.
#please see below example
f1 =data.frame()
l1= data.frame(datanoNas$PlayerNum,datanoNas$PlayerPreRating)  
head(l1)
##   datanoNas.PlayerNum datanoNas.PlayerPreRating
## 1                   1                      1794
## 2                   2                      1553
## 3                   3                      1384
## 4                   4                      1716
## 5                   5                      1655
## 6                   6                      1686
f1=merge(datanoNas,l1,by.x = "game1",by.y = "datanoNas.PlayerNum",all.x = TRUE)
head(f1)
##   game1 PlayerNum PlayerState                       PlayerName
## 1     1        39          MI JOEL R HENDON                   
## 2     2        63          MI THOMAS JOSEPH HOSMER            
## 3     3         8          MI EZEKIEL HOUGHTON                
## 4     4        23          ON ALAN BUI                        
## 5     5        45          MI DEREK YAN                       
## 6     6        34          MI MICHAEL JEFFERY THOMAS          
##   TotalNumPoints PlayerPreRating AvgPrerate game2 game3 game4 game5 game6
## 1            3.0            1436          0    54    40    16    44    21
## 2            1.0            1175          0    48    49    43    45    NA
## 3            5.0            1641          0    32    14     9    47    28
## 4            4.0            1363          0    43    20    58    17    37
## 5            3.0            1242          0    51    60    56    63    55
## 6            3.5            1399          0    60    37    29    25    11
##   game7 datanoNas.PlayerPreRating
## 1    24                      1794
## 2    NA                      1553
## 3    19                      1384
## 4    46                      1716
## 5    58                      1655
## 6    52                      1686
#colnames(f1$datanoNas.PlayerPreRating) ="Round1"
l2= data.frame(datanoNas$PlayerNum,datanoNas$PlayerPreRating)
head(l2)
##   datanoNas.PlayerNum datanoNas.PlayerPreRating
## 1                   1                      1794
## 2                   2                      1553
## 3                   3                      1384
## 4                   4                      1716
## 5                   5                      1655
## 6                   6                      1686
f1=merge(datanoNas,l2,by.x = "game2",by.y = "datanoNas.PlayerNum",all.x = TRUE)
head(f1)
##   game2 PlayerNum PlayerState                       PlayerName
## 1     1        21          ON DINH DANG BUI                   
## 2     2        58          MI VIRAJ MOHILE                    
## 3     3        61          ON JEZZEL FARKAS                   
## 4     4        28          MI             SOFIA ADINA STANESCU
## 5     5        37          MI AMIYATOSH PWNANANDAM            
## 6     6        29          MI CHIEDOZIE OKORIE                
##   TotalNumPoints PlayerPreRating AvgPrerate game1 game3 game4 game5 game6
## 1            4.0            1563          0    43    47     3    40    39
## 2            2.0             917          0    31    41    23    49    NA
## 3            1.5             955          0    32    54    47    42    30
## 4            3.5            1507          0    24    22    19    20     8
## 5            3.5             980          0    NA    34    27    NA    23
## 6            3.5            1602          0    50    38    34    52    48
##   game7 datanoNas.PlayerPreRating
## 1     6                      1794
## 2    45                      1553
## 3    37                      1384
## 4    36                      1716
## 5    61                      1655
## 6    NA                      1686
#Start loop
for (rowNum in 1:nrow(datanoNas)) {
  #empty variable to hold opponents pregame average
  favgpreRate = NULL  
    for (colNum in 1:7) {
      #included nested loop to iterate thru each column
    colName = paste("game", colNum,sep = "") 
    oppRowOriginal = datanoNas[rowNum, colName]
    favgpreRate = c(favgpreRate,datanoNas[oppRowOriginal, "PlayerPreRating"])
  }
  datanoNas[rowNum, "AvgPrerate"] = round(mean(favgpreRate, na.rm = TRUE), 0) 
  
}
#Output with pre game values
#head(datanoNas)
# here I generate CSV file to local default directory.
#http://stackoverflow.com/questions/16630085/write-csv-or-table-variables-to-file

#write.csv(datanoNas[,c("PlayerName","PlayerState","TotalNumPoints", "PlayerPreRating","AvgPrerate")], file="Project1outfile.csv",row.names=FALSE)

knitr::kable(datanoNas)
PlayerNum PlayerState PlayerName TotalNumPoints PlayerPreRating AvgPrerate game1 game2 game3 game4 game5 game6 game7
5 1 ON GARY HUA 6.0 1794 1605 39 21 18 14 7 12 4
8 2 MI DAKSHESH DARURI 6.0 1553 1469 63 58 4 17 16 20 7
11 3 MI ADITYA BAJAJ 6.0 1384 1564 8 61 25 21 11 13 12
14 4 MI PATRICK H SCHILLING 5.5 1716 1574 23 28 2 26 5 19 1
17 5 MI HANSHI ZUO 5.5 1655 1501 45 37 12 13 4 14 17
20 6 OH HANSEN SONG 5.0 1686 1519 34 29 11 35 10 27 21
23 7 MI GARY DEE SWATHELL 5.0 1649 1372 57 46 13 11 1 9 2
26 8 MI EZEKIEL HOUGHTON 5.0 1641 1468 3 32 14 9 47 28 19
29 9 ON STEFANO LEE 5.0 1411 1523 25 18 59 8 26 7 20
32 10 MI ANVIT RAO 5.0 1365 1554 16 19 55 31 6 25 18
35 11 MI CAMERON WILLIAM MC LEMAN 4.5 1712 1468 38 56 6 7 3 34 26
38 12 MI KENNETH J TACK 4.5 1663 1506 42 33 5 38 NA 1 3
41 13 MI TORRANCE HENRY JR 4.5 1666 1498 36 27 7 5 33 3 32
44 14 MI BRADLEY SHAW 4.5 1610 1515 54 44 8 1 27 5 31
47 15 MI ZACHARY JAMES HOUGHTON 4.5 1220 1484 19 16 30 22 54 33 38
50 16 MI MIKE NIKITIN 4.0 1604 1386 10 15 NA 39 2 36 NA
53 17 MI RONALD GRZEGORCZYK 4.0 1629 1499 48 41 26 2 23 22 5
56 18 MI DAVID SUNDEEN 4.0 1600 1480 47 9 1 32 19 38 10
59 19 MI DIPANKAR ROY 4.0 1564 1426 15 10 52 28 18 4 8
62 20 MI JASON ZHENG 4.0 1595 1411 40 49 23 41 28 2 9
65 21 ON DINH DANG BUI 4.0 1563 1470 43 1 47 3 40 39 6
68 22 MI EUGENE L MCCLURE 4.0 1555 1300 64 52 28 15 NA 17 40
71 23 ON ALAN BUI 4.0 1363 1214 4 43 20 58 17 37 46
74 24 MI MICHAEL R ALDRICH 4.0 1229 1357 28 47 43 25 60 44 39
77 25 MI LOREN SCHWIEBERT 3.5 1745 1363 9 53 3 24 34 10 47
80 26 ON MAX ZHU 3.5 1579 1507 49 40 17 4 9 32 11
83 27 MI GAURAV GIDWANI 3.5 1552 1222 51 13 46 37 14 6 NA
86 28 MI SOFIA ADINA STANESCU 3.5 1507 1522 24 4 22 19 20 8 36
89 29 MI CHIEDOZIE OKORIE 3.5 1602 1314 50 6 38 34 52 48 NA
92 30 ON GEORGE AVERY JONES 3.5 1522 1144 52 64 15 55 31 61 50
95 31 MI RISHI SHETTY 3.5 1494 1260 58 55 64 10 30 50 14
98 32 ON JOSHUA PHILIP MATHEWS 3.5 1441 1379 61 8 44 18 51 26 13
101 33 MI JADE GE 3.5 1449 1277 60 12 50 36 13 15 51
104 34 MI MICHAEL JEFFERY THOMAS 3.5 1399 1375 6 60 37 29 25 11 52
107 35 MI JOSHUA DAVID LEE 3.5 1438 1150 46 38 56 6 57 52 48
110 36 MI SIDDHARTH JHA 3.5 1355 1388 13 57 51 33 NA 16 28
113 37 MI AMIYATOSH PWNANANDAM 3.5 980 1385 NA 5 34 27 NA 23 61
116 38 MI BRIAN LIU 3.0 1423 1539 11 35 29 12 NA 18 15
119 39 MI JOEL R HENDON 3.0 1436 1430 1 54 40 16 44 21 24
122 40 MI FOREST ZHANG 3.0 1348 1391 20 26 39 59 21 56 22
125 41 MI KYLE WILLIAM MURPHY 3.0 1403 1248 59 17 58 20 NA NA NA
128 42 MI JARED GE 3.0 1332 1150 12 50 57 60 61 64 56
131 43 MI ROBERT GLEN VASEY 3.0 1283 1107 21 23 24 63 59 46 55
134 44 MI JUSTIN D SCHILLING 3.0 1199 1327 NA 14 32 53 39 24 59
137 45 MI DEREK YAN 3.0 1242 1152 5 51 60 56 63 55 58
140 46 MI JACOB ALEXANDER LAVALLEY 3.0 377 1358 35 7 27 50 64 43 23
143 47 MI ERIC WRIGHT 2.5 1362 1392 18 24 21 61 8 51 25
146 48 MI DANIEL KHAIN 2.5 1382 1356 17 63 NA 52 NA 29 35
149 49 MI MICHAEL J MARTIN 2.5 1291 1286 26 20 63 64 58 NA NA
152 50 MI SHIVAM JHA 2.5 1056 1296 29 42 33 46 NA 31 30
155 51 MI TEJAS AYYAGARI 2.5 1011 1356 27 45 36 57 32 47 33
158 52 MI ETHAN GUO 2.5 935 1495 30 22 19 48 29 35 34
161 53 MI JOSE C YBARRA 2.0 1393 1345 NA 25 NA 44 NA 57 NA
164 54 MI LARRY HODGE 2.0 1270 1206 14 39 61 NA 15 59 64
167 55 MI ALEX KONG 2.0 1186 1406 62 31 10 30 NA 45 43
170 56 MI MARISA RICCI 2.0 1153 1414 NA 11 35 45 NA 40 42
173 57 MI MICHAEL LU 2.0 1092 1363 7 36 42 51 35 53 NA
176 58 MI VIRAJ MOHILE 2.0 917 1391 31 2 41 23 49 NA 45
179 59 MI SEAN M MC CORMICK 2.0 853 1319 41 NA 9 40 43 54 44
182 60 MI JULIA SHEN 1.5 967 1330 33 34 45 42 24 NA NA
185 61 ON JEZZEL FARKAS 1.5 955 1327 32 3 54 47 42 30 37
188 62 MI ASHWIN BALAJI 1.0 1530 1186 55 NA NA NA NA NA NA
191 63 MI THOMAS JOSEPH HOSMER 1.0 1175 1350 2 48 49 43 45 NA NA
194 64 MI BEN LI 1.0 1163 1263 22 30 31 49 46 42 54