Project1

In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605 1605 was calculated by using the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663, 1716, and dividing by the total number of games played.

Import Data

tournament <- readLines("https://raw.githubusercontent.com/DaisyCai2019/Homework/master/tournamentinfo.txt")
## Warning in readLines("https://raw.githubusercontent.com/DaisyCai2019/
## Homework/master/tournamentinfo.txt"): incomplete final line found
## on 'https://raw.githubusercontent.com/DaisyCai2019/Homework/master/
## tournamentinfo.txt'
head(tournament)
## [1] "-----------------------------------------------------------------------------------------" 
## [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
## [4] "-----------------------------------------------------------------------------------------" 
## [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
## [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"

Extraction

require(stringr)
## Loading required package: stringr
#Player’s Name
name<-unlist(str_extract_all(tournament, "\\w+[^USCF|a-z] ?\\w+ \\w+"))
name
##  [1] "GARY HUA"                 "DAKSHESH DARURI"         
##  [3] "ADITYA BAJAJ"             "PATRICK H SCHILLING"     
##  [5] "HANSHI ZUO"               "HANSEN SONG"             
##  [7] "GARY DEE SWATHELL"        "EZEKIEL HOUGHTON"        
##  [9] "STEFANO LEE"              "ANVIT RAO"               
## [11] "CAMERON WILLIAM MC"       "KENNETH J TACK"          
## [13] "TORRANCE HENRY JR"        "BRADLEY SHAW"            
## [15] "ZACHARY JAMES HOUGHTON"   "MIKE NIKITIN"            
## [17] "RONALD GRZEGORCZYK"       "DAVID SUNDEEN"           
## [19] "DIPANKAR ROY"             "JASON ZHENG"             
## [21] "DINH DANG BUI"            "EUGENE L MCCLURE"        
## [23] "ALAN BUI"                 "MICHAEL R ALDRICH"       
## [25] "LOREN SCHWIEBERT"         "MAX ZHU"                 
## [27] "GAURAV GIDWANI"           "SOFIA ADINA STANESCU"    
## [29] "CHIEDOZIE OKORIE"         "GEORGE AVERY JONES"      
## [31] "RISHI SHETTY"             "JOSHUA PHILIP MATHEWS"   
## [33] "JADE GE"                  "MICHAEL JEFFERY THOMAS"  
## [35] "JOSHUA DAVID LEE"         "SIDDHARTH JHA"           
## [37] "AMIYATOSH PWNANANDAM"     "BRIAN LIU"               
## [39] "JOEL R HENDON"            "FOREST ZHANG"            
## [41] "KYLE WILLIAM MURPHY"      "JARED GE"                
## [43] "ROBERT GLEN VASEY"        "JUSTIN D SCHILLING"      
## [45] "DEREK YAN"                "JACOB ALEXANDER LAVALLEY"
## [47] "ERIC WRIGHT"              "DANIEL KHAIN"            
## [49] "MICHAEL J MARTIN"         "SHIVAM JHA"              
## [51] "TEJAS AYYAGARI"           "ETHAN GUO"               
## [53] "JOSE C YBARRA"            "LARRY HODGE"             
## [55] "ALEX KONG"                "MARISA RICCI"            
## [57] "MICHAEL LU"               "VIRAJ MOHILE"            
## [59] "SEAN M MC"                "JULIA SHEN"              
## [61] "JEZZEL FARKAS"            "ASHWIN BALAJI"           
## [63] "THOMAS JOSEPH HOSMER"     "BEN LI"
#Player’s State. We only have ON, MI and OH three states
state<-unlist(str_extract_all(tournament, "( MI )|( OH )|( ON )"))
state
##  [1] " ON " " MI " " MI " " MI " " MI " " OH " " MI " " MI " " ON " " MI "
## [11] " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI "
## [21] " ON " " MI " " ON " " MI " " MI " " ON " " MI " " MI " " MI " " ON "
## [31] " MI " " ON " " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI "
## [41] " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI "
## [51] " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI " " MI "
## [61] " ON " " MI " " MI " " MI "
#Total Number of Points
point<-unlist(str_extract_all(tournament, "\\d\\.\\d"))
point
##  [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" "5.0" "5.0" "5.0" "4.5"
## [12] "4.5" "4.5" "4.5" "4.5" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0"
## [23] "4.0" "4.0" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5"
## [34] "3.5" "3.5" "3.5" "3.5" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0"
## [45] "3.0" "3.0" "2.5" "2.5" "2.5" "2.5" "2.5" "2.5" "2.0" "2.0" "2.0"
## [56] "2.0" "2.0" "2.0" "2.0" "1.5" "1.5" "1.0" "1.0" "1.0"
#Player’s Pre-Rating 


#extract information and remove "R:"
preRating<-str_replace(unlist(str_extract_all(tournament, "(R:\\s*)(\\d+)")),"R: ","")

#change to number
preRating<-as.numeric(preRating)
preRating
##  [1] 1794 1553 1384 1716 1655 1686 1649 1641 1411 1365 1712 1663 1666 1610
## [15] 1220 1604 1629 1600 1564 1595 1563 1555 1363 1229 1745 1579 1552 1507
## [29] 1602 1522 1494 1441 1449 1399 1438 1355  980 1423 1436 1348 1403 1332
## [43] 1283 1199 1242  377 1362 1382 1291 1056 1011  935 1393 1270 1186 1153
## [57] 1092  917  853  967  955 1530 1175 1163
#Average Pre Chess Rating of Opponents
avgRating<-unlist(str_extract_all(tournament,  "\\|[0-9].*"))


#Replace all the blanks without opponents’ information with 00 and extract the number
avgRating <- str_extract_all(str_replace_all(avgRating, "\\s{1,2}\\|","00|"),"\\s\\d{1,2}")


#change it to matrix and number
avgRating <- matrix(unlist(avgRating),byrow=TRUE, nrow=length(avgRating) )
avgRating <- t(apply(avgRating,1,as.numeric)) 

#Loop through the whole avgRating number to replace the opponents’ number with their actual rating. 

for (i in 1:nrow(avgRating)) {
  for (j in 1:ncol(avgRating)){
    if (avgRating[i,j] == 0){
      avgRating[i,j] = NA
    }
  else{avgRating[i,j] <- preRating[avgRating[i,j]]}
  }
}

#Caculate the Rating of Opponents
avgRating <- c(rowMeans(avgRating, na.rm = TRUE))
avgRating<-round(avgRating,0)
avgRating
##  [1] 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 1468 1506 1498 1515
## [15] 1484 1386 1499 1480 1426 1411 1470 1300 1214 1357 1363 1507 1222 1522
## [29] 1314 1144 1260 1379 1277 1375 1150 1388 1385 1539 1430 1391 1248 1150
## [43] 1107 1327 1152 1358 1392 1356 1286 1296 1356 1495 1345 1206 1406 1414
## [57] 1363 1391 1319 1330 1327 1186 1350 1263

Finalize and export all the data

final<-data.frame(name,state,point,preRating,avgRating)
final
##                        name state point preRating avgRating
## 1                  GARY HUA   ON    6.0      1794      1605
## 2           DAKSHESH DARURI   MI    6.0      1553      1469
## 3              ADITYA BAJAJ   MI    6.0      1384      1564
## 4       PATRICK H SCHILLING   MI    5.5      1716      1574
## 5                HANSHI ZUO   MI    5.5      1655      1501
## 6               HANSEN SONG   OH    5.0      1686      1519
## 7         GARY DEE SWATHELL   MI    5.0      1649      1372
## 8          EZEKIEL HOUGHTON   MI    5.0      1641      1468
## 9               STEFANO LEE   ON    5.0      1411      1523
## 10                ANVIT RAO   MI    5.0      1365      1554
## 11       CAMERON WILLIAM MC   MI    4.5      1712      1468
## 12           KENNETH J TACK   MI    4.5      1663      1506
## 13        TORRANCE HENRY JR   MI    4.5      1666      1498
## 14             BRADLEY SHAW   MI    4.5      1610      1515
## 15   ZACHARY JAMES HOUGHTON   MI    4.5      1220      1484
## 16             MIKE NIKITIN   MI    4.0      1604      1386
## 17       RONALD GRZEGORCZYK   MI    4.0      1629      1499
## 18            DAVID SUNDEEN   MI    4.0      1600      1480
## 19             DIPANKAR ROY   MI    4.0      1564      1426
## 20              JASON ZHENG   MI    4.0      1595      1411
## 21            DINH DANG BUI   ON    4.0      1563      1470
## 22         EUGENE L MCCLURE   MI    4.0      1555      1300
## 23                 ALAN BUI   ON    4.0      1363      1214
## 24        MICHAEL R ALDRICH   MI    4.0      1229      1357
## 25         LOREN SCHWIEBERT   MI    3.5      1745      1363
## 26                  MAX ZHU   ON    3.5      1579      1507
## 27           GAURAV GIDWANI   MI    3.5      1552      1222
## 28     SOFIA ADINA STANESCU   MI    3.5      1507      1522
## 29         CHIEDOZIE OKORIE   MI    3.5      1602      1314
## 30       GEORGE AVERY JONES   ON    3.5      1522      1144
## 31             RISHI SHETTY   MI    3.5      1494      1260
## 32    JOSHUA PHILIP MATHEWS   ON    3.5      1441      1379
## 33                  JADE GE   MI    3.5      1449      1277
## 34   MICHAEL JEFFERY THOMAS   MI    3.5      1399      1375
## 35         JOSHUA DAVID LEE   MI    3.5      1438      1150
## 36            SIDDHARTH JHA   MI    3.5      1355      1388
## 37     AMIYATOSH PWNANANDAM   MI    3.5       980      1385
## 38                BRIAN LIU   MI    3.0      1423      1539
## 39            JOEL R HENDON   MI    3.0      1436      1430
## 40             FOREST ZHANG   MI    3.0      1348      1391
## 41      KYLE WILLIAM MURPHY   MI    3.0      1403      1248
## 42                 JARED GE   MI    3.0      1332      1150
## 43        ROBERT GLEN VASEY   MI    3.0      1283      1107
## 44       JUSTIN D SCHILLING   MI    3.0      1199      1327
## 45                DEREK YAN   MI    3.0      1242      1152
## 46 JACOB ALEXANDER LAVALLEY   MI    3.0       377      1358
## 47              ERIC WRIGHT   MI    2.5      1362      1392
## 48             DANIEL KHAIN   MI    2.5      1382      1356
## 49         MICHAEL J MARTIN   MI    2.5      1291      1286
## 50               SHIVAM JHA   MI    2.5      1056      1296
## 51           TEJAS AYYAGARI   MI    2.5      1011      1356
## 52                ETHAN GUO   MI    2.5       935      1495
## 53            JOSE C YBARRA   MI    2.0      1393      1345
## 54              LARRY HODGE   MI    2.0      1270      1206
## 55                ALEX KONG   MI    2.0      1186      1406
## 56             MARISA RICCI   MI    2.0      1153      1414
## 57               MICHAEL LU   MI    2.0      1092      1363
## 58             VIRAJ MOHILE   MI    2.0       917      1391
## 59                SEAN M MC   MI    2.0       853      1319
## 60               JULIA SHEN   MI    1.5       967      1330
## 61            JEZZEL FARKAS   ON    1.5       955      1327
## 62            ASHWIN BALAJI   MI    1.0      1530      1186
## 63     THOMAS JOSEPH HOSMER   MI    1.0      1175      1350
## 64                   BEN LI   MI    1.0      1163      1263
write.csv(final, "data607_project1.csv", row.names=FALSE)