Introduction

In this project, you're given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could, for example, be imported into a SQL database) with the following information for all of the players: player name, player state, total points, player pre-rating, and the average pre-rating of the player's opponents.

# Load the raw tournament report. header = FALSE makes the first line of the
# file count as data instead of being consumed as column names.
tournament.data <- read.csv(
  "https://raw.githubusercontent.com/szx868/data607/master/Project1/tournamentinfo.txt",
  header = FALSE
)

Remove the first three rows (the report headings)

# Drop rows 1 to 3, which hold the heading lines of the report, and peek at
# what is left.
results <- tournament.data[-seq_len(3), ]
head(results)
## [1] "-----------------------------------------------------------------------------------------"
## [2] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
## [3] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [4] "-----------------------------------------------------------------------------------------"
## [5] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
## [6] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"

Extraction of the columns using regular expressions

library(stringr)
# Player rows start with the pair number; the name is the first
# pipe-delimited field after it. Capturing the whole field keeps names of any
# length intact (a pattern limited to three words cuts longer names short,
# e.g. "CAMERON WILLIAM MC").
name.match <- str_match(results, "^\\s*\\d+\\s*\\|([^|]+)\\|")[, 2]
player.name <- str_trim(name.match[!is.na(name.match)])
# Rating rows start with a two-letter state/province code. Matching the code
# generically avoids a hard-coded list of states and returns the code without
# surrounding spaces.
state.match <- str_match(results, "^\\s*([A-Z]{2})\\s*\\|")[, 2]
player.state <- state.match[!is.na(state.match)]
# Total points are written as digit, dot, digit (e.g. "6.0").
player.points <- unlist(str_extract_all(results, "\\d\\.\\d"))
# Pre-rating, step 1: grab "R:" together with the digits that follow it.
pre.rating.step1 <- unlist(str_extract_all(results, "R:\\s*\\d+"))
pre.rating.step1[1]
## [1] "R: 1794"
# Pre-rating, step 2: keep only the digits.
pre.rating <- unlist(str_extract_all(pre.rating.step1, "\\d+"))
is.vector(pre.rating)
## [1] TRUE

Convert strings to numeric

# Ratings were extracted as text; make them numeric so they can be averaged.
pre.rating <- as.numeric(pre.rating)
pre.rating[1]
## [1] 1794
# Keep each player row from the total-points field onward.
avg.calc.step1 <- unlist(str_extract_all(results, "\\|[0-9].*"))
avg.calc.step1[16]
## [1] "|4.0  |D  10|W  15|H    |W  39|L   2|W  36|U    |"
# Replace the blanks just before each pipe with "00" so that rounds without an
# opponent (e.g. "H    |") still produce a number in the next step.
avg.calc.step2 <- str_replace_all(avg.calc.step1, "\\s{1,2}\\|", "00|")
avg.calc.step2[16]
## [1] "|4.000|D  10|W  15|H  00|W  39|L   2|W  36|U  00|"
# Pull the opponent pair number of every round; " 00" marks a round with no
# opponent.
avg.calc.step3 <- str_extract_all(avg.calc.step2, "\\s\\d{1,2}")
avg.calc.step3[16]
## [[1]]
## [1] " 10" " 15" " 00" " 39" " 2"  " 36" " 00"
# matrix() would misalign the values (at best with a warning) if players had
# differing numbers of rounds, so require equal lengths before reshaping.
stopifnot(length(unique(lengths(avg.calc.step3))) <= 1L)
matrix.calc <- matrix(
  unlist(avg.calc.step3),
  byrow = TRUE,
  nrow = length(avg.calc.step3)
)
dim(matrix.calc)
## [1] 64  7
# Convert the character matrix to numeric in one step; passing nrow keeps the
# original shape even when there is only a single column.
new.matrix <- matrix(as.numeric(matrix.calc), nrow = nrow(matrix.calc))
dim(new.matrix)
## [1] 64  7

Data cleaning

# Rounds without an opponent were encoded as pair number 0; mark them as
# missing so they are left out of the average.
new.matrix[new.matrix == 0] <- NA
# Swap every remaining pair number for that opponent's pre-rating. Indexing
# with NA returns NA, so the missing rounds stay missing; assigning into
# new.matrix[] keeps the matrix dimensions.
new.matrix[] <- pre.rating[new.matrix]
# Average opponent pre-rating per player, ignoring the missing rounds.
avg.opponents <- rowMeans(new.matrix, na.rm = TRUE)

# Combine the extracted vectors into the export table and give the columns
# their report headers (which contain spaces) in the same step.
csv <- setNames(
  data.frame(player.name, player.state, player.points, pre.rating, avg.opponents),
  c("Player Name", "Player State", "Total Points", "Player Pre-Rating", "Opponents Rating")
)
csv
##                 Player Name Player State Total Points Player Pre-Rating
## 1                  GARY HUA          ON           6.0              1794
## 2           DAKSHESH DARURI          MI           6.0              1553
## 3              ADITYA BAJAJ          MI           6.0              1384
## 4       PATRICK H SCHILLING          MI           5.5              1716
## 5                HANSHI ZUO          MI           5.5              1655
## 6               HANSEN SONG          OH           5.0              1686
## 7         GARY DEE SWATHELL          MI           5.0              1649
## 8          EZEKIEL HOUGHTON          MI           5.0              1641
## 9               STEFANO LEE          ON           5.0              1411
## 10                ANVIT RAO          MI           5.0              1365
## 11       CAMERON WILLIAM MC          MI           4.5              1712
## 12           KENNETH J TACK          MI           4.5              1663
## 13        TORRANCE HENRY JR          MI           4.5              1666
## 14             BRADLEY SHAW          MI           4.5              1610
## 15   ZACHARY JAMES HOUGHTON          MI           4.5              1220
## 16             MIKE NIKITIN          MI           4.0              1604
## 17       RONALD GRZEGORCZYK          MI           4.0              1629
## 18            DAVID SUNDEEN          MI           4.0              1600
## 19             DIPANKAR ROY          MI           4.0              1564
## 20              JASON ZHENG          MI           4.0              1595
## 21            DINH DANG BUI          ON           4.0              1563
## 22         EUGENE L MCCLURE          MI           4.0              1555
## 23                 ALAN BUI          ON           4.0              1363
## 24        MICHAEL R ALDRICH          MI           4.0              1229
## 25         LOREN SCHWIEBERT          MI           3.5              1745
## 26                  MAX ZHU          ON           3.5              1579
## 27           GAURAV GIDWANI          MI           3.5              1552
## 28     SOFIA ADINA STANESCU          MI           3.5              1507
## 29         CHIEDOZIE OKORIE          MI           3.5              1602
## 30       GEORGE AVERY JONES          ON           3.5              1522
## 31             RISHI SHETTY          MI           3.5              1494
## 32    JOSHUA PHILIP MATHEWS          ON           3.5              1441
## 33                  JADE GE          MI           3.5              1449
## 34   MICHAEL JEFFERY THOMAS          MI           3.5              1399
## 35         JOSHUA DAVID LEE          MI           3.5              1438
## 36            SIDDHARTH JHA          MI           3.5              1355
## 37     AMIYATOSH PWNANANDAM          MI           3.5               980
## 38                BRIAN LIU          MI           3.0              1423
## 39            JOEL R HENDON          MI           3.0              1436
## 40             FOREST ZHANG          MI           3.0              1348
## 41      KYLE WILLIAM MURPHY          MI           3.0              1403
## 42                 JARED GE          MI           3.0              1332
## 43        ROBERT GLEN VASEY          MI           3.0              1283
## 44       JUSTIN D SCHILLING          MI           3.0              1199
## 45                DEREK YAN          MI           3.0              1242
## 46 JACOB ALEXANDER LAVALLEY          MI           3.0               377
## 47              ERIC WRIGHT          MI           2.5              1362
## 48             DANIEL KHAIN          MI           2.5              1382
## 49         MICHAEL J MARTIN          MI           2.5              1291
## 50               SHIVAM JHA          MI           2.5              1056
## 51           TEJAS AYYAGARI          MI           2.5              1011
## 52                ETHAN GUO          MI           2.5               935
## 53            JOSE C YBARRA          MI           2.0              1393
## 54              LARRY HODGE          MI           2.0              1270
## 55                ALEX KONG          MI           2.0              1186
## 56             MARISA RICCI          MI           2.0              1153
## 57               MICHAEL LU          MI           2.0              1092
## 58             VIRAJ MOHILE          MI           2.0               917
## 59                SEAN M MC          MI           2.0               853
## 60               JULIA SHEN          MI           1.5               967
## 61            JEZZEL FARKAS          ON           1.5               955
## 62            ASHWIN BALAJI          MI           1.0              1530
## 63     THOMAS JOSEPH HOSMER          MI           1.0              1175
## 64                   BEN LI          MI           1.0              1163
##    Opponents Rating
## 1          1605.286
## 2          1469.286
## 3          1563.571
## 4          1573.571
## 5          1500.857
## 6          1518.714
## 7          1372.143
## 8          1468.429
## 9          1523.143
## 10         1554.143
## 11         1467.571
## 12         1506.167
## 13         1497.857
## 14         1515.000
## 15         1483.857
## 16         1385.800
## 17         1498.571
## 18         1480.000
## 19         1426.286
## 20         1410.857
## 21         1470.429
## 22         1300.333
## 23         1213.857
## 24         1357.000
## 25         1363.286
## 26         1506.857
## 27         1221.667
## 28         1522.143
## 29         1313.500
## 30         1144.143
## 31         1259.857
## 32         1378.714
## 33         1276.857
## 34         1375.286
## 35         1149.714
## 36         1388.167
## 37         1384.800
## 38         1539.167
## 39         1429.571
## 40         1390.571
## 41         1248.500
## 42         1149.857
## 43         1106.571
## 44         1327.000
## 45         1152.000
## 46         1357.714
## 47         1392.000
## 48         1355.800
## 49         1285.800
## 50         1296.000
## 51         1356.143
## 52         1494.571
## 53         1345.333
## 54         1206.167
## 55         1406.000
## 56         1414.400
## 57         1363.000
## 58         1391.000
## 59         1319.000
## 60         1330.200
## 61         1327.286
## 62         1186.000
## 63         1350.200
## 64         1263.000

Export Data to CSV File

# Save the table under a relative path, leaving out the row-name column.
write.csv(x = csv, file = "DATA_607_Project1.csv", row.names = FALSE)