Loading Data
#Loading the data and libraries
library(stringr)
chess_fields <- c("PairNumber", "Name", "Totpoints", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "EOL")
tournament <- read.table("/Users/christinakasman/Desktop/tournamentinfo.txt", header = FALSE, skip = 4, sep = "|", fill = TRUE, stringsAsFactors = FALSE, col.names = chess_fields)
Cleaning Data
tournament <- subset(tournament, !Name == "", select = c(PairNumber:R7))
head(tournament)
## PairNumber Name Totpoints R1 R2 R3
## 1 1 GARY HUA 6.0 W 39 W 21 W 18
## 2 ON 15445895 / R: 1794 ->1817 N:2 W B W
## 4 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4
## 5 MI 14598900 / R: 1553 ->1663 N:2 B W B
## 7 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25
## 8 MI 14959604 / R: 1384 ->1640 N:2 W B W
## R4 R5 R6 R7
## 1 W 14 W 7 D 12 D 4
## 2 B W B W
## 4 W 17 W 16 W 20 W 7
## 5 W B W B
## 7 W 21 W 11 W 13 W 12
## 8 B W B W
#seperate into two data frames - odd and even rows
tournament[seq(1, nrow(tournament), 2), ]
## PairNumber Name Totpoints R1 R2
## 1 1 GARY HUA 6.0 W 39 W 21
## 4 2 DAKSHESH DARURI 6.0 W 63 W 58
## 7 3 ADITYA BAJAJ 6.0 L 8 W 61
## 10 4 PATRICK H SCHILLING 5.5 W 23 D 28
## 13 5 HANSHI ZUO 5.5 W 45 W 37
## 16 6 HANSEN SONG 5.0 W 34 D 29
## 19 7 GARY DEE SWATHELL 5.0 W 57 W 46
## 22 8 EZEKIEL HOUGHTON 5.0 W 3 W 32
## 25 9 STEFANO LEE 5.0 W 25 L 18
## 28 10 ANVIT RAO 5.0 D 16 L 19
## 31 11 CAMERON WILLIAM MC LEMAN 4.5 D 38 W 56
## 34 12 KENNETH J TACK 4.5 W 42 W 33
## 37 13 TORRANCE HENRY JR 4.5 W 36 W 27
## 40 14 BRADLEY SHAW 4.5 W 54 W 44
## 43 15 ZACHARY JAMES HOUGHTON 4.5 D 19 L 16
## 46 16 MIKE NIKITIN 4.0 D 10 W 15
## 49 17 RONALD GRZEGORCZYK 4.0 W 48 W 41
## 52 18 DAVID SUNDEEN 4.0 W 47 W 9
## 55 19 DIPANKAR ROY 4.0 D 15 W 10
## 58 20 JASON ZHENG 4.0 L 40 W 49
## 61 21 DINH DANG BUI 4.0 W 43 L 1
## 64 22 EUGENE L MCCLURE 4.0 W 64 D 52
## 67 23 ALAN BUI 4.0 L 4 W 43
## 70 24 MICHAEL R ALDRICH 4.0 L 28 L 47
## 73 25 LOREN SCHWIEBERT 3.5 L 9 W 53
## 76 26 MAX ZHU 3.5 W 49 W 40
## 79 27 GAURAV GIDWANI 3.5 W 51 L 13
## 82 28 SOFIA ADINA STANESCU-BELLU 3.5 W 24 D 4
## 85 29 CHIEDOZIE OKORIE 3.5 W 50 D 6
## 88 30 GEORGE AVERY JONES 3.5 L 52 D 64
## 91 31 RISHI SHETTY 3.5 L 58 D 55
## 94 32 JOSHUA PHILIP MATHEWS 3.5 W 61 L 8
## 97 33 JADE GE 3.5 W 60 L 12
## 100 34 MICHAEL JEFFERY THOMAS 3.5 L 6 W 60
## 103 35 JOSHUA DAVID LEE 3.5 L 46 L 38
## 106 36 SIDDHARTH JHA 3.5 L 13 W 57
## 109 37 AMIYATOSH PWNANANDAM 3.5 B L 5
## 112 38 BRIAN LIU 3.0 D 11 W 35
## 115 39 JOEL R HENDON 3.0 L 1 W 54
## 118 40 FOREST ZHANG 3.0 W 20 L 26
## 121 41 KYLE WILLIAM MURPHY 3.0 W 59 L 17
## 124 42 JARED GE 3.0 L 12 L 50
## 127 43 ROBERT GLEN VASEY 3.0 L 21 L 23
## 130 44 JUSTIN D SCHILLING 3.0 B L 14
## 133 45 DEREK YAN 3.0 L 5 L 51
## 136 46 JACOB ALEXANDER LAVALLEY 3.0 W 35 L 7
## 139 47 ERIC WRIGHT 2.5 L 18 W 24
## 142 48 DANIEL KHAIN 2.5 L 17 W 63
## 145 49 MICHAEL J MARTIN 2.5 L 26 L 20
## 148 50 SHIVAM JHA 2.5 L 29 W 42
## 151 51 TEJAS AYYAGARI 2.5 L 27 W 45
## 154 52 ETHAN GUO 2.5 W 30 D 22
## 157 53 JOSE C YBARRA 2.0 H L 25
## 160 54 LARRY HODGE 2.0 L 14 L 39
## 163 55 ALEX KONG 2.0 L 62 D 31
## 166 56 MARISA RICCI 2.0 H L 11
## 169 57 MICHAEL LU 2.0 L 7 L 36
## 172 58 VIRAJ MOHILE 2.0 W 31 L 2
## 175 59 SEAN M MC CORMICK 2.0 L 41 B
## 178 60 JULIA SHEN 1.5 L 33 L 34
## 181 61 JEZZEL FARKAS 1.5 L 32 L 3
## 184 62 ASHWIN BALAJI 1.0 W 55 U
## 187 63 THOMAS JOSEPH HOSMER 1.0 L 2 L 48
## 190 64 BEN LI 1.0 L 22 D 30
## R3 R4 R5 R6 R7
## 1 W 18 W 14 W 7 D 12 D 4
## 4 L 4 W 17 W 16 W 20 W 7
## 7 W 25 W 21 W 11 W 13 W 12
## 10 W 2 W 26 D 5 W 19 D 1
## 13 D 12 D 13 D 4 W 14 W 17
## 16 L 11 W 35 D 10 W 27 W 21
## 19 W 13 W 11 L 1 W 9 L 2
## 22 L 14 L 9 W 47 W 28 W 19
## 25 W 59 W 8 W 26 L 7 W 20
## 28 W 55 W 31 D 6 W 25 W 18
## 31 W 6 L 7 L 3 W 34 W 26
## 34 D 5 W 38 H D 1 L 3
## 37 L 7 D 5 W 33 L 3 W 32
## 40 W 8 L 1 D 27 L 5 W 31
## 43 W 30 L 22 W 54 W 33 W 38
## 46 H W 39 L 2 W 36 U
## 49 L 26 L 2 W 23 W 22 L 5
## 52 L 1 W 32 L 19 W 38 L 10
## 55 W 52 D 28 W 18 L 4 L 8
## 58 W 23 W 41 W 28 L 2 L 9
## 61 W 47 L 3 W 40 W 39 L 6
## 64 L 28 W 15 H L 17 W 40
## 67 L 20 W 58 L 17 W 37 W 46
## 70 W 43 L 25 W 60 W 44 W 39
## 73 L 3 W 24 D 34 L 10 W 47
## 76 W 17 L 4 L 9 D 32 L 11
## 79 W 46 W 37 D 14 L 6 U
## 82 W 22 D 19 L 20 L 8 D 36
## 85 L 38 L 34 W 52 W 48 U
## 88 L 15 W 55 L 31 W 61 W 50
## 91 W 64 L 10 W 30 W 50 L 14
## 94 W 44 L 18 W 51 D 26 L 13
## 97 W 50 D 36 L 13 L 15 W 51
## 100 L 37 W 29 D 25 L 11 W 52
## 103 W 56 L 6 W 57 D 52 W 48
## 106 W 51 D 33 H L 16 D 28
## 109 W 34 L 27 H L 23 W 61
## 112 W 29 L 12 H L 18 L 15
## 115 W 40 L 16 W 44 L 21 L 24
## 118 L 39 W 59 L 21 W 56 L 22
## 121 W 58 L 20 X U U
## 124 L 57 D 60 D 61 W 64 W 56
## 127 L 24 W 63 W 59 L 46 W 55
## 130 L 32 W 53 L 39 L 24 W 59
## 133 D 60 L 56 W 63 D 55 W 58
## 136 L 27 L 50 W 64 W 43 L 23
## 139 L 21 W 61 L 8 D 51 L 25
## 142 H D 52 H L 29 L 35
## 145 D 63 D 64 W 58 H U
## 148 L 33 W 46 H L 31 L 30
## 151 L 36 W 57 L 32 D 47 L 33
## 154 L 19 D 48 L 29 D 35 L 34
## 157 H L 44 U W 57 U
## 160 L 61 B L 15 L 59 W 64
## 163 L 10 L 30 B D 45 L 43
## 166 L 35 W 45 H L 40 L 42
## 169 W 42 L 51 L 35 L 53 B
## 172 L 41 L 23 L 49 B L 45
## 175 L 9 L 40 L 43 W 54 L 44
## 178 D 45 D 42 L 24 H U
## 181 W 54 L 47 D 42 L 30 L 37
## 184 U U U U U
## 187 D 49 L 43 L 45 H U
## 190 L 31 D 49 L 46 L 42 L 54
tournament[seq(2, nrow(tournament), 2), ]
## PairNumber Name Totpoints R1 R2
## 2 ON 15445895 / R: 1794 ->1817 N:2 W B
## 5 MI 14598900 / R: 1553 ->1663 N:2 B W
## 8 MI 14959604 / R: 1384 ->1640 N:2 W B
## 11 MI 12616049 / R: 1716 ->1744 N:2 W B
## 14 MI 14601533 / R: 1655 ->1690 N:2 B W
## 17 OH 15055204 / R: 1686 ->1687 N:3 W B
## 20 MI 11146376 / R: 1649 ->1673 N:3 W B
## 23 MI 15142253 / R: 1641P17->1657P24 N:3 B W
## 26 ON 14954524 / R: 1411 ->1564 N:2 W B
## 29 MI 14150362 / R: 1365 ->1544 N:3 W W
## 32 MI 12581589 / R: 1712 ->1696 N:3 B W
## 35 MI 12681257 / R: 1663 ->1670 N:3 W B
## 38 MI 15082995 / R: 1666 ->1662 N:3 B W
## 41 MI 10131499 / R: 1610 ->1618 N:3 W B
## 44 MI 15619130 / R: 1220P13->1416P20 N:3 B B
## 47 MI 10295068 / R: 1604 ->1613 N:3 B W
## 50 MI 10297702 / R: 1629 ->1610 N:3 W B
## 53 MI 11342094 / R: 1600 ->1600 N:3 B W
## 56 MI 14862333 / R: 1564 ->1570 N:3 W B
## 59 MI 14529060 / R: 1595 ->1569 N:4 W B
## 62 ON 15495066 / R: 1563P22->1562 N:3 B W
## 65 MI 12405534 / R: 1555 ->1529 N:4 W B
## 68 ON 15030142 / R: 1363 ->1371 B W
## 71 MI 13469010 / R: 1229 ->1300 N:4 B W
## 74 MI 12486656 / R: 1745 ->1681 N:4 B W
## 77 ON 15131520 / R: 1579 ->1564 N:4 B W
## 80 MI 14476567 / R: 1552 ->1539 N:4 W B
## 83 MI 14882954 / R: 1507 ->1513 N:3 W W
## 86 MI 15323285 / R: 1602P6 ->1508P12 N:4 B W
## 89 ON 12577178 / R: 1522 ->1444 W B
## 92 MI 15131618 / R: 1494 ->1444 B W
## 95 ON 14073750 / R: 1441 ->1433 N:4 W B
## 98 MI 14691842 / R: 1449 ->1421 B W
## 101 MI 15051807 / R: 1399 ->1400 B W
## 104 MI 14601397 / R: 1438 ->1392 W W
## 107 MI 14773163 / R: 1355 ->1367 N:4 W B
## 110 MI 15489571 / R: 980P12->1077P17 B
## 113 MI 15108523 / R: 1423 ->1439 N:4 W B
## 116 MI 12923035 / R: 1436P23->1413 N:4 B W
## 119 MI 14892710 / R: 1348 ->1346 B B
## 122 MI 15761443 / R: 1403P5 ->1341P9 B W
## 125 MI 14462326 / R: 1332 ->1256 B W
## 128 MI 14101068 / R: 1283 ->1244 W B
## 131 MI 15323504 / R: 1199 ->1199 W
## 134 MI 15372807 / R: 1242 ->1191 W B
## 137 MI 15490981 / R: 377P3 ->1076P10 B W
## 140 MI 12533115 / R: 1362 ->1341 W B
## 143 MI 14369165 / R: 1382 ->1335 B W
## 146 MI 12531685 / R: 1291P12->1259P17 W W
## 149 MI 14773178 / R: 1056 ->1111 W B
## 152 MI 15205474 / R: 1011 ->1097 B W
## 155 MI 14918803 / R: 935 ->1092 N:4 B W
## 158 MI 12578849 / R: 1393 ->1359 B
## 161 MI 12836773 / R: 1270 ->1200 B B
## 164 MI 15412571 / R: 1186 ->1163 W B
## 167 MI 14679887 / R: 1153 ->1140 B
## 170 MI 15113330 / R: 1092 ->1079 B W
## 173 MI 14700365 / R: 917 -> 941 W B
## 176 MI 12841036 / R: 853 -> 878 W
## 179 MI 14579262 / R: 967 -> 984 W B
## 182 ON 15771592 / R: 955P11-> 979P18 B W
## 185 MI 15219542 / R: 1530 ->1535 B
## 188 MI 15057092 / R: 1175 ->1125 W B
## 191 MI 15006561 / R: 1163 ->1112 B W
## R3 R4 R5 R6 R7
## 2 W B W B W
## 5 B W B W B
## 8 W B W B W
## 11 W B W B B
## 14 B W B W B
## 17 W B B W B
## 20 W B B W W
## 23 B W B W W
## 26 W B W B B
## 29 B B W B W
## 32 B W B W B
## 35 W B W B
## 38 B B W W B
## 41 W W B B W
## 44 W W B B W
## 47 B W B
## 50 W B W B W
## 53 B W B W B
## 56 W B W W B
## 59 W B W B W
## 62 B W W B W
## 65 W B W B
## 68 B W B W B
## 71 B B W W B
## 74 B W B W B
## 77 B W B W W
## 80 W B W B
## 83 B W B B W
## 86 B W W B
## 89 B W W B B
## 92 B W B W B
## 95 W B W B W
## 98 B W B W B
## 101 B B W B W
## 104 B W B B W
## 107 W B W B
## 110 W W B W
## 113 W W B B
## 116 B W B W W
## 119 W W B W W
## 122 B W
## 125 B B W W B
## 128 W W B B W
## 131 B B W B W
## 134 W B W B W
## 137 B W B W W
## 140 W B W B W
## 143 B W B
## 146 B W B
## 149 W B B W
## 152 B W B W W
## 155 B W B W B
## 158 W W
## 161 W W B W
## 164 W B W B
## 167 W W B W
## 170 W B W B
## 173 W B W B
## 176 B B W W B
## 179 B W B
## 182 B W B W B
## 185
## 188 W B B
## 191 W B W B B
#combine two data frames using cbind
tourn_new = cbind(tournament[seq(1, nrow(tournament), 2), ], tournament[seq(2, nrow(tournament), 2), ])
#rename the columns
library(splitstackshape)
## Loading required package: data.table
colnames(tourn_new) = c ("ID", "Name", "Totpoints", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "State", "PrePost")
head(tourn_new)
## ID Name Totpoints R1 R2 R3
## 1 1 GARY HUA 6.0 W 39 W 21 W 18
## 4 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4
## 7 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25
## 10 4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2
## 13 5 HANSHI ZUO 5.5 W 45 W 37 D 12
## 16 6 HANSEN SONG 5.0 W 34 D 29 L 11
## R4 R5 R6 R7 State PrePost NA
## 1 W 14 W 7 D 12 D 4 ON 15445895 / R: 1794 ->1817 N:2
## 4 W 17 W 16 W 20 W 7 MI 14598900 / R: 1553 ->1663 N:2
## 7 W 21 W 11 W 13 W 12 MI 14959604 / R: 1384 ->1640 N:2
## 10 W 26 D 5 W 19 D 1 MI 12616049 / R: 1716 ->1744 N:2
## 13 D 13 D 4 W 14 W 17 MI 14601533 / R: 1655 ->1690 N:2
## 16 W 35 D 10 W 27 W 21 OH 15055204 / R: 1686 ->1687 N:3
## NA NA NA NA NA NA NA
## 1 W B W B W B W
## 4 B W B W B W B
## 7 W B W B W B W
## 10 W B W B W B B
## 13 B W B W B W B
## 16 W B W B B W B
playerID <- unlist(str_extract_all(tourn_new, "\\s+[[:digit:]]{1,2}\\s+"))
#Extracting the Pre Rating
pre_rating <- unlist(str_extract_all(tourn_new, "[:]\\s+[[:digit:]]+"))
pre_rating <- unlist(str_extract_all(pre_rating, "[[:digit:]]+"))
#Extracting the Post Rating
post_rating <- unlist(str_extract_all(tourn_new, "[>]\\s*[[:digit:]]+"))
post_rating <- unlist(str_extract_all(post_rating, "[[:digit:]]+"))
#extract all names
PlayerName <- unlist(str_extract_all(tourn_new, "[[:alpha:]]+\\s+[[:alpha:]]+\\s[[:alpha:]]*"))
#extract all states
states_extract <- unlist(str_extract_all(tourn_new, "\\W\\s+[[:alpha:]]{2}\\W"))
state <- unlist(str_extract_all(states_extract, "[[:alpha:]]{2}"))
#extract total points
total_points1<- unlist(str_extract_all(tourn_new, "[[:digit:]].{1,2}[[:digit:]]"))
total_points<- unlist(str_extract_all(total_points1, ".[.]."))
id<-seq(1,64,by=1)
pre_rating<-str_trim(pre_rating,"both")
post_rating<-str_trim(post_rating,"both")
playerranks<-cbind(playerID, PlayerName, state, total_points, pre_rating,post_rating)
playerranks<-as.data.frame(playerranks)
head(playerranks)
## playerID PlayerName state total_points pre_rating post_rating
## 1 1 GARY HUA ON 6.0 1794 1817
## 2 2 DAKSHESH DARURI MI 6.0 1553 1663
## 3 3 ADITYA BAJAJ MI 6.0 1384 1640
## 4 4 PATRICK H SCHILLING MI 5.5 1716 1744
## 5 5 HANSHI ZUO MI 5.5 1655 1690
## 6 6 HANSEN SONG OH 5.0 1686 1687
#extract opponents
opponent1 <- data.frame(as.numeric(str_extract_all(tourn_new$`R1`,"[[:digit:]]{1,}")))
opponent2 <- data.frame(as.numeric(str_extract_all(tourn_new$`R2`,"[[:digit:]]{1,}")))
opponent3 <- data.frame(as.numeric(str_extract_all(tourn_new$`R3`,"[[:digit:]]{1,}")))
opponent4 <- data.frame(as.numeric(str_extract_all(tourn_new$`R4`,"[[:digit:]]{1,}")))
opponent5 <- data.frame(as.numeric(str_extract_all(tourn_new$`R5`,"[[:digit:]]{1,}")))
opponent6 <- data.frame(as.numeric(str_extract_all(tourn_new$`R6`,"[[:digit:]]{1,}")))
opponent7 <- data.frame(as.numeric(str_extract_all(tourn_new$`R7`,"[[:digit:]]{1,}")))
opponents <- cbind(tourn_new$'ID', tourn_new$'Name', opponent1, opponent2, opponent3, opponent4, opponent5, opponent6, opponent7)
names(opponents) <- c("PairNumber","Name","Opp 1","Opp 2","Opp 3","Opp 4","Opp 5","Opp 6","Opp 7")
for(i in 1:dim(opponents)[1]){
opponents$NGames[i] <- 7 - as.numeric(sum(is.na(opponents[i,])))
}
head(opponents)
## PairNumber Name Opp 1 Opp 2 Opp 3 Opp 4
## 1 1 GARY HUA 39 21 18 14
## 2 2 DAKSHESH DARURI 63 58 4 17
## 3 3 ADITYA BAJAJ 8 61 25 21
## 4 4 PATRICK H SCHILLING 23 28 2 26
## 5 5 HANSHI ZUO 45 37 12 13
## 6 6 HANSEN SONG 34 29 11 35
## Opp 5 Opp 6 Opp 7 NGames
## 1 7 12 4 7
## 2 16 20 7 7
## 3 11 13 12 7
## 4 5 19 1 7
## 5 4 14 17 7
## 6 10 27 21 7
y <- cbind(playerranks, opponents)
y[is.na(y)] <- " "
head(y)
## playerID PlayerName state total_points pre_rating post_rating
## 1 1 GARY HUA ON 6.0 1794 1817
## 2 2 DAKSHESH DARURI MI 6.0 1553 1663
## 3 3 ADITYA BAJAJ MI 6.0 1384 1640
## 4 4 PATRICK H SCHILLING MI 5.5 1716 1744
## 5 5 HANSHI ZUO MI 5.5 1655 1690
## 6 6 HANSEN SONG OH 5.0 1686 1687
## PairNumber Name Opp 1 Opp 2 Opp 3 Opp 4
## 1 1 GARY HUA 39 21 18 14
## 2 2 DAKSHESH DARURI 63 58 4 17
## 3 3 ADITYA BAJAJ 8 61 25 21
## 4 4 PATRICK H SCHILLING 23 28 2 26
## 5 5 HANSHI ZUO 45 37 12 13
## 6 6 HANSEN SONG 34 29 11 35
## Opp 5 Opp 6 Opp 7 NGames
## 1 7 12 4 7
## 2 16 20 7 7
## 3 11 13 12 7
## 4 5 19 1 7
## 5 4 14 17 7
## 6 10 27 21 7
#Loop to find average value
Average_Opponent_Pre_Rating <- c()
for (i in y$PairNumber){
a <- y[y$'opp 1'==i, "pre_rating"]
a <- a[!is.na(a)]
b <- y[y$'opp 2'==i, "pre_rating"]
b <- b[!is.na(b)]
c <- y[y$'opp 3'==i, "pre_rating"]
d <- y[y$'opp 4'==i, "pre_rating"]
d <- d[!is.na(d)]
e <- y[y$'opp 5'==i, "pre_rating"]
e <- e[!is.na(e)]
f <- y[y$'opp 6'==i, "pre_rating"]
f <- f[!is.na(f)]
g <- y[y$'opp 7'==i, "pre_rating"]
g <- g[!is.na(g)]
u <- mean(c(a, b, c, d, e, f, g))
Average_Opponent_Pre_Rating <- c(Average_Opponent_Pre_Rating, u)
}
final <- cbind(playerranks, Average_Opponent_Pre_Rating)
head(final)
## playerID PlayerName state total_points pre_rating post_rating
## 1 1 GARY HUA ON 6.0 1794 1817
## 2 2 DAKSHESH DARURI MI 6.0 1553 1663
## 3 3 ADITYA BAJAJ MI 6.0 1384 1640
## 4 4 PATRICK H SCHILLING MI 5.5 1716 1744
## 5 5 HANSHI ZUO MI 5.5 1655 1690
## 6 6 HANSEN SONG OH 5.0 1686 1687
## Average_Opponent_Pre_Rating
## 1 NaN
## 2 NaN
## 3 NaN
## 4 NaN
## 5 NaN
## 6 NaN
write.csv(final, "Chess Tournament.csv")