This project makes use of the “stringr” package; load it with library(stringr) before running the code below.
## Read the raw tournament report from GitHub. The first three lines of the
## file are skipped and no header row is used. read.csv() splits on commas, so
## each text line ends up as one cell of a single column as long as the line
## contains no comma.
eloreadin1 <- read.csv(
  "https://raw.githubusercontent.com/tagensingh/SPS-DATA607-PROJECT-1/main/tournamentinfo.txt",
  skip = 3,
  header = FALSE,            # spelled out: F is an ordinary, reassignable variable
  stringsAsFactors = FALSE   # keep the text as character on R < 4.0 as well
)
#eloreadin1
## Step 1: break every input line apart at each "-" character. With
## simplify = TRUE the pieces come back as a character matrix with one row per
## input line, so a line made up only of dashes turns into a row of empty
## strings and the dashes themselves disappear from the data.
eloreadin2 <- str_split(
  string = eloreadin1[, ],
  pattern = "-",
  simplify = TRUE
)
#eloreadin2
## Step 2: pull out the player names. A name is taken to be two or more words,
## each pair separated by exactly one whitespace character.
pnames <- local({
  name_hits <- str_extract_all(
    eloreadin2[, ],
    "\\w+[[:space:]]\\w+([[:space:]]\\w+)*",
    simplify = TRUE
  )
  # With simplify = TRUE, cells without a match come back as "", so only the
  # non-empty rows are kept.
  has_name <- name_hits[, ] != ""
  name_hits[has_name, ]
})
#pnames
## Player states: a state code is matched as two capital letters that are
## directly followed by one whitespace character and a "|".
pstates <- local({
  state_hits <- unlist(
    str_extract_all(eloreadin2[, ], "[A-Z][A-Z][[:space:]][\\|]")
  )
  # Splitting each hit on the trailing whitespace + "|" yields the code in
  # column 1 and a leftover piece in column 2; column 2 is discarded.
  state_parts <- str_split(state_hits, "[[:space:]][\\|]", simplify = TRUE)
  state_parts[, -2]
})
#pstates
## Total points: matched as digits, a literal ".", then more digits. The
## values are kept as character strings.
totalPoints <- local({
  point_hits <- str_extract_all(
    eloreadin2[, ],
    "(\\d+)[.](\\d+)",
    simplify = TRUE
  )
  # Cells without a decimal number come back as "" and are filtered out.
  has_points <- point_hits[, ] != ""
  point_hits[has_points, ]
})
#totalPoints
## Get the p-ratings: the first run of digits that follows the literal text
## "R:" (any amount of whitespace may sit in between). Anything after those
## digits in the same token, such as a trailing letter suffix, is ignored.
##
## The pattern anchors on the literal "R:" rather than the character class
## [R:], so a stray "R" or ":" elsewhere in a line cannot start a match, and
## ratings of any length (not only three or more digits) are captured.
pratings <- str_match(eloreadin2[, ], "R:[[:space:]]*(\\d+)")[, 2]
# str_match() returns NA for cells with no match; keep only the real hits,
# in their original order, and convert them to numbers.
pratings <- as.numeric(pratings[!is.na(pratings)])
#pratings
## Get the opponent strings: a "|" followed by one or more cells of the form
## <capital letter><whitespace><optional digits>"|".
OpponentData <- str_extract_all(eloreadin2[, ], "([\\|][A-Z]([[:space:]]+)\\d*[\\|])([A-Z]([[:space:]]+)\\d*[\\|])*", simplify = TRUE)
## Split the opponent strings into individual opponent indexes: one row per
## opponent string, one column per index found (cells are "" where a row has
## fewer indexes than the widest row).
Opponents <- str_extract_all(OpponentData[, ], "\\d+", simplify = TRUE)
# Drop rows that contain no index at all. drop = FALSE keeps the result a
# matrix even if only one row survives, so nrow()/ncol() stay usable.
Opponents <- Opponents[rowSums(Opponents == "") != ncol(Opponents), , drop = FALSE]
#Opponents
#The following lines of code were from a web source.
##**This code is modified from another in-house project**
## Average opponent p-rating for every row of Opponents.
## Each non-blank cell of Opponents is an index into pratings; the ratings
## found that way are summed per row and divided by the number of non-blank
## cells in that row. The result is a one-column data frame (column "avg")
## with one row per row of Opponents. A row without any opponent yields NaN,
## and an index that does not exist in pratings makes that row's average NA.
ratingavgs <- local({
  played <- Opponents != ""
  # as.integer("") is NA, so blank cells look up NA here and are zeroed on the
  # next line; they then add nothing to the row sum.
  opponent_ratings <- pratings[as.integer(Opponents)]
  opponent_ratings[!played] <- 0
  # The lookup returns a plain vector; restore the matrix shape of Opponents.
  dim(opponent_ratings) <- dim(Opponents)
  data.frame(avg = rowSums(opponent_ratings) / rowSums(played))
})
## Assemble the final results table from the pieces extracted above and give
## the columns their display names, then print it.
tournamentresults1 <- setNames(
  data.frame(pnames, pstates, totalPoints, pratings, ratingavgs),
  c("Player Name", "State", "Points", "P-Rating", "Opponent Average P-Rating")
)
tournamentresults1
## Player Name State Points P-Rating Opponent Average P-Rating
## 1 GARY HUA ON 6.0 1794 1605.286
## 2 DAKSHESH DARURI MI 6.0 1553 1469.286
## 3 ADITYA BAJAJ MI 6.0 1384 1563.571
## 4 PATRICK H SCHILLING MI 5.5 1716 1573.571
## 5 HANSHI ZUO MI 5.5 1655 1500.857
## 6 HANSEN SONG OH 5.0 1686 1518.714
## 7 GARY DEE SWATHELL MI 5.0 1649 1372.143
## 8 EZEKIEL HOUGHTON MI 5.0 1641 1468.429
## 9 STEFANO LEE ON 5.0 1411 1523.143
## 10 ANVIT RAO MI 5.0 1365 1554.143
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1467.571
## 12 KENNETH J TACK MI 4.5 1663 1506.167
## 13 TORRANCE HENRY JR MI 4.5 1666 1497.857
## 14 BRADLEY SHAW MI 4.5 1610 1515.000
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1483.857
## 16 MIKE NIKITIN MI 4.0 1604 1385.800
## 17 RONALD GRZEGORCZYK MI 4.0 1629 1498.571
## 18 DAVID SUNDEEN MI 4.0 1600 1480.000
## 19 DIPANKAR ROY MI 4.0 1564 1426.286
## 20 JASON ZHENG MI 4.0 1595 1410.857
## 21 DINH DANG BUI ON 4.0 1563 1470.429
## 22 EUGENE L MCCLURE MI 4.0 1555 1300.333
## 23 ALAN BUI ON 4.0 1363 1213.857
## 24 MICHAEL R ALDRICH MI 4.0 1229 1357.000
## 25 LOREN SCHWIEBERT MI 3.5 1745 1363.286
## 26 MAX ZHU ON 3.5 1579 1506.857
## 27 GAURAV GIDWANI MI 3.5 1552 1221.667
## 28 SOFIA ADINA STANESCU MI 3.5 1507 1313.500
## 29 CHIEDOZIE OKORIE MI 3.5 1602 1144.143
## 30 GEORGE AVERY JONES ON 3.5 1522 1259.857
## 31 RISHI SHETTY MI 3.5 1494 1378.714
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441 1276.857
## 33 JADE GE MI 3.5 1449 1375.286
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399 1149.714
## 35 JOSHUA DAVID LEE MI 3.5 1438 1388.167
## 36 SIDDHARTH JHA MI 3.5 1355 1384.800
## 37 AMIYATOSH PWNANANDAM MI 3.0 980 1539.167
## 38 BRIAN LIU MI 3.0 1423 1429.571
## 39 JOEL R HENDON MI 3.0 1436 1390.571
## 40 FOREST ZHANG MI 3.0 1348 1248.500
## 41 KYLE WILLIAM MURPHY MI 3.0 1403 1149.857
## 42 JARED GE MI 3.0 1332 1106.571
## 43 ROBERT GLEN VASEY MI 3.0 1283 1327.000
## 44 JUSTIN D SCHILLING MI 3.0 1199 1152.000
## 45 DEREK YAN MI 3.0 1242 1357.714
## 46 JACOB ALEXANDER LAVALLEY MI 2.5 377 1392.000
## 47 ERIC WRIGHT MI 2.5 1362 1355.800
## 48 DANIEL KHAIN MI 2.5 1382 1285.800
## 49 MICHAEL J MARTIN MI 2.5 1291 1296.000
## 50 SHIVAM JHA MI 2.5 1056 1356.143
## 51 TEJAS AYYAGARI MI 2.5 1011 1494.571
## 52 ETHAN GUO MI 2.0 935 1345.333
## 53 JOSE C YBARRA MI 2.0 1393 1206.167
## 54 LARRY HODGE MI 2.0 1270 1406.000
## 55 ALEX KONG MI 2.0 1186 1414.400
## 56 MARISA RICCI MI 2.0 1153 1363.000
## 57 MICHAEL LU MI 2.0 1092 1391.000
## 58 VIRAJ MOHILE MI 2.0 917 1319.000
## 59 SEAN M MC CORMICK MI 1.5 853 1330.200
## 60 JULIA SHEN MI 1.5 967 1327.286
## 61 JEZZEL FARKAS ON 1.0 955 1186.000
## 62 ASHWIN BALAJI MI 1.0 1530 1350.200
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175 1263.000
## 64 BEN LI MI 3.5 1163 1522.143
## Writing straight to the GitHub raw URL does not work; the attempt is kept
## below as a comment only.
#write.csv(tournamentresults1,file = "https://raw.githubusercontent.com/tagensingh/SPS-DATA607-PROJECT-1/main/tournamentresults1.csv")
## Save the results to a CSV file in the current working directory instead.
## Row names are written too (the write.csv default).
write.csv(x = tournamentresults1, file = "tournamentresultsfinal.csv")
# Read the CSV back in to check that it was written correctly. The header row
# and the comma separator are the read.csv() defaults. The first column, "X",
# holds the row names that write.csv() stored.
tourney3 <- read.csv(file = "tournamentresultsfinal.csv")
names(tourney3)
## [1] "X" "Player.Name"
## [3] "State" "Points"
## [5] "P.Rating" "Opponent.Average.P.Rating"
# Copy into a second object and confirm that it is a data frame.
tourney4 <- data.frame(tourney3)
class(tourney4)
## [1] "data.frame"