This project makes use of the “stringr” package.

# stringr supplies the str_* helpers used by every parsing step below, so it
# has to be attached before any of them run.
library(stringr)

# Read the raw tournament text file straight from GitHub.
#   skip = 3                 -> ignore the first three lines of the file
#   header = FALSE           -> do not treat any line as column names
#   stringsAsFactors = FALSE -> keep the text as character on R < 4.0 as well
# NOTE(review): the later `eloreadin1[, ]` indexing presumably relies on the
# file containing no commas, so that each line lands in one column - confirm.
eloreadin1 <- read.csv(
  "https://raw.githubusercontent.com/tagensingh/SPS-DATA607-PROJECT-1/main/tournamentinfo.txt",
  skip = 3,
  header = FALSE,
  stringsAsFactors = FALSE
)

#eloreadin1

## Step 1: break every input line apart at each "-" character.
## With simplify = TRUE the result is a character matrix of the pieces, padded
## with "" where a line has fewer pieces, so a line made up only of dashes
## ends up as nothing but empty strings.
eloreadin2 <- str_split(
  string = eloreadin1[, ],
  pattern = "-",
  simplify = TRUE
)

#eloreadin2
## Step 2: player names.
## The pattern wants at least two words separated by exactly one whitespace
## character, optionally followed by further single-spaced words.
pnames <- str_extract_all(
  eloreadin2[, ],
  "\\w+[[:space:]]\\w+([[:space:]]\\w+)*",
  simplify = TRUE
)
# Throw away the entries where nothing matched (they come back as "").
pnames <- pnames[pnames[, ] != "", ]

#pnames
## Player states: two capital letters followed by one whitespace character
## and a "|".
pstates <- local({
  # Every match still carries its trailing " |".
  state_hits <- unlist(
    str_extract_all(eloreadin2[, ], "[A-Z][A-Z][[:space:]][\\|]")
  )
  # Splitting on that separator leaves the code in column 1 and an empty
  # string in column 2; dropping column 2 keeps only the state codes.
  state_parts <- str_split(state_hits, "[[:space:]][\\|]", simplify = TRUE)
  state_parts[, -2]
})

#pstates
## Total points: digits, a literal ".", then more digits (e.g. "5.5").
## The values are kept as character strings, exactly as they were matched.
totalPoints <- str_extract_all(
  eloreadin2[, ],
  "(\\d+)[.](\\d+)",
  simplify = TRUE
)
# Discard the entries that produced no match (returned as "").
totalPoints <- totalPoints[totalPoints[, ] != "", ]

#totalPoints
## Player pre-ratings: the run of digits that follows the literal text "R:".
##
## The earlier pattern started with "[R:]", which is a character class meaning
## "an R or a colon". It therefore also matched things such as a middle
## initial "R" followed by a surname, and needed two extra clean-up passes.
## One of those passes ("\\d\\d\\d+") silently dropped any rating with fewer
## than three digits, which would shift every later rating onto the wrong
## player. Anchoring on the literal "R:" avoids both problems.
pratings <- local({
  # First match per element, NA where the element has no "R:<digits>" field.
  rating_fields <- str_extract(eloreadin2[, ], "R:[[:space:]]*[[:digit:]]+")
  rating_fields <- rating_fields[!is.na(rating_fields)]
  # Keep only the leading digits; a trailing suffix such as "P17" is never
  # part of the match above, so it cannot leak into the number.
  as.numeric(str_extract(rating_fields, "[[:digit:]]+"))
})

#pratings
## Opponent strings: a "|", a capital letter, some whitespace, optional digits
## and a "|", followed by any number of further "<letter><spaces><digits>|"
## cells.
OpponentData <- str_extract_all(
  eloreadin2[, ],
  "([\\|][A-Z]([[:space:]]+)\\d*[\\|])([A-Z]([[:space:]]+)\\d*[\\|])*",
  simplify = TRUE
)

## Pull the individual opponent numbers out of each opponent string. The
## result is a character matrix with one column per number found in the
## longest string (presumably 7 for this tournament - confirm), padded
## with "".
Opponents <- str_extract_all(OpponentData[, ], "\\d+", simplify = TRUE)

## Drop the rows that contain no opponent number at all. drop = FALSE keeps
## the result a matrix even if only one row survives, which the nrow() and
## [row, col] uses further down depend on.
## NOTE(review): a player with no numbered opponent at all would be dropped
## here too and the remaining rows would no longer line up with pnames -
## confirm this cannot occur in the input.
Opponents <- Opponents[
  rowSums(Opponents == "") != ncol(Opponents), ,
  drop = FALSE
]

#Opponents
#The following lines of code were from a web source.

## Average opponent pre-rating for every row of Opponents.
##
## ** Adapted from code in another in-house project. **
##
## Each non-blank cell of Opponents is an opponent number, used as a position
## in pratings. Changes from the original row/column loop:
##   * the numbers are parsed with base = 10L; base = 0L would read a value
##     with a leading zero (e.g. "08") as octal,
##   * the averages are computed for all rows at once instead of growing a
##     data frame with rbind() on every iteration,
##   * no variable called `sum` is created to mask base::sum(),
##   * a zero-row Opponents gives an empty result instead of failing inside
##     1:nrow(Opponents).

# TRUE wherever a row actually has an opponent in that column.
has_opponent <- Opponents != ""

# Rating of each opponent; cells without an opponent contribute 0 to the sum.
opponent_ratings <- matrix(
  0,
  nrow = nrow(Opponents),
  ncol = ncol(Opponents)
)
opponent_ratings[has_opponent] <-
  pratings[strtoi(Opponents[has_opponent], base = 10L)]

# Row total divided by the number of opponents actually played. As before, a
# row with no opponents yields NaN (0 / 0), and an opponent number with no
# matching rating makes that row's average NA.
ratingavgs <- data.frame(
  avg = rowSums(opponent_ratings) / rowSums(has_opponent)
)

## Assemble the final results table: one column per parsed field, with
## display-friendly column headings applied in the same step.
tournamentresults1 <- setNames(
  data.frame(pnames, pstates, totalPoints, pratings, ratingavgs),
  c(
    "Player Name",
    "State",
    "Points",
    "P-Rating",
    "Opponent Average P-Rating"
  )
)

# Print the finished table.
tournamentresults1
##                 Player Name State Points P-Rating Opponent Average P-Rating
## 1                  GARY HUA    ON    6.0     1794                  1605.286
## 2           DAKSHESH DARURI    MI    6.0     1553                  1469.286
## 3              ADITYA BAJAJ    MI    6.0     1384                  1563.571
## 4       PATRICK H SCHILLING    MI    5.5     1716                  1573.571
## 5                HANSHI ZUO    MI    5.5     1655                  1500.857
## 6               HANSEN SONG    OH    5.0     1686                  1518.714
## 7         GARY DEE SWATHELL    MI    5.0     1649                  1372.143
## 8          EZEKIEL HOUGHTON    MI    5.0     1641                  1468.429
## 9               STEFANO LEE    ON    5.0     1411                  1523.143
## 10                ANVIT RAO    MI    5.0     1365                  1554.143
## 11 CAMERON WILLIAM MC LEMAN    MI    4.5     1712                  1467.571
## 12           KENNETH J TACK    MI    4.5     1663                  1506.167
## 13        TORRANCE HENRY JR    MI    4.5     1666                  1497.857
## 14             BRADLEY SHAW    MI    4.5     1610                  1515.000
## 15   ZACHARY JAMES HOUGHTON    MI    4.5     1220                  1483.857
## 16             MIKE NIKITIN    MI    4.0     1604                  1385.800
## 17       RONALD GRZEGORCZYK    MI    4.0     1629                  1498.571
## 18            DAVID SUNDEEN    MI    4.0     1600                  1480.000
## 19             DIPANKAR ROY    MI    4.0     1564                  1426.286
## 20              JASON ZHENG    MI    4.0     1595                  1410.857
## 21            DINH DANG BUI    ON    4.0     1563                  1470.429
## 22         EUGENE L MCCLURE    MI    4.0     1555                  1300.333
## 23                 ALAN BUI    ON    4.0     1363                  1213.857
## 24        MICHAEL R ALDRICH    MI    4.0     1229                  1357.000
## 25         LOREN SCHWIEBERT    MI    3.5     1745                  1363.286
## 26                  MAX ZHU    ON    3.5     1579                  1506.857
## 27           GAURAV GIDWANI    MI    3.5     1552                  1221.667
## 28     SOFIA ADINA STANESCU    MI    3.5     1507                  1313.500
## 29         CHIEDOZIE OKORIE    MI    3.5     1602                  1144.143
## 30       GEORGE AVERY JONES    ON    3.5     1522                  1259.857
## 31             RISHI SHETTY    MI    3.5     1494                  1378.714
## 32    JOSHUA PHILIP MATHEWS    ON    3.5     1441                  1276.857
## 33                  JADE GE    MI    3.5     1449                  1375.286
## 34   MICHAEL JEFFERY THOMAS    MI    3.5     1399                  1149.714
## 35         JOSHUA DAVID LEE    MI    3.5     1438                  1388.167
## 36            SIDDHARTH JHA    MI    3.5     1355                  1384.800
## 37     AMIYATOSH PWNANANDAM    MI    3.0      980                  1539.167
## 38                BRIAN LIU    MI    3.0     1423                  1429.571
## 39            JOEL R HENDON    MI    3.0     1436                  1390.571
## 40             FOREST ZHANG    MI    3.0     1348                  1248.500
## 41      KYLE WILLIAM MURPHY    MI    3.0     1403                  1149.857
## 42                 JARED GE    MI    3.0     1332                  1106.571
## 43        ROBERT GLEN VASEY    MI    3.0     1283                  1327.000
## 44       JUSTIN D SCHILLING    MI    3.0     1199                  1152.000
## 45                DEREK YAN    MI    3.0     1242                  1357.714
## 46 JACOB ALEXANDER LAVALLEY    MI    2.5      377                  1392.000
## 47              ERIC WRIGHT    MI    2.5     1362                  1355.800
## 48             DANIEL KHAIN    MI    2.5     1382                  1285.800
## 49         MICHAEL J MARTIN    MI    2.5     1291                  1296.000
## 50               SHIVAM JHA    MI    2.5     1056                  1356.143
## 51           TEJAS AYYAGARI    MI    2.5     1011                  1494.571
## 52                ETHAN GUO    MI    2.0      935                  1345.333
## 53            JOSE C YBARRA    MI    2.0     1393                  1206.167
## 54              LARRY HODGE    MI    2.0     1270                  1406.000
## 55                ALEX KONG    MI    2.0     1186                  1414.400
## 56             MARISA RICCI    MI    2.0     1153                  1363.000
## 57               MICHAEL LU    MI    2.0     1092                  1391.000
## 58             VIRAJ MOHILE    MI    2.0      917                  1319.000
## 59        SEAN M MC CORMICK    MI    1.5      853                  1330.200
## 60               JULIA SHEN    MI    1.5      967                  1327.286
## 61            JEZZEL FARKAS    ON    1.0      955                  1186.000
## 62            ASHWIN BALAJI    MI    1.0     1530                  1350.200
## 63     THOMAS JOSEPH HOSMER    MI    1.0     1175                  1263.000
## 64                   BEN LI    MI    3.5     1163                  1522.143
## ** The commented-out call below does not work ** - its target is an
## https:// URL rather than a local file path. It is kept for reference only.

#write.csv(tournamentresults1,file = "https://raw.githubusercontent.com/tagensingh/SPS-DATA607-PROJECT-1/main/tournamentresults1.csv")

# Save the results table as a CSV file in the current working directory.
# Row names are written as well, which is why an extra "X" column shows up
# when the file is read back below.
write.csv(x = tournamentresults1, file = "tournamentresultsfinal.csv")

# Read the file back in to verify that the csv file was written accurately.
# (A header row and "," as the separator are read.csv()'s defaults.)
tourney3 <- read.csv(file = "tournamentresultsfinal.csv")

names(tourney3)
## [1] "X"                         "Player.Name"              
## [3] "State"                     "Points"                   
## [5] "P.Rating"                  "Opponent.Average.P.Rating"

# Copy the re-read table and confirm what kind of object it is.
tourney4 <- data.frame(tourney3)

class(tourney4)
## [1] "data.frame"