1. Load Packages (these may not all be used)
library(stringr)
library(XML)
library(RCurl)
## Loading required package: bitops
library(bitops)
library(tau)
library(plyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Reference used for the initial load approach: http://stackoverflow.com/questions/21114598/importing-a-text-file-into-r

  2. Load the raw lines from a local text file
# Location of the raw tournament report. Despite the variable name this is a
# local file-system path, not a web address.
theUrl <- "/Users/scottkarr/IS607Spring2016/project1/tournamentinfo.txt"
# Read the report as a character vector, one element per line of the file.
l <- readLines(con = theUrl)
## Warning in readLines(theUrl): incomplete final line found on '/Users/
## scottkarr/IS607Spring2016/project1/tournamentinfo.txt'

remove unnecessary lines

# Keep only the lines that are neither empty nor made up solely of dashes
# (optionally bounded by a pipe on either side).
l <- l[!grepl("^\\|?-+\\|?$|^$", l)]

split

# Break every line into fields at each pipe character, also consuming any
# whitespace that directly precedes the pipe. The result is a list with one
# character vector per input line.
lsplit <- strsplit(x = l, split = "\\s*\\|")

set names

# Build the working data frame in one step:
#   * do.call(rbind, lsplit[-1]) stacks every split line except the first
#     into a character matrix (one row per line);
#   * [ ,-1] drops the first column of that matrix;
#   * paste(lsplit[[1]], lsplit[[2]])[-1] glues the first two split lines
#     together element-wise and uses the result (minus its first element) as
#     the column names.
# NOTE(review): rbind() emits a "not a multiple of vector length" warning
# here, i.e. the split lines do not all have the same number of fields and
# the shorter ones are recycled to fill the matrix. The code that follows
# appears to rely on the recycled trailing column (column 10 is subsequently
# renamed "Pair Num"), so confirm that behaviour before refactoring this line.
dat <- setNames(data.frame(do.call(rbind, lsplit[-1])[ ,-1]), paste(lsplit[[1]],lsplit[[2]])[-1])
## Warning in (function (..., deparse.level = 1) : number of columns of result
## is not a multiple of vector length (arg 2)

add back last column name

# Give the tenth column an explicit label.
names(dat)[10] <- "Pair Num"

The first two rows were combined to form the header, so remove row 1 (the second header line), which is still left in the data

# Discard the first row; all remaining rows and every column are kept.
dat <- dat[-1, ]

copy into a new data frame (data.frame() also makes the column names syntactically valid, e.g. "Pair Num" becomes Pair.Num)

# Re-wrap dat so that its column names are passed through name checking
# (spaces become dots). The code further down addresses columns by those
# sanitized names, e.g. Pair.Num, Total..Pts and Round...1.
df1 <- data.frame(dat, check.names = TRUE)

subset child and parent recs

# A row counts as a "child" record when its first column contains at least one
# digit; every other row is a "parent" record.
df1$IsChildRec <- str_detect(df1[[1]], "[[:digit:]]{1,}")

# Partition the rows on that flag. which() skips NA flags, matching the way
# subset() treats them.
df1.Csub <- df1[which(df1$IsChildRec), ]
df1.Psub <- df1[which(!df1$IsChildRec), ]

# In both partitions the first column is referred to as "Name" from here on.
names(df1.Psub)[1] <- "Name"
names(df1.Csub)[1] <- "Name"

build output dataframe

# Assemble the result frame, one row per parent record. Columns taken from the
# child partition are paired with the parent rows purely by position.
df1.Output <- data.frame(ID = df1.Psub$Pair.Num)
df1.Output[["Name"]] <- df1.Psub$Name
df1.Output[["State"]] <- df1.Csub$Pair.Num
df1.Output[["Ttl-Pts"]] <- df1.Psub$Total..Pts

# Pre-rating: the first run of one to four digits that directly follows a
# blank in the (trimmed) child "Name" text, with that blank stripped again.
df1.Output[["Pre-Rating"]] <- str_trim(
  str_extract(str_trim(df1.Csub$Name), "[:blank:][:digit:]{1,4}")
)

# Opp1..Opp7: the digits at the very end of each round column, as numbers.
# Cells that do not end in a digit yield NA.
df1.Output[paste0("Opp", 1:7)] <- unname(lapply(
  df1.Psub[paste0("Round...", 1:7)],
  function(round_col) as.numeric(str_extract(round_col, "[:digit:]{1,}$"))
))

The last derived column uses lapply to scan each row and dplyr's filter to look up the indexed data point (scores). While lapply is perhaps more direct than nested loops, I'm uncomfortable with it: there is too much indirection. It should also be possible to collapse the hard-coded column indexes, but that is for another time.

# AvgOppScore: for every player, the mean pre-rating of the opponents listed in
# Opp1..Opp7, ignoring rounds without an opponent (NA).
#
# The lookup is done on the real pair numbers parsed from the ID text rather
# than on as.numeric(ID) + 1. The "+ 1" form only works while ID is a factor
# whose integer level codes happen to sit one above the pair number; as soon
# as ID is a plain character column (the data.frame() default from R 4.0.0)
# it silently picks the wrong opponent. Columns are addressed by name instead
# of by position for the same reason.

# Pair number of each row; as.character() first so a factor yields its label,
# not its level code.
player_id <- as.numeric(trimws(as.character(df1.Output[["ID"]])))
# Pre-ratings are stored as text, so convert them once up front.
pre_rating <- as.numeric(as.character(df1.Output[["Pre-Rating"]]))

# Rows = players, columns = rounds, cells = opponent pair number (or NA).
opp_ids <- as.matrix(df1.Output[paste0("Opp", 1:7)])

# Replace every opponent id by that opponent's pre-rating. match() returns NA
# for missing or unknown ids, which rowMeans() then skips.
opp_rating <- opp_ids
opp_rating[] <- pre_rating[match(opp_ids, player_id)]

# A player without any rated opponent gets NaN, as mean(..., na.rm = TRUE)
# over an all-NA vector would.
df1.Output["AvgOppScore"] <- unname(rowMeans(opp_rating, na.rm = TRUE))

remove opponent id references

# Columns 6 through 12 hold the per-round opponent ids; delete them now that
# the average has been derived, then display the finished table.
df1.Output[6:12] <- NULL
print(df1.Output)
##       ID                        Name State Ttl-Pts Pre-Rating AvgOppScore
## 1      1                    GARY HUA    ON     6.0       1794    1605.286
## 2      2             DAKSHESH DARURI    MI     6.0       1553    1469.286
## 3      3                ADITYA BAJAJ    MI     6.0       1384    1563.571
## 4      4         PATRICK H SCHILLING    MI     5.5       1716    1573.571
## 5      5                  HANSHI ZUO    MI     5.5       1655    1500.857
## 6      6                 HANSEN SONG    OH     5.0       1686    1518.714
## 7      7           GARY DEE SWATHELL    MI     5.0       1649    1372.143
## 8      8            EZEKIEL HOUGHTON    MI     5.0       1641    1468.429
## 9      9                 STEFANO LEE    ON     5.0       1411    1523.143
## 10    10                   ANVIT RAO    MI     5.0       1365    1554.143
## 11    11    CAMERON WILLIAM MC LEMAN    MI     4.5       1712    1467.571
## 12    12              KENNETH J TACK    MI     4.5       1663    1506.167
## 13    13           TORRANCE HENRY JR    MI     4.5       1666    1497.857
## 14    14                BRADLEY SHAW    MI     4.5       1610    1515.000
## 15    15      ZACHARY JAMES HOUGHTON    MI     4.5       1220    1483.857
## 16    16                MIKE NIKITIN    MI     4.0       1604    1385.800
## 17    17          RONALD GRZEGORCZYK    MI     4.0       1629    1498.571
## 18    18               DAVID SUNDEEN    MI     4.0       1600    1480.000
## 19    19                DIPANKAR ROY    MI     4.0       1564    1426.286
## 20    20                 JASON ZHENG    MI     4.0       1595    1410.857
## 21    21               DINH DANG BUI    ON     4.0       1563    1470.429
## 22    22            EUGENE L MCCLURE    MI     4.0       1555    1300.333
## 23    23                    ALAN BUI    ON     4.0       1363    1213.857
## 24    24           MICHAEL R ALDRICH    MI     4.0       1229    1357.000
## 25    25            LOREN SCHWIEBERT    MI     3.5       1745    1363.286
## 26    26                     MAX ZHU    ON     3.5       1579    1506.857
## 27    27              GAURAV GIDWANI    MI     3.5       1552    1221.667
## 28    28  SOFIA ADINA STANESCU-BELLU    MI     3.5       1507    1522.143
## 29    29            CHIEDOZIE OKORIE    MI     3.5       1602    1313.500
## 30    30          GEORGE AVERY JONES    ON     3.5       1522    1144.143
## 31    31                RISHI SHETTY    MI     3.5       1494    1259.857
## 32    32       JOSHUA PHILIP MATHEWS    ON     3.5       1441    1378.714
## 33    33                     JADE GE    MI     3.5       1449    1276.857
## 34    34      MICHAEL JEFFERY THOMAS    MI     3.5       1399    1375.286
## 35    35            JOSHUA DAVID LEE    MI     3.5       1438    1149.714
## 36    36               SIDDHARTH JHA    MI     3.5       1355    1388.167
## 37    37        AMIYATOSH PWNANANDAM    MI     3.5        980    1384.800
## 38    38                   BRIAN LIU    MI     3.0       1423    1539.167
## 39    39               JOEL R HENDON    MI     3.0       1436    1429.571
## 40    40                FOREST ZHANG    MI     3.0       1348    1390.571
## 41    41         KYLE WILLIAM MURPHY    MI     3.0       1403    1248.500
## 42    42                    JARED GE    MI     3.0       1332    1149.857
## 43    43           ROBERT GLEN VASEY    MI     3.0       1283    1106.571
## 44    44          JUSTIN D SCHILLING    MI     3.0       1199    1327.000
## 45    45                   DEREK YAN    MI     3.0       1242    1152.000
## 46    46    JACOB ALEXANDER LAVALLEY    MI     3.0        377    1357.714
## 47    47                 ERIC WRIGHT    MI     2.5       1362    1392.000
## 48    48                DANIEL KHAIN    MI     2.5       1382    1355.800
## 49    49            MICHAEL J MARTIN    MI     2.5       1291    1285.800
## 50    50                  SHIVAM JHA    MI     2.5       1056    1296.000
## 51    51              TEJAS AYYAGARI    MI     2.5       1011    1356.143
## 52    52                   ETHAN GUO    MI     2.5        935    1494.571
## 53    53               JOSE C YBARRA    MI     2.0       1393    1345.333
## 54    54                 LARRY HODGE    MI     2.0       1270    1206.167
## 55    55                   ALEX KONG    MI     2.0       1186    1406.000
## 56    56                MARISA RICCI    MI     2.0       1153    1414.400
## 57    57                  MICHAEL LU    MI     2.0       1092    1363.000
## 58    58                VIRAJ MOHILE    MI     2.0        917    1391.000
## 59    59           SEAN M MC CORMICK    MI     2.0        853    1319.000
## 60    60                  JULIA SHEN    MI     1.5        967    1330.200
## 61    61               JEZZEL FARKAS    ON     1.5        955    1327.286
## 62    62               ASHWIN BALAJI    MI     1.0       1530    1186.000
## 63    63        THOMAS JOSEPH HOSMER    MI     1.0       1175    1350.200
## 64    64                      BEN LI    MI     1.0       1163    1263.000

Export the result to a .csv file in the current working directory

# Write the final table to tournamentinfo.csv, resolved against the current
# working directory. write.csv() keeps its defaults here, so the row names
# are emitted as an extra leading column.
write.csv(x = df1.Output, file = "tournamentinfo.csv")