Project 1 - Create an optimal data set from a suboptimal text file full of data.
So right away the read.table function comes to mind, but I would like to see if I can do the initial parsing manually.
library(stringr)
# Open a connection to the raw tournament report.
text <- file('C:/Users/Exped/Desktop/tournamentinfo.txt')
# Read every line of the report into a character vector. readLines() only
# opens the connection for the duration of the call; the connection object
# itself stays registered until close() is called, so close it explicitly.
tournamentFile <- readLines(con = text)
close(text)
# readLines() already returns a flat character vector with one element per
# line, so it is used directly.
tournyVector <- tournamentFile
# The next three statements remove the unnecessary lines from the object.
# First, collect the lines at positions 1, 4, 7, ... and drop every line whose
# text matches one of them. Selecting by position parity (instead of building
# a seq() up to the length) also works when the file is empty.
remove <- tournyVector[seq_along(tournyVector) %% 3 == 1]
tournyVector <- tournyVector[!tournyVector %in% remove]
# Then drop the first two remaining lines. Negative indexing yields an empty
# vector when fewer than three lines are left, whereas 3:length(x) would count
# backwards and return NA or duplicated elements.
tournyVector <- tournyVector[-(1:2)]
# Here is the resulting object.
summary(tournyVector)
## Length Class Mode
## 128 character character
head(tournyVector)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [3] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [4] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [5] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [6] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
Now I have a long vector of paired lines (each player's name line followed by that player's details line), so I separate them.
# Odd-numbered elements of tournyVector (1, 3, 5, ...) go to nameRow.
nameRow <- tournyVector[seq(from = 1, to = length(tournyVector), by = 2)]
# Even-numbered elements of tournyVector (2, 4, 6, ...) go to detailsRow.
detailsRow <- tournyVector[seq(from = 2, to = length(tournyVector), by = 2)]
This loop averages the pre-tournament Elo ratings of each contender's opponents and appends the result to the data frame as a new column.
# For loops should generally be avoided in R, nested for loops especially.
# This was originally written with lapply, but an explicit loop over the
# players is kept here because it is easier to read. The inner loop over each
# player's opponents is replaced by vector indexing.
#
# Each element of oppVector holds one player's opponents as row indices into
# sufficientData; an entry of 0 is skipped. NA entries are skipped as well,
# instead of aborting the comparison with an error.
# The result vector is allocated up front rather than grown inside the loop.
averageOpponent <- numeric(length(oppVector))
for (playerIndex in seq_along(oppVector)) {
  opponents <- oppVector[[playerIndex]]
  faced <- opponents[!is.na(opponents) & opponents != 0]
  # A player with no opponents gives 0 / 0, i.e. NaN, as before.
  averageOpponent[playerIndex] <-
    sum(sufficientData$preElo[faced]) / length(faced)
}
sufficientData$averageOpponentElo <- averageOpponent
# setNames() assigns the names by position, so the data frame must hold
# exactly these five columns in this order; fail loudly otherwise instead of
# silently mislabelling columns.
stopifnot(length(sufficientData) == 5L)
sufficientData <- setNames(
  sufficientData,
  c('Name', 'Pts', 'State', 'PrElo', 'OpPreAvgELO')
)
summary(sufficientData$OpPreAvgELO)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1107 1310 1382 1379 1481 1605
write.csv(sufficientData, file = 'C:/Users/Exped/Desktop/chessAverages')