library(stringr)
# Load the input CSV into a data frame. The first line of the file supplies
# the column names, and text columns are kept as character vectors.
tst <- read.csv(
  "C:/temp/tmt2.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
Copy the original data frame to a new data frame. Use gsub to partially clean the Rating column: remove leading and trailing digits, spaces, and miscellaneous characters.
# Work on a copy so the imported data in `tst` is left unmodified.
tst2 <- tst

# Clean the rating text in three substitution passes, then trim whitespace:
#   1. drop an optional run of digits followed by "/ R: ";
#   2. drop a leading space, an optional run of digits, and one more space;
#   3. drop ",>" together with any digits that follow it.
tst2$Rating <- local({
  rating <- gsub("\\d*/ R: ", "", tst$Rating)
  rating <- gsub("^ \\d* ", "", rating)
  rating <- gsub(",>\\d*", "", rating)
  str_trim(rating)
})
Copy the data frame to another data frame. Clean up the remaining unwanted text in Rating, which includes characters, spaces, and numbers.
# Second working copy; its Rating column is truncated below.
tst3 <- tst2

# Keep each rating from its first character up to (and including) the
# position where the first match of the pattern starts, then trim whitespace.
# The pattern matches either an optional character, "P", a character, an
# optional space and a character, or a space followed by any character.
# Where the pattern does not occur, str_locate() yields NA and so does the
# resulting rating.
tst3$Rating <- local({
  cut_at <- str_locate(tst2$Rating, pattern = "(.?P. ?.)|( .)")[, 1]
  shortened <- str_sub(tst2$Rating, start = 1, end = cut_at)
  str_trim(shortened)
})
Copy the rating from tst2 into tst3 wherever the tst3 rating is NA.
# Fall back to the tst2 rating wherever the tst3 rating is NA.
# A logical mask is used instead of a `1:nrow(tst3)` loop: on an empty data
# frame `1:0` would iterate over c(1, 0) and the assignment would fail, while
# the mask is simply zero-length and the statement becomes a no-op.
needs_fallback <- is.na(tst3$Rating)
tst3$Rating[needs_fallback] <- tst2$Rating[needs_fallback]
For each player, extract the opponent number from each round by keeping the digits only, look up those opponents' ratings, and discard any missing values. The rounded average of the remaining ratings is stored in a new column of the tst3 data frame.
# Average rating of each player's opponents, stored in AvgPreChessRating.
#
# Columns 6:12 hold one cell per round. The first run of one or two digits in
# a cell is read as the opponent's row number in tst3, and column 4 is read
# as that opponent's rating.
# NOTE(review): this assumes a player's number equals their row index in tst3
# and that no opponent number exceeds 99 (pattern "\\d{1,2}") -- confirm
# against the input data.
#
# The mean is taken over the opponents whose rating could actually be looked
# up. Rounds without an opponent number are skipped, so they no longer
# inflate the divisor. A player with no usable opponent gets NA rather than 0.
tst3$AvgPreChessRating <- local({
  # Convert the rating column once instead of once per lookup.
  opponent_rating <- as.integer(tst3[[4]])

  vapply(
    seq_len(nrow(tst3)),  # safe on an empty data frame, unlike 1:nrow()
    function(k) {
      # Flatten the one-row slice into a plain character vector before
      # handing it to stringr.
      round_cells <- as.character(unlist(tst3[k, 6:12], use.names = FALSE))
      opponent_rows <- as.integer(str_extract(round_cells, "\\d{1,2}"))

      faced <- opponent_rating[opponent_rows[!is.na(opponent_rows)]]
      faced <- faced[!is.na(faced)]

      if (length(faced) == 0L) {
        NA_real_
      } else {
        round(mean(faced))
      }
    },
    numeric(1)
  )
})
Rename the columns to the column names listed in the project notes.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Give the working columns their display names; each line reads
# new name = existing column name.
tst4 <- rename(
  .data = tst3,
  `Player's Name` = "Player.Name",
  `Player's State` = "State",
  `Total Number of Points` = "RoundTotal",
  `Player's Pre-Rating` = "Rating",
  `Average Pre Chess Ratings of Opponents` = "AvgPreChessRating"
)
Display a subset of the data containing only the columns listed in the project notes, and write it to sdata.csv.
# Keep only the five renamed columns, in this order.
sdata <- subset(
  tst4,
  select = c(
    "Player's Name",
    "Player's State",
    "Total Number of Points",
    "Player's Pre-Rating",
    "Average Pre Chess Ratings of Opponents"
  )
)

# Show the selection, then save it as sdata.csv (a relative path, so it is
# resolved against the current working directory).
sdata
write.csv(sdata, file = "sdata.csv")