## Import Libraries
library(stringr)
library(BBmisc)
## Read File after placing file in current Working Directory
chessinfo = read.csv("tournamentinfo.txt",FALSE,sep = "|")
## Data Cleansing for unique items to remove -------
dfdirty = unique(chessinfo)
dfdirty = dfdirty[-1,-11]
colnames(dfdirty) = c("Pair","Player_Name","Total","Round 1","Round 2","Round 3","Round 4","Round 5","Round 6","Round 7")
dfdirty = dfdirty[-1:-2,]
## Cleansing to extract scores, states, and ids
Playernamedirty = unlist(dfdirty$Player_Name)
state = unlist(dfdirty$Pair)
play_states = unlist(str_extract_all(state,"[[:alpha:]]{2}"))
id_scores = str_extract_all(Playernamedirty,"[[:digit:]]{2,}")
uscfid = str_extract_all(id_scores,"[[:digit:]]{8,}")
pre_scores_matrix =matrix(unlist(str_extract_all(id_scores,"[[:digit:]]{3,}")),ncol = 3,byrow= TRUE)
## conversion to numeric to remove NAs and creation of temporary matrix
dfdirty$Pair = as.numeric(as.character(dfdirty$Pair))
## Warning: NAs introduced by coercion
moderately_clean_df = dfdirty[complete.cases(dfdirty$Pair),]
moderately_clean_df$Pre_Scores = pre_scores_matrix[,2]
moderately_clean_df$Post_Scores = pre_scores_matrix[,3]
moderately_clean_df$Player_States = play_states
## extraction of player ids and game results
list_round1_id = str_extract_all(moderately_clean_df$`Round 1`,"[[:digit:]]{2,}")
list_round1_decision = unlist(str_extract_all(moderately_clean_df$`Round 1`,"[[:alpha:]]{1,}"))
list_round2_id = str_extract_all(moderately_clean_df$`Round 2`,"[[:digit:]]{2,}")
list_round2_decision = unlist(str_extract_all(moderately_clean_df$`Round 2`,"[[:alpha:]]{1,}"))
list_round3_id = str_extract_all(moderately_clean_df$`Round 3`,"[[:digit:]]{2,}")
list_round3_decision = unlist(str_extract_all(moderately_clean_df$`Round 3`,"[[:alpha:]]{1,}"))
list_round4_id = str_extract_all(moderately_clean_df$`Round 4`,"[[:digit:]]{2,}")
list_round4_decision = unlist(str_extract_all(moderately_clean_df$`Round 4`,"[[:alpha:]]{1,}"))
list_round5_id = str_extract_all(moderately_clean_df$`Round 5`,"[ [:digit:]]{2,}")
list_round5_id = trimws(list_round5_id)
list_round5_decision = unlist(str_extract_all(moderately_clean_df$`Round 5`,"[[:alpha:]]{1,}"))
list_round6_id = str_extract_all(moderately_clean_df$`Round 6`,"[[:digit:]]{2,}")
list_round6_decision = unlist(str_extract_all(moderately_clean_df$`Round 6`,"[[:alpha:]]{1,}"))
list_round7_id = str_extract_all(moderately_clean_df$`Round 7`,"[ [:digit:]]{2,}")
list_round7_id = trimws(list_round7_id,"l")
list_round7_decision = unlist(str_extract_all(moderately_clean_df$`Round 7`,"[[:alpha:]]{1,}"))
## modification with player ids and results to primary table
moderately_clean_df$Round1_Result = list_round1_decision
moderately_clean_df$Round2_Result = list_round2_decision
moderately_clean_df$Round3_Result = list_round3_decision
moderately_clean_df$Round4_Result = list_round4_decision
moderately_clean_df$Round5_Result = list_round5_decision
moderately_clean_df$Round6_Result = list_round6_decision
moderately_clean_df$Round7_Result = list_round7_decision
moderately_clean_df$Round1_Player_ID = list_round1_id
moderately_clean_df$Round2_Player_ID = list_round2_id
moderately_clean_df$Round3_Player_ID = list_round3_id
moderately_clean_df$Round4_Player_ID = list_round4_id
moderately_clean_df$Round5_Player_ID = list_round5_id
moderately_clean_df$Round6_Player_ID = list_round6_id
moderately_clean_df$Round7_Player_ID = list_round7_id
## Cleansing of newly added columsn
moderately_clean_df$Round1_Player_ID[moderately_clean_df$Round1_Player_ID == "character(0)"] = 0
moderately_clean_df$Round2_Player_ID[moderately_clean_df$Round2_Player_ID == "character(0)"] = 0
moderately_clean_df$Round3_Player_ID[moderately_clean_df$Round3_Player_ID == "character(0)"] = 0
moderately_clean_df$Round4_Player_ID[moderately_clean_df$Round4_Player_ID == "character(0)"] = 0
moderately_clean_df$Round5_Player_ID[moderately_clean_df$Round5_Player_ID == ""] = 0
moderately_clean_df$Round6_Player_ID[moderately_clean_df$Round6_Player_ID == "character(0)"] = 0
moderately_clean_df$Round7_Player_ID[moderately_clean_df$Round7_Player_ID == ""] = 0
## Looping to parse out average pre scores
playeridlist = list()
for(i in 1:length(moderately_clean_df$Pair)){
playeridlist[i] = convertRowsToList(moderately_clean_df[i,21:27])
}
playeridlistnolists = as.numeric(unlist(playeridlist))
temptable = moderately_clean_df[,c("Pair","Pre_Scores")]
rownames(temptable) = temptable$Pair
temptable = temptable[,-1]
list_pre_average_Scores = list()
for(i in 1:length(playeridlistnolists)){
if(as.numeric(playeridlistnolists[i]) == 0){
list_pre_average_Scores[i] = 0
}
else{
list_pre_average_Scores[i] = temptable[playeridlistnolists[i]]}
}
listoflistsprescores = list()
for(i in 1:64){
b = 7 * i
a = ifelse(i == 1, 1, b-6)
listoflistsprescores[i] = list(as.numeric(unlist(list_pre_average_Scores[a:b])))
}
prescoresfinal = list()
for(i in 1:64){
prescoresfinal[i] = sum(listoflistsprescores[[i]])/length(listoflistsprescores[[i]][unlist(listoflistsprescores[[i]])!=0])
}
moderately_clean_df$Average_Pre_Scores = prescoresfinal
## Subset for clean df and write to csv
cleandf = moderately_clean_df[,c("Player_Name","Player_States","Total","Pre_Scores","Average_Pre_Scores")]
cleandf = apply(cleandf,2,as.character)
print(cleandf)
## Player_Name Player_States Total Pre_Scores
## [1,] " GARY HUA " "ON" "6.0 " "1794"
## [2,] " DAKSHESH DARURI " "MI" "6.0 " "1553"
## [3,] " ADITYA BAJAJ " "MI" "6.0 " "1384"
## [4,] " PATRICK H SCHILLING " "MI" "5.5 " "1716"
## [5,] " HANSHI ZUO " "MI" "5.5 " "1655"
## [6,] " HANSEN SONG " "OH" "5.0 " "1686"
## [7,] " GARY DEE SWATHELL " "MI" "5.0 " "1649"
## [8,] " EZEKIEL HOUGHTON " "MI" "5.0 " "1641"
## [9,] " STEFANO LEE " "ON" "5.0 " "1411"
## [10,] " ANVIT RAO " "MI" "5.0 " "1365"
## [11,] " CAMERON WILLIAM MC LEMAN " "MI" "4.5 " "1712"
## [12,] " KENNETH J TACK " "MI" "4.5 " "1663"
## [13,] " TORRANCE HENRY JR " "MI" "4.5 " "1666"
## [14,] " BRADLEY SHAW " "MI" "4.5 " "1610"
## [15,] " ZACHARY JAMES HOUGHTON " "MI" "4.5 " "1220"
## [16,] " MIKE NIKITIN " "MI" "4.0 " "1604"
## [17,] " RONALD GRZEGORCZYK " "MI" "4.0 " "1629"
## [18,] " DAVID SUNDEEN " "MI" "4.0 " "1600"
## [19,] " DIPANKAR ROY " "MI" "4.0 " "1564"
## [20,] " JASON ZHENG " "MI" "4.0 " "1595"
## [21,] " DINH DANG BUI " "ON" "4.0 " "1563"
## [22,] " EUGENE L MCCLURE " "MI" "4.0 " "1555"
## [23,] " ALAN BUI " "ON" "4.0 " "1363"
## [24,] " MICHAEL R ALDRICH " "MI" "4.0 " "1229"
## [25,] " LOREN SCHWIEBERT " "MI" "3.5 " "1745"
## [26,] " MAX ZHU " "ON" "3.5 " "1579"
## [27,] " GAURAV GIDWANI " "MI" "3.5 " "1552"
## [28,] " SOFIA ADINA STANESCU-BELLU " "MI" "3.5 " "1507"
## [29,] " CHIEDOZIE OKORIE " "MI" "3.5 " "1602"
## [30,] " GEORGE AVERY JONES " "ON" "3.5 " "1522"
## [31,] " RISHI SHETTY " "MI" "3.5 " "1494"
## [32,] " JOSHUA PHILIP MATHEWS " "ON" "3.5 " "1441"
## [33,] " JADE GE " "MI" "3.5 " "1449"
## [34,] " MICHAEL JEFFERY THOMAS " "MI" "3.5 " "1399"
## [35,] " JOSHUA DAVID LEE " "MI" "3.5 " "1438"
## [36,] " SIDDHARTH JHA " "MI" "3.5 " "1355"
## [37,] " AMIYATOSH PWNANANDAM " "MI" "3.5 " "980"
## [38,] " BRIAN LIU " "MI" "3.0 " "1423"
## [39,] " JOEL R HENDON " "MI" "3.0 " "1436"
## [40,] " FOREST ZHANG " "MI" "3.0 " "1348"
## [41,] " KYLE WILLIAM MURPHY " "MI" "3.0 " "1403"
## [42,] " JARED GE " "MI" "3.0 " "1332"
## [43,] " ROBERT GLEN VASEY " "MI" "3.0 " "1283"
## [44,] " JUSTIN D SCHILLING " "MI" "3.0 " "1199"
## [45,] " DEREK YAN " "MI" "3.0 " "1242"
## [46,] " JACOB ALEXANDER LAVALLEY " "MI" "3.0 " "377"
## [47,] " ERIC WRIGHT " "MI" "2.5 " "1362"
## [48,] " DANIEL KHAIN " "MI" "2.5 " "1382"
## [49,] " MICHAEL J MARTIN " "MI" "2.5 " "1291"
## [50,] " SHIVAM JHA " "MI" "2.5 " "1056"
## [51,] " TEJAS AYYAGARI " "MI" "2.5 " "1011"
## [52,] " ETHAN GUO " "MI" "2.5 " "935"
## [53,] " JOSE C YBARRA " "MI" "2.0 " "1393"
## [54,] " LARRY HODGE " "MI" "2.0 " "1270"
## [55,] " ALEX KONG " "MI" "2.0 " "1186"
## [56,] " MARISA RICCI " "MI" "2.0 " "1153"
## [57,] " MICHAEL LU " "MI" "2.0 " "1092"
## [58,] " VIRAJ MOHILE " "MI" "2.0 " "917"
## [59,] " SEAN M MC CORMICK " "MI" "2.0 " "853"
## [60,] " JULIA SHEN " "MI" "1.5 " "967"
## [61,] " JEZZEL FARKAS " "ON" "1.5 " "955"
## [62,] " ASHWIN BALAJI " "MI" "1.0 " "1530"
## [63,] " THOMAS JOSEPH HOSMER " "MI" "1.0 " "1175"
## [64,] " BEN LI " "MI" "1.0 " "1163"
## Average_Pre_Scores
## [1,] "1605.28571428571"
## [2,] "1428.16666666667"
## [3,] "1550.66666666667"
## [4,] "1577"
## [5,] "1500.85714285714"
## [6,] "1518.71428571429"
## [7,] "1365.66666666667"
## [8,] "1496.8"
## [9,] "1474.4"
## [10,] "1554.14285714286"
## [11,] "1387.6"
## [12,] "1397"
## [13,] "1449.25"
## [14,] "1378.75"
## [15,] "1483.85714285714"
## [16,] "1385.8"
## [17,] "1489.5"
## [18,] "1431"
## [19,] "1378"
## [20,] "1387.16666666667"
## [21,] "1423"
## [22,] "1300.33333333333"
## [23,] "1130.16666666667"
## [24,] "1357"
## [25,] "1349.6"
## [26,] "1472"
## [27,] "1128.8"
## [28,] "1459.6"
## [29,] "1239"
## [30,] "1144.14285714286"
## [31,] "1259.85714285714"
## [32,] "1335"
## [33,] "1276.85714285714"
## [34,] "1323.5"
## [35,] "1060.33333333333"
## [36,] "1388.16666666667"
## [37,] "1317.25"
## [38,] "1539.16666666667"
## [39,] "1368.83333333333"
## [40,] "1390.57142857143"
## [41,] "1248.5"
## [42,] "1149.85714285714"
## [43,] "1106.57142857143"
## [44,] "1327"
## [45,] "1068.16666666667"
## [46,] "1309.16666666667"
## [47,] "1392"
## [48,] "1355.8"
## [49,] "1285.8"
## [50,] "1296"
## [51,] "1356.14285714286"
## [52,] "1494.57142857143"
## [53,] "1345.33333333333"
## [54,] "1206.16666666667"
## [55,] "1406"
## [56,] "1414.4"
## [57,] "1305.8"
## [58,] "1358.6"
## [59,] "1300.6"
## [60,] "1330.2"
## [61,] "1317.83333333333"
## [62,] "1186"
## [63,] "1299.5"
## [64,] "1263"
write.csv(cleandf, file = "CleanChessData.csv",row.names = FALSE)