## Import Libraries

library(stringr)
library(BBmisc)

## Read File after placing file in current Working Directory

chessinfo = read.csv("tournamentinfo.txt",FALSE,sep = "|")


## Data Cleansing for unique items to remove -------
dfdirty = unique(chessinfo)
dfdirty = dfdirty[-1,-11]
colnames(dfdirty) = c("Pair","Player_Name","Total","Round 1","Round 2","Round 3","Round 4","Round 5","Round 6","Round 7")
dfdirty = dfdirty[-1:-2,]

## Cleansing to extract scores, states, and ids

Playernamedirty = unlist(dfdirty$Player_Name)
state = unlist(dfdirty$Pair)
play_states = unlist(str_extract_all(state,"[[:alpha:]]{2}"))
id_scores = str_extract_all(Playernamedirty,"[[:digit:]]{2,}")
uscfid = str_extract_all(id_scores,"[[:digit:]]{8,}")
pre_scores_matrix =matrix(unlist(str_extract_all(id_scores,"[[:digit:]]{3,}")),ncol = 3,byrow= TRUE)

## conversion to numeric to remove NAs and creation of temporary matrix

dfdirty$Pair = as.numeric(as.character(dfdirty$Pair))
## Warning: NAs introduced by coercion
moderately_clean_df = dfdirty[complete.cases(dfdirty$Pair),]
moderately_clean_df$Pre_Scores = pre_scores_matrix[,2]
moderately_clean_df$Post_Scores = pre_scores_matrix[,3]
moderately_clean_df$Player_States = play_states

## extraction of player ids and game results

list_round1_id = str_extract_all(moderately_clean_df$`Round 1`,"[[:digit:]]{2,}")
list_round1_decision = unlist(str_extract_all(moderately_clean_df$`Round 1`,"[[:alpha:]]{1,}"))
list_round2_id = str_extract_all(moderately_clean_df$`Round 2`,"[[:digit:]]{2,}")
list_round2_decision = unlist(str_extract_all(moderately_clean_df$`Round 2`,"[[:alpha:]]{1,}"))
list_round3_id = str_extract_all(moderately_clean_df$`Round 3`,"[[:digit:]]{2,}")
list_round3_decision = unlist(str_extract_all(moderately_clean_df$`Round 3`,"[[:alpha:]]{1,}"))
list_round4_id = str_extract_all(moderately_clean_df$`Round 4`,"[[:digit:]]{2,}")
list_round4_decision = unlist(str_extract_all(moderately_clean_df$`Round 4`,"[[:alpha:]]{1,}"))
list_round5_id = str_extract_all(moderately_clean_df$`Round 5`,"[ [:digit:]]{2,}")
list_round5_id = trimws(list_round5_id)
list_round5_decision = unlist(str_extract_all(moderately_clean_df$`Round 5`,"[[:alpha:]]{1,}"))
list_round6_id = str_extract_all(moderately_clean_df$`Round 6`,"[[:digit:]]{2,}")
list_round6_decision = unlist(str_extract_all(moderately_clean_df$`Round 6`,"[[:alpha:]]{1,}"))
list_round7_id = str_extract_all(moderately_clean_df$`Round 7`,"[ [:digit:]]{2,}")
list_round7_id = trimws(list_round7_id,"l")
list_round7_decision = unlist(str_extract_all(moderately_clean_df$`Round 7`,"[[:alpha:]]{1,}"))

## modification with player ids and results to primary table

moderately_clean_df$Round1_Result = list_round1_decision
moderately_clean_df$Round2_Result = list_round2_decision
moderately_clean_df$Round3_Result = list_round3_decision
moderately_clean_df$Round4_Result = list_round4_decision
moderately_clean_df$Round5_Result = list_round5_decision
moderately_clean_df$Round6_Result = list_round6_decision
moderately_clean_df$Round7_Result = list_round7_decision
moderately_clean_df$Round1_Player_ID = list_round1_id
moderately_clean_df$Round2_Player_ID = list_round2_id
moderately_clean_df$Round3_Player_ID = list_round3_id
moderately_clean_df$Round4_Player_ID = list_round4_id
moderately_clean_df$Round5_Player_ID = list_round5_id
moderately_clean_df$Round6_Player_ID = list_round6_id
moderately_clean_df$Round7_Player_ID = list_round7_id

## Cleansing of newly added columsn
moderately_clean_df$Round1_Player_ID[moderately_clean_df$Round1_Player_ID == "character(0)"] =  0
moderately_clean_df$Round2_Player_ID[moderately_clean_df$Round2_Player_ID == "character(0)"] =  0
moderately_clean_df$Round3_Player_ID[moderately_clean_df$Round3_Player_ID == "character(0)"] =  0
moderately_clean_df$Round4_Player_ID[moderately_clean_df$Round4_Player_ID == "character(0)"] =  0
moderately_clean_df$Round5_Player_ID[moderately_clean_df$Round5_Player_ID == ""] =  0
moderately_clean_df$Round6_Player_ID[moderately_clean_df$Round6_Player_ID == "character(0)"] =  0
moderately_clean_df$Round7_Player_ID[moderately_clean_df$Round7_Player_ID == ""] =  0

## Looping to parse out average pre scores

playeridlist = list()

for(i in 1:length(moderately_clean_df$Pair)){
  playeridlist[i] = convertRowsToList(moderately_clean_df[i,21:27])
}

playeridlistnolists = as.numeric(unlist(playeridlist))

temptable = moderately_clean_df[,c("Pair","Pre_Scores")]
rownames(temptable) = temptable$Pair
temptable = temptable[,-1]

list_pre_average_Scores = list()

for(i in 1:length(playeridlistnolists)){
  if(as.numeric(playeridlistnolists[i]) == 0){
    list_pre_average_Scores[i] = 0
  } 
  else{
    list_pre_average_Scores[i] = temptable[playeridlistnolists[i]]}
}


listoflistsprescores = list()
for(i in 1:64){
  b = 7 * i
  a = ifelse(i == 1, 1, b-6)
  listoflistsprescores[i] = list(as.numeric(unlist(list_pre_average_Scores[a:b])))
  }

prescoresfinal = list()
for(i in 1:64){
  prescoresfinal[i] = sum(listoflistsprescores[[i]])/length(listoflistsprescores[[i]][unlist(listoflistsprescores[[i]])!=0])
}

moderately_clean_df$Average_Pre_Scores = prescoresfinal


## Subset for clean df and write to csv

cleandf = moderately_clean_df[,c("Player_Name","Player_States","Total","Pre_Scores","Average_Pre_Scores")]

cleandf = apply(cleandf,2,as.character)

print(cleandf)
##       Player_Name                         Player_States Total   Pre_Scores
##  [1,] " GARY HUA                        " "ON"          "6.0  " "1794"    
##  [2,] " DAKSHESH DARURI                 " "MI"          "6.0  " "1553"    
##  [3,] " ADITYA BAJAJ                    " "MI"          "6.0  " "1384"    
##  [4,] " PATRICK H SCHILLING             " "MI"          "5.5  " "1716"    
##  [5,] " HANSHI ZUO                      " "MI"          "5.5  " "1655"    
##  [6,] " HANSEN SONG                     " "OH"          "5.0  " "1686"    
##  [7,] " GARY DEE SWATHELL               " "MI"          "5.0  " "1649"    
##  [8,] " EZEKIEL HOUGHTON                " "MI"          "5.0  " "1641"    
##  [9,] " STEFANO LEE                     " "ON"          "5.0  " "1411"    
## [10,] " ANVIT RAO                       " "MI"          "5.0  " "1365"    
## [11,] " CAMERON WILLIAM MC LEMAN        " "MI"          "4.5  " "1712"    
## [12,] " KENNETH J TACK                  " "MI"          "4.5  " "1663"    
## [13,] " TORRANCE HENRY JR               " "MI"          "4.5  " "1666"    
## [14,] " BRADLEY SHAW                    " "MI"          "4.5  " "1610"    
## [15,] " ZACHARY JAMES HOUGHTON          " "MI"          "4.5  " "1220"    
## [16,] " MIKE NIKITIN                    " "MI"          "4.0  " "1604"    
## [17,] " RONALD GRZEGORCZYK              " "MI"          "4.0  " "1629"    
## [18,] " DAVID SUNDEEN                   " "MI"          "4.0  " "1600"    
## [19,] " DIPANKAR ROY                    " "MI"          "4.0  " "1564"    
## [20,] " JASON ZHENG                     " "MI"          "4.0  " "1595"    
## [21,] " DINH DANG BUI                   " "ON"          "4.0  " "1563"    
## [22,] " EUGENE L MCCLURE                " "MI"          "4.0  " "1555"    
## [23,] " ALAN BUI                        " "ON"          "4.0  " "1363"    
## [24,] " MICHAEL R ALDRICH               " "MI"          "4.0  " "1229"    
## [25,] " LOREN SCHWIEBERT                " "MI"          "3.5  " "1745"    
## [26,] " MAX ZHU                         " "ON"          "3.5  " "1579"    
## [27,] " GAURAV GIDWANI                  " "MI"          "3.5  " "1552"    
## [28,] " SOFIA ADINA STANESCU-BELLU      " "MI"          "3.5  " "1507"    
## [29,] " CHIEDOZIE OKORIE                " "MI"          "3.5  " "1602"    
## [30,] " GEORGE AVERY JONES              " "ON"          "3.5  " "1522"    
## [31,] " RISHI SHETTY                    " "MI"          "3.5  " "1494"    
## [32,] " JOSHUA PHILIP MATHEWS           " "ON"          "3.5  " "1441"    
## [33,] " JADE GE                         " "MI"          "3.5  " "1449"    
## [34,] " MICHAEL JEFFERY THOMAS          " "MI"          "3.5  " "1399"    
## [35,] " JOSHUA DAVID LEE                " "MI"          "3.5  " "1438"    
## [36,] " SIDDHARTH JHA                   " "MI"          "3.5  " "1355"    
## [37,] " AMIYATOSH PWNANANDAM            " "MI"          "3.5  " "980"     
## [38,] " BRIAN LIU                       " "MI"          "3.0  " "1423"    
## [39,] " JOEL R HENDON                   " "MI"          "3.0  " "1436"    
## [40,] " FOREST ZHANG                    " "MI"          "3.0  " "1348"    
## [41,] " KYLE WILLIAM MURPHY             " "MI"          "3.0  " "1403"    
## [42,] " JARED GE                        " "MI"          "3.0  " "1332"    
## [43,] " ROBERT GLEN VASEY               " "MI"          "3.0  " "1283"    
## [44,] " JUSTIN D SCHILLING              " "MI"          "3.0  " "1199"    
## [45,] " DEREK YAN                       " "MI"          "3.0  " "1242"    
## [46,] " JACOB ALEXANDER LAVALLEY        " "MI"          "3.0  " "377"     
## [47,] " ERIC WRIGHT                     " "MI"          "2.5  " "1362"    
## [48,] " DANIEL KHAIN                    " "MI"          "2.5  " "1382"    
## [49,] " MICHAEL J MARTIN                " "MI"          "2.5  " "1291"    
## [50,] " SHIVAM JHA                      " "MI"          "2.5  " "1056"    
## [51,] " TEJAS AYYAGARI                  " "MI"          "2.5  " "1011"    
## [52,] " ETHAN GUO                       " "MI"          "2.5  " "935"     
## [53,] " JOSE C YBARRA                   " "MI"          "2.0  " "1393"    
## [54,] " LARRY HODGE                     " "MI"          "2.0  " "1270"    
## [55,] " ALEX KONG                       " "MI"          "2.0  " "1186"    
## [56,] " MARISA RICCI                    " "MI"          "2.0  " "1153"    
## [57,] " MICHAEL LU                      " "MI"          "2.0  " "1092"    
## [58,] " VIRAJ MOHILE                    " "MI"          "2.0  " "917"     
## [59,] " SEAN M MC CORMICK               " "MI"          "2.0  " "853"     
## [60,] " JULIA SHEN                      " "MI"          "1.5  " "967"     
## [61,] " JEZZEL FARKAS                   " "ON"          "1.5  " "955"     
## [62,] " ASHWIN BALAJI                   " "MI"          "1.0  " "1530"    
## [63,] " THOMAS JOSEPH HOSMER            " "MI"          "1.0  " "1175"    
## [64,] " BEN LI                          " "MI"          "1.0  " "1163"    
##       Average_Pre_Scores
##  [1,] "1605.28571428571"
##  [2,] "1428.16666666667"
##  [3,] "1550.66666666667"
##  [4,] "1577"            
##  [5,] "1500.85714285714"
##  [6,] "1518.71428571429"
##  [7,] "1365.66666666667"
##  [8,] "1496.8"          
##  [9,] "1474.4"          
## [10,] "1554.14285714286"
## [11,] "1387.6"          
## [12,] "1397"            
## [13,] "1449.25"         
## [14,] "1378.75"         
## [15,] "1483.85714285714"
## [16,] "1385.8"          
## [17,] "1489.5"          
## [18,] "1431"            
## [19,] "1378"            
## [20,] "1387.16666666667"
## [21,] "1423"            
## [22,] "1300.33333333333"
## [23,] "1130.16666666667"
## [24,] "1357"            
## [25,] "1349.6"          
## [26,] "1472"            
## [27,] "1128.8"          
## [28,] "1459.6"          
## [29,] "1239"            
## [30,] "1144.14285714286"
## [31,] "1259.85714285714"
## [32,] "1335"            
## [33,] "1276.85714285714"
## [34,] "1323.5"          
## [35,] "1060.33333333333"
## [36,] "1388.16666666667"
## [37,] "1317.25"         
## [38,] "1539.16666666667"
## [39,] "1368.83333333333"
## [40,] "1390.57142857143"
## [41,] "1248.5"          
## [42,] "1149.85714285714"
## [43,] "1106.57142857143"
## [44,] "1327"            
## [45,] "1068.16666666667"
## [46,] "1309.16666666667"
## [47,] "1392"            
## [48,] "1355.8"          
## [49,] "1285.8"          
## [50,] "1296"            
## [51,] "1356.14285714286"
## [52,] "1494.57142857143"
## [53,] "1345.33333333333"
## [54,] "1206.16666666667"
## [55,] "1406"            
## [56,] "1414.4"          
## [57,] "1305.8"          
## [58,] "1358.6"          
## [59,] "1300.6"          
## [60,] "1330.2"          
## [61,] "1317.83333333333"
## [62,] "1186"            
## [63,] "1299.5"          
## [64,] "1263"
write.csv(cleandf, file = "CleanChessData.csv",row.names = FALSE)