library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the tournament cross table into a character vector (one element per
# line of the file) and drop the first four lines.
text_lines <- read_lines("https://raw.githubusercontent.com/Doumgit/Project_1_D607/main/tournamentinfo.txt")
text_lines <- tail(text_lines, -4)

# Kept so the script still defines this object; nothing in this step fills it.
my_list <- list()

# Records are three lines long: two data lines followed by a line of dashes
# that is ignored. Pick out the two data lines of every record at once
# instead of looping and growing a data frame with rbind().
record_starts <- seq(1, length(text_lines), by = 3)
first_lines <- text_lines[record_starts]
second_lines <- text_lines[record_starts + 1]

# Fields taken from the first line of each record.
player_no <- str_extract(first_lines, r"((?<=\s{3})(\d+)(?=\s\|))")
player_name <- str_extract(
  first_lines,
  r"((?<=\|\s)(\b[A-Z,\-,\s]+\b)(?=\s+\|\d+\.\d+\s+\|))"
)
rating <- str_extract(first_lines, r"((\b\d*\.\d*\b))")

# Fields taken from the second line of each record.
state <- str_extract(second_lines, r"((\b[A-Z]{2}\b))")
current_rating <- str_extract(second_lines, r"((?<=R:\s{0,5})\d{3,4})")
next_round_rating <- str_extract(second_lines, r"((?<=->\s{0,5})\d+)")

# One character vector of pairing ids per record. The pattern allows zero
# digits, so a result cell without a number yields "" rather than being
# skipped; that keeps the pairing columns aligned.
pair_list <- str_extract_all(first_lines, "(?<=[A-Z]\\s{1,4})(\\d*)(?=\\|)")
n_rounds <- unique(lengths(pair_list))
if (length(n_rounds) != 1L) {
  stop("Records do not all contain the same number of pairings.",
       call. = FALSE)
}
pair_matrix <- do.call(rbind, pair_list)

# Wide table: one row per player, every column stored as character.
df <- data.frame(
  player_no, player_name, state, rating, current_rating, next_round_rating,
  pair_matrix,
  stringsAsFactors = FALSE
)
names(df) <- c(
  "id", "name", "state", "total_pts", "pre_rtg", "post_rtg",
  paste0("pair_", seq_len(n_rounds))
)

# Long table: one row per (player, pairing) combination, in record order.
pairs_df <- data.frame(
  id = rep(player_no, times = lengths(pair_list)),
  pair_ids = unlist(pair_list),
  stringsAsFactors = FALSE
)
Loop to look up each opponent's pre-rating (used below to compute the average opponent rating)
# Replace every opponent id stored in the pair_* columns with that opponent's
# pre_rtg value. Cells that match no id (for example "") are left untouched.
#
# All lookups for a column are resolved against the original ids in one
# match() call, so a rating that has just been written can never be mistaken
# for a player id and looked up a second time, which the previous in-place
# nested loop allowed.
pair_cols <- grep("^pair_[0-9]+$", names(df), value = TRUE)
for (pair_col in pair_cols) {
  opponent_row <- match(df[[pair_col]], df$id)
  # match() pairs NA with NA, so exclude missing pairings explicitly.
  found <- !is.na(opponent_row) & !is.na(df[[pair_col]])
  # Round-trip through numeric, as before, so the stored text is the
  # numeric rendering of the rating; the column itself stays character.
  df[[pair_col]][found] <- as.character(
    as.numeric(df$pre_rtg[opponent_row[found]])
  )
}
glimpse(df)
## Rows: 64
## Columns: 13
## $ id <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12…
## $ name <chr> "GARY HUA", "DAKSHESH DARURI", "ADITYA BAJAJ", "PATRICK H SC…
## $ state <chr> "ON", "MI", "MI", "MI", "MI", "OH", "MI", "MI", "ON", "MI", …
## $ total_pts <chr> "6.0", "6.0", "6.0", "5.5", "5.5", "5.0", "5.0", "5.0", "5.0…
## $ pre_rtg <chr> "1794", "1553", "1384", "1716", "1655", "1686", "1649", "164…
## $ post_rtg <chr> "1817", "1663", "1640", "1744", "1690", "1687", "1673", "165…
## $ pair_1 <chr> "1436", "1175", "1641", "1363", "1242", "1399", "1092", "138…
## $ pair_2 <chr> "1563", "917", "955", "1507", "980", "1602", "377", "1441", …
## $ pair_3 <chr> "1600", "1716", "1745", "1553", "1663", "1712", "1666", "161…
## $ pair_4 <chr> "1610", "1629", "1563", "1579", "1666", "1438", "1712", "141…
## $ pair_5 <chr> "1649", "1604", "1712", "1655", "1716", "1365", "1794", "136…
## $ pair_6 <chr> "1663", "1595", "1666", "1564", "1610", "1552", "1411", "150…
## $ pair_7 <chr> "1716", "1649", "1663", "1794", "1629", "1563", "1553", "156…
New data: calculating the mean. This code determines the average pre-rating of each player's opponents.
# Average the opponent ratings held in the pair_* columns, row by row.
# Cells that do not convert to an integer (such as "") become NA and are
# ignored by the mean.
pair_cols <- paste0("pair_", 1:7)
opponent_ratings <- matrix(
  as.integer(as.matrix(df[pair_cols])),
  nrow = nrow(df),
  ncol = length(pair_cols)
)
avg_opponent_rating <- rowMeans(opponent_ratings, na.rm = TRUE)

# as.integer() drops the fractional part (it does not round); this matches
# the values the script has always reported.
df <- df %>%
  mutate(Average = as.integer(avg_opponent_rating))

# Final report table.
New_data <- df %>%
  select(name, state, total_pts, pre_rtg, Average)
New_data
## name state total_pts pre_rtg Average
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1563
## 4 PATRICK H SCHILLING MI 5.5 1716 1573
## 5 HANSHI ZUO MI 5.5 1655 1500
## 6 HANSEN SONG OH 5.0 1686 1518
## 7 GARY DEE SWATHELL MI 5.0 1649 1372
## 8 EZEKIEL HOUGHTON MI 5.0 1641 1468
## 9 STEFANO LEE ON 5.0 1411 1523
## 10 ANVIT RAO MI 5.0 1365 1554
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1467
## 12 KENNETH J TACK MI 4.5 1663 1506
## 13 TORRANCE HENRY JR MI 4.5 1666 1497
## 14 BRADLEY SHAW MI 4.5 1610 1515
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1483
## 16 MIKE NIKITIN MI 4.0 1604 1385
## 17 RONALD GRZEGORCZYK MI 4.0 1629 1498
## 18 DAVID SUNDEEN MI 4.0 1600 1480
## 19 DIPANKAR ROY MI 4.0 1564 1426
## 20 JASON ZHENG MI 4.0 1595 1410
## 21 DINH DANG BUI ON 4.0 1563 1470
## 22 EUGENE L MCCLURE MI 4.0 1555 1300
## 23 ALAN BUI ON 4.0 1363 1213
## 24 MICHAEL R ALDRICH MI 4.0 1229 1357
## 25 LOREN SCHWIEBERT MI 3.5 1745 1363
## 26 MAX ZHU ON 3.5 1579 1506
## 27 GAURAV GIDWANI MI 3.5 1552 1221
## 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507 1522
## 29 CHIEDOZIE OKORIE MI 3.5 1602 1313
## 30 GEORGE AVERY JONES ON 3.5 1522 1144
## 31 RISHI SHETTY MI 3.5 1494 1259
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441 1378
## 33 JADE GE MI 3.5 1449 1276
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399 1375
## 35 JOSHUA DAVID LEE MI 3.5 1438 1149
## 36 SIDDHARTH JHA MI 3.5 1355 1388
## 37 AMIYATOSH PWNANANDAM MI 3.5 980 1384
## 38 BRIAN LIU MI 3.0 1423 1539
## 39 JOEL R HENDON MI 3.0 1436 1429
## 40 FOREST ZHANG MI 3.0 1348 1390
## 41 KYLE WILLIAM MURPHY MI 3.0 1403 1248
## 42 JARED GE MI 3.0 1332 1149
## 43 ROBERT GLEN VASEY MI 3.0 1283 1106
## 44 JUSTIN D SCHILLING MI 3.0 1199 1327
## 45 DEREK YAN MI 3.0 1242 1152
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377 1357
## 47 ERIC WRIGHT MI 2.5 1362 1392
## 48 DANIEL KHAIN MI 2.5 1382 1355
## 49 MICHAEL J MARTIN MI 2.5 1291 1285
## 50 SHIVAM JHA MI 2.5 1056 1296
## 51 TEJAS AYYAGARI MI 2.5 1011 1356
## 52 ETHAN GUO MI 2.5 935 1494
## 53 JOSE C YBARRA MI 2.0 1393 1345
## 54 LARRY HODGE MI 2.0 1270 1206
## 55 ALEX KONG MI 2.0 1186 1406
## 56 MARISA RICCI MI 2.0 1153 1414
## 57 MICHAEL LU MI 2.0 1092 1363
## 58 VIRAJ MOHILE MI 2.0 917 1391
## 59 SEAN M MC CORMICK MI 2.0 853 1319
## 60 JULIA SHEN MI 1.5 967 1330
## 61 JEZZEL FARKAS ON 1.5 955 1327
## 62 ASHWIN BALAJI MI 1.0 1530 1186
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175 1350
## 64 BEN LI MI 1.0 1163 1263