Project Description

In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could, for example, be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents. For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605.

Import Required Packages

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.1 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(knitr)

Loading and Reading the Data

# Both readers pull the same raw results file, so its location is written once.
tournament_url <- "https://raw.githubusercontent.com/Nick-Climaco/DATA-607-NC/main/tournament_info.txt"

# Table-style read: the first three lines are skipped and the following line
# is consumed as the header row.
tournament_data <- read.csv(file = tournament_url, skip = 3, header = TRUE)

# Line-by-line read: one character string per line of the file.
tournament_data2 <- read_lines(file = tournament_url)

Extracting the Data from the txt file

Extraction and Cleaning

# Extract every player's full name.
#
# The earlier pattern only accepted two or three `\w+` words, so longer or
# hyphenated names were cut short (the rendered table shows entries such as
# "CAMERON WILLIAM MC" and "SEAN M MC"). Instead, capture the whole name cell.
# Assumes each player row is laid out as "<pair number> | <NAME>   |<points>":
#   (?<=\\d \\| )           the cell must follow the pair-number cell
#   [^|]+?                  everything inside the cell (lazy, never crosses a pipe)
#   (?=\\s*\\|\\d\\.\\d)    up to the padding before the total-points cell
#
# unlist(use.names = FALSE) flattens the per-input list of matches into one
# unnamed character vector, so no names leak into data.frame() row names.
player_names <- tournament_data %>%
  str_extract_all("(?<=\\d \\| )[^|]+?(?=\\s*\\|\\d\\.\\d)") %>%
  unlist(use.names = FALSE) %>%
  str_trim()  # defensive: remove any leading/trailing white space

# State codes: a whitespace character, two capital letters, another whitespace
# character and a pipe. All matches are joined into one comma-separated
# string, stripped of the pipe, and split back into individual codes.
player_state <- tournament_data %>%
  str_extract_all(pattern = "\\s([A-Z][A-Z])\\s\\|", simplify = TRUE) %>%
  str_flatten(collapse = ",") %>%
  str_replace_all(pattern = ",{2,}", replacement = ",") %>%
  str_remove_all(pattern = "\\|") %>%
  str_split(pattern = ",") %>%
  pluck(1) %>%  # str_split() returns a list; keep its single element
  str_trim()

# Total points: every "digit.digit" token found in the data. The matches are
# collapsed to a comma-separated string, runs of commas are squeezed to one,
# and the string is split back into one value per player (still character;
# conversion to numeric happens when the data frame is built).
total_number_points <- tournament_data %>%
  str_extract_all(pattern = "\\d\\.\\d", simplify = TRUE) %>%
  str_flatten(collapse = ",") %>%
  str_replace_all(pattern = ",{2,}", replacement = ",") %>%
  str_split(pattern = ",") %>%
  pluck(1) %>%  # unwrap the one-element list returned by str_split()
  str_trim()

# Pre-tournament ratings: the digits following the "R:" marker. Ratings can be
# four or three digits long, hence the two alternatives (four digits first).
# After joining the matches, the "R: " prefix is removed and the values are
# split apart; str_trim() clears any padding left in front of shorter ratings.
player_pre_rating <- tournament_data %>%
  str_extract_all(pattern = "R:\\s*(\\d\\d\\d\\d)|R:\\s*\\d\\d\\d", simplify = TRUE) %>%
  str_flatten(collapse = ",") %>%
  str_replace_all(pattern = ",{2,}", replacement = ",") %>%
  str_remove_all(pattern = "R: ") %>%
  str_split(pattern = ",") %>%
  pluck(1) %>%  # unwrap the one-element list returned by str_split()
  str_trim()

Adding the cleaned data to a data frame…

# Assemble one row per player from the vectors extracted above.
#
# data.frame() silently recycles shorter vectors whose length divides the
# longest one, so verify up front that every field was parsed for every player.
stopifnot(
  length(player_state) == length(player_names),
  length(total_number_points) == length(player_names),
  length(player_pre_rating) == length(player_names)
)

# The pair number is derived from the number of parsed players instead of a
# hard-coded 64, so the frame follows the input file.
df_tournament <- data.frame(
  Number = seq_along(player_names),
  Name = player_names,
  State = player_state,
  Total_Points = as.numeric(total_number_points),
  Pre_Rating = as.numeric(player_pre_rating)
)

# Quick visual check of the first rows.
head(df_tournament)

##   Number                Name State Total_Points Pre_Rating
## 1      1            GARY HUA    ON          6.0       1794
## 2      2     DAKSHESH DARURI    MI          6.0       1553
## 3      3        ADITYA BAJAJ    MI          6.0       1384
## 4      4 PATRICK H SCHILLING    MI          5.5       1716
## 5      5          HANSHI ZUO    MI          5.5       1655
## 6      6         HANSEN SONG    OH          5.0       1686

Create a list of opponents’ player numbers

# Rows holding the round results start on line 5 of the raw file and repeat
# every third line. Run to the end of the file rather than to a hard-coded
# line 196 so the selection follows the size of the input.
opponents_data <- tournament_data2[seq(5, length(tournament_data2), by = 3)]

# Pull one token per round cell: either the opponent's pair number or, for a
# round without an opponent, the blank/letter-only placeholder. The second
# str_extract() keeps only the digits, so placeholders become NA directly
# instead of relying on as.numeric() coercion warnings.
opponent_num <- opponents_data %>%
  str_extract_all("(?<=\\|([A-Z])\\s{2,3})\\d{1,2}|(?<=\\|)(([A-Z])\\s{4}|\\s{5})") %>%
  unlist() %>%
  str_extract("\\d+") %>%
  as.numeric()

# Regex reference for the opponent-number pattern above:

# (?<=\\|([A-Z])\\s{2,3}) : positive lookbehind '(?<=...)' requiring a pipe, a single
# capital letter A-Z, and 2-3 whitespace characters immediately before the match

# \\d{1,2} : matches 1-2 decimal digits

# '|' : alternation (match the pattern on its left or the one on its right)

# (?<=\\|)(([A-Z])\\s{4}|\\s{5}) : positive lookbehind for a pipe, then either a single
# capital letter followed by 4 whitespace characters, or no letter and just 5
# whitespace characters

# Split the flat vector of opponent numbers into consecutive runs, one run per
# player, with one entry per round.
rounds_per_player <- 7

# A count that is not a whole number of rounds means the extraction missed or
# duplicated a cell; stop here rather than let split() recycle the grouping
# vector and misalign every later player.
stopifnot(length(opponent_num) %% rounds_per_player == 0)

opponent_groups <- split(
  opponent_num,
  rep(seq_len(length(opponent_num) %/% rounds_per_player), each = rounds_per_player)
)

# Pre-ratings of the players whose pair numbers appear in `opponents`.
ratings_faced <- function(opponents) {
  df_tournament$Pre_Rating[df_tournament$Number %in% opponents]
}

# One row per player; columns hold the summed and the mean opponent pre-rating.
opponent_pre_rating <- matrix(0, nrow = length(opponent_groups), ncol = 2)
colnames(opponent_pre_rating) <- c("Total", "Average")

# Column 1: sum of the opponents' pre-ratings for each player.
opponent_pre_rating[, 1] <- vapply(
  opponent_groups,
  function(opponents) sum(ratings_faced(opponents), na.rm = TRUE),
  numeric(1)
)

# Column 2: mean of the opponents' pre-ratings for each player.
opponent_pre_rating[, 2] <- vapply(
  opponent_groups,
  function(opponents) mean(ratings_faced(opponents), na.rm = TRUE),
  numeric(1)
)

# Attach the per-player mean opponent rating, rounded to a whole number.
df_tournament[["Average_Opponents_Rating"]] <- round(
  opponent_pre_rating[, "Average"],
  digits = 0
)

# Render the finished table in the report.
kable(df_tournament, row.names = NA)

Number	Name	State	Total_Points	Pre_Rating	Average_Opponents_Rating
1	GARY HUA	ON	6.0	1794	1605
2	DAKSHESH DARURI	MI	6.0	1553	1469
3	ADITYA BAJAJ	MI	6.0	1384	1564
4	PATRICK H SCHILLING	MI	5.5	1716	1574
5	HANSHI ZUO	MI	5.5	1655	1501
6	HANSEN SONG	OH	5.0	1686	1519
7	GARY DEE SWATHELL	MI	5.0	1649	1372
8	EZEKIEL HOUGHTON	MI	5.0	1641	1468
9	STEFANO LEE	ON	5.0	1411	1523
10	ANVIT RAO	MI	5.0	1365	1554
11	CAMERON WILLIAM MC	MI	4.5	1712	1468
12	KENNETH J TACK	MI	4.5	1663	1506
13	TORRANCE HENRY JR	MI	4.5	1666	1498
14	BRADLEY SHAW	MI	4.5	1610	1515
15	ZACHARY JAMES HOUGHTON	MI	4.5	1220	1484
16	MIKE NIKITIN	MI	4.0	1604	1386
17	RONALD GRZEGORCZYK	MI	4.0	1629	1499
18	DAVID SUNDEEN	MI	4.0	1600	1480
19	DIPANKAR ROY	MI	4.0	1564	1426
20	JASON ZHENG	MI	4.0	1595	1411
21	DINH DANG BUI	ON	4.0	1563	1470
22	EUGENE L MCCLURE	MI	4.0	1555	1300
23	ALAN BUI	ON	4.0	1363	1214
24	MICHAEL R ALDRICH	MI	4.0	1229	1357
25	LOREN SCHWIEBERT	MI	3.5	1745	1363
26	MAX ZHU	ON	3.5	1579	1507
27	GAURAV GIDWANI	MI	3.5	1552	1222
28	SOFIA ADINA STANESCU	MI	3.5	1507	1522
29	CHIEDOZIE OKORIE	MI	3.5	1602	1314
30	GEORGE AVERY JONES	ON	3.5	1522	1144
31	RISHI SHETTY	MI	3.5	1494	1260
32	JOSHUA PHILIP MATHEWS	ON	3.5	1441	1379
33	JADE GE	MI	3.5	1449	1277
34	MICHAEL JEFFERY THOMAS	MI	3.5	1399	1375
35	JOSHUA DAVID LEE	MI	3.5	1438	1150
36	SIDDHARTH JHA	MI	3.5	1355	1388
37	AMIYATOSH PWNANANDAM	MI	3.5	980	1385
38	BRIAN LIU	MI	3.0	1423	1539
39	JOEL R HENDON	MI	3.0	1436	1430
40	FOREST ZHANG	MI	3.0	1348	1391
41	KYLE WILLIAM MURPHY	MI	3.0	1403	1248
42	JARED GE	MI	3.0	1332	1150
43	ROBERT GLEN VASEY	MI	3.0	1283	1107
44	JUSTIN D SCHILLING	MI	3.0	1199	1327
45	DEREK YAN	MI	3.0	1242	1152
46	JACOB ALEXANDER LAVALLEY	MI	3.0	377	1358
47	ERIC WRIGHT	MI	2.5	1362	1392
48	DANIEL KHAIN	MI	2.5	1382	1356
49	MICHAEL J MARTIN	MI	2.5	1291	1286
50	SHIVAM JHA	MI	2.5	1056	1296
51	TEJAS AYYAGARI	MI	2.5	1011	1356
52	ETHAN GUO	MI	2.5	935	1495
53	JOSE C YBARRA	MI	2.0	1393	1345
54	LARRY HODGE	MI	2.0	1270	1206
55	ALEX KONG	MI	2.0	1186	1406
56	MARISA RICCI	MI	2.0	1153	1414
57	MICHAEL LU	MI	2.0	1092	1363
58	VIRAJ MOHILE	MI	2.0	917	1391
59	SEAN M MC	MI	2.0	853	1319
60	JULIA SHEN	MI	1.5	967	1330
61	JEZZEL FARKAS	ON	1.5	955	1327
62	ASHWIN BALAJI	MI	1.0	1530	1186
63	THOMAS JOSEPH HOSMER	MI	1.0	1175	1350
64	BEN LI	MI	1.0	1163	1263

Write csv file

# Save the final table to disk; row names are omitted so the file contains
# only the data columns.
write.csv(
  x = df_tournament,
  file = "chess_tournament.csv",
  row.names = FALSE
)

Project 1 Chess Rating

Nick Climaco

2023-02-15