library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(dplyr)
library(stringr)
In this project, we take a text file
chess <- read_delim("https://raw.githubusercontent.com/evelynbartley/Data-607/main/Chess%20File", delim = "|", col_names = c("state", "name", "number", "pointTotal", "prerating", "avgOpp"), skip = 4, show_col_types = FALSE)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
# Convert the file to a data frame
chess <- data.frame(chess)
# Remove the NA rows
chess <- subset(chess, chess$state != chess$state[3])
Here we use an iteration to create a data frame from the odd rows and a separate data frame for the even rows, and then horizontally merge those two data frames to create a data frame with 22 variables and 64 observations.
name_rows <- data.frame()
state_rows <- data.frame()
# Use recursion to create two data frames
for (i in 1:nrow(chess)) {
this_row <- chess[i,]
if (i %% 2 == 1) {
name_rows <- rbind(name_rows, this_row)
}
else {
state_rows <- rbind(state_rows, this_row)
}
}
#Combine the two data frames
chess1 = data.frame(name_rows, state_rows, check.names=T)
Delete the columns with “NA” and other irrelevant columns.
chess2 <- select(chess1, -c("X11","X11.1", "number.1", "pointTotal.1", "prerating.1", "avgOpp.1", "X7.1", "X8.1", "X9.1", "X10.1"))
Rename the columns.
chess3 <-
rename(chess2,
"PlayerID" = "state",
"PlayerName" = "name",
"TotalPoints" = "number",
"Round1" = "pointTotal",
"Round2" = "prerating",
"Round3" = "avgOpp",
"Round4" = "X7",
"Round5" = "X8",
"Round6" = "X9",
"Round7" = "X10",
"State" = "state.1",
"Prerating" = "name.1"
)
Let’s make a separate column to isolate the prerating for each player:
chess4 <- separate_wider_delim(chess3, cols = "Prerating", delim = ":", names = "NewCol", too_many = "debug")
## Warning: Debug mode activated: adding variables `Prerating_ok`, `Prerating_pieces`, and
## `Prerating_remainder`.
chess4
## # A tibble: 64 × 16
## PlayerID PlayerName TotalPoints Round1 Round2 Round3 Round4 Round5 Round6
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 " 1 " " GARY HUA … "6.0 " W 39 W 21 W 18 W 14 W 7 D 12
## 2 " 2 " " DAKSHESH DA… "6.0 " W 63 W 58 L 4 W 17 W 16 W 20
## 3 " 3 " " ADITYA BAJA… "6.0 " L 8 W 61 W 25 W 21 W 11 W 13
## 4 " 4 " " PATRICK H S… "5.5 " W 23 D 28 W 2 W 26 D 5 W 19
## 5 " 5 " " HANSHI ZUO … "5.5 " W 45 W 37 D 12 D 13 D 4 W 14
## 6 " 6 " " HANSEN SONG… "5.0 " W 34 D 29 L 11 W 35 D 10 W 27
## 7 " 7 " " GARY DEE SW… "5.0 " W 57 W 46 W 13 W 11 L 1 W 9
## 8 " 8 " " EZEKIEL HOU… "5.0 " W 3 W 32 L 14 L 9 W 47 W 28
## 9 " 9 " " STEFANO LEE… "5.0 " W 25 L 18 W 59 W 8 W 26 L 7
## 10 " 10 " " ANVIT RAO … "5.0 " D 16 L 19 W 55 W 31 D 6 W 25
## # ℹ 54 more rows
## # ℹ 7 more variables: Round7 <chr>, State <chr>, NewCol <chr>, Prerating <chr>,
## # Prerating_ok <lgl>, Prerating_pieces <int>, Prerating_remainder <chr>
chess5 <- separate_wider_delim(chess4, cols = "Prerating_remainder", delim = "->", names = "NewCol2", too_many = "debug")
## Warning: Debug mode activated: adding variables `Prerating_remainder_ok`,
## `Prerating_remainder_pieces`, and `Prerating_remainder_remainder`.
Here we get rid of the irrelevant columns.
chess6 <- select(chess5, -c("NewCol", "Prerating","Prerating_ok", "Prerating_pieces", "Prerating_remainder", "Prerating_remainder_ok", "Prerating_remainder_pieces", "Prerating_remainder_remainder"))
We rename the column that contains the preratings.
chess7 <- rename(chess6,
"PlayerPreRating" = "NewCol2")
We get rid of all the “:”’s that precede our preratings. And expand the players preratings that end with a “P” and a number.
chess7$PlayerPreRating <- gsub(":","",as.character(chess7$PlayerPreRating))
chess8 <- separate_wider_delim(chess7, cols = "PlayerPreRating", delim = "P", names = "PreRating", too_many = "debug")
## Warning: Debug mode activated: adding variables `PlayerPreRating_ok`,
## `PlayerPreRating_pieces`, and `PlayerPreRating_remainder`.
Clean it up.
chessF <- select(chess8, -c("PlayerPreRating", "PlayerPreRating_ok", "PlayerPreRating_pieces", "PlayerPreRating_remainder"))
Here we isolate the player ID’s for each round by removing the letter that represents the result of the round.
chessF$Round1 <- gsub('[A-Z]', "", chessF$Round1)
chessF$Round2 <- gsub('[A-Z]', "", chessF$Round2)
chessF$Round3 <- gsub('[A-Z]', "", chessF$Round3)
chessF$Round4 <- gsub('[A-Z]', "", chessF$Round4)
chessF$Round5 <- gsub('[A-Z]', "", chessF$Round5)
chessF$Round6 <- gsub('[A-Z]', "", chessF$Round6)
chessF$Round7 <- gsub('[A-Z]', "", chessF$Round7)
Now we want to replace each player id that shows in each round by the corresponding player id’s prerating.
I want to change every row with numbers to a numeric vector.
chessF$PlayerID = as.numeric(sub("[A-Z]", "", chessF$PlayerID))
chessF$Round1 = as.numeric(sub("[A-Z]", "", chessF$Round1))
chessF$Round2 = as.numeric(sub("[A-Z]", "", chessF$Round2))
chessF$Round3 = as.numeric(sub("[A-Z]", "", chessF$Round3))
chessF$Round4 = as.numeric(sub("[A-Z]", "", chessF$Round4))
chessF$Round5 = as.numeric(sub("[A-Z]", "", chessF$Round5))
chessF$Round6 = as.numeric(sub("[A-Z]", "", chessF$Round6))
chessF$Round7 = as.numeric(sub("[A-Z]", "", chessF$Round7))
chessF$PreRating = as.numeric(sub("[A-Z]", "", chessF$PreRating))
This for loop replaces the opponent’s ID with their prerating in Round 1.
for (i in chessF$Round1)
{
chessF$Round1[i] <- chessF$PreRating[chessF$Round1[i]]
}
for (i in chessF$Round2)
{
chessF$Round2[i] <- chessF$PreRating[chessF$Round2[i]]
}
for (i in chessF$Round3)
{
chessF$Round3[i] <- chessF$PreRating[chessF$Round3[i]]
}
for (i in chessF$Round4)
{
chessF$Round4[i] <- chessF$PreRating[chessF$Round4[i]]
}
for (i in chessF$Round5)
{
chessF$Round5[i] <- chessF$PreRating[chessF$Round5[i]]
}
for (i in chessF$Round6)
{
chessF$Round6[i] <- chessF$PreRating[chessF$Round6[i]]
}
for (i in chessF$Round7)
{
chessF$Round7[i] <- chessF$PreRating[chessF$Round7[i]]
}
head(chessF)
## # A tibble: 6 × 12
## PlayerID PlayerName TotalPoints Round1 Round2 Round3 Round4 Round5 Round6
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 " GARY HUA … "6.0 " 1436 1563 1600 1610 1649 1663
## 2 2 " DAKSHESH DAR… "6.0 " 1175 917 1716 1629 1604 1595
## 3 3 " ADITYA BAJAJ… "6.0 " 1641 955 1745 1563 1712 1666
## 4 4 " PATRICK H SC… "5.5 " 1363 1507 1553 1579 1655 1564
## 5 5 " HANSHI ZUO … "5.5 " 1242 980 1663 1666 1716 1610
## 6 6 " HANSEN SONG … "5.0 " 1399 1602 1712 1438 1365 1552
## # ℹ 3 more variables: Round7 <dbl>, State <chr>, PreRating <dbl>
This line of code takes the seven opponents prerating numbers and take the average of each row and store them in a new column. (Since I was unable to replace the IDs with their corresponding prerating, my code just averages their ID numbers.)
chessF <- chessF |>
rowwise() |>
mutate('OppAvgPrerating' = mean(c(Round1, Round2, Round3, Round4, Round5, Round6, Round7), na.rm = TRUE))
chessF <- chessF |>
rowwise() |>
mutate('OppAvgPrerating' = round(OppAvgPrerating, digits = 0))
##Conclusion: I am left with a dataframe with information on Player’s Name, Total Points, State, PreRating, and Average Pre Chess Rating of Opponents for each player.
chessF <- select(chessF, -c("PlayerID", "Round1", "Round2", "Round3", "Round4", "Round5", "Round6", "Round7"))
head(chessF)
## # A tibble: 6 × 5
## # Rowwise:
## PlayerName TotalPoints State PreRating OppAvgPrerating
## <chr> <chr> <chr> <dbl> <dbl>
## 1 " GARY HUA … "6.0 " " … 1794 1605
## 2 " DAKSHESH DARURI … "6.0 " " … 1553 1469
## 3 " ADITYA BAJAJ … "6.0 " " … 1384 1564
## 4 " PATRICK H SCHILLING … "5.5 " " … 1716 1574
## 5 " HANSHI ZUO … "5.5 " " … 1655 1501
## 6 " HANSEN SONG … "5.0 " " … 1686 1519
Here is the .csv file:
write_csv(chessF, 'chessFinal.csv')