library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(tidyr)
# read the text file
chess_ratings <- read_delim('https://raw.githubusercontent.com/datanerddhanya/DATA607/main/chess_ratings.txt',delim ="|", col_names = c ("state", "name", "number", "pointTotal", "prerating", "avgOpp"), trim_ws= TRUE, skip = 4, show_col_types = FALSE)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
#parse the data into a dataframe
chess_rating <- data.frame(chess_ratings)
#remove the lines having ----
chess_rating <- subset(chess_rating, chess_rating$state != chess_rating$state[3])
# splitting the data frame into two sets to get the name records and the state records
chess_rating_name_rows <- chess_rating[grep("^[0-9]",chess_rating$state),]
chess_rating_name_rows <- chess_rating_name_rows |>
rename(
number = state,
player_name = name,
points_total = number,
round_1 = pointTotal,
round_2 = prerating,
round_3 = avgOpp,
round_4 = X7,
round_5 = X8,
round_6 = X9,
round_7 = X10)
head(chess_rating_name_rows)
## number player_name points_total round_1 round_2 round_3 round_4
## 1 1 GARY HUA 6.0 W 39 W 21 W 18 W 14
## 4 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17
## 7 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21
## 10 4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26
## 13 5 HANSHI ZUO 5.5 W 45 W 37 D 12 D 13
## 16 6 HANSEN SONG 5.0 W 34 D 29 L 11 W 35
## round_5 round_6 round_7 X11
## 1 W 7 D 12 D 4 NA
## 4 W 16 W 20 W 7 NA
## 7 W 11 W 13 W 12 NA
## 10 D 5 W 19 D 1 NA
## 13 D 4 W 14 W 17 NA
## 16 D 10 W 27 W 21 NA
chess_rating_state_rows <- chess_rating[grep("^[A-Z]",chess_rating$state),]
chess_rating_state_rows <- chess_rating_state_rows |>
rename(
ID = name)
head(chess_rating_state_rows)
## state ID number pointTotal prerating avgOpp X7 X8
## 2 ON 15445895 / R: 1794 ->1817 N:2 W B W B W
## 5 MI 14598900 / R: 1553 ->1663 N:2 B W B W B
## 8 MI 14959604 / R: 1384 ->1640 N:2 W B W B W
## 11 MI 12616049 / R: 1716 ->1744 N:2 W B W B W
## 14 MI 14601533 / R: 1655 ->1690 N:2 B W B W B
## 17 OH 15055204 / R: 1686 ->1687 N:3 W B W B B
## X9 X10 X11
## 2 B W NA
## 5 W B NA
## 8 B W NA
## 11 B B NA
## 14 W B NA
## 17 W B NA
# combine two data frames using cbind
chess_rating_final_rows <- cbind(chess_rating_name_rows, chess_rating_state_rows)
#select the required columns
chess_rating_final_rows <- chess_rating_final_rows[,c("number","player_name","points_total","round_1","round_2","round_3" ,"round_4","round_5","round_6","round_7","state","ID")]
# generate new column for rating
chess_rating_final_rows <- chess_rating_final_rows |>
separate_wider_delim(ID, delim = "/ R:", names = c("UFS_ID", "rating"))
# generate new column for pre and post rating
chess_rating_final_rows <- chess_rating_final_rows |>
separate_wider_delim(rating, delim = "->", names = c("pre_ratingpr", "post_rating"))
# generate new column for pre rating without provisioning
chess_rating_final_rows <- chess_rating_final_rows |>
separate_wider_delim(pre_ratingpr, delim = "P", names = c("pre_rating", "provisioning"),too_few = "align_start")
# picking a substring in each round variable and then converting to numeric.
chess_rating_final_rows$round_1 = as.numeric(sub("[A-Z]","", chess_rating_final_rows$round_1))
chess_rating_final_rows$round_2 = as.numeric(sub("[A-Z]","", chess_rating_final_rows$round_2))
chess_rating_final_rows$round_3 = as.numeric(sub("[A-Z]","", chess_rating_final_rows$round_3))
chess_rating_final_rows$round_4 = as.numeric(sub("[A-Z]","", chess_rating_final_rows$round_4))
chess_rating_final_rows$round_5 = as.numeric(sub("[A-Z]","", chess_rating_final_rows$round_5))
chess_rating_final_rows$round_6 = as.numeric(sub("[A-Z]","", chess_rating_final_rows$round_6))
chess_rating_final_rows$round_7 = as.numeric(sub("[A-Z]","", chess_rating_final_rows$round_7))
# converting teh number column and pre_rating column to numeric
chess_rating_final_rows$number = as.numeric(chess_rating_final_rows$number)
chess_rating_final_rows$pre_rating = as.numeric(chess_rating_final_rows$pre_rating)
head(chess_rating_final_rows)
## # A tibble: 6 × 15
## number player_name points_total round_1 round_2 round_3 round_4 round_5
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 GARY HUA 6.0 39 21 18 14 7
## 2 2 DAKSHESH DARURI 6.0 63 58 4 17 16
## 3 3 ADITYA BAJAJ 6.0 8 61 25 21 11
## 4 4 PATRICK H SCHILLI… 5.5 23 28 2 26 5
## 5 5 HANSHI ZUO 5.5 45 37 12 13 4
## 6 6 HANSEN SONG 5.0 34 29 11 35 10
## # ℹ 7 more variables: round_6 <dbl>, round_7 <dbl>, state <chr>, UFS_ID <chr>,
## # pre_rating <dbl>, provisioning <chr>, post_rating <chr>
# Use recursion to populate opponent pre rating
for (i in chess_rating_final_rows$round_1)
{
chess_rating_final_rows$round_1[i] <- chess_rating_final_rows$pre_rating[chess_rating_final_rows$round_1[i]]
}
for (i in chess_rating_final_rows$round_2)
{
chess_rating_final_rows$round_2[i] <- chess_rating_final_rows$pre_rating[chess_rating_final_rows$round_2[i]]
}
for (i in chess_rating_final_rows$round_3)
{
chess_rating_final_rows$round_3[i] <- chess_rating_final_rows$pre_rating[chess_rating_final_rows$round_3[i]]
}
for (i in chess_rating_final_rows$round_4)
{
chess_rating_final_rows$round_4[i] <- chess_rating_final_rows$pre_rating[chess_rating_final_rows$round_4[i]]
}
for (i in chess_rating_final_rows$round_5)
{
chess_rating_final_rows$round_5[i] <- chess_rating_final_rows$pre_rating[chess_rating_final_rows$round_5[i]]
}
for (i in chess_rating_final_rows$round_6)
{
chess_rating_final_rows$round_6[i] <- chess_rating_final_rows$pre_rating[chess_rating_final_rows$round_6[i]]
}
for (i in chess_rating_final_rows$round_7)
{
chess_rating_final_rows$round_7[i] <- chess_rating_final_rows$pre_rating[chess_rating_final_rows$round_7[i]]
}
head(chess_rating_final_rows)
## # A tibble: 6 × 15
## number player_name points_total round_1 round_2 round_3 round_4 round_5
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 GARY HUA 6.0 1436 1563 1600 1610 1649
## 2 2 DAKSHESH DARURI 6.0 1175 917 1716 1629 1604
## 3 3 ADITYA BAJAJ 6.0 1641 955 1745 1563 1712
## 4 4 PATRICK H SCHILLI… 5.5 1363 1507 1553 1579 1655
## 5 5 HANSHI ZUO 5.5 1242 980 1663 1666 1716
## 6 6 HANSEN SONG 5.0 1399 1602 1712 1438 1365
## # ℹ 7 more variables: round_6 <dbl>, round_7 <dbl>, state <chr>, UFS_ID <chr>,
## # pre_rating <dbl>, provisioning <chr>, post_rating <chr>
# find the row wise mean
chess_rating_final_rows <-
chess_rating_final_rows |>
rowwise() |>
mutate (row_mean = round(mean(c(round_1,round_2,round_3,round_4,round_5,round_6,round_7),na.rm=T),0))
head(chess_rating_final_rows)
## # A tibble: 6 × 16
## # Rowwise:
## number player_name points_total round_1 round_2 round_3 round_4 round_5
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 GARY HUA 6.0 1436 1563 1600 1610 1649
## 2 2 DAKSHESH DARURI 6.0 1175 917 1716 1629 1604
## 3 3 ADITYA BAJAJ 6.0 1641 955 1745 1563 1712
## 4 4 PATRICK H SCHILLI… 5.5 1363 1507 1553 1579 1655
## 5 5 HANSHI ZUO 5.5 1242 980 1663 1666 1716
## 6 6 HANSEN SONG 5.0 1399 1602 1712 1438 1365
## # ℹ 8 more variables: round_6 <dbl>, round_7 <dbl>, state <chr>, UFS_ID <chr>,
## # pre_rating <dbl>, provisioning <chr>, post_rating <chr>, row_mean <dbl>
#write to a .csv file
#select the required columns
chess_avg_csv <- chess_rating_final_rows[,c("player_name","state","points_total","pre_rating","row_mean")]
head(chess_avg_csv)
## # A tibble: 6 × 5
## # Rowwise:
## player_name state points_total pre_rating row_mean
## <chr> <chr> <chr> <dbl> <dbl>
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
write.csv(chess_avg_csv, file = "chess_rating_avg.csv")