Create DF with the txt file (uploaded to Github)
# Read txt file into data frame in R
txt_url <- "https://raw.githubusercontent.com/wberritt913/CUNY_DATA607/main/tournamentinfo.txt"
df1 <- read.table(url(txt_url), header = TRUE, sep = '\n')
# Check the dimensions and preview the data
dim(df1)
## [1] 195 1
head(df1, 30)
## X.........................................................................................
## 1 Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round|
## 2 Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
## 3 -----------------------------------------------------------------------------------------
## 4 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## 5 ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## 6 -----------------------------------------------------------------------------------------
## 7 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|
## 8 MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |
## 9 -----------------------------------------------------------------------------------------
## 10 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|
## 11 MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |
## 12 -----------------------------------------------------------------------------------------
## 13 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|
## 14 MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |
## 15 -----------------------------------------------------------------------------------------
## 16 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|
## 17 MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |
## 18 -----------------------------------------------------------------------------------------
## 19 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|
## 20 OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |
## 21 -----------------------------------------------------------------------------------------
## 22 7 | GARY DEE SWATHELL |5.0 |W 57|W 46|W 13|W 11|L 1|W 9|L 2|
## 23 MI | 11146376 / R: 1649 ->1673 |N:3 |W |B |W |B |B |W |W |
## 24 -----------------------------------------------------------------------------------------
## 25 8 | EZEKIEL HOUGHTON |5.0 |W 3|W 32|L 14|L 9|W 47|W 28|W 19|
## 26 MI | 15142253 / R: 1641P17->1657P24 |N:3 |B |W |B |W |B |W |W |
## 27 -----------------------------------------------------------------------------------------
## 28 9 | STEFANO LEE |5.0 |W 25|L 18|W 59|W 8|W 26|L 7|W 20|
## 29 ON | 14954524 / R: 1411 ->1564 |N:2 |W |B |W |B |W |B |B |
## 30 -----------------------------------------------------------------------------------------
Create main data frame
# Combine all data frames into one data frame (keeping player_info as base data frame)
player_info$states <- states_df
player_info$total_points <- total_points
player_info$p_rating_before <- p_rating_before
player_info$index <- 1:nrow(player_info)
head(player_info, 15)
## value value value value index
## 1 GARY HUA ON 6.0 1794 1
## 2 DAKSHESH DARURI MI 6.0 1553 2
## 3 ADITYA BAJAJ MI 6.0 1384 3
## 4 PATRICK H SCHILLING MI 5.5 1716 4
## 5 HANSHI ZUO MI 5.5 1655 5
## 6 HANSEN SONG OH 5.0 1686 6
## 7 GARY DEE SWATHELL MI 5.0 1649 7
## 8 EZEKIEL HOUGHTON MI 5.0 1641 8
## 9 STEFANO LEE ON 5.0 1411 9
## 10 ANVIT RAO MI 5.0 1365 10
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 11
## 12 KENNETH J TACK MI 4.5 1663 12
## 13 TORRANCE HENRY JR MI 4.5 1666 13
## 14 BRADLEY SHAW MI 4.5 1610 14
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 15
Join opponents’ rating data to main data frame
# Join the opponents data frame on the index
df3 <- left_join(opponents_df, player_info, by = 'index')
head(df3, 15)
## index value value value value
## 1 39 JOEL R HENDON MI 3.0 1436
## 2 21 DINH DANG BUI ON 4.0 1563
## 3 18 DAVID SUNDEEN MI 4.0 1600
## 4 14 BRADLEY SHAW MI 4.5 1610
## 5 7 GARY DEE SWATHELL MI 5.0 1649
## 6 12 KENNETH J TACK MI 4.5 1663
## 7 4 PATRICK H SCHILLING MI 5.5 1716
## 8 63 THOMAS JOSEPH HOSMER MI 1.0 1175
## 9 58 VIRAJ MOHILE MI 2.0 917
## 10 4 PATRICK H SCHILLING MI 5.5 1716
## 11 17 RONALD GRZEGORCZYK MI 4.0 1629
## 12 16 MIKE NIKITIN MI 4.0 1604
## 13 20 JASON ZHENG MI 4.0 1595
## 14 7 GARY DEE SWATHELL MI 5.0 1649
## 15 8 EZEKIEL HOUGHTON MI 5.0 1641
Find average rating for each player’s opponents
# Get average of every 7 rows to calculate opponent average player rating before
groupings <- df3$p_rating_before %>%
group_by(group = (row_number() - 1) %/% 7)
averages <- groupings %>%
summarise(avg_p_rating_before = round(mean(value, na.rm = TRUE)))
head(averages, 15)
## # A tibble: 15 × 2
## group avg_p_rating_before
## <dbl> <dbl>
## 1 0 1605
## 2 1 1469
## 3 2 1564
## 4 3 1574
## 5 4 1501
## 6 5 1519
## 7 6 1372
## 8 7 1468
## 9 8 1523
## 10 9 1554
## 11 10 1468
## 12 11 1506
## 13 12 1498
## 14 13 1515
## 15 14 1484
Create new final data frame
# Add the average to the main data frame and remove the index common key
player_info$average_opponent_rating <- averages$avg_p_rating_before
player_info <- player_info %>% select(-index)
player_data <- player_info
player_data
## value value value value average_opponent_rating
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
## 7 GARY DEE SWATHELL MI 5.0 1649 1372
## 8 EZEKIEL HOUGHTON MI 5.0 1641 1468
## 9 STEFANO LEE ON 5.0 1411 1523
## 10 ANVIT RAO MI 5.0 1365 1554
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1468
## 12 KENNETH J TACK MI 4.5 1663 1506
## 13 TORRANCE HENRY JR MI 4.5 1666 1498
## 14 BRADLEY SHAW MI 4.5 1610 1515
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1484
## 16 MIKE NIKITIN MI 4.0 1604 1386
## 17 RONALD GRZEGORCZYK MI 4.0 1629 1499
## 18 DAVID SUNDEEN MI 4.0 1600 1480
## 19 DIPANKAR ROY MI 4.0 1564 1426
## 20 JASON ZHENG MI 4.0 1595 1411
## 21 DINH DANG BUI ON 4.0 1563 1470
## 22 EUGENE L MCCLURE MI 4.0 1555 1300
## 23 ALAN BUI ON 4.0 1363 1214
## 24 MICHAEL R ALDRICH MI 4.0 1229 1357
## 25 LOREN SCHWIEBERT MI 3.5 1745 1363
## 26 MAX ZHU ON 3.5 1579 1507
## 27 GAURAV GIDWANI MI 3.5 1552 1222
## 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507 1522
## 29 CHIEDOZIE OKORIE MI 3.5 1602 1314
## 30 GEORGE AVERY JONES ON 3.5 1522 1144
## 31 RISHI SHETTY MI 3.5 1494 1260
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441 1379
## 33 JADE GE MI 3.5 1449 1277
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399 1375
## 35 JOSHUA DAVID LEE MI 3.5 1438 1150
## 36 SIDDHARTH JHA MI 3.5 1355 1388
## 37 AMIYATOSH PWNANANDAM MI 3.5 980 1385
## 38 BRIAN LIU MI 3.0 1423 1539
## 39 JOEL R HENDON MI 3.0 1436 1430
## 40 FOREST ZHANG MI 3.0 1348 1391
## 41 KYLE WILLIAM MURPHY MI 3.0 1403 1248
## 42 JARED GE MI 3.0 1332 1150
## 43 ROBERT GLEN VASEY MI 3.0 1283 1107
## 44 JUSTIN D SCHILLING MI 3.0 1199 1327
## 45 DEREK YAN MI 3.0 1242 1152
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377 1358
## 47 ERIC WRIGHT MI 2.5 1362 1392
## 48 DANIEL KHAIN MI 2.5 1382 1356
## 49 MICHAEL J MARTIN MI 2.5 1291 1286
## 50 SHIVAM JHA MI 2.5 1056 1296
## 51 TEJAS AYYAGARI MI 2.5 1011 1356
## 52 ETHAN GUO MI 2.5 935 1495
## 53 JOSE C YBARRA MI 2.0 1393 1345
## 54 LARRY HODGE MI 2.0 1270 1206
## 55 ALEX KONG MI 2.0 1186 1406
## 56 MARISA RICCI MI 2.0 1153 1414
## 57 MICHAEL LU MI 2.0 1092 1363
## 58 VIRAJ MOHILE MI 2.0 917 1391
## 59 SEAN M MC CORMICK MI 2.0 853 1319
## 60 JULIA SHEN MI 1.5 967 1330
## 61 JEZZEL FARKAS ON 1.5 955 1327
## 62 ASHWIN BALAJI MI 1.0 1530 1186
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175 1350
## 64 BEN LI MI 1.0 1163 1263
Check classes for export - fix problematic classes
# Change the column classes
lapply(player_data, class)
## $value
## [1] "character"
##
## $states
## [1] "data.frame"
##
## $total_points
## [1] "data.frame"
##
## $p_rating_before
## [1] "data.frame"
##
## $average_opponent_rating
## [1] "numeric"
# Change column classes + names to create final data frame
player_data$states <- as.character(player_data$states$value)
player_data$total_points <- as.numeric(player_data$total_points$value)
player_data$p_rating_before <- as.numeric(player_data$p_rating_before$value)
player_data <- player_data[, c("value", "states", "total_points", "p_rating_before", "average_opponent_rating")]
# Check class again
lapply(player_data, class)
## $value
## [1] "character"
##
## $states
## [1] "character"
##
## $total_points
## [1] "numeric"
##
## $p_rating_before
## [1] "numeric"
##
## $average_opponent_rating
## [1] "numeric"
# Rename value column
colnames(player_data)[colnames(player_data) == "value"] <- "name"
Create CSV
# Create CSV
write.csv(player_data, "player_data_new1.csv", row.names = FALSE, quote = FALSE)