# Round the values in column 2 of pcr_matrix (the per-player averages that are
# appended below) to whole numbers.
pcr_matrix[, 2] <- round(pcr_matrix[, 2], digits = 0)
# Add the rounded averages to the data frame holding the other processed data.
# The column is given its readable name directly in the cbind() call, so the
# code no longer relies on the auto-generated `pcr_matrix[, 2]` label followed
# by a separate rename step.
processed_data <- cbind(processed_data, avg_opp_pcr = pcr_matrix[, 2])
# Print the full table for inspection.
processed_data
## player_num player_name player_state total_pts
## 1 1 GARY HUA ON 6.0
## 2 2 DAKSHESH DARURI MI 6.0
## 3 3 ADITYA BAJAJ MI 6.0
## 4 4 PATRICK H SCHILLING MI 5.5
## 5 5 HANSHI ZUO MI 5.5
## 6 6 HANSEN SONG OH 5.0
## 7 7 GARY DEE SWATHELL MI 5.0
## 8 8 EZEKIEL HOUGHTON MI 5.0
## 9 9 STEFANO LEE ON 5.0
## 10 10 ANVIT RAO MI 5.0
## 11 11 CAMERON WILLIAM MC LEMAN MI 4.5
## 12 12 KENNETH J TACK MI 4.5
## 13 13 TORRANCE HENRY JR MI 4.5
## 14 14 BRADLEY SHAW MI 4.5
## 15 15 ZACHARY JAMES HOUGHTON MI 4.5
## 16 16 MIKE NIKITIN MI 4.0
## 17 17 RONALD GRZEGORCZYK MI 4.0
## 18 18 DAVID SUNDEEN MI 4.0
## 19 19 DIPANKAR ROY MI 4.0
## 20 20 JASON ZHENG MI 4.0
## 21 21 DINH DANG BUI ON 4.0
## 22 22 EUGENE L MCCLURE MI 4.0
## 23 23 ALAN BUI ON 4.0
## 24 24 MICHAEL R ALDRICH MI 4.0
## 25 25 LOREN SCHWIEBERT MI 3.5
## 26 26 MAX ZHU ON 3.5
## 27 27 GAURAV GIDWANI MI 3.5
## 28 28 SOFIA ADINA STANESCU-BELLU MI 3.5
## 29 29 CHIEDOZIE OKORIE MI 3.5
## 30 30 GEORGE AVERY JONES ON 3.5
## 31 31 RISHI SHETTY MI 3.5
## 32 32 JOSHUA PHILIP MATHEWS ON 3.5
## 33 33 JADE GE MI 3.5
## 34 34 MICHAEL JEFFERY THOMAS MI 3.5
## 35 35 JOSHUA DAVID LEE MI 3.5
## 36 36 SIDDHARTH JHA MI 3.5
## 37 37 AMIYATOSH PWNANANDAM MI 3.5
## 38 38 BRIAN LIU MI 3.0
## 39 39 JOEL R HENDON MI 3.0
## 40 40 FOREST ZHANG MI 3.0
## 41 41 KYLE WILLIAM MURPHY MI 3.0
## 42 42 JARED GE MI 3.0
## 43 43 ROBERT GLEN VASEY MI 3.0
## 44 44 JUSTIN D SCHILLING MI 3.0
## 45 45 DEREK YAN MI 3.0
## 46 46 JACOB ALEXANDER LAVALLEY MI 3.0
## 47 47 ERIC WRIGHT MI 2.5
## 48 48 DANIEL KHAIN MI 2.5
## 49 49 MICHAEL J MARTIN MI 2.5
## 50 50 SHIVAM JHA MI 2.5
## 51 51 TEJAS AYYAGARI MI 2.5
## 52 52 ETHAN GUO MI 2.5
## 53 53 JOSE C YBARRA MI 2.0
## 54 54 LARRY HODGE MI 2.0
## 55 55 ALEX KONG MI 2.0
## 56 56 MARISA RICCI MI 2.0
## 57 57 MICHAEL LU MI 2.0
## 58 58 VIRAJ MOHILE MI 2.0
## 59 59 SEAN M MC CORMICK MI 2.0
## 60 60 JULIA SHEN MI 1.5
## 61 61 JEZZEL FARKAS ON 1.5
## 62 62 ASHWIN BALAJI MI 1.0
## 63 63 THOMAS JOSEPH HOSMER MI 1.0
## 64 64 BEN LI MI 1.0
## player_pre_rat avg_opp_pcr
## 1 1794 1605
## 2 1553 1469
## 3 1384 1564
## 4 1716 1574
## 5 1655 1501
## 6 1686 1519
## 7 1649 1372
## 8 1641 1468
## 9 1411 1523
## 10 1365 1554
## 11 1712 1468
## 12 1663 1506
## 13 1666 1498
## 14 1610 1515
## 15 1220 1484
## 16 1604 1386
## 17 1629 1499
## 18 1600 1480
## 19 1564 1426
## 20 1595 1411
## 21 1563 1470
## 22 1555 1300
## 23 1363 1214
## 24 1229 1357
## 25 1745 1363
## 26 1579 1507
## 27 1552 1222
## 28 1507 1522
## 29 1602 1314
## 30 1522 1144
## 31 1494 1260
## 32 1441 1379
## 33 1449 1277
## 34 1399 1375
## 35 1438 1150
## 36 1355 1388
## 37 980 1385
## 38 1423 1539
## 39 1436 1430
## 40 1348 1391
## 41 1403 1248
## 42 1332 1150
## 43 1283 1107
## 44 1199 1327
## 45 1242 1152
## 46 377 1358
## 47 1362 1392
## 48 1382 1356
## 49 1291 1286
## 50 1056 1296
## 51 1011 1356
## 52 935 1495
## 53 1393 1345
## 54 1270 1206
## 55 1186 1406
## 56 1153 1414
## 57 1092 1363
## 58 917 1391
## 59 853 1319
## 60 967 1330
## 61 955 1327
## 62 1530 1186
## 63 1175 1350
## 64 1163 1263
# Use the current working directory as the export location.
path <- getwd()
# Write the processed table as a CSV file. file.path() assembles the
# destination so the path separator is correct on every operating system, and
# row names are left out of the file.
write.csv(
  processed_data,
  file = file.path(path, "chess_processed_data.csv"),
  row.names = FALSE
)
# Preview the first five rows of the exported data.
head(processed_data, n = 5)
## player_num player_name player_state total_pts
## 1 1 GARY HUA ON 6.0
## 2 2 DAKSHESH DARURI MI 6.0
## 3 3 ADITYA BAJAJ MI 6.0
## 4 4 PATRICK H SCHILLING MI 5.5
## 5 5 HANSHI ZUO MI 5.5
## player_pre_rat avg_opp_pcr
## 1 1794 1605
## 2 1553 1469
## 3 1384 1564
## 4 1716 1574
## 5 1655 1501
In Project 1, we successfully read and cleaned a chess tournament results dataset. By converting the raw file to a usable format, computing each player's average opponent pre-tournament rating, and exporting the result as a CSV file, we prepared the data for further analysis. This process demonstrates essential skills in data acquisition and preprocessing—key steps in any data science workflow. The cleaned dataset is now ready for exploratory analysis or modeling in future projects.