library(stringr)
library(DT)
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.4 v purrr 0.3.4
## v tidyr 1.1.3 v dplyr 1.0.7
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Reading the text file from GitHub.
# Download the raw tournament cross-table as a character vector, one element
# per line of the text file.
df <- readr::read_lines(
  file = "https://github.com/mianshariq/SPS/raw/0ac39799e67b32b6c75564847778f012ffa5faa4/Data%20607/Projects/Project%201/ratings.txt"
)
Removing the dashed separator lines, the empty rows, and the first two header lines.
# Blank out the lines that consist solely of dashes (they become "").
df1 <- str_remove_all(string = df, pattern = "^-+$")
# Keep only the lines that still contain text. nchar() is already vectorised,
# so no sapply() wrapper is needed.
df1 <- df1[nchar(df1) > 0]
# Drop the two header lines at the top of the table.
df1 <- df1[-(1:2)]
Splitting the data into odd and even lines so that each player's two lines can be concatenated into one row.
# Each record spans two consecutive lines, so the cleaned vector must have an
# even number of elements; fail loudly otherwise instead of pairing the wrong
# lines together further down.
stopifnot(length(df1) %% 2 == 0)
# A two-element logical mask is recycled over the whole vector, so the split
# adapts to the actual number of lines rather than assuming exactly 128
# (and it also works on an empty vector).
df1_odd <- df1[c(TRUE, FALSE)]   # 1st, 3rd, 5th, ... line
df1_even <- df1[c(FALSE, TRUE)]  # 2nd, 4th, 6th, ... line
Concatenating each pair of lines and replacing the slashes with pipes.
# Glue each odd line to the even line that follows it (space-separated), then
# turn every slash into a pipe so the combined row uses a single delimiter.
df2 <- paste(df1_odd, df1_even) %>%
  str_replace_all(pattern = "/", replacement = "|")
Using regex for string manipulation and extraction.
# Player name: the first run of two or more letters followed by at least one
# more blank-separated word of letters.
name <- str_extract(
  string = df2,
  pattern = "[[:alpha:]]{2,}([[:blank:]][[:alpha:]]{1,}){1,}"
)
head(name, n = 3)
## [1] "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ"
# State code: the first two consecutive letters on each record's second line.
states <- str_extract(string = df1_even, pattern = "[[:alpha:]]{2}")
head(states, n = 3)
## [1] "ON" "MI" "MI"
# Total points: the first "<digits>.<digit>" token in each combined row,
# converted from text to a number.
points <- df2 %>%
  str_extract(pattern = "[[:digit:]]+\\.[[:digit:]]") %>%
  as.numeric()
head(points, n = 3)
## [1] 6 6 6
# Pre-tournament rating, in two steps: first isolate the "R: 1794"-style token
# (any character, a colon, then a 3-4 digit number), then keep only its
# numeric part.
pre_ratings <- str_extract(df2, ".\\: \\s?[[:digit:]]{3,4}")
pre_ratings <- as.numeric(str_extract(pre_ratings, "\\(?[0-9,.]+\\)?"))
# Pair number: the first run of digits in each combined row.
id <- as.integer(str_extract(df2, "\\d+"))
# Keep only ids that can index a player row. The upper bound follows the
# number of rows actually parsed instead of a hard-coded 64-player limit;
# missing values are dropped, as subset() did before.
id <- id[!is.na(id) & id <= length(df2)]
head(id, n = 3)
## [1] 1 2 3
# Opponent pair numbers for every player: each run of digits immediately
# followed by "|" in the combined row. The look-ahead keeps the pipe out of
# the match, so a single pass yields the bare numbers (as character vectors,
# one list element per player).
# A player with no numbered opponents keeps an empty entry, so list positions
# stay aligned with the player rows.
opp_num <- str_extract_all(df2, "\\d+(?=\\|)")
opp_num[1]
## [[1]]
## [1] "39" "21" "18" "14" "7" "12" "4"
Computing the average pre-tournament rating of each player's opponents.
# For every player, look up the pre-ratings of the opponents they faced and
# average them. seq_along() keeps this safe when `opp_num` is empty (1:0 would
# iterate over 1 and 0), and lapply() builds the list in one go instead of
# growing it element by element.
OppAvgRating <- lapply(
  seq_along(opp_num),
  function(i) mean(pre_ratings[as.numeric(unlist(opp_num[id[i]]))])
)
# One-column data frame of the rounded averages. The column name is spelled
# out because later steps refer to it as `unlist.opp_avg.`.
opp_avg <- data.frame(
  unlist.opp_avg. = round(as.numeric(unlist(OppAvgRating)))
)
opp_avg
## unlist.opp_avg.
## 1 1605
## 2 1469
## 3 1564
## 4 1574
## 5 1501
## 6 1519
## 7 1372
## 8 1468
## 9 1523
## 10 1554
## 11 1468
## 12 1506
## 13 1498
## 14 1515
## 15 1484
## 16 1386
## 17 1499
## 18 1480
## 19 1426
## 20 1411
## 21 1470
## 22 1300
## 23 1214
## 24 1357
## 25 1363
## 26 1507
## 27 1222
## 28 1522
## 29 1314
## 30 1144
## 31 1260
## 32 1379
## 33 1277
## 34 1375
## 35 1150
## 36 1388
## 37 1385
## 38 1539
## 39 1430
## 40 1391
## 41 1248
## 42 1150
## 43 1107
## 44 1327
## 45 1152
## 46 1358
## 47 1392
## 48 1356
## 49 1286
## 50 1296
## 51 1356
## 52 1495
## 53 1345
## 54 1206
## 55 1406
## 56 1414
## 57 1363
## 58 1391
## 59 1319
## 60 1330
## 61 1327
## 62 1186
## 63 1350
## 64 1263
Creating a data frame of the 5 columns needed.
# Assemble the five required columns. `opp_avg` is itself a one-column data
# frame, so its column arrives under the name `unlist.opp_avg.`.
df3 <- data.frame(name, states, points, pre_ratings, opp_avg)
# Interactive table with the DT "Scroller" extension and a 500-unit scroll
# viewport.
datatable(
  data = df3,
  extensions = "Scroller",
  options = list(scrollY = 500, scroller = TRUE)
)
Creating a plot of the number of players per state.
# Horizontal bar chart counting the players in each state.
ggplot() +
  geom_bar(mapping = aes(x = states), data = df3) +
  coord_flip()
Creating bins for ratings to get a better view of the ratings as a group.
# Bucket the pre-ratings into 15 intervals (labels printed with up to four
# digits, lowest value included) and store the bucket as a new column.
df3$ratingbins <- cut(
  x = df3$pre_ratings,
  breaks = 15,
  include.lowest = TRUE,
  dig.lab = 4
)
# Horizontal bar chart of how many players fall into each rating bucket.
ggplot() +
  geom_bar(mapping = aes(x = ratingbins), data = df3) +
  coord_flip()
Creating a plot of pre-rating vs. average opponent rating: players who faced a high average opponent rating and still scored many points tend to have a high rating themselves.
# Scatter plot of each player's pre-rating against the average rating of their
# opponents; colour encodes the rating bucket and point size the total points.
ggplot(
  data = df3,
  mapping = aes(x = pre_ratings, y = unlist.opp_avg., color = ratingbins)
) +
  geom_point(mapping = aes(size = points)) +
  labs(
    title = "Pre-rating Vs. Avg Opponent Rating",
    x = "Pre-rating",
    y = "unlist.opp_avg"
  )
Displaying the data
# Preview the first six rows of the assembled table.
head(df3, n = 6L)
## name states points pre_ratings unlist.opp_avg. ratingbins
## 1 GARY HUA ON 6.0 1794 1605 (1700,1795]
## 2 DAKSHESH DARURI MI 6.0 1553 1469 (1511,1605]
## 3 ADITYA BAJAJ MI 6.0 1384 1564 (1322,1416]
## 4 PATRICK H SCHILLING MI 5.5 1716 1574 (1700,1795]
## 5 HANSHI ZUO MI 5.5 1655 1501 (1605,1700]
## 6 HANSEN SONG OH 5.0 1686 1519 (1605,1700]
# Expected score of each player against an opponent rated at the player's
# average opponent rating, using the Elo logistic curve
#   E = 1 / (1 + 10^((R_opponent - R_player) / 400)).
# The exponent must be opponent minus player: a player rated above the
# opposition then gets E > 0.5 (the previous player-minus-opponent form
# produced the opponent's expected score instead).
df3$elo_score <- 1 / (1 + 10^((df3$unlist.opp_avg. - df3$pre_ratings) / 400))
# Rating update rating + K * (actual score - expected score) with K = 20.
# NOTE(review): the actual score is fixed at 1 here, i.e. this is the rating
# after a single win against an average opponent; it does not use the points
# the player really scored -- confirm that this is intended.
df3$elo_pts <- df3$pre_ratings + 20 * (1 - df3$elo_score)
# Pre-rating minus the updated rating (negative when the rating goes up).
df3$diff <- df3$pre_ratings - df3$elo_pts
df3
## name states points pre_ratings unlist.opp_avg.
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
## 7 GARY DEE SWATHELL MI 5.0 1649 1372
## 8 EZEKIEL HOUGHTON MI 5.0 1641 1468
## 9 STEFANO LEE ON 5.0 1411 1523
## 10 ANVIT RAO MI 5.0 1365 1554
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1468
## 12 KENNETH J TACK MI 4.5 1663 1506
## 13 TORRANCE HENRY JR MI 4.5 1666 1498
## 14 BRADLEY SHAW MI 4.5 1610 1515
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1484
## 16 MIKE NIKITIN MI 4.0 1604 1386
## 17 RONALD GRZEGORCZYK MI 4.0 1629 1499
## 18 DAVID SUNDEEN MI 4.0 1600 1480
## 19 DIPANKAR ROY MI 4.0 1564 1426
## 20 JASON ZHENG MI 4.0 1595 1411
## 21 DINH DANG BUI ON 4.0 1563 1470
## 22 EUGENE L MCCLURE MI 4.0 1555 1300
## 23 ALAN BUI ON 4.0 1363 1214
## 24 MICHAEL R ALDRICH MI 4.0 1229 1357
## 25 LOREN SCHWIEBERT MI 3.5 1745 1363
## 26 MAX ZHU ON 3.5 1579 1507
## 27 GAURAV GIDWANI MI 3.5 1552 1222
## 28 SOFIA ADINA STANESCU MI 3.5 1507 1522
## 29 CHIEDOZIE OKORIE MI 3.5 1602 1314
## 30 GEORGE AVERY JONES ON 3.5 1522 1144
## 31 RISHI SHETTY MI 3.5 1494 1260
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441 1379
## 33 JADE GE MI 3.5 1449 1277
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399 1375
## 35 JOSHUA DAVID LEE MI 3.5 1438 1150
## 36 SIDDHARTH JHA MI 3.5 1355 1388
## 37 AMIYATOSH PWNANANDAM MI 3.5 980 1385
## 38 BRIAN LIU MI 3.0 1423 1539
## 39 JOEL R HENDON MI 3.0 1436 1430
## 40 FOREST ZHANG MI 3.0 1348 1391
## 41 KYLE WILLIAM MURPHY MI 3.0 1403 1248
## 42 JARED GE MI 3.0 1332 1150
## 43 ROBERT GLEN VASEY MI 3.0 1283 1107
## 44 JUSTIN D SCHILLING MI 3.0 1199 1327
## 45 DEREK YAN MI 3.0 1242 1152
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377 1358
## 47 ERIC WRIGHT MI 2.5 1362 1392
## 48 DANIEL KHAIN MI 2.5 1382 1356
## 49 MICHAEL J MARTIN MI 2.5 1291 1286
## 50 SHIVAM JHA MI 2.5 1056 1296
## 51 TEJAS AYYAGARI MI 2.5 1011 1356
## 52 ETHAN GUO MI 2.5 935 1495
## 53 JOSE C YBARRA MI 2.0 1393 1345
## 54 LARRY HODGE MI 2.0 1270 1206
## 55 ALEX KONG MI 2.0 1186 1406
## 56 MARISA RICCI MI 2.0 1153 1414
## 57 MICHAEL LU MI 2.0 1092 1363
## 58 VIRAJ MOHILE MI 2.0 917 1391
## 59 SEAN M MC CORMICK MI 2.0 853 1319
## 60 JULIA SHEN MI 1.5 967 1330
## 61 JEZZEL FARKAS ON 1.5 955 1327
## 62 ASHWIN BALAJI MI 1.0 1530 1186
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175 1350
## 64 BEN LI MI 1.0 1163 1263
## ratingbins elo_score elo_pts diff
## 1 (1700,1795] 0.25200046 1808.9600 -14.95999082
## 2 (1511,1605] 0.38141588 1565.3717 -12.37168244
## 3 (1322,1416] 0.73810903 1389.2378 -5.23781935
## 4 (1700,1795] 0.30631208 1729.8738 -13.87375833
## 5 (1605,1700] 0.29183361 1669.1633 -14.16332777
## 6 (1605,1700] 0.27661213 1700.4678 -14.46775739
## 7 (1605,1700] 0.16874608 1665.6251 -16.62507831
## 8 (1605,1700] 0.26975459 1655.6049 -14.60490827
## 9 (1322,1416] 0.65582051 1417.8836 -6.88358988
## 10 (1322,1416] 0.74799954 1370.0400 -5.04000918
## 11 (1700,1795] 0.19709083 1728.0582 -16.05818340
## 12 (1605,1700] 0.28827748 1677.2345 -14.23445049
## 13 (1605,1700] 0.27546176 1680.4908 -14.49076486
## 14 (1605,1700] 0.36659230 1622.6682 -12.66815400
## 15 (1133,1227] 0.82049521 1223.5901 -3.59009585
## 16 (1511,1605] 0.22185155 1619.5630 -15.56296908
## 17 (1605,1700] 0.32118308 1642.5763 -13.57633840
## 18 (1511,1605] 0.33386058 1613.3228 -13.32278849
## 19 (1511,1605] 0.31122643 1577.7755 -13.77547137
## 20 (1511,1605] 0.25746444 1609.8507 -14.85071118
## 21 (1511,1605] 0.36926971 1575.6146 -12.61460577
## 22 (1511,1605] 0.18726232 1571.2548 -16.25475363
## 23 (1322,1416] 0.29781740 1377.0437 -14.04365204
## 24 (1227,1322] 0.67630167 1235.4740 -6.47396661
## 25 (1700,1795] 0.09984313 1763.0031 -18.00313735
## 26 (1511,1605] 0.39784191 1591.0432 -12.04316186
## 27 (1511,1605] 0.13015005 1569.3970 -17.39699898
## 28 (1416,1511] 0.52157333 1516.5685 -9.56853334
## 29 (1511,1605] 0.16004931 1618.7990 -16.79901389
## 30 (1511,1605] 0.10193172 1539.9614 -17.96136558
## 31 (1416,1511] 0.20635926 1509.8728 -15.87281486
## 32 (1416,1511] 0.41171003 1452.7658 -11.76579944
## 33 (1416,1511] 0.27089004 1463.5822 -14.58219926
## 34 (1322,1416] 0.46551606 1409.6897 -10.68967889
## 35 (1416,1511] 0.16004931 1454.7990 -16.79901389
## 36 (1322,1416] 0.54734852 1364.0530 -9.05302964
## 37 (943.8,1038] 0.91144177 981.7712 -1.77116460
## 38 (1416,1511] 0.66099909 1429.7800 -6.78001818
## 39 (1416,1511] 0.49136616 1446.1727 -10.17267672
## 40 (1322,1416] 0.56156794 1356.7686 -8.76864122
## 41 (1322,1416] 0.29064537 1417.1871 -14.18709261
## 42 (1322,1416] 0.25967158 1346.8066 -14.80656839
## 43 (1227,1322] 0.26636630 1297.6727 -14.67267405
## 44 (1133,1227] 0.67630167 1205.4740 -6.47396661
## 45 (1227,1322] 0.37330092 1254.5340 -12.53398163
## 46 [375.6,471.5] 0.99648463 377.0703 -0.07030733
## 47 (1322,1416] 0.54306649 1371.1387 -9.13867016
## 48 (1322,1416] 0.46265268 1392.7469 -10.74694634
## 49 (1227,1322] 0.49280492 1301.1439 -10.14390163
## 50 (1038,1133] 0.79923999 1060.0152 -4.01520018
## 51 (943.8,1038] 0.87931715 1013.4137 -2.41365693
## 52 (849.3,943.8] 0.96171350 935.7657 -0.76573008
## 53 (1322,1416] 0.43135861 1404.3728 -11.37282784
## 54 (1227,1322] 0.40892440 1281.8215 -11.82151193
## 55 (1133,1227] 0.78012960 1190.3974 -4.39740792
## 56 (1133,1227] 0.81793763 1156.6412 -3.64124738
## 57 (1038,1133] 0.82635355 1095.4729 -3.47292892
## 58 (849.3,943.8] 0.93869121 918.2262 -1.22617582
## 59 (849.3,943.8] 0.93598677 854.2803 -1.28026451
## 60 (943.8,1038] 0.88988784 969.2022 -2.20224316
## 61 (943.8,1038] 0.89486279 957.1027 -2.10274421
## 62 (1511,1605] 0.12129505 1547.5741 -17.57409902
## 63 (1133,1227] 0.73250729 1180.3499 -5.34985420
## 64 (1133,1227] 0.64006500 1170.1987 -7.19870000
# Export the final table to "elo_chess.csv" in the working directory, without
# a row-name column.
write.csv(x = df3, file = "elo_chess.csv", row.names = FALSE)