Libraries

Importing Libraries

library(stringr)
library(DT)
library(ggplot2)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v tibble  3.1.4     v purrr   0.3.4
## v tidyr   1.1.3     v dplyr   1.0.7
## v readr   2.0.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Loading

Reading the text file from Github

# Read the ratings text file from GitHub; read_lines() returns one string per line.
ratings_url <- 'https://github.com/mianshariq/SPS/raw/0ac39799e67b32b6c75564847778f012ffa5faa4/Data%20607/Projects/Project%201/ratings.txt'
df <- readr::read_lines(ratings_url)

Cleaning

Removing the dashes and empty rows and first two lines

# Blank out the separator rows that consist solely of dashes.
df1 <- str_remove_all(df, "^-+$")
# Keep only the rows that still contain at least one character.
df1 <- df1[nchar(df1) > 0]
# Discard the two header rows at the top of the table.
df1 <- df1[-(1:2)]

Splitting the data into odd and even lines so that each pair can be concatenated into one row.

# Each record spans two consecutive lines of df1: the odd-numbered line and
# the even-numbered line that follows it. Derive the split from the actual
# number of lines instead of a fixed count, so a file with a different number
# of records is neither truncated nor padded with NA.
# NOTE(review): assumes df1 has an even number of lines (complete pairs).
line_is_odd <- seq_along(df1) %% 2 == 1
df1_odd <- df1[line_is_odd]
df1_even <- df1[!line_is_odd]

Concatenate each pair of lines and replace the slashes in the data with pipes

# Join each odd line with its following even line (single space between them),
# then turn every "/" into "|" so all fields share one delimiter.
df2 <- str_replace_all(paste(df1_odd, df1_even), pattern = "/", replacement = "|")

Using regex for string manipulation and extraction

# First run of two or more letters followed by one or more blank-separated
# words of letters in each combined row.
name <- str_extract(
  string = df2,
  pattern = "[[:alpha:]]{2,}([[:blank:]][[:alpha:]]{1,}){1,}"
)
head(name, n = 3)

## [1] "GARY HUA"        "DAKSHESH DARURI" "ADITYA BAJAJ"

# The first two consecutive letters on each even line.
states <- str_extract(string = df1_even, pattern = "[[:alpha:]]{2}")
head(states, n = 3)

## [1] "ON" "MI" "MI"

# First "digits.digit" token in each row, converted from text to a number.
points <- as.numeric(str_extract(df2, "[[:digit:]]+\\.[[:digit:]]"))
head(points, n = 3)

## [1] 6 6 6

# Pre-rating: locate the "<char>: <3-4 digits>" field first, then pull the
# numeric part out of that field.
rating_field <- str_extract(df2, ".\\: \\s?[[:digit:]]{3,4}")
pre_ratings <- as.numeric(str_extract(rating_field, "\\(?[0-9,.]+\\)?"))

# Player id: the first run of digits in each combined row.
id <- as.integer(str_extract(df2, "\\d+"))
# Keep only ids that can refer to a row of df2 (and drop NA). The bound is
# derived from the number of rows rather than a fixed player count.
id <- id[!is.na(id) & id <= length(df2)]
head(id, 3)

## [1] 1 2 3

# Opponent numbers for each player: every run of digits that is immediately
# followed by "|" in the combined row. The lookahead keeps the "|" out of the
# match, so the result is directly a list with one character vector per row.
# Extracting from df2 itself (instead of re-parsing the deparsed list) keeps
# the list aligned with the rows of df2 even when a row has no matches: that
# row simply gets character(0) instead of a spurious "0" entry.
opp_num <- str_extract_all(df2, "\\d+(?=\\|)")
opp_num[1]

## [[1]]
## [1] "39" "21" "18" "14" "7"  "12" "4"

Using a loop to get the average pre-rating of each player's opponents.

# Mean pre-rating of each player's opponents.
# For position i, id[i] selects that player's entry in opp_num; the opponent
# numbers stored there (as strings) are converted to numbers and used as
# positions into pre_ratings. seq_along() keeps this safe when opp_num is
# empty, and lapply() avoids growing a list element by element.
OppAvgRating <- lapply(seq_along(opp_num), function(i) {
  opponents <- as.numeric(unlist(opp_num[id[i]]))
  mean(pre_ratings[opponents])
})

# One rounded value per player, as a one-column data frame. The column name
# `unlist.opp_avg.` is spelled out because later code refers to it by that name.
opp_avg <- data.frame(unlist.opp_avg. = round(as.numeric(unlist(OppAvgRating))))
opp_avg

##    unlist.opp_avg.
## 1             1605
## 2             1469
## 3             1564
## 4             1574
## 5             1501
## 6             1519
## 7             1372
## 8             1468
## 9             1523
## 10            1554
## 11            1468
## 12            1506
## 13            1498
## 14            1515
## 15            1484
## 16            1386
## 17            1499
## 18            1480
## 19            1426
## 20            1411
## 21            1470
## 22            1300
## 23            1214
## 24            1357
## 25            1363
## 26            1507
## 27            1222
## 28            1522
## 29            1314
## 30            1144
## 31            1260
## 32            1379
## 33            1277
## 34            1375
## 35            1150
## 36            1388
## 37            1385
## 38            1539
## 39            1430
## 40            1391
## 41            1248
## 42            1150
## 43            1107
## 44            1327
## 45            1152
## 46            1358
## 47            1392
## 48            1356
## 49            1286
## 50            1296
## 51            1356
## 52            1495
## 53            1345
## 54            1206
## 55            1406
## 56            1414
## 57            1363
## 58            1391
## 59            1319
## 60            1330
## 61            1327
## 62            1186
## 63            1350
## 64            1263

Creating Data Frame

Creating DF of the 5 columns needed.

# Assemble the report table. opp_avg is passed in as a data frame, so its
# column name carries over unchanged.
df3 <- data.frame(
  name = name,
  states = states,
  points = points,
  pre_ratings = pre_ratings,
  opp_avg
)
# Show the table as an interactive widget using the Scroller extension.
datatable(
  df3,
  extensions = 'Scroller',
  options = list(scrollY = 500, scroller = TRUE)
)

Visualization

Creating plot of states.

# Horizontal bar chart of the number of players per state.
ggplot(df3, aes(x = states)) +
  geom_bar() +
  coord_flip()

Creating bins for ratings to get a better view of the ratings as a group.

# Bucket the pre-ratings into 15 intervals (lowest value included), with
# interval labels printed using up to 4 significant digits.
df3$ratingbins <- cut(
  df3$pre_ratings,
  breaks = 15,
  include.lowest = TRUE,
  dig.lab = 4
)
# Horizontal bar chart of the number of players per rating bin.
ggplot(df3, aes(x = ratingbins)) +
  geom_bar() +
  coord_flip()

Creating a plot of pre-ratings vs. average opponent ratings. You can see that if your average opponent rating is high and you have many points, your rating is high.

# Scatter plot of each player's pre-rating against the average pre-rating of
# their opponents; colour encodes the rating bin and point size the points
# scored. The y-axis gets a readable label instead of the auto-generated
# column name, so it matches the plot title.
ggplot(df3, aes(x = pre_ratings, y = unlist.opp_avg., color = ratingbins)) +
  geom_point(aes(size = points)) +
  labs(
    title = 'Pre-rating Vs. Avg Opponent Rating',
    x = 'Pre-rating',
    y = 'Avg Opponent Rating'
  )

Displaying the data

# Preview the first six rows of the assembled table.
head(df3, n = 6L)

##                  name states points pre_ratings unlist.opp_avg.  ratingbins
## 1            GARY HUA     ON    6.0        1794            1605 (1700,1795]
## 2     DAKSHESH DARURI     MI    6.0        1553            1469 (1511,1605]
## 3        ADITYA BAJAJ     MI    6.0        1384            1564 (1322,1416]
## 4 PATRICK H SCHILLING     MI    5.5        1716            1574 (1700,1795]
## 5          HANSHI ZUO     MI    5.5        1655            1501 (1605,1700]
## 6         HANSEN SONG     OH    5.0        1686            1519 (1605,1700]

# Expected score of each player against an opponent rated at the player's
# average opponent rating, using the Elo logistic curve:
#   E = 1 / (1 + 10^((R_opponent - R_player) / 400))
# The exponent must be opponent-minus-player; the reverse order gives the
# opponent's expected score rather than the player's.
df3$elo_score <- 1 / (1 + 10^((df3$unlist.opp_avg. - df3$pre_ratings) / 400))

# Rating update: R_new = R_old + K * (actual score - expected score), with the
# weighting factor K assumed to be 20.
# NOTE(review): the actual score is fixed at 1 here (a single win) rather than
# taken from `points` -- confirm this is the intended assumption.
df3$elo_pts <- df3$pre_ratings + 20 * (1 - df3$elo_score)

# Pre-rating minus the projected rating, i.e. the negated rating adjustment.
# (No post-tournament rating is read from the raw data here.)
df3$diff <- df3$pre_ratings - df3$elo_pts
df3

##                        name states points pre_ratings unlist.opp_avg.
## 1                  GARY HUA     ON    6.0        1794            1605
## 2           DAKSHESH DARURI     MI    6.0        1553            1469
## 3              ADITYA BAJAJ     MI    6.0        1384            1564
## 4       PATRICK H SCHILLING     MI    5.5        1716            1574
## 5                HANSHI ZUO     MI    5.5        1655            1501
## 6               HANSEN SONG     OH    5.0        1686            1519
## 7         GARY DEE SWATHELL     MI    5.0        1649            1372
## 8          EZEKIEL HOUGHTON     MI    5.0        1641            1468
## 9               STEFANO LEE     ON    5.0        1411            1523
## 10                ANVIT RAO     MI    5.0        1365            1554
## 11 CAMERON WILLIAM MC LEMAN     MI    4.5        1712            1468
## 12           KENNETH J TACK     MI    4.5        1663            1506
## 13        TORRANCE HENRY JR     MI    4.5        1666            1498
## 14             BRADLEY SHAW     MI    4.5        1610            1515
## 15   ZACHARY JAMES HOUGHTON     MI    4.5        1220            1484
## 16             MIKE NIKITIN     MI    4.0        1604            1386
## 17       RONALD GRZEGORCZYK     MI    4.0        1629            1499
## 18            DAVID SUNDEEN     MI    4.0        1600            1480
## 19             DIPANKAR ROY     MI    4.0        1564            1426
## 20              JASON ZHENG     MI    4.0        1595            1411
## 21            DINH DANG BUI     ON    4.0        1563            1470
## 22         EUGENE L MCCLURE     MI    4.0        1555            1300
## 23                 ALAN BUI     ON    4.0        1363            1214
## 24        MICHAEL R ALDRICH     MI    4.0        1229            1357
## 25         LOREN SCHWIEBERT     MI    3.5        1745            1363
## 26                  MAX ZHU     ON    3.5        1579            1507
## 27           GAURAV GIDWANI     MI    3.5        1552            1222
## 28     SOFIA ADINA STANESCU     MI    3.5        1507            1522
## 29         CHIEDOZIE OKORIE     MI    3.5        1602            1314
## 30       GEORGE AVERY JONES     ON    3.5        1522            1144
## 31             RISHI SHETTY     MI    3.5        1494            1260
## 32    JOSHUA PHILIP MATHEWS     ON    3.5        1441            1379
## 33                  JADE GE     MI    3.5        1449            1277
## 34   MICHAEL JEFFERY THOMAS     MI    3.5        1399            1375
## 35         JOSHUA DAVID LEE     MI    3.5        1438            1150
## 36            SIDDHARTH JHA     MI    3.5        1355            1388
## 37     AMIYATOSH PWNANANDAM     MI    3.5         980            1385
## 38                BRIAN LIU     MI    3.0        1423            1539
## 39            JOEL R HENDON     MI    3.0        1436            1430
## 40             FOREST ZHANG     MI    3.0        1348            1391
## 41      KYLE WILLIAM MURPHY     MI    3.0        1403            1248
## 42                 JARED GE     MI    3.0        1332            1150
## 43        ROBERT GLEN VASEY     MI    3.0        1283            1107
## 44       JUSTIN D SCHILLING     MI    3.0        1199            1327
## 45                DEREK YAN     MI    3.0        1242            1152
## 46 JACOB ALEXANDER LAVALLEY     MI    3.0         377            1358
## 47              ERIC WRIGHT     MI    2.5        1362            1392
## 48             DANIEL KHAIN     MI    2.5        1382            1356
## 49         MICHAEL J MARTIN     MI    2.5        1291            1286
## 50               SHIVAM JHA     MI    2.5        1056            1296
## 51           TEJAS AYYAGARI     MI    2.5        1011            1356
## 52                ETHAN GUO     MI    2.5         935            1495
## 53            JOSE C YBARRA     MI    2.0        1393            1345
## 54              LARRY HODGE     MI    2.0        1270            1206
## 55                ALEX KONG     MI    2.0        1186            1406
## 56             MARISA RICCI     MI    2.0        1153            1414
## 57               MICHAEL LU     MI    2.0        1092            1363
## 58             VIRAJ MOHILE     MI    2.0         917            1391
## 59        SEAN M MC CORMICK     MI    2.0         853            1319
## 60               JULIA SHEN     MI    1.5         967            1330
## 61            JEZZEL FARKAS     ON    1.5         955            1327
## 62            ASHWIN BALAJI     MI    1.0        1530            1186
## 63     THOMAS JOSEPH HOSMER     MI    1.0        1175            1350
## 64                   BEN LI     MI    1.0        1163            1263
##       ratingbins  elo_score   elo_pts         diff
## 1    (1700,1795] 0.25200046 1808.9600 -14.95999082
## 2    (1511,1605] 0.38141588 1565.3717 -12.37168244
## 3    (1322,1416] 0.73810903 1389.2378  -5.23781935
## 4    (1700,1795] 0.30631208 1729.8738 -13.87375833
## 5    (1605,1700] 0.29183361 1669.1633 -14.16332777
## 6    (1605,1700] 0.27661213 1700.4678 -14.46775739
## 7    (1605,1700] 0.16874608 1665.6251 -16.62507831
## 8    (1605,1700] 0.26975459 1655.6049 -14.60490827
## 9    (1322,1416] 0.65582051 1417.8836  -6.88358988
## 10   (1322,1416] 0.74799954 1370.0400  -5.04000918
## 11   (1700,1795] 0.19709083 1728.0582 -16.05818340
## 12   (1605,1700] 0.28827748 1677.2345 -14.23445049
## 13   (1605,1700] 0.27546176 1680.4908 -14.49076486
## 14   (1605,1700] 0.36659230 1622.6682 -12.66815400
## 15   (1133,1227] 0.82049521 1223.5901  -3.59009585
## 16   (1511,1605] 0.22185155 1619.5630 -15.56296908
## 17   (1605,1700] 0.32118308 1642.5763 -13.57633840
## 18   (1511,1605] 0.33386058 1613.3228 -13.32278849
## 19   (1511,1605] 0.31122643 1577.7755 -13.77547137
## 20   (1511,1605] 0.25746444 1609.8507 -14.85071118
## 21   (1511,1605] 0.36926971 1575.6146 -12.61460577
## 22   (1511,1605] 0.18726232 1571.2548 -16.25475363
## 23   (1322,1416] 0.29781740 1377.0437 -14.04365204
## 24   (1227,1322] 0.67630167 1235.4740  -6.47396661
## 25   (1700,1795] 0.09984313 1763.0031 -18.00313735
## 26   (1511,1605] 0.39784191 1591.0432 -12.04316186
## 27   (1511,1605] 0.13015005 1569.3970 -17.39699898
## 28   (1416,1511] 0.52157333 1516.5685  -9.56853334
## 29   (1511,1605] 0.16004931 1618.7990 -16.79901389
## 30   (1511,1605] 0.10193172 1539.9614 -17.96136558
## 31   (1416,1511] 0.20635926 1509.8728 -15.87281486
## 32   (1416,1511] 0.41171003 1452.7658 -11.76579944
## 33   (1416,1511] 0.27089004 1463.5822 -14.58219926
## 34   (1322,1416] 0.46551606 1409.6897 -10.68967889
## 35   (1416,1511] 0.16004931 1454.7990 -16.79901389
## 36   (1322,1416] 0.54734852 1364.0530  -9.05302964
## 37  (943.8,1038] 0.91144177  981.7712  -1.77116460
## 38   (1416,1511] 0.66099909 1429.7800  -6.78001818
## 39   (1416,1511] 0.49136616 1446.1727 -10.17267672
## 40   (1322,1416] 0.56156794 1356.7686  -8.76864122
## 41   (1322,1416] 0.29064537 1417.1871 -14.18709261
## 42   (1322,1416] 0.25967158 1346.8066 -14.80656839
## 43   (1227,1322] 0.26636630 1297.6727 -14.67267405
## 44   (1133,1227] 0.67630167 1205.4740  -6.47396661
## 45   (1227,1322] 0.37330092 1254.5340 -12.53398163
## 46 [375.6,471.5] 0.99648463  377.0703  -0.07030733
## 47   (1322,1416] 0.54306649 1371.1387  -9.13867016
## 48   (1322,1416] 0.46265268 1392.7469 -10.74694634
## 49   (1227,1322] 0.49280492 1301.1439 -10.14390163
## 50   (1038,1133] 0.79923999 1060.0152  -4.01520018
## 51  (943.8,1038] 0.87931715 1013.4137  -2.41365693
## 52 (849.3,943.8] 0.96171350  935.7657  -0.76573008
## 53   (1322,1416] 0.43135861 1404.3728 -11.37282784
## 54   (1227,1322] 0.40892440 1281.8215 -11.82151193
## 55   (1133,1227] 0.78012960 1190.3974  -4.39740792
## 56   (1133,1227] 0.81793763 1156.6412  -3.64124738
## 57   (1038,1133] 0.82635355 1095.4729  -3.47292892
## 58 (849.3,943.8] 0.93869121  918.2262  -1.22617582
## 59 (849.3,943.8] 0.93598677  854.2803  -1.28026451
## 60  (943.8,1038] 0.88988784  969.2022  -2.20224316
## 61  (943.8,1038] 0.89486279  957.1027  -2.10274421
## 62   (1511,1605] 0.12129505 1547.5741 -17.57409902
## 63   (1133,1227] 0.73250729 1180.3499  -5.34985420
## 64   (1133,1227] 0.64006500 1170.1987  -7.19870000

Extracting CSV

# Export the final table to elo_chess.csv in the working directory, omitting row names.
write.csv(x = df3, file = "elo_chess.csv", row.names = FALSE)

Project1

Shariq Mian