DATA 607 Project1
Let’s load the required libraries in R for data analysis
library(dplyr)
library(downloader)
library(stringr)
library(htmlTable)
library(tidyverse)
library(janitor)
library(sqldf)
library(RODBC)
library(DBI)
library(odbc)
##### Download and read the txt file to a dataframe object ##### Filter the empty rows
## [1] "C:/MSDS/Fall_2021/DATA_607/Week4"
Parse the Game Round information to isolate the opponent Player_Nums for further analysis
round1_data <- str_extract_all(new_data_rows$Round_1,boundary("word"), simplify = T)
round2_data <- str_extract_all(new_data_rows$Round_2,boundary("word"), simplify = T)
round3_data <- str_extract_all(new_data_rows$Round_3,boundary("word"), simplify = T)
round4_data <- str_extract_all(new_data_rows$Round_4,boundary("word"), simplify = T)
round5_data <- str_extract_all(new_data_rows$Round_5,boundary("word"), simplify = T)
round6_data <- str_extract_all(new_data_rows$Round_6,boundary("word"), simplify = T)
round7_data <- str_extract_all(new_data_rows$Round_7,boundary("word"), simplify = T)
new_data_rows <- cbind(new_data_rows, Round1_Opponent = round1_data[,2], Round2_Opponent = round2_data[,2],
Round3_Opponent = round3_data[,2], Round4_Opponent = round4_data[,2], Round5_Opponent = round5_data[,2],
Round6_Opponent = round6_data[,2], Round7_Opponent = round7_data[,2])
subset_db_rows <- subset(new_data_rows, select= c(Player_Num, Player_Name, Total_Pts, Player_PreRating, Round1_Opponent,Round2_Opponent,Round3_Opponent,Round4_Opponent,Round5_Opponent,Round6_Opponent,Round7_Opponent))
## Substitute blank Opponent Player_Num to 99999
subset_db_rows[subset_db_rows == ""] <- 99999
db_rows <- subset_db_rows
## Filter the Rating digits, removing the P?? numbers
PreRating <- str_extract_all(subset_db_rows$Player_PreRating, "^[:digit:]+")
PreRating.df <- as.data.frame(t(PreRating))
T_PreRating <- t(PreRating.df)
## Add a new column with just Pre_Rating
db_rows$Pre_Rating <- T_PreRating
## First coerce the data.frame to all-character
db_rows_coerce = data.frame(lapply(db_rows, as.character), stringsAsFactors=FALSE)
Create dataframes to help compute ‘Average Pre Chess Rating of Opponents’
Convert the character value in the dataframe to Integer to help compute the mean
player_opponents.df <- subset(db_rows_coerce, select = c(Player_Num, Pre_Rating, Round1_Opponent, Round2_Opponent, Round3_Opponent, Round4_Opponent, Round5_Opponent, Round6_Opponent, Round7_Opponent) )
data1 <- player_opponents.df
dat <- as.data.frame(sapply(data1, as.numeric)) #<- sapply is here
dat_num <- dat
Loop thru the dataframe to get the Opponent Rating and calculate the Mean_Opponent_Rating
Bypass the byes, forfeit in mean calculation
Create Player dataset who played all the scheduled games
Create Player dataset who had atleast one unplayed game
all_games_played_df <- subset(combined_df, select= c(Player_Num, Player_Name, Total_Pts, Pre_Rating, Opponent_Mean_Rating,Num_Games_Played), row_number=FALSE)
## Players who played 7 Rounds
max_games_played_df <- filter(all_games_played_df, all_games_played_df$Num_Games_Played == 7)
htmlTable(max_games_played_df)
|
|
Player_Num
|
Player_Name
|
Total_Pts
|
Pre_Rating
|
Opponent_Mean_Rating
|
Num_Games_Played
|
|
1
|
1
|
GARY HUA
|
6.0
|
1794
|
1605.29
|
7
|
|
2
|
2
|
DAKSHESH DARURI
|
6.0
|
1553
|
1469.29
|
7
|
|
3
|
3
|
ADITYA BAJAJ
|
6.0
|
1384
|
1563.57
|
7
|
|
4
|
4
|
PATRICK H SCHILLING
|
5.5
|
1716
|
1573.57
|
7
|
|
5
|
5
|
HANSHI ZUO
|
5.5
|
1655
|
1500.86
|
7
|
|
6
|
6
|
HANSEN SONG
|
5.0
|
1686
|
1518.71
|
7
|
|
7
|
7
|
GARY DEE SWATHELL
|
5.0
|
1649
|
1372.14
|
7
|
|
8
|
8
|
EZEKIEL HOUGHTON
|
5.0
|
1641
|
1468.43
|
7
|
|
9
|
9
|
STEFANO LEE
|
5.0
|
1411
|
1523.14
|
7
|
|
10
|
10
|
ANVIT RAO
|
5.0
|
1365
|
1554.14
|
7
|
|
11
|
11
|
CAMERON WILLIAM MC LEMAN
|
4.5
|
1712
|
1467.57
|
7
|
|
12
|
13
|
TORRANCE HENRY JR
|
4.5
|
1666
|
1497.86
|
7
|
|
13
|
14
|
BRADLEY SHAW
|
4.5
|
1610
|
1515.00
|
7
|
|
14
|
15
|
ZACHARY JAMES HOUGHTON
|
4.5
|
1220
|
1483.86
|
7
|
|
15
|
17
|
RONALD GRZEGORCZYK
|
4.0
|
1629
|
1498.57
|
7
|
|
16
|
18
|
DAVID SUNDEEN
|
4.0
|
1600
|
1480.00
|
7
|
|
17
|
19
|
DIPANKAR ROY
|
4.0
|
1564
|
1426.29
|
7
|
|
18
|
20
|
JASON ZHENG
|
4.0
|
1595
|
1410.86
|
7
|
|
19
|
21
|
DINH DANG BUI
|
4.0
|
1563
|
1470.43
|
7
|
|
20
|
23
|
ALAN BUI
|
4.0
|
1363
|
1213.86
|
7
|
|
21
|
24
|
MICHAEL R ALDRICH
|
4.0
|
1229
|
1357.00
|
7
|
|
22
|
25
|
LOREN SCHWIEBERT
|
3.5
|
1745
|
1363.29
|
7
|
|
23
|
26
|
MAX ZHU
|
3.5
|
1579
|
1506.86
|
7
|
|
24
|
28
|
SOFIA ADINA STANESCU-BELLU
|
3.5
|
1507
|
1522.14
|
7
|
|
25
|
30
|
GEORGE AVERY JONES
|
3.5
|
1522
|
1144.14
|
7
|
|
26
|
31
|
RISHI SHETTY
|
3.5
|
1494
|
1259.86
|
7
|
|
27
|
32
|
JOSHUA PHILIP MATHEWS
|
3.5
|
1441
|
1378.71
|
7
|
|
28
|
33
|
JADE GE
|
3.5
|
1449
|
1276.86
|
7
|
|
29
|
34
|
MICHAEL JEFFERY THOMAS
|
3.5
|
1399
|
1375.29
|
7
|
|
30
|
35
|
JOSHUA DAVID LEE
|
3.5
|
1438
|
1149.71
|
7
|
|
31
|
39
|
JOEL R HENDON
|
3.0
|
1436
|
1429.57
|
7
|
|
32
|
40
|
FOREST ZHANG
|
3.0
|
1348
|
1390.57
|
7
|
|
33
|
42
|
JARED GE
|
3.0
|
1332
|
1149.86
|
7
|
|
34
|
43
|
ROBERT GLEN VASEY
|
3.0
|
1283
|
1106.57
|
7
|
|
35
|
45
|
DEREK YAN
|
3.0
|
1242
|
1152.00
|
7
|
|
36
|
46
|
JACOB ALEXANDER LAVALLEY
|
3.0
|
377
|
1357.71
|
7
|
|
37
|
47
|
ERIC WRIGHT
|
2.5
|
1362
|
1392.00
|
7
|
|
38
|
51
|
TEJAS AYYAGARI
|
2.5
|
1011
|
1356.14
|
7
|
|
39
|
52
|
ETHAN GUO
|
2.5
|
935
|
1494.57
|
7
|
|
40
|
61
|
JEZZEL FARKAS
|
1.5
|
955
|
1327.29
|
7
|
|
41
|
64
|
BEN LI
|
1.0
|
1163
|
1263.00
|
7
|
## Players who had one or more unplayed games
games_unplayed_df <- filter(all_games_played_df, all_games_played_df$Num_Games_Played < 7)
htmlTable(games_unplayed_df)
|
|
Player_Num
|
Player_Name
|
Total_Pts
|
Pre_Rating
|
Opponent_Mean_Rating
|
Num_Games_Played
|
|
1
|
12
|
KENNETH J TACK
|
4.5
|
1663
|
1506.17
|
6
|
|
2
|
16
|
MIKE NIKITIN
|
4.0
|
1604
|
1385.80
|
5
|
|
3
|
22
|
EUGENE L MCCLURE
|
4.0
|
1555
|
1300.33
|
6
|
|
4
|
27
|
GAURAV GIDWANI
|
3.5
|
1552
|
1221.67
|
6
|
|
5
|
29
|
CHIEDOZIE OKORIE
|
3.5
|
1602
|
1313.50
|
6
|
|
6
|
36
|
SIDDHARTH JHA
|
3.5
|
1355
|
1388.17
|
6
|
|
7
|
37
|
AMIYATOSH PWNANANDAM
|
3.5
|
980
|
1384.80
|
5
|
|
8
|
38
|
BRIAN LIU
|
3.0
|
1423
|
1539.17
|
6
|
|
9
|
41
|
KYLE WILLIAM MURPHY
|
3.0
|
1403
|
1248.50
|
4
|
|
10
|
44
|
JUSTIN D SCHILLING
|
3.0
|
1199
|
1327.00
|
6
|
|
11
|
48
|
DANIEL KHAIN
|
2.5
|
1382
|
1355.80
|
5
|
|
12
|
49
|
MICHAEL J MARTIN
|
2.5
|
1291
|
1285.80
|
5
|
|
13
|
50
|
SHIVAM JHA
|
2.5
|
1056
|
1296.00
|
6
|
|
14
|
53
|
JOSE C YBARRA
|
2.0
|
1393
|
1345.33
|
3
|
|
15
|
54
|
LARRY HODGE
|
2.0
|
1270
|
1206.17
|
6
|
|
16
|
55
|
ALEX KONG
|
2.0
|
1186
|
1406.00
|
6
|
|
17
|
56
|
MARISA RICCI
|
2.0
|
1153
|
1414.40
|
5
|
|
18
|
57
|
MICHAEL LU
|
2.0
|
1092
|
1363.00
|
6
|
|
19
|
58
|
VIRAJ MOHILE
|
2.0
|
917
|
1391.00
|
6
|
|
20
|
59
|
SEAN M MC CORMICK
|
2.0
|
853
|
1319.00
|
6
|
|
21
|
60
|
JULIA SHEN
|
1.5
|
967
|
1330.20
|
5
|
|
22
|
62
|
ASHWIN BALAJI
|
1.0
|
1530
|
1186.00
|
1
|
|
23
|
63
|
THOMAS JOSEPH HOSMER
|
1.0
|
1175
|
1350.20
|
5
|
Sort the Players by their Pre-Ratings, Points
# Sort by Points and Pre_Ratings
sorted_df <- result_df[order(result_df$Total_Pts, result_df$Pre_Rating, decreasing = TRUE),]
htmlTable(sorted_df, rnames = FALSE)
|
Player_Num
|
Player_Name
|
State
|
Total_Pts
|
Pre_Rating
|
Opponent_Mean_Rating
|
|
1
|
GARY HUA
|
ON
|
6.0
|
1794
|
1605.29
|
|
2
|
DAKSHESH DARURI
|
MI
|
6.0
|
1553
|
1469.29
|
|
3
|
ADITYA BAJAJ
|
MI
|
6.0
|
1384
|
1563.57
|
|
4
|
PATRICK H SCHILLING
|
MI
|
5.5
|
1716
|
1573.57
|
|
5
|
HANSHI ZUO
|
MI
|
5.5
|
1655
|
1500.86
|
|
6
|
HANSEN SONG
|
OH
|
5.0
|
1686
|
1518.71
|
|
7
|
GARY DEE SWATHELL
|
MI
|
5.0
|
1649
|
1372.14
|
|
8
|
EZEKIEL HOUGHTON
|
MI
|
5.0
|
1641
|
1468.43
|
|
9
|
STEFANO LEE
|
ON
|
5.0
|
1411
|
1523.14
|
|
10
|
ANVIT RAO
|
MI
|
5.0
|
1365
|
1554.14
|
|
11
|
CAMERON WILLIAM MC LEMAN
|
MI
|
4.5
|
1712
|
1467.57
|
|
13
|
TORRANCE HENRY JR
|
MI
|
4.5
|
1666
|
1497.86
|
|
12
|
KENNETH J TACK
|
MI
|
4.5
|
1663
|
1506.17
|
|
14
|
BRADLEY SHAW
|
MI
|
4.5
|
1610
|
1515.00
|
|
15
|
ZACHARY JAMES HOUGHTON
|
MI
|
4.5
|
1220
|
1483.86
|
|
17
|
RONALD GRZEGORCZYK
|
MI
|
4.0
|
1629
|
1498.57
|
|
16
|
MIKE NIKITIN
|
MI
|
4.0
|
1604
|
1385.80
|
|
18
|
DAVID SUNDEEN
|
MI
|
4.0
|
1600
|
1480.00
|
|
20
|
JASON ZHENG
|
MI
|
4.0
|
1595
|
1410.86
|
|
19
|
DIPANKAR ROY
|
MI
|
4.0
|
1564
|
1426.29
|
|
21
|
DINH DANG BUI
|
ON
|
4.0
|
1563
|
1470.43
|
|
22
|
EUGENE L MCCLURE
|
MI
|
4.0
|
1555
|
1300.33
|
|
23
|
ALAN BUI
|
ON
|
4.0
|
1363
|
1213.86
|
|
24
|
MICHAEL R ALDRICH
|
MI
|
4.0
|
1229
|
1357.00
|
|
25
|
LOREN SCHWIEBERT
|
MI
|
3.5
|
1745
|
1363.29
|
|
29
|
CHIEDOZIE OKORIE
|
MI
|
3.5
|
1602
|
1313.50
|
|
26
|
MAX ZHU
|
ON
|
3.5
|
1579
|
1506.86
|
|
27
|
GAURAV GIDWANI
|
MI
|
3.5
|
1552
|
1221.67
|
|
30
|
GEORGE AVERY JONES
|
ON
|
3.5
|
1522
|
1144.14
|
|
28
|
SOFIA ADINA STANESCU-BELLU
|
MI
|
3.5
|
1507
|
1522.14
|
|
31
|
RISHI SHETTY
|
MI
|
3.5
|
1494
|
1259.86
|
|
33
|
JADE GE
|
MI
|
3.5
|
1449
|
1276.86
|
|
32
|
JOSHUA PHILIP MATHEWS
|
ON
|
3.5
|
1441
|
1378.71
|
|
35
|
JOSHUA DAVID LEE
|
MI
|
3.5
|
1438
|
1149.71
|
|
34
|
MICHAEL JEFFERY THOMAS
|
MI
|
3.5
|
1399
|
1375.29
|
|
36
|
SIDDHARTH JHA
|
MI
|
3.5
|
1355
|
1388.17
|
|
37
|
AMIYATOSH PWNANANDAM
|
MI
|
3.5
|
980
|
1384.80
|
|
39
|
JOEL R HENDON
|
MI
|
3.0
|
1436
|
1429.57
|
|
38
|
BRIAN LIU
|
MI
|
3.0
|
1423
|
1539.17
|
|
41
|
KYLE WILLIAM MURPHY
|
MI
|
3.0
|
1403
|
1248.50
|
|
40
|
FOREST ZHANG
|
MI
|
3.0
|
1348
|
1390.57
|
|
42
|
JARED GE
|
MI
|
3.0
|
1332
|
1149.86
|
|
43
|
ROBERT GLEN VASEY
|
MI
|
3.0
|
1283
|
1106.57
|
|
45
|
DEREK YAN
|
MI
|
3.0
|
1242
|
1152.00
|
|
44
|
JUSTIN D SCHILLING
|
MI
|
3.0
|
1199
|
1327.00
|
|
46
|
JACOB ALEXANDER LAVALLEY
|
MI
|
3.0
|
377
|
1357.71
|
|
48
|
DANIEL KHAIN
|
MI
|
2.5
|
1382
|
1355.80
|
|
47
|
ERIC WRIGHT
|
MI
|
2.5
|
1362
|
1392.00
|
|
49
|
MICHAEL J MARTIN
|
MI
|
2.5
|
1291
|
1285.80
|
|
50
|
SHIVAM JHA
|
MI
|
2.5
|
1056
|
1296.00
|
|
51
|
TEJAS AYYAGARI
|
MI
|
2.5
|
1011
|
1356.14
|
|
52
|
ETHAN GUO
|
MI
|
2.5
|
935
|
1494.57
|
|
53
|
JOSE C YBARRA
|
MI
|
2.0
|
1393
|
1345.33
|
|
54
|
LARRY HODGE
|
MI
|
2.0
|
1270
|
1206.17
|
|
55
|
ALEX KONG
|
MI
|
2.0
|
1186
|
1406.00
|
|
56
|
MARISA RICCI
|
MI
|
2.0
|
1153
|
1414.40
|
|
57
|
MICHAEL LU
|
MI
|
2.0
|
1092
|
1363.00
|
|
58
|
VIRAJ MOHILE
|
MI
|
2.0
|
917
|
1391.00
|
|
59
|
SEAN M MC CORMICK
|
MI
|
2.0
|
853
|
1319.00
|
|
60
|
JULIA SHEN
|
MI
|
1.5
|
967
|
1330.20
|
|
61
|
JEZZEL FARKAS
|
ON
|
1.5
|
955
|
1327.29
|
|
62
|
ASHWIN BALAJI
|
MI
|
1.0
|
1530
|
1186.00
|
|
63
|
THOMAS JOSEPH HOSMER
|
MI
|
1.0
|
1175
|
1350.20
|
|
64
|
BEN LI
|
MI
|
1.0
|
1163
|
1263.00
|
GARY HUA scored the most points relative to his expected result
## Create the .CSV file from the dataframe
getwd() # Path where the file will be downloaded
## [1] "C:/MSDS/Fall_2021/DATA_607/Week4"
write.csv(result_df,"Chess_Project_Result.csv", row.names = FALSE)