R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

** Project 1 * In this project, you’re given a text file with chess tournament results where the information has some structure.

If you have questions about the meaning of the data or the results, please post them on the discussion forum.

Data science, like chess, is a game of back and forth… The chess rating system (invented by a Minnesota statistician named Arpad Elo) has been used in many other contexts, including assessing relative strength of employment candidates by human resource departments. You may substitute another text file (or set of text files, or data scraped from web pages) of similar or greater complexity, and create your own assignment and solution. You may work in a small team. All of your code should be in an R markdown file (and published to rpubs.com); with your data accessible for the person running the script. **

package

#install.packages("readr")
#install.packages("tidyverse")
#install.packages("DBI")
#install.packages("odbc")
#install.packages("RSQLite")
#install.packages("dblyr")
#install.packages("RMySQL")
#install.packages("stringr")

library

library(readr)
library(stringr)
library(DBI) 
library(odbc)
library(RSQLite)
library(dbplyr)
library(RMySQL)
## 
## Attaching package: 'RMySQL'
## The following object is masked from 'package:RSQLite':
## 
##     isIdCurrent
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ tibble    3.2.1
## ✔ ggplot2   3.5.1     ✔ tidyr     1.3.1
## ✔ lubridate 1.9.3
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::ident()  masks dbplyr::ident()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::sql()    masks dbplyr::sql()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(RODBC)
library(readxl)
library(dplyr)
library(odbc)

Read data from csv file in github

tournamentinfo_data <- readLines("https://raw.githubusercontent.com/asadny82/Data607/refs/heads/main/tournamentinfo.txt")
## Warning in
## readLines("https://raw.githubusercontent.com/asadny82/Data607/refs/heads/main/tournamentinfo.txt"):
## incomplete final line found on
## 'https://raw.githubusercontent.com/asadny82/Data607/refs/heads/main/tournamentinfo.txt'
player_number <- as.numeric(unlist(str_extract_all(tournamentinfo_data,"(?<=\\s{3,4})\\d{1,2}(?=\\s)")))
player_name <- unlist(str_extract_all(tournamentinfo_data,"(?<=\\d\\s\\|\\s)([A-z, -]*\\s){1,}[[:alpha:]]*(?=\\s*\\|)"))
player_state <- unlist(str_extract_all(tournamentinfo_data, "[[:upper:]]{2}(?=\\s\\|)"))
total_pts <- as.numeric(unlist(str_extract_all(tournamentinfo_data, "(?<=\\|)\\d\\.\\d")))
player_pre_rat <- as.numeric(unlist(str_extract_all(tournamentinfo_data, "(?<=R:\\s{1,2})(\\d{3,4}(?=\\s))|(\\d{3,4}(?=P\\d{1,2}\\s*-))")))

# Create a data frame
dataframe <- data.frame(player_number, player_name, player_state, total_pts, player_pre_rat)
dataframe
##    player_number                      player_name player_state total_pts
## 1              1 GARY HUA                                   ON       6.0
## 2              2 DAKSHESH DARURI                            MI       6.0
## 3              3 ADITYA BAJAJ                               MI       6.0
## 4              4 PATRICK H SCHILLING                        MI       5.5
## 5              5 HANSHI ZUO                                 MI       5.5
## 6              6 HANSEN SONG                                OH       5.0
## 7              7 GARY DEE SWATHELL                          MI       5.0
## 8              8 EZEKIEL HOUGHTON                           MI       5.0
## 9              9 STEFANO LEE                                ON       5.0
## 10            10 ANVIT RAO                                  MI       5.0
## 11            11 CAMERON WILLIAM MC LEMAN                   MI       4.5
## 12            12 KENNETH J TACK                             MI       4.5
## 13            13 TORRANCE HENRY JR                          MI       4.5
## 14            14 BRADLEY SHAW                               MI       4.5
## 15            15 ZACHARY JAMES HOUGHTON                     MI       4.5
## 16            16 MIKE NIKITIN                               MI       4.0
## 17            17 RONALD GRZEGORCZYK                         MI       4.0
## 18            18 DAVID SUNDEEN                              MI       4.0
## 19            19 DIPANKAR ROY                               MI       4.0
## 20            20 JASON ZHENG                                MI       4.0
## 21            21 DINH DANG BUI                              ON       4.0
## 22            22 EUGENE L MCCLURE                           MI       4.0
## 23            23 ALAN BUI                                   ON       4.0
## 24            24 MICHAEL R ALDRICH                          MI       4.0
## 25            25 LOREN SCHWIEBERT                           MI       3.5
## 26            26 MAX ZHU                                    ON       3.5
## 27            27 GAURAV GIDWANI                             MI       3.5
## 28            28 SOFIA ADINA STANESCU-BELLU                 MI       3.5
## 29            29 CHIEDOZIE OKORIE                           MI       3.5
## 30            30 GEORGE AVERY JONES                         ON       3.5
## 31            31 RISHI SHETTY                               MI       3.5
## 32            32 JOSHUA PHILIP MATHEWS                      ON       3.5
## 33            33 JADE GE                                    MI       3.5
## 34            34 MICHAEL JEFFERY THOMAS                     MI       3.5
## 35            35 JOSHUA DAVID LEE                           MI       3.5
## 36            36 SIDDHARTH JHA                              MI       3.5
## 37            37 AMIYATOSH PWNANANDAM                       MI       3.5
## 38            38 BRIAN LIU                                  MI       3.0
## 39            39 JOEL R HENDON                              MI       3.0
## 40            40 FOREST ZHANG                               MI       3.0
## 41            41 KYLE WILLIAM MURPHY                        MI       3.0
## 42            42 JARED GE                                   MI       3.0
## 43            43 ROBERT GLEN VASEY                          MI       3.0
## 44            44 JUSTIN D SCHILLING                         MI       3.0
## 45            45 DEREK YAN                                  MI       3.0
## 46            46 JACOB ALEXANDER LAVALLEY                   MI       3.0
## 47            47 ERIC WRIGHT                                MI       2.5
## 48            48 DANIEL KHAIN                               MI       2.5
## 49            49 MICHAEL J MARTIN                           MI       2.5
## 50            50 SHIVAM JHA                                 MI       2.5
## 51            51 TEJAS AYYAGARI                             MI       2.5
## 52            52 ETHAN GUO                                  MI       2.5
## 53            53 JOSE C YBARRA                              MI       2.0
## 54            54 LARRY HODGE                                MI       2.0
## 55            55 ALEX KONG                                  MI       2.0
## 56            56 MARISA RICCI                               MI       2.0
## 57            57 MICHAEL LU                                 MI       2.0
## 58            58 VIRAJ MOHILE                               MI       2.0
## 59            59 SEAN M MC CORMICK                          MI       2.0
## 60            60 JULIA SHEN                                 MI       1.5
## 61            61 JEZZEL FARKAS                              ON       1.5
## 62            62 ASHWIN BALAJI                              MI       1.0
## 63            63 THOMAS JOSEPH HOSMER                       MI       1.0
## 64            64 BEN LI                                     MI       1.0
##    player_pre_rat
## 1            1794
## 2            1553
## 3            1384
## 4            1716
## 5            1655
## 6            1686
## 7            1649
## 8            1641
## 9            1411
## 10           1365
## 11           1712
## 12           1663
## 13           1666
## 14           1610
## 15           1220
## 16           1604
## 17           1629
## 18           1600
## 19           1564
## 20           1595
## 21           1563
## 22           1555
## 23           1363
## 24           1229
## 25           1745
## 26           1579
## 27           1552
## 28           1507
## 29           1602
## 30           1522
## 31           1494
## 32           1441
## 33           1449
## 34           1399
## 35           1438
## 36           1355
## 37            980
## 38           1423
## 39           1436
## 40           1348
## 41           1403
## 42           1332
## 43           1283
## 44           1199
## 45           1242
## 46            377
## 47           1362
## 48           1382
## 49           1291
## 50           1056
## 51           1011
## 52            935
## 53           1393
## 54           1270
## 55           1186
## 56           1153
## 57           1092
## 58            917
## 59            853
## 60            967
## 61            955
## 62           1530
## 63           1175
## 64           1163
numbers_rows <- tournamentinfo_data[seq(5,length(tournamentinfo_data),3)]

opponent_player_numbers <- as.numeric(unlist(str_extract_all(numbers_rows, "(?<=\\|(W|L|D)\\s{2,3})[[:digit:]]{1,2}(?=\\|)|((?<!->)(?<=\\|(U|H|B|X))\\s{4}(?=\\|))")))

opponent_player_numbers
##   [1] 39 21 18 14  7 12  4 63 58  4 17 16 20  7  8 61 25 21 11 13 12 23 28  2 26
##  [26]  5 19  1 45 37 12 13  4 14 17 34 29 11 35 10 27 21 57 46 13 11  1  9  2  3
##  [51] 32 14  9 47 28 19 25 18 59  8 26  7 20 16 19 55 31  6 25 18 38 56  6  7  3
##  [76] 34 26 42 33  5 38 NA  1  3 36 27  7  5 33  3 32 54 44  8  1 27  5 31 19 16
## [101] 30 22 54 33 38 10 15 NA 39  2 36 NA 48 41 26  2 23 22  5 47  9  1 32 19 38
## [126] 10 15 10 52 28 18  4  8 40 49 23 41 28  2  9 43  1 47  3 40 39  6 64 52 28
## [151] 15 NA 17 40  4 43 20 58 17 37 46 28 47 43 25 60 44 39  9 53  3 24 34 10 47
## [176] 49 40 17  4  9 32 11 51 13 46 37 14  6 NA 24  4 22 19 20  8 36 50  6 38 34
## [201] 52 48 NA 52 64 15 55 31 61 50 58 55 64 10 30 50 14 61  8 44 18 51 26 13 60
## [226] 12 50 36 13 15 51  6 60 37 29 25 11 52 46 38 56  6 57 52 48 13 57 51 33 NA
## [251] 16 28 NA  5 34 27 NA 23 61 11 35 29 12 NA 18 15  1 54 40 16 44 21 24 20 26
## [276] 39 59 21 56 22 59 17 58 20 NA NA NA 12 50 57 60 61 64 56 21 23 24 63 59 46
## [301] 55 NA 14 32 53 39 24 59  5 51 60 56 63 55 58 35  7 27 50 64 43 23 18 24 21
## [326] 61  8 51 25 17 63 NA 52 NA 29 35 26 20 63 64 58 NA NA 29 42 33 46 NA 31 30
## [351] 27 45 36 57 32 47 33 30 22 19 48 29 35 34 NA 25 NA 44 NA 57 NA 14 39 61 NA
## [376] 15 59 64 62 31 10 30 NA 45 43 NA 11 35 45 NA 40 42  7 36 42 51 35 53 NA 31
## [401]  2 41 23 49 NA 45 41 NA  9 40 43 54 44 33 34 45 42 24 NA NA 32  3 54 47 42
## [426] 30 37 55 NA NA NA NA NA NA  2 48 49 43 45 NA NA 22 30 31 49 46 42 54
mTournament <- matrix(unlist(tournamentinfo_data), byrow=TRUE)

matrix1 <- mTournament[seq(5,(length(mTournament)),3)]
head(matrix1)
## [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
## [2] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
## [3] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
## [4] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|"
## [5] "    5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|"
## [6] "    6 | HANSEN SONG                     |5.0  |W  34|D  29|L  11|W  35|D  10|W  27|W  21|"
matrix2 <- mTournament[seq(6,length(mTournament),3)]
head(matrix2)
## [1] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [2] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
## [3] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [4] "   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |"
## [5] "   MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
## [6] "   OH | 15055204 / R: 1686   ->1687     |N:3  |W    |B    |W    |B    |B    |W    |B    |"

Create pcr_matrix for total_opp_pcr and average_opp_pcr.

pcr_matrix <- matrix(data = NA, nrow = 64, ncol = 2)

 
colnames(pcr_matrix) <- c("total_opp_pcr", "average_opp_pcr")

 
row_count <- 0

 
for(i in seq(from=1, to=length(opponent_player_numbers)-6, by=7)){
  row_count <- row_count + 1
  
 
  pcr_matrix[row_count, 1] <- (sum(subset(dataframe$player_pre_rat, dataframe$player_number %in% opponent_player_numbers[seq(from=i, to=i+6, by=1)])))
  
 
  pcr_matrix[row_count, 2] <- pcr_matrix[row_count, 1] / length(subset(opponent_player_numbers[seq(from=i, to=i+6, by=1)],!is.na(opponent_player_numbers[seq(from=i, to=i+6, by=1)])))
  
}
 
head(pcr_matrix, 5)
##      total_opp_pcr average_opp_pcr
## [1,]         11237        1605.286
## [2,]         10285        1469.286
## [3,]         10945        1563.571
## [4,]         11015        1573.571
## [5,]         10506        1500.857
# Round the nearest whole number
 Rounds <- str_extract_all(matrix1, '[A-Z]\\s{2,}\\d+')
 # matching numbers
 Rounds <- str_extract_all(Rounds, '\\d+')
## Warning in stri_extract_all_regex(string, pattern, simplify = simplify, :
## argument is not an atomic vector; coercing
AvgOppPreChessRating <- c()

for(i in c(1:length(Rounds))){
  AvgOppPreChessRating[i] <- round(mean(player_pre_rat[as.numeric(Rounds[[i]])]),0)
}
AvgOppPreChessRating
##  [1] 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 1468 1506 1498 1515 1484
## [16] 1386 1499 1480 1426 1411 1470 1300 1214 1357 1363 1507 1222 1522 1314 1144
## [31] 1260 1379 1277 1375 1150 1388 1385 1539 1430 1391 1248 1150 1107 1327 1152
## [46] 1358 1392 1356 1286 1296 1356 1495 1345 1206 1406 1414 1363 1391 1319 1330
## [61] 1327 1186 1350 1263
Project1 <- head(dataframe,5)

head(Project1)
##   player_number                      player_name player_state total_pts
## 1             1 GARY HUA                                   ON       6.0
## 2             2 DAKSHESH DARURI                            MI       6.0
## 3             3 ADITYA BAJAJ                               MI       6.0
## 4             4 PATRICK H SCHILLING                        MI       5.5
## 5             5 HANSHI ZUO                                 MI       5.5
##   player_pre_rat
## 1           1794
## 2           1553
## 3           1384
## 4           1716
## 5           1655
dt<-write_csv(Project1, 'tournament.csv' , append = FALSE)

dt
##   player_number                      player_name player_state total_pts
## 1             1 GARY HUA                                   ON       6.0
## 2             2 DAKSHESH DARURI                            MI       6.0
## 3             3 ADITYA BAJAJ                               MI       6.0
## 4             4 PATRICK H SCHILLING                        MI       5.5
## 5             5 HANSHI ZUO                                 MI       5.5
##   player_pre_rat
## 1           1794
## 2           1553
## 3           1384
## 4           1716
## 5           1655

connect azure database with rstudio.

library(odbc)

# Creating connection


mysqldbTBConn = dbConnect(RMySQL::MySQL(),
                            dbname= 'md.asaduzzaman39',
                            host= 'cunydata607sql.mysql.database.azure.com',
                            port=3306,
                            user='md.asaduzzaman39',
                            password='c1706f410226ffca')
dbListTables(mysqldbTBConn)
## [1] "airline_data" "color"        "manufacturer" "model"        "product"     
## [6] "tournament"
Tournament_data = dbSendQuery(mysqldbTBConn,"select * from  tournament")

TournamentData <- fetch(Tournament_data)

print(TournamentData)
##   player_num                      player_name player_state total_pts
## 1          1 GARY HUA                                   ON       6.0
## 2          2 DAKSHESH DARURI                            MI       6.0
## 3          3 ADITYA BAJAJ                               MI       6.0
## 4          4 PATRICK H SCHILLING                        MI       5.5
## 5          5 HANSHI ZUO                                 MI       5.5
##   player_pre_rat
## 1           1794
## 2           1553
## 3           1384
## 4           1716
## 5           1655