DATA 607 Project 1

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readr)
library(curl)
## Using libcurl 7.64.1 with LibreSSL/2.8.3
## 
## Attaching package: 'curl'
## The following object is masked from 'package:readr':
## 
##     parse_date
library(ggplot2)
library(dplyr)
# Read the pipe-delimited tournament report straight from GitHub. The first
# four lines of the file are skipped and no header row is read, so columns
# get the default names V1, V2, ...
tourney_url <- "https://raw.githubusercontent.com/brsingh7/DATA607/main/Week4/tournamentinfo.txt"
tourney_data <- read.csv(
  curl(tourney_url),
  header = FALSE,
  sep = "|",
  skip = 4
)
# Local alternative:
# tourney_data <- read.csv("tournamentinfo.txt", header = FALSE, sep = "|", skip = 4)

2. Separate the required data into vectors and combine them into a data frame with each Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and each round’s opponent. Create a separate table of each player’s pre-match rating so that opponents’ ratings can be looked up with a join.

# Records repeat with a period of three rows: the first row of each group
# feeds rows1, the second feeds rows2, and the third is not used (presumably
# a separator line -- confirm against the input file). Logical indexing is
# used instead of seq(1, n, 3) so an empty input yields empty frames rather
# than an error.
row_pos <- seq_len(nrow(tourney_data))
rows1 <- tourney_data[row_pos %% 3 == 1, ]
rows2 <- tourney_data[row_pos %% 3 == 2, ]

# Return the first run of digits in each round cell as an integer, or NA when
# the cell has no digits (assumed to mean the round had no opponent).
# "\\d+" is used rather than "\\d{1,2}" so opponent numbers of 100 or more
# are not truncated to their first two digits.
extract_opponent <- function(round_cell) {
  as.integer(str_extract(round_cell, "\\d+"))
}

player_number <- as.integer(str_extract(rows1$V1, "\\d+"))
player_name <- str_trim(rows1$V2)
player_points <- as.double(str_trim(rows1$V3))
# Anchor on the "R:" label and capture the digits that follow it, so the
# pre-rating is read regardless of how many digits it has and is never
# confused with another number in the same cell.
player_rating <- as.integer(str_match(rows2$V2, "R:\\s*(\\d+)")[, 2])
state <- str_trim(rows2$V1)

round1 <- extract_opponent(rows1$V4)
round2 <- extract_opponent(rows1$V5)
round3 <- extract_opponent(rows1$V6)
round4 <- extract_opponent(rows1$V7)
round5 <- extract_opponent(rows1$V8)
round6 <- extract_opponent(rows1$V9)
round7 <- extract_opponent(rows1$V10)

# One row per player, plus a narrow lookup table (number -> pre-rating).
player_df <- data.frame(
  player_number, player_name, state, player_points, player_rating,
  round1, round2, round3, round4, round5, round6, round7
)
player_ratings_df <- data.frame(player_number, player_rating)

3. Compare the two tables and return each opponent’s rating for each player using a left join.

# Attach each round's opponent pre-rating with one left join per round.
# The rating column is given its final name (Opp<i>_Rating) before the join,
# so the result does not depend on dplyr's automatic ".x"/".y" suffixing of
# repeated "player_rating" columns or on positional renaming afterwards.
joined <- player_df
for (i in seq_len(7)) {
  opp_ratings <- player_ratings_df[, c("player_number", "player_rating")]
  names(opp_ratings)[2] <- paste0("Opp", i, "_Rating")
  joined <- left_join(
    joined,
    opp_ratings,
    by = setNames("player_number", paste0("round", i))
  )
}

# Rename the player columns by name. Round 6 was previously mislabelled
# "Round16_Opp"; it is now "Round6_Opp", consistent with the other rounds.
joined <- joined %>%
  rename(
    Player_ID = player_number,
    Player_Name = player_name,
    State = state,
    Player_points = player_points,
    Player_Rating = player_rating,
    Round1_Opp = round1,
    Round2_Opp = round2,
    Round3_Opp = round3,
    Round4_Opp = round4,
    Round5_Opp = round5,
    Round6_Opp = round6,
    Round7_Opp = round7
  )

4. Calculate Average Pre Chess Rating of Opponents.

# Mean of the seven opponent pre-ratings per player, ignoring rounds with no
# rating (NA), rounded to a whole number.
opp_rating_cols <- paste0("Opp", 1:7, "_Rating")
joined$Opponent_Avg <- round(
  rowMeans(joined[, opp_rating_cols], na.rm = TRUE)
)

5. Create the final data frame with Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents, and write it to a .csv file.

# Keep only the five reporting columns, then preview the first 15 players.
chess_tournament <- select(
  joined,
  Player_Name, State, Player_points, Player_Rating, Opponent_Avg
)
head(chess_tournament, n = 15)
##                 Player_Name State Player_points Player_Rating Opponent_Avg
## 1                  GARY HUA    ON           6.0          1794         1605
## 2           DAKSHESH DARURI    MI           6.0          1553         1469
## 3              ADITYA BAJAJ    MI           6.0          1384         1564
## 4       PATRICK H SCHILLING    MI           5.5          1716         1574
## 5                HANSHI ZUO    MI           5.5          1655         1501
## 6               HANSEN SONG    OH           5.0          1686         1519
## 7         GARY DEE SWATHELL    MI           5.0          1649         1372
## 8          EZEKIEL HOUGHTON    MI           5.0          1641         1468
## 9               STEFANO LEE    ON           5.0          1411         1523
## 10                ANVIT RAO    MI           5.0          1365         1554
## 11 CAMERON WILLIAM MC LEMAN    MI           4.5          1712         1468
## 12           KENNETH J TACK    MI           4.5          1663         1506
## 13        TORRANCE HENRY JR    MI           4.5          1666         1498
## 14             BRADLEY SHAW    MI           4.5          1610         1515
## 15   ZACHARY JAMES HOUGHTON    MI           4.5          1220         1484
# Save the final table to the working directory without row names.
write.csv(
  chess_tournament,
  file = "chess_tournament.csv",
  row.names = FALSE
)
# Interactive alternative (choose the destination file):
# write.csv(chess_tournament, file = file.choose(new = TRUE), row.names = FALSE)