DATA 607 Project 1
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
library(curl)
## Using libcurl 7.64.1 with LibreSSL/2.8.3
##
## Attaching package: 'curl'
## The following object is masked from 'package:readr':
##
## parse_date
library(ggplot2)
library(dplyr)
# Download the raw tournament cross-table from GitHub and split every line on
# the pipe delimiter. `skip = 4` drops the first four lines of the file
# (NOTE(review): presumably the header banner -- confirm against the file) and
# `header = FALSE` leaves the columns with the default names V1, V2, ...
tourney_data <- read.csv(
  file = curl("https://raw.githubusercontent.com/brsingh7/DATA607/main/Week4/tournamentinfo.txt"),
  header = FALSE,
  sep = "|",
  skip = 4
)
# Offline alternative: read a local copy of the same file instead.
# tourney_data <- read.csv("tournamentinfo.txt", header = FALSE, sep = "|", skip = 4)
2. Extract the required fields into vectors and combine them into a data frame containing each player’s name, state, total number of points, pre-rating, and opponent in each round. Create a separate table of each player’s pre-match rating so that it can be joined back onto the opponents.
# Rows are consumed in groups of three: the first and second row of every
# group are kept, the third is ignored.
# NOTE(review): the third row is presumably a separator line -- confirm
# against the raw file.
first_line_idx <- seq(1, nrow(tourney_data), 3)
second_line_idx <- seq(2, nrow(tourney_data), 3)
rows1 <- tourney_data[first_line_idx, ]
rows2 <- tourney_data[second_line_idx, ]

# Helper: return the first one- or two-digit number found in each round cell
# as an integer; cells containing no digits yield NA.
extract_opponent <- function(cell) {
  as.integer(str_extract(str_trim(cell), "\\d{1,2}"))
}

# Fields taken from the first row of each group.
player_number <- as.integer(str_extract(rows1$V1, "\\d+"))
player_name <- str_trim(rows1$V2)
player_points <- as.double(str_trim(rows1$V3))

# Fields taken from the second row of each group. The rating is the first run
# of 3-4 digits that is preceded by whitespace; as.integer() tolerates the
# leading whitespace captured by the pattern.
state <- str_trim(rows2$V1)
player_rating <- as.integer(str_extract(str_trim(rows2$V2), "\\s\\d{3,4}+"))

# Opponent number for each of the seven rounds (columns V4 through V10).
round1 <- extract_opponent(rows1$V4)
round2 <- extract_opponent(rows1$V5)
round3 <- extract_opponent(rows1$V6)
round4 <- extract_opponent(rows1$V7)
round5 <- extract_opponent(rows1$V8)
round6 <- extract_opponent(rows1$V9)
round7 <- extract_opponent(rows1$V10)

# One row per player, plus a two-column lookup table (number -> rating) that
# is used to resolve opponent ratings.
player_df <- data.frame(
  player_number = player_number,
  player_name = player_name,
  state = state,
  player_points = player_points,
  player_rating = player_rating,
  round1 = round1,
  round2 = round2,
  round3 = round3,
  round4 = round4,
  round5 = round5,
  round6 = round6,
  round7 = round7
)
player_ratings_df <- data.frame(
  player_number = player_number,
  player_rating = player_rating
)
3. Compare the two tables and return each opponent’s rating for each player using a left join.
# Attach each round opponent's pre-rating to the player table.
#
# For round i, `player_ratings_df` is left-joined on round<i> == player_number.
# The lookup table's rating column is renamed to Opp<i>_Rating *before* the
# join, so the result never depends on dplyr's automatic ".x"/".y" suffixes or
# on the position of the joined columns. Rows whose round<i> is NA (no opponent
# parsed for that round) get NA for Opp<i>_Rating.
joined <- player_df
for (i in seq_len(7)) {
  opponent_ratings <- player_ratings_df
  names(opponent_ratings)[names(opponent_ratings) == "player_rating"] <-
    paste0("Opp", i, "_Rating")
  joined <- left_join(
    joined,
    opponent_ratings,
    # Named vector c(round<i> = "player_number"): left key = right key.
    by = setNames("player_number", paste0("round", i))
  )
}

# Rename the player columns by name rather than by position. The sixth round
# column is "Round6_Opp" (previously misspelled "Round16_Opp").
joined <- joined %>%
  rename(
    Player_ID = player_number,
    Player_Name = player_name,
    State = state,
    Player_points = player_points,
    Player_Rating = player_rating,
    Round1_Opp = round1,
    Round2_Opp = round2,
    Round3_Opp = round3,
    Round4_Opp = round4,
    Round5_Opp = round5,
    Round6_Opp = round6,
    Round7_Opp = round7
  )
4. Calculate the average pre-tournament chess rating of each player’s opponents.
# Mean of the seven opponent-rating columns for each player, ignoring rounds
# with no opponent rating (NA), rounded to a whole number.
opp_rating_cols <- c(
  "Opp1_Rating", "Opp2_Rating", "Opp3_Rating", "Opp4_Rating",
  "Opp5_Rating", "Opp6_Rating", "Opp7_Rating"
)
joined$Opponent_Avg <- round(
  rowMeans(joined[, opp_rating_cols], na.rm = TRUE)
)
5. Create the final data frame with each player’s name, state, total number of points, pre-rating, and average pre-tournament rating of opponents, and write it to a .csv file.
# Keep only the five columns required in the final report, in report order,
# then preview the first 15 players.
report_cols <- c(
  "Player_Name", "State", "Player_points", "Player_Rating", "Opponent_Avg"
)
chess_tournament <- select(joined, all_of(report_cols))
head(chess_tournament, n = 15)
## Player_Name State Player_points Player_Rating Opponent_Avg
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
## 7 GARY DEE SWATHELL MI 5.0 1649 1372
## 8 EZEKIEL HOUGHTON MI 5.0 1641 1468
## 9 STEFANO LEE ON 5.0 1411 1523
## 10 ANVIT RAO MI 5.0 1365 1554
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1468
## 12 KENNETH J TACK MI 4.5 1663 1506
## 13 TORRANCE HENRY JR MI 4.5 1666 1498
## 14 BRADLEY SHAW MI 4.5 1610 1515
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1484
# Save the final table to the working directory, without the row-name column.
write.csv(
  x = chess_tournament,
  file = "chess_tournament.csv",
  row.names = FALSE
)
# Interactive alternative: pick the output location with a file dialog.
# write.csv(chess_tournament, file = file.choose(new = TRUE), row.names = FALSE)