Project 1 CUNY 607

R Markdown

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(stringr)

#Load the data, delimited by "|", etc.

player_data <- read.delim('https://raw.githubusercontent.com/murphystout/data-607/master/tournamentinfo.txt',skip = 1, sep = "|", header = TRUE)

#Delete the annoying dash lines.

line_to_delete <- player_data[2,1]
player_data <- filter(player_data, Pair != line_to_delete)


#Take even number rows (i.e. player name and associated data), into own data frame

even_num_rows <- player_data[seq(2,length(player_data[,1]),2),]


#Same with odd, and give it appropriate column names

odd_num_rows <- player_data[seq(3,length(player_data[,1]),2),]

names(odd_num_rows) <- c('State', 'USCF_ID_Rating_Pre_Post','Total','Round1_Outcome','Round2_Outcome','Round3_Outcome','Round4_Outcome','Round5_Outcome','Round6_Outcome','Round7_Outcome')

#Combine the two data frame for ease of analysis.

player_data_wide <- cbind(even_num_rows, odd_num_rows)

#Rename row names to be sequential, as they kept the even numbered names from steps above

row.names(player_data_wide) <- 1:length(player_data_wide[,1])

#Extract Pre Rating
rating_split <- player_data_wide$USCF_ID_Rating_Pre_Post
pre_rating <- substring(rating_split,15,19)
pre_rating <- str_trim(pre_rating)
pre_rating <- as.numeric(pre_rating)



#For the heck of it, Extract End Rating as well

rating_split <- player_data_wide$USCF_ID_Rating_Pre_Post
end_rating <- str_extract_all(rating_split,">([0-9 ]*)")
end_rating <- str_trim(substring(end_rating,2))

#Add to new columns of dataframe

player_data_wide <- cbind(player_data_wide,pre_rating)
player_data_wide <- cbind(player_data_wide,end_rating)

#Extract Opponents IDs

round_1_opp_id <- str_extract(player_data_wide$Round,"[0-9]{1,2}")

round_2_opp_id <- str_extract(player_data_wide$Round.1,"[0-9]{1,2}")

round_3_opp_id <- str_extract(player_data_wide$Round.2,"[0-9]{1,2}")

round_4_opp_id <- str_extract(player_data_wide$Round.3,"[0-9]{1,2}")

round_5_opp_id <- str_extract(player_data_wide$Round.4,"[0-9]{1,2}")

round_6_opp_id <- str_extract(player_data_wide$Round.5,"[0-9]{1,2}")

round_7_opp_id <- str_extract(player_data_wide$Round.6,"[0-9]{1,2}")

#Add all these to the dataframe

player_data_wide <- cbind(player_data_wide,round_1_opp_id,round_2_opp_id,round_3_opp_id,round_4_opp_id,round_5_opp_id,round_6_opp_id, round_7_opp_id)

#Create a linking table with opponent IDs and Pre Ratings.  Will use to look up ratings in next steps.
player_data_wide$Pair <- as.numeric(as.character(player_data_wide$Pair))
linking_table <- player_data_wide[c("Pair","pre_rating")]


#Series of joins to pull in data.  Using the merge function.  We also want to create a redundant data frame to avoid messing up the previous one.

player_data_wide_2 <- player_data_wide



colnames(linking_table)[1]<-"round_1_opp_id"
colnames(linking_table)[2]<-"round_1_pre_rating"
player_data_wide_2<-merge(player_data_wide_2,linking_table,by="round_1_opp_id", all.x = TRUE)

## Warning in merge.data.frame(player_data_wide_2, linking_table, by =
## "round_1_opp_id", : column name 'Total' is duplicated in the result

colnames(linking_table)[1]<-"round_2_opp_id"
colnames(linking_table)[2]<-"round_2_pre_rating"
player_data_wide_2<-merge(player_data_wide_2,linking_table,by="round_2_opp_id", all.x = TRUE)

## Warning in merge.data.frame(player_data_wide_2, linking_table, by =
## "round_2_opp_id", : column name 'Total' is duplicated in the result

colnames(linking_table)[1]<-"round_3_opp_id"
colnames(linking_table)[2]<-"round_3_pre_rating"
player_data_wide_2<-merge(player_data_wide_2,linking_table,by="round_3_opp_id", all.x = TRUE)

## Warning in merge.data.frame(player_data_wide_2, linking_table, by =
## "round_3_opp_id", : column name 'Total' is duplicated in the result

colnames(linking_table)[1]<-"round_4_opp_id"
colnames(linking_table)[2]<-"round_4_pre_rating"
player_data_wide_2<-merge(player_data_wide_2,linking_table,by="round_4_opp_id", all.x = TRUE)

## Warning in merge.data.frame(player_data_wide_2, linking_table, by =
## "round_4_opp_id", : column name 'Total' is duplicated in the result

colnames(linking_table)[1]<-"round_5_opp_id"
colnames(linking_table)[2]<-"round_5_pre_rating"
player_data_wide_2<-merge(player_data_wide_2,linking_table,by="round_5_opp_id", all.x = TRUE)

## Warning in merge.data.frame(player_data_wide_2, linking_table, by =
## "round_5_opp_id", : column name 'Total' is duplicated in the result

colnames(linking_table)[1]<-"round_6_opp_id"
colnames(linking_table)[2]<-"round_6_pre_rating"
player_data_wide_2<-merge(player_data_wide_2,linking_table,by="round_6_opp_id", all.x = TRUE)

## Warning in merge.data.frame(player_data_wide_2, linking_table, by =
## "round_6_opp_id", : column name 'Total' is duplicated in the result

colnames(linking_table)[1]<-"round_7_opp_id"
colnames(linking_table)[2]<-"round_7_pre_rating"
player_data_wide_2<-merge(player_data_wide_2,linking_table,by="round_7_opp_id", all.x = TRUE)

## Warning in merge.data.frame(player_data_wide_2, linking_table, by =
## "round_7_opp_id", : column name 'Total' is duplicated in the result

# Calculate Means

player_data_wide_2$avg_opp_rating <- round(rowMeans(player_data_wide_2[c("round_1_pre_rating","round_2_pre_rating","round_3_pre_rating","round_4_pre_rating","round_5_pre_rating","round_6_pre_rating","round_7_pre_rating")],na.rm = TRUE))


#Put together final solution table

#Sort first

player_data_wide_2 <- player_data_wide_2[order(player_data_wide_2$Pair),]

solution <- player_data_wide_2[,c("Player.Name","State", "Total", "pre_rating","avg_opp_rating")]

colnames(solution) <- c("Player.Name", "State", "Total.Points", "Pre.Rating", "Opponent.Avg.Pre.Rating")

solution

##                          Player.Name  State Total.Points Pre.Rating
## 31  GARY HUA                            ON         6.0         1794
## 52  DAKSHESH DARURI                     MI         6.0         1553
## 4   ADITYA BAJAJ                        MI         6.0         1384
## 1   PATRICK H SCHILLING                 MI         5.5         1716
## 8   HANSHI ZUO                          MI         5.5         1655
## 13  HANSEN SONG                         OH         5.0         1686
## 11  GARY DEE SWATHELL                   MI         5.0         1649
## 10  EZEKIEL HOUGHTON                    MI         5.0         1641
## 12  STEFANO LEE                         ON         5.0         1411
## 9   ANVIT RAO                           MI         5.0         1365
## 18  CAMERON WILLIAM MC LEMAN            MI         4.5         1712
## 20  KENNETH J TACK                      MI         4.5         1663
## 23  TORRANCE HENRY JR                   MI         4.5         1666
## 22  BRADLEY SHAW                        MI         4.5         1610
## 29  ZACHARY JAMES HOUGHTON              MI         4.5         1220
## 58  MIKE NIKITIN                        MI         4.0         1604
## 40  RONALD GRZEGORCZYK                  MI         4.0         1629
## 2   DAVID SUNDEEN                       MI         4.0         1600
## 53  DIPANKAR ROY                        MI         4.0         1564
## 54  JASON ZHENG                         MI         4.0         1595
## 49  DINH DANG BUI                       ON         4.0         1563
## 32  EUGENE L MCCLURE                    MI         4.0         1555
## 37  ALAN BUI                            ON         4.0         1363
## 30  MICHAEL R ALDRICH                   MI         4.0         1229
## 38  LOREN SCHWIEBERT                    MI         3.5         1745
## 3   MAX ZHU                             ON         3.5         1579
## 55  GAURAV GIDWANI                      MI         3.5         1552
## 27  SOFIA ADINA STANESCU-BELLU          MI         3.5         1507
## 57  CHIEDOZIE OKORIE                    MI         3.5         1602
## 41  GEORGE AVERY JONES                  ON         3.5         1522
## 6   RISHI SHETTY                        MI         3.5         1494
## 5   JOSHUA PHILIP MATHEWS               ON         3.5         1441
## 42  JADE GE                             MI         3.5         1449
## 43  MICHAEL JEFFERY THOMAS              MI         3.5         1399
## 39  JOSHUA DAVID LEE                    MI         3.5         1438
## 19  SIDDHARTH JHA                       MI         3.5         1355
## 50  AMIYATOSH PWNANANDAM                MI         3.5          980
## 7   BRIAN LIU                           MI         3.0         1423
## 16  JOEL R HENDON                       MI         3.0         1436
## 14  FOREST ZHANG                        MI         3.0         1348
## 60  KYLE WILLIAM MURPHY                 MI         3.0         1403
## 46  JARED GE                            MI         3.0         1332
## 45  ROBERT GLEN VASEY                   MI         3.0         1283
## 48  JUSTIN D SCHILLING                  MI         3.0         1199
## 47  DEREK YAN                           MI         3.0         1242
## 15  JACOB ALEXANDER LAVALLEY            MI         3.0          377
## 17  ERIC WRIGHT                         MI         2.5         1362
## 26  DANIEL KHAIN                        MI         2.5         1382
## 56  MICHAEL J MARTIN                    MI         2.5         1291
## 21  SHIVAM JHA                          MI         2.5         1056
## 24  TEJAS AYYAGARI                      MI         2.5         1011
## 25  ETHAN GUO                           MI         2.5          935
## 64  JOSE C YBARRA                       MI         2.0         1393
## 51  LARRY HODGE                         MI         2.0         1270
## 34  ALEX KONG                           MI         2.0         1186
## 33  MARISA RICCI                        MI         2.0         1153
## 61  MICHAEL LU                          MI         2.0         1092
## 36  VIRAJ MOHILE                        MI         2.0          917
## 35  SEAN M MC CORMICK                   MI         2.0          853
## 59  JULIA SHEN                          MI         1.5          967
## 28  JEZZEL FARKAS                       ON         1.5          955
## 63  ASHWIN BALAJI                       MI         1.0         1530
## 62  THOMAS JOSEPH HOSMER                MI         1.0         1175
## 44  BEN LI                              MI         1.0         1163
##    Opponent.Avg.Pre.Rating
## 31                    1605
## 52                    1469
## 4                     1564
## 1                     1574
## 8                     1501
## 13                    1519
## 11                    1372
## 10                    1468
## 12                    1523
## 9                     1554
## 18                    1468
## 20                    1506
## 23                    1498
## 22                    1515
## 29                    1484
## 58                    1386
## 40                    1499
## 2                     1480
## 53                    1426
## 54                    1411
## 49                    1470
## 32                    1300
## 37                    1214
## 30                    1357
## 38                    1363
## 3                     1507
## 55                    1222
## 27                    1522
## 57                    1314
## 41                    1144
## 6                     1260
## 5                     1379
## 42                    1277
## 43                    1375
## 39                    1150
## 19                    1388
## 50                    1385
## 7                     1539
## 16                    1430
## 14                    1391
## 60                    1248
## 46                    1150
## 45                    1107
## 48                    1327
## 47                    1152
## 15                    1358
## 17                    1392
## 26                    1356
## 56                    1286
## 21                    1296
## 24                    1356
## 25                    1495
## 64                    1345
## 51                    1206
## 34                    1406
## 33                    1414
## 61                    1363
## 36                    1391
## 35                    1319
## 59                    1330
## 28                    1327
## 63                    1186
## 62                    1350
## 44                    1263

Project 1 CUNY 607

Michael Hayes

February 23, 2019

R Markdown