Read the file line by line and create a placeholder tibble
library(tidyverse)
# Fetch the tournament table as a character vector with one element per line,
# skipping the first four lines of the file (presumably the header rows --
# confirm against the raw file).
data <-
  "https://raw.githubusercontent.com/qixing810/CUNYSPS-DataScience/master/DS607/dataset/tournamentinfo.txt" %>%
  read_lines(skip = 4)

# Number of lines kept; used below to size the placeholder tibble.
len <- length(data)
# Placeholder tibble with one row per line read and one column per field.
# It is presumably filled in by a parsing step that is not shown here.
#
# Every column starts as a double NA (the same column type and length the
# previous random-number placeholders had), so a cell that is never filled in
# is visibly missing instead of holding a random value that looks like data
# and changes from run to run.
df <- tibble(
  num = rep(NA_real_, len),
  states = rep(NA_real_, len),
  name = rep(NA_real_, len),
  point = rep(NA_real_, len),
  pre_rating = rep(NA_real_, len),
  opponent = rep(NA_real_, len),
  opponent1 = rep(NA_real_, len),
  opponent2 = rep(NA_real_, len),
  opponent3 = rep(NA_real_, len),
  opponent4 = rep(NA_real_, len),
  opponent5 = rep(NA_real_, len),
  opponent6 = rep(NA_real_, len),
  opponent7 = rep(NA_real_, len),
  avg_rating = rep(NA_real_, len)
)
Replace each opponent number with that opponent's pre-rating and calculate the average opponent pre-rating
# Replace each opponent number with that opponent's pre-rating, then store the
# rounded mean of the seven looked-up ratings in `avg_rating`.
#
# The lookup is done one whole column at a time instead of row by row. This is
# equivalent because `pre_rating` is only read, never modified, and it also
# works when `df` has zero rows (a `1:nrow(df)` loop would run for i = 1, 0).
new_len <- nrow(df)
opponent_cols <- paste0("opponent", 1:7)

for (opp in opponent_cols) {
  # Opponent numbers are used as row positions into `pre_rating`; an NA
  # opponent number looks up as NA.
  df[[opp]] <- df$pre_rating[as.numeric(df[[opp]])]
}

# NA ratings are ignored, so the mean is taken over the opponents that have
# a rating.
df$avg_rating <- round(rowMeans(df[opponent_cols], na.rm = TRUE), 0)
Create the final data frame and write it to a CSV file
# Keep only the report columns, in the order they should appear in the CSV,
# then preview the first rows.
output <- df %>%
  select(name, states, point, pre_rating, avg_rating)
output %>% head()
## # A tibble: 6 x 5
## name states point pre_rating avg_rating
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 GARY HUA ON 6 1794 1605
## 2 DAKSHESH DARURI MI 6 1553 1469
## 3 ADITYA BAJAJ MI 6 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5 1686 1519
# Save the selected columns as project1.csv (relative path, so it is written
# to the current working directory).
write_csv(output, "project1.csv")