week 1 assignment

Loading the data

#load the packages

library(tidyverse) 
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(stringr) 
library(rvest)
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# The tournament text file was uploaded to GitHub; read it back from the raw
# URL so the script does not depend on a local copy.
urlfile <- "https://raw.githubusercontent.com/jayleecunysps/AssignmentforSPS/main/week1hw.txt"

# Read the file as a character vector (one element per line), skipping line 1.
fulldata <- read_lines(file = url(urlfile), skip = 1)

Pulling out the useful rows and removing the dashed (-----) separator lines

# From line 4 onward the file repeats in groups of three lines. Keep the first
# and second line of every group; the remaining line is the "------" separator.
player_data1 <- fulldata[seq(from = 4, to = length(fulldata), by = 3)] # lines 4, 7, 10, ...
player_data2 <- fulldata[seq(from = 5, to = length(fulldata), by = 3)] # lines 5, 8, 11, ...

Pulling out the useful data

# Extract one value per player from the two "|"-delimited lines that describe
# each player (player_data1 = first line, player_data2 = second line).

# Player name: the second "|"-delimited field of the first line, trimmed.
# Taking the whole field keeps hyphenated names intact (stripping every "-"
# turned "STANESCU-BELLU" into "STANESCUBELLU"), avoids the over-wide "[A-z]"
# character class, and no longer yields NA when a name is not followed by at
# least two spaces.
Playername <- str_trim(str_split_fixed(player_data1, "\\|", 3)[, 2])

# State: the first run of two capital letters on the second line.
State <- str_extract(player_data2, "[A-Z]{2}")

# Total points: the first "digits.digit" token on the first line (e.g. "6.0").
Total_Num_of_Points <- as.numeric(str_extract(player_data1, "\\d+\\.\\d"))

# Pre-tournament rating: the (up to four) digits that follow "R:" on the
# second line; any provisional suffix such as "P17" is ignored.
Pre_rating <- as.numeric(str_match(player_data2, "R:\\s*(\\d{1,4})")[, 2])

# Opponents: every round field of the form "|<letter> <number>" on the first
# line. Round fields without a number contribute nothing. str_match_all()
# returns exactly one list element per player, so rows cannot become
# misaligned when a player has no numbered rounds.
avg_chess <- lapply(
  str_match_all(player_data1, "\\|[A-Z]\\s+(\\d+)"),
  function(round_matches) as.integer(round_matches[, 2])
)

# Average pre-rating of each player's opponents, rounded to a whole number.
# NOTE: opponent numbers are used as positions in Pre_rating, which assumes
# the players appear in the file in pair-number order (1, 2, 3, ...).
avgoppprechessrating <- vapply(
  avg_chess,
  function(opponent_ids) round(mean(Pre_rating[opponent_ids]), 0),
  numeric(1)
)
avgoppprechessrating
##  [1] 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 1468 1506 1498 1515 1484
## [16] 1386 1499 1480 1426 1411 1470 1300 1214 1357 1363 1507 1222 1522 1314 1144
## [31] 1260 1379 1277 1375 1150 1388 1385 1539 1430 1391 1248 1150 1107 1327 1152
## [46] 1358 1392 1356 1286 1296 1356 1495 1345 1206 1406 1414 1363 1391 1319 1330
## [61] 1327 1186 1350 1263
# Combine the per-player vectors built above into a single table, one row per
# player and one column per extracted value.
Newdata <- data.frame(
  Playername = Playername,
  State = State,
  Total_Num_of_Points = Total_Num_of_Points,
  Pre_rating = Pre_rating,
  avgoppprechessrating = avgoppprechessrating
)
# Display the assembled table.
print(Newdata)
##                   Playername State Total_Num_of_Points Pre_rating
## 1                   GARY HUA    ON                 6.0       1794
## 2            DAKSHESH DARURI    MI                 6.0       1553
## 3               ADITYA BAJAJ    MI                 6.0       1384
## 4        PATRICK H SCHILLING    MI                 5.5       1716
## 5                 HANSHI ZUO    MI                 5.5       1655
## 6                HANSEN SONG    OH                 5.0       1686
## 7          GARY DEE SWATHELL    MI                 5.0       1649
## 8           EZEKIEL HOUGHTON    MI                 5.0       1641
## 9                STEFANO LEE    ON                 5.0       1411
## 10                 ANVIT RAO    MI                 5.0       1365
## 11  CAMERON WILLIAM MC LEMAN    MI                 4.5       1712
## 12            KENNETH J TACK    MI                 4.5       1663
## 13         TORRANCE HENRY JR    MI                 4.5       1666
## 14              BRADLEY SHAW    MI                 4.5       1610
## 15    ZACHARY JAMES HOUGHTON    MI                 4.5       1220
## 16              MIKE NIKITIN    MI                 4.0       1604
## 17        RONALD GRZEGORCZYK    MI                 4.0       1629
## 18             DAVID SUNDEEN    MI                 4.0       1600
## 19              DIPANKAR ROY    MI                 4.0       1564
## 20               JASON ZHENG    MI                 4.0       1595
## 21             DINH DANG BUI    ON                 4.0       1563
## 22          EUGENE L MCCLURE    MI                 4.0       1555
## 23                  ALAN BUI    ON                 4.0       1363
## 24         MICHAEL R ALDRICH    MI                 4.0       1229
## 25          LOREN SCHWIEBERT    MI                 3.5       1745
## 26                   MAX ZHU    ON                 3.5       1579
## 27            GAURAV GIDWANI    MI                 3.5       1552
## 28 SOFIA ADINA STANESCUBELLU    MI                 3.5       1507
## 29          CHIEDOZIE OKORIE    MI                 3.5       1602
## 30        GEORGE AVERY JONES    ON                 3.5       1522
## 31              RISHI SHETTY    MI                 3.5       1494
## 32     JOSHUA PHILIP MATHEWS    ON                 3.5       1441
## 33                   JADE GE    MI                 3.5       1449
## 34    MICHAEL JEFFERY THOMAS    MI                 3.5       1399
## 35          JOSHUA DAVID LEE    MI                 3.5       1438
## 36             SIDDHARTH JHA    MI                 3.5       1355
## 37      AMIYATOSH PWNANANDAM    MI                 3.5        980
## 38                 BRIAN LIU    MI                 3.0       1423
## 39             JOEL R HENDON    MI                 3.0       1436
## 40              FOREST ZHANG    MI                 3.0       1348
## 41       KYLE WILLIAM MURPHY    MI                 3.0       1403
## 42                  JARED GE    MI                 3.0       1332
## 43         ROBERT GLEN VASEY    MI                 3.0       1283
## 44        JUSTIN D SCHILLING    MI                 3.0       1199
## 45                 DEREK YAN    MI                 3.0       1242
## 46  JACOB ALEXANDER LAVALLEY    MI                 3.0        377
## 47               ERIC WRIGHT    MI                 2.5       1362
## 48              DANIEL KHAIN    MI                 2.5       1382
## 49          MICHAEL J MARTIN    MI                 2.5       1291
## 50                SHIVAM JHA    MI                 2.5       1056
## 51            TEJAS AYYAGARI    MI                 2.5       1011
## 52                 ETHAN GUO    MI                 2.5        935
## 53             JOSE C YBARRA    MI                 2.0       1393
## 54               LARRY HODGE    MI                 2.0       1270
## 55                 ALEX KONG    MI                 2.0       1186
## 56              MARISA RICCI    MI                 2.0       1153
## 57                MICHAEL LU    MI                 2.0       1092
## 58              VIRAJ MOHILE    MI                 2.0        917
## 59         SEAN M MC CORMICK    MI                 2.0        853
## 60                JULIA SHEN    MI                 1.5        967
## 61             JEZZEL FARKAS    ON                 1.5        955
## 62             ASHWIN BALAJI    MI                 1.0       1530
## 63      THOMAS JOSEPH HOSMER    MI                 1.0       1175
## 64                    BEN LI    MI                 1.0       1163
##    avgoppprechessrating
## 1                  1605
## 2                  1469
## 3                  1564
## 4                  1574
## 5                  1501
## 6                  1519
## 7                  1372
## 8                  1468
## 9                  1523
## 10                 1554
## 11                 1468
## 12                 1506
## 13                 1498
## 14                 1515
## 15                 1484
## 16                 1386
## 17                 1499
## 18                 1480
## 19                 1426
## 20                 1411
## 21                 1470
## 22                 1300
## 23                 1214
## 24                 1357
## 25                 1363
## 26                 1507
## 27                 1222
## 28                 1522
## 29                 1314
## 30                 1144
## 31                 1260
## 32                 1379
## 33                 1277
## 34                 1375
## 35                 1150
## 36                 1388
## 37                 1385
## 38                 1539
## 39                 1430
## 40                 1391
## 41                 1248
## 42                 1150
## 43                 1107
## 44                 1327
## 45                 1152
## 46                 1358
## 47                 1392
## 48                 1356
## 49                 1286
## 50                 1296
## 51                 1356
## 52                 1495
## 53                 1345
## 54                 1206
## 55                 1406
## 56                 1414
## 57                 1363
## 58                 1391
## 59                 1319
## 60                 1330
## 61                 1327
## 62                 1186
## 63                 1350
## 64                 1263
# Save the summary table as "tournament.csv"; append = FALSE overwrites an
# existing file instead of adding rows to it.
Newdata %>% write_csv("tournament.csv", append = FALSE)