## Loading required package: RPostgreSQL
## Loading required package: DBI
## -- Attaching packages ------------------------------------------------------ tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.1 v dplyr 0.7.4
## v tidyr 0.8.0 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## -- Conflicts --------------------------------------------------------- tidyverse_conflicts() --
## x ggplot2::%+%() masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Or
raises/ total hands played
“If it’s good enough to call you gotta be in there raising, all right. I mean tight but aggressive, and I do mean aggressive. That’s your style, Professor. I mean you gotta …you gotta think of it as a war.” — Mike (Matt Damon) in Rounders speaking to his Law Professor (Martin Landau).
my_607 <- read_csv('https://raw.githubusercontent.com/justinherman42/Justin-Data-607/master/final%20project/vpip-pfr_analysis.csv')
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_integer(),
## playername = col_character(),
## vpip = col_double(),
## pfr = col_double(),
## total_hands = col_integer(),
## money_won = col_integer(),
## rake = col_integer(),
## threebet = col_double(),
## bb_per_100 = col_double(),
## total_rake_100 = col_double(),
## money_won_100 = col_double(),
## wwsf = col_double()
## )
my_607 <- my_607 %>%
filter(total_hands>500)
small_gap<- my_607 %>%
filter(vpip-pfr<10)
wide_gap<- my_607 %>%
filter(vpip-pfr>20)
par(mfrow=c(2, 1))
hist(small_gap$bb_per_100)
hist(wide_gap$bb_per_100)
small_gap <- small_gap %>%
select(small_vpip=vpip,small_pfr=pfr,small_bb_per_100=bb_per_100) %>%
describe(.)
wide_gap <- wide_gap %>%
select(wide_vpip=vpip,wide_pfr=pfr,wide_bb_100=bb_per_100) %>%
describe(.)
kable(rbind(small_gap,wide_gap))
vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
small_vpip | 1 | 726 | 23.858540 | 4.393240 | 23.600 | 23.751718 | 3.85476 | 8.20 | 44.00 | 35.80 | 0.4122420 | 1.6610858 | 0.1630484 |
small_pfr | 2 | 726 | 17.058127 | 3.946855 | 17.000 | 17.005498 | 3.26172 | 0.40 | 36.20 | 35.80 | 0.3927038 | 2.6525411 | 0.1464815 |
small_bb_per_100 | 3 | 726 | 2.023829 | 24.139195 | 2.680 | 2.489588 | 17.90981 | -124.72 | 100.85 | 225.57 | -0.2625542 | 2.6468372 | 0.8958896 |
wide_vpip | 1 | 2312 | 45.439187 | 10.978408 | 44.200 | 44.639243 | 10.37820 | 22.00 | 89.20 | 67.20 | 0.7220241 | 0.5706857 | 0.2283208 |
wide_pfr | 2 | 2312 | 12.173529 | 6.830626 | 11.300 | 11.638487 | 6.22692 | 0.10 | 47.30 | 47.20 | 0.8755983 | 1.2511863 | 0.1420583 |
wide_bb_100 | 3 | 2312 | -29.867673 | 42.528923 | -24.875 | -27.008108 | 31.77953 | -611.37 | 147.34 | 758.71 | -1.8599677 | 17.0517086 | 0.8844850 |
## [1] "Connecting to database"
## [1] "Connected!"
## Run querys to look thru db
# query 6 returns player stats but with only sn
# query 7 has id and sn
player_names_site_id <- dbGetQuery(conn, 'SELECT * FROM players')
player_stats <- dbGetQuery(conn, 'SELECT * FROM compiledplayerresults limit 1000000')
kable(head(player_names_site_id))
player_id | playername | pokersite_id | cashhands | tourneyhands | optimizationstatus | icon |
---|---|---|---|---|---|---|
11066 | Palaguachi21 | 0 | 76 | 0 | 0 | 0 |
2 | Greeneggs | 0 | 5 | 0 | 0 | -1 |
3 | cagjag2 | 0 | 38 | 0 | 0 | -1 |
4 | rmcconville | 0 | 57 | 0 | 0 | -1 |
5 | skorn32 | 0 | 16 | 0 | 0 | -1 |
6 | unypok | 0 | 21 | 0 | 0 | -1 |
kable(head(player_stats))
compiledplayerresults_id | player_id | playedyearandmonth | numberofplayers | gametype_id | bbgroup_id | totalhands | totalamountwonincents | totalrakeincents | totalbbswon | vpiphands | pfrhands | couldcoldcall | didcoldcall | couldthreebet | didthreebet | couldsqueeze | didsqueeze | facingtwopreflopraisers | calledtwopreflopraisers | raisedtwopreflopraisers | smallblindstealattempted | smallblindstealdefended | smallblindstealreraised | bigblindstealattempted | bigblindstealdefended | bigblindstealreraised | sawnonsmallshowdown | wonnonsmallshowdown | sawlargeshowdown | wonlargeshowdown | sawnonsmallshowdownlimpedflop | wonnonsmallshowdownlimpedflop | sawlargeshowdownlimpedflop | wonlargeshowdownlimpedflop | wonhand | wonhandwhensawflop | wonhandwhensawturn | wonhandwhensawriver | facedthreebetpreflop | foldedtothreebetpreflop | calledthreebetpreflop | raisedthreebetpreflop | facedfourbetpreflop | foldedtofourbetpreflop | calledfourbetpreflop | raisedfourbetpreflop | turnfoldippassonflopcb | turncallippassonflopcb | turnraiseippassonflopcb | riverfoldippassonturncb | rivercallippassonturncb | riverraiseippassonturncb | sawflop | sawshowdown | wonshowdown | totalbets | totalcalls | flopcontinuationbetpossible | flopcontinuationbetmade | turncontinuationbetpossible | turncontinuationbetmade | rivercontinuationbetpossible | rivercontinuationbetmade | facingflopcontinuationbet | foldedtoflopcontinuationbet | calledflopcontinuationbet | raisedflopcontinuationbet | facingturncontinuationbet | foldedtoturncontinuationbet | calledturncontinuationbet | raisedturncontinuationbet | facingrivercontinuationbet | foldedtorivercontinuationbet | calledrivercontinuationbet | raisedrivercontinuationbet | totalpostflopstreetsseen | totalaggressivepostflopstreetsseen |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 1 | 201403 | 6 | 1 | 0 | 34 | -257 | 19 | -12850 | 8 | 7 | 10 | 0 | 10 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 2 | 0 | 1 | 0 | 1 | 0 | 5 | 4 | 2 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 2 | 0 | 13 | 3 | 4 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 19 | 12 |
2 | 2 | 201403 | 6 | 1 | 0 | 5 | -144 | 6 | -7200 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 |
3 | 3 | 201403 | 6 | 1 | 0 | 28 | 310 | 15 | 15500 | 13 | 3 | 4 | 2 | 4 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 2 | 2 | 2 | 1 | 1 | 1 | 1 | 6 | 6 | 6 | 5 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 4 | 3 | 10 | 8 | 1 | 1 | 1 | 1 | 0 | 0 | 3 | 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 33 | 10 |
4 | 4 | 201403 | 6 | 1 | 0 | 34 | 16 | 19 | 800 | 18 | 4 | 7 | 2 | 5 | 0 | 2 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 | 4 | 2 | 2 | 1 | 2 | 1 | 9 | 8 | 7 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 18 | 7 | 3 | 14 | 21 | 3 | 2 | 0 | 0 | 0 | 0 | 2 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 45 | 14 |
5 | 5 | 201403 | 6 | 1 | 0 | 9 | 252 | 9 | 12600 | 6 | 1 | 4 | 3 | 3 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 3 | 3 | 3 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 3 | 2 | 12 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 9 |
6 | 6 | 201403 | 6 | 1 | 0 | 13 | -202 | 9 | -10100 | 10 | 1 | 2 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | 1 | 0 | 2 | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 | 3 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 21 | 2 |
player_names_site_id
by pokersite id
player stats
for screennames in this vector.player_names_site_id
merged_wsop_db
is a DF with all the stats and screen names that I need filtered for the poker site I desire. It is in a long format which will be addressed in the next section by summarizing by screen namewsop_players <- player_names_site_id%>%
filter(.,pokersite_id==12) %>%
select(.,c(playername,player_id))
## extract player ids
player_ids <- wsop_players$player_id
## filter other DF by player ids from last filtered query
wsop_player_stats <- player_stats %>%
filter(.,player_id %in% player_ids )
## merge DFs on player id
merged_wsop_db<- merge(wsop_player_stats,wsop_players,by="player_id" ,all = TRUE)
kable(merged_wsop_db[1:5,])
player_id | compiledplayerresults_id | playedyearandmonth | numberofplayers | gametype_id | bbgroup_id | totalhands | totalamountwonincents | totalrakeincents | totalbbswon | vpiphands | pfrhands | couldcoldcall | didcoldcall | couldthreebet | didthreebet | couldsqueeze | didsqueeze | facingtwopreflopraisers | calledtwopreflopraisers | raisedtwopreflopraisers | smallblindstealattempted | smallblindstealdefended | smallblindstealreraised | bigblindstealattempted | bigblindstealdefended | bigblindstealreraised | sawnonsmallshowdown | wonnonsmallshowdown | sawlargeshowdown | wonlargeshowdown | sawnonsmallshowdownlimpedflop | wonnonsmallshowdownlimpedflop | sawlargeshowdownlimpedflop | wonlargeshowdownlimpedflop | wonhand | wonhandwhensawflop | wonhandwhensawturn | wonhandwhensawriver | facedthreebetpreflop | foldedtothreebetpreflop | calledthreebetpreflop | raisedthreebetpreflop | facedfourbetpreflop | foldedtofourbetpreflop | calledfourbetpreflop | raisedfourbetpreflop | turnfoldippassonflopcb | turncallippassonflopcb | turnraiseippassonflopcb | riverfoldippassonturncb | rivercallippassonturncb | riverraiseippassonturncb | sawflop | sawshowdown | wonshowdown | totalbets | totalcalls | flopcontinuationbetpossible | flopcontinuationbetmade | turncontinuationbetpossible | turncontinuationbetmade | rivercontinuationbetpossible | rivercontinuationbetmade | facingflopcontinuationbet | foldedtoflopcontinuationbet | calledflopcontinuationbet | raisedflopcontinuationbet | facingturncontinuationbet | foldedtoturncontinuationbet | calledturncontinuationbet | raisedturncontinuationbet | facingrivercontinuationbet | foldedtorivercontinuationbet | calledrivercontinuationbet | raisedrivercontinuationbet | totalpostflopstreetsseen | totalaggressivepostflopstreetsseen | playername |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
30961 | 292574 | 201604 | 5 | 4 | 0 | 3 | -100 | 9 | -50 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | gooooooomets |
30961 | 337793 | 201706 | 3 | 3 | 0 | 2 | -50 | 8 | -50 | 2 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | gooooooomets |
30961 | 347968 | 201709 | 6 | 3 | 0 | 83 | -6958 | 706 | -6958 | 20 | 18 | 20 | 1 | 16 | 1 | 3 | 1 | 4 | 0 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 15 | 4 | 3 | 1 | 5 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 11 | 3 | 1 | 3 | 3 | 5 | 1 | 0 | 0 | 0 | 0 | 3 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 24 | 3 | gooooooomets |
30961 | 289578 | 201604 | 5 | 4 | 0 | 9 | -305 | 237 | -153 | 2 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 3 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 3 | gooooooomets |
30961 | 308760 | 201609 | 4 | 4 | 0 | 18 | 5367 | 266 | 2684 | 5 | 4 | 3 | 1 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 2 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 4 | 3 | 2 | 2 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 5 | 3 | 2 | 1 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11 | 1 | gooooooomets |
columns_for_analysis <-c('gametype_id',"player_id","totalhands","totalbbswon","totalrakeincents","totalamountwonincents","vpiphands","pfrhands","couldcoldcall","didcoldcall","couldthreebet","didthreebet","couldsqueeze","didsqueeze","facingtwopreflopraisers","calledtwopreflopraisers","raisedtwopreflopraisers","smallblindstealattempted","smallblindstealdefended","smallblindstealreraised",
"bigblindstealattempted","bigblindstealdefended","bigblindstealreraised","facedthreebetpreflop","foldedtothreebetpreflop","calledthreebetpreflop","raisedthreebetpreflop","facedfourbetpreflop","foldedtofourbetpreflop","calledfourbetpreflop","raisedfourbetpreflop","sawflop","wonhandwhensawflop","sawshowdown","wonshowdown","flopcontinuationbetpossible","flopcontinuationbetmade","turncontinuationbetpossible","turncontinuationbetmade","rivercontinuationbetpossible","rivercontinuationbetmade","facingflopcontinuationbet","foldedtoflopcontinuationbet","calledflopcontinuationbet","raisedflopcontinuationbet","facingturncontinuationbet","foldedtoturncontinuationbet","calledturncontinuationbet","raisedturncontinuationbet","facingrivercontinuationbet","foldedtorivercontinuationbet","calledrivercontinuationbet","raisedrivercontinuationbet","playername")
## create function to round and sum vectors
#vector x, vector y, round integer z
trans_func <- function(x,y,z){
round(sum(x)/sum(y)*100,z)
}
## create custom stats
my_wsop_finished_db <- merged_wsop_db %>%
select(c(columns_for_analysis)) %>%
group_by(playername) %>%
summarize(
vpip = round(sum(vpiphands)/sum(totalhands)*100,1),
pfr =round(sum(pfrhands)/sum(totalhands)*100,1),
total_hands = sum(totalhands),
money_won = sum(totalamountwonincents),
rake = sum(totalrakeincents),
coldcall = round(sum(didcoldcall)/sum(couldcoldcall)*100,1),
threebet = round(sum(didthreebet)/sum(couldthreebet)*100,1),
squeeze = round(sum(didsqueeze)/sum(couldsqueeze)*100,1),
bb_per_100 = round(sum(totalbbswon)/(sum(totalhands)/100),2),
total_rake_100 = round((sum(totalrakeincents)/100)/sum(totalhands),2),
money_won_100 = round((sum(totalamountwonincents)/100)/(sum(totalhands)/100),2),
cc_3b = round(sum(calledtwopreflopraisers)/sum(facingtwopreflopraisers)*100,1),
cc_4bet = round(sum(raisedtwopreflopraisers)/sum(facingtwopreflopraisers)*100,1),
def_sb = round(sum(smallblindstealdefended)/sum(smallblindstealattempted)*100,1),
three_bet_sb = round(sum(smallblindstealreraised)/sum(smallblindstealattempted)*100,1),
defends_bb = round(sum(bigblindstealdefended)/sum(bigblindstealattempted)*100,1),
three_bet_from_bb= round(sum(bigblindstealreraised)/sum(bigblindstealattempted)*100,1),
fold_3_bet = round(sum(foldedtothreebetpreflop)/sum(facedthreebetpreflop)*100,1),
four_bet = round(sum(raisedthreebetpreflop)/sum(facedthreebetpreflop)*100,1),
fold_4_bet = round(sum(foldedtofourbetpreflop)/sum(facedfourbetpreflop)*100,1),
five_bet=round(sum(raisedfourbetpreflop)/sum(facedfourbetpreflop)*100,1),
wwsf = round(sum(wonhandwhensawflop)/sum(sawflop)*100,1),
won_sd = round(sum(wonshowdown)/sum(sawshowdown)*100,1),
flop_cbet = round(sum(wonshowdown)/sum(sawshowdown)*100,1),
turn_cbet = trans_func(turncontinuationbetmade,turncontinuationbetpossible,1),
river_cbet = trans_func(rivercontinuationbetmade,rivercontinuationbetpossible,1),
fold_flob_cbet =trans_func(foldedtoflopcontinuationbet,facingflopcontinuationbet,1),
fold_turn_cbet =trans_func(foldedtoturncontinuationbet,facingturncontinuationbet,1),
fold_river_cbet=trans_func(foldedtorivercontinuationbet,facingrivercontinuationbet,1),
raise_flop_cbet=trans_func(raisedflopcontinuationbet,facingflopcontinuationbet,1),
raise_turn_cbet=trans_func(raisedturncontinuationbet,facingturncontinuationbet,1),
raise_river_cbet=trans_func(raisedrivercontinuationbet,facingrivercontinuationbet,1),
wtsd =round(sum(sawshowdown)/sum(sawflop)*100,1))
## Display DF summarized by player with custom stats
kable(head(my_wsop_finished_db,10))
playername | vpip | pfr | total_hands | money_won | rake | coldcall | threebet | squeeze | bb_per_100 | total_rake_100 | money_won_100 | cc_3b | cc_4bet | def_sb | three_bet_sb | defends_bb | three_bet_from_bb | fold_3_bet | four_bet | fold_4_bet | five_bet | wwsf | won_sd | flop_cbet | turn_cbet | river_cbet | fold_flob_cbet | fold_turn_cbet | fold_river_cbet | raise_flop_cbet | raise_turn_cbet | raise_river_cbet | wtsd |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
89.8 | 55.4 | 177 | -7882 | 7882 | NaN | NaN | NaN | -6807.91 | 0.45 | -44.53 | NaN | NaN | NaN | NaN | NaN | NaN | 90.6 | 6.2 | NaN | NaN | 0.0 | NaN | NaN | 73.2 | 100 | 72.7 | 46.7 | 85.7 | 4.5 | 10 | 0 | 0.0 | |
..1.. | 21.0 | 5.6 | 1062 | 11179 | 4664 | 11.6 | 0.6 | 1.6 | 2105.27 | 0.04 | 10.53 | 0 | 0 | 5.0 | 0.0 | 34.0 | 0.0 | 71.4 | 14.3 | NaN | NaN | 43.4 | 70.0 | 70.0 | NaN | NaN | 76.7 | 100.0 | NaN | 10.0 | 0 | NaN | 18.3 |
.aaaa. | 33.8 | 7.5 | 80 | -3455 | 596 | 15.2 | 3.1 | 0.0 | -8637.50 | 0.07 | -43.19 | 0 | 0 | 0.0 | 0.0 | 42.9 | 14.3 | NaN | NaN | NaN | NaN | 20.0 | 33.3 | 33.3 | 100.0 | 0 | 100.0 | NaN | NaN | 0.0 | NaN | NaN | 48.0 |
.beast. | 31.5 | 28.8 | 368 | 1241 | 4244 | 6.6 | 18.6 | 27.8 | 284.24 | 0.12 | 3.37 | 0 | 0 | 23.5 | 23.5 | 41.9 | 29.0 | 12.5 | 12.5 | 0 | 100 | 41.7 | 55.6 | 55.6 | 33.3 | 0 | 66.7 | 0.0 | 0.0 | 0.0 | 0 | 0 | 21.4 |
.eluu. | 71.4 | 14.3 | 7 | 966 | 26 | 100.0 | 0.0 | NaN | 27600.00 | 0.04 | 138.00 | NaN | NaN | NaN | NaN | 100.0 | 0.0 | NaN | NaN | NaN | NaN | 75.0 | 50.0 | 50.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 50.0 |
.o0ohh_____. | 38.0 | 18.2 | 137 | -6723 | 726 | 30.6 | 4.2 | 0.0 | -9814.60 | 0.05 | -49.07 | 100 | 0 | 20.0 | 0.0 | 50.0 | 12.5 | 50.0 | 50.0 | NaN | NaN | 43.2 | 50.0 | 50.0 | 0.0 | NaN | 55.6 | 50.0 | 100.0 | 0.0 | 0 | 0 | 32.4 |
bwukzilla | 47.1 | 23.5 | 51 | -40635 | 914 | 36.8 | 29.4 | 0.0 | -39841.18 | 0.18 | -796.76 | 100 | 0 | 25.0 | 25.0 | 66.7 | 33.3 | 0.0 | 0.0 | NaN | NaN | 30.0 | 42.9 | 42.9 | 100.0 | NaN | 75.0 | NaN | NaN | 25.0 | NaN | NaN | 35.0 |
_james68 | 52.6 | 10.5 | 19 | -2000 | 226 | 71.4 | 0.0 | 0.0 | -21052.63 | 0.12 | -105.26 | NaN | NaN | 100.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | 30.0 | 33.3 | 33.3 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 30.0 |
meat | 20.0 | 12.1 | 140 | -6453 | 578 | 14.3 | 7.4 | 7.7 | -9218.57 | 0.04 | -46.09 | 0 | 0 | 20.0 | 20.0 | 33.3 | 0.0 | 100.0 | 0.0 | NaN | NaN | 30.0 | 0.0 | 0.0 | 50.0 | NaN | 66.7 | NaN | NaN | 16.7 | NaN | NaN | 10.0 |
niko | 22.2 | 9.5 | 63 | 2610 | 278 | 15.4 | 4.0 | 0.0 | 8285.71 | 0.04 | 41.43 | 0 | 0 | 0.0 | 0.0 | 50.0 | 0.0 | NaN | NaN | NaN | NaN | 40.0 | 25.0 | 25.0 | 100.0 | NaN | 100.0 | NaN | NaN | 0.0 | NaN | NaN | 26.7 |
## save to csv
write.csv(my_wsop_finished_db,"wsop_table.csv")
wsop <- read_csv("https://raw.githubusercontent.com/justinherman42/Justin-Data-607/master/final%20project/wsop_table.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_double(),
## X1 = col_integer(),
## playername = col_character(),
## total_hands = col_integer(),
## money_won = col_integer(),
## rake = col_integer()
## )
## See spec(...) for full column specifications.
colnames(wsop)
## [1] "X1" "playername" "vpip"
## [4] "pfr" "total_hands" "money_won"
## [7] "rake" "coldcall" "threebet"
## [10] "squeeze" "bb_per_100" "total_rake_100"
## [13] "money_won_100" "cc_3b" "cc_4bet"
## [16] "def_sb" "three_bet_sb" "defends_bb"
## [19] "three_bet_from_bb" "fold_3_bet" "four_bet"
## [22] "fold_4_bet" "five_bet" "wwsf"
## [25] "won_sd" "flop_cbet" "turn_cbet"
## [28] "river_cbet" "fold_flob_cbet" "fold_turn_cbet"
## [31] "fold_river_cbet" "raise_flop_cbet" "raise_turn_cbet"
## [34] "raise_river_cbet" "wtsd"
wsop2 <- wsop %>%
filter(.,total_hands>10) %>%
replace(is.na(.), 0) %>%
select(.,c(playername,vpip,pfr,threebet,fold_3_bet,squeeze,total_hands,
def_sb,defends_bb,three_bet_sb,four_bet,fold_4_bet,five_bet,
fold_flob_cbet,raise_flop_cbet,fold_turn_cbet,raise_turn_cbet,fold_river_cbet,raise_river_cbet,
flop_cbet,turn_cbet,river_cbet,won_sd,cc_3b,cc_4bet,wwsf,wtsd))
('\r\n')
VPIP
becomes \r\n;VPIP
rename_col_attempt_db <- wsop2
rename_col_attempt_db <- rename_col_attempt_db %>%
rename('\r\nDSB'=def_sb,
'\r\nFoldf'=fold_flob_cbet,
'\r\nBetf'=flop_cbet,
SN=playername,
V=vpip,
PF=pfr,
`3B`=threebet,
F3B=fold_3_bet,
SQ=squeeze,
H=total_hands,
DBB=defends_bb,
`3BSB`=three_bet_sb,
`4B`=four_bet,
F4B=fold_4_bet,
`5B`=five_bet,
RF=raise_flop_cbet,
FT=fold_turn_cbet,
RT=raise_turn_cbet,
FR=fold_river_cbet,
RR=raise_river_cbet,
BT=turn_cbet,
BR=river_cbet,
W_sd=won_sd,
CC3=cc_3b,
CC4=cc_4bet)
## store colnames as vector to use in function below
my_names <- colnames(rename_col_attempt_db)
## Get rid of whitespace
player_note_vector <- apply(rename_col_attempt_db,2,function(x)gsub('\\s+', '',x))
## Create function to make big string
player_note_vector <- apply(player_note_vector,1,function(x){
my_string <- do.call(paste, as.list(c(x,sep=",")))
my_string <- trimws(my_string, which = c("both", "left", "right"))
my_string <- unlist(strsplit(my_string, split=","))
my_string <- paste(my_names,my_string,sep="-")
my_string <- paste(my_string, collapse = ' ')
return (my_string)
})
## display concatenated strings
player_note_vector[1:2]
## [1] "SN-0 V-89.8 PF-55.4 3B-0.0 F3B-90.6 SQ-0.0 H-177 \r\nDSB-0.0 DBB-0.0 3BSB-0.0 4B-6.2 F4B-0.0 5B-0.0 \r\nFoldf-72.7 RF-4.5 FT-46.7 RT-10.0 FR-85.7 RR-0.0 \r\nBetf-0.0 BT-73.2 BR-100.0 W_sd-0.0 CC3-0.0 CC4-0.0 wwsf-0.0 wtsd-0.0"
## [2] "SN-..1.. V-21.0 PF-5.6 3B-0.6 F3B-71.4 SQ-1.6 H-1062 \r\nDSB-5.0 DBB-34.0 3BSB-0.0 4B-14.3 F4B-0.0 5B-0.0 \r\nFoldf-76.7 RF-10.0 FT-100.0 RT-0.0 FR-0.0 RR-0.0 \r\nBetf-70.0 BT-0.0 BR-0.0 W_sd-70.0 CC3-0.0 CC4-0.0 wwsf-43.4 wtsd-18.3"
my_sns <- rename_col_attempt_db$SN
finished_postgres_db <- data.frame(my_sns,player_note_vector, stringsAsFactors = FALSE)
## display finished postgres db
finished_postgres_db[1:5,] %>%
kable("html") %>%
kable_styling()
my_sns | player_note_vector |
---|---|
0 | SN-0 V-89.8 PF-55.4 3B-0.0 F3B-90.6 SQ-0.0 H-177 DSB-0.0 DBB-0.0 3BSB-0.0 4B-6.2 F4B-0.0 5B-0.0 Foldf-72.7 RF-4.5 FT-46.7 RT-10.0 FR-85.7 RR-0.0 Betf-0.0 BT-73.2 BR-100.0 W_sd-0.0 CC3-0.0 CC4-0.0 wwsf-0.0 wtsd-0.0 |
..1.. | SN-..1.. V-21.0 PF-5.6 3B-0.6 F3B-71.4 SQ-1.6 H-1062 DSB-5.0 DBB-34.0 3BSB-0.0 4B-14.3 F4B-0.0 5B-0.0 Foldf-76.7 RF-10.0 FT-100.0 RT-0.0 FR-0.0 RR-0.0 Betf-70.0 BT-0.0 BR-0.0 W_sd-70.0 CC3-0.0 CC4-0.0 wwsf-43.4 wtsd-18.3 |
.aaaa. | SN-.aaaa. V-33.8 PF-7.5 3B-3.1 F3B-0.0 SQ-0.0 H-80 DSB-0.0 DBB-42.9 3BSB-0.0 4B-0.0 F4B-0.0 5B-0.0 Foldf-100.0 RF-0.0 FT-0.0 RT-0.0 FR-0.0 RR-0.0 Betf-33.3 BT-100.0 BR-0.0 W_sd-33.3 CC3-0.0 CC4-0.0 wwsf-20.0 wtsd-48.0 |
.beast. | SN-.beast. V-31.5 PF-28.8 3B-18.6 F3B-12.5 SQ-27.8 H-368 DSB-23.5 DBB-41.9 3BSB-23.5 4B-12.5 F4B-0.0 5B-100.0 Foldf-66.7 RF-0.0 FT-0.0 RT-0.0 FR-0.0 RR-0.0 Betf-55.6 BT-33.3 BR-0.0 W_sd-55.6 CC3-0.0 CC4-0.0 wwsf-41.7 wtsd-21.4 |
.o0ohh_____. | SN-.o0ohh_____. V-38.0 PF-18.2 3B-4.2 F3B-50.0 SQ-0.0 H-137 DSB-20.0 DBB-50.0 3BSB-0.0 4B-50.0 F4B-0.0 5B-0.0 Foldf-55.6 RF-0.0 FT-50.0 RT-0.0 FR-100.0 RR-0.0 Betf-50.0 BT-0.0 BR-0.0 W_sd-50.0 CC3-100.0 CC4-0.0 wwsf-43.2 wtsd-32.4 |
library(xml2)
note_file <- read_xml('C:/Users/justin/Documents/GitHub/Justin-Data-607/final project/Notes.xml')
head(xml_children(note_file))
## {xml_nodeset (6)}
## [1] <Note>\n <playerName>richiethec</playerName>\n <quickNote/>\n <pl ...
## [2] <Note>\n <playerName>chi1862</playerName>\n <quickNote/>\n <playe ...
## [3] <Note>\n <playerName>bradstever99</playerName>\n <quickNote/>\n < ...
## [4] <Note>\n <playerName>harvey</playerName>\n <quickNote/>\n <player ...
## [5] <Note>\n <playerName>sevilla21</playerName>\n <quickNote/>\n <pla ...
## [6] <Note>\n <playerName>foldaces221</playerName>\n <quickNote/>\n <p ...
## Extract playername as text from xml
quick_notes <- xml_find_all(note_file, ".//quickNote")
quick_notes_txt <- xml_text(quick_notes)
playerNames <- xml_find_all(note_file, ".//playerName")
XML_player_names <- xml_text(playerNames)
## test how many shared values there are between sqldb and xml
n=0
for (x in my_sns){
if (x %in% XML_player_names){
n=n+1
}
}
print(n)
## [1] 3775
## Print for comparison of XML and Postgres indexing
XML_player_names[1:10]
## [1] "richiethec" "chi1862" "bradstever99" "harvey"
## [5] "sevilla21" "foldaces221" "michaelss82" "venom28"
## [9] "eddiebrock47" "robbynyu"
finished_postgres_db$my_sns[1:10]
## [1] "0" "..1.." ".aaaa." ".beast."
## [5] ".o0ohh_____." "_bwukzilla_" "_james68" "_meat_"
## [9] "_niko_" "_tres"
## Rename columns and join on shared column maintaing xml order
xml_ordered_names <- data.frame(XML_player_names,stringsAsFactors = FALSE)
colnames(xml_ordered_names) <- c("my_sns")
df_for_xml <- plyr::join(xml_ordered_names,finished_postgres_db)
## Joining by: my_sns
## Display our final DF
df_for_xml[1:5,] %>%
kable("html") %>%
kable_styling()
my_sns | player_note_vector |
---|---|
richiethec | SN-richiethec V-40.4 PF-6.5 3B-4.0 F3B-42.9 SQ-2.5 H-1245 DSB-27.5 DBB-57.1 3BSB-0.0 4B-14.3 F4B-33.3 5B-33.3 Foldf-61.9 RF-4.4 FT-52.9 RT-11.8 FR-50.0 RR-0.0 Betf-49.2 BT-68.8 BR-33.3 W_sd-49.2 CC3-6.9 CC4-3.4 wwsf-31.6 wtsd-25.8 |
chi1862 | SN-chi1862 V-42.5 PF-9.5 3B-1.0 F3B-33.3 SQ-7.1 H-275 DSB-26.7 DBB-56.5 3BSB-0.0 4B-16.7 F4B-0.0 5B-0.0 Foldf-50.0 RF-11.5 FT-0.0 RT-20.0 FR-100.0 RR-0.0 Betf-50.0 BT-50.0 BR-0.0 W_sd-50.0 CC3-0.0 CC4-0.0 wwsf-36.3 wtsd-21.6 |
bradstever99 | SN-bradstever99 V-62.1 PF-14.9 3B-0.0 F3B-25.0 SQ-0.0 H-87 DSB-66.7 DBB-50.0 3BSB-0.0 4B-25.0 F4B-0.0 5B-0.0 Foldf-70.0 RF-0.0 FT-33.3 RT-0.0 FR-100.0 RR-0.0 Betf-25.0 BT-50.0 BR-100.0 W_sd-25.0 CC3-0.0 CC4-0.0 wwsf-31.9 wtsd-25.5 |
harvey | SN-harvey V-38.2 PF-7.6 3B-1.7 F3B-25.0 SQ-5.4 H-524 DSB-35.7 DBB-40.0 3BSB-0.0 4B-0.0 F4B-0.0 5B-0.0 Foldf-65.9 RF-7.3 FT-40.0 RT-20.0 FR-100.0 RR-0.0 Betf-56.4 BT-50.0 BR-0.0 W_sd-56.4 CC3-11.1 CC4-0.0 wwsf-35.1 wtsd-27.2 |
sevilla21 | SN-sevilla21 V-39.7 PF-14.0 3B-4.3 F3B-50.0 SQ-4.2 H-378 DSB-60.0 DBB-57.1 3BSB-6.7 4B-0.0 F4B-0.0 5B-0.0 Foldf-66.7 RF-0.0 FT-66.7 RT-0.0 FR-0.0 RR-0.0 Betf-36.4 BT-83.3 BR-0.0 W_sd-36.4 CC3-20.0 CC4-6.7 wwsf-35.8 wtsd-24.1 |
## insertion
xml_text(quick_notes) <- df_for_xml$player_note_vector[match(XML_player_names, df_for_xml$my_sns)]
## write xml to disk
write_xml(note_file, "C:/Users/justin/Documents/mynote6.xml", options = "format")
Final XML
Final XML