## Warning: package 'XML' was built under R version 3.4.4
## Loading required package: RPostgreSQL
## Loading required package: DBI
## -- Attaching packages -------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.2 v dplyr 0.7.4
## v tidyr 0.8.0 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.2.0
## Warning: package 'tibble' was built under R version 3.4.4
## Warning: package 'stringr' was built under R version 3.4.4
## -- Conflicts ----------------------------------------------------------- tidyverse_conflicts() --
## x ggplot2::%+%() masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Warning: package 'xml2' was built under R version 3.4.4
Or raises/ total hands played
“If it’s good enough to call you gotta be in there raising, all right. I mean tight but aggressive, and I do mean aggressive. That’s your style, Professor. I mean you gotta …you gotta think of it as a war.” — Mike (Matt Damon) in Rounders speaking to his Law Professor (Martin Landau).
my_607 <- read_csv("https://raw.githubusercontent.com/justinherman42/Justin-Data-607/master/final%20project/vpip-pfr_analysis.csv")## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_integer(),
## playername = col_character(),
## vpip = col_double(),
## pfr = col_double(),
## total_hands = col_integer(),
## money_won = col_integer(),
## rake = col_integer(),
## threebet = col_double(),
## bb_per_100 = col_double(),
## total_rake_100 = col_double(),
## money_won_100 = col_double(),
## wwsf = col_double()
## )
my_607 <- my_607 %>%
filter(total_hands > 500)
small_gap <- my_607 %>%
filter(vpip - pfr < 10)
wide_gap <- my_607 %>%
filter(vpip - pfr > 20)
par(mfrow = c(2, 1))
hist(small_gap$bb_per_100)
hist(wide_gap$bb_per_100)small_gap <- small_gap %>%
select(small_vpip = vpip, small_pfr = pfr, small_bb_per_100 = bb_per_100) %>%
describe(.)
wide_gap <- wide_gap %>% select(wide_vpip = vpip, wide_pfr = pfr, wide_bb_100 = bb_per_100) %>%
describe(.)
kable(rbind(small_gap, wide_gap))| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| small_vpip | 1 | 726 | 23.858540 | 4.393240 | 23.600 | 23.751718 | 3.85476 | 8.20 | 44.00 | 35.80 | 0.4122420 | 1.6610858 | 0.1630484 |
| small_pfr | 2 | 726 | 17.058127 | 3.946855 | 17.000 | 17.005498 | 3.26172 | 0.40 | 36.20 | 35.80 | 0.3927038 | 2.6525411 | 0.1464815 |
| small_bb_per_100 | 3 | 726 | 2.023829 | 24.139195 | 2.680 | 2.489588 | 17.90981 | -124.72 | 100.85 | 225.57 | -0.2625542 | 2.6468372 | 0.8958896 |
| wide_vpip | 1 | 2312 | 45.439187 | 10.978408 | 44.200 | 44.639243 | 10.37820 | 22.00 | 89.20 | 67.20 | 0.7220241 | 0.5706857 | 0.2283208 |
| wide_pfr | 2 | 2312 | 12.173529 | 6.830626 | 11.300 | 11.638487 | 6.22692 | 0.10 | 47.30 | 47.20 | 0.8755983 | 1.2511863 | 0.1420583 |
| wide_bb_100 | 3 | 2312 | -29.867673 | 42.528923 | -24.875 | -27.008108 | 31.77953 | -611.37 | 147.34 | 758.71 | -1.8599677 | 17.0517086 | 0.8844850 |
## [1] "Connecting to database"
## [1] "Connected!"
# Grab screennames
player_names_site_id <- dbGetQuery(conn, "SELECT * FROM players")
# Grab stats
player_stats <- dbGetQuery(conn, "SELECT * FROM compiledplayerresults limit 1000000")
kable(head(player_names_site_id))| player_id | playername | pokersite_id | cashhands | tourneyhands | optimizationstatus | icon |
|---|---|---|---|---|---|---|
| 11066 | Palaguachi21 | 0 | 76 | 0 | 0 | 0 |
| 2 | Greeneggs | 0 | 5 | 0 | 0 | -1 |
| 3 | cagjag2 | 0 | 38 | 0 | 0 | -1 |
| 4 | rmcconville | 0 | 57 | 0 | 0 | -1 |
| 5 | skorn32 | 0 | 16 | 0 | 0 | -1 |
| 6 | unypok | 0 | 21 | 0 | 0 | -1 |
kable(head(player_stats))| compiledplayerresults_id | player_id | playedyearandmonth | numberofplayers | gametype_id | bbgroup_id | totalhands | totalamountwonincents | totalrakeincents | totalbbswon | vpiphands | pfrhands | couldcoldcall | didcoldcall | couldthreebet | didthreebet | couldsqueeze | didsqueeze | facingtwopreflopraisers | calledtwopreflopraisers | raisedtwopreflopraisers | smallblindstealattempted | smallblindstealdefended | smallblindstealreraised | bigblindstealattempted | bigblindstealdefended | bigblindstealreraised | sawnonsmallshowdown | wonnonsmallshowdown | sawlargeshowdown | wonlargeshowdown | sawnonsmallshowdownlimpedflop | wonnonsmallshowdownlimpedflop | sawlargeshowdownlimpedflop | wonlargeshowdownlimpedflop | wonhand | wonhandwhensawflop | wonhandwhensawturn | wonhandwhensawriver | facedthreebetpreflop | foldedtothreebetpreflop | calledthreebetpreflop | raisedthreebetpreflop | facedfourbetpreflop | foldedtofourbetpreflop | calledfourbetpreflop | raisedfourbetpreflop | turnfoldippassonflopcb | turncallippassonflopcb | turnraiseippassonflopcb | riverfoldippassonturncb | rivercallippassonturncb | riverraiseippassonturncb | sawflop | sawshowdown | wonshowdown | totalbets | totalcalls | flopcontinuationbetpossible | flopcontinuationbetmade | turncontinuationbetpossible | turncontinuationbetmade | rivercontinuationbetpossible | rivercontinuationbetmade | facingflopcontinuationbet | foldedtoflopcontinuationbet | calledflopcontinuationbet | raisedflopcontinuationbet | facingturncontinuationbet | foldedtoturncontinuationbet | calledturncontinuationbet | raisedturncontinuationbet | facingrivercontinuationbet | foldedtorivercontinuationbet | calledrivercontinuationbet | raisedrivercontinuationbet | totalpostflopstreetsseen | totalaggressivepostflopstreetsseen |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 1 | 201403 | 6 | 1 | 0 | 34 | -257 | 19 | -12850 | 8 | 7 | 10 | 0 | 10 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 2 | 0 | 1 | 0 | 1 | 0 | 5 | 4 | 2 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 2 | 0 | 13 | 3 | 4 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 19 | 12 |
| 2 | 2 | 201403 | 6 | 1 | 0 | 5 | -144 | 6 | -7200 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 |
| 3 | 3 | 201403 | 6 | 1 | 0 | 28 | 310 | 15 | 15500 | 13 | 3 | 4 | 2 | 4 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 2 | 2 | 2 | 1 | 1 | 1 | 1 | 6 | 6 | 6 | 5 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 4 | 3 | 10 | 8 | 1 | 1 | 1 | 1 | 0 | 0 | 3 | 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 33 | 10 |
| 4 | 4 | 201403 | 6 | 1 | 0 | 34 | 16 | 19 | 800 | 18 | 4 | 7 | 2 | 5 | 0 | 2 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 | 4 | 2 | 2 | 1 | 2 | 1 | 9 | 8 | 7 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 18 | 7 | 3 | 14 | 21 | 3 | 2 | 0 | 0 | 0 | 0 | 2 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 45 | 14 |
| 5 | 5 | 201403 | 6 | 1 | 0 | 9 | 252 | 9 | 12600 | 6 | 1 | 4 | 3 | 3 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 3 | 3 | 3 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 3 | 2 | 12 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 9 |
| 6 | 6 | 201403 | 6 | 1 | 0 | 13 | -202 | 9 | -10100 | 10 | 1 | 2 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | 1 | 0 | 2 | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 | 3 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 21 | 2 |
player_names_site_id by pokersite id
player stats for screennames in this vector.player_names_site_idmerged_wsop_db is a DF with all the stats and screen names that I need filtered for the poker site I desire. It is in a long format which will be addressed in the next section by summarizing by screen namewsop_players <- player_names_site_id %>%
filter(., pokersite_id == 12) %>%
select(., c(playername, player_id))
# Extract player ids
player_ids <- wsop_players$player_id
# Filter other DF by player ids from last filtered query
wsop_player_stats <- player_stats %>%
filter(., player_id %in% player_ids)
# Merge DFs on player id
merged_wsop_db <- merge(wsop_player_stats, wsop_players, by = "player_id",
all = TRUE)
kable(merged_wsop_db[1:5, ])| player_id | compiledplayerresults_id | playedyearandmonth | numberofplayers | gametype_id | bbgroup_id | totalhands | totalamountwonincents | totalrakeincents | totalbbswon | vpiphands | pfrhands | couldcoldcall | didcoldcall | couldthreebet | didthreebet | couldsqueeze | didsqueeze | facingtwopreflopraisers | calledtwopreflopraisers | raisedtwopreflopraisers | smallblindstealattempted | smallblindstealdefended | smallblindstealreraised | bigblindstealattempted | bigblindstealdefended | bigblindstealreraised | sawnonsmallshowdown | wonnonsmallshowdown | sawlargeshowdown | wonlargeshowdown | sawnonsmallshowdownlimpedflop | wonnonsmallshowdownlimpedflop | sawlargeshowdownlimpedflop | wonlargeshowdownlimpedflop | wonhand | wonhandwhensawflop | wonhandwhensawturn | wonhandwhensawriver | facedthreebetpreflop | foldedtothreebetpreflop | calledthreebetpreflop | raisedthreebetpreflop | facedfourbetpreflop | foldedtofourbetpreflop | calledfourbetpreflop | raisedfourbetpreflop | turnfoldippassonflopcb | turncallippassonflopcb | turnraiseippassonflopcb | riverfoldippassonturncb | rivercallippassonturncb | riverraiseippassonturncb | sawflop | sawshowdown | wonshowdown | totalbets | totalcalls | flopcontinuationbetpossible | flopcontinuationbetmade | turncontinuationbetpossible | turncontinuationbetmade | rivercontinuationbetpossible | rivercontinuationbetmade | facingflopcontinuationbet | foldedtoflopcontinuationbet | calledflopcontinuationbet | raisedflopcontinuationbet | facingturncontinuationbet | foldedtoturncontinuationbet | calledturncontinuationbet | raisedturncontinuationbet | facingrivercontinuationbet | foldedtorivercontinuationbet | calledrivercontinuationbet | raisedrivercontinuationbet | totalpostflopstreetsseen | totalaggressivepostflopstreetsseen | playername |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 30961 | 292574 | 201604 | 5 | 4 | 0 | 3 | -100 | 9 | -50 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | gooooooomets |
| 30961 | 337793 | 201706 | 3 | 3 | 0 | 2 | -50 | 8 | -50 | 2 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | gooooooomets |
| 30961 | 347968 | 201709 | 6 | 3 | 0 | 83 | -6958 | 706 | -6958 | 20 | 18 | 20 | 1 | 16 | 1 | 3 | 1 | 4 | 0 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 15 | 4 | 3 | 1 | 5 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 11 | 3 | 1 | 3 | 3 | 5 | 1 | 0 | 0 | 0 | 0 | 3 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 24 | 3 | gooooooomets |
| 30961 | 289578 | 201604 | 5 | 4 | 0 | 9 | -305 | 237 | -153 | 2 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 3 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 3 | gooooooomets |
| 30961 | 308760 | 201609 | 4 | 4 | 0 | 18 | 5367 | 266 | 2684 | 5 | 4 | 3 | 1 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 2 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 4 | 3 | 2 | 2 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 5 | 3 | 2 | 1 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11 | 1 | gooooooomets |
# Create function to round and sum vectors
#vector x, vector y, round integer z
trans_func <- function(x, y, z)
{
round(sum(x)/sum(y) * 100, z)
}
columns_for_analysis <- c("gametype_id", "player_id", "totalhands", "totalbbswon",
"totalrakeincents", "totalamountwonincents", "vpiphands", "pfrhands",
"couldcoldcall", "didcoldcall", "couldthreebet", "didthreebet", "couldsqueeze",
"didsqueeze", "facingtwopreflopraisers", "calledtwopreflopraisers",
"raisedtwopreflopraisers", "smallblindstealattempted", "smallblindstealdefended",
"smallblindstealreraised", "bigblindstealattempted", "bigblindstealdefended",
"bigblindstealreraised", "facedthreebetpreflop", "foldedtothreebetpreflop",
"calledthreebetpreflop", "raisedthreebetpreflop", "facedfourbetpreflop",
"foldedtofourbetpreflop", "calledfourbetpreflop", "raisedfourbetpreflop",
"sawflop", "wonhandwhensawflop", "sawshowdown", "wonshowdown", "flopcontinuationbetpossible",
"flopcontinuationbetmade", "turncontinuationbetpossible", "turncontinuationbetmade",
"rivercontinuationbetpossible", "rivercontinuationbetmade", "facingflopcontinuationbet",
"foldedtoflopcontinuationbet", "calledflopcontinuationbet", "raisedflopcontinuationbet",
"facingturncontinuationbet", "foldedtoturncontinuationbet", "calledturncontinuationbet",
"raisedturncontinuationbet", "facingrivercontinuationbet", "foldedtorivercontinuationbet",
"calledrivercontinuationbet", "raisedrivercontinuationbet", "playername")
## create custom stats
my_wsop_finished_db <- merged_wsop_db %>%
select(c(columns_for_analysis)) %>%
group_by(playername) %>%
summarize(
vpip = round(sum(vpiphands)/sum(totalhands)*100,1),
pfr =round(sum(pfrhands)/sum(totalhands)*100,1),
total_hands = sum(totalhands),
money_won = sum(totalamountwonincents),
rake = sum(totalrakeincents),
coldcall = trans_func(didcoldcall/couldcoldcall,1),
threebet = trans_func(didthreebet/couldthreebet,1),
squeeze = trans_func(didsqueeze/couldsqueeze,1),
bb_per_100 = round(sum(totalbbswon)/(sum(totalhands)/100),2),
total_rake_100 = round((sum(totalrakeincents)/100)/sum(totalhands),2),
money_won_100 = round((sum(totalamountwonincents)/100)/(sum(totalhands)/100),2),
cc_3b = trans_func(calledtwopreflopraisers/facingtwopreflopraisers,1),
cc_4bet = trans_func(raisedtwopreflopraisers/facingtwopreflopraisers,1),
def_sb = trans_func(smallblindstealdefended/smallblindstealattempted,1),
three_bet_sb = trans_func(smallblindstealreraised/smallblindstealattempted,1),
defends_bb = trans_func(bigblindstealdefended/bigblindstealattempted,1),
three_bet_from_bb= trans_func(bigblindstealreraised/bigblindstealattempted,1),
fold_3_bet = trans_func(foldedtothreebetpreflop/facedthreebetpreflop,1),
four_bet = trans_func(raisedthreebetpreflop/facedthreebetpreflop,1),
fold_4_bet = trans_func(foldedtofourbetpreflop/facedfourbetpreflop,1),
five_bet =trans_func(raisedfourbetpreflop/facedfourbetpreflop,1),
wwsf = trans_func(wonhandwhensawflop/sawflop,1),
won_sd = trans_func(wonshowdown/sawshowdown,1),
flop_cbet = trans_func(wonshowdown/sawshowdown,1),
turn_cbet = trans_func(turncontinuationbetmade,turncontinuationbetpossible,1),
river_cbet = trans_func(rivercontinuationbetmade,rivercontinuationbetpossible,1),
fold_flob_cbet =trans_func(foldedtoflopcontinuationbet,facingflopcontinuationbet,1),
fold_turn_cbet =trans_func(foldedtoturncontinuationbet,facingturncontinuationbet,1),
fold_river_cbet =trans_func(foldedtorivercontinuationbet,facingrivercontinuationbet,1),
raise_flop_cbet =trans_func(raisedflopcontinuationbet,facingflopcontinuationbet,1),
raise_turn_cbet =trans_func(raisedturncontinuationbet,facingturncontinuationbet,1),
raise_river_cbet =trans_func(raisedrivercontinuationbet,facingrivercontinuationbet,1),
wtsd =trans_func(sawshowdown/sawflop,1))
## Display DF summarized by player with custom stats
kable(head(my_wsop_finished_db,10))| playername | vpip | pfr | total_hands | money_won | rake | coldcall | threebet | squeeze | bb_per_100 | total_rake_100 | money_won_100 | cc_3b | cc_4bet | def_sb | three_bet_sb | defends_bb | three_bet_from_bb | fold_3_bet | four_bet | fold_4_bet | five_bet | wwsf | won_sd | flop_cbet | turn_cbet | river_cbet | fold_flob_cbet | fold_turn_cbet | fold_river_cbet | raise_flop_cbet | raise_turn_cbet | raise_river_cbet | wtsd |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 89.8 | 55.4 | 177 | -7882 | 7882 | NaN | NaN | NaN | -6807.91 | 0.45 | -44.53 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 73.2 | 100 | 72.7 | 46.7 | 85.7 | 4.5 | 10 | 0 | NaN | |
| ..1.. | 21.0 | 5.6 | 1062 | 11179 | 4664 | NaN | NaN | NaN | 2105.27 | 0.04 | 10.53 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 76.7 | 100.0 | NaN | 10.0 | 0 | NaN | NaN |
| .aaaa. | 33.8 | 7.5 | 80 | -3455 | 596 | 33 | 6 | NaN | -8637.50 | 0.07 | -43.19 | NaN | NaN | NaN | NaN | 150 | 25 | NaN | NaN | NaN | NaN | 45 | 83 | 83 | 100.0 | 0 | 100.0 | NaN | NaN | 0.0 | NaN | NaN | 157 |
| .beast. | 31.5 | 28.8 | 368 | 1241 | 4244 | NaN | NaN | NaN | 284.24 | 0.12 | 3.37 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 33.3 | 0 | 66.7 | 0.0 | 0.0 | 0.0 | 0 | 0 | NaN |
| .eluu. | 71.4 | 14.3 | 7 | 966 | 26 | 200 | 0 | NaN | 27600.00 | 0.04 | 138.00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 167 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 67 |
| .o0ohh_____. | 38.0 | 18.2 | 137 | -6723 | 726 | NaN | NaN | NaN | -9814.60 | 0.05 | -49.07 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | NaN | 55.6 | 50.0 | 100.0 | 0.0 | 0 | 0 | NaN |
| bwukzilla | 47.1 | 23.5 | 51 | -40635 | 914 | 108 | 94 | NaN | -39841.18 | 0.18 | -796.76 | NaN | NaN | 100 | 100 | NaN | NaN | NaN | NaN | NaN | NaN | 77 | 133 | 133 | 100.0 | NaN | 75.0 | NaN | NaN | 25.0 | NaN | NaN | 125 |
| _james68 | 52.6 | 10.5 | 19 | -2000 | 226 | 160 | 0 | 0 | -21052.63 | 0.12 | -105.26 | NaN | NaN | 200 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | 43 | 50 | 50 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 62 |
| meat | 20.0 | 12.1 | 140 | -6453 | 578 | 75 | 132 | NaN | -9218.57 | 0.04 | -46.09 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 50.0 | NaN | 66.7 | NaN | NaN | 16.7 | NaN | NaN | NaN |
| niko | 22.2 | 9.5 | 63 | 2610 | 278 | 33 | 11 | NaN | 8285.71 | 0.04 | 41.43 | NaN | NaN | 0 | 0 | 150 | 0 | NaN | NaN | NaN | NaN | 117 | NaN | NaN | 100.0 | NaN | 100.0 | NaN | NaN | 0.0 | NaN | NaN | 67 |
## save to csv
write.csv(my_wsop_finished_db,"wsop_table.csv")wsop <- read_csv("https://raw.githubusercontent.com/justinherman42/Justin-Data-607/master/final%20project/wsop_table.csv")## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_double(),
## X1 = col_integer(),
## playername = col_character(),
## total_hands = col_integer(),
## money_won = col_integer(),
## rake = col_integer()
## )
## See spec(...) for full column specifications.
colnames(wsop)## [1] "X1" "playername" "vpip"
## [4] "pfr" "total_hands" "money_won"
## [7] "rake" "coldcall" "threebet"
## [10] "squeeze" "bb_per_100" "total_rake_100"
## [13] "money_won_100" "cc_3b" "cc_4bet"
## [16] "def_sb" "three_bet_sb" "defends_bb"
## [19] "three_bet_from_bb" "fold_3_bet" "four_bet"
## [22] "fold_4_bet" "five_bet" "wwsf"
## [25] "won_sd" "flop_cbet" "turn_cbet"
## [28] "river_cbet" "fold_flob_cbet" "fold_turn_cbet"
## [31] "fold_river_cbet" "raise_flop_cbet" "raise_turn_cbet"
## [34] "raise_river_cbet" "wtsd"
wsop2 <- wsop %>%
filter(., total_hands > 10) %>%
replace(is.na(.),0) %>%
select(., c(playername, vpip, pfr, threebet, fold_3_bet,
squeeze, total_hands, def_sb, defends_bb, three_bet_sb, four_bet,
fold_4_bet, five_bet, fold_flob_cbet, raise_flop_cbet, fold_turn_cbet,
raise_turn_cbet, fold_river_cbet, raise_river_cbet, flop_cbet,
turn_cbet, river_cbet, won_sd, cc_3b, cc_4bet, wwsf, wtsd))('\r\n')
VPIP becomes \r\n;VPIPrename_col_attempt_db <- wsop2
rename_col_attempt_db <- rename_col_attempt_db %>%
rename(`\r\nDSB` = def_sb, `\r\nFoldf` = fold_flob_cbet,
`\r\nBetf` = flop_cbet, SN = playername, V = vpip,
PF = pfr, `3B` = threebet, F3B = fold_3_bet, SQ = squeeze,
H = total_hands, DBB = defends_bb, `3BSB` = three_bet_sb,
`4B` = four_bet, F4B = fold_4_bet, `5B` = five_bet,
RF = raise_flop_cbet, FT = fold_turn_cbet, RT = raise_turn_cbet,
FR = fold_river_cbet, RR = raise_river_cbet, BT = turn_cbet,
BR = river_cbet, W_sd = won_sd, CC3 = cc_3b, CC4 = cc_4bet)
# Store colnames as vector to use in function below
my_names <- colnames(rename_col_attempt_db)
# Get rid of whitespace
player_note_vector <- apply(rename_col_attempt_db, 2, function(x) gsub("\\s+",
"", x))
# Create function to make big string
player_note_vector <- apply(player_note_vector, 1, function(x)
{
my_string <- do.call(paste, as.list(c(x, sep = ",")))
my_string <- trimws(my_string, which = c("both", "left",
"right"))
my_string <- unlist(strsplit(my_string, split = ","))
my_string <- paste(my_names, my_string, sep = "-")
my_string <- paste(my_string, collapse = " ")
return(my_string)
})
# Display concatenated strings
player_note_vector[1:2]## [1] "SN-0 V-89.8 PF-55.4 3B-0.0 F3B-90.6 SQ-0.0 H-177 \r\nDSB-0.0 DBB-0.0 3BSB-0.0 4B-6.2 F4B-0.0 5B-0.0 \r\nFoldf-72.7 RF-4.5 FT-46.7 RT-10.0 FR-85.7 RR-0.0 \r\nBetf-0.0 BT-73.2 BR-100.0 W_sd-0.0 CC3-0.0 CC4-0.0 wwsf-0.0 wtsd-0.0"
## [2] "SN-..1.. V-21.0 PF-5.6 3B-0.6 F3B-71.4 SQ-1.6 H-1062 \r\nDSB-5.0 DBB-34.0 3BSB-0.0 4B-14.3 F4B-0.0 5B-0.0 \r\nFoldf-76.7 RF-10.0 FT-100.0 RT-0.0 FR-0.0 RR-0.0 \r\nBetf-70.0 BT-0.0 BR-0.0 W_sd-70.0 CC3-0.0 CC4-0.0 wwsf-43.4 wtsd-18.3"
my_sns <- rename_col_attempt_db$SN
finished_postgres_db <- data.frame(my_sns, player_note_vector, stringsAsFactors = FALSE)
## display finished postgres db
finished_postgres_db[1:5, ] %>%
kable("html") %>%
kable_styling()| my_sns | player_note_vector |
|---|---|
| 0 | SN-0 V-89.8 PF-55.4 3B-0.0 F3B-90.6 SQ-0.0 H-177 DSB-0.0 DBB-0.0 3BSB-0.0 4B-6.2 F4B-0.0 5B-0.0 Foldf-72.7 RF-4.5 FT-46.7 RT-10.0 FR-85.7 RR-0.0 Betf-0.0 BT-73.2 BR-100.0 W_sd-0.0 CC3-0.0 CC4-0.0 wwsf-0.0 wtsd-0.0 |
| ..1.. | SN-..1.. V-21.0 PF-5.6 3B-0.6 F3B-71.4 SQ-1.6 H-1062 DSB-5.0 DBB-34.0 3BSB-0.0 4B-14.3 F4B-0.0 5B-0.0 Foldf-76.7 RF-10.0 FT-100.0 RT-0.0 FR-0.0 RR-0.0 Betf-70.0 BT-0.0 BR-0.0 W_sd-70.0 CC3-0.0 CC4-0.0 wwsf-43.4 wtsd-18.3 |
| .aaaa. | SN-.aaaa. V-33.8 PF-7.5 3B-3.1 F3B-0.0 SQ-0.0 H-80 DSB-0.0 DBB-42.9 3BSB-0.0 4B-0.0 F4B-0.0 5B-0.0 Foldf-100.0 RF-0.0 FT-0.0 RT-0.0 FR-0.0 RR-0.0 Betf-33.3 BT-100.0 BR-0.0 W_sd-33.3 CC3-0.0 CC4-0.0 wwsf-20.0 wtsd-48.0 |
| .beast. | SN-.beast. V-31.5 PF-28.8 3B-18.6 F3B-12.5 SQ-27.8 H-368 DSB-23.5 DBB-41.9 3BSB-23.5 4B-12.5 F4B-0.0 5B-100.0 Foldf-66.7 RF-0.0 FT-0.0 RT-0.0 FR-0.0 RR-0.0 Betf-55.6 BT-33.3 BR-0.0 W_sd-55.6 CC3-0.0 CC4-0.0 wwsf-41.7 wtsd-21.4 |
| .o0ohh_____. | SN-.o0ohh_____. V-38.0 PF-18.2 3B-4.2 F3B-50.0 SQ-0.0 H-137 DSB-20.0 DBB-50.0 3BSB-0.0 4B-50.0 F4B-0.0 5B-0.0 Foldf-55.6 RF-0.0 FT-50.0 RT-0.0 FR-100.0 RR-0.0 Betf-50.0 BT-0.0 BR-0.0 W_sd-50.0 CC3-100.0 CC4-0.0 wwsf-43.2 wtsd-32.4 |
note_file <- read_xml("notes.xml")
head(xml_children(note_file))## {xml_nodeset (6)}
## [1] <Note>\n <playerName>richiethec</playerName>\n <quickNote/>\n <pl ...
## [2] <Note>\n <playerName>chi1862</playerName>\n <quickNote/>\n <playe ...
## [3] <Note>\n <playerName>bradstever99</playerName>\n <quickNote/>\n < ...
## [4] <Note>\n <playerName>harvey</playerName>\n <quickNote/>\n <player ...
## [5] <Note>\n <playerName>sevilla21</playerName>\n <quickNote/>\n <pla ...
## [6] <Note>\n <playerName>foldaces221</playerName>\n <quickNote/>\n <p ...
# Extract playername as text from xml
quick_notes <- xml_find_all(note_file, ".//quickNote")
quick_notes_txt <- xml_text(quick_notes)
playerNames <- xml_find_all(note_file, ".//playerName")
XML_player_names <- xml_text(playerNames)
# Test how many shared values there are between sqldb and xml
n <- 0
for (x in my_sns)
{
if (x %in% XML_player_names)
{
n <- n + 1
}
}
print(n)## [1] 3775
## Print for comparison of XML and Postgres indexing
XML_player_names[1:10]## [1] "richiethec" "chi1862" "bradstever99" "harvey"
## [5] "sevilla21" "foldaces221" "michaelss82" "venom28"
## [9] "eddiebrock47" "robbynyu"
finished_postgres_db$my_sns[1:10]## [1] "0" "..1.." ".aaaa." ".beast."
## [5] ".o0ohh_____." "_bwukzilla_" "_james68" "_meat_"
## [9] "_niko_" "_tres"
## Rename columns and join on shared column maintaing xml order
xml_ordered_names <- data.frame(XML_player_names, stringsAsFactors = FALSE)
colnames(xml_ordered_names) <- c("my_sns")
df_for_xml <- plyr::join(xml_ordered_names, finished_postgres_db)## Joining by: my_sns
## Display our final DF
df_for_xml[1:5,] %>%
kable("html") %>%
kable_styling()| my_sns | player_note_vector |
|---|---|
| richiethec | SN-richiethec V-40.4 PF-6.5 3B-4.0 F3B-42.9 SQ-2.5 H-1245 DSB-27.5 DBB-57.1 3BSB-0.0 4B-14.3 F4B-33.3 5B-33.3 Foldf-61.9 RF-4.4 FT-52.9 RT-11.8 FR-50.0 RR-0.0 Betf-49.2 BT-68.8 BR-33.3 W_sd-49.2 CC3-6.9 CC4-3.4 wwsf-31.6 wtsd-25.8 |
| chi1862 | SN-chi1862 V-42.5 PF-9.5 3B-1.0 F3B-33.3 SQ-7.1 H-275 DSB-26.7 DBB-56.5 3BSB-0.0 4B-16.7 F4B-0.0 5B-0.0 Foldf-50.0 RF-11.5 FT-0.0 RT-20.0 FR-100.0 RR-0.0 Betf-50.0 BT-50.0 BR-0.0 W_sd-50.0 CC3-0.0 CC4-0.0 wwsf-36.3 wtsd-21.6 |
| bradstever99 | SN-bradstever99 V-62.1 PF-14.9 3B-0.0 F3B-25.0 SQ-0.0 H-87 DSB-66.7 DBB-50.0 3BSB-0.0 4B-25.0 F4B-0.0 5B-0.0 Foldf-70.0 RF-0.0 FT-33.3 RT-0.0 FR-100.0 RR-0.0 Betf-25.0 BT-50.0 BR-100.0 W_sd-25.0 CC3-0.0 CC4-0.0 wwsf-31.9 wtsd-25.5 |
| harvey | SN-harvey V-38.2 PF-7.6 3B-1.7 F3B-25.0 SQ-5.4 H-524 DSB-35.7 DBB-40.0 3BSB-0.0 4B-0.0 F4B-0.0 5B-0.0 Foldf-65.9 RF-7.3 FT-40.0 RT-20.0 FR-100.0 RR-0.0 Betf-56.4 BT-50.0 BR-0.0 W_sd-56.4 CC3-11.1 CC4-0.0 wwsf-35.1 wtsd-27.2 |
| sevilla21 | SN-sevilla21 V-39.7 PF-14.0 3B-4.3 F3B-50.0 SQ-4.2 H-378 DSB-60.0 DBB-57.1 3BSB-6.7 4B-0.0 F4B-0.0 5B-0.0 Foldf-66.7 RF-0.0 FT-66.7 RT-0.0 FR-0.0 RR-0.0 Betf-36.4 BT-83.3 BR-0.0 W_sd-36.4 CC3-20.0 CC4-6.7 wwsf-35.8 wtsd-24.1 |
## insertion
xml_text(quick_notes) <- df_for_xml$player_note_vector[match(XML_player_names,
df_for_xml$my_sns)]
## write xml to disk
write_xml(note_file, "C:/Users/justin/Documents/mynote6.xml", options = "format")Final XML