Before I begin, I provide a link Data 607 Final The link provides a general explanation on why I had to access my postgres database, and also provides a breif explantaion of some of the stats that I will be using. Copy and pasted from there
Or raises/ total hands played
## Loading required package: RPostgreSQL
## Loading required package: DBI
## -- Attaching packages -------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.1 v dplyr 0.7.4
## v tidyr 0.8.0 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## -- Conflicts ----------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
tryCatch({
drv <- dbDriver("PostgreSQL")
print("Connecting to database")
conn <- con
print("Connected!")
},
error=function(cond) {
print("Unable to connect to database.")
})## [1] "Connecting to database"
## [1] "Connected!"
# query 6 returns player stats but with only sn
# query 7 has id and sn
query_6 <- dbGetQuery(conn, 'SELECT * FROM players')
query_7 <- dbGetQuery(conn, 'SELECT * FROM compiledplayerresults limit 1000000')## ALl palyers
all_players <- query_6%>%
select(.,c(playername,player_id,cashhands))
player_ids <- all_players$player_id
all_player_stats <- query_7 %>%
filter(.,player_id %in% player_ids )
all_players_db<- merge(all_player_stats,all_players,by="player_id" ,all = TRUE)
#write.csv(all_players_db,"allplayers.csv")
## choosen columns
columns_for_analysis <-c('gametype_id',"player_id","totalhands","totalbbswon","totalrakeincents","totalamountwonincents","vpiphands","pfrhands","couldcoldcall","didcoldcall","couldthreebet","didthreebet","couldsqueeze","didsqueeze","facingtwopreflopraisers","calledtwopreflopraisers","raisedtwopreflopraisers","smallblindstealattempted","smallblindstealdefended","smallblindstealreraised","bigblindstealattempted","bigblindstealdefended","bigblindstealreraised","facedthreebetpreflop","foldedtothreebetpreflop","calledthreebetpreflop","raisedthreebetpreflop","facedfourbetpreflop","foldedtofourbetpreflop","calledfourbetpreflop","raisedfourbetpreflop","sawflop","wonhandwhensawflop","sawshowdown","wonshowdown","flopcontinuationbetpossible","flopcontinuationbetmade","turncontinuationbetpossible","turncontinuationbetmade","rivercontinuationbetpossible","rivercontinuationbetmade","facingflopcontinuationbet","foldedtoflopcontinuationbet","calledflopcontinuationbet","raisedflopcontinuationbet","facingturncontinuationbet","foldedtoturncontinuationbet","calledturncontinuationbet","raisedturncontinuationbet","facingrivercontinuationbet","foldedtorivercontinuationbet","calledrivercontinuationbet","raisedrivercontinuationbet","playername","cashhands")
## create function to round and sum vectors
#vector x, vector y, round integer z
trans_func <- function(x,y,z){
round(sum(x)/sum(y)*100,z)
}predictors <- c("vpip", "pfr", "threebet", "bb_per_100", "wwsf")
indexes <- c((1:31),65)
#all_players_db$totalbbswon <- all_players_db$totalbbswon*100
all_players_finished_db <- all_players_db %>%
filter(.,gametype_id %in% indexes ) %>%
select(c(columns_for_analysis)) %>%
group_by(playername) %>%
summarize(
vpip= round(sum(vpiphands)/sum(totalhands)*100,1),
pfr=round(sum(pfrhands)/sum(totalhands)*100,1),
total_hands = sum(totalhands),
money_won = sum(totalamountwonincents),
rake = sum(totalrakeincents),
threebet = round(sum(didthreebet)/sum(couldthreebet)*100,1),
bb_per_100 = round(sum(totalbbswon)/(sum(totalhands)),2),
total_rake_100 = round((sum(totalrakeincents)/100)/sum(total_hands),2),
money_won_100 = round((sum(totalamountwonincents)/100)/(sum(total_hands)/100)/100,2),
wwsf = round(sum(wonhandwhensawflop)/sum(sawflop)*100,1))
#all_players_finished_db <- all_players_finished_db %>%
# select(.,c(predictors))
# Print out of my statistics
all_players_finished_db %>%
filter(playername=="RileyFreeman")## # A tibble: 1 x 11
## playe~ vpip pfr total_~ money~ rake thre~ bb_p~ total~ money~ wwsf
## <chr> <dbl> <dbl> <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Riley~ 22.4 17.4 519686 2.02e6 4.39e6 6.00 5.52 0.0800 0.0400 42.7
## vpip pfr threebet
## breaks Numeric,21 Numeric,21 Numeric,21
## counts Integer,20 Integer,20 Integer,20
## density Numeric,20 Numeric,20 Numeric,20
## mids Numeric,20 Numeric,20 Numeric,20
## xname "dots[[1L]][[1L]]" "dots[[1L]][[2L]]" "dots[[1L]][[3L]]"
## equidist TRUE TRUE TRUE
## bb_per_100 wwsf
## breaks Numeric,11 Numeric,21
## counts Integer,10 Integer,20
## density Numeric,10 Numeric,20
## mids Numeric,10 Numeric,20
## xname "dots[[1L]][[4L]]" "dots[[1L]][[5L]]"
## equidist TRUE TRUE
## vpip pfr threebet
## breaks Numeric,21 Numeric,16 Numeric,13
## counts Integer,20 Integer,15 Integer,12
## density Numeric,20 Numeric,15 Numeric,12
## mids Numeric,20 Numeric,15 Numeric,12
## xname "dots[[1L]][[1L]]" "dots[[1L]][[2L]]" "dots[[1L]][[3L]]"
## equidist TRUE TRUE TRUE
## bb_per_100 wwsf
## breaks Numeric,22 Numeric,21
## counts Integer,21 Integer,20
## density Numeric,21 Numeric,20
## mids Numeric,21 Numeric,20
## xname "dots[[1L]][[4L]]" "dots[[1L]][[5L]]"
## equidist TRUE TRUE
## vpip pfr threebet
## breaks Integer,20 Numeric,16 Numeric,12
## counts Integer,19 Integer,15 Integer,11
## density Numeric,19 Numeric,15 Numeric,11
## mids Numeric,19 Numeric,15 Numeric,11
## xname "dots[[1L]][[1L]]" "dots[[1L]][[2L]]" "dots[[1L]][[3L]]"
## equidist TRUE TRUE TRUE
## bb_per_100 wwsf
## breaks Numeric,23 Numeric,21
## counts Integer,22 Integer,20
## density Numeric,22 Numeric,20
## mids Numeric,22 Numeric,20
## xname "dots[[1L]][[4L]]" "dots[[1L]][[5L]]"
## equidist TRUE TRUE
## vpip pfr threebet
## breaks Integer,18 Numeric,13 Numeric,9
## counts Integer,17 Integer,12 Integer,8
## density Numeric,17 Numeric,12 Numeric,8
## mids Numeric,17 Numeric,12 Numeric,8
## xname "dots[[1L]][[1L]]" "dots[[1L]][[2L]]" "dots[[1L]][[3L]]"
## equidist TRUE TRUE TRUE
## bb_per_100 wwsf
## breaks Numeric,18 Integer,9
## counts Integer,17 Integer,8
## density Numeric,17 Numeric,8
## mids Numeric,17 Numeric,8
## xname "dots[[1L]][[4L]]" "dots[[1L]][[5L]]"
## equidist TRUE TRUE
## vpip pfr threebet
## breaks Integer,17 Numeric,10 Numeric,12
## counts Integer,16 Integer,9 Integer,11
## density Numeric,16 Numeric,9 Numeric,11
## mids Numeric,16 Numeric,9 Numeric,11
## xname "dots[[1L]][[1L]]" "dots[[1L]][[2L]]" "dots[[1L]][[3L]]"
## equidist TRUE TRUE TRUE
## bb_per_100 wwsf
## breaks Numeric,13 Integer,15
## counts Integer,12 Integer,14
## density Numeric,12 Numeric,14
## mids Numeric,12 Numeric,14
## xname "dots[[1L]][[4L]]" "dots[[1L]][[5L]]"
## equidist TRUE TRUE
under_50_hands <- all_players_finished_db %>%
filter(.,total_hands<50)
par(mfrow=c(2, 3))
mapply(hist,under_50_hands[,predictors],main=colnames(under_50_hands[,predictors]),xlab=" Under50 hands")## vpip pfr threebet
## breaks Numeric,21 Numeric,21 Numeric,21
## counts Integer,20 Integer,20 Integer,20
## density Numeric,20 Numeric,20 Numeric,20
## mids Numeric,20 Numeric,20 Numeric,20
## xname "dots[[1L]][[1L]]" "dots[[1L]][[2L]]" "dots[[1L]][[3L]]"
## equidist TRUE TRUE TRUE
## bb_per_100 wwsf
## breaks Numeric,11 Numeric,21
## counts Integer,10 Integer,20
## density Numeric,10 Numeric,20
## mids Numeric,10 Numeric,20
## xname "dots[[1L]][[4L]]" "dots[[1L]][[5L]]"
## equidist TRUE TRUE
mtext("Figure 3", SOUTH<-1, line=3, adj=3.0,
col="blue")wwsf_50 <- all_players_finished_db %>%
filter(.,total_hands<50) %>%
select(wwsf) %>%
filter(.,wwsf==50)
wwsf_0 <- all_players_finished_db %>%
filter(.,total_hands<50) %>%
select(wwsf) %>%
filter(.,wwsf==0)
wwsf_100 <- all_players_finished_db %>%
filter(.,total_hands<50) %>%
select(wwsf) %>%
filter(.,wwsf==100)
paste("wwsf of 0 = ",count(wwsf_0),"wwsf of 50 = ",count(wwsf_50)," wwsf of 100 =",count(wwsf_100),"total occurences = 23183" )## [1] "wwsf of 0 = 4465 wwsf of 50 = 2622 wwsf of 100 = 1898 total occurences = 23183"
kable(paste("these 3 values(0,50,100) make up ", round((4462+2618+1897)/23183,2),"% of total occurences"))| x |
|---|
| these 3 values(0,50,100) make up 0.39 % of total occurences |
## Warning: Setting row names on a tibble is deprecated.
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| vpip | 1 | 15363 | 39.68 | 14.44 | 38.20 | 38.80 | 14.68 | 1.5 | 100.0 | 98.5 | 0.57 | 0.22 | 0.12 |
| pfr | 2 | 15363 | 13.43 | 7.75 | 12.70 | 12.92 | 7.71 | 0.0 | 73.4 | 73.4 | 0.89 | 2.04 | 0.06 |
| threebet | 3 | 15362 | 4.84 | 4.74 | 3.90 | 4.15 | 4.00 | 0.0 | 60.0 | 60.0 | 2.09 | 8.70 | 0.04 |
| bb_per_100 | 4 | 15363 | -29.87 | 94.78 | -22.21 | -26.55 | 61.56 | -1199.8 | 837.0 | 2036.8 | -0.62 | 6.95 | 0.76 |
| wwsf | 5 | 15335 | 40.49 | 8.86 | 40.30 | 40.39 | 7.41 | 0.0 | 100.0 | 100.0 | 0.23 | 2.30 | 0.07 |
## Warning: Setting row names on a tibble is deprecated.
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| vpip | 1 | 10635 | 39.93 | 13.66 | 38.50 | 39.06 | 14.08 | 7.40 | 100.00 | 92.60 | 0.59 | 0.17 | 0.13 |
| pfr | 2 | 10635 | 13.59 | 7.42 | 13.00 | 13.16 | 7.26 | 0.00 | 73.40 | 73.40 | 0.81 | 1.90 | 0.07 |
| threebet | 3 | 10634 | 4.85 | 4.11 | 4.00 | 4.35 | 3.26 | 0.00 | 52.20 | 52.20 | 2.03 | 9.07 | 0.04 |
| bb_per_100 | 4 | 10635 | -27.89 | 71.41 | -20.77 | -24.72 | 50.01 | -611.37 | 431.61 | 1042.98 | -0.70 | 4.55 | 0.69 |
| wwsf | 5 | 10631 | 40.67 | 6.95 | 40.50 | 40.57 | 6.08 | 0.00 | 100.00 | 100.00 | 0.25 | 1.84 | 0.07 |
## Warning: Setting row names on a tibble is deprecated.
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| vpip | 1 | 4007 | 37.98 | 12.84 | 36.40 | 37.03 | 13.49 | 8.20 | 89.20 | 81.00 | 0.67 | 0.23 | 0.20 |
| pfr | 2 | 4007 | 13.89 | 6.62 | 13.70 | 13.64 | 6.52 | 0.10 | 58.30 | 58.20 | 0.53 | 1.04 | 0.10 |
| threebet | 3 | 4007 | 4.79 | 3.18 | 4.30 | 4.47 | 2.97 | 0.00 | 39.50 | 39.50 | 1.59 | 6.79 | 0.05 |
| bb_per_100 | 4 | 4007 | -19.07 | 40.07 | -13.56 | -16.51 | 29.74 | -611.37 | 164.67 | 776.04 | -1.59 | 14.37 | 0.63 |
| wwsf | 5 | 4007 | 40.74 | 4.85 | 40.60 | 40.66 | 4.60 | 25.40 | 63.00 | 37.60 | 0.18 | 0.29 | 0.08 |
## Warning: Setting row names on a tibble is deprecated.
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| vpip | 1 | 2348 | 36.96 | 12.43 | 35.30 | 36.05 | 13.49 | 8.20 | 85.10 | 76.90 | 0.66 | 0.14 | 0.26 |
| pfr | 2 | 2348 | 14.21 | 6.44 | 14.20 | 14.02 | 6.38 | 0.10 | 43.30 | 43.20 | 0.35 | 0.35 | 0.13 |
| threebet | 3 | 2348 | 4.85 | 2.93 | 4.40 | 4.57 | 2.82 | 0.00 | 21.10 | 21.10 | 1.10 | 2.01 | 0.06 |
| bb_per_100 | 4 | 2348 | -16.57 | 31.50 | -11.35 | -13.75 | 24.33 | -254.63 | 96.95 | 351.58 | -1.46 | 5.52 | 0.65 |
| wwsf | 5 | 2348 | 40.81 | 4.43 | 40.80 | 40.76 | 4.30 | 25.50 | 58.10 | 32.60 | 0.12 | 0.22 | 0.09 |
## Warning: Setting row names on a tibble is deprecated.
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| vpip | 1 | 1080 | 35.07 | 11.67 | 32.90 | 34.11 | 11.71 | 8.20 | 81.60 | 73.40 | 0.73 | 0.21 | 0.36 |
| pfr | 2 | 1080 | 14.63 | 6.20 | 14.80 | 14.60 | 5.93 | 0.10 | 42.60 | 42.50 | 0.20 | 0.49 | 0.19 |
| threebet | 3 | 1080 | 4.96 | 2.75 | 4.60 | 4.75 | 2.82 | 0.10 | 21.10 | 21.00 | 1.00 | 2.27 | 0.08 |
| bb_per_100 | 4 | 1080 | -12.79 | 23.27 | -8.34 | -10.23 | 18.21 | -178.16 | 44.62 | 222.78 | -1.58 | 5.25 | 0.71 |
| wwsf | 5 | 1080 | 40.81 | 4.06 | 40.90 | 40.76 | 4.00 | 26.90 | 52.40 | 25.50 | 0.07 | 0.11 | 0.12 |
par(mfrow=c(2, 3))
mapply(qqnorm,over_2500_hands[,predictors],main=colnames(over_50_hands[,predictors]),xlab="Figure 9 -over50 hands") ## vpip pfr threebet bb_per_100 wwsf
## x Numeric,1080 Numeric,1080 Numeric,1080 Numeric,1080 Numeric,1080
## y Numeric,1080 Numeric,1080 Numeric,1080 Numeric,1080 Numeric,1080
par(mfrow=c(2, 3)) mapply(qqnorm,over_100_hands[,predictors],main=colnames(over_100_hands[,predictors]),xlab="Figure 10 - Over100 hands") ## vpip pfr threebet bb_per_100 wwsf
## x Numeric,10635 Numeric,10635 Numeric,10635 Numeric,10635 Numeric,10635
## y Numeric,10635 Numeric,10635 Numeric,10635 Numeric,10635 Numeric,10635
par(mfrow=c(2, 3)) mapply(qqnorm,over_500_hands[,predictors],main=colnames(over_2500_hands[,predictors]),xlab="Figure 11 - Over500 hands") ## vpip pfr threebet bb_per_100 wwsf
## x Numeric,4007 Numeric,4007 Numeric,4007 Numeric,4007 Numeric,4007
## y Numeric,4007 Numeric,4007 Numeric,4007 Numeric,4007 Numeric,4007
par(mfrow=c(2, 3)) mapply(qqnorm,over_500_hands[,predictors],main=colnames(over_1000_hands[,predictors]),xlab="Figure 12 - Over1000 hands") ## vpip pfr threebet bb_per_100 wwsf
## x Numeric,4007 Numeric,4007 Numeric,4007 Numeric,4007 Numeric,4007
## y Numeric,4007 Numeric,4007 Numeric,4007 Numeric,4007 Numeric,4007
par(mfrow=c(2, 3)) mapply(qqnorm,over_2500_hands[,predictors],main=colnames(over_2500_hands[,predictors]),xlab="Figure 13 - Over2500 hands") ## vpip pfr threebet bb_per_100 wwsf
## x Numeric,1080 Numeric,1080 Numeric,1080 Numeric,1080 Numeric,1080
## y Numeric,1080 Numeric,1080 Numeric,1080 Numeric,1080 Numeric,1080
#fivenum
# Describe function, filter for numeric columns,round, rename buit df
# describe_table <- describe(over_50_hands)
# describe_table <- describe_table[-1,]
# describe_table_2 <- as_data_frame(lapply(describe_table, function(x){
# if(is.numeric(x)) round(x, 2) else x}))
# colnames(describe_table_2) <- colnames(describe_table)
# rownames(describe_table_2) <- rownames(describe_table)
# kable(describe_table_2)over_2500_hands<- all_players_finished_db %>%
filter(.,total_hands>2500) %>%
select(.,c(predictors))
##Create numeric column
over_2500_hands <- over_2500_hands %>%
mutate(.,vpip_pfr=vpip-pfr)
## Save this vector for use later
numerical_vpip_pfr <- over_2500_hands$vpip_pfr
## create categorical factor column
over_2500_hands$vpip_pfr[over_2500_hands$vpip_pfr<15.001] <- 1
over_2500_hands$vpip_pfr[over_2500_hands$vpip_pfr>15.001] <- 0
my_vector <- str_replace(as.character(over_2500_hands$vpip_pfr),'0', "wide_gap")
my_vector <- str_replace(my_vector,'1', "narrow_gap")
over_2500_hands$vpip_pfr <-as_factor(my_vector)
## display new df summary stats
summary(over_2500_hands)## vpip pfr threebet bb_per_100
## Min. : 8.20 Min. : 0.10 Min. : 0.100 Min. :-178.160
## 1st Qu.:25.90 1st Qu.:10.60 1st Qu.: 2.800 1st Qu.: -23.305
## Median :32.90 Median :14.80 Median : 4.600 Median : -8.335
## Mean :35.07 Mean :14.63 Mean : 4.956 Mean : -12.787
## 3rd Qu.:42.50 3rd Qu.:18.52 3rd Qu.: 6.600 3rd Qu.: 2.335
## Max. :81.60 Max. :42.60 Max. :21.100 Max. : 44.620
## wwsf vpip_pfr
## Min. :26.90 wide_gap :645
## 1st Qu.:38.00 narrow_gap:435
## Median :40.90
## Mean :40.81
## 3rd Qu.:43.40
## Max. :52.40
y <- over_2500_hands$bb_per_100
# y <- y-min(y)+1
vpip_pfr <- over_2500_hands$vpip_pfr
WWSF <- over_2500_hands$wwsf
VPIP <- over_2500_hands$vpip
PFR <- over_2500_hands$pfr
THREE_BET <- over_2500_hands$threebet
fit_1 <- lm(y~vpip_pfr+VPIP+THREE_BET+WWSF+PFR)
layout(matrix(c(1,2,3,4),2,2))
plot(fit_1)summary(fit_1)##
## Call:
## lm(formula = y ~ vpip_pfr + VPIP + THREE_BET + WWSF + PFR)
##
## Residuals:
## Min 1Q Median 3Q Max
## -119.843 -9.064 1.013 10.145 63.616
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.54655 6.24926 2.328 0.0201 *
## vpip_pfrnarrow_gap 2.79640 1.87353 1.493 0.1358
## VPIP -1.12553 0.07442 -15.125 < 2e-16 ***
## THREE_BET -2.04711 0.30375 -6.739 2.59e-11 ***
## WWSF 0.18462 0.15109 1.222 0.2220
## PFR 0.93165 0.13942 6.682 3.77e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.25 on 1074 degrees of freedom
## Multiple R-squared: 0.3881, Adjusted R-squared: 0.3853
## F-statistic: 136.3 on 5 and 1074 DF, p-value: < 2.2e-16
anova(fit_1)## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## vpip_pfr 1 118613 118613 356.2982 < 2.2e-16 ***
## VPIP 1 88708 88708 266.4668 < 2.2e-16 ***
## THREE_BET 1 2811 2811 8.4429 0.00374 **
## WWSF 1 1802 1802 5.4127 0.02018 *
## PFR 1 14865 14865 44.6515 3.773e-11 ***
## Residuals 1074 357539 333
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit_2 <- lm(y~vpip_pfr+VPIP+THREE_BET+PFR)
layout(matrix(c(1,2,3,4),2,2))
plot(fit_2)summary(fit_2)##
## Call:
## lm(formula = y ~ vpip_pfr + VPIP + THREE_BET + PFR)
##
## Residuals:
## Min 1Q Median 3Q Max
## -119.113 -9.040 1.161 10.255 63.411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.36649 2.81137 7.600 6.43e-14 ***
## vpip_pfrnarrow_gap 2.71664 1.87283 1.451 0.147
## VPIP -1.12511 0.07443 -15.116 < 2e-16 ***
## THREE_BET -1.98040 0.29887 -6.626 5.43e-11 ***
## PFR 0.95904 0.13764 6.968 5.61e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.25 on 1075 degrees of freedom
## Multiple R-squared: 0.3873, Adjusted R-squared: 0.385
## F-statistic: 169.9 on 4 and 1075 DF, p-value: < 2.2e-16
anova(fit_2)## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## vpip_pfr 1 118613 118613 356.135 < 2.2e-16 ***
## VPIP 1 88708 88708 266.345 < 2.2e-16 ***
## THREE_BET 1 2811 2811 8.439 0.003748 **
## PFR 1 16170 16170 48.549 5.606e-12 ***
## Residuals 1075 358036 333
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## add vpip-pfr numerical vector
corr_plot_db <- as_data_frame(cbind(over_2500_hands,numerical_vpip_pfr))
##plot correlations
library(corrplot)## Warning: package 'corrplot' was built under R version 3.4.4
## corrplot 0.84 loaded
corrplot(cor(corr_plot_db[,-6]))fit_3 <- lm(y~vpip_pfr+THREE_BET+PFR)
layout(matrix(c(1,2,3,4),2,2))
plot(fit_3)my_fit <- summary(fit_3)
anova(fit_3)## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## vpip_pfr 1 118613 118613 293.9808 <2e-16 ***
## THREE_BET 1 29057 29057 72.0166 <2e-16 ***
## PFR 1 2531 2531 6.2735 0.0124 *
## Residuals 1076 434137 403
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning: Setting row names on a tibble is deprecated.
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| vpip | 1 | 1080 | 35.07 | 11.67 | 32.90 | 34.11 | 11.71 | 8.20 | 81.60 | 73.40 | 0.73 | 0.21 | 0.36 |
| pfr | 2 | 1080 | 14.63 | 6.20 | 14.80 | 14.60 | 5.93 | 0.10 | 42.60 | 42.50 | 0.20 | 0.49 | 0.19 |
| threebet | 3 | 1080 | 4.96 | 2.75 | 4.60 | 4.75 | 2.82 | 0.10 | 21.10 | 21.00 | 1.00 | 2.27 | 0.08 |
| bb_per_100 | 4 | 1080 | -12.79 | 23.27 | -8.34 | -10.23 | 18.21 | -178.16 | 44.62 | 222.78 | -1.58 | 5.25 | 0.71 |
| wwsf | 5 | 1080 | 40.81 | 4.06 | 40.90 | 40.76 | 4.00 | 26.90 | 52.40 | 25.50 | 0.07 | 0.11 | 0.12 |
my_stats <- all_players_finished_db %>%
filter(playername=="RileyFreeman") %>%
select(.,predictors)
for_comparison <- fit_3$coefficients[1]+fit_3$coefficients[2]+my_stats$pfr*(fit_3$coefficients[4])+my_stats$threebet*(fit_3$coefficients[3])
paste("my actual win rate is", my_stats$bb_per_100,"model predicts ",for_comparison)## [1] "my actual win rate is 5.52 model predicts 0.119438972916067"
##
## Call:
## lm(formula = y ~ vpip_pfr + THREE_BET_2 + PFR)
##
## Residuals:
## Min 1Q Median 3Q Max
## -143.295 -9.489 2.207 11.539 69.712
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -20.57131 1.61279 -12.755 < 2e-16 ***
## vpip_pfrnarrow_gap 23.06659 1.29429 17.822 < 2e-16 ***
## THREE_BET_2 -0.20567 0.02058 -9.994 < 2e-16 ***
## PFR 0.34878 0.12859 2.712 0.00679 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.75 on 1076 degrees of freedom
## Multiple R-squared: 0.2814, Adjusted R-squared: 0.2794
## F-statistic: 140.4 on 3 and 1076 DF, p-value: < 2.2e-16
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## vpip_pfr 1 118613 118613 303.942 < 2.2e-16 ***
## THREE_BET_2 1 42945 42945 110.045 < 2.2e-16 ***
## PFR 1 2871 2871 7.357 0.006787 **
## Residuals 1076 419908 390
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] "my actual win rate is 5.52 model predicts 7.33004460602566"
##
## Call:
## lm(formula = y ~ vpip_pfr + THREE_BET_2 + PFR_2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -142.786 -9.631 2.215 11.392 70.938
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.460608 1.028844 -16.971 <2e-16 ***
## vpip_pfrnarrow_gap 23.787548 1.266364 18.784 <2e-16 ***
## THREE_BET_2 -0.185957 0.021193 -8.774 <2e-16 ***
## PFR_2 0.004231 0.004029 1.050 0.294
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.81 on 1076 degrees of freedom
## Multiple R-squared: 0.2772, Adjusted R-squared: 0.2752
## F-statistic: 137.6 on 3 and 1076 DF, p-value: < 2.2e-16
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## vpip_pfr 1 118613 118613 302.1874 <2e-16 ***
## THREE_BET_2 1 42945 42945 109.4099 <2e-16 ***
## PFR_2 1 433 433 1.1026 0.2939
## Residuals 1076 422347 393
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] "my actual win rate is 5.52 model predicts 5.28481828673124"
##Create test/train
set.seed(10)
train.idx <- sample(nrow(over_2500_hands),.7*nrow(over_2500_hands),replace = FALSE)
test.idx <- (1:nrow(over_2500_hands))[-train.idx]
## Extract target vector and rest of DF for LM
lm_target <-over_2500_hands[,"bb_per_100"]
lm_inputs <- over_2500_hands[,c('pfr','threebet',"vpip_pfr")]
train_df <- over_2500_hands[train.idx,c('pfr','threebet',"vpip_pfr","bb_per_100")]
test_df <- over_2500_hands[test.idx,c('pfr','threebet',"vpip_pfr","bb_per_100")]
THREE_BET_2 <- THREE_BET**2
PFR_2 <- PFR**2
fit_6 <- lm(bb_per_100~vpip_pfr+(threebet**2)+(pfr**2),data=train_df)
fit_6 <- predict(fit_6,test_df)
actual <- over_2500_hands[test.idx,c("bb_per_100")]
error=actual-fit_6
paste("my RMSE is",sqrt(mean(error^2)))## [1] "my RMSE is 18.6350384277145"
take hands divide by 100 thats our smaples