#### Poker Data - Nueral Network Analysis --------------
##### Step 1: Collecting data -------------------
# This dataset is included on the UCI Machine Learning Repository website - http://archive.ics.uci.edu/ml/datasets/Poker+Hand
# Each record of the dataset includes a poker hand with 5 playing cards. The playing cards for each hand are drawn from a standard deck of 52.
## Step 2: Exploring and preparing the data ----
# read in data and examine structure
Poker <- read.csv("pokerdata.csv")
str(Poker)
'data.frame': 1025012 obs. of 11 variables:
$ S1 : int 1 2 3 4 4 1 1 2 3 4 ...
$ C1 : int 10 11 12 10 1 2 9 1 5 1 ...
$ S2 : int 1 2 3 4 4 1 1 2 3 4 ...
$ C2 : int 11 13 11 11 13 4 12 2 6 4 ...
$ S3 : int 1 2 3 4 4 1 1 2 3 4 ...
$ C3 : int 13 10 13 1 12 5 10 3 9 2 ...
$ S4 : int 1 2 3 4 4 1 1 2 3 4 ...
$ C4 : int 12 12 10 13 11 3 11 4 7 3 ...
$ S5 : int 1 2 3 4 4 1 1 2 3 4 ...
$ C5 : int 1 1 1 12 10 6 13 5 8 5 ...
$ CLASS: int 9 9 9 9 9 8 8 8 8 8 ...
# custom normalization function
normalize <- function(x) {
return((x - min(x)) / (max(x) - min(x)))
}
# apply normalization to entire data frame
Poker_norm <- as.data.frame(lapply(Poker, normalize))
# confirm that the range is now between zero and one
summary(Poker_norm$S1)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0000000 0.3333333 0.6666667 0.5002309 0.6666667 1.0000000
summary(Poker_norm$CLASS)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.00000000 0.00000000 0.00000000 0.06855638 0.11111110 1.00000000
# compared to the original minimum and maximum
summary(Poker$S1)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000000 2.000000 3.000000 2.500693 3.000000 4.000000
summary(Poker$CLASS)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0000000 0.0000000 0.0000000 0.6170074 1.0000000 9.0000000
# create training and test data
poker_train <- Poker_norm[1:7512, ]
poker_test <- Poker_norm[7513:10013, ]
## Step 3: Training a model on the data ----
# train the neuralnet model
library(neuralnet)
# simple ANN with only a single hidden neuron
## This will test how the different cards affect the outcome of the poker hand (CLASS) - Modeling the Strength of the Poker hand.
poker_model <- neuralnet(CLASS ~ S1 +
C1 + S2 + C2 +
S3 + C3 + S4 + C4 + S5 + C5,
data = poker_train)
# visualize the network topology
plot(poker_model)
## Step 4: Evaluating model performance ----
# obtain model results
model_results <- compute(poker_model, poker_test[1:10])
# obtain predicted strength values
predicted_CLASS <- model_results$net.result
# examine the correlation between predicted and actual values
cor(predicted_CLASS, poker_test$CLASS)
[,1]
[1,] 0.04677776188
## Step 5: Improving model performance ----
# a more complex neural network topology with 4 hidden neurons
# to guarantee repeatable results
#hidden = 4
#set.seed(12345)
#poker_model2 <- neuralnet(CLASS ~ S1 +
# C1 + S2 + C2 +
# S3 + C3 + S4 + C4 + S5 + C5,
# data = poker_train, hidden = 4, stepmax=1e6)
#poker_model2
#plot(poker_model2)
#Error value too large
#hidden = 10
#set.seed(12345)
#poker_model3 <- neuralnet(CLASS ~ S1 +
# C1 + S2 + C2 +
# S3 + C3 + S4 + C4 + S5 + C5,
# data = poker_train, hidden = 10, stepmax=1e6)
#poker_model3
#Code failed to run generating an error output
#hidden = 6
set.seed(12345)
poker_model5 <- neuralnet(CLASS ~ S1 +
C1 + S2 + C2 +
S3 + C3 + S4 + C4 + S5 + C5,
data = poker_train, hidden = 6, stepmax=1e6)
plot(poker_model5)
# evaluate the results as we did before
model_results2 <- compute(poker_model5, poker_test[1:10])
predicted_class2 <- model_results2$net.result
cor(predicted_class2, poker_test$CLASS)
[,1]
[1,] 0.2800660736
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6DQogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQNCiAgaHRtbF9kb2N1bWVudDogZGVmYXVsdA0KICB3b3JkX2RvY3VtZW50OiBkZWZhdWx0DQotLS0NCg0KYGBge3Igc2V0dXAsIGluY2x1ZGU9VFJVRX0NCg0KIyMjIyBQb2tlciBEYXRhIC0gTnVlcmFsIE5ldHdvcmsgQW5hbHlzaXMgLS0tLS0tLS0tLS0tLS0NCg0KDQojIyMjIyBTdGVwIDE6IENvbGxlY3RpbmcgZGF0YSAtLS0tLS0tLS0tLS0tLS0tLS0tDQojIFRoaXMgZGF0YXNldCBpcyBpbmNsdWRlZCBvbiB0aGUgVUNJIE1hY2hpbmUgTGVhcm5pbmcgUmVwb3NpdG9yeSB3ZWJzaXRlIC0gaHR0cDovL2FyY2hpdmUuaWNzLnVjaS5lZHUvbWwvZGF0YXNldHMvUG9rZXIrSGFuZCANCg0KIyBFYWNoIHJlY29yZCBvZiB0aGUgZGF0YXNldCBpbmNsdWRlcyBhIHBva2VyIGhhbmQgd2l0aCA1IHBsYXlpbmcgY2FyZHMuICBUaGUgcGxheWluZyBjYXJkcyBmb3IgZWFjaCBoYW5kIGFyZSAgIGRyYXduIGZyb20gYSBzdGFuZGFyZCBkZWNrIG9mIDUyLiANCg0KDQojIyBTdGVwIDI6IEV4cGxvcmluZyBhbmQgcHJlcGFyaW5nIHRoZSBkYXRhIC0tLS0NCiMgcmVhZCBpbiBkYXRhIGFuZCBleGFtaW5lIHN0cnVjdHVyZQ0KDQoNClBva2VyIDwtIHJlYWQuY3N2KCJwb2tlcmRhdGEuY3N2IikNCnN0cihQb2tlcikNCg0KIyBjdXN0b20gbm9ybWFsaXphdGlvbiBmdW5jdGlvbg0Kbm9ybWFsaXplIDwtIGZ1bmN0aW9uKHgpIHsgDQogIHJldHVybigoeCAtIG1pbih4KSkgLyAobWF4KHgpIC0gbWluKHgpKSkNCn0NCg0KIyBhcHBseSBub3JtYWxpemF0aW9uIHRvIGVudGlyZSBkYXRhIGZyYW1lDQpQb2tlcl9ub3JtIDwtIGFzLmRhdGEuZnJhbWUobGFwcGx5KFBva2VyLCBub3JtYWxpemUpKQ0KDQojIGNvbmZpcm0gdGhhdCB0aGUgcmFuZ2UgaXMgbm93IGJldHdlZW4gemVybyBhbmQgb25lDQpzdW1tYXJ5KFBva2VyX25vcm0kUzEpDQoNCnN1bW1hcnkoUG9rZXJfbm9ybSRDTEFTUykNCg0KIyBjb21wYXJlZCB0byB0aGUgb3JpZ2luYWwgbWluaW11bSBhbmQgbWF4aW11bQ0Kc3VtbWFyeShQb2tlciRTMSkNCg0Kc3VtbWFyeShQb2tlciRDTEFTUykNCg0KIyBjcmVhdGUgdHJhaW5pbmcgYW5kIHRlc3QgZGF0YQ0KcG9rZXJfdHJhaW4gPC0gUG9rZXJfbm9ybVsxOjc1MTIsIF0NCnBva2VyX3Rlc3QgPC0gUG9rZXJfbm9ybVs3NTEzOjEwMDEzLCBdDQoNCiMjIFN0ZXAgMzogVHJhaW5pbmcgYSBtb2RlbCBvbiB0aGUgZGF0YSAtLS0tDQojIHRyYWluIHRoZSBuZXVyYWxuZXQgbW9kZWwNCmxpYnJhcnkobmV1cmFsbmV0KQ0KDQojIHNpbXBsZSBBTk4gd2l0aCBvbmx5IGEgc2luZ2xlIGhpZGRlbiBuZXVyb24NCiMjIFRoaXMgd2lsbCB0ZXN0IGhvdyB0aGUgZGlmZmVyZW50IGNhcmRzIGFmZmVjdCB0aGUgb3V0Y29tZSBvZiB0aGUgcG9rZXIgaGFuZCAoQ0xBU1MpIC0gTW9kZWxpbmcgdGhlIFN0cmVuZ3RoIG9mIHRoZSBQb2tlciBoYW5kLg0KDQpwb2tlcl9tb2RlbCA8LSBuZXVyYWxuZXQoQ0xBU1MgfiBTMSArDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBDMSArIFMyICsgQzIgKyANCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIFMzICsgQzMgKyBTNCArIEM0ICsgUzUgKyBDNSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBkYXRhID0gcG9rZXJfdHJhaW4pDQoNCiMgdmlzdWFsaXplIHRoZSBuZXR3b3JrIHRvcG9sb2d5DQpwbG90KHBva2VyX21vZGVsKQ0KDQoNCiMjIFN0ZXAgNDogRXZhbHVhdGluZyBtb2RlbCBwZXJmb3JtYW5jZSAtLS0tDQojIG9idGFpbiBtb2RlbCByZXN1bHRzDQptb2RlbF9yZXN1bHRzIDwtIGNvbXB1dGUocG9rZXJfbW9kZWwsIHBva2VyX3Rlc3RbMToxMF0pDQojIG9idGFpbiBwcmVkaWN0ZWQgc3RyZW5ndGggdmFsdWVzDQpwcmVkaWN0ZWRfQ0xBU1MgPC0gbW9kZWxfcmVzdWx0cyRuZXQucmVzdWx0DQojIGV4YW1pbmUgdGhlIGNvcnJlbGF0aW9uIGJldHdlZW4gcHJlZGljdGVkIGFuZCBhY3R1YWwgdmFsdWVzDQpjb3IocHJlZGljdGVkX0NMQVNTLCBwb2tlcl90ZXN0JENMQVNTKSAgDQoNCg0KIyMgU3RlcCA1OiBJbXByb3ZpbmcgbW9kZWwgcGVyZm9ybWFuY2UgLS0tLQ0KIyBhIG1vcmUgY29tcGxleCBuZXVyYWwgbmV0d29yayB0b3BvbG9neSB3aXRoIDQgaGlkZGVuIG5ldXJvbnMNCiMgdG8gZ3VhcmFudGVlIHJlcGVhdGFibGUgcmVzdWx0cw0KDQojaGlkZGVuID0gNA0KI3NldC5zZWVkKDEyMzQ1KSANCiNwb2tlcl9tb2RlbDIgPC0gbmV1cmFsbmV0KENMQVNTIH4gUzEgKw0KIyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBDMSArIFMyICsgQzIgKyANCiMgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgUzMgKyBDMyArIFM0ICsgQzQgKyBTNSArIEM1LA0KIyAgICAgICAgICAgICAgICAgICAgICAgICAgICBkYXRhID0gcG9rZXJfdHJhaW4sIGhpZGRlbiA9IDQsIHN0ZXBtYXg9MWU2KQ0KI3Bva2VyX21vZGVsMg0KI3Bsb3QocG9rZXJfbW9kZWwyKQ0KI0Vycm9yIHZhbHVlIHRvbyBsYXJnZQ0KDQojaGlkZGVuID0gMTANCiNzZXQuc2VlZCgxMjM0NSkgDQojcG9rZXJfbW9kZWwzIDwtIG5ldXJhbG5ldChDTEFTUyB+IFMxICsNCiMgICAgICAgICAgICAgICAgICAgICAgICAgICAgQzEgKyBTMiArIEMyICsgDQojICAgICAgICAgICAgICAgICAgICAgICAgICAgIFMzICsgQzMgKyBTNCArIEM0ICsgUzUgKyBDNSwNCiMgICAgICAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSBwb2tlcl90cmFpbiwgaGlkZGVuID0gMTAsIHN0ZXBtYXg9MWU2KQ0KI3Bva2VyX21vZGVsMw0KI0NvZGUgZmFpbGVkIHRvIHJ1biBnZW5lcmF0aW5nIGFuIGVycm9yIG91dHB1dA0KDQoNCiNoaWRkZW4gPSA2DQpzZXQuc2VlZCgxMjM0NSkgDQoNCnBva2VyX21vZGVsNSA8LSBuZXVyYWxuZXQoQ0xBU1MgfiBTMSArDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgQzEgKyBTMiArIEMyICsgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgUzMgKyBDMyArIFM0ICsgQzQgKyBTNSArIEM1LA0KICAgICAgICAgICAgICAgICAgICAgICAgICBkYXRhID0gcG9rZXJfdHJhaW4sIGhpZGRlbiA9IDYsIHN0ZXBtYXg9MWU2KQ0KDQpwbG90KHBva2VyX21vZGVsNSkNCg0KDQojIGV2YWx1YXRlIHRoZSByZXN1bHRzIGFzIHdlIGRpZCBiZWZvcmUNCm1vZGVsX3Jlc3VsdHMyIDwtIGNvbXB1dGUocG9rZXJfbW9kZWw1LCBwb2tlcl90ZXN0WzE6MTBdKQ0KcHJlZGljdGVkX2NsYXNzMiA8LSBtb2RlbF9yZXN1bHRzMiRuZXQucmVzdWx0DQpjb3IocHJlZGljdGVkX2NsYXNzMiwgcG9rZXJfdGVzdCRDTEFTUykgIA0KDQoNCmBgYA0KDQoNCg==