IMDB dataset
library(keras)
library(tensorflow)

# Keep only the 2,300 most frequent words; rarer words become "unknown"
set_maxfreqword_allow = 2300
myimdb = dataset_imdb(num_words = set_maxfreqword_allow)
mydat_train = myimdb$train$x
mydat_train_Labels = myimdb$train$y
mydat_test = myimdb$test$x
mydat_test_Labels = myimdb$test$y

# word -> integer index, plus the reverse mapping for decoding reviews
word_to_index = dataset_imdb_word_index()
index_to_word = names(word_to_index)
names(index_to_word) <- word_to_index
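The word index is ordered by corpus frequency, so index 1 maps to the most common word, "the":
index_to_word[["1"]]
## [1] "the"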
The ten most frequent words in the word index:

| word | the | and | a | of | to | is | br | in | it | i |
|---|---|---|---|---|---|---|---|---|---|---|
| position | 58319 | 64305 | 37292 | 20341 | 67378 | 16730 | 57051 | 16741 | 13579 | 9680 |
| index | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
The ten least frequent words, at the tail of the index:

| word | ev | chicatillo | transacting | sics | wheelers | pipe's | copywrite | artbox | voorhees' | 'l' |
|---|---|---|---|---|---|---|---|---|---|---|
| position | 88557 | 88558 | 88559 | 88563 | 88569 | 88572 | 88575 | 88577 | 88580 | 88582 |
| index | 88575 | 88576 | 88577 | 88578 | 88579 | 88580 | 88581 | 88582 | 88583 | 88584 |
An example of a review decoded with set_maxfreqword_allow = 2300: every word outside the 2,300 most frequent ones appears as ?. A sketch of the decoding is shown first, followed by the decoded text.
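decode_review() below is an illustrative helper, not part of keras. The offset of 3 follows the standard IMDB convention that indices 0-2 are reserved for padding, sequence start, and unknown tokens:

# Map encoded integer indices back to words; unmapped indices become "?"
decode_review <- function(encoded) {
  words <- sapply(encoded, function(i) {
    word <- if (i >= 3) index_to_word[as.character(i - 3)] else NA
    if (is.na(word)) "?" else word
  })
  paste(words, collapse = " ")
}
cat(decode_review(mydat_train[[1]]))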
## ? this film was just brilliant casting location scenery story direction ? really ? the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same ? island as myself so i loved the fact there was a real connection with this film the witty ? throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly ? was amazing really ? at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little ? that played the ? of ? and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big ? for the whole film but these children are amazing and should be ? for what they have done don't you think the whole story was so lovely because it was true and was ? life after all that was ? with us all
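Each review is next converted to a fixed-length multi-hot vector. vectorize_sequences() is not a keras function; the standard helper from Deep Learning with R (assumed here) is:

vectorize_sequences <- function(sequences, dimension = set_maxfreqword_allow) {
  # One row per review; column j is 1 if word index j occurs in the review
  results <- matrix(0, nrow = length(sequences), ncol = dimension)
  for (i in seq_along(sequences))
    results[i, sequences[[i]]] <- 1
  results
}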
vec_train = vectorize_sequences(mydat_train)
trainLabs = as.numeric(mydat_train_Labels)
vec_test = vectorize_sequences(mydat_test)
testLabs = as.numeric(mydat_test_Labels)

# One-hot encode the 0/1 labels into two columns for the 2-unit output layer
trainLabels25000 = to_categorical(as.array(trainLabs))
testLabels25000 = to_categorical(as.array(testLabs))
str(vec_train)
## num [1:25000, 1:2300] 1 1 1 1 1 1 1 1 1 1 ...
str(trainLabs)
## num [1:25000] 1 0 0 1 0 0 1 0 1 0 ...
str(vec_test)
## num [1:25000, 1:2300] 1 1 1 1 1 1 1 1 1 1 ...
str(testLabs)
## num [1:25000] 0 1 1 0 1 1 1 0 0 1 ...
+ Construct the model (a feed-forward network of dense layers)
model = keras_model_sequential()
model %>%
  layer_dense(units = 32, activation = "relu",
              input_shape = c(set_maxfreqword_allow)) %>%
  layer_dense(units = 32, activation = "relu") %>%
  # Two sigmoid output units, one per class, matching the one-hot labels
  layer_dense(units = 2, activation = "sigmoid")
model %>%
  compile(optimizer = "rmsprop",
          loss = "binary_crossentropy",
          metrics = c("accuracy"))
+ Train on the 25,000 samples: first run
TrainText25000 = model %>% fit(vec_train, trainLabels25000,
                               epochs = 50,
                               batch_size = 512,
                               validation_split = 0.4)
plot(TrainText25000)
+ Train on the 25,000 samples: second run (each fit() call resumes from the current weights, so successive runs keep training the same model)
TrainText25000 = model %>% fit(vec_train, trainLabels25000,
                               epochs = 50,
                               batch_size = 512,
                               validation_split = 0.4)
plot(TrainText25000)
+ Train on the 25,000 samples: third run (150 epochs in total across the three runs)
TrainText25000 = model %>% fit(vec_train, trainLabels25000,
                               epochs = 50,
                               batch_size = 512,
                               validation_split = 0.4)
plot(TrainText25000)
+ Prediction with the training dataset
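The loss/accuracy figures, confusion matrix, and probability table below can be reproduced with something like the following sketch. predict() returns per-class probabilities; the predicted class is the column with the larger one:

model %>% evaluate(vec_train, trainLabels25000)

prob_train = model %>% predict(vec_train)   # columns: P(negative), P(positive)
PredictCls = max.col(prob_train) - 1        # 0 = negative, 1 = positive
table(PredictCls, ActualCls = trainLabs)

# Assemble the probability table shown below (probabilities as percentages)
head(data.frame(Prob_negative = round(100 * prob_train[, 1], 2),
                Prob_positive = round(100 * prob_train[, 2], 2),
                PredCls = PredictCls,
                ActuCls = trainLabs,
                ActuObjs = ifelse(trainLabs == 1, "positive", "negative")), 60)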
## $loss
## [1] 0.6055875
##
## $acc
## [1] 0.9388
## ActualCls
## PredictCls 0 1
## 0 11753 784
## 1 747 11716
| Prob_negative | Prob_positive | PredCls | ActuCls | ActuObjs |
|---|---|---|---|---|
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 0 | 100 | 1 | 1 | positive |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 0 | 100 | 1 | 1 | positive |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 100 | 0 | 0 | 0 | negative |
| 0 | 100 | 1 | 1 | positive |
| 0 | 100 | 1 | 1 | positive |
+ Prediction with the test dataset
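The same evaluation applied to the held-out test set (again a sketch):

model %>% evaluate(vec_test, testLabels25000)

prob_test = model %>% predict(vec_test)
table(PredictCls = max.col(prob_test) - 1, ActualCls = testLabs)

As the figures below show, test accuracy (about 0.85) trails training accuracy (about 0.94) and the test loss is far larger, a sign that 150 epochs overfit the training data.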
## $loss
## [1] 1.53563
##
## $acc
## [1] 0.84544
## ActualCls
## PredictCls 0 1
## 0 10641 2003
## 1 1859 10497
| Prob_negative | Prob_positive | PredCls | ActuCls | ActuObjs |
|---|---|---|---|---|
| 99.96 | 0.03 | 0 | 0 | negative |
| 0.00 | 100.00 | 1 | 1 | positive |
| 100.00 | 0.00 | 0 | 1 | positive |
| 0.00 | 100.00 | 1 | 0 | negative |
| 0.00 | 100.00 | 1 | 1 | positive |
| 0.00 | 100.00 | 1 | 1 | positive |
| 0.00 | 100.00 | 1 | 1 | positive |
| 100.00 | 0.00 | 0 | 0 | negative |
| 0.00 | 100.00 | 1 | 0 | negative |
| 0.00 | 100.00 | 1 | 1 | positive |
| 0.00 | 100.00 | 1 | 1 | positive |
| 99.55 | 0.33 | 0 | 0 | negative |
| 100.00 | 0.00 | 0 | 0 | negative |
| 0.03 | 99.99 | 1 | 0 | negative |
| 0.00 | 100.00 | 1 | 1 | positive |
| 100.00 | 0.00 | 0 | 0 | negative |
| 0.00 | 100.00 | 1 | 1 | positive |
| 99.92 | 0.09 | 0 | 0 | negative |
| 100.00 | 0.00 | 0 | 0 | negative |
| 100.00 | 0.00 | 0 | 0 | negative |
| 0.00 | 100.00 | 1 | 1 | positive |
| 0.00 | 100.00 | 1 | 1 | positive |
| 0.00 | 100.00 | 1 | 1 | positive |
| 0.00 | 100.00 | 1 | 1 | positive |
| 0.00 | 100.00 | 1 | 1 | positive |
| 0.00 | 100.00 | 1 | 1 | positive |
| 100.00 | 0.00 | 0 | 0 | negative |
| 0.00 | 100.00 | 1 | 1 | positive |
| 0.00 | 100.00 | 1 | 1 | positive |
| 100.00 | 0.00 | 0 | 0 | negative |
| 0.00 | 100.00 | 1 | 1 | positive |
| 100.00 | 0.00 | 0 | 1 | positive |
| 0.00 | 100.00 | 1 | 0 | negative |
| 100.00 | 0.00 | 0 | 0 | negative |
| 100.00 | 0.00 | 0 | 0 | negative |
| 100.00 | 0.00 | 0 | 0 | negative |
| 0.00 | 100.00 | 1 | 1 | positive |
| 0.00 | 100.00 | 1 | 1 | positive |
| 99.98 | 0.03 | 0 | 0 | negative |
| 100.00 | 0.00 | 0 | 0 | negative |
| 0.00 | 100.00 | 1 | 1 | positive |
| 0.00 | 100.00 | 1 | 1 | positive |
| 100.00 | 0.00 | 0 | 0 | negative |
| 0.00 | 100.00 | 1 | 1 | positive |
| 0.00 | 100.00 | 1 | 1 | positive |
| 99.09 | 0.89 | 0 | 1 | positive |
| 100.00 | 0.00 | 0 | 0 | negative |
| 100.00 | 0.00 | 0 | 0 | negative |
| 100.00 | 0.00 | 0 | 0 | negative |
| 100.00 | 0.00 | 0 | 1 | positive |
| 100.00 | 0.00 | 0 | 0 | negative |
| 99.98 | 0.02 | 0 | 0 | negative |
| 0.00 | 100.00 | 1 | 1 | positive |
| 0.00 | 100.00 | 1 | 1 | positive |
| 0.00 | 100.00 | 1 | 1 | positive |
| 99.58 | 0.53 | 0 | 1 | positive |
| 100.00 | 0.00 | 0 | 1 | positive |
| 99.98 | 0.01 | 0 | 1 | positive |
| 100.00 | 0.00 | 0 | 0 | negative |
| 99.98 | 0.02 | 0 | 0 | negative |