Install / Load Libraries
# Install / Load Libraries
#install.packages("remotes")
#remotes::install_github("rstudio/tensorflow")
#install.packages("tensorflow")
#install.packages("keras")
#install.packages("stringi")
#install.packages("tm")
#install.packages("tokenizers")
#install.packages("word2vec")
#install.packages("knitr")
library(word2vec)
## Warning: package 'word2vec' was built under R version 4.2.3
library(tokenizers)
## Warning: package 'tokenizers' was built under R version 4.2.3
library(tm)
## Warning: package 'tm' was built under R version 4.2.3
## Loading required package: NLP
library(stringi)
## Warning: package 'stringi' was built under R version 4.2.2
library(keras)
## Warning: package 'keras' was built under R version 4.2.3
library(tensorflow)
## Warning: package 'tensorflow' was built under R version 4.2.3
install_tensorflow(envname = "r-tensorflow")
## Virtual environment "r-tensorflow" removed.
## Using Python: C:/Users/panic/AppData/Local/Programs/Python/Python39/python.exe
## Creating virtual environment "r-tensorflow" ...
## + "C:/Users/panic/AppData/Local/Programs/Python/Python39/python.exe" -m venv "C:/Users/panic/Documents/.virtualenvs/r-tensorflow"
## Done!
## Installing packages: pip, wheel, setuptools
## + "C:/Users/panic/Documents/.virtualenvs/r-tensorflow/Scripts/python.exe" -m pip install --upgrade --no-user pip wheel setuptools
## Virtual environment 'r-tensorflow' successfully created.
## Using virtual environment "r-tensorflow" ...
## + "C:/Users/panic/Documents/.virtualenvs/r-tensorflow/Scripts/python.exe" -m pip install --upgrade --no-user "tensorflow==2.13.*"
##
## Installation complete.
library(knitr)
Create Dataframe Objects and Read in Data
# Create empty dataframes for "yelp", "imdb", and "amazon"; then read in all of the lines from their respective text files
yelp_df <- data.frame()
imdb_df <- data.frame()
amazon_df <- data.frame()
yelp_data <- readLines("yelp_labelled.txt", encoding = "UTF-8")
imdb_data <- readLines("imdb_labelled.txt", encoding = "UTF-8")
amazon_data <- readLines("amazon_cells_labelled.txt", encoding = "UTF-8")
For Loops to Extract Data and Place in the Dataframes
# Split each line on the tab character into its sentence and integer label, then append the values to the corresponding dataframe
# (a vectorized alternative is sketched after these loops)
for (line in yelp_data) {
  split_data <- unlist(strsplit(line, "\t"))
  yelp_df <- rbind(yelp_df, data.frame(split_data[1], as.integer(split_data[2])))
}
# Reset loop variables for the next run
line <- ""
split_data <- ""
for (line in imdb_data) {
  split_data <- unlist(strsplit(line, "\t"))
  imdb_df <- rbind(imdb_df, data.frame(split_data[1], as.integer(split_data[2])))
}
# Reset loop variables for the next run
line <- ""
split_data <- ""
for (line in amazon_data) {
  split_data <- unlist(strsplit(line, "\t"))
  amazon_df <- rbind(amazon_df, data.frame(split_data[1], as.integer(split_data[2])))
}
# Clean up as no longer needed
rm(line)
rm(split_data)
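The per-line loops above work, but growing a dataframe with rbind inside a loop is slow for larger files. Below is a minimal sketch of a vectorized alternative, assuming the same tab-separated layout; read_labelled is a hypothetical helper, not part of the original code.
# Sketch (not run): read each labelled file in one call instead of rbind-ing row by row
read_labelled <- function(path) {  # hypothetical helper for illustration
  read.delim(path, header = FALSE, sep = "\t", quote = "",
             col.names = c("Sentence", "Score"),
             stringsAsFactors = FALSE, fileEncoding = "UTF-8")
}
# yelp_df <- read_labelled("yelp_labelled.txt")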
Combine the Three Dataframes and Preliminary EDA
# Combine all dataframes into a single dataframe and clean up the column names
merged_df <- rbind(yelp_df, imdb_df, amazon_df)
colnames(merged_df) <- c("Sentence", "Score")
# Count duplicate entries; they will be retained if found
sum(duplicated(merged_df))
## [1] 17
# Extract the duplicate entries to visualize
duplicates <- duplicated(merged_df)
duplicate_rows <- merged_df[duplicates, ]
print(duplicate_rows)
## Sentence
## 815 I love this place.
## 817 The food was terrible.
## 844 I won't be back.
## 847 I would not recommend this place.
## 1364 Definitely worth checking out.
## 1586 Not recommended.
## 1789 10/10
## 2286 Great phone!.
## 2408 Works great.
## 2525 Works great!.
## 2544 Don't buy this product.
## 2745 If you like a loud buzzing to override all your conversations, then this phone is for you!
## 2749 Does not fit.
## 2779 This is a great deal.
## 2793 Great Phone.
## 2893 Excellent product for the price.
## 2897 Great phone.
## Score
## 815 1
## 817 0
## 844 0
## 847 0
## 1364 1
## 1586 0
## 1789 1
## 2286 1
## 2408 1
## 2525 1
## 2544 0
## 2745 0
## 2749 0
## 2779 1
## 2793 1
## 2893 1
## 2897 1
# Identify missing (NA) values
colSums(is.na(merged_df))
## Sentence Score
## 0 0
Loop to Clean the Sentences and Add Them to the Dataframe
# Create a vector for storing the cleaned sentences
cleaned_text <- character(length = nrow(merged_df))
# Loop for cleaning each row
for (i in 1:nrow(merged_df)) {
  text <- merged_df$Sentence[i] # Get the current sentence
  text <- tolower(text) # Convert to lowercase
  text <- removeWords(text, stopwords("en")) # Remove stopwords, as they add little to no value
  text <- gsub("[^a-z ]", "", text) # Keep only lowercase letters and spaces; drop everything else
  text <- stri_trans_general(text, "Latin-ASCII") # Transliterate any remaining accented Latin characters to plain ASCII
  text <- iconv(text, "latin1", "ASCII", sub = "") # Drop any characters that still are not ASCII (such as emojis)
  text <- stripWhitespace(text) # Collapse runs of whitespace into single spaces
  cleaned_text[i] <- text # Store the result in the vector created above
}
# Add the cleaned text back to the dataframe as a new column
merged_df$Cleaned_Text <- cleaned_text
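A quick spot check (a sketch, output omitted here) makes it easy to compare the raw and cleaned text side by side:
# Sketch (not run): compare raw and cleaned text for the first few rows
head(merged_df[, c("Sentence", "Cleaned_Text")], 3)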
Preliminary Tokenization, Vocabulary Size, and Top 20 Tokens/Words
# Tokenize the text data
tokens <- unlist(strsplit(tolower(merged_df$Cleaned_Text), "\\s+"))
tokens <- tokens[tokens != ""] # Remove empty token(s)
# Calculate the vocabulary size
vocab_size <- length(unique(tokens))
cat("Vocabulary Size:", vocab_size, "\n")
## Vocabulary Size: 5074
# Create a frequency table and dataframe of the tokens
token_freq <- table(tokens)
token_freq_df <- data.frame(Token = names(token_freq), Frequency = as.numeric(token_freq))
# Sort the dataframe by frequency in descending order
token_freq_df <- token_freq_df[order(-token_freq_df$Frequency), ]
# Plot the top 20 tokens
top_tokens <- token_freq_df[1:20, ]
barplot(top_tokens$Frequency, names.arg = top_tokens$Token, horiz = TRUE,
main = "Top Token Frequencies", xlab = "Frequency", ylab = "", las = 1)

Calculate Sentence Length Distributions, Plot, and Find Longest Sentence for Word Embedding Length
# Calculate sentence lengths and plot results
sentence_lengths <- sapply(strsplit(merged_df$Cleaned_Text, "\\s+"), length)
summary(sentence_lengths)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 4.000 6.000 6.701 9.000 42.000
hist(sentence_lengths, main = "Distribution of Sentence Lengths", xlab = "Sentence Length")

# Find the longest sentence
index_of_longest_sentence <- which.max(sentence_lengths)
longest_sentence <- merged_df$Cleaned_Text[index_of_longest_sentence]
# Print the longest sentence and its length
cat("Longest Sentence:", longest_sentence, "\nLength of Longest Sentence:", sentence_lengths[index_of_longest_sentence], "tokens\n")
## Longest Sentence: masterful piece filmmaking many themes simmering occasionally boiling warts study poets bohemian selfindulgent wartime years span aerial bombardments london outward tranquillity welsh coastal retreat borderlines friendship lust love dedication art experience versus practical concerns jealousy rivalry cowardice egotism versus heroism selfsacrifice
## Length of Longest Sentence: 42 tokens
# Proposed word embedding dimension and sequence length
embedding_dim <- as.integer(round(sqrt(sqrt(vocab_size)), 0)) # Fourth root of the vocabulary size (i.e. the square root of the square root)
max_sequence_length <- max(sentence_lengths) # Longest cleaned sentence, which sets the padded sequence length
cat("Estimated Word Embedding Dimension:", embedding_dim, "\nMaximum Sequence Length:", max_sequence_length, "\n")
## Estimated Word Embedding Dimension: 8
## Maximum Sequence Length: 42
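For reference, the fourth-root heuristic works out to 5074^(1/4) = sqrt(sqrt(5074)) ≈ 8.4, which rounds to 8. The lines below are a sketch only (they are not part of the model built later) of how these two values would typically feed a Keras embedding layer.
# Sketch (not run): how embedding_dim and max_sequence_length would typically be used
layer_embedding(
  input_dim = vocab_size + 1,          # +1 because Keras reserves index 0 for padding
  output_dim = embedding_dim,          # 8, from the fourth-root heuristic
  input_length = max_sequence_length   # 42, the longest cleaned sentence
)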
Tokenize and Pad Sequences
# Tokenize the cleaned sentences
tokenizer <- text_tokenizer(num_words = vocab_size)
tokenizer$fit_on_texts(merged_df$Cleaned_Text)
sequences <- texts_to_sequences(tokenizer, merged_df$Cleaned_Text)
word_index <- tokenizer$word_index
# Pad sequences to the maximum sequence length
padded_sequences <- pad_sequences(sequences, maxlen = max_sequence_length, padding = "post", truncating = "post")
Displaying a Selected Sequence and Rebuilding It From the Word Index
# Select a sequence to display (index chosen manually)
k <- 8
sequences[k]
## [[1]]
## [1] 933 8 2057 258 26 2058 11 318 2059
padded_sequences[k, ]
## [1] 933 8 2057 258 26 2058 11 318 2059 0 0 0 0 0 0
## [16] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [31] 0 0 0 0 0 0 0 0 0 0 0 0
# Look up each integer in the sequence in the tokenizer's word_index to recover the matching word
# (word_index is ordered by index, so positional indexing lines up with the integer codes)
for (i in sequences[k]) {
  print(word_index[i])
}
## $potatoes
## [1] 933
##
## $like
## [1] 8
##
## $rubber
## [1] 2057
##
## $tell
## [1] 258
##
## $made
## [1] 26
##
## $ahead
## [1] 2058
##
## $time
## [1] 11
##
## $kept
## [1] 318
##
## $warmer
## [1] 2059
# Print the corresponding sentence in the cleaned dataframe column to double-check/confirm
merged_df$Cleaned_Text[k]
## [1] " potatoes like rubber tell made ahead time kept warmer"
Build the Model and Its Layers, then Display Summary
# Create a Keras sequential model
model <- keras_model_sequential()
# Add layers (input and hidden layers)
model %>%
  layer_dense(units = 50, activation = "relu", input_shape = c(max_sequence_length)) %>%
  layer_dense(units = 25, activation = "relu")
# Add final dense layer with selected activation function
model %>% layer_dense(units = 1, activation = "sigmoid")
# Compile the model
model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_adam(),
  metrics = c("accuracy")
)
# Display model summary
summary(model)
## Model: "sequential"
## ________________________________________________________________________________
## Layer (type) Output Shape Param #
## ================================================================================
## dense_1 (Dense) (None, 50) 2150
## dense (Dense) (None, 25) 1275
## dense_2 (Dense) (None, 1) 26
## ================================================================================
## Total params: 3451 (13.48 KB)
## Trainable params: 3451 (13.48 KB)
## Non-trainable params: 0 (0.00 Byte)
## ________________________________________________________________________________
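The parameter counts in the summary can be checked by hand: each dense layer has inputs × units weights plus units biases. A quick sketch of that arithmetic:
# Sketch: reproduce the parameter counts shown in the summary
42 * 50 + 50      # first dense layer:  2150
50 * 25 + 25      # second dense layer: 1275
25 * 1  + 1       # output layer:         26
2150 + 1275 + 26  # total:              3451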
Split Data (50% Training, 25% Test, and 25% Validation)
# Set seed for reproducibility
set.seed(123)
# Vector of row indices
n <- nrow(merged_df)
indices <- sample(1:n)
# Obtain size of each: Training = 50%, Testing = 25%, Validation = 25%
train_size <- floor(0.5 * n)
test_size <- floor(0.25 * n)
# Split the data into training, test, and validation sets
training_indices <- indices[1:train_size]
test_indices <- indices[(train_size + 1):(train_size + test_size)]
validation_indices <- indices[(train_size + test_size + 1):n]
training_data <- merged_df[training_indices, ]
test_data <- merged_df[test_indices, ]
validation_data <- merged_df[validation_indices, ]
# Output to csv
write.csv(training_data, "task2_train.csv")
write.csv(test_data, "task2_test.csv")
write.csv(validation_data, "task2_validation.csv")
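A brief sanity check (a sketch, output omitted) confirms the split sizes and the label balance within the training set:
# Sketch (not run): confirm split sizes and training-set class balance
c(train = length(training_indices), test = length(test_indices), validation = length(validation_indices))
prop.table(table(training_data$Score))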
Add Stopping Criteria to Model, Train the Model, and Confirm Results in Test and Validation
# Stopping criterion (validation loss)
callback <- callback_early_stopping(
  monitor = "val_loss",
  patience = 10,
  restore_best_weights = TRUE
)
# Train the model on the training set, monitoring the validation set at each epoch
history <- model %>% fit(
  x = padded_sequences[training_indices, ],
  y = training_data$Score,
  epochs = 100,
  validation_data = list(padded_sequences[validation_indices, ], validation_data$Score),
  callbacks = list(callback)
)
## Epoch 1/100
## 47/47 - 1s - loss: 52.3194 - accuracy: 0.5060 - val_loss: 34.8181 - val_accuracy: 0.5040 - 767ms/epoch - 16ms/step
## Epoch 2/100
## 47/47 - 0s - loss: 24.5642 - accuracy: 0.4987 - val_loss: 25.3565 - val_accuracy: 0.5040 - 115ms/epoch - 2ms/step
## Epoch 3/100
## 47/47 - 0s - loss: 15.5026 - accuracy: 0.5080 - val_loss: 19.8455 - val_accuracy: 0.5160 - 81ms/epoch - 2ms/step
## Epoch 4/100
## 47/47 - 0s - loss: 11.4664 - accuracy: 0.5447 - val_loss: 19.1791 - val_accuracy: 0.4693 - 82ms/epoch - 2ms/step
## Epoch 5/100
## 47/47 - 0s - loss: 9.5178 - accuracy: 0.5527 - val_loss: 16.3097 - val_accuracy: 0.4893 - 83ms/epoch - 2ms/step
## Epoch 6/100
## 47/47 - 0s - loss: 7.6397 - accuracy: 0.5560 - val_loss: 16.4038 - val_accuracy: 0.4800 - 95ms/epoch - 2ms/step
## Epoch 7/100
## 47/47 - 0s - loss: 6.9239 - accuracy: 0.5667 - val_loss: 14.9376 - val_accuracy: 0.4880 - 95ms/epoch - 2ms/step
## Epoch 8/100
## 47/47 - 0s - loss: 5.9349 - accuracy: 0.5780 - val_loss: 14.2694 - val_accuracy: 0.4707 - 88ms/epoch - 2ms/step
## Epoch 9/100
## 47/47 - 0s - loss: 5.6040 - accuracy: 0.5840 - val_loss: 12.9651 - val_accuracy: 0.5053 - 84ms/epoch - 2ms/step
## Epoch 10/100
## 47/47 - 0s - loss: 5.8966 - accuracy: 0.5920 - val_loss: 14.7346 - val_accuracy: 0.5187 - 89ms/epoch - 2ms/step
## Epoch 11/100
## 47/47 - 0s - loss: 4.4429 - accuracy: 0.6153 - val_loss: 12.3508 - val_accuracy: 0.4893 - 86ms/epoch - 2ms/step
## Epoch 12/100
## 47/47 - 0s - loss: 4.5397 - accuracy: 0.6193 - val_loss: 15.3783 - val_accuracy: 0.5187 - 83ms/epoch - 2ms/step
## Epoch 13/100
## 47/47 - 0s - loss: 4.7440 - accuracy: 0.5973 - val_loss: 12.3078 - val_accuracy: 0.5107 - 83ms/epoch - 2ms/step
## Epoch 14/100
## 47/47 - 0s - loss: 4.0009 - accuracy: 0.6000 - val_loss: 11.7986 - val_accuracy: 0.4947 - 92ms/epoch - 2ms/step
## Epoch 15/100
## 47/47 - 0s - loss: 3.7832 - accuracy: 0.6127 - val_loss: 13.0101 - val_accuracy: 0.5013 - 80ms/epoch - 2ms/step
## Epoch 16/100
## 47/47 - 0s - loss: 3.5182 - accuracy: 0.6073 - val_loss: 11.1019 - val_accuracy: 0.5013 - 81ms/epoch - 2ms/step
## Epoch 17/100
## 47/47 - 0s - loss: 3.1967 - accuracy: 0.6000 - val_loss: 12.6674 - val_accuracy: 0.5027 - 88ms/epoch - 2ms/step
## Epoch 18/100
## 47/47 - 0s - loss: 3.5379 - accuracy: 0.6080 - val_loss: 11.4331 - val_accuracy: 0.5053 - 90ms/epoch - 2ms/step
## Epoch 19/100
## 47/47 - 0s - loss: 3.0387 - accuracy: 0.6233 - val_loss: 11.5984 - val_accuracy: 0.5053 - 90ms/epoch - 2ms/step
## Epoch 20/100
## 47/47 - 0s - loss: 2.8419 - accuracy: 0.6207 - val_loss: 10.6635 - val_accuracy: 0.4867 - 85ms/epoch - 2ms/step
## Epoch 21/100
## 47/47 - 0s - loss: 3.9931 - accuracy: 0.6093 - val_loss: 10.4876 - val_accuracy: 0.5067 - 89ms/epoch - 2ms/step
## Epoch 22/100
## 47/47 - 0s - loss: 3.4120 - accuracy: 0.6000 - val_loss: 10.4521 - val_accuracy: 0.5347 - 94ms/epoch - 2ms/step
## Epoch 23/100
## 47/47 - 0s - loss: 2.7001 - accuracy: 0.6367 - val_loss: 13.4666 - val_accuracy: 0.5120 - 88ms/epoch - 2ms/step
## Epoch 24/100
## 47/47 - 0s - loss: 2.9629 - accuracy: 0.6060 - val_loss: 9.9912 - val_accuracy: 0.5227 - 85ms/epoch - 2ms/step
## Epoch 25/100
## 47/47 - 0s - loss: 2.2884 - accuracy: 0.6453 - val_loss: 9.9458 - val_accuracy: 0.5240 - 83ms/epoch - 2ms/step
## Epoch 26/100
## 47/47 - 0s - loss: 2.3572 - accuracy: 0.6407 - val_loss: 10.9240 - val_accuracy: 0.5013 - 80ms/epoch - 2ms/step
## Epoch 27/100
## 47/47 - 0s - loss: 2.3190 - accuracy: 0.6420 - val_loss: 10.3131 - val_accuracy: 0.5067 - 80ms/epoch - 2ms/step
## Epoch 28/100
## 47/47 - 0s - loss: 2.4596 - accuracy: 0.6540 - val_loss: 10.2341 - val_accuracy: 0.5040 - 90ms/epoch - 2ms/step
## Epoch 29/100
## 47/47 - 0s - loss: 2.4783 - accuracy: 0.6320 - val_loss: 9.7086 - val_accuracy: 0.5307 - 92ms/epoch - 2ms/step
## Epoch 30/100
## 47/47 - 0s - loss: 2.5239 - accuracy: 0.6460 - val_loss: 10.3411 - val_accuracy: 0.5213 - 102ms/epoch - 2ms/step
## Epoch 31/100
## 47/47 - 0s - loss: 2.7517 - accuracy: 0.6260 - val_loss: 11.0580 - val_accuracy: 0.5320 - 94ms/epoch - 2ms/step
## Epoch 32/100
## 47/47 - 0s - loss: 2.2263 - accuracy: 0.6300 - val_loss: 10.6270 - val_accuracy: 0.5120 - 91ms/epoch - 2ms/step
## Epoch 33/100
## 47/47 - 0s - loss: 1.9377 - accuracy: 0.6607 - val_loss: 10.8650 - val_accuracy: 0.5120 - 89ms/epoch - 2ms/step
## Epoch 34/100
## 47/47 - 0s - loss: 2.2074 - accuracy: 0.6440 - val_loss: 9.8891 - val_accuracy: 0.4987 - 82ms/epoch - 2ms/step
## Epoch 35/100
## 47/47 - 0s - loss: 1.8255 - accuracy: 0.6513 - val_loss: 9.3729 - val_accuracy: 0.5213 - 89ms/epoch - 2ms/step
## Epoch 36/100
## 47/47 - 0s - loss: 1.7513 - accuracy: 0.6540 - val_loss: 9.7910 - val_accuracy: 0.5027 - 82ms/epoch - 2ms/step
## Epoch 37/100
## 47/47 - 0s - loss: 1.7425 - accuracy: 0.6733 - val_loss: 10.6169 - val_accuracy: 0.5200 - 82ms/epoch - 2ms/step
## Epoch 38/100
## 47/47 - 0s - loss: 2.0313 - accuracy: 0.6407 - val_loss: 10.1732 - val_accuracy: 0.4960 - 81ms/epoch - 2ms/step
## Epoch 39/100
## 47/47 - 0s - loss: 1.8696 - accuracy: 0.6573 - val_loss: 9.9096 - val_accuracy: 0.5120 - 85ms/epoch - 2ms/step
## Epoch 40/100
## 47/47 - 0s - loss: 2.1738 - accuracy: 0.6500 - val_loss: 10.0697 - val_accuracy: 0.5267 - 86ms/epoch - 2ms/step
## Epoch 41/100
## 47/47 - 0s - loss: 2.0580 - accuracy: 0.6573 - val_loss: 10.3047 - val_accuracy: 0.5000 - 91ms/epoch - 2ms/step
## Epoch 42/100
## 47/47 - 0s - loss: 2.0123 - accuracy: 0.6473 - val_loss: 9.8234 - val_accuracy: 0.5000 - 93ms/epoch - 2ms/step
## Epoch 43/100
## 47/47 - 0s - loss: 1.4698 - accuracy: 0.6620 - val_loss: 10.1582 - val_accuracy: 0.5200 - 86ms/epoch - 2ms/step
## Epoch 44/100
## 47/47 - 0s - loss: 1.9205 - accuracy: 0.6593 - val_loss: 9.1838 - val_accuracy: 0.5307 - 87ms/epoch - 2ms/step
## Epoch 45/100
## 47/47 - 0s - loss: 1.8189 - accuracy: 0.6753 - val_loss: 9.6248 - val_accuracy: 0.5120 - 90ms/epoch - 2ms/step
## Epoch 46/100
## 47/47 - 0s - loss: 1.7115 - accuracy: 0.6647 - val_loss: 9.7746 - val_accuracy: 0.5160 - 83ms/epoch - 2ms/step
## Epoch 47/100
## 47/47 - 0s - loss: 1.5914 - accuracy: 0.6593 - val_loss: 10.0012 - val_accuracy: 0.4893 - 85ms/epoch - 2ms/step
## Epoch 48/100
## 47/47 - 0s - loss: 2.0462 - accuracy: 0.6533 - val_loss: 10.1705 - val_accuracy: 0.4907 - 84ms/epoch - 2ms/step
## Epoch 49/100
## 47/47 - 0s - loss: 1.7375 - accuracy: 0.6487 - val_loss: 9.8876 - val_accuracy: 0.5187 - 80ms/epoch - 2ms/step
## Epoch 50/100
## 47/47 - 0s - loss: 1.8975 - accuracy: 0.6620 - val_loss: 9.7474 - val_accuracy: 0.5373 - 82ms/epoch - 2ms/step
## Epoch 51/100
## 47/47 - 0s - loss: 1.5605 - accuracy: 0.6847 - val_loss: 9.8519 - val_accuracy: 0.5027 - 91ms/epoch - 2ms/step
## Epoch 52/100
## 47/47 - 0s - loss: 1.4901 - accuracy: 0.6680 - val_loss: 10.2555 - val_accuracy: 0.5173 - 89ms/epoch - 2ms/step
## Epoch 53/100
## 47/47 - 0s - loss: 1.4502 - accuracy: 0.6700 - val_loss: 9.7645 - val_accuracy: 0.4947 - 97ms/epoch - 2ms/step
## Epoch 54/100
## 47/47 - 0s - loss: 1.5067 - accuracy: 0.6560 - val_loss: 9.4868 - val_accuracy: 0.5213 - 95ms/epoch - 2ms/step
# Evaluate the model on the test data
test_results <- model %>% evaluate(
  x = padded_sequences[test_indices, ],
  y = test_data$Score
)
## 24/24 - 0s - loss: 13.0331 - accuracy: 0.4853 - 34ms/epoch - 1ms/step
# Print test results
test_results
## loss accuracy
## 13.0330811 0.4853333
# Evaluate the model on the validation data
validation_results <- model %>% evaluate(
  x = padded_sequences[validation_indices, ],
  y = validation_data$Score
)
## 24/24 - 0s - loss: 9.1838 - accuracy: 0.5307 - 34ms/epoch - 1ms/step
# Print validation results
validation_results
## loss accuracy
## 9.1837606 0.5306666
Predict the Sentiment of a Selected Sequence
# Use a single test-set sequence to predict the user's sentiment
j <- 8
selected_sequence <- padded_sequences[test_indices[j], , drop = FALSE] # Index through test_indices so the sequence matches test_data row j
# Make predictions on the selected sequence
predictions <- model %>% predict(selected_sequence)
## 1/1 - 0s - 67ms/epoch - 67ms/step
# Interpret the predictions (assuming 0.5 as the threshold for positive/negative sentiment)
if (predictions >= 0.5) {
  sentiment <- "Positive"
} else {
  sentiment <- "Negative"
}
# Print the selected sequence and its sentiment prediction
cat("Selected Sequence:", test_data$Cleaned_Text[j], "\nPredicted Sentiment:", sentiment, "\nActual Sentiment:", ifelse(test_data$Score[j] == 1, "Positive", "Negative"), "\n")
## Selected Sequence: get absolutely horrible reception apartment phones problem
## Predicted Sentiment: Negative
## Actual Sentiment: Negative
Save Model
# Save the trained model
model %>% save_model_hdf5("task2.keras")
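To reuse the trained model in a later session, the saved file can be reloaded; a minimal sketch (not run), assuming the file written above loads back with the matching keras loader:
# Sketch (not run): reload the saved model for later use
reloaded_model <- load_model_hdf5("task2.keras")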