# NOTE: this script used to start with `rm(list = ls())`. That call was
# removed: it deletes the caller's workspace objects when the file is sourced,
# yet does not produce a clean session (attached packages and options are left
# untouched). Restart R when a fresh session is needed.
library(fastText)

# Print the command-line arguments accepted by the "supervised" command.
print_parameters(command = "supervised")
## Empty input or output path.
## 
## The following arguments are mandatory:
##   -input              training file path
##   -output             output file path
## 
## The following arguments are optional:
##   -verbose            verbosity level [2]
## 
## The following arguments for the dictionary are optional:
##   -minCount           minimal number of word occurences [1]
##   -minCountLabel      minimal number of label occurences [0]
##   -wordNgrams         max length of word ngram [1]
##   -bucket             number of buckets [2000000]
##   -minn               min length of char ngram [0]
##   -maxn               max length of char ngram [0]
##   -t                  sampling threshold [0.0001]
##   -label              labels prefix [__label__]
## 
## The following arguments for training are optional:
##   -lr                 learning rate [0.1]
##   -lrUpdateRate       change the rate of updates for the learning rate [100]
##   -dim                size of word vectors [100]
##   -ws                 size of the context window [5]
##   -epoch              number of epochs [5]
##   -neg                number of negatives sampled [5]
##   -loss               loss function {ns, hs, softmax, one-vs-all} [softmax]
##   -thread             number of threads [12]
##   -pretrainedVectors  pretrained word vectors for supervised learning []
##   -saveOutput         whether output params should be saved [false]
## 
## The following arguments for quantization are optional:
##   -cutoff             number of words and ngrams to retain [0]
##   -retrain            whether embeddings are finetuned if a cutoff is applied [false]
##   -qnorm              whether the norm is quantized separately [false]
##   -qout               whether the classifier is quantized [false]
##   -dsub               size of each sub-vector [2]
# Print the usage text for fastText's predict / predict-prob commands.
printPredictUsage()
## usage: fasttext predict[-prob] <model> <test-data> [<k>] [<th>]
## 
##   <model>      model filename
##   <test-data>  test data filename (if -, read from stdin)
##   <k>          (optional; 1 by default) predict top k labels
##   <th>         (optional; 0.0 by default) probability threshold
# cbow
# Root directory holding the example data. The trailing slash is kept on
# purpose: later paths are built with paste0(dir_path, ...).
dir_path <- "/Users/xut2/Desktop/fastText_data/"

# List every file below dir_path (recursively, with full paths). The former
# `pattern = "*."` was dropped: `pattern` is a regular expression, not a glob,
# and a leading `*` is not a well-formed regex. All files are wanted anyway.
file_name <- list.files(dir_path, full.names = TRUE, recursive = TRUE)
file_name
##  [1] "/Users/xut2/Desktop/fastText_data//analogy_queries.txt"                                        
##  [2] "/Users/xut2/Desktop/fastText_data//cooking.stackexchange/cooking.stackexchange.id"             
##  [3] "/Users/xut2/Desktop/fastText_data//cooking.stackexchange/cooking.stackexchange.txt"            
##  [4] "/Users/xut2/Desktop/fastText_data//cooking.stackexchange/cooking.train"                        
##  [5] "/Users/xut2/Desktop/fastText_data//cooking.stackexchange/cooking.valid"                        
##  [6] "/Users/xut2/Desktop/fastText_data//cooking.stackexchange/readme.txt"                           
##  [7] "/Users/xut2/Desktop/fastText_data//example_text.txt"                                           
##  [8] "/Users/xut2/Desktop/fastText_data//fasttest.html"                                              
##  [9] "/Users/xut2/Desktop/fastText_data//fasttest.R"                                                 
## [10] "/Users/xut2/Desktop/fastText_data//queries.txt"                                                
## [11] "/Users/xut2/Desktop/fastText_data//rsconnect/documents/fasttest.R/rpubs.com/rpubs/Document.dcf"
## [12] "/Users/xut2/Desktop/fastText_data//text_sentence.txt"
# Display the cbow example corpus: one line of text per row in column V1.
# fixed = TRUE matches the file name literally rather than as a regex.
read.delim(
  grep("example_text.txt", file_name, fixed = TRUE, value = TRUE),
  header = FALSE,
  stringsAsFactors = FALSE
)
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    V1
## 1 The term planet is ancient, with ties to history, astrology, science, mythology, and religion. Several planets in the Solar System can be seen with the naked eye. These were regarded by many early cultures as divine, or as emissaries of deities. As scientific knowledge advanced, human perception of the planets changed, incorporating a number of disparate objects. In 2006, the International Astronomical Union (IAU) officially adopted a resolution defining planets within the Solar System. This definition is controversial because it excludes many objects of planetary mass based on where or what they orbit. 
## 2                                                                                                                                                                                                                                                           Although eight of the planetary bodies discovered before 1950 remain planets under the modern definition, some celestial bodies, such as Ceres, Pallas, Juno and Vesta (each an object in the solar asteroid belt), and Pluto (the first trans-Neptunian object discovered), that were once considered planets by the scientific community, are no longer viewed as such.
## 3                                                                                                                                                                                                                                                                                               The planets were thought by Ptolemy to orbit Earth in deferent and epicycle motions. Although the idea that the planets orbited the Sun had been suggested many times, it was not until the 17th century that this view was supported by evidence from the first telescopic astronomical observations, performed by Galileo Galilei. 
## 4                                                                             At about the same time, by careful analysis of pre-telescopic observation data collected by Tycho Brahe, Johannes Kepler found the planets orbits were not circular but elliptical. As observational tools improved, astronomers saw that, like Earth, the planets rotated around tilted axes, and some shared such features as ice caps and seasons. Since the dawn of the Space Age, close observation by space probes has found that Earth and the other planets share characteristics such as volcanism, hurricanes, tectonics, and even hydrology.
## 5                                                                                                                                                                                          Planets are generally divided into two main types: large lowdensity giant planets, and smaller rocky terrestrials. Under IAU definitions, there are eight planets in the Solar System. In order of increasing distance from the Sun, they are the four terrestrials, Mercury, Venus, Earth, and Mars, then the four giant planets, Jupiter, Saturn, Uranus, and Neptune. Six of the planets are orbited by one or more natural satellites.
#View(data_1)
# Arguments for training cbow word vectors on the example corpus.
# NOTE(review): `output` is the bare data directory, whereas the supervised
# run further down passes a file prefix and its model is later loaded from
# `<prefix>.bin`. Presumably this run therefore writes its model to a file
# named just `.bin` inside dir_path -- confirm, and consider a file prefix.
list_params <- list(
  command = "cbow",
  lr = 0.1,
  dim = 50,
  # fixed = TRUE: match the file name literally, not as a regular expression.
  input = grep("example_text.txt", file_name, fixed = TRUE, value = TRUE),
  output = dir_path,
  verbose = 2,
  thread = 1
)
list_params
## $command
## [1] "cbow"
## 
## $lr
## [1] 0.1
## 
## $dim
## [1] 50
## 
## $input
## [1] "/Users/xut2/Desktop/fastText_data//example_text.txt"
## 
## $output
## [1] "/Users/xut2/Desktop/fastText_data/"
## 
## $verbose
## [1] 2
## 
## $thread
## [1] 1
# Train the cbow model. Progress lines go to a dated log file and the elapsed
# time is printed once training has finished.
cbow_log_file <- paste0(dir_path, Sys.Date(), "_", "-cbow_logs_1.txt")
res <- fasttext_interface(
  list_params,
  path_output = cbow_log_file,
  MilliSecs = 5,
  remove_previous_file = TRUE,
  print_process_time = TRUE
)
## 
## Read 0M words
## Number of words:  8
## Number of labels: 0
## 
## Progress: 105.5% words/sec/thread:    1924 lr: -0.005537 loss:  4.070489 ETA:   0h 0m
## Progress: 100.0% words/sec/thread:    1924 lr:  0.000000 loss:  4.070489 ETA:   0h 0m
## Elapsed time: 0 hours and 0 minutes and 3 seconds.
# Show the value returned by fasttext_interface(): the fastText argument vector.
res
##  [1] "fasttext"                                           
##  [2] "cbow"                                               
##  [3] "-lr"                                                
##  [4] "0.1"                                                
##  [5] "-dim"                                               
##  [6] "50"                                                 
##  [7] "-input"                                             
##  [8] "/Users/xut2/Desktop/fastText_data//example_text.txt"
##  [9] "-output"                                            
## [10] "/Users/xut2/Desktop/fastText_data/"                 
## [11] "-verbose"                                           
## [12] "2"                                                  
## [13] "-thread"                                            
## [14] "1"
# Read back the progress log written during cbow training (one line per row).
read.delim(
  paste0(dir_path, Sys.Date(), "_", "-cbow_logs_1.txt"),
  header = FALSE,
  stringsAsFactors = FALSE
)
##                                                                                        V1
## 1 Progress: 1e+02% words/sec/thread:    1924 lr: -0.00553672 loss:   4.07049 ETA:   0h 0m
## 2   Progress: 1e+02% words/sec/thread:    1924 lr:         0 loss:   4.07049 ETA:   0h 0m
#########################################
# supervised
# Load the training set, one labelled question per row. quote = "" stops quote
# characters inside the questions from being treated as field quoting.
data_1 <- read.delim(
  grep("cooking.train", file_name, fixed = TRUE, value = TRUE),
  header = FALSE,
  stringsAsFactors = FALSE,
  quote = ""
)
dim(data_1)
data_1[1:5, ]
## [1] 12404     1
## [1] "__label__sauce __label__cheese How much does potato starch affect a cheese sauce recipe?"                     
## [2] "__label__food-safety __label__acidity Dangerous pathogens capable of growing in acidic environments"          
## [3] "__label__cast-iron __label__stove How do I cover up the white spots on my cast iron stove?"                   
## [4] "__label__restaurant Michelin Three Star Restaurant; but if the chef is not there"                             
## [5] "__label__knife-skills __label__dicing Without knife skills, how can I quickly and accurately dice vegetables?"
# Arguments for training a supervised classifier on the cooking questions.
# `output` is a file prefix; the predict/test steps later load `<output>.bin`.
list_params <- list(
  command = "supervised",
  lr = 0.1,
  dim = 50,
  # fixed = TRUE: match the file name literally, not as a regular expression.
  input = grep("cooking.train", file_name, fixed = TRUE, value = TRUE),
  output = paste0(dir_path, Sys.Date(), "_", "model_cooking"),
  verbose = 2,
  thread = 4
)

# Train the classifier, writing the progress lines to a dated log file.
res <- fasttext_interface(
  list_params,
  path_output = paste0(dir_path, Sys.Date(), "_", "sup_logs_2.txt"),
  MilliSecs = 5,
  remove_previous_file = TRUE,
  print_process_time = TRUE
)
## 
## Read 0M words
## Number of words:  14543
## Number of labels: 735
## 
## Progress:   0.5% words/sec/thread:  159105 lr:  0.099542 loss: 17.501215 ETA:   0h 0m
## Progress:   1.0% words/sec/thread:  171289 lr:  0.099013 loss: 17.636703 ETA:   0h 0m
## Progress:   1.6% words/sec/thread:  177999 lr:  0.098369 loss: 17.681360 ETA:   0h 0m
## Progress:   2.2% words/sec/thread:  181042 lr:  0.097835 loss: 17.697992 ETA:   0h 0m
## Progress:   2.7% words/sec/thread:  181786 lr:  0.097327 loss: 17.709469 ETA:   0h 0m
## Progress:   3.3% words/sec/thread:  183486 lr:  0.096677 loss: 17.717424 ETA:   0h 0m
## Progress:   3.9% words/sec/thread:  184542 lr:  0.096134 loss: 17.721966 ETA:   0h 0m
## Progress:   4.4% words/sec/thread:  185688 lr:  0.095565 loss: 17.725643 ETA:   0h 0m
## Progress:   5.0% words/sec/thread:  186384 lr:  0.095018 loss: 17.728609 ETA:   0h 0m
## Progress:   5.7% words/sec/thread:  189852 lr:  0.094337 loss: 17.731201 ETA:   0h 0m
## Progress:   6.3% words/sec/thread:  192430 lr:  0.093733 loss: 17.697847 ETA:   0h 0m
## Progress:   6.9% words/sec/thread:  194403 lr:  0.093105 loss: 17.531673 ETA:   0h 0m
## Progress:   7.5% words/sec/thread:  196880 lr:  0.092473 loss: 17.274103 ETA:   0h 0m
## Progress:   8.3% words/sec/thread:  198440 lr:  0.091740 loss: 16.976479 ETA:   0h 0m
## Progress:   9.0% words/sec/thread:  200641 lr:  0.091034 loss: 16.629093 ETA:   0h 0m
## Progress:   9.6% words/sec/thread:  202578 lr:  0.090362 loss: 16.223724 ETA:   0h 0m
## Progress:  10.4% words/sec/thread:  204137 lr:  0.089610 loss: 15.888644 ETA:   0h 0m
## Progress:  11.1% words/sec/thread:  205062 lr:  0.088909 loss: 15.498935 ETA:   0h 0m
## Progress:  11.8% words/sec/thread:  206740 lr:  0.088205 loss: 15.154483 ETA:   0h 0m
## Progress:  12.6% words/sec/thread:  207645 lr:  0.087448 loss: 14.905954 ETA:   0h 0m
## Progress:  13.3% words/sec/thread:  208841 lr:  0.086679 loss: 14.641057 ETA:   0h 0m
## Progress:  14.1% words/sec/thread:  209598 lr:  0.085918 loss: 14.354190 ETA:   0h 0m
## Progress:  14.9% words/sec/thread:  211103 lr:  0.085102 loss: 14.131842 ETA:   0h 0m
## Progress:  15.7% words/sec/thread:  212350 lr:  0.084283 loss: 13.874787 ETA:   0h 0m
## Progress:  16.5% words/sec/thread:  213104 lr:  0.083532 loss: 13.727062 ETA:   0h 0m
## Progress:  17.2% words/sec/thread:  214349 lr:  0.082835 loss: 13.592310 ETA:   0h 0m
## Progress:  18.0% words/sec/thread:  215584 lr:  0.082031 loss: 13.414429 ETA:   0h 0m
## Progress:  18.8% words/sec/thread:  216454 lr:  0.081229 loss: 13.258138 ETA:   0h 0m
## Progress:  19.5% words/sec/thread:  216972 lr:  0.080452 loss: 13.093490 ETA:   0h 0m
## Progress:  20.3% words/sec/thread:  217375 lr:  0.079679 loss: 12.963587 ETA:   0h 0m
## Progress:  21.0% words/sec/thread:  217582 lr:  0.078985 loss: 12.828203 ETA:   0h 0m
## Progress:  21.8% words/sec/thread:  217815 lr:  0.078220 loss: 12.738969 ETA:   0h 0m
## Progress:  22.5% words/sec/thread:  218335 lr:  0.077512 loss: 12.622124 ETA:   0h 0m
## Progress:  23.3% words/sec/thread:  218605 lr:  0.076736 loss: 12.532703 ETA:   0h 0m
## Progress:  24.0% words/sec/thread:  219077 lr:  0.076032 loss: 12.448047 ETA:   0h 0m
## Progress:  24.8% words/sec/thread:  219577 lr:  0.075237 loss: 12.336203 ETA:   0h 0m
## Progress:  25.4% words/sec/thread:  220159 lr:  0.074571 loss: 12.273686 ETA:   0h 0m
## Progress:  26.2% words/sec/thread:  220748 lr:  0.073759 loss: 12.169062 ETA:   0h 0m
## Progress:  27.1% words/sec/thread:  221510 lr:  0.072921 loss: 12.084665 ETA:   0h 0m
## Progress:  27.8% words/sec/thread:  221841 lr:  0.072158 loss: 12.035064 ETA:   0h 0m
## Progress:  28.6% words/sec/thread:  222289 lr:  0.071352 loss: 11.965860 ETA:   0h 0m
## Progress:  29.3% words/sec/thread:  222640 lr:  0.070706 loss: 11.909260 ETA:   0h 0m
## Progress:  30.1% words/sec/thread:  223214 lr:  0.069873 loss: 11.853073 ETA:   0h 0m
## Progress:  30.9% words/sec/thread:  223334 lr:  0.069096 loss: 11.783301 ETA:   0h 0m
## Progress:  31.6% words/sec/thread:  223302 lr:  0.068381 loss: 11.724166 ETA:   0h 0m
## Progress:  32.4% words/sec/thread:  223489 lr:  0.067593 loss: 11.640515 ETA:   0h 0m
## Progress:  33.2% words/sec/thread:  223709 lr:  0.066800 loss: 11.586638 ETA:   0h 0m
## Progress:  34.0% words/sec/thread:  223732 lr:  0.066034 loss: 11.525537 ETA:   0h 0m
## Progress:  34.8% words/sec/thread:  223956 lr:  0.065230 loss: 11.472094 ETA:   0h 0m
## Progress:  35.6% words/sec/thread:  224420 lr:  0.064398 loss: 11.423084 ETA:   0h 0m
## Progress:  36.4% words/sec/thread:  224757 lr:  0.063587 loss: 11.371353 ETA:   0h 0m
## Progress:  37.3% words/sec/thread:  225407 lr:  0.062717 loss: 11.326566 ETA:   0h 0m
## Progress:  38.0% words/sec/thread:  225689 lr:  0.062000 loss: 11.289287 ETA:   0h 0m
## Progress:  38.8% words/sec/thread:  225811 lr:  0.061242 loss: 11.248221 ETA:   0h 0m
## Progress:  39.5% words/sec/thread:  225710 lr:  0.060491 loss: 11.203513 ETA:   0h 0m
## Progress:  40.2% words/sec/thread:  225249 lr:  0.059800 loss: 11.175338 ETA:   0h 0m
## Progress:  40.9% words/sec/thread:  225415 lr:  0.059117 loss: 11.150327 ETA:   0h 0m
## Progress:  41.7% words/sec/thread:  225569 lr:  0.058323 loss: 11.114096 ETA:   0h 0m
## Progress:  42.4% words/sec/thread:  225532 lr:  0.057563 loss: 11.078388 ETA:   0h 0m
## Progress:  43.1% words/sec/thread:  225509 lr:  0.056940 loss: 11.036054 ETA:   0h 0m
## Progress:  43.9% words/sec/thread:  225626 lr:  0.056150 loss: 11.001856 ETA:   0h 0m
## Progress:  44.6% words/sec/thread:  225671 lr:  0.055374 loss: 10.968581 ETA:   0h 0m
## Progress:  45.3% words/sec/thread:  225721 lr:  0.054708 loss: 10.945724 ETA:   0h 0m
## Progress:  46.1% words/sec/thread:  226055 lr:  0.053946 loss: 10.929743 ETA:   0h 0m
## Progress:  46.9% words/sec/thread:  226283 lr:  0.053135 loss: 10.905047 ETA:   0h 0m
## Progress:  47.7% words/sec/thread:  226684 lr:  0.052287 loss: 10.885260 ETA:   0h 0m
## Progress:  48.3% words/sec/thread:  226747 lr:  0.051657 loss: 10.877339 ETA:   0h 0m
## Progress:  49.1% words/sec/thread:  226863 lr:  0.050864 loss: 10.845579 ETA:   0h 0m
## Progress:  49.9% words/sec/thread:  226934 lr:  0.050072 loss: 10.813026 ETA:   0h 0m
## Progress:  50.6% words/sec/thread:  226818 lr:  0.049385 loss: 10.789704 ETA:   0h 0m
## Progress:  51.2% words/sec/thread:  226751 lr:  0.048766 loss: 10.777618 ETA:   0h 0m
## Progress:  52.0% words/sec/thread:  226630 lr:  0.048022 loss: 10.763622 ETA:   0h 0m
## Progress:  52.8% words/sec/thread:  226715 lr:  0.047235 loss: 10.744260 ETA:   0h 0m
## Progress:  53.4% words/sec/thread:  225984 lr:  0.046639 loss: 10.725027 ETA:   0h 0m
## Progress:  54.0% words/sec/thread:  225984 lr:  0.046019 loss: 10.713577 ETA:   0h 0m
## Progress:  54.7% words/sec/thread:  225902 lr:  0.045276 loss: 10.689016 ETA:   0h 0m
## Progress:  55.3% words/sec/thread:  225889 lr:  0.044669 loss: 10.674289 ETA:   0h 0m
## Progress:  56.1% words/sec/thread:  225769 lr:  0.043918 loss: 10.670632 ETA:   0h 0m
## Progress:  56.9% words/sec/thread:  225797 lr:  0.043142 loss: 10.661281 ETA:   0h 0m
## Progress:  57.7% words/sec/thread:  225900 lr:  0.042348 loss: 10.642025 ETA:   0h 0m
## Progress:  58.4% words/sec/thread:  225956 lr:  0.041559 loss: 10.611085 ETA:   0h 0m
## Progress:  59.2% words/sec/thread:  225968 lr:  0.040826 loss: 10.582076 ETA:   0h 0m
## Progress:  60.0% words/sec/thread:  226044 lr:  0.040040 loss: 10.568769 ETA:   0h 0m
## Progress:  60.7% words/sec/thread:  226018 lr:  0.039277 loss: 10.556641 ETA:   0h 0m
## Progress:  61.5% words/sec/thread:  226206 lr:  0.038458 loss: 10.543881 ETA:   0h 0m
## Progress:  62.3% words/sec/thread:  226400 lr:  0.037691 loss: 10.527083 ETA:   0h 0m
## Progress:  63.1% words/sec/thread:  226443 lr:  0.036911 loss: 10.508780 ETA:   0h 0m
## Progress:  63.7% words/sec/thread:  226265 lr:  0.036264 loss: 10.502225 ETA:   0h 0m
## Progress:  64.4% words/sec/thread:  225892 lr:  0.035602 loss: 10.494126 ETA:   0h 0m
## Progress:  65.1% words/sec/thread:  225986 lr:  0.034915 loss: 10.477097 ETA:   0h 0m
## Progress:  65.9% words/sec/thread:  226008 lr:  0.034133 loss: 10.461662 ETA:   0h 0m
## Progress:  66.5% words/sec/thread:  226076 lr:  0.033492 loss: 10.443880 ETA:   0h 0m
## Progress:  67.2% words/sec/thread:  226142 lr:  0.032835 loss: 10.432328 ETA:   0h 0m
## Progress:  67.9% words/sec/thread:  226017 lr:  0.032088 loss: 10.420769 ETA:   0h 0m
## Progress:  68.6% words/sec/thread:  226093 lr:  0.031411 loss: 10.407553 ETA:   0h 0m
## Progress:  69.4% words/sec/thread:  226201 lr:  0.030621 loss: 10.398392 ETA:   0h 0m
## Progress:  70.0% words/sec/thread:  226066 lr:  0.029999 loss: 10.390065 ETA:   0h 0m
## Progress:  70.7% words/sec/thread:  226111 lr:  0.029285 loss: 10.374371 ETA:   0h 0m
## Progress:  71.4% words/sec/thread:  226127 lr:  0.028636 loss: 10.357211 ETA:   0h 0m
## Progress:  72.0% words/sec/thread:  226019 lr:  0.028035 loss: 10.354519 ETA:   0h 0m
## Progress:  72.7% words/sec/thread:  226046 lr:  0.027250 loss: 10.342257 ETA:   0h 0m
## Progress:  73.4% words/sec/thread:  226077 lr:  0.026601 loss: 10.325818 ETA:   0h 0m
## Progress:  74.0% words/sec/thread:  226081 lr:  0.025984 loss: 10.320273 ETA:   0h 0m
## Progress:  74.6% words/sec/thread:  226052 lr:  0.025374 loss: 10.302003 ETA:   0h 0m
## Progress:  75.3% words/sec/thread:  226037 lr:  0.024729 loss: 10.299859 ETA:   0h 0m
## Progress:  76.0% words/sec/thread:  225984 lr:  0.024037 loss: 10.284614 ETA:   0h 0m
## Progress:  76.6% words/sec/thread:  226100 lr:  0.023383 loss: 10.268554 ETA:   0h 0m
## Progress:  77.4% words/sec/thread:  226095 lr:  0.022605 loss: 10.251839 ETA:   0h 0m
## Progress:  78.0% words/sec/thread:  226023 lr:  0.022015 loss: 10.242620 ETA:   0h 0m
## Progress:  78.7% words/sec/thread:  225973 lr:  0.021270 loss: 10.235866 ETA:   0h 0m
## Progress:  79.3% words/sec/thread:  225954 lr:  0.020655 loss: 10.228118 ETA:   0h 0m
## Progress:  79.9% words/sec/thread:  225783 lr:  0.020082 loss: 10.210993 ETA:   0h 0m
## Progress:  80.6% words/sec/thread:  225575 lr:  0.019389 loss: 10.202638 ETA:   0h 0m
## Progress:  81.2% words/sec/thread:  225327 lr:  0.018781 loss: 10.189840 ETA:   0h 0m
## Progress:  81.9% words/sec/thread:  225102 lr:  0.018086 loss: 10.175946 ETA:   0h 0m
## Progress:  82.4% words/sec/thread:  224750 lr:  0.017586 loss: 10.174859 ETA:   0h 0m
## Progress:  83.1% words/sec/thread:  224466 lr:  0.016924 loss: 10.166746 ETA:   0h 0m
## Progress:  83.6% words/sec/thread:  224134 lr:  0.016421 loss: 10.161237 ETA:   0h 0m
## Progress:  84.1% words/sec/thread:  223743 lr:  0.015911 loss: 10.144533 ETA:   0h 0m
## Progress:  84.6% words/sec/thread:  223486 lr:  0.015391 loss: 10.140337 ETA:   0h 0m
## Progress:  85.2% words/sec/thread:  223221 lr:  0.014784 loss: 10.130643 ETA:   0h 0m
## Progress:  85.8% words/sec/thread:  222889 lr:  0.014154 loss: 10.124562 ETA:   0h 0m
## Progress:  86.4% words/sec/thread:  222522 lr:  0.013595 loss: 10.117062 ETA:   0h 0m
## Progress:  86.9% words/sec/thread:  222137 lr:  0.013090 loss: 10.114798 ETA:   0h 0m
## Progress:  87.5% words/sec/thread:  221802 lr:  0.012484 loss: 10.106933 ETA:   0h 0m
## Progress:  88.1% words/sec/thread:  221282 lr:  0.011867 loss: 10.102676 ETA:   0h 0m
## Progress:  88.6% words/sec/thread:  220993 lr:  0.011352 loss: 10.098734 ETA:   0h 0m
## Progress:  89.2% words/sec/thread:  220637 lr:  0.010779 loss: 10.086405 ETA:   0h 0m
## Progress:  89.7% words/sec/thread:  220370 lr:  0.010273 loss: 10.078354 ETA:   0h 0m
## Progress:  90.3% words/sec/thread:  220062 lr:  0.009726 loss: 10.071457 ETA:   0h 0m
## Progress:  90.9% words/sec/thread:  219654 lr:  0.009109 loss: 10.068221 ETA:   0h 0m
## Progress:  91.4% words/sec/thread:  219323 lr:  0.008602 loss: 10.059877 ETA:   0h 0m
## Progress:  91.9% words/sec/thread:  219005 lr:  0.008090 loss: 10.050805 ETA:   0h 0m
## Progress:  92.6% words/sec/thread:  218728 lr:  0.007447 loss: 10.047665 ETA:   0h 0m
## Progress:  93.1% words/sec/thread:  218330 lr:  0.006923 loss: 10.046105 ETA:   0h 0m
## Progress:  93.6% words/sec/thread:  217723 lr:  0.006378 loss: 10.040606 ETA:   0h 0m
## Progress:  94.1% words/sec/thread:  217337 lr:  0.005894 loss: 10.033751 ETA:   0h 0m
## Progress:  94.6% words/sec/thread:  216939 lr:  0.005416 loss: 10.028817 ETA:   0h 0m
## Progress:  95.1% words/sec/thread:  216522 lr:  0.004931 loss: 10.025298 ETA:   0h 0m
## Progress:  95.7% words/sec/thread:  216257 lr:  0.004285 loss: 10.014956 ETA:   0h 0m
## Progress:  96.3% words/sec/thread:  216073 lr:  0.003691 loss: 10.013096 ETA:   0h 0m
## Progress:  96.9% words/sec/thread:  215774 lr:  0.003069 loss: 10.000768 ETA:   0h 0m
## Progress:  97.5% words/sec/thread:  215583 lr:  0.002533 loss:  9.992087 ETA:   0h 0m
## Progress:  98.1% words/sec/thread:  215227 lr:  0.001947 loss:  9.991216 ETA:   0h 0m
## Progress:  98.7% words/sec/thread:  215022 lr:  0.001283 loss:  9.981398 ETA:   0h 0m
## Progress:  99.4% words/sec/thread:  214924 lr:  0.000582 loss:  9.971148 ETA:   0h 0m
## Progress: 100.0% words/sec/thread:  214789 lr: -0.000003 loss:  9.966265 ETA:   0h 0m
## Progress: 100.0% words/sec/thread:  214747 lr:  0.000000 loss:  9.966265 ETA:   0h 0m
## Elapsed time: 0 hours and 0 minutes and 1 seconds.
# Load the supervised training log (one progress line per row) and check how
# many lines were recorded.
data_2 <- read.delim(
  paste0(dir_path, Sys.Date(), "_", "sup_logs_2.txt"),
  header = FALSE,
  stringsAsFactors = FALSE
)
dim(data_2)
## [1] 148   1
# Preview the first three logged progress lines.
head(data_2,3)
##                                                                                      V1
## 1 Progress:   0.5% words/sec/thread:  159105 lr: 0.0995422 loss:   17.5012 ETA:   0h 0m
## 2 Progress:     1% words/sec/thread:  171289 lr: 0.0990128 loss:   17.6367 ETA:   0h 0m
## 3 Progress:     2% words/sec/thread:  177999 lr: 0.0983694 loss:   17.6814 ETA:   0h 0m
###################################
# Parse the supervised training log into a table (progress, words/sec/thread,
# learning rate, loss) and plot it.
sup_log_file <- paste0(dir_path, Sys.Date(), "_", "sup_logs_2.txt")
res <- plot_progress_logs(path = sup_log_file, plot = TRUE)

dim(res)
head(res)
## [1] 148   4
##   progress words_sec_thread learning_rate    loss
## 1      0.5           159105     0.0995422 17.5012
## 2      1.0           171289     0.0990128 17.6367
## 3      2.0           177999     0.0983694 17.6814
## 4      2.0           181042     0.0978354 17.6980
## 5      3.0           181786     0.0973268 17.7095
## 6      3.0           183486     0.0966768 17.7174
#######################################
# 'predict' function
# Load the validation set, one labelled question per row. quote = "" stops
# quote characters inside the questions from being treated as field quoting.
data_test <- read.delim(
  grep("cooking.valid", file_name, fixed = TRUE, value = TRUE),
  header = FALSE,
  stringsAsFactors = FALSE,
  quote = ""
)
dim(data_test)
head(data_test, 3)
## [1] 3000    1
##                                                                                                                              V1
## 1                           __label__equipment __label__cast-iron How do I fix a cast iron pot that was heated empty for hours?
## 2                                                            __label__oven How does grill/broil mode in a convection oven work?
## 3 __label__sauce __label__indian-cuisine __label__breakfast What are the names of the breakfast spreads used in Indian cuisine?
# Predict the top label (k = 1, threshold 0) for every validation question
# with the supervised model saved earlier as `<prefix>.bin`.
list_params <- list(
  command = "predict",
  model = paste0(dir_path, Sys.Date(), "_", "model_cooking.bin"),
  # fixed = TRUE: match the file name literally, not as a regular expression.
  test_data = grep("cooking.valid", file_name, fixed = TRUE, value = TRUE),
  k = 1,
  th = 0.0
)

res <- fasttext_interface(
  list_params,
  path_output = paste0(dir_path, Sys.Date(), "_", "predict_valid_3.txt")
)

# Read the predictions back: one predicted label per input row.
data_o <- read.delim(
  paste0(dir_path, Sys.Date(), "_", "predict_valid_3.txt"),
  header = FALSE,
  stringsAsFactors = FALSE
)
dim(data_o)
head(data_o, 3)
## [1] 3000    1
##                V1
## 1 __label__baking
## 2 __label__baking
## 3 __label__baking
# 'predict-prob' function
# Reload the validation set (same file and options as for 'predict').
data_test <- read.delim(
  grep("cooking.valid", file_name, fixed = TRUE, value = TRUE),
  header = FALSE,
  stringsAsFactors = FALSE,
  quote = ""
)
dim(data_test)
head(data_test, 3)
## [1] 3000    1
##                                                                                                                              V1
## 1                           __label__equipment __label__cast-iron How do I fix a cast iron pot that was heated empty for hours?
## 2                                                            __label__oven How does grill/broil mode in a convection oven work?
## 3 __label__sauce __label__indian-cuisine __label__breakfast What are the names of the breakfast spreads used in Indian cuisine?
# Same as 'predict', but each output row also carries the probability of the
# predicted label.
list_params <- list(
  command = "predict-prob",
  model = paste0(dir_path, Sys.Date(), "_", "model_cooking.bin"),
  # fixed = TRUE: match the file name literally, not as a regular expression.
  test_data = grep("cooking.valid", file_name, fixed = TRUE, value = TRUE),
  k = 1,
  th = 0.0
)

res <- fasttext_interface(
  list_params,
  path_output = paste0(dir_path, Sys.Date(), "_", "predict_valid_prob_4.txt")
)

# Read the predictions back: "<label> <probability>" per input row.
data_o <- read.delim(
  paste0(dir_path, Sys.Date(), "_", "predict_valid_prob_4.txt"),
  header = FALSE,
  stringsAsFactors = FALSE
)
dim(data_o)
head(data_o, 10)
## [1] 3000    1
##                                V1
## 1       __label__baking 0.0257802
## 2       __label__baking 0.0524399
## 3       __label__baking 0.0401911
## 4  __label__food-safety 0.0324502
## 5       __label__baking 0.0852046
## 6   __label__food-safety 0.034837
## 7       __label__baking 0.0823611
## 8        __label__baking 0.062369
## 9  __label__food-safety 0.0695802
## 10  __label__food-safety 0.040417
# 'test' function
# Reload the validation set (same file and options as for 'predict').
data_test <- read.delim(
  grep("cooking.valid", file_name, fixed = TRUE, value = TRUE),
  header = FALSE,
  stringsAsFactors = FALSE,
  quote = ""
)
dim(data_test)
head(data_test, 3)
## [1] 3000    1
##                                                                                                                              V1
## 1                           __label__equipment __label__cast-iron How do I fix a cast iron pot that was heated empty for hours?
## 2                                                            __label__oven How does grill/broil mode in a convection oven work?
## 3 __label__sauce __label__indian-cuisine __label__breakfast What are the names of the breakfast spreads used in Indian cuisine?
# Evaluate the supervised model on the validation set. No path_output is
# given; the summary (N, P@1, R@1) is printed to the console.
list_params <- list(
  command = "test",
  model = paste0(dir_path, Sys.Date(), "_", "model_cooking.bin"),
  # fixed = TRUE: match the file name literally, not as a regular expression.
  test_data = grep("cooking.valid", file_name, fixed = TRUE, value = TRUE),
  k = 1,
  th = 0.0
)

res <- fasttext_interface(list_params)
## N    3000
## P@1  0.151
## R@1  0.0652
# Show the argument vector that was passed to fastText for the 'test' run.
res
## [1] "fasttext"                                                              
## [2] "test"                                                                  
## [3] "/Users/xut2/Desktop/fastText_data/2022-05-20_model_cooking.bin"        
## [4] "/Users/xut2/Desktop/fastText_data//cooking.stackexchange/cooking.valid"
## [5] "1"                                                                     
## [6] "0"
# 'test-label' function
#whereas the ??test-label?? command allows the user to save,
#data_test <- read.delim( grep("cooking.valid",file_name,value = T),header = F,stringsAsFactors = F, quote = "")
#dim(data_test); head(data_test,3)
# Run the 'test-label' command on the validation file, save its output to a
# text file, and read that file back for inspection.
# The output path is built once and reused, so the write and the read cannot
# end up pointing at different files (Sys.Date() evaluated separately for each
# use could differ if the run crosses midnight).
test_label_path <- paste0(dir_path, Sys.Date(), "_", "test_label_valid_5.txt")

list_params <- list(
  command = "test-label",
  model = paste0(dir_path, Sys.Date(), "_", "model_cooking.bin"),
  test_data = grep("cooking.valid", file_name, value = TRUE),
  k = 1,
  th = 0.0
)

res <- fasttext_interface(list_params, path_output = test_label_path)
## N    3000
## P@1  0.151
## R@1  0.065
data_o <- read.delim(test_label_path, header = FALSE, stringsAsFactors = FALSE)
dim(data_o)
head(data_o, 3)
## [1] 735   1
##                                                                                      V1
## 1        F1-Score : 0.229961  Precision : 0.141815  Recall : 0.607639   __label__baking
## 2   F1-Score : 0.218877  Precision : 0.127444  Recall : 0.774590   __label__food-safety
## 3 F1-Score : 0.367432  Precision : 0.310954  Recall : 0.448980   __label__substitutions
################################
# Read the full StackExchange data set as raw text lines.
# header = FALSE is required: the file has no header row, and read.delim()
# defaults to header = TRUE, which turns the first example into the column
# name instead of a data row.
# NOTE: the recorded output below was captured before this fix, so it still
# shows the first example as the column name and one row fewer.
st_dat <- read.delim(
  grep("cooking.stackexchange.txt", file_name, value = TRUE),
  header = FALSE,
  stringsAsFactors = FALSE
)
dim(st_dat)
head(st_dat)
## [1] 8705    1
##                                               X__label__sauce.__label__cheese.How.much.does.potato.starch.affect.a.cheese.sauce.recipe.
## 1                                   __label__food-safety __label__acidity Dangerous pathogens capable of growing in acidic environments
## 2                                            __label__cast-iron __label__stove How do I cover up the white spots on my cast iron stove?
## 3                                                      __label__restaurant Michelin Three Star Restaurant; but if the chef is not there
## 4                         __label__knife-skills __label__dicing Without knife skills, how can I quickly and accurately dice vegetables?
## 5                                          __label__storage-method __label__equipment __label__bread What's the purpose of a bread box?
## 6 __label__baking __label__food-safety __label__substitutions __label__peanuts how to seperate peanut oil from roasted peanuts at home?
# Collect every label token that occurs in the data set: split each line on
# single spaces and keep the tokens that begin with the "__label__" prefix.
# All lines are split in one vectorised call; which() drops NA matches so a
# missing line cannot inject NA into the result, and zero rows yield an empty
# character vector instead of the 1:0 indexing error.
res_stackexch <- local({
  tokens <- as.character(unlist(strsplit(st_dat[[1]], " ", fixed = TRUE)))
  tokens[which(startsWith(tokens, "__label__"))]
})
# Re-read the saved 'test-label' output with read.table() so that each
# whitespace-separated field becomes its own column. Only the double quote is
# a quoting character and no character starts a comment.
test_label_valid <- read.table(
  file = paste0(dir_path, Sys.Date(), "_", "test_label_valid_5.txt"),
  quote = "\"",
  comment.char = ""
)
dim(test_label_valid)
head(test_label_valid)
## [1] 735  10
##         V1 V2       V3        V4 V5       V6     V7 V8       V9
## 1 F1-Score  : 0.229961 Precision  : 0.141815 Recall  : 0.607639
## 2 F1-Score  : 0.218877 Precision  : 0.127444 Recall  : 0.774590
## 3 F1-Score  : 0.367432 Precision  : 0.310954 Recall  : 0.448980
## 4 F1-Score  : 0.000000 Precision  : -------- Recall  : 0.000000
## 5 F1-Score  : 0.000000 Precision  : -------- Recall  : 0.000000
## 6 F1-Score  : 0.000000 Precision  : -------- Recall  : 0.000000
##                      V10
## 1        __label__baking
## 2   __label__food-safety
## 3 __label__substitutions
## 4     __label__equipment
## 5         __label__bread
## 6       __label__chicken
# Sanity check: the 'test-label' output is expected to hold one row per
# distinct label found in the data.
nrow(test_label_valid) == length(unique(res_stackexch))
## [1] TRUE
head(test_label_valid)
##         V1 V2       V3        V4 V5       V6     V7 V8       V9
## 1 F1-Score  : 0.229961 Precision  : 0.141815 Recall  : 0.607639
## 2 F1-Score  : 0.218877 Precision  : 0.127444 Recall  : 0.774590
## 3 F1-Score  : 0.367432 Precision  : 0.310954 Recall  : 0.448980
## 4 F1-Score  : 0.000000 Precision  : -------- Recall  : 0.000000
## 5 F1-Score  : 0.000000 Precision  : -------- Recall  : 0.000000
## 6 F1-Score  : 0.000000 Precision  : -------- Recall  : 0.000000
##                      V10
## 1        __label__baking
## 2   __label__food-safety
## 3 __label__substitutions
## 4     __label__equipment
## 5         __label__bread
## 6       __label__chicken
# The user can also 'quantize' a supervised model to reduce its memory usage with the following command,
# 'quantize' function
#data_1 <- read.delim(grep("model_cooking",file_name,value = T),header = F,stringsAsFactors = F, quote = "")
#dim(data_1); data_1[1:5,]
# Parameters for the 'quantize' command: the trained .bin model is the input,
# and the output is the same path without the extension.
list_params <- list(
  command = "quantize",
  input = paste0(dir_path, Sys.Date(), "_", "model_cooking.bin"),
  output = paste0(dir_path, Sys.Date(), "_", "model_cooking")
)
list_params
## $command
## [1] "quantize"
## 
## $input
## [1] "/Users/xut2/Desktop/fastText_data/2022-05-20_model_cooking.bin"
## 
## $output
## [1] "/Users/xut2/Desktop/fastText_data/2022-05-20_model_cooking"
# Execute the command described by list_params and show the returned value.
res <- fasttext_interface(list_params)
res
## [1] "fasttext"                                                      
## [2] "quantize"                                                      
## [3] "-input"                                                        
## [4] "/Users/xut2/Desktop/fastText_data/2022-05-20_model_cooking.bin"
## [5] "-output"                                                       
## [6] "/Users/xut2/Desktop/fastText_data/2022-05-20_model_cooking"
#print(list.files(dir_path, pattern = '*.ftz', full.names = F, recursive = T))
# print-word-vectors function
# Parameters for 'print-word-vectors': only the command name and the trained
# model are set here.
list_params <- list(
  command = "print-word-vectors",
  model = paste0(dir_path, Sys.Date(), "_", "model_cooking.bin")
)
list_params
## $command
## [1] "print-word-vectors"
## 
## $model
## [1] "/Users/xut2/Desktop/fastText_data/2022-05-20_model_cooking.bin"
# Write the vector of each query word to a file, then display the queries and
# the saved vectors.
# Both paths are resolved once and reused, so the call and the read-backs are
# guaranteed to refer to the same files.
queries_path <- grep("/queries.txt", file_name, value = TRUE)
word_vecs_path <- paste0(dir_path, Sys.Date(), "_", "word_vecs_queries_6.txt")

res <- fasttext_interface(
  list_params,
  path_input = queries_path,
  path_output = word_vecs_path
)

data_i <- read.delim(queries_path, header = FALSE, stringsAsFactors = FALSE)
dim(data_i)
head(data_i)
## [1] 5 1
##       V1
## 1  salt 
## 2   word
## 3 pepper
## 4     do
## 5    not
data_0 <- read.delim(word_vecs_path, header = FALSE, stringsAsFactors = FALSE)
dim(data_0)
head(data_0)
## [1] 5 1
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    V1
## 1                             salt -0.1255 0.15599 -0.063733 -0.056706 0.064316 0.015561 -0.037703 -0.077345 0.15125 0.15098 -0.028755 0.10873 0.11934 0.14968 -0.053987 -0.039099 -0.076555 0.044171 -0.010716 -0.099138 -0.027962 0.074608 0.092168 -0.10159 -0.044982 -0.064404 0.10958 -0.082442 0.033949 0.12029 -0.065194 0.015164 -0.1047 -0.070342 -0.088541 -0.016293 0.041255 -0.075583 -0.083756 -0.066339 0.064751 0.093882 0.12031 0.0089588 -0.028692 -0.051757 -0.043683 0.055998 -0.089402 -0.020003 
## 2 word 0.019573 -0.0025144 0.0055079 0.010293 -0.0051914 0.014924 0.018109 -0.0084316 -0.017216 0.0032907 -0.0056333 -0.0037829 -0.020543 -0.018398 -0.015694 -0.018159 0.018388 -0.00015707 -0.0016831 0.0037906 0.0093218 0.011967 0.0030731 -0.006125 -0.012557 0.01886 -0.011903 -0.011001 0.00054215 0.012805 -0.013634 -0.017227 0.011511 0.0059908 -0.0032568 0.017379 0.0024809 0.019016 0.017201 0.015748 0.00052414 0.013261 0.015465 -0.0058483 -0.012074 -0.013626 0.015837 0.019197 0.010097 -0.0080901 
## 3    pepper -0.0081992 0.036983 -0.029638 0.0011193 0.029589 0.022429 -0.0089997 -0.0060979 0.02691 0.032926 -0.01408 -0.0105 0.012772 0.0073224 -0.018791 0.015935 0.0073969 0.0023288 0.0030829 0.0042163 0.018184 0.03417 0.010236 -0.03066 -0.015526 0.0013535 -0.0049472 0.0051684 -0.00096683 0.018346 0.014286 -0.0032868 -0.021283 -0.000798 -0.021904 -0.024392 -0.0096988 -0.017696 -0.020491 -0.027901 0.029869 0.019394 0.029545 -7.6811e-05 0.013366 -0.0093097 -0.0072335 -0.013594 -0.032999 0.010379 
## 4                                                     do -0.21367 0.20046 -0.12034 -0.094421 0.17114 0.086503 -0.060959 -0.13046 0.38141 0.36817 -0.14304 0.22006 0.26853 0.17911 -0.17851 -0.034382 -0.12777 0.10159 -0.10397 -0.14898 -0.047099 0.28409 0.17914 -0.18554 -0.016487 -0.19126 0.17579 -0.063955 -0.036835 0.28313 -0.11034 0.045549 -0.16345 -0.20921 -0.27781 -0.039819 0.063025 -0.13556 -0.1697 -0.12788 0.21973 0.33203 0.28053 0.024998 0.025951 -0.10993 0.0026733 -0.066145 -0.22134 0.074525 
## 5                                        not -0.12616 0.15667 -0.11333 -0.038577 0.12162 0.083418 -0.027688 -0.04115 0.22747 0.20353 -0.083234 0.082099 0.13484 0.15922 -0.098326 -0.034565 -0.057983 0.060144 -0.048684 -0.091909 -0.013332 0.19787 0.11619 -0.1299 -0.0048877 -0.11135 0.11225 -0.06165 -0.010358 0.11486 -0.02657 -0.0076293 -0.10934 -0.11277 -0.1585 -0.018458 -0.0038984 -0.059455 -0.10493 -0.067612 0.12349 0.14544 0.13812 0.01769 -0.0085228 -0.038652 -0.02683 0.012572 -0.14683 0.065803
# print-sentence-vectors function
# Compute one vector per input sentence with 'print-sentence-vectors', save
# them to a file, and display the input sentences and the saved vectors.
list_params <- list(
  command = "print-sentence-vectors",
  model = paste0(dir_path, Sys.Date(), "_", "model_cooking.bin")
)

# Resolve each path once so the call and the read-backs use the same files.
sentences_path <- grep("text_sentence.txt", file_name, value = TRUE)
sentence_vecs_path <- paste0(
  dir_path, Sys.Date(), "_", "word_sentence_queries_7.txt"
)

# path_output is passed by name rather than as an unnamed positional argument
# after a named one, matching the other fasttext_interface() calls in this file.
res <- fasttext_interface(
  list_params,
  path_input = sentences_path,
  path_output = sentence_vecs_path
)
data_i <- read.delim(sentences_path, header = FALSE, stringsAsFactors = FALSE)
dim(data_i)
head(data_i)
## [1] 5 1
##                                                                  V1
## 1      How much does potato starch affect a cheese sauce recipe</s>
## 2 Dangerous pathogens capable of growing in acidic environments</s>
## 3       How do I cover up the white spots on my cast iron stove</s>
## 4       How do I cover up the white spots on my cast iron stove</s>
## 5   Michelin Three Star Restaurant but if the chef is not there</s>
data_0 <- read.delim(
  sentence_vecs_path,
  header = FALSE,
  stringsAsFactors = FALSE
)
dim(data_0)
head(data_0)
## [1] 5 1
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                     V1
## 1     -0.22825 0.24177 -0.17461 -0.077236 0.16505 0.067812 -0.029204 -0.076567 0.31997 0.33231 -0.10065 0.13696 0.24512 0.21518 -0.14209 -0.037364 -0.053819 0.090892 -0.038787 -0.097616 -0.054906 0.27077 0.11892 -0.14937 0.010632 -0.1242 0.16426 -0.093122 -0.055677 0.16878 -0.12061 -0.027932 -0.1503 -0.21692 -0.26074 -0.058812 0.038182 -0.12408 -0.13168 -0.11051 0.17071 0.2397 0.2286 0.057338 -0.010729 -0.083604 -0.021546 -0.018641 -0.21971 0.092718 
## 2   -0.27012 0.29163 -0.24371 -0.054296 0.22529 0.13789 -0.034255 -0.066273 0.40267 0.3784 -0.10769 0.16307 0.2632 0.29154 -0.14129 -0.054322 -0.068242 0.088002 -0.0099338 -0.13166 -0.0080961 0.33557 0.14923 -0.24118 -0.0074657 -0.14724 0.22406 -0.12693 -0.043808 0.20871 -0.10105 -0.051181 -0.18322 -0.24508 -0.30689 -0.051461 0.018171 -0.14125 -0.14057 -0.14653 0.21769 0.28052 0.25974 0.063566 -0.0029266 -0.1046 -0.017754 0.0096438 -0.23935 0.096424 
## 3  -0.24324 0.24065 -0.18274 -0.067114 0.19327 0.093977 -0.052701 -0.087012 0.39044 0.36605 -0.1183 0.17973 0.28585 0.23661 -0.16538 -0.058032 -0.089057 0.098124 -0.064789 -0.14791 -0.033792 0.30393 0.15975 -0.19683 0.0025593 -0.16917 0.19628 -0.097588 -0.045504 0.23746 -0.11396 0.0051915 -0.16273 -0.23199 -0.30272 -0.058742 0.035259 -0.15492 -0.14796 -0.13197 0.22107 0.30177 0.25895 0.046912 -0.0057606 -0.10603 -0.023774 -0.017955 -0.23096 0.091592 
## 4  -0.24324 0.24065 -0.18274 -0.067114 0.19327 0.093977 -0.052701 -0.087012 0.39044 0.36605 -0.1183 0.17973 0.28585 0.23661 -0.16538 -0.058032 -0.089057 0.098124 -0.064789 -0.14791 -0.033792 0.30393 0.15975 -0.19683 0.0025593 -0.16917 0.19628 -0.097588 -0.045504 0.23746 -0.11396 0.0051915 -0.16273 -0.23199 -0.30272 -0.058742 0.035259 -0.15492 -0.14796 -0.13197 0.22107 0.30177 0.25895 0.046912 -0.0057606 -0.10603 -0.023774 -0.017955 -0.23096 0.091592 
## 5 -0.17324 0.20263 -0.15854 -0.059153 0.14633 0.066961 -0.042197 -0.04893 0.30194 0.28543 -0.088321 0.13543 0.21976 0.20168 -0.12034 -0.040109 -0.059228 0.077294 -0.027149 -0.12177 -0.025425 0.2401 0.12877 -0.15639 0.0046142 -0.11825 0.14751 -0.087838 -0.043241 0.16546 -0.095735 -0.021828 -0.12783 -0.1722 -0.23412 -0.043671 0.024676 -0.13202 -0.11374 -0.090393 0.17686 0.22704 0.20051 0.03545 -0.0032793 -0.078833 -0.02078 -0.0056645 -0.18179 0.088694
# 'skipgram' function with n-gram enabled
# Train a skipgram model with character n-grams enabled (minn = maxn = 2) and
# write the training log to a text file.
list_params <- list(
  command = "skipgram",
  lr = 0.1,
  dim = 50,
  # TRUE instead of the reassignable shorthand T
  input = grep("example_text.txt", file_name, value = TRUE),
  output = paste0(dir_path, Sys.Date(), "_", "word_vectors"),
  verbose = 2,
  thread = 1,
  minn = 2,
  maxn = 2
)

res <- fasttext_interface(
  list_params,
  path_output = paste0(dir_path, Sys.Date(), "_", "skipgram_logs_8.txt"),
  MilliSecs = 5
)
## 
## Read 0M words
## Number of words:  8
## Number of labels: 0
## 
## Progress: 105.5% words/sec/thread:    1897 lr: -0.005537 loss:  4.084502 ETA:   0h 0m
## Progress: 100.0% words/sec/thread:    1897 lr:  0.000000 loss:  4.084502 ETA:   0h 0m
# Display the training text that was fed to the skipgram model.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
data_i <- read.delim(
  grep("example_text.txt", file_name, value = TRUE),
  header = FALSE,
  stringsAsFactors = FALSE
)
dim(data_i)
head(data_i)
## [1] 5 1
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    V1
## 1 The term planet is ancient, with ties to history, astrology, science, mythology, and religion. Several planets in the Solar System can be seen with the naked eye. These were regarded by many early cultures as divine, or as emissaries of deities. As scientific knowledge advanced, human perception of the planets changed, incorporating a number of disparate objects. In 2006, the International Astronomical Union (IAU) officially adopted a resolution defining planets within the Solar System. This definition is controversial because it excludes many objects of planetary mass based on where or what they orbit. 
## 2                                                                                                                                                                                                                                                           Although eight of the planetary bodies discovered before 1950 remain planets under the modern definition, some celestial bodies, such as Ceres, Pallas, Juno and Vesta (each an object in the solar asteroid belt), and Pluto (the first trans-Neptunian object discovered), that were once considered planets by the scientific community, are no longer viewed as such.
## 3                                                                                                                                                                                                                                                                                               The planets were thought by Ptolemy to orbit Earth in deferent and epicycle motions. Although the idea that the planets orbited the Sun had been suggested many times, it was not until the 17th century that this view was supported by evidence from the first telescopic astronomical observations, performed by Galileo Galilei. 
## 4                                                                             At about the same time, by careful analysis of pre-telescopic observation data collected by Tycho Brahe, Johannes Kepler found the planets orbits were not circular but elliptical. As observational tools improved, astronomers saw that, like Earth, the planets rotated around tilted axes, and some shared such features as ice caps and seasons. Since the dawn of the Space Age, close observation by space probes has found that Earth and the other planets share characteristics such as volcanism, hurricanes, tectonics, and even hydrology.
## 5                                                                                                                                                                                          Planets are generally divided into two main types: large lowdensity giant planets, and smaller rocky terrestrials. Under IAU definitions, there are eight planets in the Solar System. In order of increasing distance from the Sun, they are the four terrestrials, Mercury, Venus, Earth, and Mars, then the four giant planets, Jupiter, Saturn, Uranus, and Neptune. Six of the planets are orbited by one or more natural satellites.
# Read back the training log written by the skipgram run.
# FALSE is spelled out because F is an ordinary, reassignable name.
data_0 <- read.delim(
  paste0(dir_path, Sys.Date(), "_", "skipgram_logs_8.txt"),
  header = FALSE,
  stringsAsFactors = FALSE
)
dim(data_0)
head(data_0)
## [1] 2 1
##                                                                                        V1
## 1 Progress: 1e+02% words/sec/thread:    1897 lr: -0.00553672 loss:    4.0845 ETA:   0h 0m
## 2   Progress: 1e+02% words/sec/thread:    1897 lr:         0 loss:    4.0845 ETA:   0h 0m
# 'print-ngram' function
# Print the character n-grams of the word 'word' (and their vectors) using the
# skipgram model trained above.
list_params <- list(
  command = "print-ngrams",
  model = paste0(dir_path, Sys.Date(), "_", "word_vectors.bin"),
  word = "word"
)

# save output to file
# The path is built once so the write and the read-back use the same file.
ngrams_path <- paste0(dir_path, Sys.Date(), "_", "ngrams_9.txt")
res <- fasttext_interface(list_params, path_output = ngrams_path)
data_0 <- read.delim(ngrams_path, header = FALSE, stringsAsFactors = FALSE)
dim(data_0)
head(data_0)
## [1] 5 1
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            V1
## 1     <w -0.0098269 0.00178 0.019293 0.009935 -0.0033636 -0.010921 -0.0074422 0.0090627 0.017034 -0.011317 -0.0050268 0.0097954 0.0061399 -0.010658 0.0015632 -0.0091288 -0.0073684 0.0020297 -0.017194 -0.014494 -0.0026816 -0.012152 0.012717 0.0072372 -0.0024914 -0.0055023 -0.01498 0.017431 0.0018854 0.015555 0.017535 -0.0050543 0.0048978 0.0060741 -0.0010866 0.0088909 0.015932 -0.016296 -0.0041629 0.016026 0.015111 0.010188 0.0099437 -0.0050831 0.0089823 0.0043541 -0.017571 -0.0066861 0.0076543 0.0081198 
## 2         wo 0.011143 0.015704 0.012938 0.012841 0.00098409 0.018886 -0.015707 0.013251 -0.010995 -0.0032421 -0.0059515 -0.0087225 0.00031082 0.012291 0.019334 0.016656 0.00094482 -0.0012857 0.016194 0.0056833 0.005914 0.018709 -0.019828 -0.012388 -0.007987 -0.0071848 -0.01801 0.017995 -0.018149 -0.0083601 0.014663 -0.0026197 -0.016326 -0.0019464 0.0050893 -0.0019239 -0.0079707 -0.011159 0.016207 -0.019352 -0.0019415 0.016277 -0.010191 -0.014483 0.012371 -0.017579 -0.0026897 0.019553 0.013289 -0.0091467 
## 3 or -0.010347 -0.015846 -0.01671 -0.00042788 -0.016428 0.01629 -0.0088151 0.014814 0.0054929 0.010768 -0.012023 0.0068075 -0.001546 0.0088579 0.018041 -0.0085163 0.018828 -0.0068596 0.0017843 -0.0082543 0.017072 0.012172 -0.011653 0.003049 0.011487 -0.0034104 -0.002449 0.0018191 0.017727 0.015963 -0.0048482 -0.011072 -0.012759 -0.00044858 -0.00088755 -0.000806 -0.0015559 0.0080433 0.0059294 -0.0098861 -0.00062462 0.017488 0.014932 -0.0030576 -0.0097858 -0.011145 -0.018596 -0.014607 -0.010301 -0.0047487 
## 4           rd 0.018402 -0.0037771 -0.014678 0.010876 -0.016157 -0.01098 -0.0051353 -0.0036447 -0.016103 0.013207 0.014478 -0.012411 -0.001341 -0.0091151 0.0017624 0.004272 0.01906 0.0013599 -0.0016341 -0.01951 -0.01794 -0.0041561 0.010877 0.015272 0.018674 -0.013056 -0.0020117 -0.016143 -0.017887 -0.012718 0.0038359 0.0030685 -0.0072444 -0.00030269 -0.009659 0.014794 -0.00048792 -0.018252 0.014603 0.018392 0.0028851 0.016257 0.0074388 -0.0065455 0.011161 0.011287 0.0060946 -0.00020148 0.011262 0.019703 
## 5                d> 0.012683 0.008217 0.0057624 0.0096173 0.0093351 0.015419 -0.016198 -0.011256 -0.017384 0.022771 0.0097221 -0.012594 -0.010828 0.0075007 -0.009891 -0.0060739 0.0072458 -0.0058074 -0.0047083 -0.0015412 0.01332 0.016051 0.0070266 -0.01762 0.010644 -0.015591 -0.0093542 -0.011208 0.010302 -0.020306 0.017566 -0.017068 0.01165 0.0025248 -0.011931 0.0078554 0.0058316 -0.02068 0.00074283 -0.00051274 0.01128 0.015755 0.013824 0.00192 0.017073 -0.015834 -0.0034396 -0.017821 0.00079554 0.0067478
# Same command again, with an empty path_output so the n-gram vectors are
# printed to the console instead of being saved.
res <- fasttext_interface(list_params, path_output = "")
## <w -0.00983 0.00178 0.01929 0.00994 -0.00336 -0.01092 -0.00744 0.00906 0.01703 -0.01132 -0.00503 0.00980 0.00614 -0.01066 0.00156 -0.00913 -0.00737 0.00203 -0.01719 -0.01449 -0.00268 -0.01215 0.01272 0.00724 -0.00249 -0.00550 -0.01498 0.01743 0.00189 0.01556 0.01753 -0.00505 0.00490 0.00607 -0.00109 0.00889 0.01593 -0.01630 -0.00416 0.01603 0.01511 0.01019 0.00994 -0.00508 0.00898 0.00435 -0.01757 -0.00669 0.00765 0.00812 
## wo 0.01114 0.01570 0.01294 0.01284 0.00098 0.01889 -0.01571 0.01325 -0.01100 -0.00324 -0.00595 -0.00872 0.00031 0.01229 0.01933 0.01666 0.00094 -0.00129 0.01619 0.00568 0.00591 0.01871 -0.01983 -0.01239 -0.00799 -0.00718 -0.01801 0.01800 -0.01815 -0.00836 0.01466 -0.00262 -0.01633 -0.00195 0.00509 -0.00192 -0.00797 -0.01116 0.01621 -0.01935 -0.00194 0.01628 -0.01019 -0.01448 0.01237 -0.01758 -0.00269 0.01955 0.01329 -0.00915 
## or -0.01035 -0.01585 -0.01671 -0.00043 -0.01643 0.01629 -0.00882 0.01481 0.00549 0.01077 -0.01202 0.00681 -0.00155 0.00886 0.01804 -0.00852 0.01883 -0.00686 0.00178 -0.00825 0.01707 0.01217 -0.01165 0.00305 0.01149 -0.00341 -0.00245 0.00182 0.01773 0.01596 -0.00485 -0.01107 -0.01276 -0.00045 -0.00089 -0.00081 -0.00156 0.00804 0.00593 -0.00989 -0.00062 0.01749 0.01493 -0.00306 -0.00979 -0.01114 -0.01860 -0.01461 -0.01030 -0.00475 
## rd 0.01840 -0.00378 -0.01468 0.01088 -0.01616 -0.01098 -0.00514 -0.00364 -0.01610 0.01321 0.01448 -0.01241 -0.00134 -0.00912 0.00176 0.00427 0.01906 0.00136 -0.00163 -0.01951 -0.01794 -0.00416 0.01088 0.01527 0.01867 -0.01306 -0.00201 -0.01614 -0.01789 -0.01272 0.00384 0.00307 -0.00724 -0.00030 -0.00966 0.01479 -0.00049 -0.01825 0.01460 0.01839 0.00289 0.01626 0.00744 -0.00655 0.01116 0.01129 0.00609 -0.00020 0.01126 0.01970 
## d> 0.01268 0.00822 0.00576 0.00962 0.00934 0.01542 -0.01620 -0.01126 -0.01738 0.02277 0.00972 -0.01259 -0.01083 0.00750 -0.00989 -0.00607 0.00725 -0.00581 -0.00471 -0.00154 0.01332 0.01605 0.00703 -0.01762 0.01064 -0.01559 -0.00935 -0.01121 0.01030 -0.02031 0.01757 -0.01707 0.01165 0.00252 -0.01193 0.00786 0.00583 -0.02068 0.00074 -0.00051 0.01128 0.01575 0.01382 0.00192 0.01707 -0.01583 -0.00344 -0.01782 0.00080 0.00675
# Show the value returned by the preceding fasttext_interface() call.
res 
## [1] "fasttext"                                                     
## [2] "print-ngrams"                                                 
## [3] "/Users/xut2/Desktop/fastText_data/2022-05-20_word_vectors.bin"
## [4] "word"
#The command returns the nearest neighbors for a specific word based on the input model,
# 'nn' function
# Query the k = 5 nearest neighbours of the word 'sauce' in the trained model,
# save them to a file, and read the file back.
list_params <- list(
  command = "nn",
  model = paste0(dir_path, Sys.Date(), "_", "model_cooking.bin"),
  k = 5,
  query_word = "sauce"
)

# The output path is built once and passed by name (it was an unnamed
# positional argument), so the write and the read-back share the same file.
nearest_path <- paste0(dir_path, Sys.Date(), "_", "nearest_10.txt")
res <- fasttext_interface(list_params, path_output = nearest_path)

data_0 <- read.delim(nearest_path, header = FALSE, stringsAsFactors = FALSE)
dim(data_0)
head(data_0)
## [1] 5 1
##                    V1
## 1     fridge 0.751533
## 2 tenderloin 0.740878
## 3   tomatoes 0.737648
## 4    cooking 0.734361
## 5    fridge? 0.731776
# 'nearest.txt'
# The 'analogies' command works for triplets of words (separated by whitespace) and returns 'k' rows for each line (triplet) of the input file (separated by an empty line),
# 'analogies' function
# Run the 'analogies' command with k = 5 on the triplets in the query file,
# save the result, and display both the queries and the saved output.
list_params <- list(
  command = "analogies",
  model = paste0(dir_path, Sys.Date(), "_", "model_cooking.bin"),
  k = 5
)

# Resolve each path once so the call and the read-backs use the same files.
analogy_queries_path <- grep("analogy_queries.txt", file_name, value = TRUE)
analogies_path <- paste0(dir_path, Sys.Date(), "_", "analogies_output-11.txt")

res <- fasttext_interface(
  list_params,
  path_input = analogy_queries_path,
  path_output = analogies_path
)
## Loading model /Users/xut2/Desktop/fastText_data/2022-05-20_model_cooking.bin
data_i <- read.delim(
  analogy_queries_path,
  header = FALSE,
  stringsAsFactors = FALSE
)
dim(data_i)
head(data_i)
## [1] 4 1
##                     V1
## 1 cheese potato recipe
## 2    beans soup tomato
## 3     bread milk sugar
## 4      salt pepper oil
data_0 <- read.delim(analogies_path, header = FALSE, stringsAsFactors = FALSE)
dim(data_0)
data_0
## [1] 20  1
##                  V1
## 1  recipe? 0.861439
## 2  instead 0.860442
## 3    scones 0.85273
## 4    Using 0.852504
## 5  baking? 0.851962
## 6     have 0.860249
## 7       of 0.859111
## 8    white 0.858004
## 9       as 0.855247
## 10  coffee 0.854399
## 11      my 0.936626
## 12     Why 0.933591
## 13      do 0.925397
## 14     the 0.912933
## 15       My 0.90887
## 16    home 0.825654
## 17  dishes 0.821951
## 18   smell 0.819063
## 19   using 0.817171
## 20   taste 0.814528
# Finally, the 'dump' function takes as 'option' one of the 'args', 'dict', 'input' or 'output' and dumps the output to a text file,
# dump function
# Dump the training arguments stored in the model ('args' option) to a text
# file and read the file back.
list_params <- list(
  command = "dump",
  model = paste0(dir_path, Sys.Date(), "_", "model_cooking.bin"),
  option = "args"
)

# The path is built once so the write and the read-back use the same file.
dump_path <- paste0(dir_path, Sys.Date(), "_", "dump_data_12.txt")
res <- fasttext_interface(
  list_params,
  path_output = dump_path,
  remove_previous_file = TRUE
)
data_0 <- read.delim(dump_path, header = FALSE, stringsAsFactors = FALSE)
dim(data_0)
data_0
## [1] 13  1
##                  V1
## 1            dim 50
## 2              ws 5
## 3           epoch 5
## 4        minCount 1
## 5             neg 5
## 6      wordNgrams 1
## 7      loss softmax
## 8         model sup
## 9          bucket 0
## 10           minn 0
## 11           maxn 0
## 12 lrUpdateRate 100
## 13        t 0.00010
#ref https://cran.r-project.org/web/packages/fastText/vignettes/the_fastText_R_package.html