Marc Borowczak
2015-08-17
# Byte-compiled closure (compiler::cmpfun, optimisation level 3).
# Only the package attachment is visible in this excerpt; the code that
# would consume the arguments is not shown here.
#   x     : primary input -- NOTE(review): presumably the text to tokenise;
#           confirm against the full source
#   n     : integer, defaults to 1L
#   skip  : integer, defaults to 0L
#   cflag : integer, defaults to 0L
skip_ngram2C_Level3 <- cmpfun(
  function(x, n = 1L, skip = 0L, cflag = 0L) {
    # Statements inside braces are separated by newlines (or `;`); a comma
    # between them is a syntax error.
    library(magrittr)
    library(stringi)
  },
  options = list(optimize = 3)
)
# Two-step cleanup of repeated words in the labels stored in names(y).
#
# Step 1: collapse a word that is immediately repeated (one or more times,
# separated by whitespace, compared case-insensitively) into a "*" marker.
# The backreference must be preceded by a whitespace matcher: placing "\\1"
# directly after the "\\b" that closes the word can never match, because
# that boundary requires a non-word character next while "\\1" requires the
# word itself.
# NOTE(review): the whitespace tokens are a reconstruction; confirm against
# the original pattern.
names(y) <- gsub(
  "\\b(\\w+)\\b(?:\\s+\\1\\b)+", "*", names(y),
  ignore.case = TRUE, perl = TRUE, useBytes = TRUE
)

# Step 2: only when n > 2, replace every label that contains a "*" marker
# with a bare "*".
if (n > 2) {
  names(y) <- gsub(
    "^(\\*.*)|(.*\\*.*)|(.*\\*)$", "*", names(y),
    ignore.case = TRUE, perl = TRUE, useBytes = TRUE
  )
}
Katz's Backoff (s = 0.4)
# Runs only when n > 1: rebuilds z by mapping over i = 1..n.
# Each z[[i]] is multiplied by s^(n - i) and divided by sum(z[[i]]) when
# i > 1, or by sum(Gram[[1]]) when i == 1.
# NOTE(review): s is presumably the 0.4 back-off factor named in the slide
# title -- confirm where it is defined.
# NOTE(review): sapply() may simplify the result to a matrix when every
# element has the same length -- confirm later code still expects z[[i]].
# The closing brace of this `if` is not part of this excerpt.
if (n > 1) { z <- sapply( 1:n, function(i) { if (i > 1) { s^(n-i) * z[[i]] / sum( z[[i]] ) } else { s^(n-i) * z[[i]] / sum( Gram[[i]] )} } )
New Words Learning
# Runs only when n == 0 (NOTE(review): presumably "no n-gram matched" --
# confirm against the caller).
# The last element of w is prepended to `newword`, and a new entry named
# after that element with value 1L is appended to Gram[[1]].
# Both updates use `<<-`, so they modify objects in an enclosing
# environment rather than creating local copies.
# The closing brace of this `if` is not part of this excerpt.
if (n == 0) { newword <<- c( tail( w, 1), newword) ; Gram[[1]] <<- c( Gram[[1]], setNames( 1L, tail( w, 1 ) ) )
Session Statistics
Waiting for a 'space' (regex grep)
# Observer that validates the text in input$id1 before continuing: need()
# passes only when grep() finds the pattern at the end of the text; otherwise
# validate() stops with the "waiting for space ..." message.
# The argument must be spelled `message`: need() has no `...`, so a
# misspelled name is rejected as an unused argument. The trailing positional
# argument (output$oid1) then binds to need()'s `label` parameter.
# NOTE(review): as written the pattern appears to match text ending in a word
# character or punctuation, and to fail on empty text or trailing whitespace
# -- confirm this is the intended sense of "waiting for space".
# The remainder of the observer body is not part of this excerpt.
observe( { validate( need( (grep ("((\\b|[[:punct:]]|\\')+)$", input$id1, ignore.case = TRUE, perl = TRUE, value = FALSE, useBytes = TRUE) > 0), message = "waiting for space ...", output$oid1 ) )
observeEvent: filling the text input from the list
# Whenever input$nwordInput changes, append its value plus a trailing space
# to the text currently held in the "id1" text input.
observeEvent(input$nwordInput, {
  extended_text <- paste0(input$id1, input$nwordInput, " ")
  updateTextInput(session, inputId = "id1", value = extended_text)
})
Microbenchmark Timing
# Render the microbenchmark comparison of GOOGLE and SGT as a plot.
output$newPlot <- renderPlot({
  timings <- microbenchmark(GOOGLE, SGT)
  # NOTE(review): `times = 1` is passed to autoplot(), not microbenchmark(),
  # so the benchmark presumably runs its default number of iterations --
  # confirm whether a single timing run was intended.
  autoplot(timings, times = 1)
})