https://github.com/cytomining/cytominer/pull/118 implements cytostats::score_features_sv_entropy below.

library(tidyverse)
library(foreach)
library(doParallel)
library(cytostats)
registerDoParallel(cores = 1)

Reimplement in R

singular_value_entropy_r <- function(A) {
  # calculate the svd
  singular_values <- svd(A, 0, 0)$d
  
  # normalize relative values
  singular_values <- singular_values/sum(singular_values)
  
  # no need to remove negative sv's because the input to score_features_sv_entropy_r will 
  # always be crossprod(A, A) which is PSD
  
  -sum(singular_values * log10(singular_values))
  
}
score_features_sv_entropy_r <- function(data) {
  
  # for each feature calculate the contribution to the entropy by leaving that feature out
  sv_entropy <- 
    foreach(i = 1:ncol(data), .combine = c) %dopar% singular_value_entropy_r(data[-i, -i])
  
  singular_value_entropy_r(data) - sv_entropy
}

Verify that the R implementation is identical to the Rcpp

n <- 10000
k <- 100
A <- matrix(rnorm(n * k), n, k)
testthat::expect_equal(score_features_sv_entropy_r(crossprod(A, A)), 
                       cytostats::score_features_sv_entropy(crossprod(A, A)))

Profile the code over a range of values for k

profile_timings <- function(f) {
  n <- 10000
  
  timings <- 
    seq(10, 150, 5) %>%
    map_df(function(k) {
      A <- matrix(rnorm(n * k), n, k)
      
      te <- 
        rbenchmark::benchmark(
          f(crossprod(A, A)),
          replications = 3) %>%
        magrittr::extract2("elapsed")
      
      data_frame(k, te) 
      
    })
  
  #  because this is O(k^2)
  m <- lm(te ~ poly(k, 2), timings)
  
  print(timings %>%  ggplot(aes(k, te)) + geom_point() + geom_smooth(method = "loess") + xlab("Dimensions") + ylab("Elapsed time (s)"))
  print(summary(m))
  
  timings_estimated <- data_frame(k = c(200, 500, 1000, 1500))
                        
  timings_estimated$te <- predict(m, timings_estimated)  
  
  print(timings_estimated %>%  ggplot(aes(k, te)) + geom_line() +  xlab("Dimensions") + ylab("Elapsed time, estimated (s)"))
}

Rcpp implementation

profile_timings(f = score_features_sv_entropy)

Call:
lm(formula = te ~ poly(k, 2), data = timings)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.098265 -0.017064  0.007229  0.016823  0.129679 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.210931   0.007221   29.21  < 2e-16 ***
poly(k, 2)1 1.149138   0.038885   29.55  < 2e-16 ***
poly(k, 2)2 0.513715   0.038885   13.21 4.83e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.03888 on 26 degrees of freedom
Multiple R-squared:  0.9758,    Adjusted R-squared:  0.9739 
F-statistic: 523.9 on 2 and 26 DF,  p-value: < 2.2e-16

R implementation

profile_timings(f = score_features_sv_entropy_r)

Call:
lm(formula = te ~ poly(k, 2), data = timings)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.062419 -0.015226  0.003148  0.018015  0.073077 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.307000   0.005873   52.27   <2e-16 ***
poly(k, 2)1 1.555503   0.031626   49.18   <2e-16 ***
poly(k, 2)2 0.664002   0.031626   21.00   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.03163 on 26 degrees of freedom
Multiple R-squared:  0.991, Adjusted R-squared:  0.9903 
F-statistic:  1430 on 2 and 26 DF,  p-value: < 2.2e-16

R implementation (4 cores)

registerDoParallel(cores = 4)
profile_timings(f = score_features_sv_entropy_r)

Call:
lm(formula = te ~ poly(k, 2), data = timings)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.037804 -0.018163  0.001008  0.011448  0.055553 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.217103   0.003788   57.32  < 2e-16 ***
poly(k, 2)1 0.666822   0.020397   32.69  < 2e-16 ***
poly(k, 2)2 0.243456   0.020397   11.94 4.72e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.0204 on 26 degrees of freedom
Multiple R-squared:  0.979, Adjusted R-squared:  0.9774 
F-statistic: 605.6 on 2 and 26 DF,  p-value: < 2.2e-16

LS0tCnRpdGxlOiAiRXZhbHVhdGluZyBwZXJmb3JtYW5jZSBvZiBTVkQgZW50cm9weSBmZWF0dXJlIHNlbGVjdGlvbiIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmh0dHBzOi8vZ2l0aHViLmNvbS9jeXRvbWluaW5nL2N5dG9taW5lci9wdWxsLzExOCBpbXBsZW1lbnRzIGBjeXRvc3RhdHM6OnNjb3JlX2ZlYXR1cmVzX3N2X2VudHJvcHlgIGJlbG93LgoKYGBge3IgbWVzc2FnZT1GQUxTRX0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoZm9yZWFjaCkKbGlicmFyeShkb1BhcmFsbGVsKQpsaWJyYXJ5KGN5dG9zdGF0cykKcmVnaXN0ZXJEb1BhcmFsbGVsKGNvcmVzID0gMSkKYGBgCgoKUmVpbXBsZW1lbnQgaW4gUgpgYGB7cn0Kc2luZ3VsYXJfdmFsdWVfZW50cm9weV9yIDwtIGZ1bmN0aW9uKEEpIHsKICAjIGNhbGN1bGF0ZSB0aGUgc3ZkCiAgc2luZ3VsYXJfdmFsdWVzIDwtIHN2ZChBLCAwLCAwKSRkCiAgCiAgIyBub3JtYWxpemUgcmVsYXRpdmUgdmFsdWVzCiAgc2luZ3VsYXJfdmFsdWVzIDwtIHNpbmd1bGFyX3ZhbHVlcy9zdW0oc2luZ3VsYXJfdmFsdWVzKQogIAogICMgbm8gbmVlZCB0byByZW1vdmUgbmVnYXRpdmUgc3YncyBiZWNhdXNlIHRoZSBpbnB1dCB0byBzY29yZV9mZWF0dXJlc19zdl9lbnRyb3B5X3Igd2lsbCAKICAjIGFsd2F5cyBiZSBjcm9zc3Byb2QoQSwgQSkgd2hpY2ggaXMgUFNECiAgCiAgLXN1bShzaW5ndWxhcl92YWx1ZXMgKiBsb2cxMChzaW5ndWxhcl92YWx1ZXMpKQogIAp9CgpzY29yZV9mZWF0dXJlc19zdl9lbnRyb3B5X3IgPC0gZnVuY3Rpb24oZGF0YSkgewogIAogICMgZm9yIGVhY2ggZmVhdHVyZSBjYWxjdWxhdGUgdGhlIGNvbnRyaWJ1dGlvbiB0byB0aGUgZW50cm9weSBieSBsZWF2aW5nIHRoYXQgZmVhdHVyZSBvdXQKICBzdl9lbnRyb3B5IDwtIAogICAgZm9yZWFjaChpID0gMTpuY29sKGRhdGEpLCAuY29tYmluZSA9IGMpICVkb3BhciUgc2luZ3VsYXJfdmFsdWVfZW50cm9weV9yKGRhdGFbLWksIC1pXSkKICAKICBzaW5ndWxhcl92YWx1ZV9lbnRyb3B5X3IoZGF0YSkgLSBzdl9lbnRyb3B5Cn0KYGBgCgpWZXJpZnkgdGhhdCB0aGUgUiBpbXBsZW1lbnRhdGlvbiBpcyBpZGVudGljYWwgdG8gdGhlIFJjcHAKCmBgYHtyfQpuIDwtIDEwMDAwCmsgPC0gMTAwCkEgPC0gbWF0cml4KHJub3JtKG4gKiBrKSwgbiwgaykKdGVzdHRoYXQ6OmV4cGVjdF9lcXVhbChzY29yZV9mZWF0dXJlc19zdl9lbnRyb3B5X3IoY3Jvc3Nwcm9kKEEsIEEpKSwgCiAgICAgICAgICAgICAgICAgICAgICAgY3l0b3N0YXRzOjpzY29yZV9mZWF0dXJlc19zdl9lbnRyb3B5KGNyb3NzcHJvZChBLCBBKSkpCmBgYAoKClByb2ZpbGUgdGhlIGNvZGUgb3ZlciBhIHJhbmdlIG9mIHZhbHVlcyBmb3IgawoKYGBge3J9Cgpwcm9maWxlX3RpbWluZ3MgPC0gZnVuY3Rpb24oZikgewogIG4gPC0gMTAwMDAKICAKICB0aW1pbmdzIDwtIAogICAgc2VxKDEwLCAxNTAsIDUpICU+JQogICAgbWFwX2RmKGZ1bmN0aW9uKGspIHsKICAgICAgQSA8LSBtYXRyaXgocm5vcm0obiAqIGspLCBuLCBrKQogICAgICAKICAgICAgdGUgPC0gCiAgICAgICAgcmJlbmNobWFyazo6YmVuY2htYXJrKAogICAgICAgICAgZihjcm9zc3Byb2QoQSwgQSkpLAogICAgICAgICAgcmVwbGljYXRpb25zID0gMykgJT4lCiAgICAgICAgbWFncml0dHI6OmV4dHJhY3QyKCJlbGFwc2VkIikKICAgICAgCiAgICAgIGRhdGFfZnJhbWUoaywgdGUpIAogICAgICAKICAgIH0pCiAgCiAgIyAgYmVjYXVzZSB0aGlzIGlzIE8oa14yKQogIG0gPC0gbG0odGUgfiBwb2x5KGssIDIpLCB0aW1pbmdzKQogIAogIHByaW50KHRpbWluZ3MgJT4lICBnZ3Bsb3QoYWVzKGssIHRlKSkgKyBnZW9tX3BvaW50KCkgKyBnZW9tX3Ntb290aChtZXRob2QgPSAibG9lc3MiKSArIHhsYWIoIkRpbWVuc2lvbnMiKSArIHlsYWIoIkVsYXBzZWQgdGltZSAocykiKSkKCiAgcHJpbnQoc3VtbWFyeShtKSkKICAKICB0aW1pbmdzX2VzdGltYXRlZCA8LSBkYXRhX2ZyYW1lKGsgPSBjKDIwMCwgNTAwLCAxMDAwLCAxNTAwKSkKICAgICAgICAgICAgICAgICAgICAgICAgCiAgdGltaW5nc19lc3RpbWF0ZWQkdGUgPC0gcHJlZGljdChtLCB0aW1pbmdzX2VzdGltYXRlZCkgIAogIAogIHByaW50KHRpbWluZ3NfZXN0aW1hdGVkICU+JSAgZ2dwbG90KGFlcyhrLCB0ZSkpICsgZ2VvbV9saW5lKCkgKyAgeGxhYigiRGltZW5zaW9ucyIpICsgeWxhYigiRWxhcHNlZCB0aW1lLCBlc3RpbWF0ZWQgKHMpIikpCn0KYGBgCgpSY3BwIGltcGxlbWVudGF0aW9uCgpgYGB7cn0KcHJvZmlsZV90aW1pbmdzKGYgPSBzY29yZV9mZWF0dXJlc19zdl9lbnRyb3B5KQpgYGAKClIgaW1wbGVtZW50YXRpb24KYGBge3J9CnByb2ZpbGVfdGltaW5ncyhmID0gc2NvcmVfZmVhdHVyZXNfc3ZfZW50cm9weV9yKQpgYGAKClIgaW1wbGVtZW50YXRpb24gKDQgY29yZXMpCmBgYHtyfQpyZWdpc3RlckRvUGFyYWxsZWwoY29yZXMgPSA0KQpwcm9maWxlX3RpbWluZ3MoZiA9IHNjb3JlX2ZlYXR1cmVzX3N2X2VudHJvcHlfcikKYGBgCgoK