IVF PQ Build Times

Set up

library(readr)
library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyr)

# Fit and report build-time models for one benchmark subset, then plot the
# mean build time at each level of every tuning parameter.
#
# Arguments:
#   dd   - data frame with columns time, nlist, pq_bits and pq_dim
#   fnam - text used as the main title of each bar plot
#   sub  - text used as the subtitle of each bar plot
#
# Printed, in order: the summary of the model with the parameters as numeric
# predictors, the ANOVA comparing the factor model with and without two-way
# interactions, and the summary of the factor main-effects model.
# Three bar plots follow (nlist, pq_dim, pq_bits); the value of the last
# barplot() call is what the function returns.
f <- function(dd, fnam, sub) {
  # Parameters entered as plain numeric predictors.
  linear_fit <- lm(time ~ nlist + pq_bits + pq_dim, data = dd)
  print(summary(linear_fit))

  # Parameters entered as factors: main effects only, and main effects plus
  # all two-way interactions.
  main_fit <- lm(
    time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits),
    data = dd
  )
  pairwise_fit <- lm(
    time ~ (factor(nlist) + factor(pq_dim) + factor(pq_bits))^2,
    data = dd
  )
  print(anova(pairwise_fit, main_fit))
  print(summary(main_fit))

  # One bar per distinct value of `param`, bar height = mean build time.
  mean_time_bars <- function(param) {
    barplot(
      tapply(dd$time, dd[[param]], mean),
      main = fnam, xlab = param, cex.main = 0.75, sub = sub
    )
  }
  mean_time_bars("nlist")
  mean_time_bars("pq_dim")
  mean_time_bars("pq_bits")
}

Summary

# Build-result CSV for each benchmark; the element names become the values of
# the `id` column in `alldat`.
fnams <- c(
  deepk100 = "h100/deep-100M-k100-bs10/build/deep-100M,H100,k=100,bs=10 - build_raft_ivf_pq.csv",
  deepk10 = "h100/deep-100M-k10-bs10/build/deep-100M,H100,k=10,bs=10 - build_raft_ivf_pq.csv",
  wiki = "a10g/wiki_all_1M/result/build/raft_ivf_pq,base.csv",
  sift = "a10g/sift-128-euclidean/result/build/raft_ivf_pq,base.csv",
  mistral = "a10g/mistral_synthetic/result/build/raft_ivf_pq,base.csv"
)

# Read every file and stack the rows, tagging each row with its dataset name.
alldat <- bind_rows(lapply(fnams, read_csv), .id = "id")
Rows: 64 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): algo_name, index_name
dbl (9): time, threads, cpu_time, GPU, niter, nlist, pq_bits, pq_dim, ratio

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 48 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): algo_name, index_name
dbl (9): time, threads, cpu_time, GPU, niter, nlist, pq_bits, pq_dim, ratio

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 64 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): algo_name, index_name
dbl (9): time, threads, cpu_time, GPU, niter, nlist, pq_bits, pq_dim, ratio

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 61 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): algo_name, index_name
dbl (9): time, threads, cpu_time, GPU, niter, nlist, pq_bits, pq_dim, ratio

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 21 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): algo_name, index_name
dbl (9): time, threads, cpu_time, GPU, niter, nlist, pq_bits, pq_dim, ratio

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Mean build time for every dataset / nlist / ratio combination.
tabdat <- alldat |>
  # Exclude the nlist = 1024 runs of the deepk100 dataset.
  filter(nlist != 1024 | id != "deepk100") |>
  group_by(id, nlist, ratio) |>
  summarise(time = mean(time, na.rm = TRUE)) |>
  # Sort while ratio is still numeric, then store it as character.
  arrange(ratio, id, nlist) |>
  mutate(ratio = as.character(ratio))
`summarise()` has grouped output by 'id', 'nlist'. You can override using the
`.groups` argument.
  #pivot_wider(names_from=ratio, values_from=time)

# Mean build time against nlist on log2-log2 axes, one line per
# dataset / ratio combination, coloured by ratio.
ggplot(
  tabdat,
  aes(
    x = log2(nlist), y = log2(time),
    colour = ratio, group = interaction(id, ratio)
  )
) +
  # Slope-1 reference line: log2(time) = log2(nlist) - 10.
  geom_abline(intercept = -10, slope = 1, colour = "white") +
  geom_line() +
  geom_point() +
  ggtitle("Build Time vs nlist") +
  labs(x = "Log2 nlist", y = "Log2 Seconds")

# Percentage change in mean build time relative to the next-smaller nlist,
# within each dataset / ratio series. The smallest nlist of every series has
# no predecessor and is dropped. The change is computed per group rather than
# from hand-written row positions, so it stays correct if datasets or nlist
# values are added or removed.
# Note: after this step the `time` column holds the percentage change.
pctdat <- tabdat |>
  ungroup() |>
  arrange(ratio, id, nlist) |>
  group_by(id, ratio) |>
  mutate(time = round(100 * time / dplyr::lag(time) - 100, 1)) |>
  filter(row_number() > 1) |>
  ungroup()

ggplot(
  pctdat,
  aes(x = log10(nlist), y = time, col = ratio, group = interaction(id, ratio))
) +
  geom_line()

Deep 100M k100 bs10

# Build results for the deep-100M, k=100, bs=10 benchmark.
fnam <- "h100/deep-100M-k100-bs10/build/deep-100M,H100,k=100,bs=10 - build_raft_ivf_pq.csv"
dat <- read_csv(file = fnam)
Rows: 64 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): algo_name, index_name
dbl (9): time, threads, cpu_time, GPU, niter, nlist, pq_bits, pq_dim, ratio

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scatter of build time against nlist; nlist is jittered so that runs sharing
# the same value do not overprint. The title is added in a separate call:
# piping plot() into title() only worked because plot()'s NULL result was
# silently bound to title()'s `sub` argument.
plot(jitter(dat$nlist), dat$time, xlab = "nlist", ylab = "time")
title(main = fnam, cex.main = 0.75)

# Drop the nlist = 1024 runs, then tabulate mean build time by nlist (rows)
# and ratio (columns).
dat <- dat[dat$nlist != 1024, ]
with(dat, tapply(time, list(nlist, ratio), mean))
           10       25
2048 36.17847 22.65618
4096 40.12043 24.48505
8192 43.46055 28.15423
# Models and bar plots for the ratio = 25 runs.
f(dd = dat[dat$ratio == 25, ], fnam = fnam, sub = "ratio=25")

Call:
lm(formula = time ~ nlist + pq_bits + pq_dim, data = dd)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.28308 -0.79766  0.05456  0.73279  2.68767 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -0.7355088  1.4400903  -0.511    0.615    
nlist        0.0008950  0.0001054   8.492 4.58e-08 ***
pq_bits      1.4155993  0.1820222   7.777 1.80e-07 ***
pq_dim       0.2795291  0.0168259  16.613 3.61e-13 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.319 on 20 degrees of freedom
Multiple R-squared:  0.9533,    Adjusted R-squared:  0.9463 
F-statistic: 136.2 on 3 and 20 DF,  p-value: 1.773e-13

Analysis of Variance Table

Model 1: time ~ (factor(nlist) + factor(pq_dim) + factor(pq_bits))^2
Model 2: time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits)
  Res.Df     RSS  Df Sum of Sq      F Pr(>F)
1      6  3.3528                            
2     17 19.0744 -11   -15.722 2.5577 0.1301

Call:
lm(formula = time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits), 
    data = dd)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.7766 -0.5594 -0.1768  0.7109  2.0815 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)        16.1401     0.5721  28.214 1.02e-15 ***
factor(nlist)4096   1.8289     0.5296   3.453  0.00304 ** 
factor(nlist)8192   5.4981     0.5296  10.381 8.93e-09 ***
factor(pq_dim)64    8.9449     0.4324  20.685 1.73e-13 ***
factor(pq_bits)5    1.3318     0.6116   2.178  0.04380 *  
factor(pq_bits)6    1.0054     0.6116   1.644  0.11856    
factor(pq_bits)8    5.8373     0.6116   9.545 3.05e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.059 on 17 degrees of freedom
Multiple R-squared:  0.9744,    Adjusted R-squared:  0.9654 
F-statistic: 107.9 on 6 and 17 DF,  p-value: 1.399e-12

# Models and bar plots for the ratio = 10 runs.
f(dd = dat[dat$ratio == 10, ], fnam = fnam, sub = "ratio=10")

Call:
lm(formula = time ~ nlist + pq_bits + pq_dim, data = dd)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.0447 -0.7162 -0.2275  1.1423  2.9985 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -4.5242567  1.7135929  -2.640   0.0157 *  
nlist        0.0011324  0.0001254   9.029 1.71e-08 ***
pq_bits      1.6441523  0.2165919   7.591 2.60e-07 ***
pq_dim       0.6162248  0.0200215  30.778  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.569 on 20 degrees of freedom
Multiple R-squared:  0.9819,    Adjusted R-squared:  0.9792 
F-statistic: 362.2 on 3 and 20 DF,  p-value: < 2.2e-16

Analysis of Variance Table

Model 1: time ~ (factor(nlist) + factor(pq_dim) + factor(pq_bits))^2
Model 2: time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits)
  Res.Df     RSS  Df Sum of Sq      F  Pr(>F)  
1      6  1.8164                               
2     17 18.6557 -11   -16.839 5.0567 0.02942 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Call:
lm(formula = time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits), 
    data = dd)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.3585 -0.5910 -0.1042  0.3047  1.9565 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)        24.0658     0.5657  42.538  < 2e-16 ***
factor(nlist)4096   3.9420     0.5238   7.526 8.30e-07 ***
factor(nlist)8192   7.2821     0.5238  13.903 1.03e-10 ***
factor(pq_dim)64   19.7192     0.4277  46.109  < 2e-16 ***
factor(pq_bits)5    1.1813     0.6048   1.953   0.0675 .  
factor(pq_bits)6    1.1736     0.6048   1.941   0.0691 .  
factor(pq_bits)8    6.6573     0.6048  11.007 3.73e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.048 on 17 degrees of freedom
Multiple R-squared:  0.9932,    Adjusted R-squared:  0.9907 
F-statistic:   411 on 6 and 17 DF,  p-value: < 2.2e-16

Deep 100M k10 bs10

# Build results for the deep-100M, k=10, bs=10 benchmark.
fnam <- "h100/deep-100M-k10-bs10/build/deep-100M,H100,k=10,bs=10 - build_raft_ivf_pq.csv"
dat <- read_csv(file = fnam)
Rows: 48 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): algo_name, index_name
dbl (9): time, threads, cpu_time, GPU, niter, nlist, pq_bits, pq_dim, ratio

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Models and bar plots for the ratio = 25 runs.
f(dd = dat[dat$ratio == 25, ], fnam = fnam, sub = "ratio=25")

Call:
lm(formula = time ~ nlist + pq_bits + pq_dim, data = dd)

Residuals:
    Min      1Q  Median      3Q     Max 
-6.0663 -2.7921 -0.5567  0.6360 13.6207 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 4.688e+00  4.979e+00   0.942    0.358    
nlist       8.845e-04  1.492e-05  59.262  < 2e-16 ***
pq_bits     9.568e-01  6.293e-01   1.520    0.144    
pq_dim      3.176e-01  5.817e-02   5.460 2.41e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 4.56 on 20 degrees of freedom
Multiple R-squared:  0.9944,    Adjusted R-squared:  0.9935 
F-statistic:  1181 on 3 and 20 DF,  p-value: < 2.2e-16

Analysis of Variance Table

Model 1: time ~ (factor(nlist) + factor(pq_dim) + factor(pq_bits))^2
Model 2: time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits)
  Res.Df     RSS  Df Sum of Sq      F Pr(>F)
1      6  68.504                            
2     17 187.637 -11   -119.13 0.9486 0.5568

Call:
lm(formula = time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits), 
    data = dd)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.2314 -1.8425  0.4141  1.4580  9.0718 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)         60.2590     1.7942  33.585  < 2e-16 ***
factor(nlist)1e+05  51.2720     1.6611  30.866 2.27e-16 ***
factor(nlist)2e+05 134.0820     1.6611  80.717  < 2e-16 ***
factor(pq_dim)64    10.1627     1.3563   7.493 8.80e-07 ***
factor(pq_bits)5     1.1008     1.9181   0.574   0.5736    
factor(pq_bits)6     0.9049     1.9181   0.472   0.6431    
factor(pq_bits)8     3.9873     1.9181   2.079   0.0531 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.322 on 17 degrees of freedom
Multiple R-squared:  0.9975,    Adjusted R-squared:  0.9966 
F-statistic:  1116 on 6 and 17 DF,  p-value: < 2.2e-16

# Models and bar plots for the ratio = 10 runs.
f(dd = dat[dat$ratio == 10, ], fnam = fnam, sub = "ratio=10")

Call:
lm(formula = time ~ nlist + pq_bits + pq_dim, data = dd)

Residuals:
   Min     1Q Median     3Q    Max 
-5.956 -2.452 -1.498  1.014 12.709 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 5.1967811  5.1042068   1.018   0.3208    
nlist       0.0010484  0.0000153  68.517  < 2e-16 ***
pq_bits     1.5136234  0.6451531   2.346   0.0294 *  
pq_dim      0.6078328  0.0596371  10.192 2.29e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 4.675 on 20 degrees of freedom
Multiple R-squared:  0.9959,    Adjusted R-squared:  0.9952 
F-statistic:  1601 on 3 and 20 DF,  p-value: < 2.2e-16

Analysis of Variance Table

Model 1: time ~ (factor(nlist) + factor(pq_dim) + factor(pq_bits))^2
Model 2: time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits)
  Res.Df    RSS  Df Sum of Sq      F Pr(>F)
1      6 142.39                            
2     17 240.49 -11   -98.104 0.3758 0.9241

Call:
lm(formula = time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits), 
    data = dd)

Residuals:
    Min      1Q  Median      3Q     Max 
-7.4453 -1.9510 -0.4881  1.4497  8.6353 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)         81.7010     2.0313  40.222  < 2e-16 ***
factor(nlist)1e+05  57.9460     1.8806  30.813 2.34e-16 ***
factor(nlist)2e+05 158.3636     1.8806  84.210  < 2e-16 ***
factor(pq_dim)64    19.4506     1.5355  12.667 4.38e-10 ***
factor(pq_bits)5     1.4814     2.1715   0.682  0.50431    
factor(pq_bits)6    -0.4807     2.1715  -0.221  0.82744    
factor(pq_bits)8     6.4335     2.1715   2.963  0.00872 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.761 on 17 degrees of freedom
Multiple R-squared:  0.9977,    Adjusted R-squared:  0.9969 
F-statistic:  1239 on 6 and 17 DF,  p-value: < 2.2e-16

Wiki All 1M

# Build results for the wiki_all_1M benchmark.
fnam <- "a10g/wiki_all_1M/result/build/raft_ivf_pq,base.csv"
dat <- read_csv(file = fnam)
Rows: 64 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): algo_name, index_name
dbl (9): time, threads, cpu_time, GPU, niter, nlist, pq_bits, pq_dim, ratio

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Models and bar plots for the ratio = 25 runs.
f(dd = dat[dat$ratio == 25, ], fnam = fnam, sub = "ratio=25")

Call:
lm(formula = time ~ nlist + pq_bits + pq_dim, data = dd)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.12424 -0.04840  0.01840  0.05831  0.09975 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 4.642e-01  6.567e-02   7.068 1.09e-07 ***
nlist       2.343e-04  4.598e-06  50.955  < 2e-16 ***
pq_bits     8.435e-02  8.534e-03   9.884 1.24e-10 ***
pq_dim      5.702e-03  7.889e-04   7.229 7.21e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.0714 on 28 degrees of freedom
Multiple R-squared:  0.9899,    Adjusted R-squared:  0.9888 
F-statistic: 915.5 on 3 and 28 DF,  p-value: < 2.2e-16

Analysis of Variance Table

Model 1: time ~ (factor(nlist) + factor(pq_dim) + factor(pq_bits))^2
Model 2: time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits)
  Res.Df       RSS  Df  Sum of Sq      F Pr(>F)
1      9 0.0011307                             
2     24 0.0053017 -15 -0.0041709 2.2133 0.1152

Call:
lm(formula = time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits), 
    data = dd)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.02084 -0.01069 -0.00248  0.00853  0.03706 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)        1.270380   0.007431 170.948   <2e-16 ***
factor(nlist)2048  0.261966   0.007431  35.251   <2e-16 ***
factor(nlist)4096  0.765539   0.007431 103.014   <2e-16 ***
factor(nlist)8192  1.685642   0.007431 226.827   <2e-16 ***
factor(pq_dim)64   0.182478   0.005255  34.726   <2e-16 ***
factor(pq_bits)5  -0.006566   0.007431  -0.884    0.386    
factor(pq_bits)6   0.012464   0.007431   1.677    0.106    
factor(pq_bits)8   0.324446   0.007431  43.659   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.01486 on 24 degrees of freedom
Multiple R-squared:  0.9996,    Adjusted R-squared:  0.9995 
F-statistic:  9143 on 7 and 24 DF,  p-value: < 2.2e-16

# Models and bar plots for the ratio = 10 runs.
f(dd = dat[dat$ratio == 10, ], fnam = fnam, sub = "ratio=10")

Call:
lm(formula = time ~ nlist + pq_bits + pq_dim, data = dd)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.17132 -0.07851 -0.00766  0.07367  0.53658 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 6.950e-01  1.255e-01   5.536 6.42e-06 ***
nlist       2.697e-04  8.788e-06  30.687  < 2e-16 ***
pq_bits     8.509e-02  1.631e-02   5.216 1.54e-05 ***
pq_dim      1.262e-02  1.508e-03   8.367 4.21e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1365 on 28 degrees of freedom
Multiple R-squared:  0.9738,    Adjusted R-squared:  0.9709 
F-statistic: 346.3 on 3 and 28 DF,  p-value: < 2.2e-16

Analysis of Variance Table

Model 1: time ~ (factor(nlist) + factor(pq_dim) + factor(pq_bits))^2
Model 2: time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits)
  Res.Df      RSS  Df Sum of Sq      F Pr(>F)
1      9 0.098824                            
2     24 0.243325 -15   -0.1445 0.8773  0.605

Call:
lm(formula = time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits), 
    data = dd)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.10615 -0.04673 -0.00208  0.02411  0.41818 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)        1.84717    0.05035  36.690  < 2e-16 ***
factor(nlist)2048  0.18589    0.05035   3.692  0.00114 ** 
factor(nlist)4096  0.76698    0.05035  15.235 7.74e-14 ***
factor(nlist)8192  1.89957    0.05035  37.731  < 2e-16 ***
factor(pq_dim)64   0.40375    0.03560  11.341 3.98e-11 ***
factor(pq_bits)5  -0.02751    0.05035  -0.547  0.58976    
factor(pq_bits)6  -0.04601    0.05035  -0.914  0.36983    
factor(pq_bits)8   0.32686    0.05035   6.492 1.03e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1007 on 24 degrees of freedom
Multiple R-squared:  0.9878,    Adjusted R-squared:  0.9842 
F-statistic: 276.6 on 7 and 24 DF,  p-value: < 2.2e-16

Sift 128 Euclidean

# Build results for the sift-128-euclidean benchmark.
fnam <- "a10g/sift-128-euclidean/result/build/raft_ivf_pq,base.csv"
dat <- read_csv(file = fnam)
Rows: 61 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): algo_name, index_name
dbl (9): time, threads, cpu_time, GPU, niter, nlist, pq_bits, pq_dim, ratio

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Models and bar plots for the ratio = 25 runs.
f(dd = dat[dat$ratio == 25, ], fnam = fnam, sub = "ratio=25")

Call:
lm(formula = time ~ nlist + pq_bits + pq_dim, data = dd)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.028071 -0.010301  0.003018  0.013393  0.026537 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 5.183e-02  1.705e-02   3.039   0.0055 ** 
nlist       7.424e-05  1.121e-06  66.221  < 2e-16 ***
pq_bits     1.886e-02  2.010e-03   9.385 1.14e-09 ***
pq_dim      7.799e-03  1.918e-04  40.669  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.01628 on 25 degrees of freedom
Multiple R-squared:  0.9955,    Adjusted R-squared:  0.9949 
F-statistic:  1824 on 3 and 25 DF,  p-value: < 2.2e-16

Analysis of Variance Table

Model 1: time ~ (factor(nlist) + factor(pq_dim) + factor(pq_bits))^2
Model 2: time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits)
  Res.Df       RSS  Df  Sum of Sq      F    Pr(>F)    
1      6 0.0000363                                    
2     21 0.0045580 -15 -0.0045217 49.783 4.917e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Call:
lm(formula = time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits), 
    data = dd)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.018898 -0.012924  0.002513  0.009576  0.021932 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)       0.435392   0.009645  45.143  < 2e-16 ***
factor(nlist)2048 0.088130   0.008638  10.203 1.36e-09 ***
factor(nlist)4096 0.250169   0.008638  28.962  < 2e-16 ***
factor(nlist)8192 0.538890   0.008638  62.388  < 2e-16 ***
factor(pq_dim)64  0.251911   0.005676  44.381  < 2e-16 ***
factor(pq_bits)5  0.029015   0.007875   3.684  0.00138 ** 
factor(pq_bits)6  0.042578   0.007875   5.407 2.31e-05 ***
factor(pq_bits)8  0.080135   0.007688  10.423 9.33e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.01473 on 21 degrees of freedom
Multiple R-squared:  0.9969,    Adjusted R-squared:  0.9958 
F-statistic: 956.3 on 7 and 21 DF,  p-value: < 2.2e-16

# Models and bar plots for the ratio = 10 runs.
f(dd = dat[dat$ratio == 10, ], fnam = fnam, sub = "ratio=10")

Call:
lm(formula = time ~ nlist + pq_bits + pq_dim, data = dd)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.09329 -0.04351 -0.01738  0.00768  0.48621 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.715e-01  9.555e-02  -1.795 0.083467 .  
nlist        7.455e-05  6.689e-06  11.144 8.36e-12 ***
pq_bits      5.202e-02  1.242e-02   4.190 0.000252 ***
pq_dim       1.768e-02  1.148e-03  15.407 3.34e-15 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1039 on 28 degrees of freedom
Multiple R-squared:  0.9312,    Adjusted R-squared:  0.9239 
F-statistic: 126.4 on 3 and 28 DF,  p-value: 2.221e-16

Analysis of Variance Table

Model 1: time ~ (factor(nlist) + factor(pq_dim) + factor(pq_bits))^2
Model 2: time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits)
  Res.Df     RSS  Df Sum of Sq      F Pr(>F)
1      9 0.10527                            
2     24 0.27241 -15  -0.16714 0.9527 0.5517

Call:
lm(formula = time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits), 
    data = dd)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.11670 -0.04953  0.00262  0.02053  0.43734 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)        0.72701    0.05327  13.648 8.33e-13 ***
factor(nlist)2048  0.01367    0.05327   0.257 0.799609    
factor(nlist)4096  0.17844    0.05327   3.350 0.002666 ** 
factor(nlist)8192  0.51153    0.05327   9.603 1.08e-09 ***
factor(pq_dim)64   0.56587    0.03767  15.023 1.05e-13 ***
factor(pq_bits)5   0.03914    0.05327   0.735 0.469622    
factor(pq_bits)6   0.05951    0.05327   1.117 0.275001    
factor(pq_bits)8   0.20875    0.05327   3.919 0.000647 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1065 on 24 degrees of freedom
Multiple R-squared:  0.938, Adjusted R-squared:  0.9199 
F-statistic: 51.87 on 7 and 24 DF,  p-value: 5.857e-13

Mistral Synthetic

# Build results for the mistral_synthetic benchmark.
fnam <- "a10g/mistral_synthetic/result/build/raft_ivf_pq,base.csv"
dat <- read_csv(file = fnam)
Rows: 21 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): algo_name, index_name
dbl (9): time, threads, cpu_time, GPU, niter, nlist, pq_bits, pq_dim, ratio

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Models and bar plots for the ratio = 25 runs.
f(dd = dat[dat$ratio == 25, ], fnam = fnam, sub = "ratio=25")

Call:
lm(formula = time ~ nlist + pq_bits + pq_dim, data = dd)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.73696 -0.33772 -0.09933  0.31902  0.89036 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 5.1255192  0.9332102   5.492 0.000914 ***
nlist       0.0009423  0.0001125   8.374  6.8e-05 ***
pq_bits     0.5786647  0.1334009   4.338 0.003405 ** 
pq_dim      0.0091787  0.0130170   0.705 0.503507    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5891 on 7 degrees of freedom
Multiple R-squared:  0.9534,    Adjusted R-squared:  0.9335 
F-statistic: 47.77 on 3 and 7 DF,  p-value: 4.983e-05

Analysis of Variance Table

Model 1: time ~ (factor(nlist) + factor(pq_dim) + factor(pq_bits))^2
Model 2: time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits)
  Res.Df    RSS Df Sum of Sq   F Pr(>F)
1      0 0.0000                        
2      5 1.8596 -5   -1.8596 NaN    NaN

Call:
lm(formula = time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits), 
    data = dd)

Residuals:
       1        2        3        4        5        6        7        8 
 0.90893 -0.45727 -0.45166  0.01629 -0.45826  0.22410  0.21787 -0.01629 
       9       10       11 
-0.45067  0.23317  0.23379 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)       1.189e+01  4.546e-01  26.162 1.52e-06 ***
factor(nlist)8192 3.807e+00  4.821e-01   7.896 0.000524 ***
factor(pq_dim)64  2.937e-01  4.312e-01   0.681 0.526056    
factor(pq_bits)5  7.139e-04  4.979e-01   0.001 0.998911    
factor(pq_bits)6  8.405e-01  4.979e-01   1.688 0.152227    
factor(pq_bits)8  2.228e+00  5.750e-01   3.874 0.011709 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.6099 on 5 degrees of freedom
Multiple R-squared:  0.9643,    Adjusted R-squared:  0.9287 
F-statistic: 27.05 on 5 and 5 DF,  p-value: 0.001255

# Models and bar plots for the ratio = 10 runs.
f(dd = dat[dat$ratio == 10, ], fnam = fnam, sub = "ratio=10")

Call:
lm(formula = time ~ nlist + pq_bits + pq_dim, data = dd)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.55063 -0.23947  0.05224  0.34204  0.37335 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 5.0389453  0.7645348   6.591 0.000586 ***
nlist       0.0013413  0.0001014  13.231 1.15e-05 ***
pq_bits     0.5116543  0.1069908   4.782 0.003056 ** 
pq_dim      0.0195855  0.0100304   1.953 0.098700 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.4539 on 6 degrees of freedom
Multiple R-squared:  0.9831,    Adjusted R-squared:  0.9747 
F-statistic: 116.5 on 3 and 6 DF,  p-value: 1.046e-05

Analysis of Variance Table

Model 1: time ~ (factor(nlist) + factor(pq_dim) + factor(pq_bits))^2
Model 2: time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits)
  Res.Df       RSS Df  Sum of Sq   F Pr(>F)
1      0 0.0000000                         
2      4 0.0040451 -4 -0.0040451 NaN    NaN

Call:
lm(formula = time ~ factor(nlist) + factor(pq_dim) + factor(pq_bits), 
    data = dd)

Residuals:
        1         2         3         4         5         6         7         8 
 0.003665 -0.003665  0.035091 -0.022965 -0.017118  0.004993 -0.035091  0.022965 
        9        10 
 0.013453 -0.001327 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)       13.46793    0.02597 518.694 8.29e-11 ***
factor(nlist)8192  5.58871    0.02975 187.877 4.81e-09 ***
factor(pq_dim)64   0.62674    0.02249  27.872 9.86e-06 ***
factor(pq_bits)5  -0.01158    0.02597  -0.446   0.6787    
factor(pq_bits)6   0.13933    0.03045   4.576   0.0102 *  
factor(pq_bits)8   2.02856    0.03045  66.627 3.04e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.0318 on 4 degrees of freedom
Multiple R-squared:  0.9999,    Adjusted R-squared:  0.9999 
F-statistic: 1.448e+04 on 5 and 4 DF,  p-value: 1.335e-08