require(lmtest)
require(ggplot2)
require(ggExtra)
require(ggthemes)
require(ggpubr)
require(hrbrthemes)
require(extrafont)
require(gridExtra)
require(ggrepel)
require(lawstat)
require(nortest)
require(NormalLaplace)
# Global ggplot2 theme for the figures of this report: the ipsum theme with
# Times New Roman as font family and size 10 for base text and axis titles.
theme_set(
  theme_ipsum(
    base_family = "Times New Roman",
    base_size = 10,
    axis_title_size = 10
  )
)

The study of the influence of parameters

For this analysis, we used the data set with 16,000 samples, obtained by applying every possible configuration with \(D \in \{3, 4, 5, 6\}\) and \(\tau \in \{1, 2, 3, 4\}\) to the descriptors.

# Pre-allocate the 16,000-row analysis table. Every column except "t" is
# overwritten below; "t" keeps its zero fill in the code visible here.
HC.BP <- data.frame(
  "H" = numeric(16000),
  "C" = numeric(16000),
  "Dist" = numeric(16000),
  "D" = numeric(16000),
  "t" = numeric(16000),
  "N" = numeric(16000),
  stringsAsFactors = FALSE
)

# N labels: ten values (1e4, 2e4, ..., 1e5), each repeated 100 times, and the
# resulting 1,000-row pattern repeated 16 times.
# NOTE(review): this presumes the CSV rows follow the same ordering - confirm.
HC.BP$N <- as.factor(
  rep(
    rep(
      c(1e+04, 2e+04, 3e+04, 4e+04, 5e+04, 6e+04, 7e+04, 8e+04, 9e+04, 1e+05),
      each = 100
    ),
    times = 16
  )
)

# Columns are taken from the CSV by position: 1 -> H, 2 -> C, 3 -> D.
file.csv <- data.frame(read.csv("../Data/HC_series_fk0_16000.csv"))
HC.BP$H <- file.csv[, 1]
HC.BP$C <- file.csv[, 2]
HC.BP$D <- as.factor(file.csv[, 3])

# Response used by all the regressions below: the ratio C / H.
HC.BP$Dist <- HC.BP$C / HC.BP$H

Alternative 1 - Calculate the regression as a function of H, D and N.

# Alternative 1: linear model of Dist on H and the factors D and N,
# main effects only (no interaction terms).
lm.alternative.1 <- lm(Dist ~ H + D + N, data = HC.BP)
summary(lm.alternative.1)

Call:
lm(formula = Dist ~ H + D + N, data = HC.BP)

Residuals:
       Min         1Q     Median         3Q        Max 
-0.0042151 -0.0000925  0.0000249  0.0000921  0.0049616 

Coefficients:
              Estimate Std. Error   t value Pr(>|t|)    
(Intercept)  1.012e+00  3.020e-04  3349.315   <2e-16 ***
H           -1.012e+00  3.050e-04 -3317.271   <2e-16 ***
D4          -4.924e-07  6.975e-06    -0.071   0.9437    
D5           3.852e-06  6.975e-06     0.552   0.5808    
D6           2.546e-06  6.975e-06     0.365   0.7152    
N20000      -1.337e-05  1.103e-05    -1.212   0.2255    
N30000      -4.429e-06  1.103e-05    -0.402   0.6880    
N40000      -1.593e-05  1.103e-05    -1.445   0.1485    
N50000      -9.918e-06  1.103e-05    -0.899   0.3685    
N60000      -8.748e-06  1.103e-05    -0.793   0.4277    
N70000      -1.319e-05  1.103e-05    -1.196   0.2319    
N80000      -8.464e-06  1.103e-05    -0.768   0.4428    
N90000      -1.947e-05  1.103e-05    -1.765   0.0776 .  
N1e+05      -1.662e-05  1.103e-05    -1.507   0.1318    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.0003119 on 15986 degrees of freedom
Multiple R-squared:  0.9986,    Adjusted R-squared:  0.9985 
F-statistic: 8.47e+05 on 13 and 15986 DF,  p-value: < 2.2e-16
# Diagnostic plots 1 to 4 of plot() for the main-effects model
# lm.alternative.1, drawn with solid dots (pch = 20).
plot(lm.alternative.1, which = c(1:4), pch = 20)

# Finely binned histogram (breaks = 500) of the residuals of the same model.
hist(lm.alternative.1$residuals, breaks = 500)

Alternative 2 - Calculate the regression as a function of H, D and N, including all their interactions.

# Alternative 2: linear model of Dist on H and the factors D and N with every
# two-way and three-way interaction (H * D * N).
lm.alternative.2 <- lm(Dist ~ H * D * N, data = HC.BP)
summary(lm.alternative.2)

Call:
lm(formula = Dist ~ H * D * N, data = HC.BP)

Residuals:
       Min         1Q     Median         3Q        Max 
-0.0040930 -0.0000918  0.0000222  0.0000903  0.0047496 

Coefficients:
              Estimate Std. Error  t value Pr(>|t|)    
(Intercept)  1.014e+00  1.809e-03  560.846  < 2e-16 ***
H           -1.015e+00  1.828e-03 -555.149  < 2e-16 ***
D4           3.186e-03  2.476e-03    1.287 0.198104    
D5          -6.415e-03  2.495e-03   -2.571 0.010159 *  
D6           5.100e-05  2.456e-03    0.021 0.983430    
N20000      -5.305e-03  2.694e-03   -1.969 0.048940 *  
N30000      -7.028e-03  2.771e-03   -2.536 0.011213 *  
N40000      -3.821e-03  2.739e-03   -1.395 0.163111    
N50000      -4.302e-03  2.698e-03   -1.595 0.110824    
N60000      -2.878e-03  2.644e-03   -1.088 0.276416    
N70000      -9.891e-03  2.689e-03   -3.678 0.000236 ***
N80000       7.479e-03  2.601e-03    2.875 0.004041 ** 
N90000      -3.271e-03  2.639e-03   -1.240 0.215175    
N1e+05      -1.754e-03  2.639e-03   -0.665 0.506219    
H:D4        -3.194e-03  2.501e-03   -1.277 0.201556    
H:D5         6.492e-03  2.521e-03    2.575 0.010036 *  
H:D6        -5.071e-05  2.481e-03   -0.020 0.983693    
H:N20000     5.350e-03  2.721e-03    1.967 0.049240 *  
H:N30000     7.097e-03  2.798e-03    2.536 0.011212 *  
H:N40000     3.846e-03  2.768e-03    1.390 0.164621    
H:N50000     4.333e-03  2.725e-03    1.590 0.111879    
H:N60000     2.906e-03  2.671e-03    1.088 0.276649    
H:N70000     9.968e-03  2.716e-03    3.670 0.000243 ***
H:N80000    -7.544e-03  2.628e-03   -2.871 0.004098 ** 
H:N90000     3.301e-03  2.666e-03    1.238 0.215625    
H:N1e+05     1.785e-03  2.665e-03    0.670 0.503065    
D4:N20000   -3.414e-03  3.738e-03   -0.913 0.361041    
D5:N20000    6.434e-03  3.825e-03    1.682 0.092614 .  
D6:N20000    6.008e-03  3.601e-03    1.668 0.095254 .  
D4:N30000    7.094e-03  3.696e-03    1.919 0.054981 .  
D5:N30000    1.011e-02  3.853e-03    2.624 0.008709 ** 
D6:N30000   -8.832e-03  3.781e-03   -2.336 0.019530 *  
D4:N40000   -7.819e-04  3.716e-03   -0.210 0.833359    
D5:N40000    3.434e-03  3.650e-03    0.941 0.346837    
D6:N40000    2.026e-03  3.780e-03    0.536 0.591937    
D4:N50000    8.651e-04  3.646e-03    0.237 0.812442    
D5:N50000    5.312e-03  3.821e-03    1.390 0.164461    
D6:N50000    5.583e-03  3.703e-03    1.508 0.131602    
D4:N60000   -2.585e-03  3.777e-03   -0.684 0.493701    
D5:N60000    1.513e-02  3.757e-03    4.027 5.67e-05 ***
D6:N60000   -2.808e-03  3.647e-03   -0.770 0.441333    
D4:N70000    1.557e-03  3.632e-03    0.429 0.668156    
D5:N70000    1.891e-02  3.675e-03    5.145 2.70e-07 ***
D6:N70000    9.365e-03  3.489e-03    2.684 0.007281 ** 
D4:N80000   -9.144e-03  3.688e-03   -2.479 0.013172 *  
D5:N80000   -6.615e-03  3.684e-03   -1.796 0.072548 .  
D6:N80000   -1.670e-02  3.687e-03   -4.528 5.99e-06 ***
D4:N90000   -1.074e-02  3.736e-03   -2.874 0.004056 ** 
D5:N90000   -1.568e-03  3.862e-03   -0.406 0.684768    
D6:N90000    3.374e-03  3.676e-03    0.918 0.358653    
D4:N1e+05   -1.146e-02  3.698e-03   -3.098 0.001953 ** 
D5:N1e+05    4.590e-03  3.682e-03    1.247 0.212596    
D6:N1e+05    2.817e-03  3.696e-03    0.762 0.446022    
H:D4:N20000  3.410e-03  3.775e-03    0.904 0.366264    
H:D5:N20000 -6.499e-03  3.863e-03   -1.682 0.092494 .  
H:D6:N20000 -6.060e-03  3.638e-03   -1.666 0.095770 .  
H:D4:N30000 -7.171e-03  3.733e-03   -1.921 0.054768 .  
H:D5:N30000 -1.020e-02  3.891e-03   -2.622 0.008745 ** 
H:D6:N30000  8.896e-03  3.819e-03    2.330 0.019838 *  
H:D4:N40000  7.780e-04  3.754e-03    0.207 0.835833    
H:D5:N40000 -3.485e-03  3.688e-03   -0.945 0.344569    
H:D6:N40000 -2.026e-03  3.818e-03   -0.531 0.595735    
H:D4:N50000 -8.824e-04  3.683e-03   -0.240 0.810636    
H:D5:N50000 -5.364e-03  3.859e-03   -1.390 0.164560    
H:D6:N50000 -5.624e-03  3.740e-03   -1.504 0.132678    
H:D4:N60000  2.596e-03  3.814e-03    0.681 0.496081    
H:D5:N60000 -1.527e-02  3.795e-03   -4.024 5.74e-05 ***
H:D6:N60000  2.814e-03  3.684e-03    0.764 0.445033    
H:D4:N70000 -1.590e-03  3.669e-03   -0.433 0.664717    
H:D5:N70000 -1.910e-02  3.713e-03   -5.143 2.73e-07 ***
H:D6:N70000 -9.416e-03  3.524e-03   -2.672 0.007551 ** 
H:D4:N80000  9.205e-03  3.726e-03    2.470 0.013503 *  
H:D5:N80000  6.647e-03  3.721e-03    1.786 0.074050 .  
H:D6:N80000  1.685e-02  3.724e-03    4.525 6.09e-06 ***
H:D4:N90000  1.080e-02  3.773e-03    2.862 0.004211 ** 
H:D5:N90000  1.549e-03  3.900e-03    0.397 0.691179    
H:D6:N90000 -3.400e-03  3.713e-03   -0.916 0.359750    
H:D4:N1e+05  1.150e-02  3.735e-03    3.078 0.002088 ** 
H:D5:N1e+05 -4.657e-03  3.719e-03   -1.252 0.210474    
H:D6:N1e+05 -2.868e-03  3.733e-03   -0.768 0.442401    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.0003099 on 15920 degrees of freedom
Multiple R-squared:  0.9986,    Adjusted R-squared:  0.9986 
F-statistic: 1.412e+05 on 79 and 15920 DF,  p-value: < 2.2e-16
# Diagnostic plots 1 to 4 of plot() for the full-interaction model
# lm.alternative.2, drawn with solid dots (pch = 20).
plot(lm.alternative.2, which = c(1:4), pch = 20)

# Finely binned histogram (breaks = 500) of the residuals of the same model.
hist(lm.alternative.2$residuals, breaks = 500)

Alternative 3 - Calculate the regression based on H only

Hypothesis 0: D and N matter

# Hypothesis 0: fit one simple regression Dist ~ H for each of the
# 4 x 10 = 40 combinations of dimension and N, stored in lm.hypothesis.0.
dimension <- c(3, 4, 5, 6)
N <- c(1e+04, 2e+04, 3e+04, 4e+04, 5e+04, 6e+04, 7e+04, 8e+04, 9e+04, 1e+05)

# Scratch frame holding the 400 rows used by the fit currently being computed.
HC.BP.regression.data <- data.frame(
  "H" = numeric(400),
  "Dist" = numeric(400),
  stringsAsFactors = FALSE
)
lm.hypothesis.0 <- array(list(), 40)

b <- 0   # row offset of the current 4,000-row block (one block per dimension)
cc <- 0  # running position in lm.hypothesis.0
for (i in seq_along(dimension)) {
  for (j in seq_along(N)) {
    cc <- cc + 1
    # Rows of the j-th group of 100 inside a 1,000-row slice ...
    a <- seq(from = ((j - 1) * 100) + 1, to = j * 100)
    # ... picked from each of the four consecutive 1,000-row slices of the
    # block.
    # NOTE(review): presumes rows are ordered by D, then by 1,000-row slice,
    # then by N - confirm against the CSV layout.
    elements <- as.vector(outer(a + b, c(0, 1000, 2000, 3000), "+"))
    HC.BP.regression.data$H <- HC.BP$H[elements]
    HC.BP.regression.data$Dist <- HC.BP$Dist[elements]
    lm.hypothesis.0[[cc]] <- lm(Dist ~ H, data = HC.BP.regression.data)
  }
  b <- b + 4000
}

# Diagnostics for the first fitted model only (first dimension, first N).
plot(lm.hypothesis.0[[1]], which = 1:4, pch = 20)

hist(lm.hypothesis.0[[1]]$residuals, breaks = 200, main = "Histogram of Residuals")

# Rows 1-100 of each of the first four 1,000-row slices of HC.BP, i.e. the
# same 400 rows that were used to fit lm.hypothesis.0[[1]].
a <- 1:100
elements <- a + rep(c(0, 1000, 2000, 3000), each = length(a))
HC <- data.frame(
  "H" = HC.BP$H[elements],
  "Dist" = HC.BP$Dist[elements],
  stringsAsFactors = FALSE
)

# Scatter of the observations with the line predicted by the first model.
ggplot(data = HC, mapping = aes(x = H, y = Dist)) +
  geom_point() +
  scale_fill_grey() +
  geom_line(mapping = aes(y = predict(lm.hypothesis.0[[1]], HC)))

LS0tCnRpdGxlOiAiUmVwb3J0IDYgLSBUaGUgc3R1ZHkgb2YgRCB4IEgiCmF1dGhvcjogIkVkdWFyZGEgQ2hhZ2FzIgpkYXRlOiAiTWF5IDI2LCAyMDIwIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazogZGVmYXVsdAogIHBkZl9kb2N1bWVudDogZGVmYXVsdAotLS0KCmBgYHtyfQpyZXF1aXJlKGxtdGVzdCkKcmVxdWlyZShnZ3Bsb3QyKQpyZXF1aXJlKGdnRXh0cmEpCnJlcXVpcmUoZ2d0aGVtZXMpCnJlcXVpcmUoZ2dwdWJyKQpyZXF1aXJlKGhyYnJ0aGVtZXMpCnJlcXVpcmUoZXh0cmFmb250KQpyZXF1aXJlKGdyaWRFeHRyYSkKcmVxdWlyZShnZ3JlcGVsKQpyZXF1aXJlKGxhd3N0YXQpCnJlcXVpcmUobm9ydGVzdCkKcmVxdWlyZShOb3JtYWxMYXBsYWNlKQoKdGhlbWVfc2V0KHRoZW1lX2lwc3VtKGJhc2VfZmFtaWx5ID0gIlRpbWVzIE5ldyBSb21hbiIsIAogICAgICAgICAgICAgIGJhc2Vfc2l6ZSA9IDEwLCBheGlzX3RpdGxlX3NpemUgPSAxMCkpCmBgYAoKCiMjVGhlIHN0dWR5IG9mIHRoZSBpbmZsdWVuY2Ugb2YgcGFyYW1ldGVycwoKVG8gdGhpcyBhbmFseXNpcyB3ZSB1c2VkIHRoZSBkYXRhIHNldCB3aXRoIDE2LjAwMCBzYW1wbGVzLCBhcHBseWluZyBhbGwgcG9zc2libGUgY29uZmlndXJhdGlvbnMgd2l0aCAkRCBcaW4gXHszLCA0LCA1LCA2XH0kIGFuZCAkXHRhdSBcaW4gXHsxLCAyLCAzLCA0XH0kIHRvIHRoZSBkZXNjcmlwdG9ycy4gCgpgYGB7cn0KSEMuQlAgPSBkYXRhLmZyYW1lKCJIIiA9IG51bWVyaWMoMTYwMDApLCAKICAgICAgICAgICAgICAgICAgICJDIiA9IG51bWVyaWMoMTYwMDApLAogICAgICAgICAgICAgICAgICAgIkRpc3QiID0gbnVtZXJpYygxNjAwMCksCiAgICAgICAgICAgICAgICAgICAiRCIgPSBudW1lcmljKDE2MDAwKSwKICAgICAgICAgICAgICAgICAgICJOIiA9IG51bWVyaWMoMTYwMDApLCAKICAgICAgICAgICAgICAgICAgIHN0cmluZ3NBc0ZhY3RvcnM9RkFMU0UpCgpIQy5CUCROID0gYXMuZmFjdG9yKHJlcChjKHJlcCgxZSswNCwgMTAwKSwgcmVwKDJlKzA0LCAxMDApLCByZXAoM2UrMDQsIDEwMCksIHJlcCg0ZSswNCwgMTAwKSwgcmVwKDVlKzA0LCAxMDApLCByZXAoNmUrMDQsIDEwMCksIHJlcCg3ZSswNCwgMTAwKSwgcmVwKDhlKzA0LCAxMDApLCByZXAoOWUrMDQsIDEwMCksIHJlcCgxZSswNSwgMTAwKSksIDE2KSkKCmZpbGUuY3N2ID0gZGF0YS5mcmFtZShyZWFkLmNzdigiLi4vRGF0YS9IQ19zZXJpZXNfZmswXzE2MDAwLmNzdiIpKQoKSEMuQlAkSCA9IGZpbGUuY3N2WywxXQpIQy5CUCRDID0gZmlsZS5jc3ZbLDJdCkhDLkJQJERpc3QgPSBIQy5CUCRDIC8gSEMuQlAkSApIQy5CUCREPSBhcy5mYWN0b3IoZmlsZS5jc3ZbLDNdKQpgYGAKCiMjQWx0ZXJuYXRpdmUgMSAtIENhbGN1bGF0ZSB0aGUgcmVncmVzc2lvbiBhcyBhIGZ1bmN0aW9uIG9mIEgsIEQgYW5kIE4uCgpgYGB7cn0KbG0uYWx0ZXJuYXRpdmUuMSA9IGxtKGRhdGEgPSBIQy5CUCwgZm9ybXVsYSA9IERpc3QgfiBIICsgRCArIE4p
CnN1bW1hcnkobG0uYWx0ZXJuYXRpdmUuMSkKYGBgCgpgYGB7cn0KcGxvdChsbS5hbHRlcm5hdGl2ZS4xLCB3aGljaCA9IGMoMTo0KSwgcGNoID0gMjApCmhpc3QobG0uYWx0ZXJuYXRpdmUuMSRyZXNpZHVhbHMsIGJyZWFrcyA9IDUwMCkKYGBgCgojI0FsdGVybmF0aXZlIDIgLSBDYWxjdWxhdGUgdGhlIHJlZ3Jlc3Npb24gYXMgYSBmdW5jdGlvbiBvZiBILCBEIGFuZCBOLgoKYGBge3J9CmxtLmFsdGVybmF0aXZlLjIgPSBsbShkYXRhID0gSEMuQlAsIGZvcm11bGEgPSBEaXN0IH4gSCAqIEQgKiBOKQpzdW1tYXJ5KGxtLmFsdGVybmF0aXZlLjIpCmBgYAoKYGBge3J9CnBsb3QobG0uYWx0ZXJuYXRpdmUuMiwgd2hpY2ggPSBjKDE6NCksIHBjaCA9IDIwKQpoaXN0KGxtLmFsdGVybmF0aXZlLjIkcmVzaWR1YWxzLCBicmVha3MgPSA1MDApCmBgYAoKIyNBbHRlcm5hdGl2ZSAyIC0gQ2FsY3VsYXRlIHRoZSByZWdyZXNzaW9uIGJhc2VkIG9uIEggCgojIyNIeXBvdGhlc2lzIDA6IEQgYW5kIE4gbWF0dGVyCgpgYGB7cn0KZGltZW5zaW9uID0gYygzLDQsNSw2KQpOID0gYygxZSswNCwgMmUrMDQsIDNlKzA0LCA0ZSswNCwgNWUrMDQsIDZlKzA0LCA3ZSswNCwgOGUrMDQsIDllKzA0LCAxZSswNSkKSEMuQlAucmVncmVzc2lvbi5kYXRhID0gZGF0YS5mcmFtZSgiSCIgPSBudW1lcmljKDQwMCksIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJEaXN0IiA9IG51bWVyaWMoNDAwKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzdHJpbmdzQXNGYWN0b3JzPUZBTFNFKQpsbS5oeXBvdGhlc2lzLjAgPSBhcnJheShsaXN0KCksIDQwKQoKYiA9IGNjID0gMApmb3IoaSBpbiAxOmxlbmd0aChkaW1lbnNpb24pKXsKICBmb3IoaiBpbiAxOmxlbmd0aChOKSl7CiAgICBjYyA9IGNjICsgMQogICAgYSA9IGMoKCgoaiAtIDEpICogMTAwKSArIDEpOihqICogMTAwKSkKICAgIGVsZW1lbnRzID0gYyhhICsgYiwgYSArIGIgKyAxMDAwLCBhICsgYiArIDIwMDAsIGEgKyBiICsgMzAwMCkKICAgIEhDLkJQLnJlZ3Jlc3Npb24uZGF0YSRIID0gSEMuQlAkSFtlbGVtZW50c10KICAgIEhDLkJQLnJlZ3Jlc3Npb24uZGF0YSREaXN0ID0gSEMuQlAkRGlzdFtlbGVtZW50c10KICAgIGxtLmh5cG90aGVzaXMuMFtbY2NdXSA9IGxtKGRhdGEgPSBIQy5CUC5yZWdyZXNzaW9uLmRhdGEsIGZvcm11bGEgPSBEaXN0IH4gSCkKICB9CiAgYiA9IGIgKyA0MDAwCn0KcGxvdChsbS5oeXBvdGhlc2lzLjBbWzFdXSwgd2hpY2ggPSBjKDE6NCksIHBjaCA9IDIwKQpoaXN0KGxtLmh5cG90aGVzaXMuMFtbMV1dJHJlc2lkdWFscywgYnJlYWtzID0gMjAwLCBtYWluID0gIkhpc3RvZ3JhbSBvZiBSZXNpZHVhbHMiKQpgYGAKCmBgYHtyfQphID0gYygxOjEwMCkKZWxlbWVudHMgPSBjKGEsIGEgKyAxMDAwLCBhICsgMjAwMCwgYSArIDMwMDApCkhDID0gZGF0YS5mcmFtZSgiSCIgPSBIQy5CUCRIW2VsZW1lbnRzXSwgIkRpc3QiID0gSEMuQlAkRGlzdFtlbGVtZW50
c10sIHN0cmluZ3NBc0ZhY3RvcnM9RkFMU0UpCgpnZ3Bsb3QoSEMsIGFlcyh4ID0gSCwgeSA9IERpc3QpKSArCiAgZ2VvbV9wb2ludCgpICsKICBzY2FsZV9maWxsX2dyZXkoKSArCiAgZ2VvbV9saW5lKGFlcyh5ID0gcHJlZGljdChsbS5oeXBvdGhlc2lzLjBbWzFdXSwgSEMpKSkgCmBgYAoK