require(lmtest)
require(ggplot2)
require(ggExtra)
require(ggthemes)
require(ggpubr)
require(hrbrthemes)
require(extrafont)
require(gridExtra)
require(ggrepel)
require(lawstat)
require(nortest)
require(NormalLaplace)
# Make theme_ipsum() the default ggplot2 theme for the plots drawn below,
# with Times New Roman as the base font and 10 pt base and axis-title text.
theme_set(
  theme_ipsum(
    base_family = "Times New Roman",
    base_size = 10,
    axis_title_size = 10
  )
)
For this analysis we used the data set with 16,000 samples, applying all possible configurations of \(D \in \{3, 4, 5, 6\}\) and \(\tau \in \{1, 2, 3, 4\}\) to the descriptors.
# Assemble the 16000-row analysis table HC.BP and fit the additive model
# Dist ~ H + D + N.
#
# All six columns start as zero-filled numeric vectors and are overwritten
# below, except `t`: nothing in this part of the script assigns it, so it
# still holds zeros when the model is fitted (the model does not use it).
# NOTE(review): presumably `t` is meant to hold the tau setting -- confirm.
HC.BP <- data.frame(
  H = numeric(16000),
  C = numeric(16000),
  Dist = numeric(16000),
  D = numeric(16000),
  t = numeric(16000),
  N = numeric(16000),
  stringsAsFactors = FALSE
)

# N labels the rows in runs of 100 per level (1e+04, 2e+04, ..., 1e+05);
# that 1000-row pattern is repeated 16 times to cover all 16000 rows.
HC.BP[["N"]] <- as.factor(rep(rep(1e+04 * 1:10, each = 100), times = 16))

# read.csv() already returns a data frame, so it is used directly.
# Columns 1, 2 and 3 of the file supply H, C and D respectively.
file.csv <- read.csv("../Data/HC_series_fk0_16000.csv")
HC.BP[["H"]] <- file.csv[[1]]
HC.BP[["C"]] <- file.csv[[2]]

# Response variable: the ratio C / H, computed row by row.
HC.BP[["Dist"]] <- with(HC.BP, C / H)

# D enters the models as a categorical predictor.
HC.BP[["D"]] <- as.factor(file.csv[[3]])

# Additive model: main effects of H, D and N only.
lm.alternative.1 <- lm(Dist ~ H + D + N, data = HC.BP)
summary(lm.alternative.1)
Call:
lm(formula = Dist ~ H + D + N, data = HC.BP)
Residuals:
Min 1Q Median 3Q Max
-0.0042151 -0.0000925 0.0000249 0.0000921 0.0049616
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.012e+00 3.020e-04 3349.315 <2e-16 ***
H -1.012e+00 3.050e-04 -3317.271 <2e-16 ***
D4 -4.924e-07 6.975e-06 -0.071 0.9437
D5 3.852e-06 6.975e-06 0.552 0.5808
D6 2.546e-06 6.975e-06 0.365 0.7152
N20000 -1.337e-05 1.103e-05 -1.212 0.2255
N30000 -4.429e-06 1.103e-05 -0.402 0.6880
N40000 -1.593e-05 1.103e-05 -1.445 0.1485
N50000 -9.918e-06 1.103e-05 -0.899 0.3685
N60000 -8.748e-06 1.103e-05 -0.793 0.4277
N70000 -1.319e-05 1.103e-05 -1.196 0.2319
N80000 -8.464e-06 1.103e-05 -0.768 0.4428
N90000 -1.947e-05 1.103e-05 -1.765 0.0776 .
N1e+05 -1.662e-05 1.103e-05 -1.507 0.1318
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.0003119 on 15986 degrees of freedom
Multiple R-squared: 0.9986, Adjusted R-squared: 0.9985
F-statistic: 8.47e+05 on 13 and 15986 DF, p-value: < 2.2e-16
# Residual diagnostics for the additive model: the first four plot.lm()
# panels, followed by a finely binned histogram of the raw residuals.
plot(lm.alternative.1, which = 1:4, pch = 20)
hist(lm.alternative.1$residuals, breaks = 500)

# Second candidate model: H, D and N with all of their two- and three-way
# interactions.
lm.alternative.2 <- lm(Dist ~ H * D * N, data = HC.BP)
summary(lm.alternative.2)
Call:
lm(formula = Dist ~ H * D * N, data = HC.BP)
Residuals:
Min 1Q Median 3Q Max
-0.0040930 -0.0000918 0.0000222 0.0000903 0.0047496
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.014e+00 1.809e-03 560.846 < 2e-16 ***
H -1.015e+00 1.828e-03 -555.149 < 2e-16 ***
D4 3.186e-03 2.476e-03 1.287 0.198104
D5 -6.415e-03 2.495e-03 -2.571 0.010159 *
D6 5.100e-05 2.456e-03 0.021 0.983430
N20000 -5.305e-03 2.694e-03 -1.969 0.048940 *
N30000 -7.028e-03 2.771e-03 -2.536 0.011213 *
N40000 -3.821e-03 2.739e-03 -1.395 0.163111
N50000 -4.302e-03 2.698e-03 -1.595 0.110824
N60000 -2.878e-03 2.644e-03 -1.088 0.276416
N70000 -9.891e-03 2.689e-03 -3.678 0.000236 ***
N80000 7.479e-03 2.601e-03 2.875 0.004041 **
N90000 -3.271e-03 2.639e-03 -1.240 0.215175
N1e+05 -1.754e-03 2.639e-03 -0.665 0.506219
H:D4 -3.194e-03 2.501e-03 -1.277 0.201556
H:D5 6.492e-03 2.521e-03 2.575 0.010036 *
H:D6 -5.071e-05 2.481e-03 -0.020 0.983693
H:N20000 5.350e-03 2.721e-03 1.967 0.049240 *
H:N30000 7.097e-03 2.798e-03 2.536 0.011212 *
H:N40000 3.846e-03 2.768e-03 1.390 0.164621
H:N50000 4.333e-03 2.725e-03 1.590 0.111879
H:N60000 2.906e-03 2.671e-03 1.088 0.276649
H:N70000 9.968e-03 2.716e-03 3.670 0.000243 ***
H:N80000 -7.544e-03 2.628e-03 -2.871 0.004098 **
H:N90000 3.301e-03 2.666e-03 1.238 0.215625
H:N1e+05 1.785e-03 2.665e-03 0.670 0.503065
D4:N20000 -3.414e-03 3.738e-03 -0.913 0.361041
D5:N20000 6.434e-03 3.825e-03 1.682 0.092614 .
D6:N20000 6.008e-03 3.601e-03 1.668 0.095254 .
D4:N30000 7.094e-03 3.696e-03 1.919 0.054981 .
D5:N30000 1.011e-02 3.853e-03 2.624 0.008709 **
D6:N30000 -8.832e-03 3.781e-03 -2.336 0.019530 *
D4:N40000 -7.819e-04 3.716e-03 -0.210 0.833359
D5:N40000 3.434e-03 3.650e-03 0.941 0.346837
D6:N40000 2.026e-03 3.780e-03 0.536 0.591937
D4:N50000 8.651e-04 3.646e-03 0.237 0.812442
D5:N50000 5.312e-03 3.821e-03 1.390 0.164461
D6:N50000 5.583e-03 3.703e-03 1.508 0.131602
D4:N60000 -2.585e-03 3.777e-03 -0.684 0.493701
D5:N60000 1.513e-02 3.757e-03 4.027 5.67e-05 ***
D6:N60000 -2.808e-03 3.647e-03 -0.770 0.441333
D4:N70000 1.557e-03 3.632e-03 0.429 0.668156
D5:N70000 1.891e-02 3.675e-03 5.145 2.70e-07 ***
D6:N70000 9.365e-03 3.489e-03 2.684 0.007281 **
D4:N80000 -9.144e-03 3.688e-03 -2.479 0.013172 *
D5:N80000 -6.615e-03 3.684e-03 -1.796 0.072548 .
D6:N80000 -1.670e-02 3.687e-03 -4.528 5.99e-06 ***
D4:N90000 -1.074e-02 3.736e-03 -2.874 0.004056 **
D5:N90000 -1.568e-03 3.862e-03 -0.406 0.684768
D6:N90000 3.374e-03 3.676e-03 0.918 0.358653
D4:N1e+05 -1.146e-02 3.698e-03 -3.098 0.001953 **
D5:N1e+05 4.590e-03 3.682e-03 1.247 0.212596
D6:N1e+05 2.817e-03 3.696e-03 0.762 0.446022
H:D4:N20000 3.410e-03 3.775e-03 0.904 0.366264
H:D5:N20000 -6.499e-03 3.863e-03 -1.682 0.092494 .
H:D6:N20000 -6.060e-03 3.638e-03 -1.666 0.095770 .
H:D4:N30000 -7.171e-03 3.733e-03 -1.921 0.054768 .
H:D5:N30000 -1.020e-02 3.891e-03 -2.622 0.008745 **
H:D6:N30000 8.896e-03 3.819e-03 2.330 0.019838 *
H:D4:N40000 7.780e-04 3.754e-03 0.207 0.835833
H:D5:N40000 -3.485e-03 3.688e-03 -0.945 0.344569
H:D6:N40000 -2.026e-03 3.818e-03 -0.531 0.595735
H:D4:N50000 -8.824e-04 3.683e-03 -0.240 0.810636
H:D5:N50000 -5.364e-03 3.859e-03 -1.390 0.164560
H:D6:N50000 -5.624e-03 3.740e-03 -1.504 0.132678
H:D4:N60000 2.596e-03 3.814e-03 0.681 0.496081
H:D5:N60000 -1.527e-02 3.795e-03 -4.024 5.74e-05 ***
H:D6:N60000 2.814e-03 3.684e-03 0.764 0.445033
H:D4:N70000 -1.590e-03 3.669e-03 -0.433 0.664717
H:D5:N70000 -1.910e-02 3.713e-03 -5.143 2.73e-07 ***
H:D6:N70000 -9.416e-03 3.524e-03 -2.672 0.007551 **
H:D4:N80000 9.205e-03 3.726e-03 2.470 0.013503 *
H:D5:N80000 6.647e-03 3.721e-03 1.786 0.074050 .
H:D6:N80000 1.685e-02 3.724e-03 4.525 6.09e-06 ***
H:D4:N90000 1.080e-02 3.773e-03 2.862 0.004211 **
H:D5:N90000 1.549e-03 3.900e-03 0.397 0.691179
H:D6:N90000 -3.400e-03 3.713e-03 -0.916 0.359750
H:D4:N1e+05 1.150e-02 3.735e-03 3.078 0.002088 **
H:D5:N1e+05 -4.657e-03 3.719e-03 -1.252 0.210474
H:D6:N1e+05 -2.868e-03 3.733e-03 -0.768 0.442401
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.0003099 on 15920 degrees of freedom
Multiple R-squared: 0.9986, Adjusted R-squared: 0.9986
F-statistic: 1.412e+05 on 79 and 15920 DF, p-value: < 2.2e-16
# Residual diagnostics for the full-interaction model.
plot(lm.alternative.2, which = 1:4, pch = 20)
hist(lm.alternative.2$residuals, breaks = 500)

# Fit a separate simple regression Dist ~ H for every (dimension, N) pair:
# 4 dimensions x 10 values of N = 40 models, stored in lm.hypothesis.0 in
# that order (N varies fastest).
dimension <- c(3, 4, 5, 6)
N <- 1e+04 * 1:10
lm.hypothesis.0 <- array(list(), dim = 40)

cc <- 0 # running position in lm.hypothesis.0
b <- 0  # row offset of the current dimension's 4000-row slab of HC.BP
for (i in seq_along(dimension)) {
  for (j in seq_along(N)) {
    cc <- cc + 1

    # Rows of the j-th run of 100 inside a 1000-row group ...
    a <- ((j - 1) * 100 + 1):(j * 100)
    # ... taken from each of the four consecutive 1000-row groups of the
    # current slab, giving 400 rows per fit.
    # NOTE(review): the four groups presumably are the four tau settings --
    # confirm against how the CSV was generated.
    elements <- rep(a + b, times = 4) + rep(c(0, 1000, 2000, 3000), each = 100)

    HC.BP.regression.data <- data.frame(
      H = HC.BP$H[elements],
      Dist = HC.BP$Dist[elements],
      stringsAsFactors = FALSE
    )
    lm.hypothesis.0[[cc]] <- lm(Dist ~ H, data = HC.BP.regression.data)
  }
  b <- b + 4000
}

# Diagnostics for the first of the 40 fits (first dimension, first N).
plot(lm.hypothesis.0[[1]], which = 1:4, pch = 20)
hist(lm.hypothesis.0[[1]]$residuals, breaks = 200, main = "Histogram of Residuals")

# Rebuild the 400 rows used by that first fit and overlay its fitted line
# on the observed (H, Dist) points.
a <- 1:100
elements <- c(a, a + 1000, a + 2000, a + 3000)
HC <- data.frame(
  H = HC.BP$H[elements],
  Dist = HC.BP$Dist[elements],
  stringsAsFactors = FALSE
)
ggplot(data = HC, mapping = aes(x = H, y = Dist)) +
  geom_point() +
  scale_fill_grey() +
  geom_line(mapping = aes(y = predict(lm.hypothesis.0[[1]], HC)))