if(!require(geoR)) install.packages("geoR")
if(!require(stats)) install.packages("stats")
if(!require(ggplot2)) install.packages("ggplot2")
if(!require(nortest)) install.packages("nortest")
if(!require(graphics)) install.packages("graphics")
if(!require(EnvStats)) install.packages("EnvStats")
if(!require(rcompanion)) install.packages("rcompanion")
Para realizar este estudo, utilizamos um dataset com 16.000 valores de entropia de permutação de Shannon e complexidade estatÃstica referentes a ruÃdos brancos gerados artificialmente com amostras de tamanhos \(N \in \{10.000, 20.000, 30.000, 40.000, 50.000, 60.000, 70.000, 80.000, 90.000, 100.000\}\), aplicando todas as possÃveis configurações de \(D \in \{3, 4, 5, 6\}\) e \(\tau \in \{1, 2, 3, 4\}\) aos descritores.
A disposição dos dados no plano \(HC\) pode ser observada abaixo:
HC.BP = data.frame("H" = numeric(16000),
"C" = numeric(16000),
"Dist" = numeric(16000),
"D" = numeric(16000),
"N" = numeric(16000),
stringsAsFactors=FALSE)
HC.BP$N = as.factor(rep(c(rep(1e+04, 100), rep(2e+04, 100), rep(3e+04, 100), rep(4e+04, 100), rep(5e+04, 100), rep(6e+04, 100), rep(7e+04, 100), rep(8e+04, 100), rep(9e+04, 100), rep(1e+05, 100)), 16))
file.csv = data.frame(read.csv("../Data/HC_series_fk0_16000.csv"))
HC.BP$H = file.csv[,1]
HC.BP$C = file.csv[,2]
HC.BP$Dist = HC.BP$C / HC.BP$H
HC.BP$D= as.factor(file.csv[,3])
Transformação de Box-Cox
dimension = c(3,4,5,6)
N = c(1e+04, 2e+04, 3e+04, 4e+04, 5e+04, 6e+04, 7e+04, 8e+04, 9e+04, 1e+05)
HC.test.3 = data.frame("H" = numeric(400),
"Dist" = numeric(400),
stringsAsFactors=FALSE)
lm.3.box.cox = array(list(), 40)
b = cc = 0
for(i in 1:length(dimension)){
for(j in 1:length(N)){
cc = cc + 1
a = c((((j - 1) * 100) + 1):(j * 100))
elements = c(a + b, a + b + 1000, a + b + 2000, a + b + 3000)
HC.test.3$H = HC.BP$H[elements]
HC.test.3$Dist = HC.BP$Dist[elements]
parameters = boxcoxfit(HC.test.3$Dist, lambda2 = F)
lambda = parameters$lambda[1]
if(lambda==0){T.norm = log(HC.test.3$Dist)}
if(lambda!=0){T.norm = ((HC.test.3$Dist)^lambda - 1)/lambda}
HC.test.3$Dist = T.norm
lm.3.box.cox[[cc]] = lm(data = HC.test.3, formula = Dist ~ H)
}
b = b + 4000
}
HC.test.3$H = HC.BP$H[1:100]
HC.test.3$Dist = HC.BP$Dist[1:100]
parameters = boxcoxfit(HC.test.3$Dist, lambda2 = F)
lambda = parameters$lambda[1]
if(lambda==0){T.norm = log(HC.test.3$Dist)}
if(lambda!=0){T.norm = ((HC.test.3$Dist)^lambda - 1)/lambda}
HC.test.3$Dist = T.norm
plot(x = HC.test.3$H, y = HC.test.3$Dist)

ggplot(HC.test.3, aes(x = H, y = Dist)) +
geom_point() +
scale_fill_grey() +
geom_line(aes(y = predict(lm.3.box.cox[[1]], HC.test.3, type = 'response')))

Analisando o plot do modelo:
plot(lm.3.box.cox[[1]])




hist(lm.3.box.cox[[1]]$residuals, breaks = 200)

Examinando o \(QQ-plot\) dos resÃduos:
qqPlot(rstandard(lm.3.box.cox[[1]]))

LS0tCnRpdGxlOiAiUmVwb3J0IDcgLSBCb3gtQ294IFRyYW5zZm9ybWF0aW9uIChEeEgpIgphdXRob3I6ICJFZHVhcmRhIENoYWdhcyIKZGF0ZTogIk1heSAyNiwgMjAyMCIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICBwZGZfZG9jdW1lbnQ6IGRlZmF1bHQKLS0tCgpgYGB7cn0KaWYoIXJlcXVpcmUoZ2VvUikpIGluc3RhbGwucGFja2FnZXMoImdlb1IiKQppZighcmVxdWlyZShzdGF0cykpIGluc3RhbGwucGFja2FnZXMoInN0YXRzIikKaWYoIXJlcXVpcmUoZ2dwbG90MikpIGluc3RhbGwucGFja2FnZXMoImdncGxvdDIiKQppZighcmVxdWlyZShub3J0ZXN0KSkgaW5zdGFsbC5wYWNrYWdlcygibm9ydGVzdCIpCmlmKCFyZXF1aXJlKGdyYXBoaWNzKSkgaW5zdGFsbC5wYWNrYWdlcygiZ3JhcGhpY3MiKQppZighcmVxdWlyZShFbnZTdGF0cykpIGluc3RhbGwucGFja2FnZXMoIkVudlN0YXRzIikKaWYoIXJlcXVpcmUocmNvbXBhbmlvbikpIGluc3RhbGwucGFja2FnZXMoInJjb21wYW5pb24iKQpgYGAKClBhcmEgcmVhbGl6YXIgZXN0ZSBlc3R1ZG8sIHV0aWxpemFtb3MgdW0gZGF0YXNldCBjb20gMTYuMDAwIHZhbG9yZXMgZGUgZW50cm9waWEgZGUgcGVybXV0YcOnw6NvIGRlIFNoYW5ub24gZSBjb21wbGV4aWRhZGUgZXN0YXTDrXN0aWNhIHJlZmVyZW50ZXMgYSBydcOtZG9zIGJyYW5jb3MgZ2VyYWRvcyBhcnRpZmljaWFsbWVudGUgY29tIGFtb3N0cmFzIGRlIHRhbWFuaG9zICROIFxpbiBcezEwLjAwMCwgMjAuMDAwLCAzMC4wMDAsIDQwLjAwMCwgNTAuMDAwLCA2MC4wMDAsIDcwLjAwMCwgODAuMDAwLCA5MC4wMDAsIDEwMC4wMDBcfSQsIGFwbGljYW5kbyB0b2RhcyBhcyBwb3Nzw612ZWlzIGNvbmZpZ3VyYcOnw7VlcyBkZSAkRCBcaW4gXHszLCA0LCA1LCA2XH0kIGUgICRcdGF1IFxpbiBcezEsIDIsIDMsIDRcfSQgYW9zIGRlc2NyaXRvcmVzLgoKQSBkaXNwb3Npw6fDo28gZG9zIGRhZG9zIG5vIHBsYW5vICRIQyQgcG9kZSBzZXIgb2JzZXJ2YWRhIGFiYWl4bzogCgpgYGB7cn0KSEMuQlAgPSBkYXRhLmZyYW1lKCJIIiA9IG51bWVyaWMoMTYwMDApLCAKICAgICAgICAgICAgICAgICAgICJDIiA9IG51bWVyaWMoMTYwMDApLAogICAgICAgICAgICAgICAgICAgIkRpc3QiID0gbnVtZXJpYygxNjAwMCksCiAgICAgICAgICAgICAgICAgICAiRCIgPSBudW1lcmljKDE2MDAwKSwKICAgICAgICAgICAgICAgICAgICJOIiA9IG51bWVyaWMoMTYwMDApLCAKICAgICAgICAgICAgICAgICAgIHN0cmluZ3NBc0ZhY3RvcnM9RkFMU0UpCgpIQy5CUCROID0gYXMuZmFjdG9yKHJlcChjKHJlcCgxZSswNCwgMTAwKSwgcmVwKDJlKzA0LCAxMDApLCByZXAoM2UrMDQsIDEwMCksIHJlcCg0ZSswNCwgMTAwKSwgcmVwKDVlKzA0LCAxMDApLCByZXAoNmUrMDQsIDEwMCksIHJlcCg3ZSswNCwgMTAwKSwgcmVwKDhlKzA0LCAxMDApLCByZXAoOWUrMDQsIDEwMCksIHJlcCgxZSswNSwgMTAwKSksIDE2KSkKCmZpbGUuY3N2ID0gZGF0YS5mcmFtZShyZWFkLmNzdigiLi4vRGF0YS9IQ19zZXJpZXNfZmswXzE2MDAwLmNzdiIpKQoKSEMuQlAkSCA9IGZpbGUuY3N2WywxXQpIQy5CUCRDID0gZmlsZS5jc3ZbLDJdCkhDLkJQJERpc3QgPSBIQy5CUCRDIC8gSEMuQlAkSApIQy5CUCREPSBhcy5mYWN0b3IoZmlsZS5jc3ZbLDNdKQpgYGAKCiMjVHJhbnNmb3JtYcOnw6NvIGRlIEJveC1Db3gKCmBgYHtyfQpkaW1lbnNpb24gPSBjKDMsNCw1LDYpCk4gPSBjKDFlKzA0LCAyZSswNCwgM2UrMDQsIDRlKzA0LCA1ZSswNCwgNmUrMDQsIDdlKzA0LCA4ZSswNCwgOWUrMDQsIDFlKzA1KQpIQy50ZXN0LjMgPSBkYXRhLmZyYW1lKCJIIiA9IG51bWVyaWMoNDAwKSwgCiAgICAgICAgICAgICAgICAgICAgICAgIkRpc3QiID0gbnVtZXJpYyg0MDApLAogICAgICAgICAgICAgICAgICAgICAgIHN0cmluZ3NBc0ZhY3RvcnM9RkFMU0UpCgpsbS4zLmJveC5jb3ggPSBhcnJheShsaXN0KCksIDQwKQoKYiA9IGNjID0gMApmb3IoaSBpbiAxOmxlbmd0aChkaW1lbnNpb24pKXsKICBmb3IoaiBpbiAxOmxlbmd0aChOKSl7CiAgICBjYyA9IGNjICsgMQogICAgYSA9IGMoKCgoaiAtIDEpICogMTAwKSArIDEpOihqICogMTAwKSkKICAgIGVsZW1lbnRzID0gYyhhICsgYiwgYSArIGIgKyAxMDAwLCBhICsgYiArIDIwMDAsIGEgKyBiICsgMzAwMCkKICAgIAogICAgSEMudGVzdC4zJEggPSBIQy5CUCRIW2VsZW1lbnRzXQogICAgSEMudGVzdC4zJERpc3QgPSBIQy5CUCREaXN0W2VsZW1lbnRzXQogICAgCiAgICBwYXJhbWV0ZXJzID0gYm94Y294Zml0KEhDLnRlc3QuMyREaXN0LCBsYW1iZGEyID0gRikKICAgIGxhbWJkYSA9IHBhcmFtZXRlcnMkbGFtYmRhWzFdCiAgICBpZihsYW1iZGE9PTApe1Qubm9ybSA9IGxvZyhIQy50ZXN0LjMkRGlzdCl9CiAgICBpZihsYW1iZGEhPTApe1Qubm9ybSA9ICgoSEMudGVzdC4zJERpc3QpXmxhbWJkYSAtIDEpL2xhbWJkYX0KICAgIEhDLnRlc3QuMyREaXN0ID0gVC5ub3JtCiAgICAKICAgIGxtLjMuYm94LmNveFtbY2NdXSA9IGxtKGRhdGEgPSBIQy50ZXN0LjMsIGZvcm11bGEgPSBEaXN0IH4gSCkKICB9CiAgYiA9IGIgKyA0MDAwCn0KYGBgCgpgYGB7cn0KSEMudGVzdC4zJEggPSBIQy5CUCRIWzE6MTAwXQpIQy50ZXN0LjMkRGlzdCA9IEhDLkJQJERpc3RbMToxMDBdCgpwYXJhbWV0ZXJzID0gYm94Y294Zml0KEhDLnRlc3QuMyREaXN0LCBsYW1iZGEyID0gRikKbGFtYmRhID0gcGFyYW1ldGVycyRsYW1iZGFbMV0KaWYobGFtYmRhPT0wKXtULm5vcm0gPSBsb2coSEMudGVzdC4zJERpc3QpfQppZihsYW1iZGEhPTApe1Qubm9ybSA9ICgoSEMudGVzdC4zJERpc3QpXmxhbWJkYSAtIDEpL2xhbWJkYX0KSEMudGVzdC4zJERpc3QgPSBULm5vcm0KCnBsb3QoeCA9IEhDLnRlc3QuMyRILCB5ID0gSEMudGVzdC4zJERpc3QpCmBgYAoKYGBge3J9CmdncGxvdChIQy50ZXN0LjMsIGFlcyh4ID0gSCwgeSA9IERpc3QpKSArCiAgZ2VvbV9wb2ludCgpICsKICBzY2FsZV9maWxsX2dyZXkoKSArCiAgZ2VvbV9saW5lKGFlcyh5ID0gcHJlZGljdChsbS4zLmJveC5jb3hbWzFdXSwgSEMudGVzdC4zLCB0eXBlID0gJ3Jlc3BvbnNlJykpKSAKYGBgCgpBbmFsaXNhbmRvIG8gcGxvdCBkbyBtb2RlbG86CgpgYGB7cn0KcGxvdChsbS4zLmJveC5jb3hbWzFdXSkKYGBgCgpgYGB7cn0KaGlzdChsbS4zLmJveC5jb3hbWzFdXSRyZXNpZHVhbHMsIGJyZWFrcyA9IDIwMCkKYGBgCgpFeGFtaW5hbmRvIG8gJFFRLXBsb3QkIGRvcyByZXPDrWR1b3M6CgpgYGB7cn0KcXFQbG90KHJzdGFuZGFyZChsbS4zLmJveC5jb3hbWzFdXSkpIApgYGAKCgo=