Trabajo con datos de instancia 100

Exploración inicial de distribución

# Load the instance-100 results from their CSV file.
inst100 <- read_csv(
  "D:/Dropbox/MsC UABC/2o Semestre/Clases/Estadistica/Danisa/2. Tidy dataset/instancia 100.csv"
)

# Drop the columns without results: original positions 3 and 5
# (the same columns removed by dropping column 3 and then column 4).
inst100 <- inst100[, -c(3, 5)]

# Short names for the four remaining columns.
names(inst100) <- c("C1", "C2", "R2", "RC2")


# Histograms of the four result columns of inst100 (formula interface).
histogram(
  ~ C1 + C2 + R2 + RC2,
  data = inst100,
  xlab = "features",
  main = "Histogramas de Resultados en Instancia 100 "
)

Intentando con Transformaciones de Johnson

Test de Normalidad Anderson-Darling

# Anderson-Darling normality test on the untransformed C1 column;
# the result is stored and then shown.
c1test <- RE.ADT(inst100[["C1"]])
c1test
## [[1]]
## [1] "Anderson-Darling Test"
## 
## $p
## [1] 0.3977163
# Anderson-Darling normality test on the untransformed C2 column.
c2_test <- RE.ADT(inst100[["C2"]])
c2_test
## [[1]]
## [1] "Anderson-Darling Test"
## 
## $p
## [1] 0.009035369
# Anderson-Darling normality test on the untransformed R2 column.
r2_test <- RE.ADT(inst100[["R2"]])
r2_test
## [[1]]
## [1] "Anderson-Darling Test"
## 
## $p
## [1] 0.2585398
# Anderson-Darling normality test on the untransformed RC2 column.
rc2_test <- RE.ADT(inst100$RC2)
# Show the RC2 result. This line previously printed r2_test, so the recorded
# output that follows (p = 0.2585398) is the R2 value and must be regenerated.
rc2_test
## [[1]]
## [1] "Anderson-Darling Test"
## 
## $p
## [1] 0.2585398

Transformación de Johnson y Visualización

# Fit a Johnson transformation to each of the four result columns.
# Later chunks read the `transformed` element of each result.
c1_johnson  <- RE.Johnson(inst100[["C1"]])
c2_johnson  <- RE.Johnson(inst100[["C2"]])
r2_johnson  <- RE.Johnson(inst100[["R2"]])
rc2_johnson <- RE.Johnson(inst100[["RC2"]])

# Frequency histograms of the Johnson-transformed values, one per column.

# C1
hist(
  c1_johnson[["transformed"]],
  main = "Transformación de Johnson  C1",
  xlab = "Valores",
  ylab = "Frecuencia",
  col = "pink"
)

# C2
hist(
  c2_johnson[["transformed"]],
  main = "Transformación de Johnson  C2",
  xlab = "Valores",
  ylab = "Frecuencia",
  col = "pink"
)

# R2
hist(
  r2_johnson[["transformed"]],
  main = "Transformación de Johnson  R2",
  xlab = "Valores",
  ylab = "Frecuencia",
  col = "pink"
)

# RC2
hist(
  rc2_johnson[["transformed"]],
  main = "Transformación de Johnson  RC2",
  xlab = "Valores",
  ylab = "Frecuencia",
  col = "pink"
)

Gráficas Q-Q

# Normal Q-Q plots of the Johnson-transformed values, one per column.
# Each plot draws the sample points (qqnorm) and then overlays the
# reference line (qqline) in red.

# C1
qqnorm(c1_johnson$transformed,
       col = "navyblue",
       main = "C1")
qqline(c1_johnson$transformed,
       lwd = 3,
       col = "red")

# C2
qqnorm(c2_johnson$transformed,
       col = "navyblue",
       main = "C2")
qqline(c2_johnson$transformed,
       lwd = 3,
       col = "red")

# R2
qqnorm(r2_johnson$transformed,
       col = "navyblue",
       main = "R2")
qqline(r2_johnson$transformed,
       lwd = 3,
       col = "red")

# RC2 (the title was previously "C1", a copy-paste slip that mislabelled
# this plot)
qqnorm(rc2_johnson$transformed,
       col = "navyblue",
       main = "RC2")
qqline(rc2_johnson$transformed,
       lwd = 3,
       col = "red")

Comprobación de normalidad de los datos transformados con el test de Anderson-Darling

En el test de Anderson-Darling, un valor p mayor indica menos evidencia en contra de la normalidad de los datos.

C1

# Anderson-Darling test on the Johnson-transformed C1 values.
ac1_johnson <- RE.ADT(c1_johnson[["transformed"]])
ac1_johnson
## [[1]]
## [1] "Anderson-Darling Test"
## 
## $p
## [1] 0.5190301

C2

# Anderson-Darling test on the Johnson-transformed C2 values.
ac2_johnson <- RE.ADT(c2_johnson[["transformed"]])
ac2_johnson
## [[1]]
## [1] "Anderson-Darling Test"
## 
## $p
## [1] 0.3500646

R2

# Anderson-Darling test on the Johnson-transformed R2 values.
ar2_johnson <- RE.ADT(r2_johnson[["transformed"]])
ar2_johnson
## [[1]]
## [1] "Anderson-Darling Test"
## 
## $p
## [1] 0.8400051

RC2

# Anderson-Darling test on the Johnson-transformed RC2 values.
arc2_johnson <- RE.ADT(rc2_johnson[["transformed"]])
arc2_johnson
## [[1]]
## [1] "Anderson-Darling Test"
## 
## $p
## [1] 0.7858921

Trabajo con Datos de tabla final

# Load the final DOE results workbook; no row is used as column names.
dataset <- read_excel(
  "D:/Dropbox/MsC UABC/2o Semestre/Clases/Estadistica/Danisa/1. Raw dataset/DATOS FINALES DOE.xlsx",
  col_names = FALSE
)

# Trim the sheet down to the results table: discard rows 11-36 and
# columns 1-13, then the first two remaining rows and the sixth remaining
# column.
dataset <- dataset[-(11:36), -(1:13)]
dataset <- dataset[-(1:2), -6]

# Name the five remaining columns, then drop the fifth one ("Promedio").
names(dataset) <- c("Iteración", "veinticinco", "cincuenta", "cien", "Promedio")
dataset <- dataset[, -5]

Visualización de la Distribución

Histograma

# Histograms of the three instance columns of dataset (formula interface).
histogram(
  ~ veinticinco + cincuenta + cien,
  data = dataset,
  xlab = "features",
  main = "Histogramas de Resultados por Instancia "
)

BoxPlots

# Side-by-side boxplots of the three instance columns (columns 2 to 4).
boxplot(
  dataset[, 2:4],
  main = "Boxplot de Resultados por Instancia"
)

Test de normalidad

25

# Anderson-Darling normality test on the instance-25 results.
test25 <- RE.ADT(dataset[["veinticinco"]])
test25
## [[1]]
## [1] "Anderson-Darling Test"
## 
## $p
## [1] 0.4453185

50

# Anderson-Darling normality test on the instance-50 results.
test50 <- RE.ADT(dataset[["cincuenta"]])
test50
## [[1]]
## [1] "Anderson-Darling Test"
## 
## $p
## [1] 0.2479992

100

# Anderson-Darling normality test on the instance-100 results.
test100 <- RE.ADT(dataset[["cien"]])
test100
## [[1]]
## [1] "Anderson-Darling Test"
## 
## $p
## [1] 0.03106897

Transformación de Johnson

# Fit a Johnson transformation to the results of each instance.
# Later chunks read the `transformed` element of each result.
test25_johnson  <- RE.Johnson(dataset[["veinticinco"]])
test50_johnson  <- RE.Johnson(dataset[["cincuenta"]])
test100_johnson <- RE.Johnson(dataset[["cien"]])

Histogramas de datos transformados

# Density-scaled histograms of the Johnson-transformed values for each
# instance, each overlaid with a kernel density estimate in red.
# `freq = FALSE` replaces `prob = TRUE`, which only worked through partial
# matching of hist()'s `probability` argument. Because the bars show
# densities rather than counts, the y axis is labelled "Densidad" instead
# of "Frecuencia".

# 25
hist(test25_johnson$transformed,
     col = "pink",
     freq = FALSE,
     main = "Transformación de Johnson  25",
     xlab = "Valores",
     ylab = "Densidad")
lines(density(test25_johnson$transformed),
      lwd = 3,
      col = "red")

# 50
hist(test50_johnson$transformed,
     col = "pink",
     freq = FALSE,
     main = "Transformación de Johnson  50",
     xlab = "Valores",
     ylab = "Densidad")
lines(density(test50_johnson$transformed),
      lwd = 3,
      col = "red")

# 100
hist(test100_johnson$transformed,
     col = "pink",
     freq = FALSE,
     main = "Transformación de Johnson 100",
     xlab = "Valores",
     ylab = "Densidad")
lines(density(test100_johnson$transformed),
      lwd = 3,
      col = "red")

Gráficas Q-Q

25

# Normal Q-Q plot of the transformed instance-25 values, with the
# reference line overlaid in red.
qqnorm(
  test25_johnson[["transformed"]],
  main = "Instancia 25",
  col = "navyblue"
)
qqline(
  test25_johnson[["transformed"]],
  col = "red",
  lwd = 3
)

50

# Normal Q-Q plot of the transformed instance-50 values, with the
# reference line overlaid in red.
qqnorm(
  test50_johnson[["transformed"]],
  main = "Instancia 50",
  col = "navyblue"
)
qqline(
  test50_johnson[["transformed"]],
  col = "red",
  lwd = 3
)

100

# Normal Q-Q plot of the transformed instance-100 values, with the
# reference line overlaid in red.
qqnorm(
  test100_johnson[["transformed"]],
  main = "Instancia 100",
  col = "navyblue"
)
qqline(
  test100_johnson[["transformed"]],
  col = "red",
  lwd = 3
)

Integrando una matriz para que Danisa la pueda usar

# Build one single-column matrix per instance holding its Johnson-transformed
# values in descending order, then bind them side by side.
#
# Each column is sorted independently, so a row of the combined matrix does
# not correspond to one original observation across instances.
#
# The row count now comes from the data instead of a hard-coded `nrow = 8`,
# which would recycle or truncate the values if an instance did not have
# exactly 8 observations. Note that sort() drops NA values by default. If the
# instances end up with different lengths, cbind() fails instead of silently
# producing a misaligned matrix.

# Values 25
valores25 <- matrix(sort(test25_johnson$transformed, decreasing = TRUE),
                    ncol = 1)
colnames(valores25) <- c("Instancia 25")

# Values 50
valores50 <- matrix(sort(test50_johnson$transformed, decreasing = TRUE),
                    ncol = 1)
colnames(valores50) <- c("Instancia 50")

# Values 100
valores100 <- matrix(sort(test100_johnson$transformed, decreasing = TRUE),
                     ncol = 1)
colnames(valores100) <- c("Instancia 100")

# Combine everything into a single matrix.
final <- cbind(valores25, valores50, valores100)

Se presenta una matriz con los resultados en orden descendente, obtenidos por transformación de datos de Johnson, de cada una de las instancias:

# Optional export of the combined matrix to disk, left disabled:
#write.csv(final, file = "Datos Transformados")
# Show the combined matrix of transformed values.
final
##      Instancia 25 Instancia 50 Instancia 100
## [1,]    1.8600000   1.56000000     1.7383196
## [2,]    0.7469230   1.39252352     0.6621640
## [3,]    0.5589661   0.35671883     0.4957544
## [4,]   -0.1771684   0.35669458     0.3543305
## [5,]   -0.2776206   0.08227105     0.2840145
## [6,]   -0.4015054  -0.46947451    -0.4933830
## [7,]   -1.4208361  -0.97477719    -0.7736696
## [8,]   -1.8600000  -1.56000000    -1.6302568
# Boxplots of the transformed values, one box per column of `final`.
boxplot(x = final)

Promedios Generales

# Overall averages entered by hand as a single-column matrix named "general".
promgral <- matrix(
  c(
    18.86840375, 13.55024, 18.45171131,
    18.54769356, 15.55413375, 12.08616962,
    14.6371108, 10.44505313, 13.0532618
  ),
  ncol = 1,
  dimnames = list(NULL, "general")
)
promgral
##        general
##  [1,] 18.86840
##  [2,] 13.55024
##  [3,] 18.45171
##  [4,] 18.54769
##  [5,] 15.55413
##  [6,] 12.08617
##  [7,] 14.63711
##  [8,] 10.44505
##  [9,] 13.05326
# Boxplot of the overall averages held in `promgral`.
boxplot(
  x = promgral,
  main = "Boxplot del Promedio de Todas las Instancias Combinadas"
)

Transformaciones de la tabla con TODOS los promedios

# Boxplots of `promgral` under four simple transformations.

# Natural logarithm
boxplot(x = log(promgral), main = "Log")

# Square root
boxplot(x = sqrt(promgral), main = "sqrt")

# Reciprocal
boxplot(x = 1 / promgral, main = "1/x")

# Square
boxplot(x = promgral^2, main = "x^2")