Creación, manipulación y manejo de estructuras de datos

Estructura de datos y diagrama

# TODO: crear cosa de estructuras
# 
# Global option for package installation: ask install.packages() to check
# whether a later source version of a package is available.
options(install.packages.check.source = "yes")

# CRAN packages used in this document (described further below).
# Each package is listed exactly once; `mice` is included because it is
# loaded later in this file for the imputation step.
cran_packages <- c(
  "openssl", "fs", "broom", "dbplyr", "dplyr", "haven", "httr", "modelr",
  "readr", "tidyverse",
  "outForest", "OutlierDetection", "missRanger", "mice",
  "devtools"
)

# Install only the packages that are missing, so that re-running the script
# does not reinstall everything each time.
missing_packages <- setdiff(cran_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages, dependencies = TRUE, quiet = TRUE)
}

# Install packages from GitHub (only when not already available)
library(devtools)
if (!requireNamespace("trqwe", quietly = TRUE)) {
  devtools::install_github("traversc/trqwe", dependencies = TRUE, quiet = TRUE)
}

# Attach the packages used throughout the document
library(openssl)
library(fs)
library(broom)
library(dbplyr)
library(dplyr)
library(haven)
library(httr)
library(modelr)
library(readr)
library(tidyverse)

library(outForest)
library(OutlierDetection)

Manipulación de datos

library(tidyverse)

# bg: background proteins (no differential expression).
replicates   <- 3     # replicates per condition (control and treatment)
bg_proteins  <- 3000  # number of background proteins
log2_mean_bg <- 27    # background mean on the log2 scale
log2_sd_bg   <- 2     # background standard deviation on the log2 scale

# Every background protein is measured in 2 * replicates samples.
bg_reps_by_prot <- rep(2 * replicates, times = bg_proteins)
# Protein index repeated once per sample: 1, 1, ..., 2, 2, ...
bg_all_3000_prots_by_6_reps <- rep(seq_len(bg_proteins), times = bg_reps_by_prot)
# One log2 intensity per protein and sample, drawn from a single normal
# distribution.
bg_distrib_all_samples <- rnorm(
  n    = 2 * replicates * bg_proteins,
  mean = log2_mean_bg,
  sd   = log2_sd_bg
)


# Long-format table of the background ("null") proteins: one row per protein
# and sample, with the intensity stored on the raw scale (2^log2 value).
# tibble() is used because data_frame() is deprecated.
sim_null <- tibble(
  name = paste0("bg_", bg_all_3000_prots_by_6_reps),
  ID   = bg_all_3000_prots_by_6_reps,
  # Sample labels cycle once per protein: control_1..n, then treatment_1..n,
  # where n is `replicates` (instead of a hard-coded list of six labels).
  var  = rep(
    paste0(
      rep(c("control", "treatment"), each = replicates),
      "_",
      seq_len(replicates)
    ),
    times = bg_proteins
  ),
  val  = 2^bg_distrib_all_samples
)
# Histogram overlaid with a kernel density curve, built directly from the
# simulated log2 intensities. The y-axis shows density instead of counts so
# that the histogram and the density curve share one scale.
ggplot(
  data.frame(log2_intensity = bg_distrib_all_samples),
  aes(x = log2_intensity)
) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 0.5, colour = "black", fill = "white"
  ) +
  geom_density(alpha = 0.2, fill = "#FF6666") # transparent density overlay

# Same plot built from sim_null. `val` is stored on the raw scale (2^log2),
# where a bin width of 0.5 makes stat_bin() fail
# ("'by' argument is much too small"), so the values are converted back to
# log2 before binning.
ggplot(sim_null, aes(x = log2(val))) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 0.5, colour = "black", fill = "white"
  ) +
  geom_density(alpha = 0.2, fill = "#FF6666") # transparent density overlay

Diferencial

# Differentially expressed (DE) proteins: control and treatment intensities
# are drawn from normal distributions with different log2 means.
DE_proteins            <- 300
log2_mean_DE_control   <- 25
log2_mean_DE_treatment <- 30
log2_sd_DE             <- 2

# Each DE protein gets `replicates` samples per condition.
DE_reps_by_prot <- rep(replicates, times = DE_proteins)
# Protein index repeated once per replicate: 1, 1, 1, 2, 2, 2, ...
# TODO: fix the problem of generating distributions with
#       treatment mean >= control
DE_all_3000_prots_by_6_reps <- rep(seq_len(DE_proteins), times = DE_reps_by_prot)
# DE_all_3000_prots_by_3_reps

# log2 intensities: controls are drawn first, then treatments.
DE_distrib_control_samples <- rnorm(
  n    = replicates * DE_proteins,
  mean = log2_mean_DE_control,
  sd   = log2_sd_DE
)
DE_distrib_treatment_samples <- rnorm(
  n    = replicates * DE_proteins,
  mean = log2_mean_DE_treatment,
  sd   = log2_sd_DE
)

# Long-format table of the DE proteins: the control rows stacked on top of
# the treatment rows. IDs continue after the background proteins
# (bg_proteins + 1, ...), and intensities are stored on the raw scale.
# tibble() is used because data_frame() is deprecated; the sample labels are
# derived from `replicates` instead of being hard-coded.
sim_diff <- rbind(
  tibble(
    name = paste0("DE_", DE_all_3000_prots_by_6_reps),
    ID   = rep((bg_proteins + 1):(bg_proteins + DE_proteins), DE_reps_by_prot),
    var  = rep(paste0("control_", seq_len(replicates)), times = DE_proteins),
    val  = 2^DE_distrib_control_samples
  ),
  tibble(
    name = paste0("DE_", DE_all_3000_prots_by_6_reps),
    ID   = rep((bg_proteins + 1):(bg_proteins + DE_proteins), DE_reps_by_prot),
    var  = rep(paste0("treatment_", seq_len(replicates)), times = DE_proteins),
    val  = 2^DE_distrib_treatment_samples
  )
)


# Stack the background and DE tables (long format) and look at the result.
# TODO: this produces a very long table
rbind(sim_null, sim_diff)
tail(rbind(sim_null, sim_diff))

# Combine null and DE data with the older tidyr verb spread():
# `var` supplies the new column names and `val` their values, giving one row
# per protein; the rows are then ordered by ID.
sim <- arrange(
  spread(rbind(sim_null, sim_diff), key = var, value = val),
  ID
)

tail(sim) # show the last rows
# Inverse operation: back from wide to long format.
# The long format is the one expected by data-visualisation functions.
# `-name` and `-ID` exclude those two columns from the gather (they are kept
# as identifier columns, not deleted); every other column is folded into the
# `var` (former column name) / `val` (cell value) pair.
antisim <- gather(sim, key = "var", value = "val", -name, -ID)

tail(antisim)
# TODO: hacer un grafico de muestra?
# The same reshaping with the newer tidyr verbs.
# pivot_wider() is the replacement for spread(): column names come from `var`
# and cell values from `val`; the rows are then ordered by ID.
sim <- arrange(
  pivot_wider(
    rbind(sim_null, sim_diff),
    names_from  = var,
    values_from = val
  ),
  ID
)

# pivot_longer() goes back to long format: every column except `name` and
# `ID` is folded into the `var` / `val` pair.
pivot_longer(
  sim,
  cols      = !c(name, ID),
  names_to  = "var",
  values_to = "val"
)

Valores faltantes

Missing At Random (Blancos aleatorios)

Faltan datos en un patrón aleatorio, como podría ser causado por problemas de medición del equipo.

# Generate a MAR (missing at random) mask
MAR_fraction <- 0.05 # probability that a value is missing (5%)

# Logical matrix with one cell per protein (rows) and sample (columns):
# TRUE marks a value to blank out, FALSE a value to keep.
MAR_matrix <- matrix(
  data = sample(
    c(TRUE, FALSE),
    size    = 2 * replicates * (bg_proteins + DE_proteins),
    replace = TRUE,
    # TODO: check the probabilities?
    prob    = c(MAR_fraction, 1 - MAR_fraction)
  ),
  nrow = bg_proteins + DE_proteins,
  ncol = 2 * replicates
)

# Introduce missing values at random (MAR): find the sample columns by name
# and set every cell flagged in the mask to NA.
controls   <- grep("control", names(sim))
treatments <- grep("treatment", names(sim))
sim[, c(controls, treatments)][MAR_matrix] <- NA

Missing Not At Random

Faltan datos en un patrón definido, como podría ser causado por problemas de medición en una condición experimental.

# Introduce missing values not at random (MNAR): blank out every control
# measurement of the first MNAR_proteins DE proteins.
MNAR_proteins <- 100
# Row positions in `sim` whose name contains "DE".
# TODO: fix positions
DE_protein_IDs <- which(grepl("DE", sim$name))
DE_first_100   <- DE_protein_IDs[seq_len(MNAR_proteins)]
sim[DE_first_100, controls] <- NA


slice_sample(sim, n = 100) # draw a random sample of 100 rows

Outliers

Son datos fuera de tres desviaciones estándar. Comúnmente se eliminan, porque su probabilidad es similar o inferior a la de un error de medición.

# Three ways of looking at the numeric part of `sim`.
# Note that select_if(is.numeric) also keeps the numeric ID column, whereas
# the other two keep only the sample columns.
select_if(sim, is.numeric)
sim[, -c(1, 2)]
only.my.numeric.data <- select(sim, starts_with(c("tr", "co")))

# Add artificial outliers to the sample columns with generateOutliers();
# abs() then makes every value non-negative.
only.my.numeric.data.with.outliers <- abs(generateOutliers(only.my.numeric.data))

# TODO: would select_if with is.control work?
# Number of missing values per sample column.
colSums(is.na(only.my.numeric.data.with.outliers)) # TODO: fix this one [DONE]
## treatment_1 treatment_2 treatment_3   control_1   control_2   control_3 
##         157         166         172         235         246         251
summary(sim)
##      name                 ID           control_1           control_2        
##  Length:3300        Min.   :   1.0   Min.   :4.702e+05   Min.   :1.869e+05  
##  Class :character   1st Qu.: 825.8   1st Qu.:5.015e+07   1st Qu.:4.820e+07  
##  Mode  :character   Median :1650.5   Median :1.246e+08   Median :1.243e+08  
##                     Mean   :1650.5   Mean   :3.418e+08   Mean   :3.164e+08  
##                     3rd Qu.:2475.2   3rd Qu.:3.469e+08   3rd Qu.:3.262e+08  
##                     Max.   :3300.0   Max.   :2.635e+10   Max.   :1.111e+10  
##                                      NA's   :245         NA's   :262        
##    control_3          treatment_1         treatment_2       
##  Min.   :7.797e+05   Min.   :6.623e+05   Min.   :1.072e+06  
##  1st Qu.:4.847e+07   1st Qu.:5.763e+07   1st Qu.:5.804e+07  
##  Median :1.218e+08   Median :1.489e+08   Median :1.548e+08  
##  Mean   :3.421e+08   Mean   :5.890e+08   Mean   :5.769e+08  
##  3rd Qu.:3.350e+08   3rd Qu.:4.320e+08   3rd Qu.:4.329e+08  
##  Max.   :1.531e+10   Max.   :4.172e+10   Max.   :6.176e+10  
##  NA's   :262         NA's   :163         NA's   :174        
##   treatment_3       
##  Min.   :4.289e+05  
##  1st Qu.:5.726e+07  
##  Median :1.484e+08  
##  Mean   :5.963e+08  
##  3rd Qu.:4.130e+08  
##  Max.   :9.243e+10  
##  NA's   :176

Corrección de Outliers

# TODO: new lines of code
# Put the identifier columns (first two columns of `sim`) back in front of
# the sample columns that carry the artificial outliers.
sim.final <- cbind(sim[, c(1, 2)], only.my.numeric.data.with.outliers)
# TODO: everything from "library(mice)" onwards is still missing

library(mice)
## 
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
# Multiple imputation of the missing values with mice.
# NOTE(review): this imputes `sim`, not `sim.final`, so the artificial
# outliers stored in `sim.final` are not part of the imputed data. Confirm
# that this is intended.
imputed_data <- mice(
  data   = sim,
  m      = 5,    # number of imputed data sets to generate
  maxit  = 50,   # number of iterations
  method = "rf", # impute with random forests
  seed   = 500   # seed for the random number generator (reproducible runs)
)
## 
##  iter imp variable
##   1   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   1   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   1   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   1   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   1   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   2   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   2   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   2   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   2   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   2   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   3   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   3   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   3   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   3   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   3   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   4   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   4   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   4   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   4   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   4   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   5   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   5   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   5   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   5   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   5   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   6   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   6   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   6   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   6   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   6   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   7   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   7   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   7   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   7   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   7   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   8   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   8   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   8   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   8   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   8   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   9   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   9   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   9   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   9   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   9   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   10   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   10   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   10   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   10   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   10   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   11   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   11   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   11   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   11   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   11   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   12   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   12   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   12   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   12   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   12   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   13   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   13   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   13   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   13   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   13   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   14   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   14   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   14   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   14   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   14   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   15   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   15   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   15   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   15   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   15   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   16   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   16   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   16   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   16   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   16   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   17   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   17   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   17   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   17   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   17   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   18   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   18   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   18   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   18   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   18   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   19   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   19   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   19   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   19   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   19   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   20   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   20   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   20   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   20   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   20   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   21   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   21   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   21   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   21   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   21   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   22   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   22   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   22   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   22   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   22   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   23   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   23   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   23   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   23   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   23   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   24   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   24   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   24   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   24   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   24   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   25   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   25   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   25   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   25   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   25   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   26   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   26   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   26   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   26   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   26   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   27   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   27   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   27   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   27   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   27   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   28   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   28   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   28   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   28   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   28   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   29   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   29   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   29   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   29   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   29   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   30   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   30   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   30   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   30   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   30   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   31   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   31   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   31   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   31   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   31   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   32   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   32   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   32   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   32   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   32   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   33   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   33   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   33   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   33   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   33   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   34   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   34   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   34   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   34   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   34   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   35   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   35   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   35   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   35   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   35   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   36   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   36   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   36   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   36   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   36   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   37   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   37   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   37   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   37   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   37   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   38   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   38   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   38   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   38   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   38   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   39   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   39   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   39   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   39   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   39   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   40   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   40   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   40   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   40   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   40   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   41   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   41   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   41   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   41   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   41   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   42   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   42   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   42   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   42   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   42   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   43   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   43   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   43   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   43   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   43   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   44   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   44   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   44   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   44   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   44   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   45   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   45   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   45   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   45   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   45   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   46   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   46   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   46   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   46   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   46   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   47   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   47   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   47   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   47   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   47   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   48   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   48   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   48   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   48   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   48   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   49   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   49   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   49   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   49   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   49   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   50   1  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   50   2  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   50   3  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   50   4  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
##   50   5  control_1  control_2  control_3  treatment_1  treatment_2  treatment_3
## Warning: Number of logged events: 1
# Extract a completed data set from the imputation result. No `action` is
# given, so complete() uses its default and returns a single completed data
# set in which the missing cells have been replaced by imputed values
# (no rows are dropped).
my.raw.data <- mice::complete(imputed_data)
my.raw.data
# TODO: Could you please specify what exactly the correction of the random
# NAs does, i.e. whether it removes the value or replaces it with another one?

# Keep only the sample columns and run outForest on them to detect outliers.
only.my.numeric.data.with.outliers <- select(
  my.raw.data,
  starts_with(c("tr", "co"))
)
out <- outForest(
  only.my.numeric.data.with.outliers,
  splitrule = "extratrees",
  num.trees = 50,
  verbose   = 0
)
outliers(out) # table of the detected outliers
summary(out)
## The following outlier counts have been detected:
## 
##             Number of outliers
## treatment_1                 44
## treatment_2                 29
## treatment_3                 24
## control_1                   35
## control_2                   54
## control_3                   47
## 
## These are the worst outliers:
## 
##      row         col    observed  predicted       rmse    score threshold
## 85  3138 treatment_3 92434874958  596090058 2652318048 34.62586         3
## 105  398   control_1 26345944149  348920195  806065724 32.25174         3
## 53  3013 treatment_2 61758914773 1180364742 1933461335 31.33166         3
## 82  3050 treatment_3 70314538681 1343575723 2652318048 26.00403         3
## 34  3236 treatment_1 41723715703  867432789 1750580609 23.33870         3
## 79  3016 treatment_3 60307286501 1385059818 2652318048 22.21537         3
##     replacement
## 85    695974493
## 105    45809803
## 53    165157448
## 82     57539022
## 34   6110999684
## 79   2013500275
# The fixed data: sample columns with the detected outliers replaced
Data(out)
# Re-attach the non-sample columns of the imputed table to the fixed data.
sim.without.outliers <- cbind(
  select(my.raw.data, !starts_with(c("tr", "co"))),
  Data(out)
)
sim.without.outliers

Insertar columna con log2Ratio

# Per-protein mean of the treatment and control replicates, plus the log2 of
# their ratio. `.` is the piped-in table, so the row means are taken over
# the original replicate columns only.
A <- sim.without.outliers %>%
  mutate(
    treatment = rowMeans(select(., starts_with("treat"))),
    control   = rowMeans(select(., starts_with("control"))),
    log2Ratio = log2(treatment / control)
  )

library(trqwe)

# Column names of each simulated extra condition: three replicates + mean.
B.names <- c("treatment.B_1", "treatment.B_2", "treatment.B_3", "treatment.B_mean")
C.names <- c("treatment.C_1", "treatment.C_2", "treatment.C_3", "treatment.C_mean")
D.names <- c("treatment.D_1", "treatment.D_2", "treatment.D_3", "treatment.D_mean")
E.names <- c("treatment.E_1", "treatment.E_2", "treatment.E_3", "treatment.E_mean")

# Builds one simulated condition: takes the treatment columns of `data`,
# multiplies them by `multiplier`, appends their row mean and renames the
# resulting columns to `col_names` (replicates first, mean last).
scale_treatments <- function(data, multiplier, col_names) {
  scaled <- select(data, starts_with("treat")) * multiplier
  scaled[["treatment"]] <- rowMeans(scaled)
  trqwe::set_colnames(scaled, col_names)
}

# NOTE(review): these conditions are derived from `my.raw.data` (before the
# outlier correction), while `A` and its `control` column come from
# `sim.without.outliers`. Confirm that mixing the two is intended.
B <- scale_treatments(my.raw.data, 2, B.names)
C <- scale_treatments(my.raw.data, 5, C.names)
D <- scale_treatments(my.raw.data, .5, D.names)
E <- scale_treatments(my.raw.data, .2, E.names)

# log2 ratio of every simulated condition against the control mean; only the
# name column and the log2Ratio columns are kept.
log2Ratio_matrix <- cbind(A, B, C, D, E) %>%
  mutate(
    log2Ratio.B = log2(treatment.B_mean / control),
    log2Ratio.C = log2(treatment.C_mean / control),
    log2Ratio.D = log2(treatment.D_mean / control),
    log2Ratio.E = log2(treatment.E_mean / control)
  ) %>%
  select(starts_with(c("na", "log")))

log2Ratio_matrix

Pendiente: escritura de carpetas y exploración de directorios.