# TODO: crear cosa de estructuras
#
# Global option consulted by install.packages().
options(install.packages.check.source = "yes")

# CRAN packages needed by this script. Each name is listed once. "mice" is
# included because the script attaches it further down with library(mice),
# and "missRanger" is kept because the original setup installed it.
cran_packages <- c(
  "openssl", "fs", "broom", "dbplyr", "dplyr", "haven", "httr", "modelr",
  "readr", "tidyverse", "outForest", "OutlierDetection", "missRanger",
  "mice", "devtools"
)

# Install only what is missing instead of reinstalling everything (with all
# dependencies) on every run of the script.
is_installed <- vapply(
  cran_packages, requireNamespace, logical(1),
  quietly = TRUE
)
missing_packages <- cran_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages, dependencies = TRUE, quiet = TRUE)
}

# trqwe is installed from GitHub rather than through install.packages().
if (!requireNamespace("trqwe", quietly = TRUE)) {
  devtools::install_github("traversc/trqwe", dependencies = TRUE, quiet = TRUE)
}

# Attach the packages (same order as before, so masking is unchanged).
library(devtools)
library(openssl)
library(fs)
library(broom)
library(dbplyr)
library(dplyr)
library(haven)
library(httr)
library(modelr)
library(readr)
library(tidyverse)
library(outForest)
library(OutlierDetection)
# bg: background proteins, i.e. simulated intensities with the same
# distribution in the control and treatment samples.
# NOTE(review): no set.seed() call is visible in this script, so the random
# draws below differ between runs.
replicates <- 3
bg_proteins <- 3000 # Number of background proteins.
log2_mean_bg <- 27 # Mean of the background log2 intensities.
log2_sd_bg <- 2 # Standard deviation of the background log2 intensities.

# Every background protein gets one value per sample
# (2 conditions x `replicates`).
bg_reps_by_prot <- rep((2 * replicates), bg_proteins)
bg_all_3000_prots_by_6_reps <- rep(seq_len(bg_proteins), bg_reps_by_prot)
bg_distrib_all_samples <- rnorm(
  2 * replicates * bg_proteins,
  mean = log2_mean_bg,
  sd = log2_sd_bg
)

# Long-format table with one row per protein and sample.
# tibble() replaces the deprecated data_frame(), so the lifecycle warning that
# used to be printed here no longer appears. The sample labels are derived
# from `replicates` instead of being hard-coded for three replicates.
sim_null <- tibble(
  name = paste0("bg_", bg_all_3000_prots_by_6_reps),
  ID = bg_all_3000_prots_by_6_reps,
  var = rep(
    paste0(
      rep(c("control", "treatment"), each = replicates),
      "_",
      seq_len(replicates)
    ),
    bg_proteins
  ),
  val = 2^bg_distrib_all_samples # Back-transform from log2 to raw intensity.
)
# Histogram of the simulated log2 background intensities, overlaid with a
# kernel density curve.
ggplot(
  data.frame(bg_distrib_all_samples = bg_distrib_all_samples),
  aes(x = bg_distrib_all_samples)
) +
  geom_histogram(
    aes(y = after_stat(density)), # Density instead of count on the y-axis.
    binwidth = 0.5, colour = "black", fill = "white"
  ) +
  geom_density(alpha = 0.2, fill = "#FF6666") # Transparent density overlay.

# Same plot for the back-transformed intensities stored in sim_null.
# The x axis is put on a log2 scale so that binwidth = 0.5 is measured in log2
# units. Applied to the raw intensities, that bin width made stat_bin() fail
# with "'by' argument is much too small".
ggplot(sim_null, aes(x = val)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 0.5, colour = "black", fill = "white"
  ) +
  geom_density(alpha = 0.2, fill = "#FF6666") +
  scale_x_continuous(trans = "log2")
# Differentially expressed (DE) proteins: control and treatment values are
# drawn from normal distributions (on the log2 scale) with different means.
DE_proteins <- 300
log2_mean_DE_control <- 25
log2_mean_DE_treatment <- 30
log2_sd_DE <- 2

# Within each condition every DE protein gets one value per replicate.
DE_reps_by_prot <- rep(replicates, DE_proteins)
# TODO: fix the distribution-generation problem when the treatment mean is
# >= the control mean.
# NOTE: despite its name, this vector holds the indices 1..DE_proteins, each
# repeated `replicates` times (i.e. "DE_all_300_prots_by_3_reps").
DE_all_3000_prots_by_6_reps <- rep(seq_len(DE_proteins), DE_reps_by_prot)

DE_distrib_control_samples <- rnorm(
  replicates * DE_proteins,
  mean = log2_mean_DE_control,
  sd = log2_sd_DE
)
DE_distrib_treatment_samples <- rnorm(
  replicates * DE_proteins,
  mean = log2_mean_DE_treatment,
  sd = log2_sd_DE
)

# Long-format table: control rows first, then treatment rows. DE protein IDs
# continue after the background IDs (bg_proteins + 1 onwards).
# tibble() replaces the deprecated data_frame(); the sample labels are derived
# from `replicates` instead of being hard-coded for three replicates.
sim_diff <- rbind(
  tibble(
    name = paste0("DE_", DE_all_3000_prots_by_6_reps),
    ID = rep((bg_proteins + 1):(bg_proteins + DE_proteins), DE_reps_by_prot),
    var = rep(paste0("control_", seq_len(replicates)), DE_proteins),
    val = 2^DE_distrib_control_samples
  ),
  tibble(
    name = paste0("DE_", DE_all_3000_prots_by_6_reps),
    ID = rep((bg_proteins + 1):(bg_proteins + DE_proteins), DE_reps_by_prot),
    var = rep(paste0("treatment_", seq_len(replicates)), DE_proteins),
    val = 2^DE_distrib_treatment_samples
  )
)
# Stack the background and DE rows into one long table and look at it.
# TODO: this prints a very long table.
rbind(sim_null, sim_diff)
tail(rbind(sim_null, sim_diff))

# Combine null and DE data with the older tidyr verbs.
# spread() turns the values of `var` into columns filled from `val`; the
# result is then ordered by ID.
sim <- arrange(
  spread(rbind(sim_null, sim_diff), key = var, value = val),
  ID
)
tail(sim) # Show the last rows.

# Inverse operation (long format is handy for plotting functions).
# gather() collects every column except `name` and `ID` back into the
# key/value pair `var`/`val`; `name` and `ID` stay as identifier columns.
antisim <- gather(sim, key = "var", value = "val", -name, -ID)
tail(antisim)
# TODO: add a sample plot?
# Same reshaping with the newer tidyr verbs.
# pivot_wider() is the successor of spread(): one column per value of `var`,
# filled from `val`. The wide table is then ordered by ID.
sim <- arrange(
  pivot_wider(
    rbind(sim_null, sim_diff),
    names_from = var,
    values_from = val
  ),
  ID
)

# pivot_longer() is the inverse: every column except `name` and `ID` goes back
# into the `var`/`val` pair. The result is only printed, not stored.
pivot_longer(
  sim,
  cols = !c(name, ID),
  names_to = "var",
  values_to = "val"
)
# Faltan datos en un patrón aleatorio, como podría ocurrir por problemas de medición del equipo.
# Generate a MAR (missing at random) mask.
MAR_fraction <- 0.05 # Probability (5%) that a value is missing.

# Draw one TRUE/FALSE flag per cell of the wide table (TRUE = make it missing)
# and shape the flags into a matrix with one row per protein and one column
# per sample.
# TODO: sanity-check the probabilities?
MAR_matrix <- sample(
  c(TRUE, FALSE),
  size = 2 * replicates * (bg_proteins + DE_proteins),
  replace = TRUE,
  prob = c(MAR_fraction, 1 - MAR_fraction)
)
dim(MAR_matrix) <- c(bg_proteins + DE_proteins, 2 * replicates)

# Introduce missing values at random (MAR): locate the sample columns by name
# and blank every cell flagged in the mask.
controls <- grep("control", names(sim))
treatments <- grep("treatment", names(sim))
sim[, c(controls, treatments)][MAR_matrix] <- NA
# Faltan datos en un patrón definido, como podría ocurrir por problemas de medición en una condición experimental.
# Introduce missing values not at random (MNAR): blank every control value of
# the first MNAR_proteins DE proteins.
MNAR_proteins <- 100

# Row positions of the DE proteins in `sim`. The pattern is anchored so that
# only names beginning with "DE_" match.
# TODO: fix positions.
DE_protein_IDs <- grep("^DE_", sim$name)

# head() caps the selection at the number of DE rows available, so asking for
# more proteins than exist does not produce NA row indices.
DE_first_100 <- head(DE_protein_IDs, MNAR_proteins)
sim[DE_first_100, controls] <- NA

slice_sample(sim, n = 100) # Show a random sample of 100 rows.
# Valores atípicos (outliers): son datos que quedan a más de tres desviaciones estándar de la media. Comúnmente se eliminan, porque su probabilidad es similar o inferior a la de un error de medición.
# Two quick looks at the numeric part of the table: every numeric column, and
# everything except the first two columns.
select_if(sim, is.numeric)
sim[, -(1:2)]

# Keep only the sample columns (treatment_* first, then control_*).
only.my.numeric.data <- select(sim, starts_with(c("tr", "co")))

# Add artificial outliers with outForest's generateOutliers() and take
# absolute values of the result.
only.my.numeric.data.with.outliers <- abs(
  generateOutliers(only.my.numeric.data)
)
# TODO: would select_if() with an "is control" predicate work here?

# Missing values per sample column.
# TODO: fix this [DONE]
colSums(is.na(only.my.numeric.data.with.outliers))
## treatment_1 treatment_2 treatment_3 control_1 control_2 control_3
## 157 166 172 235 246 251
summary(sim)
## name ID control_1 control_2
## Length:3300 Min. : 1.0 Min. :4.702e+05 Min. :1.869e+05
## Class :character 1st Qu.: 825.8 1st Qu.:5.015e+07 1st Qu.:4.820e+07
## Mode :character Median :1650.5 Median :1.246e+08 Median :1.243e+08
## Mean :1650.5 Mean :3.418e+08 Mean :3.164e+08
## 3rd Qu.:2475.2 3rd Qu.:3.469e+08 3rd Qu.:3.262e+08
## Max. :3300.0 Max. :2.635e+10 Max. :1.111e+10
## NA's :245 NA's :262
## control_3 treatment_1 treatment_2
## Min. :7.797e+05 Min. :6.623e+05 Min. :1.072e+06
## 1st Qu.:4.847e+07 1st Qu.:5.763e+07 1st Qu.:5.804e+07
## Median :1.218e+08 Median :1.489e+08 Median :1.548e+08
## Mean :3.421e+08 Mean :5.890e+08 Mean :5.769e+08
## 3rd Qu.:3.350e+08 3rd Qu.:4.320e+08 3rd Qu.:4.329e+08
## Max. :1.531e+10 Max. :4.172e+10 Max. :6.176e+10
## NA's :262 NA's :163 NA's :174
## treatment_3
## Min. :4.289e+05
## 1st Qu.:5.726e+07
## Median :1.484e+08
## Mean :5.963e+08
## 3rd Qu.:4.130e+08
## Max. :9.243e+10
## NA's :176
# TODO: new lines of code
# Put the two identifier columns back in front of the sample columns that
# contain the generated outliers.
# NOTE(review): sim.final is not referenced again in the visible part of this
# script (the imputation below is run on `sim`) -- confirm which is intended.
sim.final <- cbind(sim[, c(1, 2)], only.my.numeric.data.with.outliers)
# TODO: still missing from "library(mice)" onwards
library(mice)
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
# Multiple imputation of the missing values in `sim`.
imputed_data <- mice(
  data = sim,
  m = 5, # Number of imputed data sets (the "imp" column of the log).
  maxit = 50, # Number of iterations (the "iter" column of the log).
  method = "rf", # Impute with random forests.
  seed = 500 # Seed passed to mice() so the imputation can be reproduced.
)
##
## iter imp variable
## 1 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 1 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 1 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 1 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 1 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 2 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 2 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 2 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 2 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 2 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 3 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 3 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 3 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 3 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 3 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 4 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 4 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 4 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 4 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 4 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 5 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 5 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 5 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 5 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 5 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 6 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 6 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 6 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 6 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 6 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 7 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 7 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 7 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 7 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 7 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 8 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 8 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 8 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 8 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 8 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 9 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 9 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 9 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 9 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 9 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 10 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 10 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 10 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 10 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 10 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 11 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 11 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 11 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 11 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 11 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 12 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 12 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 12 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 12 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 12 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 13 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 13 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 13 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 13 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 13 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 14 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 14 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 14 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 14 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 14 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 15 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 15 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 15 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 15 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 15 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 16 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 16 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 16 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 16 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 16 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 17 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 17 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 17 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 17 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 17 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 18 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 18 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 18 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 18 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 18 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 19 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 19 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 19 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 19 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 19 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 20 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 20 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 20 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 20 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 20 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 21 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 21 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 21 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 21 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 21 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 22 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 22 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 22 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 22 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 22 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 23 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 23 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 23 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 23 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 23 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 24 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 24 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 24 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 24 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 24 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 25 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 25 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 25 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 25 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 25 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 26 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 26 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 26 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 26 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 26 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 27 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 27 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 27 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 27 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 27 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 28 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 28 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 28 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 28 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 28 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 29 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 29 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 29 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 29 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 29 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 30 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 30 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 30 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 30 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 30 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 31 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 31 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 31 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 31 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 31 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 32 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 32 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 32 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 32 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 32 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 33 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 33 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 33 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 33 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 33 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 34 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 34 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 34 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 34 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 34 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 35 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 35 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 35 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 35 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 35 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 36 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 36 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 36 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 36 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 36 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 37 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 37 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 37 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 37 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 37 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 38 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 38 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 38 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 38 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 38 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 39 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 39 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 39 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 39 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 39 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 40 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 40 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 40 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 40 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 40 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 41 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 41 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 41 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 41 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 41 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 42 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 42 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 42 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 42 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 42 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 43 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 43 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 43 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 43 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 43 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 44 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 44 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 44 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 44 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 44 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 45 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 45 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 45 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 45 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 45 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 46 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 46 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 46 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 46 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 46 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 47 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 47 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 47 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 47 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 47 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 48 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 48 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 48 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 48 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 48 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 49 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 49 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 49 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 49 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 49 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 50 1 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 50 2 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 50 3 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 50 4 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## 50 5 control_1 control_2 control_3 treatment_1 treatment_2 treatment_3
## Warning: Number of logged events: 1
# Extract a completed data set from the mice result. With its default
# arguments, mice::complete() returns the first imputed data set.
my.raw.data <- mice::complete(imputed_data)
print(my.raw.data)
# TODO (reader question): please state what the correction of the random NAs
# does -- is the value removed or replaced?
# Answer: the rows are kept; each missing value is replaced by an imputed one.
# Sample columns of the imputed table (treatment_* first, then control_*).
# This overwrites the earlier object of the same name.
only.my.numeric.data.with.outliers <- select(
  my.raw.data,
  starts_with(c("tr", "co"))
)

# Outlier detection with outForest on the sample columns.
out <- outForest(
  only.my.numeric.data.with.outliers,
  splitrule = "extratrees",
  num.trees = 50,
  verbose = 0
)
outliers(out) # Detected outliers.
summary(out) # Outlier counts per column and the worst outliers.
## The following outlier counts have been detected:
##
## Number of outliers
## treatment_1 44
## treatment_2 29
## treatment_3 24
## control_1 35
## control_2 54
## control_3 47
##
## These are the worst outliers:
##
## row col observed predicted rmse score threshold
## 85 3138 treatment_3 92434874958 596090058 2652318048 34.62586 3
## 105 398 control_1 26345944149 348920195 806065724 32.25174 3
## 53 3013 treatment_2 61758914773 1180364742 1933461335 31.33166 3
## 82 3050 treatment_3 70314538681 1343575723 2652318048 26.00403 3
## 34 3236 treatment_1 41723715703 867432789 1750580609 23.33870 3
## 79 3016 treatment_3 60307286501 1385059818 2652318048 22.21537 3
## replacement
## 85 695974493
## 105 45809803
## 53 165157448
## 82 57539022
## 34 6110999684
## 79 2013500275
# The fixed data: the sample columns as returned by outForest's Data().
Data(out)

# Put the non-sample columns of the imputed table back in front of the fixed
# sample columns.
sim.without.outliers <- cbind(
  select(my.raw.data, !starts_with(c("tr", "co"))),
  Data(out)
)
sim.without.outliers
# Per-protein mean of the treatment and control replicates, and the log2 ratio
# of the two means (treatment / control).
A <- sim.without.outliers %>%
  mutate(
    treatment = rowMeans(select(., starts_with("treat"))),
    control = rowMeans(select(., starts_with("control"))),
    log2Ratio = log2(treatment / control)
  )
library(trqwe)

# Column names for each simulated extra treatment: three replicates plus
# their row mean.
B.names <- c("treatment.B_1", "treatment.B_2", "treatment.B_3", "treatment.B_mean")
C.names <- c("treatment.C_1", "treatment.C_2", "treatment.C_3", "treatment.C_mean")
D.names <- c("treatment.D_1", "treatment.D_2", "treatment.D_3", "treatment.D_mean")
E.names <- c("treatment.E_1", "treatment.E_2", "treatment.E_3", "treatment.E_mean")

# Multiply the treatment columns of `data` by `fold`, append their row mean as
# a fourth column, and rename the four columns to `col_names`.
scale_treatments <- function(data, fold, col_names) {
  scaled <- select(data, starts_with("treat")) * fold
  scaled <- mutate(
    scaled,
    treatment = rowMeans(select(scaled, starts_with("treat")))
  )
  trqwe::set_colnames(scaled, col_names)
}

# Simulated treatments B-E: the treatment replicates scaled by 2, 5, 0.5, 0.2.
# NOTE(review): these are derived from my.raw.data, whereas A is derived from
# sim.without.outliers -- confirm that using the table that still contains
# the outliers is intended here.
B <- scale_treatments(my.raw.data, 2, B.names)
C <- scale_treatments(my.raw.data, 5, C.names)
D <- scale_treatments(my.raw.data, 0.5, D.names)
E <- scale_treatments(my.raw.data, 0.2, E.names)
# Join all tables column-wise, compute the log2 ratio of each simulated
# treatment mean against the control mean, and keep only the `name` column
# plus the log2-ratio columns (prefixes "na" and "log").
log2Ratio_matrix <- cbind(A, B, C, D, E) %>%
  mutate(
    log2Ratio.B = log2(treatment.B_mean / control),
    log2Ratio.C = log2(treatment.C_mean / control),
    log2Ratio.D = log2(treatment.D_mean / control),
    log2Ratio.E = log2(treatment.E_mean / control)
  ) %>%
  select(starts_with(c("na", "log")))
log2Ratio_matrix