Iniciamos la semilla y generamos los datos de genes y momentos muestrales.
# Simulate an expression matrix with one row per gene and one column per
# sample: the first group of samples is drawn uniformly from [-2, 2] and the
# second group from [-4, 4]. The seed is fixed so the draws are reproducible.
set.seed(789)
num_genes <- 10000
mom_muestral_1 <- 6
mom_muestral_2 <- 6
mitad_1 <- matrix(
  runif(num_genes * mom_muestral_1, min = -2, max = 2),
  nrow = num_genes,
  ncol = mom_muestral_1
)
mitad_2 <- matrix(
  runif(num_genes * mom_muestral_2, min = -4, max = 4),
  nrow = num_genes,
  ncol = mom_muestral_2
)
# Place both groups side by side: group 1 columns first, then group 2.
datosaleaEG01 <- cbind(mitad_1, mitad_2)
dim(datosaleaEG01)
## [1] 10000 12
head(datosaleaEG01)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.79957746 1.4943258 0.40687822 1.816425 -1.2727249 -1.1624271
## [2,] -1.62600451 0.5128004 0.87907447 1.077089 1.5792660 -0.9147813
## [3,] -1.95245273 -1.0258133 -0.06373976 1.447735 -0.5931999 0.3099358
## [4,] 0.36642541 1.6125608 -1.51397604 0.895905 -1.8915467 -1.1032403
## [5,] -0.03140225 -1.8225208 0.48047472 -1.562934 -0.3440984 -1.7929110
## [6,] -1.91934570 -0.5603091 0.47769946 1.185907 0.3340634 -0.2731466
## [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] -0.14484139 -2.6215491 -1.864603 3.116052 -2.2350171 -2.2416845
## [2,] 3.59555582 -1.4881947 3.905833 3.002433 2.1280332 -3.7134398
## [3,] 0.05924968 1.8103155 1.259982 -3.680984 -1.2667359 -2.5608207
## [4,] -3.93094174 -0.3781865 1.681134 1.748022 1.7376943 -1.7946701
## [5,] 2.53134453 -1.9637646 -3.571918 2.462766 -0.1786467 1.4077508
## [6,] -0.44784125 1.9372120 -2.828150 3.601530 0.3162603 -0.8776999
La tabla generada contiene 10000 filas correspondientes a los 10000 genes y 12 columnas correspondientes a los 12 momentos muestrales.
Asignación de nombres a los genes y a los momentos muestrales.
# Row labels are zero-padded gene identifiers; column labels are the sample
# names of the first group followed by those of the second group.
nombre_filas <- sprintf("gen%.5d", seq_len(num_genes))
nombre_columnas <- c(
  paste0("notratado0", seq_len(mom_muestral_1)),
  paste0("tratado0", seq_len(mom_muestral_2))
)
# Attach both sets of labels to the matrix in a single step.
dimnames(datosaleaEG01) <- list(nombre_filas, nombre_columnas)
head(datosaleaEG01)
## notratado01 notratado02 notratado03 notratado04 notratado05
## gen00001 0.79957746 1.4943258 0.40687822 1.816425 -1.2727249
## gen00002 -1.62600451 0.5128004 0.87907447 1.077089 1.5792660
## gen00003 -1.95245273 -1.0258133 -0.06373976 1.447735 -0.5931999
## gen00004 0.36642541 1.6125608 -1.51397604 0.895905 -1.8915467
## gen00005 -0.03140225 -1.8225208 0.48047472 -1.562934 -0.3440984
## gen00006 -1.91934570 -0.5603091 0.47769946 1.185907 0.3340634
## notratado06 tratado01 tratado02 tratado03 tratado04 tratado05
## gen00001 -1.1624271 -0.14484139 -2.6215491 -1.864603 3.116052 -2.2350171
## gen00002 -0.9147813 3.59555582 -1.4881947 3.905833 3.002433 2.1280332
## gen00003 0.3099358 0.05924968 1.8103155 1.259982 -3.680984 -1.2667359
## gen00004 -1.1032403 -3.93094174 -0.3781865 1.681134 1.748022 1.7376943
## gen00005 -1.7929110 2.53134453 -1.9637646 -3.571918 2.462766 -0.1786467
## gen00006 -0.2731466 -0.44784125 1.9372120 -2.828150 3.601530 0.3162603
## tratado06
## gen00001 -2.2416845
## gen00002 -3.7134398
## gen00003 -2.5608207
## gen00004 -1.7946701
## gen00005 1.4077508
## gen00006 -0.8776999
Conversión de “datosaleaEG01” en data.frame:
# Turn the labelled expression matrix into a data.frame and confirm its class.
dfEG01 <- as.data.frame(x = datosaleaEG01)
class(dfEG01)
## [1] "data.frame"
head(dfEG01)
## notratado01 notratado02 notratado03 notratado04 notratado05
## gen00001 0.79957746 1.4943258 0.40687822 1.816425 -1.2727249
## gen00002 -1.62600451 0.5128004 0.87907447 1.077089 1.5792660
## gen00003 -1.95245273 -1.0258133 -0.06373976 1.447735 -0.5931999
## gen00004 0.36642541 1.6125608 -1.51397604 0.895905 -1.8915467
## gen00005 -0.03140225 -1.8225208 0.48047472 -1.562934 -0.3440984
## gen00006 -1.91934570 -0.5603091 0.47769946 1.185907 0.3340634
## notratado06 tratado01 tratado02 tratado03 tratado04 tratado05
## gen00001 -1.1624271 -0.14484139 -2.6215491 -1.864603 3.116052 -2.2350171
## gen00002 -0.9147813 3.59555582 -1.4881947 3.905833 3.002433 2.1280332
## gen00003 0.3099358 0.05924968 1.8103155 1.259982 -3.680984 -1.2667359
## gen00004 -1.1032403 -3.93094174 -0.3781865 1.681134 1.748022 1.7376943
## gen00005 -1.7929110 2.53134453 -1.9637646 -3.571918 2.462766 -0.1786467
## gen00006 -0.2731466 -0.44784125 1.9372120 -2.828150 3.601530 0.3162603
## tratado06
## gen00001 -2.2416845
## gen00002 -3.7134398
## gen00003 -2.5608207
## gen00004 -1.7946701
## gen00005 1.4077508
## gen00006 -0.8776999
Adición de 100 valores NA de forma aleatoria a los momentos muestrales “notratados” y de otros 100 a los “tratados”.
# Replace `num_NA` randomly chosen cells with NA in each group of samples.
# The seed is reset so the selected positions are reproducible.
set.seed(789)
num_NA <- 100
# Column positions of each group, derived from the group sizes instead of
# being hard-coded, so they follow `mom_muestral_1` / `mom_muestral_2`.
columnas_mitad_1 <- seq_len(mom_muestral_1)
columnas_mitad_2 <- length(columnas_mitad_1) + seq_len(mom_muestral_2)
# Each NA position is a (row, column) pair. `sample.int()` plus indexing is
# used rather than `sample(x, ...)` because `sample()` treats a length-one
# numeric `x` as `1:x`.
# NOTE(review): for the first group both rows and columns are drawn with
# replacement, so a (row, column) pair could repeat and yield fewer than
# `num_NA` NAs; the count printed below confirms 100 for this seed. The second
# group draws rows without replacement, so its pairs are always distinct.
muestra_fila_mitad_1 <- sample.int(num_genes, num_NA, replace = TRUE)
muestra_columna_mitad_1 <- columnas_mitad_1[
  sample.int(length(columnas_mitad_1), num_NA, replace = TRUE)
]
muestra_fila_mitad_2 <- sample.int(num_genes, num_NA, replace = FALSE)
muestra_columna_mitad_2 <- columnas_mitad_2[
  sample.int(length(columnas_mitad_2), num_NA, replace = TRUE)
]
head(dfEG01[[columnas_mitad_1[1]]])
## [1] 0.79957746 -1.62600451 -1.95245273 0.36642541 -0.03140225 -1.91934570
# For every column of the first group, blank the rows paired with that column.
for (columna in columnas_mitad_1) {
  filas_NA <- muestra_fila_mitad_1[muestra_columna_mitad_1 == columna]
  dfEG01[[columna]][filas_NA] <- NA
}
sum(is.na(dfEG01[, columnas_mitad_1]))
## [1] 100
head(dfEG01[[columnas_mitad_2[1]]])
## [1] -0.14484139 3.59555582 0.05924968 -3.93094174 2.53134453 -0.44784125
# Same procedure for the columns of the second group.
for (columna in columnas_mitad_2) {
  filas_NA <- muestra_fila_mitad_2[muestra_columna_mitad_2 == columna]
  dfEG01[[columna]][filas_NA] <- NA
}
sum(is.na(dfEG01[, columnas_mitad_2]))
## [1] 100
## [1] 100
# Sample annotations: six untreated samples followed by six treated ones.
vtratamiento <- factor(rep(c("notratados", "tratados"), each = 6))
vtratamiento
## [1] notratados notratados notratados notratados notratados notratados
## [7] tratados tratados tratados tratados tratados tratados
## Levels: notratados tratados
# Sex alternates hombre/mujer along the twelve samples.
vsexo <- factor(rep(c("hombre", "mujer"), times = 6))
vsexo
## [1] hombre mujer hombre mujer hombre mujer hombre mujer hombre mujer
## [11] hombre mujer
## Levels: hombre mujer
# One row per sample, holding its treatment group and sex.
dfCar01 <- data.frame(
  Tratamiento = vtratamiento,
  Sexo = vsexo
)
dfCar01
## Tratamiento Sexo
## 1 notratados hombre
## 2 notratados mujer
## 3 notratados hombre
## 4 notratados mujer
## 5 notratados hombre
## 6 notratados mujer
## 7 tratados hombre
## 8 tratados mujer
## 9 tratados hombre
## 10 tratados mujer
## 11 tratados hombre
## 12 tratados mujer
Creación de un objeto RData con los objetos dfEG01 y dfCar01:
# Bundle the expression data and the sample annotations into one object and
# write it to disk. A named list keeps each data.frame intact; `c()` on two
# data frames would flatten them into a plain list of column vectors, dropping
# the gene row names and the data.frame structure.
datosEG01 <- list(dfEG01 = dfEG01, dfCar01 = dfCar01)
save(datosEG01, file = "datosEG01.RData")
summary(dfEG01)
## notratado01 notratado02 notratado03 notratado04
## Min. :-1.999864 Min. :-1.99852 Min. :-1.998455 Min. :-1.999938
## 1st Qu.:-0.975421 1st Qu.:-1.00692 1st Qu.:-0.988136 1st Qu.:-0.988395
## Median : 0.013646 Median :-0.02164 Median : 0.000659 Median :-0.020727
## Mean : 0.009644 Mean :-0.01131 Mean : 0.005422 Mean :-0.008069
## 3rd Qu.: 1.010238 3rd Qu.: 0.98089 3rd Qu.: 0.983029 3rd Qu.: 0.990407
## Max. : 1.999802 Max. : 2.00000 Max. : 1.999921 Max. : 1.999953
## NA's :9 NA's :21 NA's :20 NA's :15
## notratado05 notratado06 tratado01 tratado02
## Min. :-1.99981 Min. :-1.99939 Min. :-3.999126 Min. :-3.999282
## 1st Qu.:-0.96817 1st Qu.:-0.98651 1st Qu.:-1.953489 1st Qu.:-2.034113
## Median : 0.02784 Median : 0.02738 Median : 0.023218 Median : 0.005791
## Mean : 0.02294 Mean : 0.01374 Mean : 0.009655 Mean : 0.013956
## 3rd Qu.: 1.02664 3rd Qu.: 1.02123 3rd Qu.: 1.976667 3rd Qu.: 2.010523
## Max. : 1.99916 Max. : 1.99977 Max. : 3.999996 Max. : 3.998916
## NA's :17 NA's :18 NA's :25 NA's :13
## tratado03 tratado04 tratado05 tratado06
## Min. :-3.998357 Min. :-3.999820 Min. :-3.99987 Min. :-3.999772
## 1st Qu.:-2.011652 1st Qu.:-1.975206 1st Qu.:-2.02836 1st Qu.:-2.001994
## Median : 0.035676 Median : 0.005697 Median :-0.05363 Median : 0.019204
## Mean :-0.001685 Mean : 0.017464 Mean :-0.02608 Mean :-0.006787
## 3rd Qu.: 1.982843 3rd Qu.: 2.057326 3rd Qu.: 1.96745 3rd Qu.: 1.963359
## Max. : 3.999944 Max. : 3.999746 Max. : 3.99898 Max. : 3.999814
## NA's :13 NA's :15 NA's :18 NA's :16
# Number of missing values in each sample column, named by column.
frec_NA_columnas <- vapply(
  dfEG01,
  function(columna) sum(is.na(columna)),
  numeric(1)
)
frec_NA_columnas
## notratado01 notratado02 notratado03 notratado04 notratado05 notratado06
## 9 21 20 15 17 18
## tratado01 tratado02 tratado03 tratado04 tratado05 tratado06
## 25 13 13 15 18 16
# Largest NA count, kept as a named element so the column name is shown.
mom_muestral_max_NA <- frec_NA_columnas[which.max(frec_NA_columnas)]
mom_muestral_max_NA
## tratado01
## 25
# Position of that column; the first one is taken if several columns tie.
colmax <- which.max(frec_NA_columnas)
colmax
## tratado01
## 7
El momento muestral con número máximo de NA es “tratado01” con 25 valores NA.
# Row positions holding NA in the column with the most missing values.
es_NA_colmax <- is.na(dfEG01[[colmax]])
posiciones_NA_colmax <- seq_along(es_NA_colmax)[es_NA_colmax]
posiciones_NA_colmax
## [1] 52 58 892 1503 1902 2292 2357 3009 3176 3749 4781 4807 4813 4877 5034
## [16] 5207 5456 5876 6192 6322 6366 6376 7275 9280 9912
Los números indicados son las posiciones donde se encuentran los valores NA en la columna de momento muestral “tratado01”.
Suma de los valores de las expresiones génicas para los genes “gen00048” y “gen00257”.
# Total expression across all samples for each of the two genes, selected by
# row name, and then the combined total of both genes.
suma_valores_gen48 <- sum(unlist(dfEG01["gen00048", ]))
suma_valores_gen257 <- sum(unlist(dfEG01["gen00257", ]))
suma_genes <- suma_valores_gen48 + suma_valores_gen257
vector_suma_genes <- c(suma_genes)
vector_suma_genes
## [1] -13.74306
Con la función complete.cases se forma un data.frame que conserva únicamente los genes (filas) sin ningún valor ausente (“missing values”, NA).
# Dimensions before filtering.
dim(dfEG01)
## [1] 10000 12
# Keep only the rows (genes) that have no NA in any sample column.
dfEG01noNA <- dfEG01[which(complete.cases(dfEG01)), ]
class(dfEG01noNA)
## [1] "data.frame"
# Dimensions after filtering.
dim(dfEG01noNA)
## [1] 9801 12
head(dfEG01noNA)
## notratado01 notratado02 notratado03 notratado04 notratado05
## gen00001 0.79957746 1.4943258 0.40687822 1.816425 -1.2727249
## gen00002 -1.62600451 0.5128004 0.87907447 1.077089 1.5792660
## gen00003 -1.95245273 -1.0258133 -0.06373976 1.447735 -0.5931999
## gen00004 0.36642541 1.6125608 -1.51397604 0.895905 -1.8915467
## gen00005 -0.03140225 -1.8225208 0.48047472 -1.562934 -0.3440984
## gen00006 -1.91934570 -0.5603091 0.47769946 1.185907 0.3340634
## notratado06 tratado01 tratado02 tratado03 tratado04 tratado05
## gen00001 -1.1624271 -0.14484139 -2.6215491 -1.864603 3.116052 -2.2350171
## gen00002 -0.9147813 3.59555582 -1.4881947 3.905833 3.002433 2.1280332
## gen00003 0.3099358 0.05924968 1.8103155 1.259982 -3.680984 -1.2667359
## gen00004 -1.1032403 -3.93094174 -0.3781865 1.681134 1.748022 1.7376943
## gen00005 -1.7929110 2.53134453 -1.9637646 -3.571918 2.462766 -0.1786467
## gen00006 -0.2731466 -0.44784125 1.9372120 -2.828150 3.601530 0.3162603
## tratado06
## gen00001 -2.2416845
## gen00002 -3.7134398
## gen00003 -2.5608207
## gen00004 -1.7946701
## gen00005 1.4077508
## gen00006 -0.8776999
# Write the NA-free data.frame to disk under its own object name.
save(list = "dfEG01noNA", file = "dfEG01noNA.RData")
Data.frame con expresiones génicas de momentos muestrales “tratados” y “mujeres”.
# Select the sample columns annotated as both treated and female. The i-th
# row of dfCar01 is used as the annotation of the i-th expression column.
dfEG01_red01 <- dfEG01noNA[
  ,
  with(dfCar01, Tratamiento == "tratados" & Sexo == "mujer")
]
dim(dfEG01_red01)
## [1] 9801 3
head(dfEG01_red01)
## tratado02 tratado04 tratado06
## gen00001 -2.6215491 3.116052 -2.2416845
## gen00002 -1.4881947 3.002433 -3.7134398
## gen00003 1.8103155 -3.680984 -2.5608207
## gen00004 -0.3781865 1.748022 -1.7946701
## gen00005 -1.9637646 2.462766 1.4077508
## gen00006 1.9372120 3.601530 -0.8776999
Orden decreciente de los 6 genes que suman mayor expresión génica en todos los momentos muestrales.
# Total expression of each gene across all samples. `rowSums()` is the
# vectorised equivalent of applying `sum` row by row and keeps the gene names.
suma_todos_genes <- rowSums(dfEG01noNA)
# Six genes with the largest totals, in decreasing order. `head()` returns
# fewer elements instead of NA padding if fewer than six genes are available.
head(sort(suma_todos_genes, decreasing = TRUE), 6)
## gen01983 gen03832 gen01602 gen07599 gen01814 gen09992
## 21.68290 21.22535 20.64785 19.68284 19.19854 19.02482