1. IMPORTACIÓN DE DATOS

1.1 Importación de la base de datos de genes INMUNES

# Immune gene list. Read with read.table defaults for the separator and header,
# with quoting limited to double quotes and comment parsing disabled.
inmune <- read.table(
  file = "C:/Users/acard/CloudStation/R-project/Rodney/data/All immune genes_edity.csv",
  quote = "\"",
  comment.char = ""
)

1.2 Estableciendo las rutas para importar base de datos de RNA-seq por experimento

# Names of the files found in the GSE123139_RAW folder, which is looked up
# under "data" in the parent of the current working directory.
datapath <- list.files(
  path = paste0(dirname(getwd()), "/data/GSE123139_RAW/")
)

1.3 Importando la lista de base de datos de ESPECIFICACIONES

# Sample specification table (series matrix). The file has no header row, so
# columns are auto-named V1, V2, ...; `FALSE` is spelled out because `F` is an
# ordinary variable that can be reassigned.
data.info <- read.delim(
  "C:/Users/acard/CloudStation/R-project/Rodney/data/GSE123139_series_matrix_edit.txt",
  header = FALSE
)
# Preview the first five fields for the first ten columns.
data.info[1:5, 1:10]
##                         V1                    V2                    V3
## 1            !Sample_title                AB1889                AB1890
## 2    !Sample_geo_accession            GSM3496285            GSM3496286
## 3           !Sample_status Public on Dec 27 2018 Public on Dec 27 2018
## 4  !Sample_submission_date           Nov 29 2018           Nov 29 2018
## 5 !Sample_last_update_date           Dec 27 2018           Dec 27 2018
##                      V4                    V5                    V6
## 1                AB1891                AB1892                AB2093
## 2            GSM3496287            GSM3496288            GSM3496289
## 3 Public on Dec 27 2018 Public on Dec 27 2018 Public on Dec 27 2018
## 4           Nov 29 2018           Nov 29 2018           Nov 29 2018
## 5           Dec 27 2018           Dec 27 2018           Dec 27 2018
##                      V7                    V8                    V9
## 1                AB2094                AB2095                AB2096
## 2            GSM3496290            GSM3496291            GSM3496292
## 3 Public on Dec 27 2018 Public on Dec 27 2018 Public on Dec 27 2018
## 4           Nov 29 2018           Nov 29 2018           Nov 29 2018
## 5           Dec 27 2018           Dec 27 2018           Dec 27 2018
##                     V10
## 1                AB2097
## 2            GSM3496293
## 3 Public on Dec 27 2018
## 4           Nov 29 2018
## 5           Dec 27 2018

La base de datos está organizada de forma “transpuesta”, es decir, las variables están en la columna uno (V1) y los datos en el resto de las columnas. Por ello vamos a transponer la base de datos y a usar los nombres de la columna uno como nombres de columna de la nueva base de datos.

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## Warning: package 'tibble' was built under R version 4.0.3
## Warning: package 'readr' was built under R version 4.0.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# The first column holds the field names; drop their "!Sample_" prefix so they
# can be used as column names.
nombres <- str_replace(
  string = data.info[, 1],
  pattern = "!Sample_",
  replacement = ""
)
# Transpose the remaining columns so that each sample becomes a row, then label
# the columns with the cleaned field names.
data.info <- t(data.info[, -1])
colnames(data.info) <- nombres
data.info[1:5, 1:5]
##    title    geo_accession status                  submission_date
## V2 "AB1889" "GSM3496285"  "Public on Dec 27 2018" "Nov 29 2018"  
## V3 "AB1890" "GSM3496286"  "Public on Dec 27 2018" "Nov 29 2018"  
## V4 "AB1891" "GSM3496287"  "Public on Dec 27 2018" "Nov 29 2018"  
## V5 "AB1892" "GSM3496288"  "Public on Dec 27 2018" "Nov 29 2018"  
## V6 "AB2093" "GSM3496289"  "Public on Dec 27 2018" "Nov 29 2018"  
##    last_update_date
## V2 "Dec 27 2018"   
## V3 "Dec 27 2018"   
## V4 "Dec 27 2018"   
## V5 "Dec 27 2018"   
## V6 "Dec 27 2018"

De esta base de datos necesitamos los valores de las columnas 1, 2, 13 y 14, por lo que vamos a crear una segunda base de datos llamada “data.info.2” que contenga estas cuatro columnas.

# Keep only the columns needed downstream: title, GEO accession and the two
# "characteristics_ch1" fields (patient id and FACS gate).
data.info.2 <- data.info[, c(1:2, 13:14)]
head(data.info.2)
##    title    geo_accession characteristics_ch1       characteristics_ch1
## V2 "AB1889" "GSM3496285"  "patient id: p2-4-LN-2IT" "facs gate: CD3"   
## V3 "AB1890" "GSM3496286"  "patient id: p2-4-LN-2IT" "facs gate: CD3"   
## V4 "AB1891" "GSM3496287"  "patient id: p2-4-LN-2IT" "facs gate: CD45"  
## V5 "AB1892" "GSM3496288"  "patient id: p2-4-LN-2IT" "facs gate: CD45"  
## V6 "AB2093" "GSM3496289"  "patient id: p2-4-LN-2IT" "facs gate: CD45"  
## V7 "AB2094" "GSM3496290"  "patient id: p2-4-LN-2IT" "facs gate: CD3"

Ahora vamos a darle formato a la base de datos: quitaremos el patrón “patient id:” de la columna 3 y “facs gate:” de la columna 4. Además renombraremos las columnas a “title”, “geo.accs”, “patient” y “cd”, y agregaremos una columna “name” en la que se fusionarán las columnas 2 y 1 (geo.accs y title) separadas por un guión bajo, para luego localizar con ese nombre los archivos de las bases de datos de secuenciación.

# data.frame() (rather than as.data.frame()) is used on purpose: it makes the
# duplicated "characteristics_ch1" column name unique by appending ".1".
data.info.2 <- data.frame(data.info.2)

# Strip the descriptive prefixes so only the values remain.
data.info.2[["characteristics_ch1"]] <- str_replace(
  string = data.info.2[["characteristics_ch1"]],
  pattern = "patient id: ",
  replacement = ""
)
data.info.2[["characteristics_ch1.1"]] <- str_replace(
  string = data.info.2[["characteristics_ch1.1"]],
  pattern = "facs gate: ",
  replacement = ""
)

names(data.info.2) <- c("title", "geo.accs", "patient", "cd")

# "<geo accession>_<title>" key, e.g. "GSM3496285_AB1889".
data.info.2[["name"]] <- paste(
  data.info.2[["geo.accs"]],
  data.info.2[["title"]],
  sep = "_"
)
head(data.info.2)
##     title   geo.accs     patient   cd              name
## V2 AB1889 GSM3496285 p2-4-LN-2IT  CD3 GSM3496285_AB1889
## V3 AB1890 GSM3496286 p2-4-LN-2IT  CD3 GSM3496286_AB1890
## V4 AB1891 GSM3496287 p2-4-LN-2IT CD45 GSM3496287_AB1891
## V5 AB1892 GSM3496288 p2-4-LN-2IT CD45 GSM3496288_AB1892
## V6 AB2093 GSM3496289 p2-4-LN-2IT CD45 GSM3496289_AB2093
## V7 AB2094 GSM3496290 p2-4-LN-2IT  CD3 GSM3496290_AB2094

El nombre de los pacientes contiene información como:

  • Código del paciente (p. ej. p2)
  • Estadío del tumor (p. ej. 4)
  • Localización (p. ej. LN)
  • Tratamiento (p. ej. 2IT)

Por lo que vamos a generar columnas separadas para estos datos y las vamos a unir a la base de datos data.info.2

# Split each patient string (e.g. "p2-4-LN-2IT") on "-" into separate columns:
# patient code, stage, localization, treatment and an optional fifth field
# (m.meta). Missing trailing fields are filled with NA.
long <- nrow(data.info.2)
tmp <- data.frame(
  patient.2 = character(long),
  stage = character(long),
  localization = character(long),
  tto = character(long),
  m.meta = character(long)
)
n.campos <- ncol(tmp)

# seq_len() yields an empty sequence when the table has no rows, whereas
# 1:nrow() would iterate over c(1, 0).
for (i in seq_len(long)) {
  l.tmp <- unlist(strsplit(x = data.info.2$patient[i], split = "-"))
  if (length(l.tmp) > n.campos) {
    stop(
      "Patient id '", data.info.2$patient[i], "' has more than ",
      n.campos, " fields",
      call. = FALSE
    )
  }
  # Pad with as many NAs as needed (not just one) so that short ids do not
  # break the row assignment below.
  l.tmp <- c(l.tmp, rep(NA_character_, n.campos - length(l.tmp)))
  tmp[i, ] <- l.tmp
}

# Put the parsed fields in front of the existing columns.
data.info.2 <- cbind(tmp, data.info.2)
names(data.info.2)
##  [1] "patient.2"    "stage"        "localization" "tto"          "m.meta"      
##  [6] "title"        "geo.accs"     "patient"      "cd"           "name"
# Keep and reorder the columns used from here on; "title", "geo.accs" and the
# raw "patient" string are dropped.
data.info.2 <- data.info.2[
  c("name", "patient.2", "cd", "stage", "localization", "tto", "m.meta")
]
BASE DE DATOS DE PACIENTES
name patient.2 cd stage localization tto m.meta
V2 GSM3496285_AB1889 p2 CD3 4 LN 2IT NA
V3 GSM3496286_AB1890 p2 CD3 4 LN 2IT NA
V4 GSM3496287_AB1891 p2 CD45 4 LN 2IT NA
V5 GSM3496288_AB1892 p2 CD45 4 LN 2IT NA
V6 GSM3496289_AB2093 p2 CD45 4 LN 2IT NA
V7 GSM3496290_AB2094 p2 CD3 4 LN 2IT NA
V8 GSM3496291_AB2095 p2 CD3 4 LN 2IT NA
V9 GSM3496292_AB2096 p2 CD45 4 LN 2IT NA
V10 GSM3496293_AB2097 p2 CD45 4 LN 2IT NA
V11 GSM3496294_AB2098 p11 CD45 3 (S)C N NA
V12 GSM3496295_AB2099 p11 CD45 3 (S)C N NA
V13 GSM3496296_AB2100 p11 CD3 3 (S)C N NA
V14 GSM3496297_AB2101 p11 CD3 3 (S)C N NA
V15 GSM3496298_AB2102 p11 CD3 3 (S)C N NA
V16 GSM3496299_AB2103 p11 CD45 3 (S)C N NA
V17 GSM3496300_AB2104 p11 CD3 3 (S)C N NA
V18 GSM3496301_AB2105 p2 CD3 4 LN 2IT NA
V19 GSM3496302_AB2106 p11 CD45 3 (S)C N NA
V20 GSM3496303_AB2107 p11 CD45 3 (S)C N NA
V21 GSM3496304_AB2108 p11 CD45 3 (S)C N NA
V22 GSM3496305_AB2491 p13 CD3 3 (S)C N NA
V23 GSM3496306_AB2492 p13 CD3 3 (S)C N NA
V24 GSM3496307_AB2493 p13 CD3 3 (S)C N NA
V25 GSM3496308_AB2494 p13 CD45 3 (S)C N NA
V26 GSM3496309_AB2495 p13 CD45 3 (S)C N NA
V27 GSM3496310_AB2496 p13 CD45 3 (S)C N NA
V28 GSM3496311_AB2497 p13 CD3 3 (S)C N NA
V29 GSM3496312_AB2498 p13 CD3 3 (S)C N NA
V30 GSM3496313_AB2499 p13 CD3 3 (S)C N NA
V31 GSM3496314_AB2500 p13 CD45 3 (S)C N NA
V32 GSM3496315_AB2501 p13 CD45 3 (S)C N NA
V33 GSM3496316_AB2502 p13 CD3 3 (S)C N NA
V34 GSM3496317_AB2503 p13 CD3 3 (S)C N NA
V35 GSM3496318_AB2504 p3 CD3 4 LN mIT NA
V36 GSM3496319_AB2505 p3 CD3 4 LN mIT NA
V37 GSM3496320_AB2506 p3 CD3 4 LN mIT NA
V38 GSM3496321_AB2507 p3 CD3 4 LN mIT NA
V39 GSM3496322_AB2508 p3 CD45 4 LN mIT NA
V40 GSM3496323_AB2509 p3 CD45 4 LN mIT NA
V41 GSM3496324_AB2510 p3 CD45 4 LN mIT NA
V42 GSM3496325_AB2511 p3 CD45 4 LN mIT NA
V43 GSM3496326_AB2512 p3 CD3 & CD45 4 LN mIT NA
V44 GSM3496327_AB2513 p3 CD3 & CD45 4 LN mIT NA
V45 GSM3496328_AB2514 p3 CD3 & CD45 4 LN mIT NA
V46 GSM3496329_AB2515 p3 CD3 & CD45 4 LN mIT NA
V47 GSM3496330_AB2516 p3 CD3 4 LN mIT NA
V48 GSM3496331_AB2517 p3 CD45 4 LN mIT NA
V49 GSM3496332_AB2518 p3 CD3 & CD45 4 LN mIT NA
V50 GSM3496333_AB2519 p3 CD3 & CD45 4 LN mIT NA
V51 GSM3496334_AB2520 p3 CD3 & CD45 4 LN mIT NA
V52 GSM3496335_AB2521 p3 CD3 & CD45 4 LN mIT NA
V53 GSM3496336_AB2541 p12 CD3 3 (S)C N 2
V54 GSM3496337_AB2542 p12 CD3 3 (S)C N 2
V55 GSM3496338_AB2543 p12 CD3 3 (S)C N 1
V56 GSM3496339_AB2549 p13 CD45 3 (S)C N NA
V57 GSM3496340_AB2553 p12 CD45 3 (S)C N 2
V58 GSM3496341_AB2570 p12 CD45 3 (S)C N 2
V59 GSM3496342_AB2574 p12 CD3 & CD45 3 (S)C N 1
V60 GSM3496343_AB2898 p15 CD3 & CD45 3 (S)C N NA
V61 GSM3496344_AB2899 p16 CD3 & CD45 3 (S)C T NA
V62 GSM3496345_AB2900 p8 CD3 4 (S)C mIT NA
V63 GSM3496346_AB2901 p17 CD3 & CD45 3 (S)C N 1
V64 GSM3496347_AB2902 p17 CD3 & CD45 3 (S)C N 2
V65 GSM3496348_AB2903 p15 CD3 & CD45 3 (S)C N NA
V66 GSM3496349_AB2904 p16 CD3 & CD45 3 (S)C T NA
V67 GSM3496350_AB2905 p16 CD3 & CD45 3 (S)C T NA
V68 GSM3496351_AB2906 p8 CD3 4 (S)C mIT NA
V69 GSM3496352_AB2907 p8 CD3 4 (S)C mIT NA
V70 GSM3496353_AB2908 p8 CD45 4 (S)C mIT NA
V71 GSM3496354_AB2909 p8 CD45 4 (S)C mIT NA
V72 GSM3496355_AB2910 p8 CD45 4 (S)C mIT NA
V73 GSM3496356_AB2911 p8 CD45 4 (S)C mIT NA
V74 GSM3496357_AB2912 p8 CD3 4 (S)C mIT NA
V75 GSM3496358_AB2913 p8 CD45 4 (S)C mIT NA
V76 GSM3496359_AB2914 p8 CD3 & CD45 4 (S)C mIT NA
V77 GSM3496360_AB2915 p18 CD3 3 (S)C T NA
V78 GSM3496361_AB2916 p18 CD3 3 (S)C T NA
V79 GSM3496362_AB2917 p18 CD3 3 (S)C T NA
V80 GSM3496363_AB2918 p18 CD3 3 (S)C T NA
V81 GSM3496364_AB2919 p18 CD3 3 (S)C T NA
V82 GSM3496365_AB2936 p18 CD45 3 (S)C T NA
V83 GSM3496366_AB2989 p18 CD45 3 (S)C T NA
V84 GSM3496367_AB2990 p18 CD45 3 (S)C T NA
V85 GSM3496368_AB3078 p9 CD3 4 (S)C 2IT NA
V86 GSM3496369_AB3079 p9 CD45 4 (S)C 2IT NA
V87 GSM3496370_AB3086 p9 CD3 4 (S)C 2IT NA
V88 GSM3496371_AB3087 p9 CD3 4 (S)C 2IT NA
V89 GSM3496372_AB3088 p9 CD3 4 (S)C 2IT NA
V90 GSM3496373_AB3089 p9 CD3 4 (S)C 2IT NA
V91 GSM3496374_AB3090 p9 CD45 4 (S)C 2IT NA
V92 GSM3496375_AB3091 p9 CD45 4 (S)C 2IT NA
V93 GSM3496376_AB3092 p9 CD45 4 (S)C 2IT NA
V94 GSM3496377_AB3093 p9 CD45 4 (S)C 2IT NA
V95 GSM3496378_AB3094 p9 CD3 4 (S)C 2IT NA
V96 GSM3496379_AB3095 p9 CD45 4 (S)C 2IT NA
V97 GSM3496380_AB3096 p9 CD3 4 (S)C 2IT NA
V98 GSM3496381_AB3274 p6 CD3 & CD45 4 (S)C 1IT NA
V99 GSM3496382_AB3275 p1 CD3 & CD45 4 LN N NA
V100 GSM3496383_AB3276 p1 CD3 & CD45 4 LN N NA
V101 GSM3496384_AB3277 p19 CD3 & CD45 3 (S)C T NA
V102 GSM3496385_AB3278 p19 CD3 & CD45 3 (S)C T NA
V103 GSM3496386_AB3279 p19 CD3 & CD45 3 (S)C T NA
V104 GSM3496387_AB3280 p19 CD3 & CD45 3 (S)C T NA
V105 GSM3496388_AB3281 p19 CD3 & CD45 3 (S)C T NA
V106 GSM3496389_AB3282 p19 CD3 & CD45 3 (S)C T NA
V107 GSM3496390_AB3283 p19 CD3 & CD45 3 (S)C T NA
V108 GSM3496391_AB3284 p19 Live 3 (S)C T NA
V109 GSM3496392_AB3718 p20 CD3 & CD45 3 (S)C 1IT NA
V110 GSM3496393_AB3719 p20 CD3 & CD45 3 (S)C 1IT NA
V111 GSM3496394_AB3725 p21 CD3 & CD45 3 (S)C N NA
V112 GSM3496395_AB3726 p21 CD3 & CD45 (+ partly live cells) 3 (S)C N NA
V113 GSM3496396_AB3755 p21 Live 3 (S)C N NA
V114 GSM3496397_AB3937 p25 Live 3 p(S)C N NA
V115 GSM3496398_AB3938 p25 Live 3 p(S)C N NA
V116 GSM3496399_AB3939 p26 CD3 & CD45 2 p(S)C N NA
V117 GSM3496400_AB3940 p26 CD3 & CD45 2 p(S)C N NA
V118 GSM3496401_AB3941 p26 Live 2 p(S)C N NA
V119 GSM3496402_AB3942 p10 CD3 4 MSC 2IT NA
V120 GSM3496403_AB3943 p10 CD3 4 MSC 2IT NA
V121 GSM3496404_AB3944 p10 CD3 4 MSC 2IT NA
V122 GSM3496405_AB3945 p10 CD3 4 MSC 2IT NA
V123 GSM3496406_AB3946 p10 CD45 4 MSC 2IT NA
V124 GSM3496407_AB4027 p24 CD3 3 LN N 2
V125 GSM3496408_AB4028 p24 CD3 3 LN N 2
V126 GSM3496409_AB4029 p24 CD45 3 LN N 2
V127 GSM3496410_AB4030 p24 CD45 3 LN N 2
V128 GSM3496411_AB4031 p24 CD3 3 LN N 2
V129 GSM3496412_AB4032 p24 CD45 3 LN N 2
V130 GSM3496413_AB4121 p4 CD3 4 LN 1IT NA
V131 GSM3496414_AB4122 p4 CD3 4 LN 1IT NA
V132 GSM3496415_AB4123 p4 CD3 4 LN 1IT NA
V133 GSM3496416_AB4124 p4 CD3 4 LN 1IT NA
V134 GSM3496417_AB4125 p4 CD45 4 LN 1IT NA
V135 GSM3496418_AB4126 p4 CD45 4 LN 1IT NA
V136 GSM3496419_AB4127 p4 CD45 4 LN 1IT NA
V137 GSM3496420_AB4128 p4 CD45 4 LN 1IT NA
V138 GSM3496421_AB4129 p4 CD3 4 LN 1IT NA
V139 GSM3496422_AB4130 p4 CD3 4 LN 1IT NA
V140 GSM3496423_AB4131 p4 CD3 4 LN 1IT NA
V141 GSM3496424_AB4132 p4 CD45 4 LN 1IT NA
V142 GSM3496425_AB4133 p4 CD3 & CD45 4 LN 1IT NA
V143 GSM3496426_AB4134 p23 CD3 3 LN N NA
V144 GSM3496427_AB4135 p23 CD3 3 LN N NA
V145 GSM3496428_AB4136 p23 CD3 3 LN N NA
V146 GSM3496429_AB4137 p23 CD3 3 LN N NA
V147 GSM3496430_AB4138 p23 CD45 3 LN N NA
V148 GSM3496431_AB4139 p23 CD45 3 LN N NA
V149 GSM3496432_AB4140 p23 CD45 3 LN N NA
V150 GSM3496433_AB4141 p23 CD45 3 LN N NA
V151 GSM3496434_AB4142 p23 CD45 3 LN N NA
V152 GSM3496435_AB4143 p23 CD3 3 LN N NA
V153 GSM3496436_AB4229 p5 CD3 4 LN 2IT NA
V154 GSM3496437_AB4230 p5 CD3 4 LN 2IT NA
V155 GSM3496438_AB4231 p5 CD3 4 LN 2IT NA
V156 GSM3496439_AB4232 p5 CD45 4 LN 2IT NA
V157 GSM3496440_AB4233 p5 CD45 4 LN 2IT NA
V158 GSM3496441_AB4234 p5 CD45 4 LN 2IT NA
V159 GSM3496442_AB4235 p5 CD3 4 LN 2IT NA
V160 GSM3496443_AB4236 p5 CD3 4 LN 2IT NA
V161 GSM3496444_AB4237 p5 CD3 4 LN 2IT NA
V162 GSM3496445_AB4238 p5 CD45 4 LN 2IT NA
V163 GSM3496446_AB4239 p5 CD45 4 LN 2IT NA
V164 GSM3496447_AB4359 p13_PBMC CCR7+ CD45RA+ 3 (S)C N NA
V165 GSM3496448_AB4360 p13_PBMC CCR7+ CD45RA+ 3 (S)C N NA
V166 GSM3496449_AB4361 p13_PBMC CCR7+ CD45RA+ 3 (S)C N NA
V167 GSM3496450_AB4362 p13_PBMC CD3 3 (S)C N NA
V168 GSM3496451_AB4363 p13_PBMC CD3 3 (S)C N NA
V169 GSM3496452_AB4364 p13_PBMC CD45 3 (S)C N NA
V170 GSM3496453_AB4365 p13_PBMC CD45 3 (S)C N NA
V171 GSM3496454_AB4366 p13_PBMC CD3 3 (S)C N NA
V172 GSM3496455_AB4367 p13_PBMC CD45 3 (S)C N NA
V173 GSM3496456_AB5008 p27 CD3 4 (S)C 1IT NA
V174 GSM3496457_AB5009 p27 CD3 4 (S)C 1IT NA
V175 GSM3496458_AB5010 p27 CD3 4 (S)C 1IT NA
V176 GSM3496459_AB5011 p27 CD3 4 (S)C 1IT NA
V177 GSM3496460_AB5012 p27 CD45 4 (S)C 1IT NA
V178 GSM3496461_AB5013 p27 CD45 4 (S)C 1IT NA
V179 GSM3496462_AB5014 p27 CD3 & CD45 4 (S)C 1IT NA
V180 GSM3496463_AB5015 p27 CD3 & CD45 4 (S)C 1IT NA
V181 GSM3496464_AB5016 p27 CD3 4 (S)C 1IT NA
V182 GSM3496465_AB5017 p27 CD45 4 (S)C 1IT NA
V183 GSM3496466_AB5018 p27 CD3 & CD45 4 (S)C 1IT NA
V184 GSM3496467_AB5019 p27 CD3 4 (S)C 1IT NA
V185 GSM3496468_AB5190 p28 CD3 4 (S)C 2IT NA
V186 GSM3496469_AB5191 p28 CD3 4 (S)C 2IT NA
V187 GSM3496470_AB5192 p28 CD3 4 (S)C 2IT NA
V188 GSM3496471_AB5193 p28 CD3 4 (S)C 2IT NA
V189 GSM3496472_AB5194 p28 CD3 & CD45 4 (S)C 2IT NA
V190 GSM3496473_AB5195 p28 CD3 & CD45 4 (S)C 2IT NA
V191 GSM3496474_AB5196 p28 CD3 & CD45 4 (S)C 2IT NA
V192 GSM3496475_AB5197 p28 CD3 & CD45 4 (S)C 2IT NA
V193 GSM3496476_AB5198 p28 CD3 4 (S)C 2IT NA
V194 GSM3496477_AB5199 p28 CD3 & CD45 4 (S)C 2IT NA
V195 GSM3496478_AB5424 p17_PBMC CCR7+ CD45RA+ 3 (S)C N NA
V196 GSM3496479_AB5425 p17_PBMC CCR7+ CD45RA+ 3 (S)C N NA
V197 GSM3496480_AB5426 p17_PBMC CCR7+ CD45RA+ 3 (S)C N NA
V198 GSM3496481_AB5427 p17_PBMC CD3 3 (S)C N NA
V199 GSM3496482_AB5428 p17_PBMC CD45 3 (S)C N NA
V200 GSM3496483_AB5429 p17_PBMC CD3 & CD45 3 (S)C N NA
V201 GSM3496484_AB5490 p27_PBMC CD3 4 (S)C 1IT NA
V202 GSM3496485_AB5491 p27_PBMC CD3 & CD45 4 (S)C 1IT NA
V203 GSM3496486_AB5492 p27_PBMC CCR7+ CD45RA+ 4 (S)C 1IT NA
V204 GSM3496487_AB5493 p27_PBMC CD45 4 (S)C 1IT NA
V205 GSM3496488_AB5494 p27_PBMC CD3 4 (S)C 1IT NA
NUMERO DE EXPERIMENTOS POR CONDICIÓN
CCR7+ CD45RA+ CD3 CD3 & CD45 CD3 & CD45 (+ partly live cells) CD45 Live
p1 0 0 2 0 0 0
p10 0 4 0 0 1 0
p11 0 4 0 0 6 0
p12 0 3 1 0 2 0
p13 0 8 0 0 6 0
p13_PBMC 3 3 0 0 3 0
p15 0 0 2 0 0 0
p16 0 0 3 0 0 0
p17 0 0 2 0 0 0
p17_PBMC 3 1 1 0 1 0
p18 0 5 0 0 3 0
p19 0 0 7 0 0 1
p2 0 5 0 0 5 0
p20 0 0 2 0 0 0
p21 0 0 1 1 0 1
p23 0 5 0 0 5 0
p24 0 3 0 0 3 0
p25 0 0 0 0 0 2
p26 0 0 2 0 0 1
p27 0 6 3 0 3 0
p27_PBMC 1 2 1 0 1 0
p28 0 5 5 0 0 0
p3 0 5 8 0 5 0
p4 0 7 1 0 5 0
p5 0 6 0 0 5 0
p6 0 0 1 0 0 0
p8 0 4 1 0 5 0
p9 0 7 0 0 6 0

2. SELECCIÓN DE PACIENTES CON CÁNCER EN NÓDULOS LINFÁTICOS

Primero vamos a seleccionar los pacientes con cáncer en los nódulos linfáticos (localization = “LN”)

# Experiments whose localization is "LN". which() drops rows where the
# comparison is NA, as subset() does.
LN <- data.info.2[which(data.info.2$localization == "LN"), ]
LN
##                   name patient.2         cd stage localization tto m.meta
## V2   GSM3496285_AB1889        p2        CD3     4           LN 2IT   <NA>
## V3   GSM3496286_AB1890        p2        CD3     4           LN 2IT   <NA>
## V4   GSM3496287_AB1891        p2       CD45     4           LN 2IT   <NA>
## V5   GSM3496288_AB1892        p2       CD45     4           LN 2IT   <NA>
## V6   GSM3496289_AB2093        p2       CD45     4           LN 2IT   <NA>
## V7   GSM3496290_AB2094        p2        CD3     4           LN 2IT   <NA>
## V8   GSM3496291_AB2095        p2        CD3     4           LN 2IT   <NA>
## V9   GSM3496292_AB2096        p2       CD45     4           LN 2IT   <NA>
## V10  GSM3496293_AB2097        p2       CD45     4           LN 2IT   <NA>
## V18  GSM3496301_AB2105        p2        CD3     4           LN 2IT   <NA>
## V35  GSM3496318_AB2504        p3        CD3     4           LN mIT   <NA>
## V36  GSM3496319_AB2505        p3        CD3     4           LN mIT   <NA>
## V37  GSM3496320_AB2506        p3        CD3     4           LN mIT   <NA>
## V38  GSM3496321_AB2507        p3        CD3     4           LN mIT   <NA>
## V39  GSM3496322_AB2508        p3       CD45     4           LN mIT   <NA>
## V40  GSM3496323_AB2509        p3       CD45     4           LN mIT   <NA>
## V41  GSM3496324_AB2510        p3       CD45     4           LN mIT   <NA>
## V42  GSM3496325_AB2511        p3       CD45     4           LN mIT   <NA>
## V43  GSM3496326_AB2512        p3 CD3 & CD45     4           LN mIT   <NA>
## V44  GSM3496327_AB2513        p3 CD3 & CD45     4           LN mIT   <NA>
## V45  GSM3496328_AB2514        p3 CD3 & CD45     4           LN mIT   <NA>
## V46  GSM3496329_AB2515        p3 CD3 & CD45     4           LN mIT   <NA>
## V47  GSM3496330_AB2516        p3        CD3     4           LN mIT   <NA>
## V48  GSM3496331_AB2517        p3       CD45     4           LN mIT   <NA>
## V49  GSM3496332_AB2518        p3 CD3 & CD45     4           LN mIT   <NA>
## V50  GSM3496333_AB2519        p3 CD3 & CD45     4           LN mIT   <NA>
## V51  GSM3496334_AB2520        p3 CD3 & CD45     4           LN mIT   <NA>
## V52  GSM3496335_AB2521        p3 CD3 & CD45     4           LN mIT   <NA>
## V99  GSM3496382_AB3275        p1 CD3 & CD45     4           LN   N   <NA>
## V100 GSM3496383_AB3276        p1 CD3 & CD45     4           LN   N   <NA>
## V124 GSM3496407_AB4027       p24        CD3     3           LN   N      2
## V125 GSM3496408_AB4028       p24        CD3     3           LN   N      2
## V126 GSM3496409_AB4029       p24       CD45     3           LN   N      2
## V127 GSM3496410_AB4030       p24       CD45     3           LN   N      2
## V128 GSM3496411_AB4031       p24        CD3     3           LN   N      2
## V129 GSM3496412_AB4032       p24       CD45     3           LN   N      2
## V130 GSM3496413_AB4121        p4        CD3     4           LN 1IT   <NA>
## V131 GSM3496414_AB4122        p4        CD3     4           LN 1IT   <NA>
## V132 GSM3496415_AB4123        p4        CD3     4           LN 1IT   <NA>
## V133 GSM3496416_AB4124        p4        CD3     4           LN 1IT   <NA>
## V134 GSM3496417_AB4125        p4       CD45     4           LN 1IT   <NA>
## V135 GSM3496418_AB4126        p4       CD45     4           LN 1IT   <NA>
## V136 GSM3496419_AB4127        p4       CD45     4           LN 1IT   <NA>
## V137 GSM3496420_AB4128        p4       CD45     4           LN 1IT   <NA>
## V138 GSM3496421_AB4129        p4        CD3     4           LN 1IT   <NA>
## V139 GSM3496422_AB4130        p4        CD3     4           LN 1IT   <NA>
## V140 GSM3496423_AB4131        p4        CD3     4           LN 1IT   <NA>
## V141 GSM3496424_AB4132        p4       CD45     4           LN 1IT   <NA>
## V142 GSM3496425_AB4133        p4 CD3 & CD45     4           LN 1IT   <NA>
## V143 GSM3496426_AB4134       p23        CD3     3           LN   N   <NA>
## V144 GSM3496427_AB4135       p23        CD3     3           LN   N   <NA>
## V145 GSM3496428_AB4136       p23        CD3     3           LN   N   <NA>
## V146 GSM3496429_AB4137       p23        CD3     3           LN   N   <NA>
## V147 GSM3496430_AB4138       p23       CD45     3           LN   N   <NA>
## V148 GSM3496431_AB4139       p23       CD45     3           LN   N   <NA>
## V149 GSM3496432_AB4140       p23       CD45     3           LN   N   <NA>
## V150 GSM3496433_AB4141       p23       CD45     3           LN   N   <NA>
## V151 GSM3496434_AB4142       p23       CD45     3           LN   N   <NA>
## V152 GSM3496435_AB4143       p23        CD3     3           LN   N   <NA>
## V153 GSM3496436_AB4229        p5        CD3     4           LN 2IT   <NA>
## V154 GSM3496437_AB4230        p5        CD3     4           LN 2IT   <NA>
## V155 GSM3496438_AB4231        p5        CD3     4           LN 2IT   <NA>
## V156 GSM3496439_AB4232        p5       CD45     4           LN 2IT   <NA>
## V157 GSM3496440_AB4233        p5       CD45     4           LN 2IT   <NA>
## V158 GSM3496441_AB4234        p5       CD45     4           LN 2IT   <NA>
## V159 GSM3496442_AB4235        p5        CD3     4           LN 2IT   <NA>
## V160 GSM3496443_AB4236        p5        CD3     4           LN 2IT   <NA>
## V161 GSM3496444_AB4237        p5        CD3     4           LN 2IT   <NA>
## V162 GSM3496445_AB4238        p5       CD45     4           LN 2IT   <NA>
## V163 GSM3496446_AB4239        p5       CD45     4           LN 2IT   <NA>

Aquí tenemos 70 experimentos, pero se usaron diferentes CD para el filtrado, y cada uno tiene diferentes tratamientos.

# Cross-tabulate the FACS gate (rows) against the treatment code (columns).
table(LN[["cd"]], LN[["tto"]])
##             
##              1IT 2IT mIT  N
##   CD3          7  11   5  8
##   CD3 & CD45   1   0   8  2
##   CD45         5  10   5  8

Así que vamos a generar diferentes grupos para cada CD de filtrado

# One table per FACS gate; which() drops rows where the comparison is NA.
LN.CD3 <- LN[which(LN$cd == "CD3"), ]
LN.CD45 <- LN[which(LN$cd == "CD45"), ]
LN.CD3_CD45 <- LN[which(LN$cd == "CD3 & CD45"), ]

2.1 Trabajando con “CD3 & CD45”

CD3 & CD45
name patient.2 cd stage localization tto m.meta
V43 GSM3496326_AB2512 p3 CD3 & CD45 4 LN mIT NA
V44 GSM3496327_AB2513 p3 CD3 & CD45 4 LN mIT NA
V45 GSM3496328_AB2514 p3 CD3 & CD45 4 LN mIT NA
V46 GSM3496329_AB2515 p3 CD3 & CD45 4 LN mIT NA
V49 GSM3496332_AB2518 p3 CD3 & CD45 4 LN mIT NA
V50 GSM3496333_AB2519 p3 CD3 & CD45 4 LN mIT NA
V51 GSM3496334_AB2520 p3 CD3 & CD45 4 LN mIT NA
V52 GSM3496335_AB2521 p3 CD3 & CD45 4 LN mIT NA
V99 GSM3496382_AB3275 p1 CD3 & CD45 4 LN N NA
V100 GSM3496383_AB3276 p1 CD3 & CD45 4 LN N NA
V142 GSM3496425_AB4133 p4 CD3 & CD45 4 LN 1IT NA

Aquí tenemos 3 pacientes, dos con tratamiento y 1 sin tratamiento

Paciente Tto
p1 sin tto
p3 ??
p4 aPD1

2.1.2 Generando UMAP para p4

library(affy)
## Warning: package 'affy' was built under R version 4.0.3
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.0.3
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.0.3
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
# Load the count table of the single "CD3 & CD45" experiment of patient p4.
# The sample name ("<geo accession>_<title>") plus ".txt.gz" is the file name
# expected inside GSE123139_RAW.
p4.muestra <- LN.CD3_CD45$name[LN.CD3_CD45$patient.2 == "p4"]

# %in% instead of ==, so that several sample names are not silently recycled
# against `datapath`.
p4.archivo <- datapath[datapath %in% paste0(p4.muestra, ".txt.gz")]

# Fail early if the file is missing or if more than one file matches.
stopifnot(length(p4.archivo) == 1L)

p4 <- read.table(
  gzfile(file.path(dirname(getwd()), "data", "GSE123139_RAW", p4.archivo)),
  sep = "\t"
)
p4[1:10, 1:5]
##           WMC290849 WMC290850 WMC290851 WMC290852 WMC290853
## 5S_rRNA           0         0         0         0         0
## 7SK               0         0         0         0         0
## A1BG              0         0         0         0         0
## A1BG-AS1          0         0         0         0         0
## A1CF              0         0         0         0         0
## A2M               0         0         0         0         0
## A2M-AS1           0         0         0         0         0
## A2ML1             0         0         0         0         0
## A2ML1-AS1         0         0         0         0         0
## A2ML1-AS2         0         0         0         0         0

Ahora seleccionamos los genes INMUNOME

# Keep only the rows whose gene symbol appears in the immune gene list.
# Matching is exact: a regular expression built by pasting the symbols with "|"
# matches substrings, so "A2M" would also select "A2M-AS1", "A2ML1", "A2MP1",
# etc., and any regex metacharacter in a symbol would be interpreted.
p4.inmune <- p4[rownames(p4) %in% inmune$V1, ]
p4.inmune[1:10, 1:5]
##           WMC290849 WMC290850 WMC290851 WMC290852 WMC290853
## A2M               0         0         0         0         0
## A2M-AS1           0         0         0         0         0
## A2ML1             0         0         0         0         0
## A2ML1-AS1         0         0         0         0         0
## A2ML1-AS2         0         0         0         0         0
## A2MP1             0         0         0         0         0
## ABCC3             0         0         0         0         0
## ABCD1             0         0         0         0         0
## ABCD1P2           0         0         0         0         0
## ABCD1P3           0         0         0         0         0

Ahora generamos el análisis de UMAP

library(umap)
## Warning: package 'umap' was built under R version 4.0.3
# UMAP embeddings of the full table and of the immune-gene subset. A fixed
# random_state makes the layouts reproducible between runs.
# NOTE(review): umap() embeds the ROWS of its input, and the rows here are
# genes (columns are WMC... ids). If the goal is one point per column, pass
# t(p4) / t(p4.inmune) instead - confirm the intent.
p4.umap <- umap::umap(p4, random_state = 123L)
p4.inmu.umap <- umap::umap(p4.inmune, random_state = 123L)

Graficando

# Turn each two-column UMAP layout into a data frame with named axes.
plot.p4 <- setNames(as.data.frame(p4.umap$layout), c("UMAP1", "UMAP2"))
plot.p4.inmu <- setNames(
  as.data.frame(p4.inmu.umap$layout),
  c("UMAP1", "UMAP2")
)

library(ggplot2)
# Scatter plot of the UMAP layout for the full table; the x-axis label carries
# the sample name of patient p4.
ggplot2::ggplot(data = plot.p4, mapping = aes(x = UMAP1, y = UMAP2)) +
  geom_point() +
  labs(
    title = "p4-LN-aPD1",
    x = LN.CD3_CD45$name[LN.CD3_CD45$patient.2 == "p4"]
  )

# Same plot for the layout computed from the immune-gene subset.
ggplot2::ggplot(data = plot.p4.inmu, mapping = aes(x = UMAP1, y = UMAP2)) +
  geom_point() +
  labs(
    title = "p4-LN-aPD1_SELECT",
    x = LN.CD3_CD45$name[LN.CD3_CD45$patient.2 == "p4"]
  )