# Load the inputs used by the rest of the script.
# `inmune`: gene list; its first column (V1) is used further down to select
# rows of the count table.
# NOTE(review): this path and the series-matrix path below are absolute and
# tied to one machine, while `datapath` is resolved relative to the working
# directory -- consider unifying them.
inmune <- read.table(
  "C:/Users/acard/CloudStation/R-project/Rodney/data/All immune genes_edity.csv",
  quote = "\"",
  comment.char = ""
)

# File names found in the raw-data folder, located next to (one level above)
# the current working directory.
datapath <- list.files(paste0(dirname(getwd()), "/data/GSE123139_RAW/"))

# Series-matrix metadata: one field per row, one sample per column.
# `header = FALSE` (spelled out rather than the reassignable `F`) keeps the
# first row as data, so columns are named V1, V2, ...
data.info <- read.delim(
  "C:/Users/acard/CloudStation/R-project/Rodney/data/GSE123139_series_matrix_edit.txt",
  header = FALSE
)

# Preview the first five fields for the first nine samples.
data.info[1:5, 1:10]
## V1 V2 V3
## 1 !Sample_title AB1889 AB1890
## 2 !Sample_geo_accession GSM3496285 GSM3496286
## 3 !Sample_status Public on Dec 27 2018 Public on Dec 27 2018
## 4 !Sample_submission_date Nov 29 2018 Nov 29 2018
## 5 !Sample_last_update_date Dec 27 2018 Dec 27 2018
## V4 V5 V6
## 1 AB1891 AB1892 AB2093
## 2 GSM3496287 GSM3496288 GSM3496289
## 3 Public on Dec 27 2018 Public on Dec 27 2018 Public on Dec 27 2018
## 4 Nov 29 2018 Nov 29 2018 Nov 29 2018
## 5 Dec 27 2018 Dec 27 2018 Dec 27 2018
## V7 V8 V9
## 1 AB2094 AB2095 AB2096
## 2 GSM3496290 GSM3496291 GSM3496292
## 3 Public on Dec 27 2018 Public on Dec 27 2018 Public on Dec 27 2018
## 4 Nov 29 2018 Nov 29 2018 Nov 29 2018
## 5 Dec 27 2018 Dec 27 2018 Dec 27 2018
## V10
## 1 AB2097
## 2 GSM3496293
## 3 Public on Dec 27 2018
## 4 Nov 29 2018
## 5 Dec 27 2018
La base de datos está organizada de forma “transpuesta”, es decir, las variables están en la columna uno (V1) y los datos en el resto de las columnas. Por ello vamos a transponer la base de datos y a usar los valores de la columna uno como nombres de columna de la nueva base de datos.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## Warning: package 'tibble' was built under R version 4.0.3
## Warning: package 'readr' was built under R version 4.0.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Column 1 carries the field names ("!Sample_title", ...); strip the
# "!Sample_" prefix so they can be used as column names.
nombres <- str_replace(data.info[[1]], "!Sample_", "")

# Transpose everything except that first column, giving one row per sample
# and one column per metadata field, then label the columns.
data.info <- t(data.info[-1])
colnames(data.info) <- nombres

# Preview the first five samples and fields.
data.info[1:5, 1:5]
## title geo_accession status submission_date
## V2 "AB1889" "GSM3496285" "Public on Dec 27 2018" "Nov 29 2018"
## V3 "AB1890" "GSM3496286" "Public on Dec 27 2018" "Nov 29 2018"
## V4 "AB1891" "GSM3496287" "Public on Dec 27 2018" "Nov 29 2018"
## V5 "AB1892" "GSM3496288" "Public on Dec 27 2018" "Nov 29 2018"
## V6 "AB2093" "GSM3496289" "Public on Dec 27 2018" "Nov 29 2018"
## last_update_date
## V2 "Dec 27 2018"
## V3 "Dec 27 2018"
## V4 "Dec 27 2018"
## V5 "Dec 27 2018"
## V6 "Dec 27 2018"
De esta base de datos necesitamos los valores de las columnas 1, 2, 13 y 14, por lo que vamos a crear una segunda base de datos llamada “data.info.2” que contiene estas cuatro columnas.
# Keep only the fields needed downstream: columns 1, 2, 13 and 14
# (title, geo_accession and the two "characteristics_ch1" fields).
data.info.2 <- data.info[, c(1L, 2L, 13L, 14L)]
head(data.info.2, n = 6L)
## title geo_accession characteristics_ch1 characteristics_ch1
## V2 "AB1889" "GSM3496285" "patient id: p2-4-LN-2IT" "facs gate: CD3"
## V3 "AB1890" "GSM3496286" "patient id: p2-4-LN-2IT" "facs gate: CD3"
## V4 "AB1891" "GSM3496287" "patient id: p2-4-LN-2IT" "facs gate: CD45"
## V5 "AB1892" "GSM3496288" "patient id: p2-4-LN-2IT" "facs gate: CD45"
## V6 "AB2093" "GSM3496289" "patient id: p2-4-LN-2IT" "facs gate: CD45"
## V7 "AB2094" "GSM3496290" "patient id: p2-4-LN-2IT" "facs gate: CD3"
Ahora vamos a darle formato a la base de datos: quitaremos el patrón “patient id:” de la columna 3 y “facs gate:” de la columna 4, y renombraremos las columnas a “title”, “geo.accs”, “patient” y “cd”. Además agregaremos una columna “name” en la que se fusionan las columnas 2 y 1 (geo.accs y title, en ese orden) separadas por un guion bajo, para luego importar con ese nombre los archivos de las bases de datos de secuenciación.
# Converting the matrix to a data frame makes the duplicated
# "characteristics_ch1" column name unique: the second one becomes
# "characteristics_ch1.1".
data.info.2 <- data.frame(data.info.2)

# Drop the textual prefixes so only the patient label and the FACS gate remain.
data.info.2[["characteristics_ch1"]] <- str_replace(
  data.info.2[["characteristics_ch1"]], "patient id: ", ""
)
data.info.2[["characteristics_ch1.1"]] <- str_replace(
  data.info.2[["characteristics_ch1.1"]], "facs gate: ", ""
)

colnames(data.info.2) <- c("title", "geo.accs", "patient", "cd")

# "<geo accession>_<title>"; this is later matched against the raw-data file
# names (with ".txt.gz" appended).
data.info.2[["name"]] <- paste(
  data.info.2[["geo.accs"]], data.info.2[["title"]],
  sep = "_"
)
head(data.info.2, n = 6L)
## title geo.accs patient cd name
## V2 AB1889 GSM3496285 p2-4-LN-2IT CD3 GSM3496285_AB1889
## V3 AB1890 GSM3496286 p2-4-LN-2IT CD3 GSM3496286_AB1890
## V4 AB1891 GSM3496287 p2-4-LN-2IT CD45 GSM3496287_AB1891
## V5 AB1892 GSM3496288 p2-4-LN-2IT CD45 GSM3496288_AB1892
## V6 AB2093 GSM3496289 p2-4-LN-2IT CD45 GSM3496289_AB2093
## V7 AB2094 GSM3496290 p2-4-LN-2IT CD3 GSM3496290_AB2094
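El nombre de los pacientes contiene varios datos separados por guiones (por ejemplo “p2-4-LN-2IT”): el identificador del paciente (patient.2), stage, localization, tto y, en algunos casos, m.meta.
Por lo que vamos a generar columnas para estos datos por separado y las vamos a unir a la base de datos data.info.2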
# Split each patient label (e.g. "p2-4-LN-2IT") on "-" into up to five fields
# and prepend them to data.info.2 as separate character columns.
long <- nrow(data.info.2)
campos <- c("patient.2", "stage", "localization", "tto", "m.meta")
partes <- strsplit(x = data.info.2$patient, split = "-")

# A label with more pieces than there are target columns cannot be stored;
# fail with an explicit message instead of an obscure assignment error.
if (any(lengths(partes) > length(campos))) {
  stop(
    "A patient label has more than ", length(campos),
    " '-'-separated fields",
    call. = FALSE
  )
}

# Indexing past the end of a character vector yields NA, so a label that is
# missing any number of trailing fields is padded with NA (the previous loop
# appended exactly one NA and therefore only coped with four-field labels).
tmp <- vapply(
  partes,
  function(p) p[seq_along(campos)],
  character(length(campos)),
  USE.NAMES = FALSE
)
# vapply() returns one column per label; transpose to one row per label.
tmp <- as.data.frame(t(tmp), stringsAsFactors = FALSE)
names(tmp) <- campos

data.info.2 <- cbind(tmp, data.info.2)
names(data.info.2)
## [1] "patient.2" "stage" "localization" "tto" "m.meta"
## [6] "title" "geo.accs" "patient" "cd" "name"
# Keep the sample name, the patient id, the FACS gate and the fields parsed
# from the patient label, in that order; the remaining columns are dropped.
data.info.2 <- data.info.2[
  c("name", "patient.2", "cd", "stage", "localization", "tto", "m.meta")
]
| | name | patient.2 | cd | stage | localization | tto | m.meta |
|---|---|---|---|---|---|---|---|
| V2 | GSM3496285_AB1889 | p2 | CD3 | 4 | LN | 2IT | NA |
| V3 | GSM3496286_AB1890 | p2 | CD3 | 4 | LN | 2IT | NA |
| V4 | GSM3496287_AB1891 | p2 | CD45 | 4 | LN | 2IT | NA |
| V5 | GSM3496288_AB1892 | p2 | CD45 | 4 | LN | 2IT | NA |
| V6 | GSM3496289_AB2093 | p2 | CD45 | 4 | LN | 2IT | NA |
| V7 | GSM3496290_AB2094 | p2 | CD3 | 4 | LN | 2IT | NA |
| V8 | GSM3496291_AB2095 | p2 | CD3 | 4 | LN | 2IT | NA |
| V9 | GSM3496292_AB2096 | p2 | CD45 | 4 | LN | 2IT | NA |
| V10 | GSM3496293_AB2097 | p2 | CD45 | 4 | LN | 2IT | NA |
| V11 | GSM3496294_AB2098 | p11 | CD45 | 3 | (S)C | N | NA |
| V12 | GSM3496295_AB2099 | p11 | CD45 | 3 | (S)C | N | NA |
| V13 | GSM3496296_AB2100 | p11 | CD3 | 3 | (S)C | N | NA |
| V14 | GSM3496297_AB2101 | p11 | CD3 | 3 | (S)C | N | NA |
| V15 | GSM3496298_AB2102 | p11 | CD3 | 3 | (S)C | N | NA |
| V16 | GSM3496299_AB2103 | p11 | CD45 | 3 | (S)C | N | NA |
| V17 | GSM3496300_AB2104 | p11 | CD3 | 3 | (S)C | N | NA |
| V18 | GSM3496301_AB2105 | p2 | CD3 | 4 | LN | 2IT | NA |
| V19 | GSM3496302_AB2106 | p11 | CD45 | 3 | (S)C | N | NA |
| V20 | GSM3496303_AB2107 | p11 | CD45 | 3 | (S)C | N | NA |
| V21 | GSM3496304_AB2108 | p11 | CD45 | 3 | (S)C | N | NA |
| V22 | GSM3496305_AB2491 | p13 | CD3 | 3 | (S)C | N | NA |
| V23 | GSM3496306_AB2492 | p13 | CD3 | 3 | (S)C | N | NA |
| V24 | GSM3496307_AB2493 | p13 | CD3 | 3 | (S)C | N | NA |
| V25 | GSM3496308_AB2494 | p13 | CD45 | 3 | (S)C | N | NA |
| V26 | GSM3496309_AB2495 | p13 | CD45 | 3 | (S)C | N | NA |
| V27 | GSM3496310_AB2496 | p13 | CD45 | 3 | (S)C | N | NA |
| V28 | GSM3496311_AB2497 | p13 | CD3 | 3 | (S)C | N | NA |
| V29 | GSM3496312_AB2498 | p13 | CD3 | 3 | (S)C | N | NA |
| V30 | GSM3496313_AB2499 | p13 | CD3 | 3 | (S)C | N | NA |
| V31 | GSM3496314_AB2500 | p13 | CD45 | 3 | (S)C | N | NA |
| V32 | GSM3496315_AB2501 | p13 | CD45 | 3 | (S)C | N | NA |
| V33 | GSM3496316_AB2502 | p13 | CD3 | 3 | (S)C | N | NA |
| V34 | GSM3496317_AB2503 | p13 | CD3 | 3 | (S)C | N | NA |
| V35 | GSM3496318_AB2504 | p3 | CD3 | 4 | LN | mIT | NA |
| V36 | GSM3496319_AB2505 | p3 | CD3 | 4 | LN | mIT | NA |
| V37 | GSM3496320_AB2506 | p3 | CD3 | 4 | LN | mIT | NA |
| V38 | GSM3496321_AB2507 | p3 | CD3 | 4 | LN | mIT | NA |
| V39 | GSM3496322_AB2508 | p3 | CD45 | 4 | LN | mIT | NA |
| V40 | GSM3496323_AB2509 | p3 | CD45 | 4 | LN | mIT | NA |
| V41 | GSM3496324_AB2510 | p3 | CD45 | 4 | LN | mIT | NA |
| V42 | GSM3496325_AB2511 | p3 | CD45 | 4 | LN | mIT | NA |
| V43 | GSM3496326_AB2512 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V44 | GSM3496327_AB2513 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V45 | GSM3496328_AB2514 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V46 | GSM3496329_AB2515 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V47 | GSM3496330_AB2516 | p3 | CD3 | 4 | LN | mIT | NA |
| V48 | GSM3496331_AB2517 | p3 | CD45 | 4 | LN | mIT | NA |
| V49 | GSM3496332_AB2518 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V50 | GSM3496333_AB2519 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V51 | GSM3496334_AB2520 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V52 | GSM3496335_AB2521 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V53 | GSM3496336_AB2541 | p12 | CD3 | 3 | (S)C | N | 2 |
| V54 | GSM3496337_AB2542 | p12 | CD3 | 3 | (S)C | N | 2 |
| V55 | GSM3496338_AB2543 | p12 | CD3 | 3 | (S)C | N | 1 |
| V56 | GSM3496339_AB2549 | p13 | CD45 | 3 | (S)C | N | NA |
| V57 | GSM3496340_AB2553 | p12 | CD45 | 3 | (S)C | N | 2 |
| V58 | GSM3496341_AB2570 | p12 | CD45 | 3 | (S)C | N | 2 |
| V59 | GSM3496342_AB2574 | p12 | CD3 & CD45 | 3 | (S)C | N | 1 |
| V60 | GSM3496343_AB2898 | p15 | CD3 & CD45 | 3 | (S)C | N | NA |
| V61 | GSM3496344_AB2899 | p16 | CD3 & CD45 | 3 | (S)C | T | NA |
| V62 | GSM3496345_AB2900 | p8 | CD3 | 4 | (S)C | mIT | NA |
| V63 | GSM3496346_AB2901 | p17 | CD3 & CD45 | 3 | (S)C | N | 1 |
| V64 | GSM3496347_AB2902 | p17 | CD3 & CD45 | 3 | (S)C | N | 2 |
| V65 | GSM3496348_AB2903 | p15 | CD3 & CD45 | 3 | (S)C | N | NA |
| V66 | GSM3496349_AB2904 | p16 | CD3 & CD45 | 3 | (S)C | T | NA |
| V67 | GSM3496350_AB2905 | p16 | CD3 & CD45 | 3 | (S)C | T | NA |
| V68 | GSM3496351_AB2906 | p8 | CD3 | 4 | (S)C | mIT | NA |
| V69 | GSM3496352_AB2907 | p8 | CD3 | 4 | (S)C | mIT | NA |
| V70 | GSM3496353_AB2908 | p8 | CD45 | 4 | (S)C | mIT | NA |
| V71 | GSM3496354_AB2909 | p8 | CD45 | 4 | (S)C | mIT | NA |
| V72 | GSM3496355_AB2910 | p8 | CD45 | 4 | (S)C | mIT | NA |
| V73 | GSM3496356_AB2911 | p8 | CD45 | 4 | (S)C | mIT | NA |
| V74 | GSM3496357_AB2912 | p8 | CD3 | 4 | (S)C | mIT | NA |
| V75 | GSM3496358_AB2913 | p8 | CD45 | 4 | (S)C | mIT | NA |
| V76 | GSM3496359_AB2914 | p8 | CD3 & CD45 | 4 | (S)C | mIT | NA |
| V77 | GSM3496360_AB2915 | p18 | CD3 | 3 | (S)C | T | NA |
| V78 | GSM3496361_AB2916 | p18 | CD3 | 3 | (S)C | T | NA |
| V79 | GSM3496362_AB2917 | p18 | CD3 | 3 | (S)C | T | NA |
| V80 | GSM3496363_AB2918 | p18 | CD3 | 3 | (S)C | T | NA |
| V81 | GSM3496364_AB2919 | p18 | CD3 | 3 | (S)C | T | NA |
| V82 | GSM3496365_AB2936 | p18 | CD45 | 3 | (S)C | T | NA |
| V83 | GSM3496366_AB2989 | p18 | CD45 | 3 | (S)C | T | NA |
| V84 | GSM3496367_AB2990 | p18 | CD45 | 3 | (S)C | T | NA |
| V85 | GSM3496368_AB3078 | p9 | CD3 | 4 | (S)C | 2IT | NA |
| V86 | GSM3496369_AB3079 | p9 | CD45 | 4 | (S)C | 2IT | NA |
| V87 | GSM3496370_AB3086 | p9 | CD3 | 4 | (S)C | 2IT | NA |
| V88 | GSM3496371_AB3087 | p9 | CD3 | 4 | (S)C | 2IT | NA |
| V89 | GSM3496372_AB3088 | p9 | CD3 | 4 | (S)C | 2IT | NA |
| V90 | GSM3496373_AB3089 | p9 | CD3 | 4 | (S)C | 2IT | NA |
| V91 | GSM3496374_AB3090 | p9 | CD45 | 4 | (S)C | 2IT | NA |
| V92 | GSM3496375_AB3091 | p9 | CD45 | 4 | (S)C | 2IT | NA |
| V93 | GSM3496376_AB3092 | p9 | CD45 | 4 | (S)C | 2IT | NA |
| V94 | GSM3496377_AB3093 | p9 | CD45 | 4 | (S)C | 2IT | NA |
| V95 | GSM3496378_AB3094 | p9 | CD3 | 4 | (S)C | 2IT | NA |
| V96 | GSM3496379_AB3095 | p9 | CD45 | 4 | (S)C | 2IT | NA |
| V97 | GSM3496380_AB3096 | p9 | CD3 | 4 | (S)C | 2IT | NA |
| V98 | GSM3496381_AB3274 | p6 | CD3 & CD45 | 4 | (S)C | 1IT | NA |
| V99 | GSM3496382_AB3275 | p1 | CD3 & CD45 | 4 | LN | N | NA |
| V100 | GSM3496383_AB3276 | p1 | CD3 & CD45 | 4 | LN | N | NA |
| V101 | GSM3496384_AB3277 | p19 | CD3 & CD45 | 3 | (S)C | T | NA |
| V102 | GSM3496385_AB3278 | p19 | CD3 & CD45 | 3 | (S)C | T | NA |
| V103 | GSM3496386_AB3279 | p19 | CD3 & CD45 | 3 | (S)C | T | NA |
| V104 | GSM3496387_AB3280 | p19 | CD3 & CD45 | 3 | (S)C | T | NA |
| V105 | GSM3496388_AB3281 | p19 | CD3 & CD45 | 3 | (S)C | T | NA |
| V106 | GSM3496389_AB3282 | p19 | CD3 & CD45 | 3 | (S)C | T | NA |
| V107 | GSM3496390_AB3283 | p19 | CD3 & CD45 | 3 | (S)C | T | NA |
| V108 | GSM3496391_AB3284 | p19 | Live | 3 | (S)C | T | NA |
| V109 | GSM3496392_AB3718 | p20 | CD3 & CD45 | 3 | (S)C | 1IT | NA |
| V110 | GSM3496393_AB3719 | p20 | CD3 & CD45 | 3 | (S)C | 1IT | NA |
| V111 | GSM3496394_AB3725 | p21 | CD3 & CD45 | 3 | (S)C | N | NA |
| V112 | GSM3496395_AB3726 | p21 | CD3 & CD45 (+ partly live cells) | 3 | (S)C | N | NA |
| V113 | GSM3496396_AB3755 | p21 | Live | 3 | (S)C | N | NA |
| V114 | GSM3496397_AB3937 | p25 | Live | 3 | p(S)C | N | NA |
| V115 | GSM3496398_AB3938 | p25 | Live | 3 | p(S)C | N | NA |
| V116 | GSM3496399_AB3939 | p26 | CD3 & CD45 | 2 | p(S)C | N | NA |
| V117 | GSM3496400_AB3940 | p26 | CD3 & CD45 | 2 | p(S)C | N | NA |
| V118 | GSM3496401_AB3941 | p26 | Live | 2 | p(S)C | N | NA |
| V119 | GSM3496402_AB3942 | p10 | CD3 | 4 | MSC | 2IT | NA |
| V120 | GSM3496403_AB3943 | p10 | CD3 | 4 | MSC | 2IT | NA |
| V121 | GSM3496404_AB3944 | p10 | CD3 | 4 | MSC | 2IT | NA |
| V122 | GSM3496405_AB3945 | p10 | CD3 | 4 | MSC | 2IT | NA |
| V123 | GSM3496406_AB3946 | p10 | CD45 | 4 | MSC | 2IT | NA |
| V124 | GSM3496407_AB4027 | p24 | CD3 | 3 | LN | N | 2 |
| V125 | GSM3496408_AB4028 | p24 | CD3 | 3 | LN | N | 2 |
| V126 | GSM3496409_AB4029 | p24 | CD45 | 3 | LN | N | 2 |
| V127 | GSM3496410_AB4030 | p24 | CD45 | 3 | LN | N | 2 |
| V128 | GSM3496411_AB4031 | p24 | CD3 | 3 | LN | N | 2 |
| V129 | GSM3496412_AB4032 | p24 | CD45 | 3 | LN | N | 2 |
| V130 | GSM3496413_AB4121 | p4 | CD3 | 4 | LN | 1IT | NA |
| V131 | GSM3496414_AB4122 | p4 | CD3 | 4 | LN | 1IT | NA |
| V132 | GSM3496415_AB4123 | p4 | CD3 | 4 | LN | 1IT | NA |
| V133 | GSM3496416_AB4124 | p4 | CD3 | 4 | LN | 1IT | NA |
| V134 | GSM3496417_AB4125 | p4 | CD45 | 4 | LN | 1IT | NA |
| V135 | GSM3496418_AB4126 | p4 | CD45 | 4 | LN | 1IT | NA |
| V136 | GSM3496419_AB4127 | p4 | CD45 | 4 | LN | 1IT | NA |
| V137 | GSM3496420_AB4128 | p4 | CD45 | 4 | LN | 1IT | NA |
| V138 | GSM3496421_AB4129 | p4 | CD3 | 4 | LN | 1IT | NA |
| V139 | GSM3496422_AB4130 | p4 | CD3 | 4 | LN | 1IT | NA |
| V140 | GSM3496423_AB4131 | p4 | CD3 | 4 | LN | 1IT | NA |
| V141 | GSM3496424_AB4132 | p4 | CD45 | 4 | LN | 1IT | NA |
| V142 | GSM3496425_AB4133 | p4 | CD3 & CD45 | 4 | LN | 1IT | NA |
| V143 | GSM3496426_AB4134 | p23 | CD3 | 3 | LN | N | NA |
| V144 | GSM3496427_AB4135 | p23 | CD3 | 3 | LN | N | NA |
| V145 | GSM3496428_AB4136 | p23 | CD3 | 3 | LN | N | NA |
| V146 | GSM3496429_AB4137 | p23 | CD3 | 3 | LN | N | NA |
| V147 | GSM3496430_AB4138 | p23 | CD45 | 3 | LN | N | NA |
| V148 | GSM3496431_AB4139 | p23 | CD45 | 3 | LN | N | NA |
| V149 | GSM3496432_AB4140 | p23 | CD45 | 3 | LN | N | NA |
| V150 | GSM3496433_AB4141 | p23 | CD45 | 3 | LN | N | NA |
| V151 | GSM3496434_AB4142 | p23 | CD45 | 3 | LN | N | NA |
| V152 | GSM3496435_AB4143 | p23 | CD3 | 3 | LN | N | NA |
| V153 | GSM3496436_AB4229 | p5 | CD3 | 4 | LN | 2IT | NA |
| V154 | GSM3496437_AB4230 | p5 | CD3 | 4 | LN | 2IT | NA |
| V155 | GSM3496438_AB4231 | p5 | CD3 | 4 | LN | 2IT | NA |
| V156 | GSM3496439_AB4232 | p5 | CD45 | 4 | LN | 2IT | NA |
| V157 | GSM3496440_AB4233 | p5 | CD45 | 4 | LN | 2IT | NA |
| V158 | GSM3496441_AB4234 | p5 | CD45 | 4 | LN | 2IT | NA |
| V159 | GSM3496442_AB4235 | p5 | CD3 | 4 | LN | 2IT | NA |
| V160 | GSM3496443_AB4236 | p5 | CD3 | 4 | LN | 2IT | NA |
| V161 | GSM3496444_AB4237 | p5 | CD3 | 4 | LN | 2IT | NA |
| V162 | GSM3496445_AB4238 | p5 | CD45 | 4 | LN | 2IT | NA |
| V163 | GSM3496446_AB4239 | p5 | CD45 | 4 | LN | 2IT | NA |
| V164 | GSM3496447_AB4359 | p13_PBMC | CCR7+ CD45RA+ | 3 | (S)C | N | NA |
| V165 | GSM3496448_AB4360 | p13_PBMC | CCR7+ CD45RA+ | 3 | (S)C | N | NA |
| V166 | GSM3496449_AB4361 | p13_PBMC | CCR7+ CD45RA+ | 3 | (S)C | N | NA |
| V167 | GSM3496450_AB4362 | p13_PBMC | CD3 | 3 | (S)C | N | NA |
| V168 | GSM3496451_AB4363 | p13_PBMC | CD3 | 3 | (S)C | N | NA |
| V169 | GSM3496452_AB4364 | p13_PBMC | CD45 | 3 | (S)C | N | NA |
| V170 | GSM3496453_AB4365 | p13_PBMC | CD45 | 3 | (S)C | N | NA |
| V171 | GSM3496454_AB4366 | p13_PBMC | CD3 | 3 | (S)C | N | NA |
| V172 | GSM3496455_AB4367 | p13_PBMC | CD45 | 3 | (S)C | N | NA |
| V173 | GSM3496456_AB5008 | p27 | CD3 | 4 | (S)C | 1IT | NA |
| V174 | GSM3496457_AB5009 | p27 | CD3 | 4 | (S)C | 1IT | NA |
| V175 | GSM3496458_AB5010 | p27 | CD3 | 4 | (S)C | 1IT | NA |
| V176 | GSM3496459_AB5011 | p27 | CD3 | 4 | (S)C | 1IT | NA |
| V177 | GSM3496460_AB5012 | p27 | CD45 | 4 | (S)C | 1IT | NA |
| V178 | GSM3496461_AB5013 | p27 | CD45 | 4 | (S)C | 1IT | NA |
| V179 | GSM3496462_AB5014 | p27 | CD3 & CD45 | 4 | (S)C | 1IT | NA |
| V180 | GSM3496463_AB5015 | p27 | CD3 & CD45 | 4 | (S)C | 1IT | NA |
| V181 | GSM3496464_AB5016 | p27 | CD3 | 4 | (S)C | 1IT | NA |
| V182 | GSM3496465_AB5017 | p27 | CD45 | 4 | (S)C | 1IT | NA |
| V183 | GSM3496466_AB5018 | p27 | CD3 & CD45 | 4 | (S)C | 1IT | NA |
| V184 | GSM3496467_AB5019 | p27 | CD3 | 4 | (S)C | 1IT | NA |
| V185 | GSM3496468_AB5190 | p28 | CD3 | 4 | (S)C | 2IT | NA |
| V186 | GSM3496469_AB5191 | p28 | CD3 | 4 | (S)C | 2IT | NA |
| V187 | GSM3496470_AB5192 | p28 | CD3 | 4 | (S)C | 2IT | NA |
| V188 | GSM3496471_AB5193 | p28 | CD3 | 4 | (S)C | 2IT | NA |
| V189 | GSM3496472_AB5194 | p28 | CD3 & CD45 | 4 | (S)C | 2IT | NA |
| V190 | GSM3496473_AB5195 | p28 | CD3 & CD45 | 4 | (S)C | 2IT | NA |
| V191 | GSM3496474_AB5196 | p28 | CD3 & CD45 | 4 | (S)C | 2IT | NA |
| V192 | GSM3496475_AB5197 | p28 | CD3 & CD45 | 4 | (S)C | 2IT | NA |
| V193 | GSM3496476_AB5198 | p28 | CD3 | 4 | (S)C | 2IT | NA |
| V194 | GSM3496477_AB5199 | p28 | CD3 & CD45 | 4 | (S)C | 2IT | NA |
| V195 | GSM3496478_AB5424 | p17_PBMC | CCR7+ CD45RA+ | 3 | (S)C | N | NA |
| V196 | GSM3496479_AB5425 | p17_PBMC | CCR7+ CD45RA+ | 3 | (S)C | N | NA |
| V197 | GSM3496480_AB5426 | p17_PBMC | CCR7+ CD45RA+ | 3 | (S)C | N | NA |
| V198 | GSM3496481_AB5427 | p17_PBMC | CD3 | 3 | (S)C | N | NA |
| V199 | GSM3496482_AB5428 | p17_PBMC | CD45 | 3 | (S)C | N | NA |
| V200 | GSM3496483_AB5429 | p17_PBMC | CD3 & CD45 | 3 | (S)C | N | NA |
| V201 | GSM3496484_AB5490 | p27_PBMC | CD3 | 4 | (S)C | 1IT | NA |
| V202 | GSM3496485_AB5491 | p27_PBMC | CD3 & CD45 | 4 | (S)C | 1IT | NA |
| V203 | GSM3496486_AB5492 | p27_PBMC | CCR7+ CD45RA+ | 4 | (S)C | 1IT | NA |
| V204 | GSM3496487_AB5493 | p27_PBMC | CD45 | 4 | (S)C | 1IT | NA |
| V205 | GSM3496488_AB5494 | p27_PBMC | CD3 | 4 | (S)C | 1IT | NA |
| | CCR7+ CD45RA+ | CD3 | CD3 & CD45 | CD3 & CD45 (+ partly live cells) | CD45 | Live |
|---|---|---|---|---|---|---|
| p1 | 0 | 0 | 2 | 0 | 0 | 0 |
| p10 | 0 | 4 | 0 | 0 | 1 | 0 |
| p11 | 0 | 4 | 0 | 0 | 6 | 0 |
| p12 | 0 | 3 | 1 | 0 | 2 | 0 |
| p13 | 0 | 8 | 0 | 0 | 6 | 0 |
| p13_PBMC | 3 | 3 | 0 | 0 | 3 | 0 |
| p15 | 0 | 0 | 2 | 0 | 0 | 0 |
| p16 | 0 | 0 | 3 | 0 | 0 | 0 |
| p17 | 0 | 0 | 2 | 0 | 0 | 0 |
| p17_PBMC | 3 | 1 | 1 | 0 | 1 | 0 |
| p18 | 0 | 5 | 0 | 0 | 3 | 0 |
| p19 | 0 | 0 | 7 | 0 | 0 | 1 |
| p2 | 0 | 5 | 0 | 0 | 5 | 0 |
| p20 | 0 | 0 | 2 | 0 | 0 | 0 |
| p21 | 0 | 0 | 1 | 1 | 0 | 1 |
| p23 | 0 | 5 | 0 | 0 | 5 | 0 |
| p24 | 0 | 3 | 0 | 0 | 3 | 0 |
| p25 | 0 | 0 | 0 | 0 | 0 | 2 |
| p26 | 0 | 0 | 2 | 0 | 0 | 1 |
| p27 | 0 | 6 | 3 | 0 | 3 | 0 |
| p27_PBMC | 1 | 2 | 1 | 0 | 1 | 0 |
| p28 | 0 | 5 | 5 | 0 | 0 | 0 |
| p3 | 0 | 5 | 8 | 0 | 5 | 0 |
| p4 | 0 | 7 | 1 | 0 | 5 | 0 |
| p5 | 0 | 6 | 0 | 0 | 5 | 0 |
| p6 | 0 | 0 | 1 | 0 | 0 | 0 |
| p8 | 0 | 4 | 1 | 0 | 5 | 0 |
| p9 | 0 | 7 | 0 | 0 | 6 | 0 |
Primero vamos a seleccionar los pacientes con cáncer en los nódulos linfáticos (localization = “LN”)
# Samples whose localization is "LN"; which() discards rows where the
# comparison is NA, exactly as subset() does.
LN <- data.info.2[which(data.info.2$localization == "LN"), ]
LN
## name patient.2 cd stage localization tto m.meta
## V2 GSM3496285_AB1889 p2 CD3 4 LN 2IT <NA>
## V3 GSM3496286_AB1890 p2 CD3 4 LN 2IT <NA>
## V4 GSM3496287_AB1891 p2 CD45 4 LN 2IT <NA>
## V5 GSM3496288_AB1892 p2 CD45 4 LN 2IT <NA>
## V6 GSM3496289_AB2093 p2 CD45 4 LN 2IT <NA>
## V7 GSM3496290_AB2094 p2 CD3 4 LN 2IT <NA>
## V8 GSM3496291_AB2095 p2 CD3 4 LN 2IT <NA>
## V9 GSM3496292_AB2096 p2 CD45 4 LN 2IT <NA>
## V10 GSM3496293_AB2097 p2 CD45 4 LN 2IT <NA>
## V18 GSM3496301_AB2105 p2 CD3 4 LN 2IT <NA>
## V35 GSM3496318_AB2504 p3 CD3 4 LN mIT <NA>
## V36 GSM3496319_AB2505 p3 CD3 4 LN mIT <NA>
## V37 GSM3496320_AB2506 p3 CD3 4 LN mIT <NA>
## V38 GSM3496321_AB2507 p3 CD3 4 LN mIT <NA>
## V39 GSM3496322_AB2508 p3 CD45 4 LN mIT <NA>
## V40 GSM3496323_AB2509 p3 CD45 4 LN mIT <NA>
## V41 GSM3496324_AB2510 p3 CD45 4 LN mIT <NA>
## V42 GSM3496325_AB2511 p3 CD45 4 LN mIT <NA>
## V43 GSM3496326_AB2512 p3 CD3 & CD45 4 LN mIT <NA>
## V44 GSM3496327_AB2513 p3 CD3 & CD45 4 LN mIT <NA>
## V45 GSM3496328_AB2514 p3 CD3 & CD45 4 LN mIT <NA>
## V46 GSM3496329_AB2515 p3 CD3 & CD45 4 LN mIT <NA>
## V47 GSM3496330_AB2516 p3 CD3 4 LN mIT <NA>
## V48 GSM3496331_AB2517 p3 CD45 4 LN mIT <NA>
## V49 GSM3496332_AB2518 p3 CD3 & CD45 4 LN mIT <NA>
## V50 GSM3496333_AB2519 p3 CD3 & CD45 4 LN mIT <NA>
## V51 GSM3496334_AB2520 p3 CD3 & CD45 4 LN mIT <NA>
## V52 GSM3496335_AB2521 p3 CD3 & CD45 4 LN mIT <NA>
## V99 GSM3496382_AB3275 p1 CD3 & CD45 4 LN N <NA>
## V100 GSM3496383_AB3276 p1 CD3 & CD45 4 LN N <NA>
## V124 GSM3496407_AB4027 p24 CD3 3 LN N 2
## V125 GSM3496408_AB4028 p24 CD3 3 LN N 2
## V126 GSM3496409_AB4029 p24 CD45 3 LN N 2
## V127 GSM3496410_AB4030 p24 CD45 3 LN N 2
## V128 GSM3496411_AB4031 p24 CD3 3 LN N 2
## V129 GSM3496412_AB4032 p24 CD45 3 LN N 2
## V130 GSM3496413_AB4121 p4 CD3 4 LN 1IT <NA>
## V131 GSM3496414_AB4122 p4 CD3 4 LN 1IT <NA>
## V132 GSM3496415_AB4123 p4 CD3 4 LN 1IT <NA>
## V133 GSM3496416_AB4124 p4 CD3 4 LN 1IT <NA>
## V134 GSM3496417_AB4125 p4 CD45 4 LN 1IT <NA>
## V135 GSM3496418_AB4126 p4 CD45 4 LN 1IT <NA>
## V136 GSM3496419_AB4127 p4 CD45 4 LN 1IT <NA>
## V137 GSM3496420_AB4128 p4 CD45 4 LN 1IT <NA>
## V138 GSM3496421_AB4129 p4 CD3 4 LN 1IT <NA>
## V139 GSM3496422_AB4130 p4 CD3 4 LN 1IT <NA>
## V140 GSM3496423_AB4131 p4 CD3 4 LN 1IT <NA>
## V141 GSM3496424_AB4132 p4 CD45 4 LN 1IT <NA>
## V142 GSM3496425_AB4133 p4 CD3 & CD45 4 LN 1IT <NA>
## V143 GSM3496426_AB4134 p23 CD3 3 LN N <NA>
## V144 GSM3496427_AB4135 p23 CD3 3 LN N <NA>
## V145 GSM3496428_AB4136 p23 CD3 3 LN N <NA>
## V146 GSM3496429_AB4137 p23 CD3 3 LN N <NA>
## V147 GSM3496430_AB4138 p23 CD45 3 LN N <NA>
## V148 GSM3496431_AB4139 p23 CD45 3 LN N <NA>
## V149 GSM3496432_AB4140 p23 CD45 3 LN N <NA>
## V150 GSM3496433_AB4141 p23 CD45 3 LN N <NA>
## V151 GSM3496434_AB4142 p23 CD45 3 LN N <NA>
## V152 GSM3496435_AB4143 p23 CD3 3 LN N <NA>
## V153 GSM3496436_AB4229 p5 CD3 4 LN 2IT <NA>
## V154 GSM3496437_AB4230 p5 CD3 4 LN 2IT <NA>
## V155 GSM3496438_AB4231 p5 CD3 4 LN 2IT <NA>
## V156 GSM3496439_AB4232 p5 CD45 4 LN 2IT <NA>
## V157 GSM3496440_AB4233 p5 CD45 4 LN 2IT <NA>
## V158 GSM3496441_AB4234 p5 CD45 4 LN 2IT <NA>
## V159 GSM3496442_AB4235 p5 CD3 4 LN 2IT <NA>
## V160 GSM3496443_AB4236 p5 CD3 4 LN 2IT <NA>
## V161 GSM3496444_AB4237 p5 CD3 4 LN 2IT <NA>
## V162 GSM3496445_AB4238 p5 CD45 4 LN 2IT <NA>
## V163 GSM3496446_AB4239 p5 CD45 4 LN 2IT <NA>
Aquí tenemos 70 experimentos, pero se usaron diferentes CD para el filtrado, y cada uno tiene diferentes tratamientos.
# Number of LN samples per FACS gate (rows) and treatment code (columns).
table(LN[["cd"]], LN[["tto"]])
##
## 1IT 2IT mIT N
## CD3 7 11 5 8
## CD3 & CD45 1 0 8 2
## CD45 5 10 5 8
Así que vamos a generar diferentes grupos, uno para cada CD de filtrado
# One data frame per FACS gate among the LN samples; which() drops rows where
# the comparison is NA, matching subset().
LN.CD3 <- LN[which(LN$cd == "CD3"), ]
LN.CD45 <- LN[which(LN$cd == "CD45"), ]
LN.CD3_CD45 <- LN[which(LN$cd == "CD3 & CD45"), ]
| | name | patient.2 | cd | stage | localization | tto | m.meta |
|---|---|---|---|---|---|---|---|
| V43 | GSM3496326_AB2512 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V44 | GSM3496327_AB2513 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V45 | GSM3496328_AB2514 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V46 | GSM3496329_AB2515 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V49 | GSM3496332_AB2518 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V50 | GSM3496333_AB2519 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V51 | GSM3496334_AB2520 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V52 | GSM3496335_AB2521 | p3 | CD3 & CD45 | 4 | LN | mIT | NA |
| V99 | GSM3496382_AB3275 | p1 | CD3 & CD45 | 4 | LN | N | NA |
| V100 | GSM3496383_AB3276 | p1 | CD3 & CD45 | 4 | LN | N | NA |
| V142 | GSM3496425_AB4133 | p4 | CD3 & CD45 | 4 | LN | 1IT | NA |
Aquí tenemos 3 pacientes, dos con tratamiento y 1 sin tratamiento
| Paciente | Tto |
|---|---|
| p1 | sin tto |
| p3 | ?? |
| p4 | aPD1 |
library(affy)
## Warning: package 'affy' was built under R version 4.0.3
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.0.3
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.0.3
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
# Load the count table of the CD3 & CD45 sample of patient p4.
# `p4` is reused for each step (sample name -> position in `datapath` ->
# count table) so that only the final table remains under that name.
p4 <- LN.CD3_CD45$name[LN.CD3_CD45$patient.2 == "p4"]

# The lookup below assumes a single sample name; with zero or several names
# the comparison against `datapath` would recycle silently.
if (length(p4) != 1L) {
  stop(
    "Expected exactly one CD3 & CD45 sample for patient p4, found ",
    length(p4),
    call. = FALSE
  )
}

# Position of "<name>.txt.gz" among the raw-data files; NA when it is absent.
p4 <- match(paste0(p4, ".txt.gz"), datapath)
if (is.na(p4)) {
  stop(
    "No '.txt.gz' file for the p4 sample was found in the raw-data folder",
    call. = FALSE
  )
}

# Tab-separated, gzip-compressed table; the preview further down shows gene
# symbols as row names and one column per WMC* identifier.
p4 <- read.table(
  gzfile(paste0(dirname(getwd()), "/data/GSE123139_RAW/", datapath[p4])),
  sep = "\t"
)
p4[1:10, 1:5]
## WMC290849 WMC290850 WMC290851 WMC290852 WMC290853
## 5S_rRNA 0 0 0 0 0
## 7SK 0 0 0 0 0
## A1BG 0 0 0 0 0
## A1BG-AS1 0 0 0 0 0
## A1CF 0 0 0 0 0
## A2M 0 0 0 0 0
## A2M-AS1 0 0 0 0 0
## A2ML1 0 0 0 0 0
## A2ML1-AS1 0 0 0 0 0
## A2ML1-AS2 0 0 0 0 0
Ahora seleccionamos los genes INMUNOME
# Keep only the rows of the count table whose gene symbol appears in the
# immune gene list.
# Exact matching with %in% is used on purpose: joining the list into one
# regular expression ("A2M|ABCC3|...") matches substrings, so "A2M" also
# selected "A2M-AS1", "A2ML1", "A2MP1", ..., and any regex metacharacter in a
# symbol would be interpreted as a pattern.
p4.inmune <- p4[rownames(p4) %in% inmune$V1, ]

# Preview; rows beyond the number of matched genes would show up as NA.
p4.inmune[1:10, 1:5]
## WMC290849 WMC290850 WMC290851 WMC290852 WMC290853
## A2M 0 0 0 0 0
## A2M-AS1 0 0 0 0 0
## A2ML1 0 0 0 0 0
## A2ML1-AS1 0 0 0 0 0
## A2ML1-AS2 0 0 0 0 0
## A2MP1 0 0 0 0 0
## ABCC3 0 0 0 0 0
## ABCD1 0 0 0 0 0
## ABCD1P2 0 0 0 0 0
## ABCD1P3 0 0 0 0 0
Ahora generamos el análisis de UMAP
library(umap)
## Warning: package 'umap' was built under R version 4.0.3
# Fit a UMAP embedding on the full count table and another one on the
# immune-gene subset, both with the package's default configuration.
# NOTE(review): umap() embeds the rows of its input, and these tables have
# genes as rows, so each embedded point is presumably a gene rather than a
# cell -- confirm this is intended (otherwise transpose first).
# NOTE(review): no seed is set here, so the layout may differ between runs.
p4.umap <- umap::umap(p4)
p4.inmu.umap <- umap::umap(p4.inmune)
Graficando
# Pull the coordinates out of each UMAP fit and give both tables the same
# axis names.
plot.p4 <- as.data.frame(p4.umap$layout)
plot.p4.inmu <- as.data.frame(p4.inmu.umap$layout)
colnames(plot.p4.inmu) <- c("UMAP1", "UMAP2")
colnames(plot.p4) <- colnames(plot.p4.inmu)
library(ggplot2)

# Scatter plot of the embedding of the full table; the x-axis label carries
# the sample name.
ggplot2::ggplot(data = plot.p4, mapping = aes(x = UMAP1, y = UMAP2)) +
  geom_point() +
  xlab(LN.CD3_CD45$name[LN.CD3_CD45$patient.2 == "p4"]) +
  ggtitle("p4-LN-aPD1")

# Same plot for the immune-gene subset.
ggplot2::ggplot(data = plot.p4.inmu, mapping = aes(x = UMAP1, y = UMAP2)) +
  geom_point() +
  xlab(LN.CD3_CD45$name[LN.CD3_CD45$patient.2 == "p4"]) +
  ggtitle("p4-LN-aPD1_SELECT")