Data exploration of the data set GSE63514
# Loading dataset GSE63514
# (GSEMatrix = TRUE tells getGEO() to download the series matrix file, which contains the gene expression matrix)
library(GEOquery)
## Loading required package: Biobase
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
## table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
library(limma)
## Warning: package 'limma' was built under R version 4.4.2
##
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
library(umap)
## Warning: package 'umap' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(RColorBrewer)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.4.3
# Download the GSE63514 series matrix from GEO; getGPL = FALSE skips the
# platform (GPL) annotation download.
gset <- getGEO(GEO = "GSE63514", GSEMatrix = TRUE, getGPL = FALSE)
## Found 1 file(s)
## GSE63514_series_matrix.txt.gz
# getGEO() returned a list here; when it holds more than one element, keep the
# one whose name mentions platform GPL570, otherwise keep the only element.
if (length(gset) > 1) {
  idx <- grep("GPL570", names(gset), fixed = TRUE)
  # Fail with a clear message instead of an obscure subscript error when the
  # platform is missing or matches more than one element.
  if (length(idx) != 1) {
    stop(
      "Expected exactly one GPL570 entry in GSE63514, found ", length(idx),
      call. = FALSE
    )
  }
} else {
  idx <- 1
}
gset <- gset[[idx]]
# Peek at the first rows here; the last rows are shown further below
head(exprs(gset), n = 2)
## GSM1551311 GSM1551312 GSM1551313 GSM1551314 GSM1551315 GSM1551316
## 1007_s_at 10.777813 11.199858 10.818370 11.360644 10.893196 10.135833
## 1053_at 6.702488 7.664251 5.777355 7.971239 7.149726 5.817268
## GSM1551317 GSM1551318 GSM1551319 GSM1551320 GSM1551321 GSM1551322
## 1007_s_at 10.831207 10.451779 10.886191 11.350148 11.298550 10.945115
## 1053_at 5.346934 7.263843 5.885268 7.770745 7.814717 7.602949
## GSM1551323 GSM1551324 GSM1551325 GSM1551326 GSM1551327 GSM1551328
## 1007_s_at 11.23825 11.941445 11.178298 10.593171 11.09725 11.840211
## 1053_at 7.46708 6.980736 7.579214 7.769257 7.46995 6.879813
## GSM1551329 GSM1551330 GSM1551331 GSM1551332 GSM1551333 GSM1551334
## 1007_s_at 11.954979 11.736448 11.317011 11.542374 10.305906 11.09675
## 1053_at 6.406281 8.596336 7.271637 8.751259 7.285995 7.56516
## GSM1551335 GSM1551336 GSM1551337 GSM1551338 GSM1551339 GSM1551340
## 1007_s_at 10.619594 10.557627 10.999108 10.710438 9.732989 11.76326
## 1053_at 6.933845 6.213611 7.843274 7.331869 4.028833 6.67009
## GSM1551341 GSM1551342 GSM1551343 GSM1551344 GSM1551345 GSM1551346
## 1007_s_at 10.728748 9.558855 11.323210 11.19067 11.451253 12.236602
## 1053_at 6.723867 5.799370 8.674091 8.12612 6.683674 8.933919
## GSM1551347 GSM1551348 GSM1551349 GSM1551350 GSM1551351 GSM1551352
## 1007_s_at 10.632670 11.492207 10.680960 10.766693 9.975092 10.762292
## 1053_at 8.096621 7.129486 6.037668 6.970007 6.420281 6.357039
## GSM1551353 GSM1551354 GSM1551355 GSM1551356 GSM1551357 GSM1551358
## 1007_s_at 11.431527 11.562411 11.275369 11.497481 10.929970 11.132153
## 1053_at 7.328941 7.219531 7.343396 8.559383 7.672454 9.034992
## GSM1551359 GSM1551360 GSM1551361 GSM1551362 GSM1551363 GSM1551364
## 1007_s_at 10.828331 11.261676 10.923170 10.212884 11.508148 11.017721
## 1053_at 8.430294 8.258517 8.310965 6.535857 8.856242 8.428499
## GSM1551365 GSM1551366 GSM1551367 GSM1551368 GSM1551369 GSM1551370
## 1007_s_at 10.929274 11.283867 10.689504 11.48709 11.466879 10.935993
## 1053_at 7.834879 9.861923 7.318902 10.05131 8.184141 8.222622
## GSM1551371 GSM1551372 GSM1551373 GSM1551374 GSM1551375 GSM1551376
## 1007_s_at 10.606269 11.298869 10.658100 11.943863 11.060735 11.196990
## 1053_at 7.122583 7.374576 7.454146 5.431135 8.588782 6.753185
## GSM1551377 GSM1551378 GSM1551379 GSM1551380 GSM1551381 GSM1551382
## 1007_s_at 11.567258 10.30746 10.763636 11.183403 9.348174 10.99582
## 1053_at 7.538014 8.32396 8.236432 4.709314 6.060230 8.16370
## GSM1551383 GSM1551384 GSM1551385 GSM1551386 GSM1551387 GSM1551388
## 1007_s_at 11.172493 10.697380 10.248789 11.743592 11.868659 10.643750
## 1053_at 8.633765 8.375691 8.814829 9.826814 9.092444 7.699331
## GSM1551389 GSM1551390 GSM1551391 GSM1551392 GSM1551393 GSM1551394
## 1007_s_at 11.544813 12.657460 11.371573 10.75573 9.966184 9.501328
## 1053_at 8.437233 9.546425 8.833726 8.41522 7.188810 8.167795
## GSM1551395 GSM1551396 GSM1551397 GSM1551398 GSM1551399 GSM1551400
## 1007_s_at 10.681055 11.556429 11.544228 11.459189 11.640151 11.53513
## 1053_at 6.005221 7.959652 8.460899 9.576018 8.645727 8.54374
## GSM1551401 GSM1551402 GSM1551403 GSM1551404 GSM1551405 GSM1551406
## 1007_s_at 11.459110 10.95084 10.944250 11.64163 10.645794 11.78202
## 1053_at 8.303497 6.23627 6.444052 10.15162 9.377801 10.18177
## GSM1551407 GSM1551408 GSM1551409 GSM1551410 GSM1551411 GSM1551412
## 1007_s_at 10.86990 11.849914 11.819453 11.280840 9.577077 10.647461
## 1053_at 8.51525 8.996246 6.354364 8.732964 5.605935 6.950703
## GSM1551413 GSM1551414 GSM1551415 GSM1551416 GSM1551417 GSM1551418
## 1007_s_at 11.349284 10.392327 11.421998 10.886666 10.30292 11.650727
## 1053_at 5.954593 6.970085 8.554235 8.742948 5.16435 5.311493
## GSM1551419 GSM1551420 GSM1551421 GSM1551422 GSM1551423 GSM1551424
## 1007_s_at 10.895523 9.365527 10.36728 12.019754 11.887070 10.583422
## 1053_at 5.553093 5.982912 7.86734 7.943192 8.015636 8.158799
## GSM1551425 GSM1551426 GSM1551427 GSM1551428 GSM1551429 GSM1551430
## 1007_s_at 10.286129 11.365489 10.381620 10.373619 11.351220 10.198903
## 1053_at 8.345025 8.203606 6.438337 9.650425 8.787205 8.209752
## GSM1551431 GSM1551432 GSM1551433 GSM1551434 GSM1551435 GSM1551436
## 1007_s_at 7.999738 9.985798 10.939158 10.905637 10.289586 10.485842
## 1053_at 6.312030 8.619027 8.835091 8.522371 6.986016 8.134119
## GSM1551437 GSM1551438
## 1007_s_at 11.073977 11.018359
## 1053_at 8.874136 8.713384
# Last two rows of the expression matrix
tail(exprs(gset), n = 2)
## GSM1551311 GSM1551312 GSM1551313 GSM1551314 GSM1551315
## AFFX-TrpnX-5_at 2.375484 2.303456 2.317590 2.291450 2.355261
## AFFX-TrpnX-M_at 2.982005 2.915308 2.959125 2.894786 2.989011
## GSM1551316 GSM1551317 GSM1551318 GSM1551319 GSM1551320
## AFFX-TrpnX-5_at 2.436014 2.292504 2.330254 2.295823 2.253570
## AFFX-TrpnX-M_at 3.113099 2.890632 2.910918 2.879157 2.818381
## GSM1551321 GSM1551322 GSM1551323 GSM1551324 GSM1551325
## AFFX-TrpnX-5_at 2.294791 2.325266 2.251914 2.243408 2.342641
## AFFX-TrpnX-M_at 2.900499 2.900876 2.783429 2.805758 2.932268
## GSM1551326 GSM1551327 GSM1551328 GSM1551329 GSM1551330
## AFFX-TrpnX-5_at 2.281335 2.261202 2.250891 2.302012 2.227935
## AFFX-TrpnX-M_at 2.850396 2.814959 2.796348 2.866598 2.778508
## GSM1551331 GSM1551332 GSM1551333 GSM1551334 GSM1551335
## AFFX-TrpnX-5_at 2.296567 2.276115 2.270051 2.644443 2.306356
## AFFX-TrpnX-M_at 2.867226 2.842854 2.836797 3.280874 2.923933
## GSM1551336 GSM1551337 GSM1551338 GSM1551339 GSM1551340
## AFFX-TrpnX-5_at 2.323700 2.356099 2.298251 2.151226 2.117214
## AFFX-TrpnX-M_at 2.936173 2.963306 2.869391 3.059053 2.921479
## GSM1551341 GSM1551342 GSM1551343 GSM1551344 GSM1551345
## AFFX-TrpnX-5_at 2.272609 2.398052 2.311966 2.256932 2.241538
## AFFX-TrpnX-M_at 2.840157 3.059708 2.886775 2.820692 2.786751
## GSM1551346 GSM1551347 GSM1551348 GSM1551349 GSM1551350
## AFFX-TrpnX-5_at 2.261754 2.280053 2.036509 2.393256 2.367071
## AFFX-TrpnX-M_at 2.823998 2.853316 2.799185 3.007549 2.989599
## GSM1551351 GSM1551352 GSM1551353 GSM1551354 GSM1551355
## AFFX-TrpnX-5_at 2.436477 2.298634 2.288835 2.303941 2.169367
## AFFX-TrpnX-M_at 3.161644 2.890996 2.876846 2.901978 3.101493
## GSM1551356 GSM1551357 GSM1551358 GSM1551359 GSM1551360
## AFFX-TrpnX-5_at 2.267734 2.422178 2.292652 2.320403 2.018767
## AFFX-TrpnX-M_at 2.825374 3.107800 2.860022 2.902757 2.752315
## GSM1551361 GSM1551362 GSM1551363 GSM1551364 GSM1551365
## AFFX-TrpnX-5_at 2.029545 2.065107 2.229450 2.358255 2.247373
## AFFX-TrpnX-M_at 2.790407 2.857139 2.769053 2.939336 2.800282
## GSM1551366 GSM1551367 GSM1551368 GSM1551369 GSM1551370
## AFFX-TrpnX-5_at 2.215076 2.339848 2.040310 2.341208 2.057385
## AFFX-TrpnX-M_at 2.750706 2.934320 2.815526 2.913758 2.838166
## GSM1551371 GSM1551372 GSM1551373 GSM1551374 GSM1551375
## AFFX-TrpnX-5_at 2.328387 2.349750 2.295443 2.311821 2.298411
## AFFX-TrpnX-M_at 2.939909 3.048712 2.893121 2.932585 2.909918
## GSM1551376 GSM1551377 GSM1551378 GSM1551379 GSM1551380
## AFFX-TrpnX-5_at 2.33302 2.275964 2.076030 2.049298 2.122428
## AFFX-TrpnX-M_at 2.94988 2.867620 2.877209 2.827663 2.990251
## GSM1551381 GSM1551382 GSM1551383 GSM1551384 GSM1551385
## AFFX-TrpnX-5_at 2.124879 2.09526 2.119624 2.107675 2.014345
## AFFX-TrpnX-M_at 2.940243 2.89559 2.890786 2.943655 2.783105
## GSM1551386 GSM1551387 GSM1551388 GSM1551389 GSM1551390
## AFFX-TrpnX-5_at 2.099914 2.039989 2.084122 2.075209 2.020487
## AFFX-TrpnX-M_at 2.878003 2.817040 2.881177 2.861741 2.793274
## GSM1551391 GSM1551392 GSM1551393 GSM1551394 GSM1551395
## AFFX-TrpnX-5_at 2.283779 2.033609 1.976609 2.037900 2.061398
## AFFX-TrpnX-M_at 2.852894 2.807241 2.730749 2.821017 2.845791
## GSM1551396 GSM1551397 GSM1551398 GSM1551399 GSM1551400
## AFFX-TrpnX-5_at 2.083383 2.031737 2.077320 2.083653 2.096689
## AFFX-TrpnX-M_at 2.875887 2.800260 2.858835 2.867768 2.885912
## GSM1551401 GSM1551402 GSM1551403 GSM1551404 GSM1551405
## AFFX-TrpnX-5_at 2.051337 2.027336 2.021793 2.065374 2.062607
## AFFX-TrpnX-M_at 2.819871 2.808389 2.806920 2.849277 2.842079
## GSM1551406 GSM1551407 GSM1551408 GSM1551409 GSM1551410
## AFFX-TrpnX-5_at 2.049523 2.028255 2.093488 2.127730 2.265507
## AFFX-TrpnX-M_at 2.830729 2.803406 2.885711 2.965802 2.828443
## GSM1551411 GSM1551412 GSM1551413 GSM1551414 GSM1551415
## AFFX-TrpnX-5_at 2.256503 2.288284 2.323386 2.226686 2.397474
## AFFX-TrpnX-M_at 2.823184 2.870122 2.934165 2.802258 3.087777
## GSM1551416 GSM1551417 GSM1551418 GSM1551419 GSM1551420
## AFFX-TrpnX-5_at 2.284088 1.987205 2.242136 2.144309 2.128561
## AFFX-TrpnX-M_at 2.887137 2.744408 2.785562 2.950031 3.027681
## GSM1551421 GSM1551422 GSM1551423 GSM1551424 GSM1551425
## AFFX-TrpnX-5_at 2.098702 2.028799 2.039577 1.951295 2.091656
## AFFX-TrpnX-M_at 2.891465 2.788220 2.816792 2.715121 2.867521
## GSM1551426 GSM1551427 GSM1551428 GSM1551429 GSM1551430
## AFFX-TrpnX-5_at 2.010785 1.988419 2.047721 2.015520 2.042050
## AFFX-TrpnX-M_at 2.764998 2.735155 2.825817 2.782165 2.818893
## GSM1551431 GSM1551432 GSM1551433 GSM1551434 GSM1551435
## AFFX-TrpnX-5_at 2.055794 2.024184 2.037537 2.056132 2.124954
## AFFX-TrpnX-M_at 2.836085 2.794922 2.810972 2.839293 2.957042
## GSM1551436 GSM1551437 GSM1551438
## AFFX-TrpnX-5_at 2.057252 2.208235 2.056223
## AFFX-TrpnX-M_at 2.840030 2.764710 2.840426
# Number of rows and columns
c(nrow(exprs(gset)), ncol(exprs(gset)))
## [1] 54675 128
# Column names of the expression matrix (one per sample)
sampleNames(gset)
## [1] "GSM1551311" "GSM1551312" "GSM1551313" "GSM1551314" "GSM1551315"
## [6] "GSM1551316" "GSM1551317" "GSM1551318" "GSM1551319" "GSM1551320"
## [11] "GSM1551321" "GSM1551322" "GSM1551323" "GSM1551324" "GSM1551325"
## [16] "GSM1551326" "GSM1551327" "GSM1551328" "GSM1551329" "GSM1551330"
## [21] "GSM1551331" "GSM1551332" "GSM1551333" "GSM1551334" "GSM1551335"
## [26] "GSM1551336" "GSM1551337" "GSM1551338" "GSM1551339" "GSM1551340"
## [31] "GSM1551341" "GSM1551342" "GSM1551343" "GSM1551344" "GSM1551345"
## [36] "GSM1551346" "GSM1551347" "GSM1551348" "GSM1551349" "GSM1551350"
## [41] "GSM1551351" "GSM1551352" "GSM1551353" "GSM1551354" "GSM1551355"
## [46] "GSM1551356" "GSM1551357" "GSM1551358" "GSM1551359" "GSM1551360"
## [51] "GSM1551361" "GSM1551362" "GSM1551363" "GSM1551364" "GSM1551365"
## [56] "GSM1551366" "GSM1551367" "GSM1551368" "GSM1551369" "GSM1551370"
## [61] "GSM1551371" "GSM1551372" "GSM1551373" "GSM1551374" "GSM1551375"
## [66] "GSM1551376" "GSM1551377" "GSM1551378" "GSM1551379" "GSM1551380"
## [71] "GSM1551381" "GSM1551382" "GSM1551383" "GSM1551384" "GSM1551385"
## [76] "GSM1551386" "GSM1551387" "GSM1551388" "GSM1551389" "GSM1551390"
## [81] "GSM1551391" "GSM1551392" "GSM1551393" "GSM1551394" "GSM1551395"
## [86] "GSM1551396" "GSM1551397" "GSM1551398" "GSM1551399" "GSM1551400"
## [91] "GSM1551401" "GSM1551402" "GSM1551403" "GSM1551404" "GSM1551405"
## [96] "GSM1551406" "GSM1551407" "GSM1551408" "GSM1551409" "GSM1551410"
## [101] "GSM1551411" "GSM1551412" "GSM1551413" "GSM1551414" "GSM1551415"
## [106] "GSM1551416" "GSM1551417" "GSM1551418" "GSM1551419" "GSM1551420"
## [111] "GSM1551421" "GSM1551422" "GSM1551423" "GSM1551424" "GSM1551425"
## [116] "GSM1551426" "GSM1551427" "GSM1551428" "GSM1551429" "GSM1551430"
## [121] "GSM1551431" "GSM1551432" "GSM1551433" "GSM1551434" "GSM1551435"
## [126] "GSM1551436" "GSM1551437" "GSM1551438"
# The column names start with "GSM" (each one corresponds to a sample)
In the dataset, the rows represent probeset IDs from a microarray (e.g., 1007_s_at), while the columns represent samples identified by their “GSM” identifiers (GEO sample accession numbers). Each value indicates the gene expression level measured by a specific probeset in a given sample.
Histogram of gene expression
# Histogram of all expression values pooled across probesets and samples
hist(
  exprs(gset),
  breaks = 50,
  col = "pink",
  main = "Gene Expression Distribution ",
  xlab = "Expression Value"
)
The histogram suggests a right-skewed (positively skewed) distribution, which is quite common in gene expression data. That is, in this case, most genes have low expression levels, and only a small subset has high expression, forming the long tail towards high values.
Boxplot
# One box per sample (column of the expression matrix)
boxplot(
  exprs(gset),
  col = "green",
  main = "Gene Expression",
  xlab = "Samples",
  ylab = "Expression Value"
)