Data

Variance stabilized transformed count matrix

# Set eval=TRUE if including this chunk, or eval=FALSE if importing a dds object instead.

# ---- Starting from the vst matrix ----
# Read the variance-stabilized count table from CSV into the data frame `vsd`.
vsd <- read.csv(file = "Cohort_2/Output/VST_NormalizedCounts.csv")

# Print the first six rows to inspect the imported object.
head(vsd, n = 6L)
##   X           probe_id      CF49      CF50      CF51      CF52      CF53
## 1 1 ENSCAFG00000000001  7.906532  6.617232  7.617076  7.890763  6.302125
## 2 2 ENSCAFG00000000002  6.069983  6.069983  6.069983  6.069983  6.354145
## 3 3 ENSCAFG00000000005 10.149570  9.616636  9.421598  9.128067  8.638283
## 4 4 ENSCAFG00000000007 12.302883 12.606684 12.229753 12.514866 11.908550
## 5 5 ENSCAFG00000000008 11.239677 11.015540 11.130514 10.938008 11.060130
## 6 6 ENSCAFG00000000009 10.664014 10.376533 10.436903 10.329840 10.315542
##        CF54      CF55      CF56      CF57      CF58      CF59      CF60
## 1  6.069983  7.255948  6.656713  6.506781  6.439188  6.885819  6.069983
## 2  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983
## 3  9.874435  8.680629  9.066112  9.628650  9.706799  9.212044  7.253734
## 4 13.698210 11.369653 11.866362 12.589967 12.508566 11.911638 11.948687
## 5 11.441619 12.027678 11.871578 11.541158 10.657025 11.413103 12.287049
## 6  9.838909 11.003307 11.161682 11.482888 10.531962 10.628991 11.670294
##        CF61      CF62      CF63      CF64      CF65      CF66      CF67
## 1  7.937810  7.270780  6.813891  6.663362  6.523483  6.069983  7.262578
## 2  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983
## 3  9.448410  7.221202  9.599761  8.100556  8.962936  8.661115  8.447280
## 4 12.343393 11.534566 11.800571 12.325817 13.457641 12.844597 12.324050
## 5 11.608763 11.255614 11.291497 11.371317 10.748382 10.478557 10.990132
## 6 11.194791 10.778318 10.975177 10.579215 11.244359 11.234983 10.289722
##        CF68      CF69      CF70      CF71      CF72      CF73      CF74
## 1  6.069983  7.936813  6.885994  6.069983  7.171819  7.752949  8.069364
## 2  6.069983  6.069983  6.299092  6.332749  6.069983  6.069983  6.307165
## 3  9.921965  8.434467  8.803646  9.186141  7.653114  7.915587 10.000596
## 4 12.500282 11.692563 12.396753 11.428941 11.319055 11.643281 11.674064
## 5 11.308369 11.450341 11.497740 11.497117 13.282035 11.564241 12.047934
## 6  9.456060 11.215069 10.913134 11.740898 11.620895 12.322277 11.650971
##        CF75      CF76      CF77      CF78      CF79      CF80      CF81
## 1  6.778055  8.034153  6.721436  7.309751  7.035917  6.606152  6.314528
## 2  6.233973  6.069983  6.069983  6.069983  6.436591  6.069983  6.069983
## 3  7.748478  7.493447  8.586288  8.853412 10.121320  9.205939 10.025599
## 4 12.308521 10.858983 12.482436 12.130080 12.326678 11.779101 11.527460
## 5 11.392009 13.021111 11.671469 11.650315 11.257249 11.565528 12.297920
## 6 11.257563 11.124804 10.817596 11.330506 10.887249 11.822137 11.803835
##        CF82      CF83      CF84      CF85      CF86      CF87      CF88
## 1  6.069983  6.874065  6.069983  7.047977  7.610881  7.608950  7.337694
## 2  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983
## 3  9.265713  9.592614  8.941048  9.286009 10.055468  8.525787  8.770398
## 4 11.817366 11.812438 11.492866 11.938248 12.519929 12.321418 11.821367
## 5 11.574012 10.968001 11.275541 10.840741 10.970071 11.648745 11.993828
## 6 11.272734 10.782646 11.444129 10.992634 10.192318 10.544203 11.367001
##        CF89      CF90      CF91      CF92      CF93      CF94      CF95
## 1  7.155179  7.208355  7.340067  8.567962  7.634463  6.673600  6.555422
## 2  6.069983  6.069983  6.250014  6.069983  6.069983  6.069983  6.069983
## 3 10.716563  6.796208  9.289473 10.526097  8.235135  8.069943  9.104293
## 4 12.037793 11.672743 11.603152 12.199292 12.706759 11.873757 11.997017
## 5 12.483962 11.989356 12.251026 11.592604 12.356519 11.817214 10.863704
## 6 12.002734 12.129584 11.074879 11.355022 11.382067 11.295301 12.114427
##       CF148     CF149     CF150     CF151      CF96      CF97      CF98
## 1  6.069983  6.069983  6.325610  6.388313  7.169693  6.662651  6.767737
## 2  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983
## 3  7.165662  6.519490  6.325610  6.722006  7.987561 10.349662  8.066896
## 4 11.960332 12.126935 12.073773 11.849331 11.955049 11.931360 11.852032
## 5 11.137408 10.921102 10.857520 11.117001 10.913044 11.819658 11.697680
## 6 11.514445 11.556279 11.801071 11.577687  9.779444 11.357057 11.383614
##        CF99     CF100     CF101     CF102     CF103     CF104     CF105
## 1  8.001018  6.751677  6.840252  6.765973  6.460200  6.340039  7.693329
## 2  6.069983  6.299093  6.069983  6.069983  6.069983  6.069983  6.069983
## 3  8.846445  8.464938  7.897490 10.437177  8.441312  9.459443  9.143636
## 4 11.590153 11.840469 11.542747 12.436833 11.905727 12.830682 12.002903
## 5 11.489211 11.360485 11.464319 11.118663 12.794323 11.196567 11.161310
## 6 11.452354 10.788344 10.925602 11.341787 11.863261 11.129677 10.945218
##       CF106     CF107     CF108     CF109     CF110     CF111     CF112
## 1  9.761368  7.228294  7.551717  6.429807  6.830623  6.309063  6.069983
## 2  6.069983  6.454822  6.069983  6.069983  6.341634  6.239135  6.069983
## 3  9.112420  7.887015  8.834084 10.780165 10.120695  9.574994  8.790488
## 4 12.285723 12.711500 11.956268 11.989101 12.951160 12.613002 12.410168
## 5 11.771212 10.402636 11.277997 12.392903 11.090642 10.570852 11.552404
## 6 11.571356 11.579978 10.782302 11.239428 11.066716 10.764561 10.524824
##       CF113     CF114     CF115     CF116     CF117     CF118     CF119
## 1  6.434470  7.224362  6.922114  7.991624  7.099494  7.776803  6.069983
## 2  6.069983  6.475246  6.069983  6.069983  6.069983  6.069983  6.069983
## 3  7.636450  6.707665  8.930464  9.859937  9.064811 10.401333 10.438890
## 4 11.810392 11.827188 11.930788 11.643098 12.120599 12.425708 13.107351
## 5 12.234964 10.278419 12.213070 10.375554 11.564338 11.282105 12.104560
## 6 11.164535 11.795041 11.510824 10.272635 10.885078 10.630473 10.953170
##       CF120     CF121     CF122     CF123     CF124     CF125     CF126
## 1  7.002597  6.822985  6.069983  6.931813  6.943326  6.985192  6.890742
## 2  6.069983  6.310536  6.069983  6.069983  6.069983  6.069983  6.247227
## 3  7.935339  8.528477  9.951851 10.035480  8.824873  8.691197  9.585183
## 4 11.881903 11.751766 12.387066 12.310394 11.655055 12.056017 11.290760
## 5 11.446690 12.641644 10.342916 11.113285 12.597513 11.599610 11.518154
## 6 11.801143 12.142901  9.371901 11.334928 10.866143 10.451714 11.190834
##       CF127     CF128     CF129     CF130     CF131     CF132     CF133
## 1  6.598369  6.985397  7.146328  6.431827  7.196919  6.411003  6.781065
## 2  6.069983  6.069983  6.069983  6.069983  6.248214  6.069983  6.069983
## 3  8.329687  9.025432  8.769345  8.503908  6.754277  8.762044  9.396858
## 4 12.172411 11.950570 11.849068 12.057176 11.212248 12.094444 12.927278
## 5 11.259576 11.107704 11.531878 11.861825 12.003647 11.661556 11.332390
## 6 11.100889 11.339763 11.503667 10.059185 11.690126 11.960210 11.545935
##       CF134     CF135     CF136     CF137     CF138     CF139     CF140
## 1  6.069983  7.762786  6.891900  6.655907  6.426409  7.244571  7.206824
## 2  6.069983  6.069983  6.320849  6.069983  6.069983  6.069983  6.288277
## 3  9.210216  8.155657  9.094432  7.666343  7.992710  8.530245  8.783995
## 4 11.599278 11.066029 12.004014 11.801109 11.719139 11.247372 12.401414
## 5 12.132255 10.815768 12.797621 11.241331 11.034881 11.985056 11.086234
## 6 11.668321  9.508205 12.024263 11.440035 11.429438 10.987834 11.198433
##       CF141     CF142     CF143     CF144     CF145     CF146     CF147
## 1  7.031967  7.058759  6.069983  6.069983  6.069983  6.069983  6.069983
## 2  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983
## 3  9.810795  8.816147  9.518549  9.512752  6.638869  6.798964  6.659646
## 4 11.975556 11.992268 11.882635 12.210970 12.383733 12.174482 12.151276
## 5 11.002826 11.861749 11.737576 10.812810 10.859427 10.969300 11.000524
## 6  9.749705 11.744615 10.524707 10.711494 11.166501 11.024332 11.244675
##            gene_name    description
## 1              ENPP1 protein_coding
## 2 ENSCAFG00000000002 protein_coding
## 3             PARD6G protein_coding
## 4              ADNP2 protein_coding
## 5             TXNL4A protein_coding
## 6            SLC66A2 protein_coding
# List every column name so the non-sample columns can be identified.
colnames(vsd)
##   [1] "X"           "probe_id"    "CF49"        "CF50"        "CF51"       
##   [6] "CF52"        "CF53"        "CF54"        "CF55"        "CF56"       
##  [11] "CF57"        "CF58"        "CF59"        "CF60"        "CF61"       
##  [16] "CF62"        "CF63"        "CF64"        "CF65"        "CF66"       
##  [21] "CF67"        "CF68"        "CF69"        "CF70"        "CF71"       
##  [26] "CF72"        "CF73"        "CF74"        "CF75"        "CF76"       
##  [31] "CF77"        "CF78"        "CF79"        "CF80"        "CF81"       
##  [36] "CF82"        "CF83"        "CF84"        "CF85"        "CF86"       
##  [41] "CF87"        "CF88"        "CF89"        "CF90"        "CF91"       
##  [46] "CF92"        "CF93"        "CF94"        "CF95"        "CF148"      
##  [51] "CF149"       "CF150"       "CF151"       "CF96"        "CF97"       
##  [56] "CF98"        "CF99"        "CF100"       "CF101"       "CF102"      
##  [61] "CF103"       "CF104"       "CF105"       "CF106"       "CF107"      
##  [66] "CF108"       "CF109"       "CF110"       "CF111"       "CF112"      
##  [71] "CF113"       "CF114"       "CF115"       "CF116"       "CF117"      
##  [76] "CF118"       "CF119"       "CF120"       "CF121"       "CF122"      
##  [81] "CF123"       "CF124"       "CF125"       "CF126"       "CF127"      
##  [86] "CF128"       "CF129"       "CF130"       "CF131"       "CF132"      
##  [91] "CF133"       "CF134"       "CF135"       "CF136"       "CF137"      
##  [96] "CF138"       "CF139"       "CF140"       "CF141"       "CF142"      
## [101] "CF143"       "CF144"       "CF145"       "CF146"       "CF147"      
## [106] "gene_name"   "description"
## Use the gene symbols as row names. Point `vsd$gene_name` at whichever column
## of your vsd table holds the gene names (see the column listing printed
## above). make.unique() de-duplicates repeated symbols, because R does not
## allow duplicated row names.
row.names(vsd) <- make.unique(names = as.character(vsd$gene_name))

## Drop every column that is not sample count data. Edit this vector to match
## your own column names: anything that is not a sample ID belongs in it.
vsd <- dplyr::select(
  vsd,
  -dplyr::all_of(c("X", "probe_id", "gene_name", "description"))
)

## Examine final object: columns should be sample names, rows gene symbols.
head(vsd, n = 6L)
##                         CF49      CF50      CF51      CF52      CF53      CF54
## ENPP1               7.906532  6.617232  7.617076  7.890763  6.302125  6.069983
## ENSCAFG00000000002  6.069983  6.069983  6.069983  6.069983  6.354145  6.069983
## PARD6G             10.149570  9.616636  9.421598  9.128067  8.638283  9.874435
## ADNP2              12.302883 12.606684 12.229753 12.514866 11.908550 13.698210
## TXNL4A             11.239677 11.015540 11.130514 10.938008 11.060130 11.441619
## SLC66A2            10.664014 10.376533 10.436903 10.329840 10.315542  9.838909
##                         CF55      CF56      CF57      CF58      CF59      CF60
## ENPP1               7.255948  6.656713  6.506781  6.439188  6.885819  6.069983
## ENSCAFG00000000002  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983
## PARD6G              8.680629  9.066112  9.628650  9.706799  9.212044  7.253734
## ADNP2              11.369653 11.866362 12.589967 12.508566 11.911638 11.948687
## TXNL4A             12.027678 11.871578 11.541158 10.657025 11.413103 12.287049
## SLC66A2            11.003307 11.161682 11.482888 10.531962 10.628991 11.670294
##                         CF61      CF62      CF63      CF64      CF65      CF66
## ENPP1               7.937810  7.270780  6.813891  6.663362  6.523483  6.069983
## ENSCAFG00000000002  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983
## PARD6G              9.448410  7.221202  9.599761  8.100556  8.962936  8.661115
## ADNP2              12.343393 11.534566 11.800571 12.325817 13.457641 12.844597
## TXNL4A             11.608763 11.255614 11.291497 11.371317 10.748382 10.478557
## SLC66A2            11.194791 10.778318 10.975177 10.579215 11.244359 11.234983
##                         CF67      CF68      CF69      CF70      CF71      CF72
## ENPP1               7.262578  6.069983  7.936813  6.885994  6.069983  7.171819
## ENSCAFG00000000002  6.069983  6.069983  6.069983  6.299092  6.332749  6.069983
## PARD6G              8.447280  9.921965  8.434467  8.803646  9.186141  7.653114
## ADNP2              12.324050 12.500282 11.692563 12.396753 11.428941 11.319055
## TXNL4A             10.990132 11.308369 11.450341 11.497740 11.497117 13.282035
## SLC66A2            10.289722  9.456060 11.215069 10.913134 11.740898 11.620895
##                         CF73      CF74      CF75      CF76      CF77      CF78
## ENPP1               7.752949  8.069364  6.778055  8.034153  6.721436  7.309751
## ENSCAFG00000000002  6.069983  6.307165  6.233973  6.069983  6.069983  6.069983
## PARD6G              7.915587 10.000596  7.748478  7.493447  8.586288  8.853412
## ADNP2              11.643281 11.674064 12.308521 10.858983 12.482436 12.130080
## TXNL4A             11.564241 12.047934 11.392009 13.021111 11.671469 11.650315
## SLC66A2            12.322277 11.650971 11.257563 11.124804 10.817596 11.330506
##                         CF79      CF80      CF81      CF82      CF83      CF84
## ENPP1               7.035917  6.606152  6.314528  6.069983  6.874065  6.069983
## ENSCAFG00000000002  6.436591  6.069983  6.069983  6.069983  6.069983  6.069983
## PARD6G             10.121320  9.205939 10.025599  9.265713  9.592614  8.941048
## ADNP2              12.326678 11.779101 11.527460 11.817366 11.812438 11.492866
## TXNL4A             11.257249 11.565528 12.297920 11.574012 10.968001 11.275541
## SLC66A2            10.887249 11.822137 11.803835 11.272734 10.782646 11.444129
##                         CF85      CF86      CF87      CF88      CF89      CF90
## ENPP1               7.047977  7.610881  7.608950  7.337694  7.155179  7.208355
## ENSCAFG00000000002  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983
## PARD6G              9.286009 10.055468  8.525787  8.770398 10.716563  6.796208
## ADNP2              11.938248 12.519929 12.321418 11.821367 12.037793 11.672743
## TXNL4A             10.840741 10.970071 11.648745 11.993828 12.483962 11.989356
## SLC66A2            10.992634 10.192318 10.544203 11.367001 12.002734 12.129584
##                         CF91      CF92      CF93      CF94      CF95     CF148
## ENPP1               7.340067  8.567962  7.634463  6.673600  6.555422  6.069983
## ENSCAFG00000000002  6.250014  6.069983  6.069983  6.069983  6.069983  6.069983
## PARD6G              9.289473 10.526097  8.235135  8.069943  9.104293  7.165662
## ADNP2              11.603152 12.199292 12.706759 11.873757 11.997017 11.960332
## TXNL4A             12.251026 11.592604 12.356519 11.817214 10.863704 11.137408
## SLC66A2            11.074879 11.355022 11.382067 11.295301 12.114427 11.514445
##                        CF149     CF150     CF151      CF96      CF97      CF98
## ENPP1               6.069983  6.325610  6.388313  7.169693  6.662651  6.767737
## ENSCAFG00000000002  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983
## PARD6G              6.519490  6.325610  6.722006  7.987561 10.349662  8.066896
## ADNP2              12.126935 12.073773 11.849331 11.955049 11.931360 11.852032
## TXNL4A             10.921102 10.857520 11.117001 10.913044 11.819658 11.697680
## SLC66A2            11.556279 11.801071 11.577687  9.779444 11.357057 11.383614
##                         CF99     CF100     CF101     CF102     CF103     CF104
## ENPP1               8.001018  6.751677  6.840252  6.765973  6.460200  6.340039
## ENSCAFG00000000002  6.069983  6.299093  6.069983  6.069983  6.069983  6.069983
## PARD6G              8.846445  8.464938  7.897490 10.437177  8.441312  9.459443
## ADNP2              11.590153 11.840469 11.542747 12.436833 11.905727 12.830682
## TXNL4A             11.489211 11.360485 11.464319 11.118663 12.794323 11.196567
## SLC66A2            11.452354 10.788344 10.925602 11.341787 11.863261 11.129677
##                        CF105     CF106     CF107     CF108     CF109     CF110
## ENPP1               7.693329  9.761368  7.228294  7.551717  6.429807  6.830623
## ENSCAFG00000000002  6.069983  6.069983  6.454822  6.069983  6.069983  6.341634
## PARD6G              9.143636  9.112420  7.887015  8.834084 10.780165 10.120695
## ADNP2              12.002903 12.285723 12.711500 11.956268 11.989101 12.951160
## TXNL4A             11.161310 11.771212 10.402636 11.277997 12.392903 11.090642
## SLC66A2            10.945218 11.571356 11.579978 10.782302 11.239428 11.066716
##                        CF111     CF112     CF113     CF114     CF115     CF116
## ENPP1               6.309063  6.069983  6.434470  7.224362  6.922114  7.991624
## ENSCAFG00000000002  6.239135  6.069983  6.069983  6.475246  6.069983  6.069983
## PARD6G              9.574994  8.790488  7.636450  6.707665  8.930464  9.859937
## ADNP2              12.613002 12.410168 11.810392 11.827188 11.930788 11.643098
## TXNL4A             10.570852 11.552404 12.234964 10.278419 12.213070 10.375554
## SLC66A2            10.764561 10.524824 11.164535 11.795041 11.510824 10.272635
##                        CF117     CF118     CF119     CF120     CF121     CF122
## ENPP1               7.099494  7.776803  6.069983  7.002597  6.822985  6.069983
## ENSCAFG00000000002  6.069983  6.069983  6.069983  6.069983  6.310536  6.069983
## PARD6G              9.064811 10.401333 10.438890  7.935339  8.528477  9.951851
## ADNP2              12.120599 12.425708 13.107351 11.881903 11.751766 12.387066
## TXNL4A             11.564338 11.282105 12.104560 11.446690 12.641644 10.342916
## SLC66A2            10.885078 10.630473 10.953170 11.801143 12.142901  9.371901
##                        CF123     CF124     CF125     CF126     CF127     CF128
## ENPP1               6.931813  6.943326  6.985192  6.890742  6.598369  6.985397
## ENSCAFG00000000002  6.069983  6.069983  6.069983  6.247227  6.069983  6.069983
## PARD6G             10.035480  8.824873  8.691197  9.585183  8.329687  9.025432
## ADNP2              12.310394 11.655055 12.056017 11.290760 12.172411 11.950570
## TXNL4A             11.113285 12.597513 11.599610 11.518154 11.259576 11.107704
## SLC66A2            11.334928 10.866143 10.451714 11.190834 11.100889 11.339763
##                        CF129     CF130     CF131     CF132     CF133     CF134
## ENPP1               7.146328  6.431827  7.196919  6.411003  6.781065  6.069983
## ENSCAFG00000000002  6.069983  6.069983  6.248214  6.069983  6.069983  6.069983
## PARD6G              8.769345  8.503908  6.754277  8.762044  9.396858  9.210216
## ADNP2              11.849068 12.057176 11.212248 12.094444 12.927278 11.599278
## TXNL4A             11.531878 11.861825 12.003647 11.661556 11.332390 12.132255
## SLC66A2            11.503667 10.059185 11.690126 11.960210 11.545935 11.668321
##                        CF135     CF136     CF137     CF138     CF139     CF140
## ENPP1               7.762786  6.891900  6.655907  6.426409  7.244571  7.206824
## ENSCAFG00000000002  6.069983  6.320849  6.069983  6.069983  6.069983  6.288277
## PARD6G              8.155657  9.094432  7.666343  7.992710  8.530245  8.783995
## ADNP2              11.066029 12.004014 11.801109 11.719139 11.247372 12.401414
## TXNL4A             10.815768 12.797621 11.241331 11.034881 11.985056 11.086234
## SLC66A2             9.508205 12.024263 11.440035 11.429438 10.987834 11.198433
##                        CF141     CF142     CF143     CF144     CF145     CF146
## ENPP1               7.031967  7.058759  6.069983  6.069983  6.069983  6.069983
## ENSCAFG00000000002  6.069983  6.069983  6.069983  6.069983  6.069983  6.069983
## PARD6G              9.810795  8.816147  9.518549  9.512752  6.638869  6.798964
## ADNP2              11.975556 11.992268 11.882635 12.210970 12.383733 12.174482
## TXNL4A             11.002826 11.861749 11.737576 10.812810 10.859427 10.969300
## SLC66A2             9.749705 11.744615 10.524707 10.711494 11.166501 11.024332
##                        CF147
## ENPP1               6.069983
## ENSCAFG00000000002  6.069983
## PARD6G              6.659646
## ADNP2              12.151276
## TXNL4A             11.000524
## SLC66A2            11.244675

Metadata

# Import the sample metadata table
metadata <- read.csv(file = "Cohort_2/Input/mutation_metadata.csv")

# Use the sample ID column as the row names, so that samples can later be
# looked up by ID; change `sample_name` to the name of the column with sample
# IDs in your metadata file.
rownames(metadata) <- metadata$sample_name
#metadata <- dplyr::select(metadata, c("phenotype", "etc")) # Select the columns in your metadata file you want annotated on the GSVA heatmap
# Drop the columns that are not annotations. `drop = FALSE` keeps `metadata` a
# data frame (and keeps its row names) even if only one column is left.
metadata <- metadata[, !names(metadata) %in% c("X", "sample_name", "avery_num"), drop = FALSE]
head(metadata) # Make sure samples are rows and classifiers are columns
##      phenotype DNMT3A_mut KMT2D_mut BCL6_mut TP63_mut MYC_mut BCL11B_mut
## CF49  CD4_PTCL    PRESENT   PRESENT  PRESENT  PRESENT  ABSENT     ABSENT
## CF50  CD4_PTCL     ABSENT    ABSENT   ABSENT   ABSENT PRESENT    PRESENT
## CF51  CD4_PTCL     ABSENT    ABSENT   ABSENT   ABSENT  ABSENT    PRESENT
## CF52  CD4_PTCL     ABSENT   PRESENT   ABSENT   ABSENT  ABSENT    PRESENT
## CF53  CD4_PTCL     ABSENT   PRESENT   ABSENT   ABSENT PRESENT     ABSENT
## CF54  CD4_PTCL     ABSENT   PRESENT   ABSENT   ABSENT  ABSENT     ABSENT
##      TP53_mut ITPR3_mut PTEN_mut CD244_mut STAT3_mut TET2_mut TSC2_mut
## CF49   ABSENT    ABSENT   ABSENT    ABSENT    ABSENT   ABSENT   ABSENT
## CF50   ABSENT    ABSENT   ABSENT    ABSENT    ABSENT   ABSENT   ABSENT
## CF51  PRESENT    ABSENT   ABSENT    ABSENT    ABSENT   ABSENT   ABSENT
## CF52   ABSENT   PRESENT   ABSENT    ABSENT    ABSENT   ABSENT   ABSENT
## CF53   ABSENT    ABSENT  PRESENT    ABSENT    ABSENT   ABSENT   ABSENT
## CF54   ABSENT   PRESENT   ABSENT    ABSENT    ABSENT   ABSENT   ABSENT
##      FASLG_mut PIK3R1_mut
## CF49    ABSENT     ABSENT
## CF50    ABSENT     ABSENT
## CF51    ABSENT     ABSENT
## CF52    ABSENT     ABSENT
## CF53    ABSENT     ABSENT
## CF54    ABSENT     ABSENT

Subset CD4 PTCLs

To include only samples of a certain group or phenotype from your metadata file, edit and run this code chunk.

# Set eval=TRUE if including this chunk
## Groups to keep; the labels must match the `phenotype` column of the
## metadata file.
keepGroups <- c("CD4_PTCL")

# Keep only the metadata rows whose phenotype is one of these groups.
# Base indexing with drop = FALSE always keeps the sample IDs as row names and
# keeps `metadata` a data frame.
stopifnot("phenotype" %in% names(metadata))
metadata <- metadata[metadata[["phenotype"]] %in% keepGroups, , drop = FALSE]

# Extract the list of sample IDs remaining
keepList <- row.names(metadata)

# Stop with a readable message if a retained sample has no column in the vst
# data, rather than failing inside the column lookup below.
missingSamples <- setdiff(keepList, colnames(vsd))
if (length(missingSamples) > 0) {
  stop(
    "Samples present in metadata but missing from vsd: ",
    paste(missingSamples, collapse = ", "),
    call. = FALSE
  )
}

## Subset vst data to include only the samples in this list.
## drop = FALSE keeps `vsd` a data frame even when a single sample remains.
vsd <- vsd[, keepList, drop = FALSE]

TP53 and BCL11B mutations

Unsupervised clustering of the top 2000 genes (by median absolute deviation)

# Calculate the median absolute deviation (MAD) of every row (gene) in the vst
# transformed data. The "1" in apply() means the function is applied over rows.
# as.matrix() makes explicit the coercion apply() performs on a data frame.
median_absolute_derivation <- apply(as.matrix(vsd), 1, mad)

# check data distribution
hist(median_absolute_derivation, ylim = c(0, 200), breaks = nrow(vsd) * 0.1)

# Keep only the rows (genes) with the largest MAD, at most 2000 of them.
# na.last = NA removes genes whose MAD is NA, which rev() would otherwise move
# to the front of the ranking. head() avoids the all-NA rows that `[1:2000]`
# creates when fewer than 2000 genes are available.
mad_rank <- rev(order(median_absolute_derivation, na.last = NA))
mad2k <- vsd[head(mad_rank, 2000), , drop = FALSE]
# Sample annotation tracks drawn above the heatmap: one column per mutation.
metadata_sub <- dplyr::select(metadata, c("BCL11B_mut", "TP53_mut"))

# Colour assigned to each level of each annotation track.
ann_colors <- list(
  BCL11B_mut = c("ABSENT" = "gray", "PRESENT" = "hotpink"),
  TP53_mut = c("ABSENT" = "gray", "PRESENT" = "limegreen")
)

# Draw heatmap of the top-MAD genes. Rows are scaled, then rows and columns are
# clustered with Ward's method on Euclidean distances.
mad_heatmap <- pheatmap(
  mad2k,
  scale = "row",
  color = colorRampPalette(c("blue", "white", "red"), space = "Lab")(100),
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  cutree_rows = 3, # how many clusters to cut the heatmap into by row
  cutree_cols = 3, # how many clusters to cut the heatmap into by column
  # Title names the statistic correctly: median absolute deviation (MAD).
  main = "Hierarchical Clustering by TP53 and BCL11B Mutation Status in CD4+ PTCL\nInput: Vst transformed normalized DESeq2 counts for the top 2000 genes by median absolute deviation, \n Clustering: Ward, Distance: Euclidean",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  clustering_method = "ward.D2",
  annotation_col = metadata_sub,
  annotation_colors = ann_colors,
  show_rownames = FALSE # Change to TRUE if gene symbols should be annotated on the heatmap.
)

Clustering based on expression of GATA3-PTCL and TBX21-PTCL gene signatures

# Signature genes, keyed by the PTCL subtype they mark. Keeping each gene next
# to its subtype in a single list means the gene -> group mapping cannot drift
# out of step, as two hand-maintained parallel vectors can.
signatureGenes <- list(
  "TBX21-PTCL" = c("TBX21", "IFNG", "STAT1", "CSF2", "CCL3", "EOMES", "IL2RB",
                   "CXCR3", "CD28", "AXL", "CD40", "CD59", "FTL", "LILRB1",
                   "SIRPA"),
  "GATA3-PTCL" = c("GATA3", "EGR1", "SEPTIN6", "CAT", "IL18R1", "IK", "ACKR3",
                   "CCR4", "MSH6")
)

# Flat vector of all signature genes, in the order listed above.
geneList <- unlist(signatureGenes, use.names = FALSE)
# One subtype label per gene, aligned with geneList.
geneGroups <- rep(names(signatureGenes), times = lengths(signatureGenes))

# One-column data frame (Group) with the gene symbols as row names.
geneGroup <- data.frame(Group = geneGroups)
rownames(geneGroup) <- geneList

# Subset the vst data to the signature genes. Base row indexing always keeps
# the gene symbols as row names, so the result does not depend on how dplyr
# treats row names. drop = FALSE keeps a data frame in every case.
vsd_geneList <- vsd[rownames(vsd) %in% geneList, , drop = FALSE]

# Report signature genes that are absent from the data instead of dropping
# them silently.
missingGenes <- setdiff(geneList, rownames(vsd))
if (length(missingGenes) > 0) {
  message(
    "Signature genes not found in vsd: ",
    paste(missingGenes, collapse = ", ")
  )
}
# Annotation colours: the two mutation tracks plus the signature-group track.
ann_colors <- list(
  BCL11B_mut = c("ABSENT" = "gray", "PRESENT" = "hotpink"),
  TP53_mut = c("ABSENT" = "gray", "PRESENT" = "limegreen"),
  Group = c("TBX21-PTCL" = "steelblue", "GATA3-PTCL" = "red4")
)

# Heatmap of the signature genes: row-scaled values, rows and columns clustered
# with ward.D2 on Euclidean distances, each dendrogram cut into two clusters.
vst_heatmap <- pheatmap(
  vsd_geneList,
  main = "Hierarchical Clustering of TBX21-PTCL and GATA3-PTCL Gene Signatures by TP53 and BCL11B Mutation Status in CD4+ PTCL\nInput: Vst transformed normalized DESeq2 counts, Clustering: Ward, Distance: Euclidean",
  color = colorRampPalette(c("blue", "white", "red"), space = "Lab")(100),
  scale = "row",
  clustering_method = "ward.D2",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  cutree_rows = 2,
  cutree_cols = 2,
  annotation_row = geneGroup,
  annotation_col = metadata_sub,
  annotation_colors = ann_colors,
  show_rownames = TRUE # show gene names on the heatmap
)

DNMT3A

Unsupervised clustering of the top 2000 genes (by median absolute deviation)

# Sample annotation track drawn above the heatmap: DNMT3A mutation status.
metadata_sub <- dplyr::select(metadata, c("DNMT3A_mut"))

# Colour assigned to each level of the annotation track.
ann_colors <- list(
  DNMT3A_mut = c("ABSENT" = "gray", "PRESENT" = "hotpink")
)

# Draw heatmap of the top-MAD genes. Rows are scaled, then rows and columns are
# clustered with Ward's method on Euclidean distances.
mad_heatmap <- pheatmap(
  mad2k,
  scale = "row",
  color = colorRampPalette(c("blue", "white", "red"), space = "Lab")(100),
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  cutree_rows = 3, # how many clusters to cut the heatmap into by row
  cutree_cols = 3, # how many clusters to cut the heatmap into by column
  # Title names the statistic correctly: median absolute deviation (MAD).
  main = "Hierarchical Clustering by DNMT3A Mutation Status in CD4+ PTCL\nInput: Vst transformed normalized DESeq2 counts for the top 2000 genes by median absolute deviation, \n Clustering: Ward, Distance: Euclidean",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  clustering_method = "ward.D2",
  annotation_col = metadata_sub,
  annotation_colors = ann_colors,
  show_rownames = FALSE # Change to TRUE if gene symbols should be annotated on the heatmap.
)

Clustering based on expression of GATA3-PTCL and TBX21-PTCL gene signatures

# Annotation colours: the DNMT3A mutation track plus the signature-group track.
ann_colors <- list(
  DNMT3A_mut = c("ABSENT" = "gray", "PRESENT" = "hotpink"),
  Group = c("TBX21-PTCL" = "steelblue", "GATA3-PTCL" = "red4")
)

# Heatmap of the signature genes: row-scaled values, rows and columns clustered
# with ward.D2 on Euclidean distances, each dendrogram cut into two clusters.
vst_heatmap <- pheatmap(
  vsd_geneList,
  main = "Hierarchical Clustering of TBX21-PTCL and GATA3-PTCL Gene Signatures by DNMT3A Mutation Status in CD4+ PTCL\nInput: Vst transformed normalized DESeq2 counts, Clustering: Ward, Distance: Euclidean",
  color = colorRampPalette(c("blue", "white", "red"), space = "Lab")(100),
  scale = "row",
  clustering_method = "ward.D2",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  cutree_rows = 2,
  cutree_cols = 2,
  annotation_row = geneGroup,
  annotation_col = metadata_sub,
  annotation_colors = ann_colors,
  show_rownames = TRUE # show gene names on the heatmap
)

KMT2D

Unsupervised clustering of the top 2000 genes (by median absolute deviation)

# Sample annotation track drawn above the heatmap: KMT2D mutation status.
metadata_sub <- dplyr::select(metadata, c("KMT2D_mut"))

# Colour assigned to each level of the annotation track.
ann_colors <- list(
  KMT2D_mut = c("ABSENT" = "gray", "PRESENT" = "hotpink")
)

# Draw heatmap of the top-MAD genes. Rows are scaled, then rows and columns are
# clustered with Ward's method on Euclidean distances.
mad_heatmap <- pheatmap(
  mad2k,
  scale = "row",
  color = colorRampPalette(c("blue", "white", "red"), space = "Lab")(100),
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  cutree_rows = 3, # how many clusters to cut the heatmap into by row
  cutree_cols = 3, # how many clusters to cut the heatmap into by column
  # Title names the statistic correctly: median absolute deviation (MAD).
  main = "Hierarchical Clustering by KMT2D Mutation Status in CD4+ PTCL\nInput: Vst transformed normalized DESeq2 counts for the top 2000 genes by median absolute deviation, \n Clustering: Ward, Distance: Euclidean",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  clustering_method = "ward.D2",
  annotation_col = metadata_sub,
  annotation_colors = ann_colors,
  show_rownames = FALSE # Change to TRUE if gene symbols should be annotated on the heatmap.
)

Clustering based on expression of GATA3-PTCL and TBX21-PTCL gene signatures

# Annotation colours: the KMT2D mutation track plus the signature-group track.
ann_colors <- list(
  KMT2D_mut = c("ABSENT" = "gray", "PRESENT" = "hotpink"),
  Group = c("TBX21-PTCL" = "steelblue", "GATA3-PTCL" = "red4")
)

# Heatmap of the signature genes: row-scaled values, rows and columns clustered
# with ward.D2 on Euclidean distances, each dendrogram cut into two clusters.
vst_heatmap <- pheatmap(
  vsd_geneList,
  main = "Hierarchical Clustering of TBX21-PTCL and GATA3-PTCL Gene Signatures by KMT2D Mutation Status in CD4+ PTCL\nInput: Vst transformed normalized DESeq2 counts, Clustering: Ward, Distance: Euclidean",
  color = colorRampPalette(c("blue", "white", "red"), space = "Lab")(100),
  scale = "row",
  clustering_method = "ward.D2",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  cutree_rows = 2,
  cutree_cols = 2,
  annotation_row = geneGroup,
  annotation_col = metadata_sub,
  annotation_colors = ann_colors,
  show_rownames = TRUE # show gene names on the heatmap
)

Citations

# Print the R version, platform and package versions used for this analysis.
utils::sessionInfo()
## R version 4.4.0 (2024-04-24 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Denver
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggplot2_3.5.1               knitr_1.49                 
##  [3] readr_2.1.5                 dplyr_1.1.4                
##  [5] pheatmap_1.0.12             RColorBrewer_1.1-3         
##  [7] DESeq2_1.44.0               SummarizedExperiment_1.34.0
##  [9] Biobase_2.64.0              MatrixGenerics_1.16.0      
## [11] matrixStats_1.4.1           GenomicRanges_1.56.2       
## [13] GenomeInfoDb_1.40.1         IRanges_2.38.1             
## [15] S4Vectors_0.42.1            BiocGenerics_0.50.0        
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6            xfun_0.49               bslib_0.8.0            
##  [4] lattice_0.22-6          tzdb_0.4.0              vctrs_0.6.5            
##  [7] tools_4.4.0             generics_0.1.3          parallel_4.4.0         
## [10] tibble_3.2.1            pkgconfig_2.0.3         Matrix_1.7-0           
## [13] lifecycle_1.0.4         GenomeInfoDbData_1.2.12 farver_2.1.2           
## [16] compiler_4.4.0          munsell_0.5.1           codetools_0.2-20       
## [19] htmltools_0.5.8.1       sass_0.4.9              yaml_2.3.10            
## [22] pillar_1.10.1           crayon_1.5.3            jquerylib_0.1.4        
## [25] BiocParallel_1.38.0     DelayedArray_0.30.1     cachem_1.1.0           
## [28] abind_1.4-8             tidyselect_1.2.1        locfit_1.5-9.10        
## [31] digest_0.6.35           fastmap_1.2.0           grid_4.4.0             
## [34] colorspace_2.1-1        cli_3.6.2               SparseArray_1.4.8      
## [37] magrittr_2.0.3          S4Arrays_1.4.1          withr_3.0.2            
## [40] scales_1.3.0            UCSC.utils_1.0.0        rmarkdown_2.29         
## [43] XVector_0.44.0          httr_1.4.7              hms_1.1.3              
## [46] evaluate_1.0.3          rlang_1.1.3             Rcpp_1.0.13            
## [49] glue_1.7.0              rstudioapi_0.17.1       jsonlite_1.8.9         
## [52] R6_2.5.1                zlibbioc_1.50.0
# Print the recommended citation for R itself.
utils::citation()
## To cite R in publications use:
## 
##   R Core Team (2024). _R: A Language and Environment for Statistical
##   Computing_. R Foundation for Statistical Computing, Vienna, Austria.
##   <https://www.R-project.org/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {R: A Language and Environment for Statistical Computing},
##     author = {{R Core Team}},
##     organization = {R Foundation for Statistical Computing},
##     address = {Vienna, Austria},
##     year = {2024},
##     url = {https://www.R-project.org/},
##   }
## 
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.