# Set eval=TRUE to run this chunk; set eval=FALSE when a dds object is imported instead.
###### Starting from vst matrix ######
# Read the VST normalized count table from CSV into a data frame.
vsd <- read.csv(file = "Cohort_2/Output/VST_NormalizedCounts.csv")
## Preview the first rows of the imported table
head(x = vsd)
## X probe_id CF49 CF50 CF51 CF52 CF53
## 1 1 ENSCAFG00000000001 7.906532 6.617232 7.617076 7.890763 6.302125
## 2 2 ENSCAFG00000000002 6.069983 6.069983 6.069983 6.069983 6.354145
## 3 3 ENSCAFG00000000005 10.149570 9.616636 9.421598 9.128067 8.638283
## 4 4 ENSCAFG00000000007 12.302883 12.606684 12.229753 12.514866 11.908550
## 5 5 ENSCAFG00000000008 11.239677 11.015540 11.130514 10.938008 11.060130
## 6 6 ENSCAFG00000000009 10.664014 10.376533 10.436903 10.329840 10.315542
## CF54 CF55 CF56 CF57 CF58 CF59 CF60
## 1 6.069983 7.255948 6.656713 6.506781 6.439188 6.885819 6.069983
## 2 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983
## 3 9.874435 8.680629 9.066112 9.628650 9.706799 9.212044 7.253734
## 4 13.698210 11.369653 11.866362 12.589967 12.508566 11.911638 11.948687
## 5 11.441619 12.027678 11.871578 11.541158 10.657025 11.413103 12.287049
## 6 9.838909 11.003307 11.161682 11.482888 10.531962 10.628991 11.670294
## CF61 CF62 CF63 CF64 CF65 CF66 CF67
## 1 7.937810 7.270780 6.813891 6.663362 6.523483 6.069983 7.262578
## 2 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983
## 3 9.448410 7.221202 9.599761 8.100556 8.962936 8.661115 8.447280
## 4 12.343393 11.534566 11.800571 12.325817 13.457641 12.844597 12.324050
## 5 11.608763 11.255614 11.291497 11.371317 10.748382 10.478557 10.990132
## 6 11.194791 10.778318 10.975177 10.579215 11.244359 11.234983 10.289722
## CF68 CF69 CF70 CF71 CF72 CF73 CF74
## 1 6.069983 7.936813 6.885994 6.069983 7.171819 7.752949 8.069364
## 2 6.069983 6.069983 6.299092 6.332749 6.069983 6.069983 6.307165
## 3 9.921965 8.434467 8.803646 9.186141 7.653114 7.915587 10.000596
## 4 12.500282 11.692563 12.396753 11.428941 11.319055 11.643281 11.674064
## 5 11.308369 11.450341 11.497740 11.497117 13.282035 11.564241 12.047934
## 6 9.456060 11.215069 10.913134 11.740898 11.620895 12.322277 11.650971
## CF75 CF76 CF77 CF78 CF79 CF80 CF81
## 1 6.778055 8.034153 6.721436 7.309751 7.035917 6.606152 6.314528
## 2 6.233973 6.069983 6.069983 6.069983 6.436591 6.069983 6.069983
## 3 7.748478 7.493447 8.586288 8.853412 10.121320 9.205939 10.025599
## 4 12.308521 10.858983 12.482436 12.130080 12.326678 11.779101 11.527460
## 5 11.392009 13.021111 11.671469 11.650315 11.257249 11.565528 12.297920
## 6 11.257563 11.124804 10.817596 11.330506 10.887249 11.822137 11.803835
## CF82 CF83 CF84 CF85 CF86 CF87 CF88
## 1 6.069983 6.874065 6.069983 7.047977 7.610881 7.608950 7.337694
## 2 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983
## 3 9.265713 9.592614 8.941048 9.286009 10.055468 8.525787 8.770398
## 4 11.817366 11.812438 11.492866 11.938248 12.519929 12.321418 11.821367
## 5 11.574012 10.968001 11.275541 10.840741 10.970071 11.648745 11.993828
## 6 11.272734 10.782646 11.444129 10.992634 10.192318 10.544203 11.367001
## CF89 CF90 CF91 CF92 CF93 CF94 CF95
## 1 7.155179 7.208355 7.340067 8.567962 7.634463 6.673600 6.555422
## 2 6.069983 6.069983 6.250014 6.069983 6.069983 6.069983 6.069983
## 3 10.716563 6.796208 9.289473 10.526097 8.235135 8.069943 9.104293
## 4 12.037793 11.672743 11.603152 12.199292 12.706759 11.873757 11.997017
## 5 12.483962 11.989356 12.251026 11.592604 12.356519 11.817214 10.863704
## 6 12.002734 12.129584 11.074879 11.355022 11.382067 11.295301 12.114427
## CF148 CF149 CF150 CF151 CF96 CF97 CF98
## 1 6.069983 6.069983 6.325610 6.388313 7.169693 6.662651 6.767737
## 2 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983
## 3 7.165662 6.519490 6.325610 6.722006 7.987561 10.349662 8.066896
## 4 11.960332 12.126935 12.073773 11.849331 11.955049 11.931360 11.852032
## 5 11.137408 10.921102 10.857520 11.117001 10.913044 11.819658 11.697680
## 6 11.514445 11.556279 11.801071 11.577687 9.779444 11.357057 11.383614
## CF99 CF100 CF101 CF102 CF103 CF104 CF105
## 1 8.001018 6.751677 6.840252 6.765973 6.460200 6.340039 7.693329
## 2 6.069983 6.299093 6.069983 6.069983 6.069983 6.069983 6.069983
## 3 8.846445 8.464938 7.897490 10.437177 8.441312 9.459443 9.143636
## 4 11.590153 11.840469 11.542747 12.436833 11.905727 12.830682 12.002903
## 5 11.489211 11.360485 11.464319 11.118663 12.794323 11.196567 11.161310
## 6 11.452354 10.788344 10.925602 11.341787 11.863261 11.129677 10.945218
## CF106 CF107 CF108 CF109 CF110 CF111 CF112
## 1 9.761368 7.228294 7.551717 6.429807 6.830623 6.309063 6.069983
## 2 6.069983 6.454822 6.069983 6.069983 6.341634 6.239135 6.069983
## 3 9.112420 7.887015 8.834084 10.780165 10.120695 9.574994 8.790488
## 4 12.285723 12.711500 11.956268 11.989101 12.951160 12.613002 12.410168
## 5 11.771212 10.402636 11.277997 12.392903 11.090642 10.570852 11.552404
## 6 11.571356 11.579978 10.782302 11.239428 11.066716 10.764561 10.524824
## CF113 CF114 CF115 CF116 CF117 CF118 CF119
## 1 6.434470 7.224362 6.922114 7.991624 7.099494 7.776803 6.069983
## 2 6.069983 6.475246 6.069983 6.069983 6.069983 6.069983 6.069983
## 3 7.636450 6.707665 8.930464 9.859937 9.064811 10.401333 10.438890
## 4 11.810392 11.827188 11.930788 11.643098 12.120599 12.425708 13.107351
## 5 12.234964 10.278419 12.213070 10.375554 11.564338 11.282105 12.104560
## 6 11.164535 11.795041 11.510824 10.272635 10.885078 10.630473 10.953170
## CF120 CF121 CF122 CF123 CF124 CF125 CF126
## 1 7.002597 6.822985 6.069983 6.931813 6.943326 6.985192 6.890742
## 2 6.069983 6.310536 6.069983 6.069983 6.069983 6.069983 6.247227
## 3 7.935339 8.528477 9.951851 10.035480 8.824873 8.691197 9.585183
## 4 11.881903 11.751766 12.387066 12.310394 11.655055 12.056017 11.290760
## 5 11.446690 12.641644 10.342916 11.113285 12.597513 11.599610 11.518154
## 6 11.801143 12.142901 9.371901 11.334928 10.866143 10.451714 11.190834
## CF127 CF128 CF129 CF130 CF131 CF132 CF133
## 1 6.598369 6.985397 7.146328 6.431827 7.196919 6.411003 6.781065
## 2 6.069983 6.069983 6.069983 6.069983 6.248214 6.069983 6.069983
## 3 8.329687 9.025432 8.769345 8.503908 6.754277 8.762044 9.396858
## 4 12.172411 11.950570 11.849068 12.057176 11.212248 12.094444 12.927278
## 5 11.259576 11.107704 11.531878 11.861825 12.003647 11.661556 11.332390
## 6 11.100889 11.339763 11.503667 10.059185 11.690126 11.960210 11.545935
## CF134 CF135 CF136 CF137 CF138 CF139 CF140
## 1 6.069983 7.762786 6.891900 6.655907 6.426409 7.244571 7.206824
## 2 6.069983 6.069983 6.320849 6.069983 6.069983 6.069983 6.288277
## 3 9.210216 8.155657 9.094432 7.666343 7.992710 8.530245 8.783995
## 4 11.599278 11.066029 12.004014 11.801109 11.719139 11.247372 12.401414
## 5 12.132255 10.815768 12.797621 11.241331 11.034881 11.985056 11.086234
## 6 11.668321 9.508205 12.024263 11.440035 11.429438 10.987834 11.198433
## CF141 CF142 CF143 CF144 CF145 CF146 CF147
## 1 7.031967 7.058759 6.069983 6.069983 6.069983 6.069983 6.069983
## 2 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983
## 3 9.810795 8.816147 9.518549 9.512752 6.638869 6.798964 6.659646
## 4 11.975556 11.992268 11.882635 12.210970 12.383733 12.174482 12.151276
## 5 11.002826 11.861749 11.737576 10.812810 10.859427 10.969300 11.000524
## 6 9.749705 11.744615 10.524707 10.711494 11.166501 11.024332 11.244675
## gene_name description
## 1 ENPP1 protein_coding
## 2 ENSCAFG00000000002 protein_coding
## 3 PARD6G protein_coding
## 4 ADNP2 protein_coding
## 5 TXNL4A protein_coding
## 6 SLC66A2 protein_coding
# List the column names to see which columns are not sample IDs
names(vsd)
## [1] "X" "probe_id" "CF49" "CF50" "CF51"
## [6] "CF52" "CF53" "CF54" "CF55" "CF56"
## [11] "CF57" "CF58" "CF59" "CF60" "CF61"
## [16] "CF62" "CF63" "CF64" "CF65" "CF66"
## [21] "CF67" "CF68" "CF69" "CF70" "CF71"
## [26] "CF72" "CF73" "CF74" "CF75" "CF76"
## [31] "CF77" "CF78" "CF79" "CF80" "CF81"
## [36] "CF82" "CF83" "CF84" "CF85" "CF86"
## [41] "CF87" "CF88" "CF89" "CF90" "CF91"
## [46] "CF92" "CF93" "CF94" "CF95" "CF148"
## [51] "CF149" "CF150" "CF151" "CF96" "CF97"
## [56] "CF98" "CF99" "CF100" "CF101" "CF102"
## [61] "CF103" "CF104" "CF105" "CF106" "CF107"
## [66] "CF108" "CF109" "CF110" "CF111" "CF112"
## [71] "CF113" "CF114" "CF115" "CF116" "CF117"
## [76] "CF118" "CF119" "CF120" "CF121" "CF122"
## [81] "CF123" "CF124" "CF125" "CF126" "CF127"
## [86] "CF128" "CF129" "CF130" "CF131" "CF132"
## [91] "CF133" "CF134" "CF135" "CF136" "CF137"
## [96] "CF138" "CF139" "CF140" "CF141" "CF142"
## [101] "CF143" "CF144" "CF145" "CF146" "CF147"
## [106] "gene_name" "description"
## Use the gene symbol column as the row names. Replace "gene_name" with the
## matching column of your own table (see the colnames(vsd) listing above).
## Row names must be unique, so make.unique() disambiguates repeated symbols.
rownames(vsd) <- make.unique(as.character(vsd[["gene_name"]]))
## Keep only the per-sample count columns. Edit this list so that it names
## every column of colnames(vsd) that is not a sample ID.
vsd <- vsd %>%
  dplyr::select(-c("X", "probe_id", "gene_name", "description"))
## Inspect the result: columns should be sample names, rows gene symbols.
head(x = vsd)
## CF49 CF50 CF51 CF52 CF53 CF54
## ENPP1 7.906532 6.617232 7.617076 7.890763 6.302125 6.069983
## ENSCAFG00000000002 6.069983 6.069983 6.069983 6.069983 6.354145 6.069983
## PARD6G 10.149570 9.616636 9.421598 9.128067 8.638283 9.874435
## ADNP2 12.302883 12.606684 12.229753 12.514866 11.908550 13.698210
## TXNL4A 11.239677 11.015540 11.130514 10.938008 11.060130 11.441619
## SLC66A2 10.664014 10.376533 10.436903 10.329840 10.315542 9.838909
## CF55 CF56 CF57 CF58 CF59 CF60
## ENPP1 7.255948 6.656713 6.506781 6.439188 6.885819 6.069983
## ENSCAFG00000000002 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983
## PARD6G 8.680629 9.066112 9.628650 9.706799 9.212044 7.253734
## ADNP2 11.369653 11.866362 12.589967 12.508566 11.911638 11.948687
## TXNL4A 12.027678 11.871578 11.541158 10.657025 11.413103 12.287049
## SLC66A2 11.003307 11.161682 11.482888 10.531962 10.628991 11.670294
## CF61 CF62 CF63 CF64 CF65 CF66
## ENPP1 7.937810 7.270780 6.813891 6.663362 6.523483 6.069983
## ENSCAFG00000000002 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983
## PARD6G 9.448410 7.221202 9.599761 8.100556 8.962936 8.661115
## ADNP2 12.343393 11.534566 11.800571 12.325817 13.457641 12.844597
## TXNL4A 11.608763 11.255614 11.291497 11.371317 10.748382 10.478557
## SLC66A2 11.194791 10.778318 10.975177 10.579215 11.244359 11.234983
## CF67 CF68 CF69 CF70 CF71 CF72
## ENPP1 7.262578 6.069983 7.936813 6.885994 6.069983 7.171819
## ENSCAFG00000000002 6.069983 6.069983 6.069983 6.299092 6.332749 6.069983
## PARD6G 8.447280 9.921965 8.434467 8.803646 9.186141 7.653114
## ADNP2 12.324050 12.500282 11.692563 12.396753 11.428941 11.319055
## TXNL4A 10.990132 11.308369 11.450341 11.497740 11.497117 13.282035
## SLC66A2 10.289722 9.456060 11.215069 10.913134 11.740898 11.620895
## CF73 CF74 CF75 CF76 CF77 CF78
## ENPP1 7.752949 8.069364 6.778055 8.034153 6.721436 7.309751
## ENSCAFG00000000002 6.069983 6.307165 6.233973 6.069983 6.069983 6.069983
## PARD6G 7.915587 10.000596 7.748478 7.493447 8.586288 8.853412
## ADNP2 11.643281 11.674064 12.308521 10.858983 12.482436 12.130080
## TXNL4A 11.564241 12.047934 11.392009 13.021111 11.671469 11.650315
## SLC66A2 12.322277 11.650971 11.257563 11.124804 10.817596 11.330506
## CF79 CF80 CF81 CF82 CF83 CF84
## ENPP1 7.035917 6.606152 6.314528 6.069983 6.874065 6.069983
## ENSCAFG00000000002 6.436591 6.069983 6.069983 6.069983 6.069983 6.069983
## PARD6G 10.121320 9.205939 10.025599 9.265713 9.592614 8.941048
## ADNP2 12.326678 11.779101 11.527460 11.817366 11.812438 11.492866
## TXNL4A 11.257249 11.565528 12.297920 11.574012 10.968001 11.275541
## SLC66A2 10.887249 11.822137 11.803835 11.272734 10.782646 11.444129
## CF85 CF86 CF87 CF88 CF89 CF90
## ENPP1 7.047977 7.610881 7.608950 7.337694 7.155179 7.208355
## ENSCAFG00000000002 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983
## PARD6G 9.286009 10.055468 8.525787 8.770398 10.716563 6.796208
## ADNP2 11.938248 12.519929 12.321418 11.821367 12.037793 11.672743
## TXNL4A 10.840741 10.970071 11.648745 11.993828 12.483962 11.989356
## SLC66A2 10.992634 10.192318 10.544203 11.367001 12.002734 12.129584
## CF91 CF92 CF93 CF94 CF95 CF148
## ENPP1 7.340067 8.567962 7.634463 6.673600 6.555422 6.069983
## ENSCAFG00000000002 6.250014 6.069983 6.069983 6.069983 6.069983 6.069983
## PARD6G 9.289473 10.526097 8.235135 8.069943 9.104293 7.165662
## ADNP2 11.603152 12.199292 12.706759 11.873757 11.997017 11.960332
## TXNL4A 12.251026 11.592604 12.356519 11.817214 10.863704 11.137408
## SLC66A2 11.074879 11.355022 11.382067 11.295301 12.114427 11.514445
## CF149 CF150 CF151 CF96 CF97 CF98
## ENPP1 6.069983 6.325610 6.388313 7.169693 6.662651 6.767737
## ENSCAFG00000000002 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983
## PARD6G 6.519490 6.325610 6.722006 7.987561 10.349662 8.066896
## ADNP2 12.126935 12.073773 11.849331 11.955049 11.931360 11.852032
## TXNL4A 10.921102 10.857520 11.117001 10.913044 11.819658 11.697680
## SLC66A2 11.556279 11.801071 11.577687 9.779444 11.357057 11.383614
## CF99 CF100 CF101 CF102 CF103 CF104
## ENPP1 8.001018 6.751677 6.840252 6.765973 6.460200 6.340039
## ENSCAFG00000000002 6.069983 6.299093 6.069983 6.069983 6.069983 6.069983
## PARD6G 8.846445 8.464938 7.897490 10.437177 8.441312 9.459443
## ADNP2 11.590153 11.840469 11.542747 12.436833 11.905727 12.830682
## TXNL4A 11.489211 11.360485 11.464319 11.118663 12.794323 11.196567
## SLC66A2 11.452354 10.788344 10.925602 11.341787 11.863261 11.129677
## CF105 CF106 CF107 CF108 CF109 CF110
## ENPP1 7.693329 9.761368 7.228294 7.551717 6.429807 6.830623
## ENSCAFG00000000002 6.069983 6.069983 6.454822 6.069983 6.069983 6.341634
## PARD6G 9.143636 9.112420 7.887015 8.834084 10.780165 10.120695
## ADNP2 12.002903 12.285723 12.711500 11.956268 11.989101 12.951160
## TXNL4A 11.161310 11.771212 10.402636 11.277997 12.392903 11.090642
## SLC66A2 10.945218 11.571356 11.579978 10.782302 11.239428 11.066716
## CF111 CF112 CF113 CF114 CF115 CF116
## ENPP1 6.309063 6.069983 6.434470 7.224362 6.922114 7.991624
## ENSCAFG00000000002 6.239135 6.069983 6.069983 6.475246 6.069983 6.069983
## PARD6G 9.574994 8.790488 7.636450 6.707665 8.930464 9.859937
## ADNP2 12.613002 12.410168 11.810392 11.827188 11.930788 11.643098
## TXNL4A 10.570852 11.552404 12.234964 10.278419 12.213070 10.375554
## SLC66A2 10.764561 10.524824 11.164535 11.795041 11.510824 10.272635
## CF117 CF118 CF119 CF120 CF121 CF122
## ENPP1 7.099494 7.776803 6.069983 7.002597 6.822985 6.069983
## ENSCAFG00000000002 6.069983 6.069983 6.069983 6.069983 6.310536 6.069983
## PARD6G 9.064811 10.401333 10.438890 7.935339 8.528477 9.951851
## ADNP2 12.120599 12.425708 13.107351 11.881903 11.751766 12.387066
## TXNL4A 11.564338 11.282105 12.104560 11.446690 12.641644 10.342916
## SLC66A2 10.885078 10.630473 10.953170 11.801143 12.142901 9.371901
## CF123 CF124 CF125 CF126 CF127 CF128
## ENPP1 6.931813 6.943326 6.985192 6.890742 6.598369 6.985397
## ENSCAFG00000000002 6.069983 6.069983 6.069983 6.247227 6.069983 6.069983
## PARD6G 10.035480 8.824873 8.691197 9.585183 8.329687 9.025432
## ADNP2 12.310394 11.655055 12.056017 11.290760 12.172411 11.950570
## TXNL4A 11.113285 12.597513 11.599610 11.518154 11.259576 11.107704
## SLC66A2 11.334928 10.866143 10.451714 11.190834 11.100889 11.339763
## CF129 CF130 CF131 CF132 CF133 CF134
## ENPP1 7.146328 6.431827 7.196919 6.411003 6.781065 6.069983
## ENSCAFG00000000002 6.069983 6.069983 6.248214 6.069983 6.069983 6.069983
## PARD6G 8.769345 8.503908 6.754277 8.762044 9.396858 9.210216
## ADNP2 11.849068 12.057176 11.212248 12.094444 12.927278 11.599278
## TXNL4A 11.531878 11.861825 12.003647 11.661556 11.332390 12.132255
## SLC66A2 11.503667 10.059185 11.690126 11.960210 11.545935 11.668321
## CF135 CF136 CF137 CF138 CF139 CF140
## ENPP1 7.762786 6.891900 6.655907 6.426409 7.244571 7.206824
## ENSCAFG00000000002 6.069983 6.320849 6.069983 6.069983 6.069983 6.288277
## PARD6G 8.155657 9.094432 7.666343 7.992710 8.530245 8.783995
## ADNP2 11.066029 12.004014 11.801109 11.719139 11.247372 12.401414
## TXNL4A 10.815768 12.797621 11.241331 11.034881 11.985056 11.086234
## SLC66A2 9.508205 12.024263 11.440035 11.429438 10.987834 11.198433
## CF141 CF142 CF143 CF144 CF145 CF146
## ENPP1 7.031967 7.058759 6.069983 6.069983 6.069983 6.069983
## ENSCAFG00000000002 6.069983 6.069983 6.069983 6.069983 6.069983 6.069983
## PARD6G 9.810795 8.816147 9.518549 9.512752 6.638869 6.798964
## ADNP2 11.975556 11.992268 11.882635 12.210970 12.383733 12.174482
## TXNL4A 11.002826 11.861749 11.737576 10.812810 10.859427 10.969300
## SLC66A2 9.749705 11.744615 10.524707 10.711494 11.166501 11.024332
## CF147
## ENPP1 6.069983
## ENSCAFG00000000002 6.069983
## PARD6G 6.659646
## ADNP2 12.151276
## TXNL4A 11.000524
## SLC66A2 11.244675
# Import metadata file
metadata <- read.csv(file = "Cohort_2/Input/mutation_metadata.csv")
# Make the sample ID column the rownames; change "sample_name" to the name of
# the column holding sample IDs in your metadata file.
rownames(metadata) <- metadata$sample_name
#metadata <- dplyr::select(metadata, c("phenotype", "etc")) # Select the columns in your metadata file you want annotated on the GSVA heatmap
# Drop the columns that are not classifiers. drop = FALSE keeps the result a
# data frame (with its sample-ID row names) even when only a single classifier
# column remains; without it the subset would collapse to a bare vector.
metadata <- metadata[, !names(metadata) %in% c("X", "sample_name", "avery_num"), drop = FALSE]
head(metadata) # Make sure samples are rows and classifiers are columns
## phenotype DNMT3A_mut KMT2D_mut BCL6_mut TP63_mut MYC_mut BCL11B_mut
## CF49 CD4_PTCL PRESENT PRESENT PRESENT PRESENT ABSENT ABSENT
## CF50 CD4_PTCL ABSENT ABSENT ABSENT ABSENT PRESENT PRESENT
## CF51 CD4_PTCL ABSENT ABSENT ABSENT ABSENT ABSENT PRESENT
## CF52 CD4_PTCL ABSENT PRESENT ABSENT ABSENT ABSENT PRESENT
## CF53 CD4_PTCL ABSENT PRESENT ABSENT ABSENT PRESENT ABSENT
## CF54 CD4_PTCL ABSENT PRESENT ABSENT ABSENT ABSENT ABSENT
## TP53_mut ITPR3_mut PTEN_mut CD244_mut STAT3_mut TET2_mut TSC2_mut
## CF49 ABSENT ABSENT ABSENT ABSENT ABSENT ABSENT ABSENT
## CF50 ABSENT ABSENT ABSENT ABSENT ABSENT ABSENT ABSENT
## CF51 PRESENT ABSENT ABSENT ABSENT ABSENT ABSENT ABSENT
## CF52 ABSENT PRESENT ABSENT ABSENT ABSENT ABSENT ABSENT
## CF53 ABSENT ABSENT PRESENT ABSENT ABSENT ABSENT ABSENT
## CF54 ABSENT PRESENT ABSENT ABSENT ABSENT ABSENT ABSENT
## FASLG_mut PIK3R1_mut
## CF49 ABSENT ABSENT
## CF50 ABSENT ABSENT
## CF51 ABSENT ABSENT
## CF52 ABSENT ABSENT
## CF53 ABSENT ABSENT
## CF54 ABSENT ABSENT
To include only samples of a certain group or phenotype from your metadata file, edit and run this code chunk.
# Set eval=TRUE if including this chunk
## Extract list of samples belonging to a particular group using metadata
keepGroups <- c("CD4_PTCL") # Define groups to keep (must match metadata file)
# The filter below needs a column named exactly "phenotype".
if (!"phenotype" %in% names(metadata)) {
  stop("metadata has no 'phenotype' column to filter on", call. = FALSE)
}
# Keep only the metadata rows (samples) whose phenotype is in keepGroups.
# Base subsetting is used so that the sample-ID row names are retained
# regardless of the installed dplyr version.
metadata <- metadata[metadata[["phenotype"]] %in% keepGroups, , drop = FALSE]
# Extract the list of sample IDs remaining
keepList <- row.names(metadata)
# Fail early, naming the offending samples, if the metadata lists samples that
# have no column in the vst data.
missingSamples <- setdiff(keepList, colnames(vsd))
if (length(missingSamples) > 0) {
  stop(
    "Samples present in metadata but missing from the vst data: ",
    paste(missingSamples, collapse = ", "),
    call. = FALSE
  )
}
## Subset vst data to include only the samples in this list (in metadata order).
## drop = FALSE keeps a data frame even when a single sample is retained.
vsd <- vsd[, keepList, drop = FALSE]
# Calculate the median absolute deviation (MAD) of every row (gene) of the vst
# transformed data. The "1" in apply() means the function is applied over rows.
# The variable keeps its original name ("derivation") so later code that refers
# to it keeps working.
median_absolute_derivation <- apply(as.matrix(vsd), 1, mad)
# check data distribution
hist(median_absolute_derivation, ylim = c(0, 200), breaks = nrow(vsd) * 0.1)
# Rank genes from highest to lowest MAD. na.last = NA removes genes whose MAD
# is NA so they can never be ranked at the top.
mad_rank <- rev(order(median_absolute_derivation, na.last = NA))
# Keep the (up to) 2000 most variable genes. head() avoids the all-NA rows that
# indexing with 1:2000 creates when fewer than 2000 genes are available.
mad2k <- vsd[head(mad_rank, 2000), , drop = FALSE]
# Mutation calls to display as annotation tracks above the heatmap columns.
metadata_sub <- dplyr::select(metadata, c("BCL11B_mut", "TP53_mut"))
# Annotation colours: one named vector per annotation column, keyed by level.
ann_colors <- list(
  BCL11B_mut = c("ABSENT" = "gray", "PRESENT" = "hotpink"),
  TP53_mut = c("ABSENT" = "gray", "PRESENT" = "limegreen")
)
# Draw heatmap of the top-MAD genes with row scaling; rows and columns are both
# clustered (ward.D2 on Euclidean distances). The title names the statistic
# used for gene selection: median absolute deviation.
mad_heatmap <- pheatmap(
  mad2k,
  scale = "row",
  color = colorRampPalette(c("blue", "white", "red"), space = "Lab")(100),
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  cutree_rows = 3, # how many clusters to cut the heatmap into by row
  cutree_cols = 3, # how many clusters to cut the heatmap into by column
  main = "Hierarchical Clustering by TP53 and BCL11B Mutation Status in CD4+ PTCL\nInput: Vst transformed normalized DESeq2 counts for the top 2000 genes by median absolute deviation, \n Clustering: Ward, Distance: Euclidean",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  clustering_method = "ward.D2",
  annotation_col = metadata_sub,
  annotation_colors = ann_colors,
  show_rownames = FALSE # Change to TRUE if gene symbols should be annotated on the heatmap.
)
## Clustering based on expression of GATA3-PTCL and TBX21-PTCL gene signatures
# Signature genes, listed per signature so the group labels below are derived
# from the lists and cannot fall out of step with them.
tbx21Genes <- c(
  "TBX21", "IFNG", "STAT1", "CSF2", "CCL3", "EOMES", "IL2RB", "CXCR3",
  "CD28", "AXL", "CD40", "CD59", "FTL", "LILRB1", "SIRPA"
)
gata3Genes <- c(
  "GATA3", "EGR1", "SEPTIN6", "CAT", "IL18R1", "IK", "ACKR3", "CCR4", "MSH6"
)
geneList <- c(tbx21Genes, gata3Genes)
# One label per gene, in the same order as geneList.
geneGroups <- rep(
  c("TBX21-PTCL", "GATA3-PTCL"),
  times = c(length(tbx21Genes), length(gata3Genes))
)
# Row annotation table: row names are gene symbols, "Group" is the signature.
geneGroup <- data.frame(Group = geneGroups, row.names = geneList)
# subset vst data to include only those genes. Base subsetting keeps the gene
# symbols as row names regardless of the installed dplyr version; signature
# genes that are absent from vsd are simply not included.
vsd_geneList <- vsd[rownames(vsd) %in% geneList, , drop = FALSE]
# Colour keys for the annotation tracks: mutation status on the columns
# (samples) and signature membership on the rows (genes).
ann_colors <- list(
  BCL11B_mut = c(ABSENT = "gray", PRESENT = "hotpink"),
  TP53_mut = c(ABSENT = "gray", PRESENT = "limegreen"),
  Group = c("TBX21-PTCL" = "steelblue", "GATA3-PTCL" = "red4")
)
# Row-scaled heatmap of the signature genes, clustered on both axes with
# ward.D2 linkage on Euclidean distances and cut into two clusters per axis.
vst_heatmap <- pheatmap(
  mat = vsd_geneList,
  annotation_row = geneGroup,
  annotation_col = metadata_sub,
  annotation_colors = ann_colors,
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  clustering_method = "ward.D2",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  cutree_rows = 2,
  cutree_cols = 2,
  scale = "row",
  color = colorRampPalette(c("blue", "white", "red"), space = "Lab")(100),
  show_rownames = TRUE, # show gene names on the heatmap
  main = "Hierarchical Clustering of TBX21-PTCL and GATA3-PTCL Gene Signatures by TP53 and BCL11B Mutation Status in CD4+ PTCL\nInput: Vst transformed normalized DESeq2 counts, Clustering: Ward, Distance: Euclidean"
)
# Mutation call to display as the annotation track above the heatmap columns.
metadata_sub <- dplyr::select(metadata, c("DNMT3A_mut"))
# Annotation colours: one named vector per annotation column, keyed by level.
ann_colors <- list(
  DNMT3A_mut = c("ABSENT" = "gray", "PRESENT" = "hotpink")
)
# Draw heatmap of the top-MAD genes with row scaling; rows and columns are both
# clustered (ward.D2 on Euclidean distances). The title names the statistic
# used for gene selection: median absolute deviation.
mad_heatmap <- pheatmap(
  mad2k,
  scale = "row",
  color = colorRampPalette(c("blue", "white", "red"), space = "Lab")(100),
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  cutree_rows = 3, # how many clusters to cut the heatmap into by row
  cutree_cols = 3, # how many clusters to cut the heatmap into by column
  main = "Hierarchical Clustering by DNMT3A Mutation Status in CD4+ PTCL\nInput: Vst transformed normalized DESeq2 counts for the top 2000 genes by median absolute deviation, \n Clustering: Ward, Distance: Euclidean",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  clustering_method = "ward.D2",
  annotation_col = metadata_sub,
  annotation_colors = ann_colors,
  show_rownames = FALSE # Change to TRUE if gene symbols should be annotated on the heatmap.
)
# Colour keys for the annotation tracks: DNMT3A mutation status on the columns
# (samples) and signature membership on the rows (genes).
ann_colors <- list(
  DNMT3A_mut = c(ABSENT = "gray", PRESENT = "hotpink"),
  Group = c("TBX21-PTCL" = "steelblue", "GATA3-PTCL" = "red4")
)
# Row-scaled heatmap of the signature genes, clustered on both axes with
# ward.D2 linkage on Euclidean distances and cut into two clusters per axis.
vst_heatmap <- pheatmap(
  mat = vsd_geneList,
  annotation_row = geneGroup,
  annotation_col = metadata_sub,
  annotation_colors = ann_colors,
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  clustering_method = "ward.D2",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  cutree_rows = 2,
  cutree_cols = 2,
  scale = "row",
  color = colorRampPalette(c("blue", "white", "red"), space = "Lab")(100),
  show_rownames = TRUE, # show gene names on the heatmap
  main = "Hierarchical Clustering of TBX21-PTCL and GATA3-PTCL Gene Signatures by DNMT3A Mutation Status in CD4+ PTCL\nInput: Vst transformed normalized DESeq2 counts, Clustering: Ward, Distance: Euclidean"
)
# Mutation call to display as the annotation track above the heatmap columns.
metadata_sub <- dplyr::select(metadata, c("KMT2D_mut"))
# Annotation colours: one named vector per annotation column, keyed by level.
ann_colors <- list(
  KMT2D_mut = c("ABSENT" = "gray", "PRESENT" = "hotpink")
)
# Draw heatmap of the top-MAD genes with row scaling; rows and columns are both
# clustered (ward.D2 on Euclidean distances). The title names the statistic
# used for gene selection: median absolute deviation.
mad_heatmap <- pheatmap(
  mad2k,
  scale = "row",
  color = colorRampPalette(c("blue", "white", "red"), space = "Lab")(100),
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  cutree_rows = 3, # how many clusters to cut the heatmap into by row
  cutree_cols = 3, # how many clusters to cut the heatmap into by column
  main = "Hierarchical Clustering by KMT2D Mutation Status in CD4+ PTCL\nInput: Vst transformed normalized DESeq2 counts for the top 2000 genes by median absolute deviation, \n Clustering: Ward, Distance: Euclidean",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  clustering_method = "ward.D2",
  annotation_col = metadata_sub,
  annotation_colors = ann_colors,
  show_rownames = FALSE # Change to TRUE if gene symbols should be annotated on the heatmap.
)
# Colour keys for the annotation tracks: KMT2D mutation status on the columns
# (samples) and signature membership on the rows (genes).
ann_colors <- list(
  KMT2D_mut = c(ABSENT = "gray", PRESENT = "hotpink"),
  Group = c("TBX21-PTCL" = "steelblue", "GATA3-PTCL" = "red4")
)
# Row-scaled heatmap of the signature genes, clustered on both axes with
# ward.D2 linkage on Euclidean distances and cut into two clusters per axis.
vst_heatmap <- pheatmap(
  mat = vsd_geneList,
  annotation_row = geneGroup,
  annotation_col = metadata_sub,
  annotation_colors = ann_colors,
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  clustering_method = "ward.D2",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  cutree_rows = 2,
  cutree_cols = 2,
  scale = "row",
  color = colorRampPalette(c("blue", "white", "red"), space = "Lab")(100),
  show_rownames = TRUE, # show gene names on the heatmap
  main = "Hierarchical Clustering of TBX21-PTCL and GATA3-PTCL Gene Signatures by KMT2D Mutation Status in CD4+ PTCL\nInput: Vst transformed normalized DESeq2 counts, Clustering: Ward, Distance: Euclidean"
)
# Record the R version and package versions used for this analysis
utils::sessionInfo()
## R version 4.4.0 (2024-04-24 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: America/Denver
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggplot2_3.5.1 knitr_1.49
## [3] readr_2.1.5 dplyr_1.1.4
## [5] pheatmap_1.0.12 RColorBrewer_1.1-3
## [7] DESeq2_1.44.0 SummarizedExperiment_1.34.0
## [9] Biobase_2.64.0 MatrixGenerics_1.16.0
## [11] matrixStats_1.4.1 GenomicRanges_1.56.2
## [13] GenomeInfoDb_1.40.1 IRanges_2.38.1
## [15] S4Vectors_0.42.1 BiocGenerics_0.50.0
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 xfun_0.49 bslib_0.8.0
## [4] lattice_0.22-6 tzdb_0.4.0 vctrs_0.6.5
## [7] tools_4.4.0 generics_0.1.3 parallel_4.4.0
## [10] tibble_3.2.1 pkgconfig_2.0.3 Matrix_1.7-0
## [13] lifecycle_1.0.4 GenomeInfoDbData_1.2.12 farver_2.1.2
## [16] compiler_4.4.0 munsell_0.5.1 codetools_0.2-20
## [19] htmltools_0.5.8.1 sass_0.4.9 yaml_2.3.10
## [22] pillar_1.10.1 crayon_1.5.3 jquerylib_0.1.4
## [25] BiocParallel_1.38.0 DelayedArray_0.30.1 cachem_1.1.0
## [28] abind_1.4-8 tidyselect_1.2.1 locfit_1.5-9.10
## [31] digest_0.6.35 fastmap_1.2.0 grid_4.4.0
## [34] colorspace_2.1-1 cli_3.6.2 SparseArray_1.4.8
## [37] magrittr_2.0.3 S4Arrays_1.4.1 withr_3.0.2
## [40] scales_1.3.0 UCSC.utils_1.0.0 rmarkdown_2.29
## [43] XVector_0.44.0 httr_1.4.7 hms_1.1.3
## [46] evaluate_1.0.3 rlang_1.1.3 Rcpp_1.0.13
## [49] glue_1.7.0 rstudioapi_0.17.1 jsonlite_1.8.9
## [52] R6_2.5.1 zlibbioc_1.50.0
# Print the reference to use when citing R itself
utils::citation()
## To cite R in publications use:
##
## R Core Team (2024). _R: A Language and Environment for Statistical
## Computing_. R Foundation for Statistical Computing, Vienna, Austria.
## <https://www.R-project.org/>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2024},
## url = {https://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.