Preparation of Data

load("data_subset_rma.Rdata")
#Subset Data
topgenes1_sub<-topgenes1[topgenes1[,"P.Value"]<=0.001 & abs(topgenes1[,"logFC"])>=1,] 
#ID Extraction and Data Subsetting Continued
ids <-topgenes1_sub[,"ID"]
subset_rma<-data_subset_rma[ids,]
#Data Scaling and Transposing
data_scaled<-t(scale(t(exprs(subset_rma))))

Question One

A. Pearson correlation as distance and average linkage:

#Performing h(clust) for Rows
clust_row_pearson<-hclust(as.dist(1-cor(t(data_scaled),method="pearson")),
                          method="complete")
#Performing h(clust) for Columns
clust_col_pearson<- hclust(as.dist(1-cor(data_scaled, method="pearson")), 
                           method="complete")
#Plotting of Heatmap on PNG File
png("heat_map_hh_pearson.png",height=1000,width=600);
par(oma=c(7,1,1,1))
heatmap.2(data_scaled,
          Rowv=as.dendrogram(clust_row_pearson), 
          Colv=as.dendrogram(clust_col_pearson), 
          labRow =fData(subset_rma)[,2],   
          labCol=paste(pData(data_subset_rma)[,1],
                       pData(data_subset_rma)[,2], 
                       sep="_"),
          dendrogram=c("both"),
          density.info="none", 
          trace="none",
          col=rev(brewer.pal(11,"RdBu")),
          key=TRUE)
title("hi")
dev.off()

Heatmap Plotted:

B. Spearman correlation as distance and average linkage

#Performing h(clust) for Rows
clust_row_spearman <-hclust(as.dist(1-cor(t(data_scaled),method="spearman")),method="complete")
#Performing h(clust) for Columns
clust_col_spearman <-hclust(as.dist(1-cor(data_scaled, method="spearman")),method="complete")
#Plotting of Heatmap on PNG File
png("heat_map_hh_spearman.png",height=1000,width=600);
par(oma=c(7,1,1,1))
heatmap.2(data_scaled,
           Rowv=as.dendrogram(clust_row_spearman), 
           Colv=as.dendrogram(clust_col_spearman), 
           labRow =fData(subset_rma)[,2],   
           labCol=paste(pData(data_subset_rma)[,1],
                       pData(data_subset_rma)[,2], 
                        sep="_"),
           dendrogram=c("both"),
           density.info="none", 
           trace="none",
           col=rev(brewer.pal(11,"RdBu")),
           key=TRUE) 
dev.off()

Heatmap Plotted:

Question Two

A. Use function as.dendrograms() to define new objects for both Pearson and Spearman setups (for clustered rows only). Adjust label size and plot dendrograms on the same page using par(mfrow=c(2,1)). Determine visually how many clusters you will color in part B. Explain your reasoning for deciding on the number of clusters.

#Using dendrogram function to define new objects for Pearson and Spearman
dend_pearson<-as.dendrogram(clust_row_pearson) #Pearson
dend_spearman<-as.dendrogram(clust_row_spearman) #Spearman
#Adjusting label sizes 
labels_cex(dend_pearson)<-0.5 #Pearson
labels_cex(dend_spearman)<-0.5 #Spearman
#Ensuring dendrograms are plotted on the same page 
par(mfrow=c(2,1))
plot(dend_pearson) #Pearson
title("Pearson Dendrogram")
plot(dend_spearman) #Spearman
title("Spearman Dendrogram")

Dendrograms Plotted:

After annotating the dendrogram plots, it was decided that we should color three clusters. This is because, we spotted three distinct breaks (for lack of a better word) within the data in both graphs. On the graph we circled where we believed the clusters would be.

B. Color your dendrograms and plot them again on the same page. Did the color function perform as expected?

#Coloring of dendrograms
dend_pearson_colored<-color_branches(dend_pearson,k=3) #Pearson
dend_spearman_colored<-color_branches(dend_spearman, k=3) #Spearman
#Ensuring colored dendrograms are plotted on the same page
par(mfrow=c(2,1))
plot(dend_pearson_colored) #Pearson
title("Colored Pearson Dendrogram")
plot(dend_spearman_colored) #Spearman
title("Colored Spearman Dendrogram")

Dendrogram Plotted:

To our surprise, yes, the color function performed as expected. In fact, the colored dendrographs are nearly identical to our annotations seen in part A. We were right to assume that the smallest cluster in both dendrograms would be that of the red cluster, as that was the easiest cluster to spot in both dendrograms. However, we had feared that we may have overestimated the width of the blue cluster seen in the Pearson dendrogram, and the green cluster in the Spearman dendrogram.

C. Using functions dendlist() and tanglegram() compare Pearson and Spearman dendrograms.

#Comparing clustering rows using dendlist() and tanglegram() 
tanglegram(dendlist(dend_pearson_colored, dend_spearman_colored),highlight_distinct_edges =FALSE, sort=T, main_left="Pearson", main_right="Spearman")

Tanglegram Plotted:

LS0tCnRpdGxlOiAiU2hvcnQgSG9tZXdvcmsgVHdvIChHcmFjZSBHb3Zlcm1hbiAmIEphbnZpZXIgUmljaGFyZHNvbikiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMjIyBQcmVwYXJhdGlvbiBvZiBEYXRhCgpgYGB7cn0KbG9hZCgiZGF0YV9zdWJzZXRfcm1hLlJkYXRhIikKI1N1YnNldCBEYXRhCnRvcGdlbmVzMV9zdWI8LXRvcGdlbmVzMVt0b3BnZW5lczFbLCJQLlZhbHVlIl08PTAuMDAxICYgYWJzKHRvcGdlbmVzMVssImxvZ0ZDIl0pPj0xLF0gCiNJRCBFeHRyYWN0aW9uIGFuZCBEYXRhIFN1YnNldHRpbmcgQ29udGludWVkCmlkcyA8LXRvcGdlbmVzMV9zdWJbLCJJRCJdCnN1YnNldF9ybWE8LWRhdGFfc3Vic2V0X3JtYVtpZHMsXQojRGF0YSBTY2FsaW5nIGFuZCBUcmFuc3Bvc2luZwpkYXRhX3NjYWxlZDwtdChzY2FsZSh0KGV4cHJzKHN1YnNldF9ybWEpKSkpCmBgYAoKIyMjIFF1ZXN0aW9uIE9uZQoKQS4gKlBlYXJzb24gY29ycmVsYXRpb24gYXMgZGlzdGFuY2UgYW5kIGF2ZXJhZ2UgbGlua2FnZToqCgpgYGB7cn0KI1BlcmZvcm1pbmcgaChjbHVzdCkgZm9yIFJvd3MKY2x1c3Rfcm93X3BlYXJzb248LWhjbHVzdChhcy5kaXN0KDEtY29yKHQoZGF0YV9zY2FsZWQpLG1ldGhvZD0icGVhcnNvbiIpKSwKICAgICAgICAgICAgICAgICAgICAgICAgICBtZXRob2Q9ImNvbXBsZXRlIikKI1BlcmZvcm1pbmcgaChjbHVzdCkgZm9yIENvbHVtbnMKY2x1c3RfY29sX3BlYXJzb248LSBoY2x1c3QoYXMuZGlzdCgxLWNvcihkYXRhX3NjYWxlZCwgbWV0aG9kPSJwZWFyc29uIikpLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgbWV0aG9kPSJjb21wbGV0ZSIpCiNQbG90dGluZyBvZiBIZWF0bWFwIG9uIFBORyBGaWxlCnBuZygiaGVhdF9tYXBfaGhfcGVhcnNvbi5wbmciLGhlaWdodD0xMDAwLHdpZHRoPTYwMCk7CnBhcihvbWE9Yyg3LDEsMSwxKSkKaGVhdG1hcC4yKGRhdGFfc2NhbGVkLAogICAgICAgICAgUm93dj1hcy5kZW5kcm9ncmFtKGNsdXN0X3Jvd19wZWFyc29uKSwgCiAgICAgICAgICBDb2x2PWFzLmRlbmRyb2dyYW0oY2x1c3RfY29sX3BlYXJzb24pLCAKICAgICAgICAgIGxhYlJvdyA9ZkRhdGEoc3Vic2V0X3JtYSlbLDJdLCAgIAogICAgICAgICAgbGFiQ29sPXBhc3RlKHBEYXRhKGRhdGFfc3Vic2V0X3JtYSlbLDFdLAogICAgICAgICAgICAgICAgICAgICAgIHBEYXRhKGRhdGFfc3Vic2V0X3JtYSlbLDJdLCAKICAgICAgICAgICAgICAgICAgICAgICBzZXA9Il8iKSwKICAgICAgICAgIGRlbmRyb2dyYW09YygiYm90aCIpLAogICAgICAgICAgZGVuc2l0eS5pbmZvPSJub25lIiwgCiAgICAgICAgICB0cmFjZT0ibm9uZSIsCiAgICAgICAgICBjb2w9cmV2KGJyZXdlci5wYWwoMTEsIlJkQnUiKSksCiAgICAgICAgICBrZXk9VFJVRSkKdGl0bGUoImhpIikKZGV2Lm9mZigpCmBgYAoKKipIZWF0bWFwIFBsb3R0ZWQ6KioKCiFbXShoZWF0X21hcF9oaF9wZWFyc29uLnBuZykKCkIuICpTcGVhcm1hbiBjb3JyZWxhdGlvbiBhcyBkaXN0YW5jZSBhbmQgYXZlcmFnZSBsaW5rYWdlKgoKYGBge3J9CiNQZXJmb3JtaW5nIGgoY2x1c3QpIGZvciBSb3dzCmNsdXN0X3Jvd19zcGVhcm1hbiA8LWhjbHVzdChhcy5kaXN0KDEtY29yKHQoZGF0YV9zY2FsZWQpLG1ldGhvZD0ic3BlYXJtYW4iKSksbWV0aG9kPSJjb21wbGV0ZSIpCiNQZXJmb3JtaW5nIGgoY2x1c3QpIGZvciBDb2x1bW5zCmNsdXN0X2NvbF9zcGVhcm1hbiA8LWhjbHVzdChhcy5kaXN0KDEtY29yKGRhdGFfc2NhbGVkLCBtZXRob2Q9InNwZWFybWFuIikpLG1ldGhvZD0iY29tcGxldGUiKQojUGxvdHRpbmcgb2YgSGVhdG1hcCBvbiBQTkcgRmlsZQpwbmcoImhlYXRfbWFwX2hoX3NwZWFybWFuLnBuZyIsaGVpZ2h0PTEwMDAsd2lkdGg9NjAwKTsKcGFyKG9tYT1jKDcsMSwxLDEpKQpoZWF0bWFwLjIoZGF0YV9zY2FsZWQsCiAgICAgICAgICAgUm93dj1hcy5kZW5kcm9ncmFtKGNsdXN0X3Jvd19zcGVhcm1hbiksIAogICAgICAgICAgIENvbHY9YXMuZGVuZHJvZ3JhbShjbHVzdF9jb2xfc3BlYXJtYW4pLCAKICAgICAgICAgICBsYWJSb3cgPWZEYXRhKHN1YnNldF9ybWEpWywyXSwgICAKICAgICAgICAgICBsYWJDb2w9cGFzdGUocERhdGEoZGF0YV9zdWJzZXRfcm1hKVssMV0sCiAgICAgICAgICAgICAgICAgICAgICAgcERhdGEoZGF0YV9zdWJzZXRfcm1hKVssMl0sIAogICAgICAgICAgICAgICAgICAgICAgICBzZXA9Il8iKSwKICAgICAgICAgICBkZW5kcm9ncmFtPWMoImJvdGgiKSwKICAgICAgICAgICBkZW5zaXR5LmluZm89Im5vbmUiLCAKICAgICAgICAgICB0cmFjZT0ibm9uZSIsCiAgICAgICAgICAgY29sPXJldihicmV3ZXIucGFsKDExLCJSZEJ1IikpLAogICAgICAgICAgIGtleT1UUlVFKSAKZGV2Lm9mZigpCmBgYAoKKipIZWF0bWFwIFBsb3R0ZWQ6KioKCiFbXShHU0UyMjg4Nl9GSUxFUy9oZWF0X21hcF9oaF9zcGVhcm1hbi5wbmcpCgojIyMgUXVlc3Rpb24gVHdvCgpBLiAqVXNlIGZ1bmN0aW9uIGFzLmRlbmRyb2dyYW1zKCkgdG8gZGVmaW5lIG5ldyBvYmplY3RzIGZvciBib3RoIFBlYXJzb24gYW5kIFNwZWFybWFuIHNldHVwcyAoZm9yIGNsdXN0ZXJlZCByb3dzIG9ubHkpLiBBZGp1c3QgbGFiZWwgc2l6ZSBhbmQgcGxvdCBkZW5kcm9ncmFtcyBvbiB0aGUgc2FtZSBwYWdlIHVzaW5nIHBhcihtZnJvdz1jKDIsMSkpLiBEZXRlcm1pbmUgdmlzdWFsbHkgaG93IG1hbnkgY2x1c3RlcnMgeW91IHdpbGwgY29sb3IgaW4gcGFydCBCLiBFeHBsYWluIHlvdXIgcmVhc29uaW5nIGZvciBkZWNpZGluZyBvbiB0aGUgbnVtYmVyIG9mIGNsdXN0ZXJzLioKCmBgYHtyfQojVXNpbmcgZGVuZHJvZ3JhbSBmdW5jdGlvbiB0byBkZWZpbmUgbmV3IG9iamVjdHMgZm9yIFBlYXJzb24gYW5kIFNwZWFybWFuCmRlbmRfcGVhcnNvbjwtYXMuZGVuZHJvZ3JhbShjbHVzdF9yb3dfcGVhcnNvbikgI1BlYXJzb24KZGVuZF9zcGVhcm1hbjwtYXMuZGVuZHJvZ3JhbShjbHVzdF9yb3dfc3BlYXJtYW4pICNTcGVhcm1hbgojQWRqdXN0aW5nIGxhYmVsIHNpemVzIApsYWJlbHNfY2V4KGRlbmRfcGVhcnNvbik8LTAuNSAjUGVhcnNvbgpsYWJlbHNfY2V4KGRlbmRfc3BlYXJtYW4pPC0wLjUgI1NwZWFybWFuCiNFbnN1cmluZyBkZW5kcm9ncmFtcyBhcmUgcGxvdHRlZCBvbiB0aGUgc2FtZSBwYWdlIApwYXIobWZyb3c9YygyLDEpKQpwbG90KGRlbmRfcGVhcnNvbikgI1BlYXJzb24KdGl0bGUoIlBlYXJzb24gRGVuZHJvZ3JhbSIpCnBsb3QoZGVuZF9zcGVhcm1hbikgI1NwZWFybWFuCnRpdGxlKCJTcGVhcm1hbiBEZW5kcm9ncmFtIikKYGBgCgoqKkRlbmRyb2dyYW1zIFBsb3R0ZWQ6KioKCiFbXShQZWFyc29uJTIwYW5kJTIwU3BlYXJtYW4lMjBEZW5kcm9ncmFtcyUyMEFubm90YXRlZC5wbmcpCgpBZnRlciBhbm5vdGF0aW5nIHRoZSBkZW5kcm9ncmFtIHBsb3RzLCBpdCB3YXMgZGVjaWRlZCB0aGF0IHdlIHNob3VsZCBjb2xvciB0aHJlZSBjbHVzdGVycy4gVGhpcyBpcyBiZWNhdXNlLCB3ZSBzcG90dGVkIHRocmVlIGRpc3RpbmN0IGJyZWFrcyAoZm9yIGxhY2sgb2YgYSBiZXR0ZXIgd29yZCkgd2l0aGluIHRoZSBkYXRhIGluIGJvdGggZ3JhcGhzLiBPbiB0aGUgZ3JhcGggd2UgY2lyY2xlZCB3aGVyZSB3ZSBiZWxpZXZlZCB0aGUgY2x1c3RlcnMgd291bGQgYmUuCgpCLiAqQ29sb3IgeW91ciBkZW5kcm9ncmFtcyBhbmQgcGxvdCB0aGVtIGFnYWluIG9uIHRoZSBzYW1lIHBhZ2UuIERpZCB0aGUgY29sb3IgZnVuY3Rpb24gcGVyZm9ybSBhcyBleHBlY3RlZD8qCgpgYGB7cn0KI0NvbG9yaW5nIG9mIGRlbmRyb2dyYW1zCmRlbmRfcGVhcnNvbl9jb2xvcmVkPC1jb2xvcl9icmFuY2hlcyhkZW5kX3BlYXJzb24saz0zKSAjUGVhcnNvbgpkZW5kX3NwZWFybWFuX2NvbG9yZWQ8LWNvbG9yX2JyYW5jaGVzKGRlbmRfc3BlYXJtYW4sIGs9MykgI1NwZWFybWFuCiNFbnN1cmluZyBjb2xvcmVkIGRlbmRyb2dyYW1zIGFyZSBwbG90dGVkIG9uIHRoZSBzYW1lIHBhZ2UKcGFyKG1mcm93PWMoMiwxKSkKcGxvdChkZW5kX3BlYXJzb25fY29sb3JlZCkgI1BlYXJzb24KdGl0bGUoIkNvbG9yZWQgUGVhcnNvbiBEZW5kcm9ncmFtIikKcGxvdChkZW5kX3NwZWFybWFuX2NvbG9yZWQpICNTcGVhcm1hbgp0aXRsZSgiQ29sb3JlZCBTcGVhcm1hbiBEZW5kcm9ncmFtIikKYGBgCgoqKkRlbmRyb2dyYW0gUGxvdHRlZDoqKgoKIVtdKENvbG9yZWQlMjBEZW5kcm9ncmFtcykKClRvIG91ciBzdXJwcmlzZSwgeWVzLCB0aGUgY29sb3IgZnVuY3Rpb24gcGVyZm9ybWVkIGFzIGV4cGVjdGVkLiBJbiBmYWN0LCB0aGUgY29sb3JlZCBkZW5kcm9ncmFwaHMgYXJlIG5lYXJseSBpZGVudGljYWwgdG8gb3VyIGFubm90YXRpb25zIHNlZW4gaW4gcGFydCBBLiBXZSB3ZXJlIHJpZ2h0IHRvIGFzc3VtZSB0aGF0IHRoZSBzbWFsbGVzdCBjbHVzdGVyIGluIGJvdGggZGVuZHJvZ3JhbXMgd291bGQgYmUgdGhhdCBvZiB0aGUgcmVkIGNsdXN0ZXIsIGFzIHRoYXQgd2FzIHRoZSBlYXNpZXN0IGNsdXN0ZXIgdG8gc3BvdCBpbiBib3RoIGRlbmRyb2dyYW1zLiBIb3dldmVyLCB3ZSBoYWQgZmVhcmVkIHRoYXQgd2UgbWF5IGhhdmUgb3ZlcmVzdGltYXRlZCB0aGUgd2lkdGggb2YgdGhlIGJsdWUgY2x1c3RlciBzZWVuIGluIHRoZSBQZWFyc29uIGRlbmRyb2dyYW0sIGFuZCB0aGUgZ3JlZW4gY2x1c3RlciBpbiB0aGUgU3BlYXJtYW4gZGVuZHJvZ3JhbS4KCkMuICpVc2luZyBmdW5jdGlvbnMgZGVuZGxpc3QoKSBhbmQgdGFuZ2xlZ3JhbSgpIGNvbXBhcmUgUGVhcnNvbiBhbmQgU3BlYXJtYW4gZGVuZHJvZ3JhbXMuKgoKYGBge3J9CiNDb21wYXJpbmcgY2x1c3RlcmluZyByb3dzIHVzaW5nIGRlbmRsaXN0KCkgYW5kIHRhbmdsZWdyYW0oKSAKdGFuZ2xlZ3JhbShkZW5kbGlzdChkZW5kX3BlYXJzb25fY29sb3JlZCwgZGVuZF9zcGVhcm1hbl9jb2xvcmVkKSxoaWdobGlnaHRfZGlzdGluY3RfZWRnZXMgPUZBTFNFLCBzb3J0PVQsIG1haW5fbGVmdD0iUGVhcnNvbiIsIG1haW5fcmlnaHQ9IlNwZWFybWFuIikKCmBgYAoKKipUYW5nbGVncmFtIFBsb3R0ZWQ6KioKCiFbXShQZWFyc29uJTIwdnMuJTIwU3BlYXJtYW4lMjBUYW5nbGVncmFtKQo=