This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
library(readr, verbose = F)
library(tidyverse, verbose=F)
# Load the clustered SGA NxN genetic-interaction matrix (CDT layout).
mat <- read_tsv(
  "/Users/Cyrus/Desktop/828O/Assignment1/Data File S2. Raw genetic interaction datasets_ Matrix format/SGA_NxN_clustered.cdt"
)

# The first five rows are treated as annotation rows; the rest hold the data.
data_rows <- slice(mat, -(1:5))

# Interaction scores of the array ("dma") columns as a numeric matrix.
score_cols <- select(data_rows, starts_with("dma"))
amat <- as.matrix(type_convert(score_cols))

# Threshold on |score| > 0.2 to get an unweighted 0/1 adjacency matrix
# (missing scores stay NA here and are zeroed further down).
amat <- (abs(amat) > 0.2) * 1

# Array (column) metadata: the second annotation row, without the GID column,
# reshaped to one row per array with its column name (dma) and ORF id (orf).
annotation_rows <- select(slice(mat, 1:5), GID, starts_with("dma"))
orf_row <- select(slice(annotation_rows, 2), -1)
coldata <- gather(orf_row, dma, orf, starts_with("dma"))

# ORF ids of the queries (rows).
row_orf <- pull(data_rows, ORF)

# For every query ORF, the position of the array column with the same ORF
# (NA when the ORF has no array column).
m <- match(row_orf, coldata$orf)
rows_to_use <- !is.na(m)
cols_to_use <- m[rows_to_use]

# Keep only ORFs present on both axes; row i and column i now refer to the
# same ORF.
amat <- amat[rows_to_use, cols_to_use]

# No missing entries and no self-interactions.
amat[is.na(amat)] <- 0
diag(amat) <- 0

# Make the adjacency matrix symmetric: an edge exists if it was observed in
# either direction.
amat <- ceiling((amat + t(amat)) / 2)
# Removing repeated ORFs.
# Row i and column i of amat refer to the same ORF (both axes were selected
# through rows_to_use / cols_to_use), so a repeated ORF has to be dropped at
# the SAME positions on both axes.  Looking the names up again with match()
# would return the first occurrence rather than the repeat, which shifts rows
# against columns (breaking symmetry) and, for an ORF present three or more
# times, leaves the matrix non-square.  A logical mask also behaves correctly
# when nothing is repeated, whereas amat[-integer(0), ] selects nothing.
matched_orf <- row_orf[rows_to_use]
is_repeated <- duplicated(matched_orf)

# Positions and names of the repeats, kept under their original names.
Repeated_Rows_id <- which(is_repeated)
Repeated_Rows_Name <- matched_orf[is_repeated]
Repeated_col_id <- Repeated_Rows_id

# Matrix without any repeated rows or columns (first occurrence is kept)
amat <- amat[!is_repeated, !is_repeated, drop = FALSE]
# Number of vertices, V
V <- ncol(amat)

# Number of edges, E.  In a symmetric 0/1 adjacency matrix every undirected
# edge appears twice (as [i, j] and as [j, i]), so the sum is halved.
E <- sum(amat) / 2

# Average degree, avDeg: each edge adds one to the degree of two vertices.
avDeg <- 2 * E / V

# Density: share of the V * (V - 1) / 2 possible edges that are present.
D <- 2 * E / (V * (V - 1))

print(c(Vertices = V, Edges = E, AvgDegree = avDeg, Density = D))
Vertices Edges AvgDegree Density
2.801000e+03 6.676100e+04 1.191735e+01 8.512394e-03
# Degree distribution: the degree of a vertex is the sum of its row in amat.
Deg_Dist <- rowSums(amat)
hist(Deg_Dist, breaks = 200, freq = FALSE)

# Log-log plot of the degree distribution.
# Vertices of degree 0 are left out because log(0) is not finite.
Deg_Dist <- Deg_Dist[Deg_Dist > 0]

# Number of vertices per observed degree, as a share of all V vertices.
degree_counts <- table(Deg_Dist)
Deg_Prob_logscale <- log(degree_counts / V)

# The table is labelled by the degree values themselves.
Deg_logscale <- log(as.numeric(names(Deg_Prob_logscale)))

plot(Deg_logscale, Deg_Prob_logscale)

# All-pairs shortest paths via a library routine instead of a hand-written BFS.
library(spa)
# Shortest_PathNxN is read by the chunks below, so it has to be computed here.
# amat stores 0 for "no edge"; handed to floyd() unchanged, those zeros would
# presumably be read as zero-length links and collapse every distance to 0.
# Missing edges are therefore marked as Inf (self-distances stay 0) so that
# only real edges, each of length 1, can be used.
# NOTE(review): confirm in the spa::floyd documentation that Inf is its
# convention for absent edges.  The computation is cubic in the number of
# vertices and may take a while.
path_weights <- amat
path_weights[path_weights == 0] <- Inf
diag(path_weights) <- 0
Shortest_PathNxN <- floyd(path_weights)
# Average distance over pairs of distinct vertices that can reach each other.
# The diagonal (distance from a vertex to itself, always 0) is excluded so it
# does not pull the mean down; non-finite entries (e.g. Inf for unreachable
# pairs, NA) are skipped.
off_diagonal <- row(Shortest_PathNxN) != col(Shortest_PathNxN)
mean(Shortest_PathNxN[off_diagonal & is.finite(Shortest_PathNxN)])
[1] 0
# Diameter of the network: the largest shortest-path length that is not
# infinite (missing values are ignored).
non_infinite_lengths <- Shortest_PathNxN[!is.infinite(Shortest_PathNxN)]
max(non_infinite_lengths, na.rm = TRUE)
[1] 0
library(ggplot2)
# Histogram of all pairwise shortest-path lengths.
hist(Shortest_PathNxN)

# Calculating Clustering Coefficient
#
# Local clustering coefficient of one vertex, on a log scale.
# Reads the global 0/1 adjacency matrix `amat`.
#
# i: row index of the vertex in `amat`.
#
# Returns a one-row data.frame with
#   Cf        log of the clustering coefficient, i.e. the number of links
#             among the neighbours of i divided by the k * (k - 1) / 2 links
#             that could exist among them (-Inf when none of them are linked);
#   Vertices  log(k), where k is the number of neighbours of i.
# Both columns are NA when i has fewer than two neighbours, because the
# coefficient is undefined there.
cluster_coefficient <- function(i) {
  neighbor_ids <- which(amat[i, ] == 1)
  degree <- length(neighbor_ids)
  if (degree < 2) {
    return(data.frame(Cf = NA_real_, Vertices = NA_real_))
  }

  possible_links <- degree * (degree - 1) / 2
  # Summing the neighbour-by-neighbour submatrix counts every undirected link
  # twice ([a, b] and [b, a]), hence the division by 2.
  actual_links <- sum(amat[neighbor_ids, neighbor_ids, drop = FALSE]) / 2

  data.frame(Cf = log(actual_links / possible_links), Vertices = log(degree))
}
# Clustering Coefficient table, CC: one row per vertex holding the "Vertices"
# and "Cf" values returned by cluster_coefficient().
# vapply() with a named numeric template guarantees a 2 x n numeric matrix
# for any number of vertices, and seq_len() yields an empty sequence (instead
# of c(1, 0)) when amat has no columns.
cc_values <- vapply(
  seq_len(ncol(amat)),
  function(i) unlist(cluster_coefficient(i))[c("Cf", "Vertices")],
  c(Cf = 0, Vertices = 0)
)
CC <- data.frame(Vertices = cc_values["Vertices", ], Cf = cc_values["Cf", ])

# Drop vertices for which either value is missing (NA/NaN) or infinite.
CC <- CC[is.finite(CC$Vertices) & is.finite(CC$Cf), , drop = FALSE]
# Plotting CC vs Vertices on a log-log scale: the mean Cf for every distinct
# value of the Vertices column.
library(ggplot2)
TT <- aggregate(CC, list(CC$Vertices), mean)
plot(
  x = TT$Vertices, y = TT$Cf,
  main = "Vertices vs Cluster Coefficient LogLog",
  xlab = "Log(Vertices)",
  ylab = "Log(Cluster_Coef)"
)

END
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6DQogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQNCiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0DQotLS0NCg0KVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gDQoNClRyeSBleGVjdXRpbmcgdGhpcyBjaHVuayBieSBjbGlja2luZyB0aGUgKlJ1biogYnV0dG9uIHdpdGhpbiB0aGUgY2h1bmsgb3IgYnkgcGxhY2luZyB5b3VyIGN1cnNvciBpbnNpZGUgaXQgYW5kIHByZXNzaW5nICpDdHJsK1NoaWZ0K0VudGVyKi4gDQoNCmBgYHtyfQ0KbGlicmFyeShyZWFkciwgdmVyYm9zZSA9IEYpDQpsaWJyYXJ5KHRpZHl2ZXJzZSwgdmVyYm9zZT1GKQ0KbWF0PC1yZWFkX3RzdignL1VzZXJzL0N5cnVzL0Rlc2t0b3AvODI4Ty9Bc3NpZ25tZW50MS9EYXRhIEZpbGUgUzIuIFJhdyBnZW5ldGljIGludGVyYWN0aW9uIGRhdGFzZXRzXyBNYXRyaXggZm9ybWF0L1NHQV9OeE5fY2x1c3RlcmVkLmNkdCcpDQoNCmFtYXQgPC0gbWF0ICU+JQ0KICAjIGV4dHJhY3QgZGF0YSByb3dzIGFuZCBjb2x1bW5zDQogIHNsaWNlKC0oMTo1KSkgJT4lDQogIHNlbGVjdChzdGFydHNfd2l0aCgiZG1hIikpICU+JQ0KICANCiAgIyBjb252ZXJ0IHRvIG51bWVyaWMgbWF0cml4DQogIHR5cGVfY29udmVydCgpICU+JQ0KICBhcy5tYXRyaXgoKQ0KICANCiMgdHVybiBpbnRvIHVud2VpZ2h0ZWQsIHVuZGlyZWN0ZWQgYWRqYWNlbmN5IG1hdHJpeCAgDQphbWF0IDwtIDEgKiAoYWJzKGFtYXQpID4gMC4yKQ0KICANCiMgZXh0cmFjdCBkYXRhIGFib3V0IGFycmF5cyAoY29sdW1ucykNCmNvbGRhdGEgPC0gbWF0ICU+JQ0KICBzbGljZSgxOjUpICU+JQ0KICBzZWxlY3QoR0lELCBzdGFydHNfd2l0aCgiZG1hIikpICU+JQ0KICBzbGljZSgyKSAlPiUgc2VsZWN0KC0xKSAlPiUNCiAgZ2F0aGVyKGRtYSwgb3JmLCBzdGFydHNfd2l0aCgiZG1hIikpDQogDQojIGV4dHJhY3QgT1JGIGlkcyBmb3IgcXVlcmllcyAocm93cykNCnJvd19vcmYgPC0gbWF0ICU+JQ0KICBzbGljZSgtKDE6NSkpICU+JQ0KICBwdWxsKE9SRikNCg0KIyBtYXRjaCByb3cgYW5kIGNvbHVtbiBPUkYgaWRzDQptIDwtIG1hdGNoKHJvd19vcmYsIGNvbGRhdGEkb3JmKQ0Kcm93c190b191c2UgPC0gIWlzLm5hKG0pDQpjb2xzX3RvX3VzZSA8LSBtW3Jvd3NfdG9fdXNlXQ0KDQojIHN1YnNldCBtYXRyaXggaW50byBPUkZzIGZvdW5kIGluIGJvdGggcm93cyBhbmQgY29sdW1ucw0KYW1hdCA8LSBhbWF0W3Jvd3NfdG9fdXNlLCBjb2xzX3RvX3VzZV0NCg0KIyBzZXQgZGlhZ29uYWwgYW5kIG1pc3NpbmcgZW50cmllcyBpbiBtYXRyaXggdG8gMA0KZGlhZyhhbWF0KSA8LSAwDQphbWF0W2lzLm5hKGFtYXQpXSA8LSAwDQoNCiMgbWFrZSB0aGUgYWRqYWNlbmN5IG1hdHJpeCBk
aWFnb25hbA0KYW1hdCA8LSBjZWlsaW5nKDAuNSAqIChhbWF0ICsgdChhbWF0KSkpDQoNCiNSZW1vdmluZyByZXBlYXRlZCByb3dzDQpSZXBlYXRlZF9Sb3dzX2lkPSB3aGljaChkdXBsaWNhdGVkKHJvd19vcmZbcm93c190b191c2VdKSkNClJlcGVhdGVkX1Jvd3NfTmFtZSA9IHJvd19vcmZbcm93c190b191c2VdW3doaWNoKGR1cGxpY2F0ZWQocm93X29yZltyb3dzX3RvX3VzZV0pKV0NClJlcGVhdGVkX2NvbF9pZD0gbWF0Y2goUmVwZWF0ZWRfUm93c19OYW1lLCBjb2xkYXRhJG9yZltjb2xzX3RvX3VzZV0pDQoNCiNNYXRyaXggd2l0aG91dCBhbnkgcmVwZWF0ZWQgcm93cyBvciBjb2x1bW4NCmFtYXQ9YW1hdFstUmVwZWF0ZWRfUm93c19pZCwgLVJlcGVhdGVkX2NvbF9pZF0NCg0KYGBgDQoNCmBgYHtyfQ0KIyBvZiBWZXJ0aWNlcywgVg0KVjwtIG5jb2woYW1hdCkNCg0KIyBvZiBFZ2RlcywgRSANCkU8LSBzdW0oYW1hdCkNCg0KI0F2ZXJhZ2UgRGVncmVlLCBhdkRlZw0KYXZEZWc8LSAoRS9WKS8yIA0KDQojRGVuc2l0eQ0KRD0gKDIqYXZEZWcpLyhWLTEpDQoNCnByaW50KGMoVmVydGljZXM9ViwgRWRnZXM9RSwgQXZnRGVncmVlPWF2RGVnLCBEZW5zaXR5PUQpKQ0KYGBgDQoNCmBgYHtyfQ0KI0RlZ3JlZSBEaXN0aWJ1dGlvbg0KRGVnX0Rpc3Q9YXBwbHkoYW1hdCwgMSwgc3VtKQ0KaGlzdChEZWdfRGlzdCwgZnJlcT1GLCBicmVha3M9MjAwKQ0KYGBgDQoNCmBgYHtyfQ0KI0xvZy1Mb2cgcGxvdA0KRGVnX0Rpc3Q9RGVnX0Rpc3RbRGVnX0Rpc3Q+MF0NCkRlZ19Qcm9iX2xvZ3NjYWxlPWxvZyh0YWJsZShEZWdfRGlzdCkvVikNCkRlZ19sb2dzY2FsZT1sb2coYXMubnVtZXJpYyhyb3duYW1lcyhEZWdfUHJvYl9sb2dzY2FsZSkpKQ0KcGxvdChEZWdfbG9nc2NhbGUsIERlZ19Qcm9iX2xvZ3NjYWxlKQ0KYGBgDQoNCmBgYHtyfQ0KI0JGUyBpcyByZWludmVudGluZyB0aGUgd2hlZWwNCmxpYnJhcnkoc3BhKQ0KI1Nob3J0ZXN0X1BhdGhOeE49ZmxveWQoYW1hdCkNCmBgYA0KDQpgYGB7cn0NCiNBdmVyYWdlX0Rpc3RhbmNlDQptZWFuKFNob3J0ZXN0X1BhdGhOeE5bIWlzLmluZmluaXRlKFNob3J0ZXN0X1BhdGhOeE4pXSwgbmEucm09VCkNCiNEaWFtZXRlciBvZiB0aGUgTmV0d29yaw0KbWF4KFNob3J0ZXN0X1BhdGhOeE5bIWlzLmluZmluaXRlKFNob3J0ZXN0X1BhdGhOeE4pXSwgbmEucm09VCkNCmxpYnJhcnkoZ2dwbG90MikNClRlbXBfRGF0YT1kYXRhLmZyYW1lKCkNCmhpc3QoU2hvcnRlc3RfUGF0aE54TikNCmBgYA0KYGBge3J9DQojQ2FsY3VsYXRpbmcgQ2x1c3RlcmluZyBDb2VmZmljaWVudA0KDQpjbHVzdGVyX2NvZWZmaWNpZW50PC1mdW5jdGlvbihpKXsNCiAgdHJ5Q2F0Y2goew0KICBOZWlnaGJvcnNfaWQ9d2hpY2goYW1hdFtpLF09PTEpDQogIEFsbF9Qb3NzX2NvbWI9IGxlbmd0aChOZWlnaGJvcnNfaWQpKihsZW5ndGgoTmVpZ2hib3JzX2lkKS0xKS8yDQogIE5nYnJfQ29ubl9jb3VudD1zdW0oc2FwcGx5KE5laWdoYm9yc19pZCwg
ZnVuY3Rpb24oeCkgc3VtKGFtYXRbeCxOZWlnaGJvcnNfaWRdKSkpDQogIGRhdGEuZnJhbWUoQ2Y9bG9nKEFsbF9Qb3NzX2NvbWIvTmdicl9Db25uX2NvdW50KSwgVmVydGljZXM9bG9nKGxlbmd0aChOZWlnaGJvcnNfaWQpKSkNCiAgfSwgIGVycm9yPSBmdW5jdGlvbihlcnIpeyBkYXRhLmZyYW1lKENmPTAsVmVydGljZXM9MCkgfSkNCn0NCg0KI0NsdXN0ZXJpbmcgQ29lZmZpY2llbnQ6OiBDQw0KQ0M9YXMuZGF0YS5mcmFtZSh0KHNhcHBseSgxOm5jb2woYW1hdCksIGZ1bmN0aW9uKGkpIGNsdXN0ZXJfY29lZmZpY2llbnQoaSkpKSkNCkNDPWRhdGEuZnJhbWUoVmVydGljZXM9dW5saXN0KENDJFZlcnRpY2VzKSwgQ2Y9dW5saXN0KENDJENmKSkNCkNDPWRvLmNhbGwoZGF0YS5mcmFtZSxsYXBwbHkoQ0MsIGZ1bmN0aW9uKHgpIHJlcGxhY2UoeCwgaXMuaW5maW5pdGUoeCksTkEpKSkNCkNDPW5hLm9taXQoQ0MpDQoNCiNQbG90dGluZyBDQyB2cyBWZXJ0aWNlcyBsb2ctbG9nIHNjYWxlDQpsaWJyYXJ5KGdncGxvdDIpDQpUVD1hZ2dyZWdhdGUoQ0MsIGxpc3QoQ0MkVmVydGljZXMpLCBtZWFuKQ0KcGxvdCh4PVRUJFZlcnRpY2VzLCB5PVRUJENmLCBtYWluPSJWZXJ0aWNlcyB2cyBDbHVzdGVyIENvZWZmaWNpZW50IExvZ0xvZyIsIHhsYWIgPSAiTG9nKFZlcnRpY2VzKSIsIHlsYWI9IkxvZyhDbHVzdGVyX0NvZWYiKQ0KYGBgDQoqKkVORCoq