Lectura del fichero
rna = read.table("/home/ricardo/Documents/recerca/RNAjairo/scoresSameFunc",
header = TRUE)
str(rna)
## 'data.frame': 76976 obs. of 7 variables:
## $ pdb1 : Factor w/ 417 levels "17raA","1a1tB",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ pdb2 : Factor w/ 417 levels "1a4tA","1a60A",..: 1 6 10 8 3 4 17 9 2 7 ...
## $ align: int 12 17 14 5 19 19 17 10 13 20 ...
## $ score: num 1.219 1.376 1.304 0.296 1.086 ...
## $ len1 : int 21 21 21 21 21 21 21 21 21 21 ...
## $ len2 : int 15 18 16 17 24 24 19 29 44 30 ...
## $ same : int 0 0 1 0 0 0 0 0 0 0 ...
## All pairs
hist(rna$score, freq = FALSE, ylim = c(0, 1.4), xlim = c(0, 3.4), main = "Histogram of all score's")
lines(density(rna$score), col = "red")
## Pairs NOT SAME
hist(rna$score[rna$same == 0], freq = FALSE, ylim = c(0, 1.4), xlim = c(0, 3.4),
main = "Histogram of score's NOT SAME function")
lines(density(rna$score[rna$same == 0]), col = "red")
## Pairs SAME
hist(rna$score[rna$same == 1], freq = FALSE, ylim = c(0, 1.4), xlim = c(0, 3.4),
main = "Histogram of score's SAME function")
lines(density(rna$score[rna$same == 1]), col = "red")
maxLen = pmax(rna$len1, rna$len2)
boxplot(rna$score ~ maxLen, main = "Score length resistant ALL pairs")
maxLen = pmax(rna$len1[rna$same == 0], rna$len2[rna$same == 0])
boxplot(rna$score[rna$same == 0] ~ maxLen, main = "Score length resistant NOT SAME pairs")
maxLen = pmax(rna$len1[rna$same == 1], rna$len2[rna$same == 1])
boxplot(rna$score[rna$same == 1] ~ maxLen, main = "Score length resistant SAME pairs")
n = length(rna$same)
p.not.same = length(rna$same[rna$same == 0])/n
p.not.same
## [1] 0.9674
p.same = 1 - p.not.same
p.same
## [1] 0.03265
###P(Score/SAME), P(Score/NOSAME)
n = length(rna$same)
p.not.same = length(rna$same[rna$same == 0])/n
p.not.same
## [1] 0.9674
p.same = 1 - p.not.same
p.same
## [1] 0.03265
Loading RNA scores and cut from 0.0 to 1.0 by step 1/k (k=number of intervals) for RNA scores
scoreSAME = rna$score[rna$same == 1]
scoreNOSAME = rna$score[rna$same == 0]
Compute Absolute Frecuencies k intervals, breaks from 0.00 to 1.00 by=step
# Set the parameters for breaks
k <- 100
M = max(rna$score)
M
## [1] 3.084
step <- M/100
step
## [1] 0.03084
breaks <- seq(0, M, by = step)
length <- length(breaks)
# Inicializate the data.frame for results
source.counts.scores <- data.frame(scores = breaks[2:length])
Computing the frequencies….
Counts scores
counts.SAME <- as.numeric(table(cut(scoreSAME, breaks = breaks)))
counts.NOSAME <- as.numeric(table(cut(scoreNOSAME, breaks = breaks)))
source.counts.scores$absolute.freqs.SAME <- counts.SAME
source.counts.scores$absolute.freqs.NOSAME <- counts.NOSAME
fix(source.counts.scores)
Not work
#Writing counts
write.table(source.counts.scores,file=PDTMFreq,row.names=F)
Freqs
N1 = sum(source.counts.scores$absolute.freqs.SAME)
N2 = sum(source.counts.scores$absolute.freqs.NOSAME)
plot(source.counts.scores$scores, source.counts.scores$absolute.freqs.NOSAME/N2,
col = "blue", type = "l", ylab = "Relative frequencies", xlab = "Score")
lines(source.counts.scores$scores, source.counts.scores$absolute.freqs.SAME/N1,
col = "red", type = "l")
legend(x = 0.748, y = 0.099, legend = c("Not Same", "Same"), fill = c("blue",
"red"), cex = 0.7)
Acums
N1 = sum(source.counts.scores$absolute.freqs.SAME)
N2 = sum(source.counts.scores$absolute.freqs.NOSAME)
plot(source.counts.scores$scores, cumsum(source.counts.scores$absolute.freqs.NOSAME)/N2,
col = "blue", type = "l", ylab = "Relative frequencies", xlab = "Score")
lines(source.counts.scores$scores, cumsum(source.counts.scores$absolute.freqs.SAME)/N1,
col = "red", type = "l")
legend(x = 0, y = 0.8, legend = c("Not Same", "Same"), fill = c("blue", "red"),
cex = 0.7)
Same/(Same+NoSame) ###
plot(source.counts.scores$scores, cumsum(source.counts.scores$absolute.freqs.NOSAME)/N2,
col = "blue", type = "l", ylab = "Relative frequencies", xlab = "Score")
lines(source.counts.scores$scores, cumsum(source.counts.scores$absolute.freqs.SAME)/N1,
col = "red", type = "l")
legend(x = 0, y = 0.8, legend = c("Not Same", "Same"), fill = c("blue", "red"),
cex = 0.7)