The data were built using BLAST.
Exact chromosome lengths were used to set the x-axis range of each plot. Note: run the graphs independently to check whether this is a graphical error.
# Work from the directory that holds the BLAST result files; the relative
# file names below are resolved against it.
setwd("/Users/Sid/Projects/CRM_mapping/CRM_files/")

# BLAST hit tables (first row is a header), one per reference genome version:
# x = v1, y = v2, z = v3.
x <- read.table(file = "CentC_sequence_rgv1.blast", header = TRUE)
y <- read.table(file = "CentC_sequence_rgv2.blast", header = TRUE)
z <- read.table(file = "CentC_sequence_rgv3.blast", header = TRUE)

# Lengths of the ten chromosomes in each reference version. The literals are
# typed in units of 1e6 and scaled here; downstream they serve as x-axis
# limits on a "Length (bp)" axis.
chrv1 <- 1e+06 * c(
  304.528171, 238.106451, 233.851825, 250.625444, 220.014323,
  171.672219, 173.416676, 177.008375, 154.526921, 151.824418
)
chrv2 <- 1e+06 * c(
  306.376751, 241.020068, 236.009224, 245.49811, 221.504114,
  171.993973, 179.710889, 178.723702, 159.363265, 152.692642
)
chrv3 <- 1e+06 * c(
  306.501515, 241.88276, 236.116286, 246.096644, 221.592184,
  172.2313, 167.573466, 178.300502, 159.655329, 152.126076
)
Some Summary Statistics
Reference Genome v1
# Per-column summary of the reference genome v1 hit table (x).
summary(x)
## query_id subject_id identity alignment_length
## Query_1:48797 Min. : 1.00 Min. : 74.5 Min. : 28
## 1st Qu.: 2.00 1st Qu.: 89.5 1st Qu.: 993
## Median : 5.00 Median : 90.7 Median : 1530
## Mean : 5.52 Mean : 90.5 Mean : 2389
## 3rd Qu.: 9.00 3rd Qu.: 91.7 3rd Qu.: 3239
## Max. :10.00 Max. :100.0 Max. :11386
## mismatches gap_opens q_start q_end
## Min. : 0 Min. : 0.0 Min. : 1 Min. : 35
## 1st Qu.: 71 1st Qu.: 16.0 1st Qu.: 1387 1st Qu.: 1653
## Median : 117 Median : 33.0 Median : 2869 Median : 6427
## Mean : 173 Mean : 47.1 Mean : 4263 Mean : 6626
## 3rd Qu.: 236 3rd Qu.: 66.0 3rd Qu.: 7604 3rd Qu.:11185
## Max. :1105 Max. :276.0 Max. :12504 Max. :12624
## s_start s_end evalue bit_score
## Min. :1.11e+05 Min. :1.11e+05 Min. :0e+00 Min. : 53
## 1st Qu.:5.55e+07 1st Qu.:5.55e+07 1st Qu.:0e+00 1st Qu.: 1247
## Median :6.88e+07 Median :6.88e+07 Median :0e+00 Median : 2047
## Mean :7.66e+07 Mean :7.66e+07 Mean :1e-07 Mean : 3100
## 3rd Qu.:1.05e+08 3rd Qu.:1.05e+08 3rd Qu.:0e+00 3rd Qu.: 4231
## Max. :1.94e+08 Max. :1.94e+08 Max. :3e-03 Max. :15440
Reference Genome v2
# Per-column summary of the reference genome v2 hit table (y).
summary(y)
## query_id subject_id identity alignment_length
## Query_1:61664 Min. : 1.00 Min. : 73.9 Min. : 28
## 1st Qu.: 3.00 1st Qu.: 89.7 1st Qu.: 958
## Median : 7.00 Median : 90.8 Median : 1498
## Mean : 5.95 Mean : 90.5 Mean : 2320
## 3rd Qu.: 8.00 3rd Qu.: 91.7 3rd Qu.: 3060
## Max. :10.00 Max. :100.0 Max. :11387
## mismatches gap_opens q_start q_end
## Min. : 0 Min. : 0.0 Min. : 1 Min. : 35
## 1st Qu.: 69 1st Qu.: 15.0 1st Qu.: 1393 1st Qu.: 1739
## Median : 114 Median : 31.0 Median : 3021 Median : 6424
## Mean : 167 Mean : 45.1 Mean : 4322 Mean : 6617
## 3rd Qu.: 225 3rd Qu.: 62.0 3rd Qu.: 7691 3rd Qu.:11107
## Max. :1105 Max. :276.0 Max. :12504 Max. :12624
## s_start s_end evalue bit_score
## Min. :3.36e+03 Min. :3.36e+03 Min. :0e+00 Min. : 53
## 1st Qu.:4.82e+07 1st Qu.:4.82e+07 1st Qu.:0e+00 1st Qu.: 1219
## Median :6.24e+07 Median :6.24e+07 Median :0e+00 Median : 2034
## Mean :7.13e+07 Mean :7.13e+07 Mean :1e-07 Mean : 3018
## 3rd Qu.:1.05e+08 3rd Qu.:1.05e+08 3rd Qu.:0e+00 3rd Qu.: 3989
## Max. :1.89e+08 Max. :1.89e+08 Max. :3e-03 Max. :15710
Reference Genome v3
# Per-column summary of the reference genome v3 hit table (z).
# Unlike v1/v2, subject_id here is non-numeric: the output below lists
# scaffold names (e.g. scaffold_498) alongside the chromosome numbers.
summary(z)
## query_id subject_id identity alignment_length
## Query_1:70876 7 :18269 Min. : 73.9 Min. : 28
## 1 :11237 1st Qu.: 89.7 1st Qu.: 939
## 10 : 9558 Median : 90.8 Median : 1455
## scaffold_498: 5535 Mean : 90.5 Mean : 2308
## 9 : 5289 3rd Qu.: 91.7 3rd Qu.: 3041
## 8 : 4781 Max. :100.0 Max. :11387
## (Other) :16207
## mismatches gap_opens q_start q_end
## Min. : 0 Min. : 0.0 Min. : 1 Min. : 35
## 1st Qu.: 68 1st Qu.: 15.0 1st Qu.: 1394 1st Qu.: 1742
## Median : 112 Median : 31.0 Median : 3031 Median : 6419
## Mean : 166 Mean : 44.7 Mean : 4329 Mean : 6612
## 3rd Qu.: 224 3rd Qu.: 61.0 3rd Qu.: 7698 3rd Qu.:11098
## Max. :1105 Max. :280.0 Max. :12534 Max. :12624
##
## s_start s_end evalue bit_score
## Min. :1.00e+00 Min. :1.00e+00 Min. :0e+00 Min. : 53
## 1st Qu.:2.30e+07 1st Qu.:2.30e+07 1st Qu.:0e+00 1st Qu.: 1199
## Median :5.49e+07 Median :5.49e+07 Median :0e+00 Median : 2013
## Mean :6.32e+07 Mean :6.32e+07 Mean :1e-07 Mean : 3002
## 3rd Qu.:9.79e+07 3rd Qu.:9.79e+07 3rd Qu.:0e+00 3rd Qu.: 3962
## Max. :1.89e+08 Max. :1.89e+08 Max. :3e-03 Max. :15710
##
Pink plots = Reference Genome v1
Green plots = Reference Genome v2
Blue plots = Reference Genome v3
Any other color appears where the semi-transparent marks of two or all three reference genomes overlap.
# For each chromosome, overlay the density of BLAST hit start positions
# (s_start) from the three reference genome versions, and mark the individual
# hits with rug ticks along the x-axis.
#
# Reads from the enclosing script: hit tables x, y, z (columns subject_id and
# s_start) and chromosome length vectors chrv1, chrv2, chrv3 (bp).
#
# Changes relative to the earlier version of this loop:
#   * All three curves are drawn in ONE coordinate system (shared x- and
#     y-range) using lines(), instead of stacking three independent plots with
#     par(new = TRUE). Previously each curve had its own x/y scale, so the
#     drawn axes only described the v3 curve and positions were not comparable.
#   * The rug ticks are restricted to hits on the chromosome being plotted;
#     previously every panel showed the hits of all chromosomes.
#   * Versions with fewer than two hits on a chromosome are skipped for the
#     density curve, because density() cannot estimate a bandwidth from them.
#   * The non-existent `xlim` argument to axis() and the integer passed as
#     rug(quiet = ) were removed; rug() now uses its documented default, so
#     ticks falling outside the plot region still raise the usual warning.
par(mar = c(2, 4, 2, 4))
# NOTE(review): with a 2-line bottom margin the x-axis label is requested
# further out than the margin reaches, so it may not be visible - confirm and
# enlarge mar[1] if the label is wanted.

centc_hits <- list(x, y, z)
centc_cols <- c(
  rgb(0.8, 0.4, 0.6, 0.8), # v1: pink
  rgb(0.6, 0.8, 0.4, 0.8), # v2: green
  rgb(0.4, 0.6, 0.8, 0.8)  # v3: blue
)
# Increasing tick heights so the three rugs remain distinguishable.
centc_ticks <- c(0.05, 0.1, 0.15)

for (i in seq_along(chrv1)) {
  # Finite hit start positions on chromosome i, one vector per version.
  # `==` matches by label when subject_id is not numeric (as in v3);
  # which() drops NA comparisons.
  centc_starts <- lapply(centc_hits, function(hits) {
    s <- hits$s_start[which(hits$subject_id == i)]
    s[is.finite(s)]
  })

  # Kernel density per version; NULL where there are too few hits.
  centc_dens <- lapply(centc_starts, function(s) {
    if (length(s) >= 2) density(s) else NULL
  })
  centc_ok <- !vapply(centc_dens, is.null, logical(1))

  # Shared axes: x spans the longest of the three assemblies of this
  # chromosome, y spans the tallest of the available curves.
  x_max <- max(chrv1[i], chrv2[i], chrv3[i])
  y_max <- if (any(centc_ok)) {
    max(vapply(centc_dens[centc_ok], function(d) max(d$y), numeric(1)))
  } else {
    1
  }

  # Empty canvas carrying the title and the common coordinate system.
  plot(0, 0, type = "n", xlim = c(0, x_max), ylim = c(0, y_max),
       main = i, xlab = "Length (bp)", ylab = " ", axes = FALSE)

  for (v in seq_along(centc_hits)) {
    if (centc_ok[v]) {
      lines(centc_dens[[v]], col = centc_cols[v])
    }
    if (length(centc_starts[[v]]) > 0) {
      rug(centc_starts[[v]], ticksize = centc_ticks[v], side = 1, lwd = 1,
          col = centc_cols[v])
    }
  }

  axis(4, las = 1)
  axis(2, las = 1)
  axis(1)
}