CentC Mapping on B73 Reference Genome Versions 1-3

The data were built using BLAST.

Exact chromosome lengths were used to set the x-axis range of the plots. TODO: run the graphs independently to check whether this is a graphical error.

# Work from the directory that holds the CentC BLAST result files.
setwd("/Users/Sid/Projects/CRM_mapping/CRM_files/")

# One hit table per reference genome version; the first row of each file
# supplies the column names.
x <- read.table("CentC_sequence_rgv1.blast", header = TRUE)
y <- read.table("CentC_sequence_rgv2.blast", header = TRUE)
z <- read.table("CentC_sequence_rgv3.blast", header = TRUE)

# Lengths of chromosomes 1-10 for each reference genome version. The values
# are scaled by 1e6 (presumably entered in Mb and converted to bp -- confirm).
chrv1 <- c(304.528171, 238.106451, 233.851825, 250.625444, 220.014323,
    171.672219, 173.416676, 177.008375, 154.526921, 151.824418) * 1e+06
chrv2 <- c(306.376751, 241.020068, 236.009224, 245.49811, 221.504114,
    171.993973, 179.710889, 178.723702, 159.363265, 152.692642) * 1e+06
chrv3 <- c(306.501515, 241.88276, 236.116286, 246.096644, 221.592184,
    172.2313, 167.573466, 178.300502, 159.655329, 152.126076) * 1e+06

Some Summary Statistics

Reference Genome v1

# Column-wise summary of the hit table read from CentC_sequence_rgv1.blast.
summary(x)
##     query_id       subject_id       identity     alignment_length
##  Query_1:48797   Min.   : 1.00   Min.   : 74.5   Min.   :   28   
##                  1st Qu.: 2.00   1st Qu.: 89.5   1st Qu.:  993   
##                  Median : 5.00   Median : 90.7   Median : 1530   
##                  Mean   : 5.52   Mean   : 90.5   Mean   : 2389   
##                  3rd Qu.: 9.00   3rd Qu.: 91.7   3rd Qu.: 3239   
##                  Max.   :10.00   Max.   :100.0   Max.   :11386   
##    mismatches     gap_opens        q_start          q_end      
##  Min.   :   0   Min.   :  0.0   Min.   :    1   Min.   :   35  
##  1st Qu.:  71   1st Qu.: 16.0   1st Qu.: 1387   1st Qu.: 1653  
##  Median : 117   Median : 33.0   Median : 2869   Median : 6427  
##  Mean   : 173   Mean   : 47.1   Mean   : 4263   Mean   : 6626  
##  3rd Qu.: 236   3rd Qu.: 66.0   3rd Qu.: 7604   3rd Qu.:11185  
##  Max.   :1105   Max.   :276.0   Max.   :12504   Max.   :12624  
##     s_start             s_end              evalue        bit_score    
##  Min.   :1.11e+05   Min.   :1.11e+05   Min.   :0e+00   Min.   :   53  
##  1st Qu.:5.55e+07   1st Qu.:5.55e+07   1st Qu.:0e+00   1st Qu.: 1247  
##  Median :6.88e+07   Median :6.88e+07   Median :0e+00   Median : 2047  
##  Mean   :7.66e+07   Mean   :7.66e+07   Mean   :1e-07   Mean   : 3100  
##  3rd Qu.:1.05e+08   3rd Qu.:1.05e+08   3rd Qu.:0e+00   3rd Qu.: 4231  
##  Max.   :1.94e+08   Max.   :1.94e+08   Max.   :3e-03   Max.   :15440

Reference Genome v2

# Column-wise summary of the hit table read from CentC_sequence_rgv2.blast.
summary(y)
##     query_id       subject_id       identity     alignment_length
##  Query_1:61664   Min.   : 1.00   Min.   : 73.9   Min.   :   28   
##                  1st Qu.: 3.00   1st Qu.: 89.7   1st Qu.:  958   
##                  Median : 7.00   Median : 90.8   Median : 1498   
##                  Mean   : 5.95   Mean   : 90.5   Mean   : 2320   
##                  3rd Qu.: 8.00   3rd Qu.: 91.7   3rd Qu.: 3060   
##                  Max.   :10.00   Max.   :100.0   Max.   :11387   
##    mismatches     gap_opens        q_start          q_end      
##  Min.   :   0   Min.   :  0.0   Min.   :    1   Min.   :   35  
##  1st Qu.:  69   1st Qu.: 15.0   1st Qu.: 1393   1st Qu.: 1739  
##  Median : 114   Median : 31.0   Median : 3021   Median : 6424  
##  Mean   : 167   Mean   : 45.1   Mean   : 4322   Mean   : 6617  
##  3rd Qu.: 225   3rd Qu.: 62.0   3rd Qu.: 7691   3rd Qu.:11107  
##  Max.   :1105   Max.   :276.0   Max.   :12504   Max.   :12624  
##     s_start             s_end              evalue        bit_score    
##  Min.   :3.36e+03   Min.   :3.36e+03   Min.   :0e+00   Min.   :   53  
##  1st Qu.:4.82e+07   1st Qu.:4.82e+07   1st Qu.:0e+00   1st Qu.: 1219  
##  Median :6.24e+07   Median :6.24e+07   Median :0e+00   Median : 2034  
##  Mean   :7.13e+07   Mean   :7.13e+07   Mean   :1e-07   Mean   : 3018  
##  3rd Qu.:1.05e+08   3rd Qu.:1.05e+08   3rd Qu.:0e+00   3rd Qu.: 3989  
##  Max.   :1.89e+08   Max.   :1.89e+08   Max.   :3e-03   Max.   :15710

Reference Genome v3

# Column-wise summary of the hit table read from CentC_sequence_rgv3.blast.
# Here subject_id is summarised as a factor rather than a number: the output
# below lists non-numeric names such as scaffold_498 among its levels.
summary(z)
##     query_id            subject_id       identity     alignment_length
##  Query_1:70876   7           :18269   Min.   : 73.9   Min.   :   28   
##                  1           :11237   1st Qu.: 89.7   1st Qu.:  939   
##                  10          : 9558   Median : 90.8   Median : 1455   
##                  scaffold_498: 5535   Mean   : 90.5   Mean   : 2308   
##                  9           : 5289   3rd Qu.: 91.7   3rd Qu.: 3041   
##                  8           : 4781   Max.   :100.0   Max.   :11387   
##                  (Other)     :16207                                   
##    mismatches     gap_opens        q_start          q_end      
##  Min.   :   0   Min.   :  0.0   Min.   :    1   Min.   :   35  
##  1st Qu.:  68   1st Qu.: 15.0   1st Qu.: 1394   1st Qu.: 1742  
##  Median : 112   Median : 31.0   Median : 3031   Median : 6419  
##  Mean   : 166   Mean   : 44.7   Mean   : 4329   Mean   : 6612  
##  3rd Qu.: 224   3rd Qu.: 61.0   3rd Qu.: 7698   3rd Qu.:11098  
##  Max.   :1105   Max.   :280.0   Max.   :12534   Max.   :12624  
##                                                                
##     s_start             s_end              evalue        bit_score    
##  Min.   :1.00e+00   Min.   :1.00e+00   Min.   :0e+00   Min.   :   53  
##  1st Qu.:2.30e+07   1st Qu.:2.30e+07   1st Qu.:0e+00   1st Qu.: 1199  
##  Median :5.49e+07   Median :5.49e+07   Median :0e+00   Median : 2013  
##  Mean   :6.32e+07   Mean   :6.32e+07   Mean   :1e-07   Mean   : 3002  
##  3rd Qu.:9.79e+07   3rd Qu.:9.79e+07   3rd Qu.:0e+00   3rd Qu.: 3962  
##  Max.   :1.89e+08   Max.   :1.89e+08   Max.   :3e-03   Max.   :15710  
## 

CentC Density for Chromosomes 1-10

Pink Plots = Reference Genome v1

Green Plots = Reference Genome v2

Blue Plots = Reference Genome v3

Other colors indicate overlap between two or all three reference genomes.


# For each of chromosomes 1-10, overlay the density of CentC hit start
# positions from the three reference genome versions and mark the individual
# hits with a rug along the x axis.
par(mar = c(2, 4, 2, 4))

# Semi-transparent line colors: v1 pink, v2 green, v3 blue.
col_v1 <- rgb(0.8, 0.4, 0.6, 0.8)
col_v2 <- rgb(0.6, 0.8, 0.4, 0.8)
col_v3 <- rgb(0.4, 0.6, 0.8, 0.8)

for (i in 1:10) {
    # Hit start positions on chromosome i only; which() drops rows whose
    # subject_id comparison is NA, as subset() did.
    hits_v1 <- x$s_start[which(x$subject_id == i)]
    hits_v2 <- y$s_start[which(y$subject_id == i)]
    hits_v3 <- z$s_start[which(z$subject_id == i)]

    dens_v1 <- density(hits_v1)
    dens_v2 <- density(hits_v2)
    dens_v3 <- density(hits_v3)

    # Draw all three curves in ONE coordinate system so the plotted axes are
    # valid for every curve. Previously each curve was drawn with its own
    # x/y scale via par(new = TRUE) while only the v3 axes were shown.
    x_range <- c(0, max(chrv1[i], chrv2[i], chrv3[i]))
    y_range <- c(0, max(dens_v1$y, dens_v2$y, dens_v3$y))

    plot(dens_v1, main = i, xlab = "Length (bp)", ylab = " ", axes = FALSE,
        col = col_v1, xlim = x_range, ylim = y_range)
    lines(dens_v2, col = col_v2)
    lines(dens_v3, col = col_v3)

    axis(4, las = 1)
    axis(2, las = 1)
    axis(1)

    # Rug ticks use the same per-chromosome hits as the density curves
    # (previously hits from every chromosome were drawn on every panel).
    # quiet takes a logical; getOption("warn") < 0 is rug()'s own default.
    rug(hits_v1, ticksize = 0.05, side = 1, lwd = 1, col = col_v1,
        quiet = getOption("warn") < 0)
    rug(hits_v2, ticksize = 0.1, side = 1, lwd = 1, col = col_v2,
        quiet = getOption("warn") < 0)
    rug(hits_v3, ticksize = 0.15, side = 1, lwd = 1, col = col_v3,
        quiet = getOption("warn") < 0)
}

plot of chunk unnamed-chunk-5 plot of chunk unnamed-chunk-5 plot of chunk unnamed-chunk-5 plot of chunk unnamed-chunk-5 plot of chunk unnamed-chunk-5 plot of chunk unnamed-chunk-5 plot of chunk unnamed-chunk-5 plot of chunk unnamed-chunk-5 plot of chunk unnamed-chunk-5 plot of chunk unnamed-chunk-5