HM450

Download manifest files

# Download the 20180808 (new) and 20180304 (old) HM450 manifests for hg38 and
# hg19 into ~/Downloads/tmp/<release>/ and load them.
# recursive = TRUE creates missing parent directories; without it dir.create()
# fails (silently, because showWarnings = FALSE) when ~/Downloads/tmp does not
# exist yet, and the downloads below then error out.
dir.create('~/Downloads/tmp/20180808/', showWarnings = FALSE, recursive = TRUE)
# mode = 'wb' requests a binary transfer so the serialized .rds files are
# written unchanged on every platform.
download.file('http://zwdzwd.io/InfiniumAnnotation/20180808/hm450/hm450.hg38.manifest.rds', '~/Downloads/tmp/20180808/hm450.hg38.manifest.rds', mode = 'wb')
download.file('http://zwdzwd.io/InfiniumAnnotation/20180808/hm450/hm450.hg19.manifest.rds', '~/Downloads/tmp/20180808/hm450.hg19.manifest.rds', mode = 'wb')
dir.create('~/Downloads/tmp/20180304/', showWarnings = FALSE, recursive = TRUE)
download.file('http://zwdzwd.io/InfiniumAnnotation/20180304/hm450/hm450.hg38.manifest.rds', '~/Downloads/tmp/20180304/hm450.hg38.manifest.rds', mode = 'wb')
download.file('http://zwdzwd.io/InfiniumAnnotation/20180304/hm450/hm450.hg19.manifest.rds', '~/Downloads/tmp/20180304/hm450.hg19.manifest.rds', mode = 'wb')
HM450.hg38.manifest.0304 <- readRDS('~/Downloads/tmp/20180304/hm450.hg38.manifest.rds')
HM450.hg19.manifest.0304 <- readRDS('~/Downloads/tmp/20180304/hm450.hg19.manifest.rds')
# Reorder each new manifest by the probe names of the old manifest of the same
# genome build, so old and new can be compared element by element.
HM450.hg38.manifest.0808 <- readRDS('~/Downloads/tmp/20180808/hm450.hg38.manifest.rds')[names(HM450.hg38.manifest.0304)]
HM450.hg19.manifest.0808 <- readRDS('~/Downloads/tmp/20180808/hm450.hg19.manifest.rds')[names(HM450.hg19.manifest.0304)]

Summary

There are 46 columns (annotations) and 485577 rows (probes) in the old manifest. There are 51 columns (annotations) and 485577 rows (probes) in the new manifest.

Strand of Mapping

The new manifest

# Cross-tabulate flag_A of every type II probe on hg19 (rows) against hg38
# (columns) in the new manifest.
# The hg19 and hg38 manifests are each ordered by the old manifest of their own
# build, so they are not guaranteed to list probes in the same order. Pair the
# two flag vectors by probe name rather than by position.
# NOTE(review): the rendered table that follows this chunk was produced with
# positional pairing; re-knit to refresh it.
type2_ids <- names(HM450.hg19.manifest.0808)[HM450.hg19.manifest.0808$designType == 'II']
strands <- table(HM450.hg19.manifest.0808[type2_ids]$flag_A, HM450.hg38.manifest.0808[type2_ids]$flag_A)
# Move the second column to the end so the columns line up with the labels
# below (presumably the "unmapped" flag value sorts second -- confirm).
strands <- strands[, c(1, 3, 2)]
rownames(strands) <- c('Watson GRCh37','Crick GRCh37')
colnames(strands) <- c('Watson GRCh38','Crick GRCh38','unmapped GRCh38')
kable(strands)
Watson GRCh38 Crick GRCh38 unmapped GRCh38
Watson GRCh37 88115 87001 11
Crick GRCh37 86987 87956 6

The old manifest

# Cross-tabulate flag_A of every type II probe on hg19 (rows) against hg38
# (columns) in the old manifest.
# The hg19 and hg38 manifests are not guaranteed to list probes in the same
# order, so pair the two flag vectors by probe name rather than by position.
# NOTE(review): the rendered table that follows this chunk was produced with
# positional pairing; re-knit to refresh it.
type2_ids <- names(HM450.hg19.manifest.0304)[HM450.hg19.manifest.0304$designType == 'II']
strands <- table(HM450.hg19.manifest.0304[type2_ids]$flag_A, HM450.hg38.manifest.0304[type2_ids]$flag_A)
# Move the second column to the end so the columns line up with the labels
# below (presumably the "unmapped" flag value sorts second -- confirm).
strands <- strands[, c(1, 3, 2)]
rownames(strands) <- c('Watson GRCh37','Crick GRCh37')
colnames(strands) <- c('Watson GRCh38','Crick GRCh38','unmapped GRCh38')
kable(strands)
Watson GRCh38 Crick GRCh38 unmapped GRCh38
Watson GRCh37 88115 87001 11
Crick GRCh37 86987 87956 6

Masking

The biggest change in this release is that MASK_general now differs between hg19 and hg38. In all prior releases, the MASK_general sets from hg19 and hg38 were merged, and the merged set was recorded in both the hg19 and the hg38 manifests. In the current release this merging has been abandoned, so each genome build carries its own MASK_general.

# Names of the masking columns in the old hg19 manifest (not used further in
# this chunk).
masking_cols <- grep('MASK_', colnames(mcols(HM450.hg19.manifest.0304)), value = TRUE)
# For each of the four manifests, sum every MASK_* column into a one-row data
# frame, then stack the four rows in the order old hg19, old hg38, new hg19,
# new hg38.
df <- do.call(rbind, lapply(
    list(HM450.hg19.manifest.0304, HM450.hg38.manifest.0304,
         HM450.hg19.manifest.0808, HM450.hg38.manifest.0808),
    function(manifest) {
        mask_flags <- dplyr::select(as.data.frame(mcols(manifest)), starts_with('MASK_'))
        summarise_all(mask_flags, sum)
    }
))
rownames(df) <- c('hg19_0304', 'hg38_0304', 'hg19_0808', 'hg38_0808')
# Transpose so that masks are rows and manifests are columns.
kable(t(df))
hg19_0304 hg38_0304 hg19_0808 hg38_0808
MASK_mapping 40517 41446 40490 41712
MASK_typeINextBaseSwitch 1030 1030 1030 1030
MASK_rmsk15 84789 84789 84789 84789
MASK_sub35_copy 10545 10545 10545 10545
MASK_sub30_copy 17568 17568 17568 17568
MASK_sub25_copy 60654 60654 60654 60654
MASK_sub40_copy 6419 6419 6419 6419
MASK_snp5_common 78601 78601 78601 78601
MASK_snp5_GMAF1p 24008 24008 24008 24008
MASK_extBase 105 105 105 105
MASK_general 65894 65894 65574 66041

Another difference is that, on hg38, slightly more probes are now masked in MASK_mapping (41446 to 41712), based on the NM_A and NM_B tags; on hg19 the count changed only marginally (40517 to 40490).

# Probes flagged by MASK_mapping in the new hg38 manifest but not in the old
# one. The element-wise comparison relies on the new manifest having been
# reordered to the old manifest's probe order when it was loaded.
newly_masked <- HM450.hg38.manifest.0808$MASK_mapping & !HM450.hg38.manifest.0304$MASK_mapping
df <- mcols(HM450.hg38.manifest.0808[newly_masked])
df$probeID <- rownames(df)
# Show the ten probes with the largest NM_A, together with every NM column.
nm_cols <- df %>% as.data.frame %>% dplyr::select(probeID, contains('NM'))
nm_cols %>% arrange(-NM_A) %>% head(10)
##       probeID NM_A NM_B wDecoy_NM_A wDecoy_NM_B
## 1  cg04926385    4   NA           4          NA
## 2  cg03982074    3   NA           3          NA
## 3  cg10615091    3   NA           3          NA
## 4  cg10747483    3    3           3           3
## 5  cg22619763    2    2           2           2
## 6  cg10811045    2    2           2           2
## 7  cg05124441    2    2           2           2
## 8  cg20688289    2   NA           2          NA
## 9  cg05302531    2    2           2           2
## 10 cg10259889    2    2           2           2
# Tabulate, per probe, the larger of NM_A and NM_B; a missing NM_B counts as 0.
table(pmax(df$NM_A, replace(df$NM_B, is.na(df$NM_B), 0)))
## 
##   1   2   3   4 
## 241  21   3   1

On hg19, all of the above probes were mapped without issue (the first ten are shown):

# Look up the same probes in the new hg19 manifest and show the NM columns of
# the first ten.
hg19_nm <- as.data.frame(mcols(HM450.hg19.manifest.0808)[df$probeID, ])
head(dplyr::select(hg19_nm, contains('NM')), 10)
##            NM_A NM_B wDecoy_NM_A wDecoy_NM_B
## cg24602020    0   NA           0          NA
## cg23887839    0   NA           0          NA
## cg16047567    0   NA           0          NA
## cg01097325    0   NA           0          NA
## cg12639866    0   NA           0          NA
## cg11526198    0   NA           0          NA
## cg19266779    0    0           0           0
## cg21873275    0    0           0           0
## cg15067806    0    0           0           0
## cg21404935    0    0           0           0

97 Probes interrogating the same CpG on hg38

As a matter of fact, under hg38 many probes map to the same CpG. Usually all but one of these probes are mapped with mismatches (NM > 0).

# Start coordinate of every range in the new hg38 manifest.
cgBeg <- start(HM450.hg38.manifest.0808)
# Index i is kept when range i and range i + 1 share the same start.
# NOTE(review): only start is compared, not the chromosome, and the test only
# sees neighbours -- presumably the manifest is coordinate-sorted; confirm.
idx <- which(diff(cgBeg) == 0)
length(idx)
## [1] 63
# Collect both members of every adjacent pair found above, each probe once.
# c(idx, idx + 1) repeats an index whenever two pairs overlap (three or more
# probes on one start). The original kept the repeats and afterwards dropped
# every probeID matching '\\.1' (presumably the suffix added to duplicated row
# names by as.data.frame), which would also discard genuine probe IDs that
# contain '.1'. De-duplicating the indices up front avoids both problems.
dup_rows <- sort(unique(c(idx, idx + 1)))
df <- HM450.hg38.manifest.0808[dup_rows] %>% mcols %>% as.data.frame %>%
    mutate(probeID = rownames(.)) %>%
    dplyr::select(probeID, chrm_A, probeBeg, probeEnd, NM_A, NM_B, designType)
# NOTE(review): the outputs recorded below come from the original pipeline;
# re-knit to refresh them.
nrow(df)
## [1] 97
df
##       probeID chrm_A  probeBeg  probeEnd NM_A NM_B designType
## 1  cg16673286   chr1   1713347   1713396    2   NA         II
## 2  cg23611477   chr1   1713347   1713396    0   NA         II
## 3  cg20017108   chr1  13410326  13410375    1   NA         II
## 4  cg16566605   chr1  13410326  13410375    0   NA         II
## 5  cg10240587   chr1  13410761  13410810    2   NA         II
## 6  cg21410132   chr1  13410761  13410810    0   NA         II
## 7  cg26329816   chr1 120850952 120851001    1   NA         II
## 8  cg09137068   chr1 120850952 120851001    0   NA         II
## 9  cg06441514   chr1 144420861 144420910    1    2          I
## 10 cg13552272   chr1 144420862 144420911    0   NA         II
## 11 cg20294457   chr1 148152232 148152281    1   NA         II
## 12 cg10127479   chr1 148152231 148152280    0    0          I
## 13 cg03170665   chr2 113575870 113575919    0   NA         II
## 14 cg26523196   chr2 113575870 113575919    1   NA         II
## 15 cg13912721   chr3  75785760  75785809    1   NA         II
## 16 cg04092883   chr3  75785760  75785809    1   NA         II
## 17 cg11938051   chr3  75785760  75785809    0   NA         II
## 18 cg22509113   chr4   4226917   4226966    1    1          I
## 19 cg05936219   chr4   4226917   4226966    0    0          I
## 20 cg02334660   chr4  74446767  74446816    0   NA         II
## 21 cg26611070   chr4  74446767  74446816    1   NA         II
## 22 cg25593776   chr8 144147800 144147849    0    0          I
## 23 cg13177830   chr8 144147800 144147849    1    1          I
## 24 cg26712673   chr8 144277781 144277830    0    0          I
## 25 cg04746089   chr8 144277830 144277879    0   NA         II
## 26 cg10796141   chr9  40992232  40992281    1   NA         II
## 27 cg25976845   chr9  40992232  40992281    0   NA         II
## 28 cg06230664   chr9  41074402  41074451    1   NA         II
##  [ reached getOption("max.print") -- omitted 69 rows ]

EPIC

Download manifest files

# Download the 20180808 (new) and 20180304 (old) EPIC manifests for hg38 and
# hg19 into ~/Downloads/tmp/<release>/ and load them.
# recursive = TRUE creates missing parent directories; without it dir.create()
# fails (silently, because showWarnings = FALSE) when ~/Downloads/tmp does not
# exist yet, and the downloads below then error out.
dir.create('~/Downloads/tmp/20180808/', showWarnings = FALSE, recursive = TRUE)
# mode = 'wb' requests a binary transfer so the serialized .rds files are
# written unchanged on every platform.
download.file('http://zwdzwd.io/InfiniumAnnotation/20180808/EPIC/EPIC.hg38.manifest.rds', '~/Downloads/tmp/20180808/EPIC.hg38.manifest.rds', mode = 'wb')
download.file('http://zwdzwd.io/InfiniumAnnotation/20180808/EPIC/EPIC.hg19.manifest.rds', '~/Downloads/tmp/20180808/EPIC.hg19.manifest.rds', mode = 'wb')
dir.create('~/Downloads/tmp/20180304/', showWarnings = FALSE, recursive = TRUE)
download.file('http://zwdzwd.io/InfiniumAnnotation/20180304/EPIC/EPIC.hg38.manifest.rds', '~/Downloads/tmp/20180304/EPIC.hg38.manifest.rds', mode = 'wb')
download.file('http://zwdzwd.io/InfiniumAnnotation/20180304/EPIC/EPIC.hg19.manifest.rds', '~/Downloads/tmp/20180304/EPIC.hg19.manifest.rds', mode = 'wb')
EPIC.hg38.manifest.0304 <- readRDS('~/Downloads/tmp/20180304/EPIC.hg38.manifest.rds')
EPIC.hg19.manifest.0304 <- readRDS('~/Downloads/tmp/20180304/EPIC.hg19.manifest.rds')
# Reorder each new manifest by the probe names of the old manifest of the same
# genome build, so old and new can be compared element by element.
EPIC.hg38.manifest.0808 <- readRDS('~/Downloads/tmp/20180808/EPIC.hg38.manifest.rds')[names(EPIC.hg38.manifest.0304)]
EPIC.hg19.manifest.0808 <- readRDS('~/Downloads/tmp/20180808/EPIC.hg19.manifest.rds')[names(EPIC.hg19.manifest.0304)]

Summary

There are 46 columns (annotations) and 865918 rows (probes) in the old manifest. There are 51 columns (annotations) and 865918 rows (probes) in the new manifest.

Strand of Mapping

The new manifest

# Cross-tabulate flag_A of every type II probe on hg19 (rows) against hg38
# (columns) in the new manifest.
# The hg19 and hg38 manifests are each ordered by the old manifest of their own
# build, so they are not guaranteed to list probes in the same order. Pair the
# two flag vectors by probe name rather than by position.
# NOTE(review): the rendered table that follows this chunk was produced with
# positional pairing; re-knit to refresh it.
type2_ids <- names(EPIC.hg19.manifest.0808)[EPIC.hg19.manifest.0808$designType == 'II']
strands <- table(EPIC.hg19.manifest.0808[type2_ids]$flag_A, EPIC.hg38.manifest.0808[type2_ids]$flag_A)
# Move the second column to the end so the columns line up with the labels
# below (presumably the "unmapped" flag value sorts second -- confirm).
strands <- strands[, c(1, 3, 2)]
rownames(strands) <- c('Watson GRCh37','Crick GRCh37')
colnames(strands) <- c('Watson GRCh38','Crick GRCh38','unmapped GRCh38')
kable(strands)
Watson GRCh38 Crick GRCh38 unmapped GRCh38
Watson GRCh37 181681 180554 16
Crick GRCh37 180519 180980 10

The old manifest

# Cross-tabulate flag_A of every type II probe on hg19 (rows) against hg38
# (columns) in the old manifest.
# The hg19 and hg38 manifests are not guaranteed to list probes in the same
# order, so pair the two flag vectors by probe name rather than by position.
# NOTE(review): the rendered table that follows this chunk was produced with
# positional pairing; re-knit to refresh it.
type2_ids <- names(EPIC.hg19.manifest.0304)[EPIC.hg19.manifest.0304$designType == 'II']
strands <- table(EPIC.hg19.manifest.0304[type2_ids]$flag_A, EPIC.hg38.manifest.0304[type2_ids]$flag_A)
# Move the second column to the end so the columns line up with the labels
# below (presumably the "unmapped" flag value sorts second -- confirm).
strands <- strands[, c(1, 3, 2)]
rownames(strands) <- c('Watson GRCh37','Crick GRCh37')
colnames(strands) <- c('Watson GRCh38','Crick GRCh38','unmapped GRCh38')
kable(strands)
Watson GRCh38 Crick GRCh38 unmapped GRCh38
Watson GRCh37 181681 180554 16
Crick GRCh37 180519 180980 10

Masking

The biggest change in this release is that MASK_general now differs between hg19 and hg38. In all prior releases, the MASK_general sets from hg19 and hg38 were merged, and the merged set was recorded in both the hg19 and the hg38 manifests. In the current release this merging has been abandoned, so each genome build carries its own MASK_general.

# Names of the masking columns in the old hg19 manifest (not used further in
# this chunk).
masking_cols <- grep('MASK_', colnames(mcols(EPIC.hg19.manifest.0304)), value = TRUE)
# For each of the four manifests, sum every MASK_* column into a one-row data
# frame, then stack the four rows in the order old hg19, old hg38, new hg19,
# new hg38.
df <- do.call(rbind, lapply(
    list(EPIC.hg19.manifest.0304, EPIC.hg38.manifest.0304,
         EPIC.hg19.manifest.0808, EPIC.hg38.manifest.0808),
    function(manifest) {
        mask_flags <- dplyr::select(as.data.frame(mcols(manifest)), starts_with('MASK_'))
        summarise_all(mask_flags, sum)
    }
))
rownames(df) <- c('hg19_0304', 'hg38_0304', 'hg19_0808', 'hg38_0808')
# Transpose so that masks are rows and manifests are columns.
kable(t(df))
hg19_0304 hg38_0304 hg19_0808 hg38_0808
MASK_mapping 63508 65098 63483 65537
MASK_typeINextBaseSwitch 1051 1051 1051 1051
MASK_rmsk15 201251 201251 201251 201251
MASK_sub35_copy 14502 14502 14502 14502
MASK_sub30_copy 25194 25194 25194 25194
MASK_sub25_copy 88173 88173 88173 88173
MASK_sub40_copy 8747 8747 8747 8747
MASK_snp5_common 156200 156200 156200 156200
MASK_snp5_GMAF1p 42159 42159 42159 42159
MASK_extBase 171 171 171 171
MASK_general 108452 108452 107994 108716

Another difference is that, on hg38, slightly more probes are now masked in MASK_mapping (65098 to 65537), based on the NM_A and NM_B tags; on hg19 the count changed only marginally (63508 to 63483).

# Probes flagged by MASK_mapping in the new hg38 manifest but not in the old
# one. The element-wise comparison relies on the new manifest having been
# reordered to the old manifest's probe order when it was loaded.
newly_masked <- EPIC.hg38.manifest.0808$MASK_mapping & !EPIC.hg38.manifest.0304$MASK_mapping
df <- mcols(EPIC.hg38.manifest.0808[newly_masked])
df$probeID <- rownames(df)
# Show the ten probes with the largest NM_A, together with every NM column.
nm_cols <- df %>% as.data.frame %>% dplyr::select(probeID, contains('NM'))
nm_cols %>% arrange(-NM_A) %>% head(10)
##       probeID NM_A NM_B wDecoy_NM_A wDecoy_NM_B
## 1  cg06449993    4   NA           4          NA
## 2  cg03982074    3   NA           3          NA
## 3  cg10615091    3   NA           3          NA
## 4  cg12381370    3   NA           3          NA
## 5  cg09761058    3   NA           3          NA
## 6  cg10747483    3    3           3           3
## 7  cg22619763    2    2           2           2
## 8  cg24771345    2   NA           2          NA
## 9  cg18041845    2   NA           2          NA
## 10 cg10811045    2    2           2           2
# Tabulate, per probe, the larger of NM_A and NM_B; a missing NM_B counts as 0.
table(pmax(df$NM_A, replace(df$NM_B, is.na(df$NM_B), 0)))
## 
##   1   2   3   4 
## 411  22   5   1

On hg19, all of the above probes were mapped without issue (the first ten are shown):

# Look up the same probes in the new hg19 manifest and show the NM columns of
# the first ten.
hg19_nm <- as.data.frame(mcols(EPIC.hg19.manifest.0808)[df$probeID, ])
head(dplyr::select(hg19_nm, contains('NM')), 10)
##            NM_A NM_B wDecoy_NM_A wDecoy_NM_B
## cg24602020    0   NA           0          NA
## cg18667738    0   NA           0          NA
## cg16047567    0   NA           0          NA
## cg01097325    0   NA           0          NA
## cg18303314    0   NA           0          NA
## cg20967147    0   NA           0          NA
## cg07942324    0   NA           0          NA
## cg11526198    0   NA           0          NA
## cg19266779    0    0           0           0
## cg12746041    0   NA           0          NA

118 Probes interrogating the same CpG on hg38

As a matter of fact, under hg38 many probes map to the same CpG. Usually all but one of these probes are mapped with mismatches (NM > 0).

# Start coordinate of every range in the new hg38 manifest.
cgBeg <- start(EPIC.hg38.manifest.0808)
# Index i is kept when range i and range i + 1 share the same start.
# NOTE(review): only start is compared, not the chromosome, and the test only
# sees neighbours -- presumably the manifest is coordinate-sorted; confirm.
idx <- which(diff(cgBeg) == 0)
length(idx)
## [1] 77
# Collect both members of every adjacent pair found above, each probe once.
# c(idx, idx + 1) repeats an index whenever two pairs overlap (three or more
# probes on one start). The original kept the repeats and afterwards dropped
# every probeID matching '\\.1' (presumably the suffix added to duplicated row
# names by as.data.frame), which would also discard genuine probe IDs that
# contain '.1'. De-duplicating the indices up front avoids both problems.
dup_rows <- sort(unique(c(idx, idx + 1)))
df <- EPIC.hg38.manifest.0808[dup_rows] %>% mcols %>% as.data.frame %>%
    mutate(probeID = rownames(.)) %>%
    dplyr::select(probeID, chrm_A, probeBeg, probeEnd, NM_A, NM_B, designType)
# NOTE(review): the outputs recorded below come from the original pipeline;
# re-knit to refresh them.
nrow(df)
## [1] 118
df
##        probeID chrm_A  probeBeg  probeEnd NM_A NM_B designType
## 1   cg20017108   chr1  13410326  13410375    1   NA         II
## 2   cg16566605   chr1  13410326  13410375    0   NA         II
## 3   cg23664334   chr1 121010280 121010329    0    0          I
## 4   cg06741726   chr1 121010279 121010328    1   NA         II
## 5   cg06441514   chr1 144420861 144420910    1    2          I
## 6   cg13552272   chr1 144420862 144420911    0   NA         II
## 7   cg01893993   chr1 146307505 146307554    1   NA         II
## 8   cg07100648   chr1 146307505 146307554    0   NA         II
## 9   cg12512196   chr1 146472583 146472632    1    1          I
## 10  cg06616806   chr1 146472583 146472632    0    0          I
## 11  cg20294457   chr1 148152232 148152281    1   NA         II
## 12  cg10127479   chr1 148152231 148152280    0    0          I
## 13  cg03170665   chr2 113575870 113575919    0   NA         II
## 14  cg26523196   chr2 113575870 113575919    1   NA         II
## 15  cg13912721   chr3  75785760  75785809    1   NA         II
## 16  cg11938051   chr3  75785760  75785809    0   NA         II
## 17  cg01972631   chr4  74441928  74441977    0   NA         II
## 18  cg01231417   chr4  74441928  74441977    1   NA         II
## 19  cg02334660   chr4  74446767  74446816    0   NA         II
## 20  cg26611070   chr4  74446767  74446816    1   NA         II
## 21  cg21205260   chr6    909186    909235    2   NA         II
## 22  cg14475265   chr6    909186    909235    0   NA         II
## 23  cg03481658   chr7   6011675   6011724    0   NA         II
## 24  cg14344864   chr7   6011675   6011724    1   NA         II
## 25  cg18779556   chr7 101681406 101681455    0   NA         II
## 26  cg26670578   chr7 101681406 101681455    1   NA         II
## 27  cg25593776   chr8 144147800 144147849    0    0          I
## 28  cg13177830   chr8 144147800 144147849    1    1          I
##  [ reached getOption("max.print") -- omitted 90 rows ]

HM27

Download manifest files

# Download the 20180808 (new) and 20180304 (old) HM27 manifests for hg38 and
# hg19 into ~/Downloads/tmp/<release>/ and load them.
# recursive = TRUE creates missing parent directories; without it dir.create()
# fails (silently, because showWarnings = FALSE) when ~/Downloads/tmp does not
# exist yet, and the downloads below then error out.
dir.create('~/Downloads/tmp/20180808/', showWarnings = FALSE, recursive = TRUE)
# mode = 'wb' requests a binary transfer so the serialized .rds files are
# written unchanged on every platform.
download.file('http://zwdzwd.io/InfiniumAnnotation/20180808/hm27/hm27.hg38.manifest.rds', '~/Downloads/tmp/20180808/hm27.hg38.manifest.rds', mode = 'wb')
download.file('http://zwdzwd.io/InfiniumAnnotation/20180808/hm27/hm27.hg19.manifest.rds', '~/Downloads/tmp/20180808/hm27.hg19.manifest.rds', mode = 'wb')
dir.create('~/Downloads/tmp/20180304/', showWarnings = FALSE, recursive = TRUE)
download.file('http://zwdzwd.io/InfiniumAnnotation/20180304/hm27/hm27.hg38.manifest.rds', '~/Downloads/tmp/20180304/hm27.hg38.manifest.rds', mode = 'wb')
download.file('http://zwdzwd.io/InfiniumAnnotation/20180304/hm27/hm27.hg19.manifest.rds', '~/Downloads/tmp/20180304/hm27.hg19.manifest.rds', mode = 'wb')
HM27.hg38.manifest.0304 <- readRDS('~/Downloads/tmp/20180304/hm27.hg38.manifest.rds')
HM27.hg19.manifest.0304 <- readRDS('~/Downloads/tmp/20180304/hm27.hg19.manifest.rds')
# Reorder each new manifest by the probe names of the old manifest of the same
# genome build, so old and new can be compared element by element.
HM27.hg38.manifest.0808 <- readRDS('~/Downloads/tmp/20180808/hm27.hg38.manifest.rds')[names(HM27.hg38.manifest.0304)]
HM27.hg19.manifest.0808 <- readRDS('~/Downloads/tmp/20180808/hm27.hg19.manifest.rds')[names(HM27.hg19.manifest.0304)]

Summary

There are 46 columns (annotations) and 27578 rows (probes) in the old manifest. There are 51 columns (annotations) and 27578 rows (probes) in the new manifest.

Strand of Mapping

The new manifest

# Cross-tabulate flag_A of every probe on hg19 (rows) against hg38 (columns)
# in the new manifest.
# The hg19 and hg38 manifests are not guaranteed to list probes in the same
# order, so pair the two flag vectors by probe name rather than by position.
# NOTE(review): the rendered table that follows this chunk was produced with
# positional pairing and the previous labels; re-knit to refresh it.
hm27_ids <- names(HM27.hg19.manifest.0808)
strands <- table(HM27.hg19.manifest.0808$flag_A, HM27.hg38.manifest.0808[hm27_ids]$flag_A)
# Move the second row to the end so the rows line up with the labels below
# (presumably the "unmapped" flag value sorts second -- confirm).
strands <- strands[c(1, 3, 2), ]
# table() puts its first argument (the hg19 manifest) on the rows and its
# second (the hg38 manifest) on the columns, so label them accordingly; the
# original labels had the two genome builds swapped.
# NOTE(review): if rows were meant to be GRCh38, swap the table() arguments
# instead of the labels.
rownames(strands) <- c('Watson GRCh37','Crick GRCh37','unmapped GRCh37')
colnames(strands) <- c('Watson GRCh38','Crick GRCh38')
kable(strands)
Watson GRCh37 Crick GRCh37
Watson GRCh38 8056 5777
Crick GRCh38 5778 7964
unmapped GRCh38 1 2

The old manifest

# Cross-tabulate flag_A of every probe on hg19 (rows) against hg38 (columns)
# in the old manifest.
# The hg19 and hg38 manifests are not guaranteed to list probes in the same
# order, so pair the two flag vectors by probe name rather than by position.
# NOTE(review): the rendered table that follows this chunk was produced with
# positional pairing and the previous labels; re-knit to refresh it.
hm27_ids <- names(HM27.hg19.manifest.0304)
strands <- table(HM27.hg19.manifest.0304$flag_A, HM27.hg38.manifest.0304[hm27_ids]$flag_A)
# Move the second row to the end so the rows line up with the labels below
# (presumably the "unmapped" flag value sorts second -- confirm).
strands <- strands[c(1, 3, 2), ]
# table() puts its first argument (the hg19 manifest) on the rows and its
# second (the hg38 manifest) on the columns, so label them accordingly; the
# original labels had the two genome builds swapped.
# NOTE(review): if rows were meant to be GRCh38, swap the table() arguments
# instead of the labels.
rownames(strands) <- c('Watson GRCh37','Crick GRCh37','unmapped GRCh37')
colnames(strands) <- c('Watson GRCh38','Crick GRCh38')
kable(strands)
Watson GRCh37 Crick GRCh37
Watson GRCh38 8056 5777
Crick GRCh38 5778 7964
unmapped GRCh38 1 2

Masking

The biggest change in this release is that MASK_general now differs between hg19 and hg38. In all prior releases, the MASK_general sets from hg19 and hg38 were merged, and the merged set was recorded in both the hg19 and the hg38 manifests. In the current release this merging has been abandoned, so each genome build carries its own MASK_general.

# Names of the masking columns in the old hg19 manifest (not used further in
# this chunk).
masking_cols <- grep('MASK_', colnames(mcols(HM27.hg19.manifest.0304)), value = TRUE)
# For each of the four manifests, sum every MASK_* column into a one-row data
# frame, then stack the four rows in the order old hg19, old hg38, new hg19,
# new hg38.
df <- do.call(rbind, lapply(
    list(HM27.hg19.manifest.0304, HM27.hg38.manifest.0304,
         HM27.hg19.manifest.0808, HM27.hg38.manifest.0808),
    function(manifest) {
        mask_flags <- dplyr::select(as.data.frame(mcols(manifest)), starts_with('MASK_'))
        summarise_all(mask_flags, sum)
    }
))
rownames(df) <- c('hg19_0304', 'hg38_0304', 'hg19_0808', 'hg38_0808')
# Transpose so that masks are rows and manifests are columns.
kable(t(df))
hg19_0304 hg38_0304 hg19_0808 hg38_0808
MASK_mapping 1645 1659 1648 1674
MASK_typeINextBaseSwitch 195 195 195 195
MASK_rmsk15 3207 3207 3207 3207
MASK_sub35_copy 722 722 722 722
MASK_sub30_copy 893 893 893 893
MASK_sub25_copy 2709 2709 2709 2709
MASK_sub40_copy 619 619 619 619
MASK_snp5_common 2640 2640 2640 2640
MASK_snp5_GMAF1p 546 546 546 546
MASK_extBase 9 9 9 9
MASK_general 2443 2443 2442 2449

Another difference is that slightly more probes are now masked in MASK_mapping, based on the NM_A and NM_B tags (hg38: 1659 to 1674; hg19: 1645 to 1648).

# Probes flagged by MASK_mapping in the new hg38 manifest but not in the old
# one. The element-wise comparison relies on the new manifest having been
# reordered to the old manifest's probe order when it was loaded.
newly_masked <- HM27.hg38.manifest.0808$MASK_mapping & !HM27.hg38.manifest.0304$MASK_mapping
df <- mcols(HM27.hg38.manifest.0808[newly_masked])
df$probeID <- rownames(df)
# Show the ten probes with the largest NM_A, together with every NM column.
nm_cols <- df %>% as.data.frame %>% dplyr::select(probeID, contains('NM'))
nm_cols %>% arrange(-NM_A) %>% head(10)
##       probeID NM_A NM_B wDecoy_NM_A wDecoy_NM_B
## 1  cg10615091    3    3           3           3
## 2  cg23424962    2    2           2           2
## 3  cg13019092    1    1           1           1
## 4  cg08307963    1    1           1           1
## 5  cg04344347    1    1           1           1
## 6  cg00684178    1    1           1           1
## 7  cg04172348    1    1           1           1
## 8  cg06518271    1    1           1           1
## 9  cg19921492    1    1           1           1
## 10 cg17630833    1    1           1           1
# Tabulate, per probe, the larger of NM_A and NM_B; a missing NM_B counts as 0.
table(pmax(df$NM_A, replace(df$NM_B, is.na(df$NM_B), 0)))
## 
##  1  2  3 
## 13  1  1

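On hg19, most of the above probes were mapped without issue (NM = 0), although a few (e.g. cg04344347, cg19921492 and cg17630833 below) already carry one mismatch: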

# Look up the same probes in the new hg19 manifest and show the NM columns of
# the first ten.
hg19_nm <- as.data.frame(mcols(HM27.hg19.manifest.0808)[df$probeID, ])
head(dplyr::select(hg19_nm, contains('NM')), 10)
##            NM_A NM_B wDecoy_NM_A wDecoy_NM_B
## cg13019092    0    0           0           0
## cg08307963    0    0           0           0
## cg04344347    1    1           1           1
## cg00684178    0    0           0           0
## cg04172348    0    0           0           0
## cg06518271    0    0           0           0
## cg10615091    0    0           0           0
## cg19921492    1    1           1           1
## cg17630833    1    1           1           1
## cg23727583    0    0           0           0

24 Probes interrogating the same CpG on hg38

As a matter of fact, under hg38 many probes map to the same CpG. On the larger arrays, usually all but one of these probes are mapped with mismatches (NM > 0); on HM27, however, almost all of them map without mismatches (NM = 0), as the table below shows.

# Start coordinate of every range in the new hg38 manifest.
cgBeg <- start(HM27.hg38.manifest.0808)
# Index i is kept when range i and range i + 1 share the same start.
# NOTE(review): only start is compared, not the chromosome, and the test only
# sees neighbours -- presumably the manifest is coordinate-sorted; confirm.
idx <- which(diff(cgBeg) == 0)
length(idx)
## [1] 12
# Collect both members of every adjacent pair found above, each probe once.
# c(idx, idx + 1) repeats an index whenever two pairs overlap (three or more
# probes on one start). The original kept the repeats and afterwards dropped
# every probeID matching '\\.1' (presumably the suffix added to duplicated row
# names by as.data.frame), which would also discard genuine probe IDs that
# contain '.1'. De-duplicating the indices up front avoids both problems.
dup_rows <- sort(unique(c(idx, idx + 1)))
df <- HM27.hg38.manifest.0808[dup_rows] %>% mcols %>% as.data.frame %>%
    mutate(probeID = rownames(.)) %>%
    dplyr::select(probeID, chrm_A, probeBeg, probeEnd, NM_A, NM_B, designType)
# NOTE(review): the outputs recorded below come from the original pipeline;
# re-knit to refresh them.
nrow(df)
## [1] 24
df
##       probeID chrm_A  probeBeg  probeEnd NM_A NM_B designType
## 1  cg11037148   chr6  27831434  27831483    0    0          I
## 2  cg00634577   chr6  27831434  27831483    0    0          I
## 3  cg03790787   chr6 170612200 170612249    0    0          I
## 4  cg00578575   chr6 170612200 170612249    0    0          I
## 5  cg08539093   chr9 135546158 135546207    0    0          I
## 6  cg15092802   chr9 135546158 135546207    0    0          I
## 7  cg11673803  chr10  87094414  87094463    0    0          I
## 8  cg11891583  chr10  87094414  87094463    0    0          I
## 9  cg15105987  chr13 112067857 112067906    0    0          I
## 10 cg11208483  chr13 112067857 112067906    0    0          I
## 11 cg15910079  chr14  20891256  20891305    0    0          I
## 12 cg26191951  chr14  20891256  20891305    0    0          I
## 13 cg23663653  chr14 105742822 105742871    0    0          I
## 14 cg23988567  chr14 105742822 105742871    0    0          I
## 15 cg20657421  chr17  36103077  36103126    0    0          I
## 16 cg00896220  chr17  36103077  36103126    1    1          I
## 17 cg02043477  chr19  40851247  40851296    0    0          I
## 18 cg20075229  chr19  40851247  40851296    0    0          I
## 19 cg15408454   chrX 152698701 152698750    0    0          I
## 20 cg07545232   chrX 152698701 152698750    0    0          I
## 21 cg06899808   chrX 152698855 152698904    0    0          I
## 22 cg16390856   chrX 152698855 152698904    0    0          I
## 23 cg23509027   chrX 152733885 152733934    0    0          I
## 24 cg08977028   chrX 152733885 152733934    0    0          I