library(vcfR)
##
## ***** *** vcfR *** *****
## This is vcfR 1.13.0
## browseVignettes('vcfR') # Documentation
## citation('vcfR') # Citation
## ***** ***** ***** *****
# Show the current working directory; the relative file name used by the
# read.vcfR() calls below is resolved against it.
getwd()
## [1] "/Users/jooppereira/Downloads/Comp Bio/R and Rmd"
# List files in the working directory whose names contain the literal text
# "22.2". `pattern` is a regular expression, so the dot is escaped; an
# unescaped "." would match any character (e.g. "22x2" or "2212").
list.files(pattern = "22\\.2")
## [1] "22.21299928-21539928.ALL.chr22_GRCh38.genotypes.20170504.vcf.gz"
# Read the compressed VCF from the working directory into `vcf01`,
# leaving every other read.vcfR() argument at its default.
vcf01 <- read.vcfR(
  file = "22.21299928-21539928.ALL.chr22_GRCh38.genotypes.20170504.vcf.gz"
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 130
## header_line: 131
## variant count: 1911
## column count: 2513
##
## Meta line 130 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 1911
## Character matrix gt cols: 2513
## skip: 0
## nrows: 1911
## row_num: 0
##
## Processed variant 1000
## Processed variant: 1911
## All variants processed
# Read the VCF into `my_snps`, explicitly requesting conversion of VCF
# missing data to NA (see ?read.vcfR for `convertNA`).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change this argument.
my_snps <- vcfR::read.vcfR(
  "22.21299928-21539928.ALL.chr22_GRCh38.genotypes.20170504.vcf.gz",
  convertNA = TRUE
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 130
## header_line: 131
## variant count: 1911
## column count: 2513
##
## Meta line 130 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 1911
## Character matrix gt cols: 2513
## skip: 0
## nrows: 1911
## row_num: 0
##
## Processed variant 1000
## Processed variant: 1911
## All variants processed
# Print a preview of the loaded vcfR object; the output that follows shows
# its meta, fixed, and genotype sections.
head(my_snps)
## [1] "***** Object of class 'vcfR' *****"
## [1] "***** Meta section *****"
## [1] "##fileformat=VCFv4.1"
## [1] "##FILTER=<ID=PASS,Description=\"All filters passed\">"
## [1] "##fileDate=20150218"
## [1] "##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/refe [Truncated]"
## [1] "##source=1000GenomesPhase3Pipeline"
## [1] "##contig=<ID=1,assembly=b37,length=249250621>"
## [1] "First 6 rows."
## [1]
## [1] "***** Fixed section *****"
## CHROM POS ID REF ALT QUAL FILTER
## [1,] "22" "21301490" "rs554578373" "G" "A" "100" "PASS"
## [2,] "22" "21301918" "rs568193212" "T" "C" "100" "PASS"
## [3,] "22" "21303053" "rs552989884" "A" "AAAAT" "100" "PASS"
## [4,] "22" "21306280" "rs534078446" "A" "C" "100" "PASS"
## [5,] "22" "21306539" "rs554333566" "C" "T" "100" "PASS"
## [6,] "22" "21306546" "rs577325004" "G" "A" "100" "PASS"
## [1]
## [1] "***** Genotype section *****"
## FORMAT HG00096 HG00097 HG00099 HG00100 HG00101
## [1,] "GT" "0|0" "0|0" "0|0" "0|0" "0|0"
## [2,] "GT" "0|0" "0|0" "0|0" "0|0" "0|0"
## [3,] "GT" "0|0" "0|0" "0|0" "0|0" "0|0"
## [4,] "GT" "0|0" "0|0" "0|0" "0|0" "0|0"
## [5,] "GT" "0|0" "0|0" "0|0" "0|0" "0|0"
## [6,] "GT" "0|0" "0|0" "0|0" "0|0" "0|0"
## [1] "First 6 columns only."
## [1]
## [1] "Unique GT formats:"
## [1] "GT"
## [1]