library(vcfR)
## 
##    *****       ***   vcfR   ***       *****
##    This is vcfR 1.13.0 
##      browseVignettes('vcfR') # Documentation
##      citation('vcfR') # Citation
##    *****       *****      *****       *****
# Show the current working directory; the file names used below are given
# relative to it.
getwd()
## [1] "/Users/jooppereira/Downloads/Comp Bio/R and Rmd"
# List files in the working directory whose names contain the literal text
# "22.2". `pattern` is a regular expression, so the dot is escaped; an
# unescaped "." would match any character (e.g. "22x2").
list.files(pattern = "22\\.2")
## [1] "22.21299928-21539928.ALL.chr22_GRCh38.genotypes.20170504.vcf.gz"
# Load the gzipped VCF from the working directory into a vcfR object.
vcf01 <- vcfR::read.vcfR(
  file = "22.21299928-21539928.ALL.chr22_GRCh38.genotypes.20170504.vcf.gz"
)
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 130
##   header_line: 131
##   variant count: 1911
##   column count: 2513
## 
## Meta line 130 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 1911
##   Character matrix gt cols: 2513
##   skip: 0
##   nrows: 1911
##   row_num: 0
## 
## Processed variant 1000
## Processed variant: 1911
## All variants processed
 # Read the VCF file into `my_snps`, explicitly requesting conversion of VCF
 # missing data to NA. `TRUE` is spelled out because `T` is an ordinary
 # variable that can be reassigned, whereas `TRUE` is a reserved word.
 # NOTE(review): vcfR documents convertNA = TRUE as the default, so this looks
 # like a repeat of the read into `vcf01` -- confirm whether both are needed.
 my_snps <- vcfR::read.vcfR(
   "22.21299928-21539928.ALL.chr22_GRCh38.genotypes.20170504.vcf.gz",
   convertNA = TRUE
 )
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 130
##   header_line: 131
##   variant count: 1911
##   column count: 2513
## 
## Meta line 130 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 1911
##   Character matrix gt cols: 2513
##   skip: 0
##   nrows: 1911
##   row_num: 0
## 
## Processed variant 1000
## Processed variant: 1911
## All variants processed
# Preview the loaded vcfR object: prints the first few entries of its meta,
# fixed, and genotype sections, plus the unique GT formats.
head(my_snps)
## [1] "***** Object of class 'vcfR' *****"
## [1] "***** Meta section *****"
## [1] "##fileformat=VCFv4.1"
## [1] "##FILTER=<ID=PASS,Description=\"All filters passed\">"
## [1] "##fileDate=20150218"
## [1] "##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/refe [Truncated]"
## [1] "##source=1000GenomesPhase3Pipeline"
## [1] "##contig=<ID=1,assembly=b37,length=249250621>"
## [1] "First 6 rows."
## [1] 
## [1] "***** Fixed section *****"
##      CHROM POS        ID            REF ALT     QUAL  FILTER
## [1,] "22"  "21301490" "rs554578373" "G" "A"     "100" "PASS"
## [2,] "22"  "21301918" "rs568193212" "T" "C"     "100" "PASS"
## [3,] "22"  "21303053" "rs552989884" "A" "AAAAT" "100" "PASS"
## [4,] "22"  "21306280" "rs534078446" "A" "C"     "100" "PASS"
## [5,] "22"  "21306539" "rs554333566" "C" "T"     "100" "PASS"
## [6,] "22"  "21306546" "rs577325004" "G" "A"     "100" "PASS"
## [1] 
## [1] "***** Genotype section *****"
##      FORMAT HG00096 HG00097 HG00099 HG00100 HG00101
## [1,] "GT"   "0|0"   "0|0"   "0|0"   "0|0"   "0|0"  
## [2,] "GT"   "0|0"   "0|0"   "0|0"   "0|0"   "0|0"  
## [3,] "GT"   "0|0"   "0|0"   "0|0"   "0|0"   "0|0"  
## [4,] "GT"   "0|0"   "0|0"   "0|0"   "0|0"   "0|0"  
## [5,] "GT"   "0|0"   "0|0"   "0|0"   "0|0"   "0|0"  
## [6,] "GT"   "0|0"   "0|0"   "0|0"   "0|0"   "0|0"  
## [1] "First 6 columns only."
## [1] 
## [1] "Unique GT formats:"
## [1] "GT"
## [1]