library(vcfR)
## Warning: package 'vcfR' was built under R version 4.2.2
##
## ***** *** vcfR *** *****
## This is vcfR 1.13.0
## browseVignettes('vcfR') # Documentation
## citation('vcfR') # Citation
## ***** ***** ***** *****
# Show the current working directory; the relative VCF file name passed to
# read.vcfR() later in this script is resolved against it.
getwd()
## [1] "C:/Users/afhar/Desktop/Comp Bio/my_snps"
# List the contents of the working directory to confirm the VCF file is there.
list.files()
## [1] "3.39417505-39657505.ALL.chr3_GRCh38.genotypes.20170504.vcf.gz"
## [2] "loaded-snp-data.html"
## [3] "loaded-snp-data.Rmd"
## [4] "loaded snp data.R"
## [5] "loaded snp data.Rmd"
# Read the gzip-compressed VCF from the working directory into a vcfR object.
# Per its file name, the file covers chr3:39417505-39657505 (GRCh38).
vcf <- vcfR::read.vcfR(
  file = "3.39417505-39657505.ALL.chr3_GRCh38.genotypes.20170504.vcf.gz"
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 130
## header_line: 131
## variant count: 6889
## column count: 2513
##
## Meta line 130 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 6889
## Character matrix gt cols: 2513
## skip: 0
## nrows: 6889
## row_num: 0
##
## Processed variant 1000
## Processed variant 2000
## Processed variant 3000
## Processed variant 4000
## Processed variant 5000
## Processed variant 6000
## Processed variant: 6889
## All variants processed
# Print the loaded object at top level to display its summary
# (sample, CHROM and variant counts, object size, percent missing data).
vcf
## ***** Object of Class vcfR *****
## 2504 samples
## 1 CHROMs
## 6,889 variants
## Object size: 134.6 Mb
## 0 percent missing data
## ***** ***** *****
# The commented-out code below was exploratory scratch work and can all be ignored.
#myID<-getID(vcf)
#myID
#nonunique<-duplicated(myID)
#which(nonunique)
#vcf_test<-vcf
#vcf_df<-extract.gt(vcf_test, IDtoRowNames = F)
#vcf_df
#summary(vcf)
#tvcf<-t(vcf)
#e_tvcf<-extract.gt(tvcf)
#e_tvcf
#gt_df<-extract.gt(vcf[,-1],
#element = "GT",
#return.alleles = T)
#summary(gt_df)
#help("duplicated")