Loading SNP data

library(vcfR)

## Warning: package 'vcfR' was built under R version 4.2.2

## 
##    *****       ***   vcfR   ***       *****
##    This is vcfR 1.13.0 
##      browseVignettes('vcfR') # Documentation
##      citation('vcfR') # Citation
##    *****       *****      *****       *****

getwd()

## [1] "C:/Users/afhar/Desktop/Comp Bio/my_snps"

list.files()

## [1] "3.39417505-39657505.ALL.chr3_GRCh38.genotypes.20170504.vcf.gz"
## [2] "loaded-snp-data.html"                                         
## [3] "loaded-snp-data.Rmd"                                          
## [4] "loaded snp data.R"                                            
## [5] "loaded snp data.Rmd"

vcf<-vcfR::read.vcfR(file ="3.39417505-39657505.ALL.chr3_GRCh38.genotypes.20170504.vcf.gz")

## Scanning file to determine attributes.
## File attributes:
##   meta lines: 130
##   header_line: 131
##   variant count: 6889
##   column count: 2513
## 
Meta line 130 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 6889
##   Character matrix gt cols: 2513
##   skip: 0
##   nrows: 6889
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant: 6889
## All variants processed

vcf

## ***** Object of Class vcfR *****
## 2504 samples
## 1 CHROMs
## 6,889 variants
## Object size: 134.6 Mb
## 0 percent missing data
## *****        *****         *****

This can all be ignored; I was messing around.

#myID<-getID(vcf)
#myID
#nonunique<-duplicated(myID)
#which(nonunique)

#vcf_test<-vcf
#vcf_df<-extract.gt(vcf_test, IDtoRowNames = F)
#vcf_df
#summary(vcf)
#tvcf<-t(vcf)
#e_tvcf<-extract.gt(tvcf)
#e_tvcf
#gt_df<-extract.gt(vcf[,-1],
#element = "GT",
#return.alleles = T)
#summary(gt_df)

#help("duplicated")

Loading SNP data

2022-11-28