library(tidyverse)
# Load the per-site genotype table. Later steps read its Accession, SNP,
# I_geno, S_geno and Sanger columns.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
# NOTE(review): the path points at a local Downloads folder -- adjust before
# running on another machine.
x <- read.csv("~/Downloads/suptablu.csv", header = TRUE)
# Calculate genotype stats by accession ----
# Per-accession genotype concordance (excluding hets), computed for all sites
# and for Sanger alt sites only. The two summaries are then merged.
# Per-accession accuracy over all sites: the mean of a 1/0 flag that is 1 when
# I_geno equals S_geno. Rows whose I_geno contains "/" (het calls) are dropped
# before averaging.
accession_all <- x %>%
  filter(!grepl("/", I_geno)) %>%
  mutate(correct = ifelse(I_geno == S_geno, 1, 0)) %>%
  group_by(Accession) %>%
  summarise(accuracy = mean(correct))
# Per-accession accuracy restricted to rows where Sanger == "alt", again
# dropping rows whose I_geno contains "/" (het calls).
accession_alt <- x %>%
  filter(Sanger == "alt", !grepl("/", I_geno)) %>%
  mutate(correct = ifelse(I_geno == S_geno, 1, 0)) %>%
  group_by(Accession) %>%
  summarise(alt_accuracy = mean(correct))
# Combine the two per-accession summaries on Accession. merge() defaults to an
# inner join, so an accession missing from either table is dropped.
accession_dat <- merge(
  x = accession_all,
  y = accession_alt,
  by = "Accession"
)
# Per-SNP accuracies (excluding hets); merge.
# Per-SNP accuracy restricted to rows where Sanger == "alt", dropping rows
# whose I_geno contains "/" (het calls).
snp_alt <- x %>%
  filter(Sanger == "alt", !grepl("/", I_geno)) %>%
  mutate(correct = ifelse(I_geno == S_geno, 1, 0)) %>%
  group_by(SNP) %>%
  summarise(alt_accuracy = mean(correct))
# Per-SNP accuracy over all sites: the mean of a 1/0 flag that is 1 when
# I_geno equals S_geno, after dropping rows whose I_geno contains "/".
snp_all <- x %>%
  filter(!grepl("/", I_geno)) %>%
  mutate(correct = ifelse(I_geno == S_geno, 1, 0)) %>%
  group_by(SNP) %>%
  summarise(accuracy = mean(correct))
# Join the two per-SNP summaries on SNP (inner join), then reshape to long
# form: one row per SNP and metric, with the metric name ("accuracy" or
# "alt_accuracy") in `type` and its number in `value`.
# pivot_longer() replaces the superseded gather(); the values are the same,
# only the row order and the result class (tibble) differ.
snp_dat <- merge(snp_all, snp_alt, by = "SNP") %>%
  pivot_longer(
    cols = c(accuracy, alt_accuracy),
    names_to = "type",
    values_to = "value"
  )
# Heterozygosity per accession; merge with accuracy.
# Per-accession het rate: the share of rows whose I_geno contains "/".
# Averaging the logical vector gives the same number as averaging a 1/0 flag.
accession_het <- x %>%
  group_by(Accession) %>%
  summarise(hets = mean(grepl("/", I_geno)))
# Attach the het rate to the per-accession accuracy table (inner join on
# Accession).
accession_dat <- merge(
  x = accession_dat,
  y = accession_het,
  by = "Accession"
)
# Read sequencing depth data.
# Load the per-accession table that is merged with the genotype summaries. The
# plots further down use the Subspecies and Average.depth columns, which the
# genotype summaries do not contain.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
# NOTE(review): the path points at a local Desktop folder -- adjust before
# running on another machine.
y <- read.csv("~/Desktop/suptablu3.csv", header = TRUE)
# Merge with genotypes.
# Join the accession summaries to the depth table. `by = 1` matches on the
# first column of each table (Accession on the left-hand side).
# NOTE(review): presumably the first column of `y` holds accession IDs --
# confirm against the CSV.
xy <- merge(x = accession_dat, y = y, by = 1)
# Long-form copy of `xy` for faceted plots: the accuracy and alt_accuracy
# columns are stacked, with the metric name in `type` and its number in
# `value`. pivot_longer() replaces the superseded gather(); the values are the
# same, only the row order and the result class (tibble) differ.
xyz <- pivot_longer(
  xy,
  cols = c(accuracy, alt_accuracy),
  names_to = "type",
  values_to = "value"
)
# Graphs ----
# Accuracy by subspecies – maize better than teosinte.
# Boxplots of per-accession accuracy, one box per Subspecies and one panel per
# accuracy type.
ggplot(
  data = xyz,
  mapping = aes(y = value, color = Subspecies, group = Subspecies)
) +
  geom_boxplot() +
  facet_wrap(~type) +
  theme_minimal()

# Accuracy vs. depth – not much here.
# Scatter of accuracy against Average.depth with a linear fit per Subspecies
# (no confidence band), one panel per accuracy type.
ggplot(
  data = xyz,
  mapping = aes(Average.depth, value, color = Subspecies)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~type) +
  theme_minimal()

# Het calls by subspecies.
# Boxplots of the per-accession het rate, one box per Subspecies.
ggplot(
  data = xy,
  mapping = aes(y = hets, color = Subspecies, group = Subspecies)
) +
  geom_boxplot() +
  theme_minimal()

# Accuracy by SNP.
# One point per SNP and accuracy type, colored by type.
ggplot(data = snp_dat, mapping = aes(SNP, value, color = type)) +
  geom_point() +
  theme_minimal()
