load libraries

the VCF was filtered :

vcftools --vcf snponly_freebayes.vcf --chr NW_019211454.1 --chr NW_019211455.1 --chr NW_019211456.1 --chr NW_019211457.1 --chr NW_019211458.1 --chr NW_019211459.1 --chr NW_019211460.1 --max-alleles 2 --minQ 15000 --minDP 16 --maxDP 40 --max-missing 0.5 --maf 0.2 --recode --recode-INFO-all --out Q15000BIALLDP16HDP40mis.5maf.2Chr7  

load VCF file

vcf <- read.vcfR("/Users/nuriteliash/Documents/GitHub/varroa-linkage-map/data/vcf_filter/minQ_filter/Q15000BIALLDP16HDP40mis.5maf.2Chr7.recode.vcf", verbose = FALSE )
vcf
## ***** Object of Class vcfR *****
## 223 samples
## 7 CHROMs
## 33,925 variants
## Object size: 272.8 Mb
## 0 percent missing data
## *****        *****         *****
# extract the genotype for each site in each individual
gt <- extract.gt(vcf, element = "GT") 
gt <- as.data.frame(t(gt)) %>%
  rownames_to_column("ID")
#clean the ID names
gt$ID <- sub("_[^_]+$", "", gt$ID)

table <-gt %>% 
  t() %>%
  as.data.frame() %>%
  row_to_names(row_number = 1) %>% 
  dplyr::select(contains(c("son", "dat", "fnd"))) # keep only adults of F0, F1 and F2 

# set the families (include only families with at least one adult F2)
family = grep("grndat|grnson",gt$ID, value=TRUE) %>%
  str_extract("[^_]+")  %>%
  unique()

Main figures

Figure 2. F1 male (0/0) x Female (0/1)

Crossing homozygotic male, with heterozygotic Female (5) F1 male (0/0) x Female (0/1) original Rmd: ~/Documents/GitHub/varroa-linkage-map/R_scripts/explore_vcf_F1_F2.Rmd

# define a list to put all the data frames in
obs <- list()

for (fam in family) {
 
obs[[fam]] <- table %>%
  dplyr::select(starts_with(fam)) %>%
    dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "0/0")) %>% # force F0 Female to be homo, like her son
  dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/0")) %>%
  dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/1")) %>% 
  dplyr::select(contains("grn")) %>%
  tidyr::pivot_longer(everything())  %>% 
  dplyr::rename(sample = name, gt = value) %>%
  dplyr::count(sample, gt, .drop = FALSE) %>% 
  dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
  mutate(n = as.numeric(n)) %>%
  group_by(sample) %>%
  mutate(total = as.numeric(sum(n))) %>%
  mutate(sex = case_when(
    grepl("son", sample) ~ "Male",
    grepl("dat", sample) ~ "Female"))
}

# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1")))
  
# define the group of abnormal males
abnorm_males = tibble(sample = c("412_413a_grnson", "400_401a_grnson", "458_459a_grnson", "240_241c_grnson", "426_427b_grnson",  "300_301a_grnson"), sex = "Male", normality = "abnormal")

# or only the males that are abnormal for this cross (00x01): matrnal prop<0.75
#abnorm_males = tibble(sample = c("412_413a_grnson", "400_401a_grnson", "458_459a_grnson"), sex = "Male", normality = "abnormal")

samples_obs <- left_join(samples, observed, by=c("sample","gt")) %>% mutate(sex = case_when(
    grepl("son", sample) ~ "Male",
    grepl("dat", sample) ~ "Female")) %>%
    group_by(sample) %>%
    replace(is.na(.), 0) %>%
    mutate(total = as.numeric(sum(n))) %>%
    dplyr::mutate(prop = n/total) %>%
    left_join(abnorm_males, by = "sample") %>% 
    dplyr::select(-sex.y) %>% 
    replace(is.na(.), "normal") %>%
    dplyr::rename(sex = sex.x) %>%
    unite("type", sex,normality, remove = FALSE)

# make a table with the expected proportions for the different modes of reproduction:
Haploid = data.frame(mode = "Haploid", gt = c("0/0", "0/1", "1/1"),prop = c(0.5, 0, 0.5))
Automixis = data.frame(mode = "Automixis", gt = c("0/0", "0/1", "1/1"),prop = c(0, 1, 0))
Sexual = data.frame(mode = "Sexual",gt = c("0/0", "0/1", "1/1"),prop = c(0.5, 0.5, 0))

modes = rbind(Haploid,Automixis,Sexual)

Pooled sites

samples_obs_forPlot = samples_obs %>% 
  dplyr::filter(total>=10) %>%
  mutate(class = "Observed") %>%
  dplyr::filter(normality == "normal") %>%
  select(c("sample","gt", "n","total", "sex","prop","class")) %>%
  rename(type = sex)
  
modes_forPlot = modes %>% 
  mutate(class = "Expected") %>%
    rename(type = mode)

pooled_obs_count_00_01 =  samples_obs %>% 
    dplyr::filter(total>=10) %>%
    dplyr::filter(normality == "normal") %>%
    filter(gt =="0/0") %>%
    group_by(sex) %>% 
    mutate(sites = sum(total)) %>% 
   mutate(count_sample = n_distinct(sample)) %>%
   select(c(sex,sites,count_sample)) %>% 
    unique()

pooled_obs_count_00_01
## # A tibble: 2 × 3
## # Groups:   sex [2]
##   sex    sites count_sample
##   <chr>  <dbl>        <int>
## 1 Male    5561           12
## 2 Female  4942           15
df_00_01 = rbind(samples_obs_forPlot, modes_forPlot) 

plot

# (1) 
# order the x axis text 
df_00_01$type <- factor(df_00_01$type, level=c("Haploid" ,"Sexual","Automixis","Female","Male"))

# change the labales 
my_strip_labels <- as_labeller(c(
  "Expected" = "A. Expected proportions, 
different inheritance modes",
  "Observed" = "B. Observed proportions, 
pooled sites"))

# (2) or reverse plot: 
# order the x axis text 
df_00_01$type <- factor(df_00_01$type, level=c("Female","Male","Sexual","Automixis", "Haploid"))
df_00_01$class <- factor(df_00_01$class, level=c("Observed","Expected"))

# change the labales 
my_strip_labels <- as_labeller(c(
  "Observed" = "A. Observed proportions, 
    pooled sites",
"Expected" = "B. Expected proportions, 
    hypothesized inheritance modes"))

df_00_01 %>% ggplot(aes(fill=gt, y=prop, x=type)) + 
    geom_bar(position="fill", stat="identity", ) +
    ylab("Genotype proportion") +
    labs(title = "Offspring genotype, crossing (0/0) male x (0/1) Female") +    
    labs(fill = "Genotype") + 
    theme_classic() +
    theme(axis.title.x = element_blank(),
          axis.text.x = element_text(size=12),
          axis.text.y = element_text(size=12),
          strip.text.x = element_text(size = 12)) +
   scale_fill_manual(values=c("#ffbf00", "#66b032","#1982c4"))+
   scale_x_discrete(labels=c("Female" = "15 Females 
4,942 sites", "Male" = "12 males
5,561 sites",
"Haploid" = "Haploid 
parthenogenesis", 
"Sexual" = "Sexual
reproduction","Automixis" = "Diploid
parthenogenesis 
(Automixis)")) + # change the x axis text to be more informative:
   theme(legend.position = "right", 
            strip.text.x = element_text(angle = 0, hjust = 0)) +
   facet_grid(.~class, scales = 'free',space = 'free',drop = F,
              labeller = my_strip_labels) # add labels 

#only observed
df_00_01 %>% dplyr::filter(class == "Observed") %>%
ggplot(aes(fill=gt, y=prop, x=type)) + 
    geom_bar(position="fill", stat="identity", ) +
    ylab("Genotype proportion") +
    labs(title = "Offspring genotype, crossing (0/0) male x (0/1) Female") +    
    labs(fill = "Genotype") + 
    theme_classic() +
    theme(axis.title.x = element_blank(),
          axis.text.x = element_text(size=12),
          axis.text.y = element_text(size=12),
          strip.text.x = element_text(size = 12)) +
   scale_fill_manual(values=c("#ffbf00", "#66b032","#1982c4"))+
   scale_x_discrete(breaks=c("Female","Male"),
        labels=c(paste(filter(pooled_obs_count_00_01, sex == 'Female')$count_sample, " Females,\n",
filter(pooled_obs_count_00_01, sex == 'Female')$sites, "sites"), 
paste(filter(pooled_obs_count_00_01, sex == 'Male')$count_sample, " males,\n",
filter(pooled_obs_count_00_01, sex == 'Male')$sites, "sites")))

# what is the percentage of maternal genoytpe in the pooled data?
stat_00_01 = df_00_01 %>% dplyr::filter(class == "Observed") %>% 
  dplyr::filter(type == "Male") %>% dplyr::filter(gt == "0/1") %>% 
  stat.desc() %>%
  rownames_to_column("parameter") %>% 
  dplyr::filter(parameter == "sum") %>%
  dplyr::mutate(prop = n/total)
  

# plot distribution of prop
#df_00_01 %>% dplyr::filter(class == "Observed") %>%
 # ggplot(aes(x = type, y = prop, colour = gt)) + 
  #geom_boxplot(outlier.shape = NA, coef=0 ) +
  #geom_jitter(width=0.1, size=2) +
  #scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
  #theme_bw() +
#scale_shape_manual(values=c(1,19)) +
 # ggtitle('F2 offspring of F1 male (0/0) x Female (0/1)') +
  #  facet_wrap( ~gt)  +  guides(color = "none") +
 #theme(plot.margin = margin(1, 1, 1, 1, "cm"))

Figure 3. F1 male (0/1) x Female (0/0)

Crossing homozygotic male, with heterozygotic Female (7) F1 male (0/1) x Female (0/0) original Rmd: ~/Documents/GitHub/varroa-linkage-map/R_scripts/explore_vcf_F1_F2.Rmd

# define a list to put all the data frames in
obs <- list()

for (fam in family) {
 
obs[[fam]] <- table %>%
  dplyr::select(starts_with(fam)) %>%
    dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "0/1")) %>% # force F0 Female to be homo, like her son
  dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/1")) %>%
  dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/0")) %>% 
  dplyr::select(contains("grn")) %>%
  tidyr::pivot_longer(everything())  %>% 
  dplyr::rename(sample = name, gt = value) %>%
  dplyr::count(sample, gt, .drop = FALSE) %>% 
  dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
  mutate(n = as.numeric(n)) %>%
  group_by(sample) %>%
  mutate(total = as.numeric(sum(n))) %>%
  mutate(sex = case_when(
    grepl("son", sample) ~ "Male",
    grepl("dat", sample) ~ "Female"))
}

# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1")))
  
# define the group of abnormal males based on the overall analsys: (~/Documents/GitHub/varroa-linkage-map/R_scripts/pedigree-hetero-count-Q15000.Rmd)
abnorm_males = tibble(sample = c("412_413a_grnson",  "400_401a_grnson", "458_459a_grnson", "240_241c_grnson", "426_427b_grnson",  "300_301a_grnson"), sex = "Male", normality = "abnormal")

# for this specific cross, there are no abnormal males: matrnal prop<0.75

samples_obs <- left_join(samples, observed, by=c("sample","gt")) %>% mutate(sex = case_when(
    grepl("son", sample) ~ "Male",
    grepl("dat", sample) ~ "Female")) %>%
    group_by(sample) %>%
    replace(is.na(.), 0) %>%
    mutate(total = as.numeric(sum(n))) %>%
    dplyr::mutate(prop = n/total) %>%
    left_join(abnorm_males, by = "sample") %>% 
    dplyr::select(-sex.y) %>% 
    replace(is.na(.), "normal") %>%
    dplyr::rename(sex = sex.x) %>%
    unite("type", sex,normality, remove = FALSE)

# make a table with the expected proportions for the different modes of reproduction:
Haploid = data.frame(mode = "Haploid", gt = c("0/0", "0/1", "1/1"),prop = c(1, 0, 0))
Automixis = data.frame(mode = "Automixis", gt = c("0/0", "0/1", "1/1"),prop = c(1, 0, 0))
Sexual = data.frame(mode = "Sexual",gt = c("0/0", "0/1", "1/1"),prop = c(0.5, 0.5, 0))

modes = rbind(Haploid,Automixis,Sexual)

Pooled sites

samples_obs_forPlot = samples_obs %>% 
  dplyr::filter(total>=10) %>%
  mutate(class = "Observed") %>%
  dplyr::filter(normality == "normal") %>%
  select(c("sample","gt", "total", "sex","prop","class")) %>%
  rename(type = sex)
  
modes_forPlot = modes %>% 
  mutate(class = "Expected") %>%
    rename(type = mode)

pooled_obs_count_01_00 =  samples_obs %>% 
    dplyr::filter(total>=10) %>%
    dplyr::filter(normality == "normal") %>%
    filter(gt =="0/0") %>%
    group_by(sex) %>% 
    mutate(sites = sum(total)) %>% 
   mutate(count_sample = n_distinct(sample)) %>%
   select(c(sex,sites,count_sample)) %>% 
    unique()

pooled_obs_count_01_00
## # A tibble: 2 × 3
## # Groups:   sex [2]
##   sex    sites count_sample
##   <chr>  <dbl>        <int>
## 1 Male    1949           14
## 2 Female  2465           14
df_01_00 = rbind(samples_obs_forPlot, modes_forPlot) 

# order the x axis text 
df_01_00$type <- factor(df_01_00$type, level=c("Haploid" ,"Sexual","Automixis","Female","Male"))
df_01_00$class <- factor(df_01_00$class, level=c("Observed","Expected"))

# change the labales 
my_strip_labels <- as_labeller(c(
  "Observed" = "A. Observed proportions, 
     pooled sites",
"Expected" = "B. Expected proportions, 
     hypothesized inheritance modes"))

df_01_00 %>% ggplot(aes(fill=gt, y=prop, x=type)) + 
    geom_bar(position="fill", stat="identity", ) +
    ylab("Genotype proportion") +
   # labs(title = "Offspring genotype, crossing (0/1) male x (0/0) Female") +    
    labs(fill = "Genotype") + 
    theme_classic() +
    theme(axis.title.x = element_blank(),
          axis.text.x = element_text(size=12),
          axis.text.y = element_text(size=12),
          strip.text.x = element_text(size = 12)) +
   scale_fill_manual(values=c("#ffbf00", "#66b032","#1982c4"))+
   scale_x_discrete(labels=c("Haploid" = "Haploid 
parthenogenesis", 
"Sexual" = "Sexual
reproduction","Automixis" = "Diploid
parthenogenesis 
(Automixis)", "Female" = "14 Females 
2,465 sites", "Male" = "14 males
1,949 sites")) + # change the x axis text to be more informative:
   theme(legend.position = "right",
          strip.text.x = element_text(angle = 0, hjust = 0)) +
   facet_grid(.~class, scales = 'free',space = 'free',drop = F,
              labeller = my_strip_labels) # add labels 

#only observed
df_01_00 %>% dplyr::filter(class == "Observed") %>%
  ggplot(aes(fill=gt, y=prop, x=type)) + 
    geom_bar(position="fill", stat="identity", ) +
    ylab("Genotype proportion") +
  # labs(title = "F2 genotype, crossing (0/1) male x (0/0) Female") +    
    labs(fill = "Genotype") + 
    theme_classic() +
    theme(axis.title.x = element_blank(),
          axis.text.x = element_text(size=12),
          axis.text.y = element_text(size=12),
          strip.text.x = element_text(size = 12)) +
   scale_fill_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
   scale_x_discrete(breaks=c("Female","Male"),
        labels=c(paste(filter(pooled_obs_count_01_00, sex == 'Female')$count_sample, " Females,\n",
filter(pooled_obs_count_01_00, sex == 'Female')$sites, "sites"), 
paste(filter(pooled_obs_count_01_00, sex == 'Male')$count_sample, " males,\n",
filter(pooled_obs_count_01_00, sex == 'Male')$sites, "sites")))

Figure 4A. Flow-cytometry

original Rmd: ~/Documents/GitHub/varroa_ploidy/R_scripts/varroa-ploidy.Rmd

data <- read.csv("/Users/nuriteliash/Documents/GitHub/varroa_ploidy/data/ploidy.csv") %>%
  dplyr::mutate(Family = as.character(Family)) %>%
  dplyr::mutate(stage_mature = case_when(
    grepl("adult", Stage) ~ "Mature",
    !grepl("adult", Stage) ~ "Imature")) 

# change the labales 
my_strip_labels <- as_labeller(c(
  "Imature" = "Imature mites
n = 37",
  "Mature" = "Mature mites
n = 38"))

# histogram plot:
p_ploidy_hist = data %>% dplyr::filter(body.part == "Body") %>%
 ggplot(aes(x=Ploidy, color = Sex, fill = Sex)) + 
  geom_histogram(position="dodge") +
   geom_density(alpha=.2) +
  ylab("Count") +
theme_classic2() +
     theme(legend.position = "none") +
ggtitle("Varroa mite ploidy count
Flow-cytometry") +
   facet_grid(.~stage_mature, space = 'free',drop = F,
              labeller = my_strip_labels) # add labels 

# density plot:
p_ploidy_dens = data %>% dplyr::filter(body.part == "Body") %>%
 ggplot(aes(x=Ploidy, color = Sex, fill = Sex)) + 
 # geom_histogram(position="dodge") +
   geom_density(alpha=.2) +
    ylab("Density") +
theme_classic2() +
     theme(legend.position = "none") +
ggtitle("Varroa mite ploidy
Flow-cytometry") +
   facet_grid(.~stage_mature, space = 'free',drop = F,
              labeller = my_strip_labels) # add labels 

#density plot with rug
p_ploidy_dens_rug = data %>% dplyr::filter(body.part == "Body") %>%
    ggplot(aes(x=Ploidy, color = Sex, fill = Sex)) + 
   geom_density(alpha=.2) +
   geom_rug(aes(x=Ploidy,color = Sex), alpha=0.9, size=1.5,length = unit(0.05, "npc"))+
      ylab("Density") +
      theme_classic2() +
      theme(legend.position = "none") +
      ggtitle("Varroa mite ploidy
Flow-cytometry") +
    facet_grid(.~stage_mature, space = 'free',drop = F,
              labeller = my_strip_labels) # add labels 

Figure 4B. Karyotyping

original Rmd: ~/Documents/GitHub/varroa-karyotyping/varroa karyotyping.Rmd

dat <- read.csv("/Users/nuriteliash/Documents/GitHub/varroa-karyotyping/karyo_varroa.csv")
summary(dat)
##        ID            Sex               stage             cell_count    
##  Min.   :30.00   Length:357         Length:357         Min.   : 0.000  
##  1st Qu.:63.00   Class :character   Class :character   1st Qu.: 0.000  
##  Median :79.00   Mode  :character   Mode  :character   Median : 0.000  
##  Mean   :70.29                                         Mean   : 1.454  
##  3rd Qu.:89.00                                         3rd Qu.: 2.000  
##  Max.   :96.00                                         Max.   :25.000  
##    cell_perc      chromo_number
##  Min.   : 0.000   Min.   : 4   
##  1st Qu.: 0.000   1st Qu.: 9   
##  Median : 0.000   Median :14   
##  Mean   : 4.762   Mean   :14   
##  3rd Qu.: 6.452   3rd Qu.:19   
##  Max.   :58.824   Max.   :24
# how many males and Females?
mite_counts = tibble(males = n_distinct(filter(dat, Sex=="male")$ID), 
           Females = n_distinct(filter(dat, Sex=="Female")$ID)) 

mite_counts
## # A tibble: 1 × 2
##   males Females
##   <int>   <int>
## 1     8       0
dat_karyo = dat %>% 
   dplyr::mutate(chromo_number = as.numeric(chromo_number)) %>%
  expandRows("cell_count", drop = FALSE) %>% 
  dplyr::mutate(Indx = row_number(cell_count)) %>%
  group_by(ID) %>%
dplyr::mutate(median_count = median(chromo_number)) %>%
    dplyr::mutate(stage_mature = "Imature mites
n = 17")

# change the labales 
#my_strip_labels <- as_labeller("Imature" = "Imature mites
#n = 17")

p_kar_dens = dat_karyo %>% 
 ggplot(aes(x=chromo_number, color = Sex, fill = Sex)) + 
 # geom_histogram(position="dodge") +
  geom_density(alpha=.2) +
    ylab("Density") +
    xlab("Chromosome count") +
theme_classic2() +
     theme(legend.position = "none") +
ggtitle("Varroa mite chromosome count
Karyotyping") +
    facet_grid(.~stage_mature, space = 'free',drop = F)
  

#density plot with rug
p_karyo_dens_rug = dat_karyo %>% 
 ggplot(aes(x=chromo_number, color = Sex, fill = Sex)) + 
   geom_rug(aes(x=chromo_number,color = Sex), alpha=0.9, size=1.5,  length = unit(0.05, "npc"))+
  geom_density(alpha=.2) +
    ylab("Density") +
    xlab("Chromosome count") +
theme_classic2() +
     theme(legend.position = "none") +
ggtitle("Varroa mite chromosome count
Karyotyping") +
    facet_grid(.~stage_mature, space = 'free',drop = F)

# or histogram:
p_karyo_hist = dat_karyo %>% 
 ggplot(aes(x=chromo_number, color = Sex, fill = Sex)) + 
 geom_density(alpha=.2) +
   geom_histogram(position="dodge", binwidth = 1) +
  ylab("Count") +
    xlab("Chromosome count") +
theme_classic2() +
     theme(legend.position = "none") +
ggtitle("Varroa mite chromosome count
Karyotyping") 

plot Karyotyping and flow cytometry together

Figure 4. Ploidy assessment

ggarrange(p_ploidy_dens_rug, p_karyo_dens_rug, labels = c("A", "B"),
  common.legend = TRUE, legend = "bottom",  widths = c(1, 0.65))

other possible plots for Figure 4

ggarrange(p_ploidy_dens, p_kar_dens, labels = c("A", "B"),
  common.legend = TRUE, legend = "bottom")

ggarrange(p_ploidy_hist, p_karyo_hist, labels = c("A", "B"),
  common.legend = TRUE, legend = "bottom")

Supp figures

Determnie the variant quality threshold

original Rmd: ~/Documents/GitHub/varroa-linkage-map/R_scripts/exceptional-sites.Rmd

The first metric we will look at is the (Phred encoded) site quality. This is a measure of how much confidence we have in our variant calls. First of all, we read in the site quality report we generated using vcftools. We will use the read_delim command from the readr package (part of the the tidyverse) because it is more efficient for reading in large datafiles. It also allows us to set our own column names.

qual_var <- read_delim("/Users/nuriteliash/Documents/GitHub/varroa-linkage-map/data/vcf_filter/Q40BIALLDP16HDP40mis.5Chr7/Q40BIALLDP16HDP40mis.5Chr7.lqual", delim = "\t",
           col_names = c("chr", "pos", "qual"), skip = 1) %>%
   unite(site, c("chr", "pos"))

  ggplot(qual_var, aes(qual)) +
  geom_density(fill = "dodgerblue1", colour = "black", alpha = 0.3) + theme_classic2() +
  ggtitle("Variant quality distribution") +
 scale_x_continuous(n.breaks = 12) +
    theme(axis.text.x=element_text(angle=90,hjust=1)) +
      xlab("Variant quality") + 
      ylab("Density") +
   geom_vline(xintercept = 15000, color = "red")

the quality is same for individuals, - its already normalized.

Supp Figure 1. all F2 mites genotypes

The remaining males (~25%) inherit alleles from mother and father, suggesting sexual reproduction

** in progress


Heterozygosity per individual, DNA

varroa mite DNA

originally from ~/Documents/GitHub/varroa-linkage-map/R_scripts/LinkMap malesHet.Rmd

ind_het <- read_delim("/Users/nuriteliash/Documents/GitHub/varroa-linkage-map/data/vcf_filter/Q40BIALLDP16HDP40mis.5Chr7/Q40BIALLDP16HDP40mis.5Chr7.het", delim = "\t",
           col_names = c("ind","homo_ob", "homo_ex", "nsites", "f"), skip = 1)

# find all 'Female' and 'male' 
Male<- grep("son",ind_het$ind)
Female <- grep("dat|fn|sis",ind_het$ind)

ind_het_sex <- ind_het %>%
  mutate(sex = ifelse(row_number() %in% Female, "Female", ifelse(row_number() %in% Male, "Male", "not-determined"))) %>%
  mutate(hom_prop =  homo_ob/nsites) %>%
  mutate(het_prop = (nsites-homo_ob)/nsites) %>%
#keep only adult mites, for which sex is absolutly determined (exclude nymphs and eggs ("not determined"))
 dplyr::filter(sex %in% c("Male", "Female"))

 # is there a significant difference in the proportion of heterozygotic sites between males and Females?5
wil_var <- wilcox.test(het_prop ~ sex, alternative = "two.sided", data = ind_het_sex)
t_test <- t.test(asin(sqrt(het_prop)) ~ sex, alternative = "two.sided", data = ind_het_sex)
# no significant different (both wilcoxone and welch-test)

# plot heterozygosity proportion, in each sex
p_var_DNA <- ggplot(ind_het_sex) +
    geom_boxplot(aes(x = sex, y = het_prop, fill = sex)) + 
    scale_y_continuous(expand=c(0,0), limits = c(0, 1)) +
    theme_classic() +
    labs(title = "A. Varroa DNA") +
  ylab("Proportion of heterozygotic sites") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
          axis.title.y = element_blank(),
        legend.position = "none",
        legend.title = element_blank(), 
        axis.ticks.x=element_blank()) +scale_fill_manual(values=c("white","black")) 

# get the median and avarage values for the proportions
count_varroa_DNA = ind_het_sex %>%
  group_by(sex) %>%
  summarise(median = median(het_prop), mean = mean(het_prop), n = n(), sd = sd(het_prop)) %>%
  mutate(organism = "varroa") %>%
  mutate(data = "DNA") %>%
  mutate(pvalue = format(round(wil_var$p.value, digits=3)))

Honeybee DNA

~/Documents/GitHub/Variant_Calling_bees/R_scripts/male bees.Rmd Genomic data from BioProject PRJNA311274 (Wragg et al. 2016)

ind_het_bee <- read_delim("/Users/nuriteliash/Documents/GitHub/Variant_Calling_bees/data/vcf_stats/bee.het", delim = "\t",
           col_names = c("ind","homo_ob", "homo_ex", "nsites", "f"), skip = 1)

# add the sex of each sample, and the proportion of homosyzgotic and hetero sites
sex <- read.csv("/Users/nuriteliash/Documents/GitHub/Variant_Calling_bees/data/meta.csv") %>%
  mutate(sex = replace(sex, sex == "female", "Female")) %>%
    mutate(sex = replace(sex, sex == "male", "Male"))

ind_het_bee <- left_join(ind_het_bee,sex, by ="ind") %>%
  mutate(hom_prop =  homo_ob/nsites) %>%
  mutate(het_prop = (nsites-homo_ob)/nsites)

# plot heterozygocity proportion, in each sex
p_bee_DNA <- ggplot(ind_het_bee) +
    geom_boxplot(aes(x = sex, y = het_prop, fill = sex)) + 
    theme_classic() +
     scale_y_continuous(expand=c(0,0), limits = c(0, 1)) +
 labs(title = "C. Honeybee DNA") +
  ylab("Proportion of heterozygotic sites") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
          axis.title.y = element_blank(),
        legend.position = "none",
        legend.title = element_blank(), 
        axis.ticks.x=element_blank())+scale_fill_manual(values=c("white","black")) 

# is there a significant difference in the proportion of heterozygotic sites between males and Females?5
wil_var <- wilcox.test(het_prop ~ sex, alternative = "two.sided", data = ind_het_bee)
t_test <- t.test(asin(sqrt(het_prop)) ~ sex, alternative = "two.sided", data = ind_het_bee)

# get the median and avarage values for the proportions
count_bee_DNA = ind_het_bee %>%
  group_by(sex) %>%
  summarise(median = median(het_prop), mean = mean(het_prop), n = n(), sd = sd(het_prop)) %>%
  mutate(organism = "bee") %>%
  mutate(data = "DNA") %>%
  mutate(pvalue = format(round(wil_var$p.value, digits=3)))

Heterozygosity per individual, RNAseq

varroa mite RNAseq

~/Documents/GitHub/Variant_Calling_VarroaRNA/R_scripts/VCFvarroa_RNAseq.Rmd RNAseq data from BioProject PRJNA380433

ind_het <- read_delim("/Users/nuriteliash/Documents/GitHub/Variant_Calling_VarroaRNA/data/Q40BIALLDP16_40maf0.2Chr7.het", delim = "\t",
           col_names = c("ind","homo_ob", "homo_ex", "nsites", "f"), skip = 1)

# add the sex of each sample, and the proportion of homosyzgotic and hetero sites
info <- read.csv("/Users/nuriteliash/Documents/GitHub/Variant_Calling_VarroaRNA/data/meta.csv")
ind_het_var_RNA <- left_join(ind_het,info, by ="ind") %>%
  mutate(hom_prop =  homo_ob/nsites) %>%
  mutate(het_prop = (nsites-homo_ob)/nsites) %>%
mutate(sex = ifelse(row_number() %in% Female, "Female", ifelse(row_number() %in% Male, "Male", "not-determined"))) %>%
  filter(sex %in% c("Male","Female"))


# plot heterozygosity proportion, in each sex
p_var_RNA = ind_het_var_RNA %>%
  dplyr::filter(sex %in% c("Female","Male")) %>% 
  ggplot() +
    geom_boxplot(aes(x = sex, y = het_prop, fill = sex)) + 
    theme_classic() +
  scale_y_continuous(expand=c(0,0), limits = c(0, 1)) +
   labs(title = "B. Varroa RNA") +
  ylab("Proportion of heterozygotic sites") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
          axis.title.y = element_blank(),
        legend.position = "none",
        legend.title = element_blank(), 
        axis.ticks.x=element_blank())+scale_fill_manual(values=c("white","black")) 

# is there a significant difference in the proportion of heterozygotic sites between males and Females?5
wil_var <- wilcox.test(het_prop ~ sex, alternative = "two.sided", data = ind_het_var_RNA)
t_test <- t.test(asin(sqrt(het_prop)) ~ sex, alternative = "two.sided", data = ind_het_var_RNA)

# get the median and avarage values for the proportions
count_varroa_RNA = ind_het_var_RNA %>%
  group_by(sex) %>%
  summarise(median = median(het_prop), mean = mean(het_prop), n = n(), sd = sd(het_prop)) %>%
  mutate(organism = "varroa") %>%
  mutate(data = "RNA") %>%
  mutate(pvalue = format(round(wil_var$p.value, digits=3)))

# no significant difference (both wilcoxone and welch-test)

Honeybee RNAseq

~/Documents/GitHub/Variant_calling_BeeRNA/R_scripts/VCF_BEE_RNAseq.Rmd RNAseq data from BioProject PRJNA689223

ind_het <- read_delim("/Users/nuriteliash/Documents/GitHub/Variant_calling_BeeRNA/data/Q40BIALLDP16_40mis.5maf0.2Chr/Q40BIALLDP16_40mis.5maf0.2Chr.het", delim = "\t",
           col_names = c("ind","homo_ob", "homo_ex", "nsites", "f"), skip = 1)

# add the sex of each sample, and the proportion of homosyzgotic and hetero sites
info <- read.csv("/Users/nuriteliash/Documents/GitHub/Variant_calling_BeeRNA/data/meta_14.csv")
ind_het_bee_RNA <- left_join(ind_het,info, by ="ind") %>%
  mutate(hom_prop =  homo_ob/nsites) %>%
  mutate(het_prop = (nsites-homo_ob)/nsites) %>%
  mutate(sex = ifelse(row_number() %in% Female, "Female", ifelse(row_number() %in% Male, "Male", "not-determined"))) %>%
    dplyr::filter(sex %in% c("Female","Male")) 

 
# plot heterozygosity proportion, in each sex
p_bee_RNA = ind_het_bee_RNA %>%
 # dplyr::filter(sex %in% c("Female","Male")) %>% 
  ggplot() +
    geom_boxplot(aes(x = sex, y = het_prop, fill = sex)) + 
    theme_classic() + 
    scale_y_continuous(expand=c(0,0), limits = c(0, 1)) +
    labs(title = "D. Honeybee RNA") +
  ylab("Proportion of heterozygotic sites") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
          axis.title.y = element_blank(),
        legend.position = "none",
        legend.title = element_blank(), 
        axis.ticks.x=element_blank())+scale_fill_manual(values=c("white","black")) 

# is there a significant difference in the proportion of heterozygotic sites between males and Females?5
wil_var <- wilcox.test(het_prop ~ sex, alternative = "two.sided", data = ind_het_bee_RNA)
t_test <- t.test(asin(sqrt(het_prop)) ~ sex, alternative = "two.sided", data = ind_het_bee_RNA)


# get the median and avarage values for the proportions
count_bee_RNA = ind_het_bee_RNA %>%
  group_by(sex) %>%
  summarise(median = median(het_prop), mean = mean(het_prop), n = n(), sd = sd(het_prop)) %>%
  mutate(organism = "bee") %>%
  mutate(data = "RNA") %>%
  mutate(pvalue = format(round(wil_var$p.value, digits=3)))

Supp Figure 2. Heterozygosity rates in mites and bees

#grid.arrange(top= grid::textGrob("Proportion of heterozygotic sites in varroa and honeybee", gp=grid::gpar(fontsize=20)), p_bee_DNA, p_var_DNA, 
 #            p_bee_RNA, p_var_RNA, nrow = 2, ncol = 2, lege)

plot_4 = ggarrange(p_var_DNA, p_var_RNA, 
             p_bee_DNA, p_bee_RNA, nrow = 2, ncol = 2, #labels = c("A", "B", "C", "D"),
  common.legend = TRUE, legend = "bottom")


annotate_figure(plot_4,
                left = text_grob("Proportion of heterozygotic sites", rot = 90))

Supp Table 1. Heterozygosity rates in mites and bees

#bind together all data for RNA and DNA , for varroa and bees
count_all = rbind(count_bee_DNA,count_bee_RNA, count_varroa_DNA, count_varroa_RNA) %>% 
  mutate_if(is.numeric, ~round(., 2))

knitr::kable(count_all,  caption = 'Heterozygosity proportion in varroa and honeybee DNA and RNA') %>%
  kable_styling(full_width = T)
Heterozygosity proportion in varroa and honeybee DNA and RNA
sex median mean n sd organism data pvalue
Female 0.48 0.48 30 0.02 bee DNA 0
Male 0.13 0.12 6 0.09 bee DNA 0
Female 0.46 0.40 6 0.14 bee RNA 0.126
Male 0.10 0.18 5 0.17 bee RNA 0.126
Female 0.18 0.20 93 0.12 varroa DNA 0.819
Male 0.20 0.20 56 0.14 varroa DNA 0.819
Female 0.85 0.85 2 0.01 varroa RNA 0.333
Male 0.93 0.93 2 0.04 varroa RNA 0.333

Ploidy supp

load data

data <- read.csv("/Users/nuriteliash/Documents/GitHub/varroa_Ploidy/data/Ploidy.csv") %>%
  dplyr::mutate(Family = as.character(Family)) %>%
  dplyr::mutate(stage_mature = case_when(
    grepl("adult", Stage) ~ "Mature",
    !grepl("adult", Stage) ~ "Imature"))

# order the levels 
data$body.part <- factor(data$body.part, level=c("Body", "Anterior", "Posterior", "Legs","Hemolymph","Ovary","Testes"))

data$Stage <- factor(data$Stage, level=c("Larvae", "Protonymph", "Deuteronymph", "adult"))

data$Stage_original <- factor(data$Stage_original, level=c("Mom", "Son", "Mature","Daughter", "Deuteronymph", "Protonymph", "Larvae", "Immature"))

levels(data$Family) <- c(levels(data$Family), 0)
data$Family <- factor(data$Family, level= c("0", "1", "11","27","3","2","4", "5"))

p_family_adults = data %>% dplyr::filter(body.part %in% c("Body", "Ovary","Testes")) %>%
   dplyr::filter(Stage == "adult") %>%
   mutate_at("Family", ~replace_na(.,"0")) %>%
  ggplot(aes(y=Ploidy, x=Sex, fill = Sex, lable = Stage)) + 
 geom_boxplot(outlier.shape = NA, coef=0 ) +  theme_classic() +  geom_jitter(width=0.1, size=2) +
  facet_wrap(~body.part ) + 
      theme(axis.title.x = element_blank(),
            axis.text.x = element_blank(),
            axis.ticks = element_blank(),
    panel.border=element_rect(colour="black",size=1, fill = NA))+
ggtitle('Mite ploidy in differnet body parts') +  ylim(0, 3)

p_fam_body = data %>% dplyr::filter(body.part == "Body") %>%
 dplyr::filter(Stage %in% c("Larvae", "Protonymph", "Deuteronymph", "adult")) %>%
  ggplot(aes(y=Ploidy, x=Sex, fill = Sex, lable = Stage)) + 
 geom_boxplot(outlier.shape = NA, coef=0 ) +  theme_classic() +  geom_jitter(width=0.1, size=2) +
facet_wrap(~Stage, nrow = 1 ) + 
  ggtitle('Mite Ploidy in whole body, in differnet developmental stage') +
theme(axis.title.x = element_blank(),
            axis.text.x = element_blank(),
            axis.ticks = element_blank(),
    panel.border=element_rect(colour="black",size=1, fill = NA))+
  theme(legend.position='right')+  ylim(0, 3)

plots

Supp Figure 3 a. Ploidy assessment, development stages

all developmental stages, males and Females, whole body

Supp Figure 3 b. Ploidy assessment, body parts

all developmental stages, males and Females, in different body parts