In vivolution

knitr::opts_chunk$set(warning = FALSE, message = FALSE) 

# RESULTADOS PLASMIDFINDER CON ENSAMBLADOS ANTES DE CERRAR GENOMA/PLASMIDS

library(readxl)  
library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(ggplot2)
library(ggpubr)

barcodes <- read.delim2("~/MEGA/Tesis/In_vivolution/Bioinfo/strains_metadata.tsv") 
plasmidfinder <-
  read_excel("~/MEGA/Tesis/In_vivolution/Bioinfo/tabla_all_plasmidfinder.xlsx")
data <-
  read_excel("~/MEGA/Tesis/In_vivolution/Bioinfo/invivolution.xlsx",
             skip =)
plasmidfinder <- plasmidfinder %>%
  select(strain, Plasmid, Contig) %>%
  #
  mutate(Plasmid = ifelse(
    Plasmid == "IncFIB(K)",
    yes = "IncFIB(K)/IncFII(K)",
    no = ifelse(Plasmid == "IncFII(K)", yes = "IncFIB(K)/IncFII(K)", no = Plasmid)
  )) %>%
  unique()

atbresistance <-
  read_excel("~/MEGA/Tesis/In_vivolution/Bioinfo/tabla_all_resfinder.xlsx")

atbresistance$`#FILE` =  str_remove(string = atbresistance$`#FILE`, "../spades/")
atbresistance$`#FILE` =  str_remove(string = atbresistance$`#FILE`, "_careful/scaffolds.fasta")
atbresistance$`#FILE` =  str_remove(string = atbresistance$`#FILE`, "/scaffolds.fasta")


atbresistance <- atbresistance %>%
  mutate(strain = `#FILE`) %>%
  select(-`#FILE`) %>%
  mutate(Contig = SEQUENCE) %>%
  select(-SEQUENCE)

# En esta tabla solo salen los genes de resistencia de los plásmidos IncF e IncQ
TOTAL <-
  left_join(plasmidfinder, atbresistance, by = c("strain", "Contig"))

TOTAL <- left_join(TOTAL, data, by = c("strain"))

TOTAL <-
  TOTAL %>%  mutate(strain_type = ifelse(grepl("P", strain), yes = "Portadores", no = "Infección"))

TOTAL %>%
  filter(!is.na(GENE)) %>%
  # filter(strain_type == "Infección") %>%
  ggplot(aes(
    x = Plasmid,
    y = GENE,
    color = species,
    fill = species
  )) +
  geom_point() +
  facet_wrap( ~ strain) +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)
  )

atbresistance <- left_join(atbresistance, data, by = c("strain"))

atbresistance %>% filter(GENE != "GENE") %>% 
  ggplot(aes(
    x = strain,
    y = GENE,
    color = species,
    fill = species
  )) +
  geom_tile() +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)
  )

knitr::opts_chunk$set(warning = FALSE, message = FALSE) 

# HEATMAPS

# Heatmap genes de resistencia
atbresistance <- atbresistance %>% 
  mutate(aislado = ifelse(grepl("P", strain), yes= "Portadores", no = "Infeccion")) %>% 
  filter(aislado == "Infeccion")

adj <-
  atbresistance %>% select(strain, species, tratamiento) %>% distinct()
adj <- column_to_rownames(adj, var = 'strain')

zz <-
  atbresistance %>% select(GENE, strain) %>% filter(GENE != 'GENE')
zz$value <- 1

zz_matrix <-
  zz %>% distinct() %>%  pivot_wider(names_from = GENE,
                                     values_from = value,
                                     values_fill = 0) %>% column_to_rownames(var = "strain")
f1 <- pheatmap::pheatmap(zz_matrix,
                   clustering_method = "ward.D2",
                   annotation_row = adj)

# Heatmap  x plasmido
adj <- plasmidfinder %>% select(strain) %>% distinct()
adj <- column_to_rownames(adj, var = 'strain')

zz <- plasmidfinder %>% select(Plasmid, strain)
zz$value <- 1

zz_matrix <-
  zz %>% distinct() %>%  pivot_wider(names_from = Plasmid,
                                     values_from = value,
                                     values_fill = 0) %>% column_to_rownames(var = "strain")
pheatmap::pheatmap(zz_matrix, clustering_method = "ward.D2")

# Heatmap  genes x plasmido
adj <-
  TOTAL %>% select(strain, species, tratamiento) %>% distinct()
adj <- column_to_rownames(adj, var = 'strain')

zz <- TOTAL %>% select(GENE, Plasmid) %>% filter(GENE != 'GENE')
zz$value <- 1

zz_matrix <-
  zz %>% distinct() %>%  pivot_wider(names_from = GENE,
                                     values_from = value,
                                     values_fill = 0) %>% column_to_rownames(var = "Plasmid")
pheatmap::pheatmap(zz_matrix)

knitr::opts_chunk$set(warning = FALSE, message = FALSE)  

# PLOTS

# Cepas por fecha

position <-
  c(
    "23/11/2020",
    "6/12/2020",
    "16/12/2020",
    "21/1/2021",
    "6/4/2021",
    "8/4/2021",
    "13/4/2021",
    "14/4/2021"
  )


TOTAL %>%
  ggplot(aes(
    x = GENE,
    y = Plasmid,
    color = strain,
    fill = strain
  )) +
  geom_point(alpha = 0.4, size = 7) +
  # scale_x_discrete(limits=position) +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1))

# Genes de resistencia por cepas

position1 <-
  c(
    "C1A1",
    "C1B1",
    "C2C1",
    "C3A1",
    "C3B1",
    "C4C1",
    "C5.1C1",
    "C5.2C1",
    "C5.2C2",
    "P1C1",
    "P2C2",
    "P3C1",
    "P4C1",
    "P5C1.I",
    "P6C1",
    "P7C1",
    "P8C1",
    "P9C1",
    "P10C1",
    "C6A1",
    "C6B1"
  )

atbresistance %>%
  filter(GENE != 'GENE') %>%
  filter(species == "Ec") %>%
  ggplot(aes(
    x = strain,
    y = GENE,
    color = species,
    fill = species
  )) +
  geom_point(alpha = 0.6, size = 7) +
  # facet_wrap(~species) +
  scale_x_discrete(limits = position1) +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)
  )

TOTAL %>%
  ggplot(aes(
    x = strain,
    y = Plasmid,
    color = species,
    fill = species
  )) +
  geom_tile() +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)
  )

TOTAL %>%
  filter(!is.na(GENE)) %>%
  ggplot(aes(
    x = strain,
    y = GENE,
    color = Plasmid,
    fill = Plasmid
  )) +
  geom_tile() +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)
  )

#Genes de resistencia y plásmidos por contig
a1 <- TOTAL %>% filter(!is.na(Plasmid)) %>% filter(strain =="C1B1") %>% 
  ggplot(aes(x = Contig, y = Plasmid, color = strain, fill = strain)) +
  geom_tile() +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)) 
  # scale_fill_manual(values=c("#B9C0DA", "#998DA0", "#C4DACF")) +
  # scale_color_manual(values=c("#B9C0DA", "#998DA0", "#C4DACF")) 

a2 <- atbresistance %>% filter(strain =="C1B1") %>%
  ggplot(aes(x = Contig, y = PRODUCT, color = strain, fill = strain)) +
  geom_tile() +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)) 
  # scale_fill_manual(values=c("#B9C0DA", "#998DA0", "#C4DACF")) +
  # scale_color_manual(values=c("#B9C0DA", "#998DA0", "#C4DACF")) 

ggarrange(a1, a2)

knitr::opts_chunk$set(warning = FALSE, message = FALSE) 

# ANALISIS SNIPPY_CONTIGS GENOMAS CERRADOS

# Leemos la tabla del snippy para las cepas con el plásmido

Invivolution.snps <- read.delim2("~/MEGA/Tesis/In_vivolution/Ensamblados/Snippy/compilation_tabs/tabla_all_SNPS_variantesInvivolution.txt")

Invivolution.snps = Invivolution.snps %>%
  separate(filename.CHROM,
           into = c("strain", "contig"),
           sep = ",") %>%
  separate(strain, into = c("strain", "cosa"), sep = "_") %>%
  select(-cosa) %>%
  filter(strain != "filename")

Invivolution.snps = Invivolution.snps %>%  full_join(data)

Invivolution.snps.count = Invivolution.snps %>%
  group_by(strain, TYPE, species) %>%
  summarise(NumberSNPs = n())

# Number snps per strain
Invivolution.snps.count %>%
  filter(!is.na(TYPE)) %>%
  ggplot() +
  geom_col(aes(strain, NumberSNPs, color = species, fill = species)) +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)
  )

# TYPE snps per strain
Invivolution.snps.count %>%
  filter(!is.na(TYPE)) %>%
  ggplot() +
  geom_col(aes(TYPE, NumberSNPs)) +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)
  )

knitr::opts_chunk$set(warning = FALSE, message = FALSE) 


# ANALISIS SNIPPY CON LOS READS SIN LIMPIAR

# Colis contra C3A1 como referencia Y Klebsiellas contra C1C1 como referencia

Invivolution.snps <- read.delim2("~/MEGA/Tesis/In_vivolution/Bioinfo/Snippy/tabla_all_SNPS_reads_Invivolution.txt")

Invivolution.snps = Invivolution.snps %>%
  separate(filename.CHROM,
           into = c("fastq", "contig"),
           sep = ",") %>%
  separate(fastq, into = c("fastq", "cosa"), sep = "_snps") %>%
  select(-cosa) %>%
  filter(fastq != "filename")

Invivolution.snps = Invivolution.snps %>% full_join(barcodes)

# Limpiamos los SNPs de las referencias contra ellas mismas
SNPs.referencias <- Invivolution.snps %>% 
  filter(sample == "C3A1" | sample == "C1C1") 

Invivolution.snps <- Invivolution.snps %>% 
  anti_join(SNPs.referencias, by = c("POS", "TYPE", "REF", "ALT")) %>% view()

Invivolution.snps %>% 
  group_by(sample,LOCUS_TAG) %>% 
summarise(N=n())

## # A tibble: 85 × 3
## # Groups:   sample [21]
##    sample LOCUS_TAG          N
##    <chr>  <chr>          <int>
##  1 C1A1   <NA>               1
##  2 C1B1   <NA>               1
##  3 C2C1   <NA>               1
##  4 C3B1   LGONEGAH_03709     1
##  5 C4C1   LGONEGAH_03644     3
##  6 C4C1   LGONEGAH_03677     1
##  7 C4C1   LGONEGAH_03709     1
##  8 C4C1   LGONEGAH_03826     1
##  9 C5.1C1 <NA>               1
## 10 C5.2C1 <NA>               1
## # … with 75 more rows

Invivolution.snps.count = Invivolution.snps %>%
  group_by(sample, TYPE, organism) %>%
  summarise(NumberSNPs = n())

# Number snps per strain
Invivolution.snps.count %>%
  filter(!is.na(TYPE)) %>%
  ggplot() +
  geom_col(aes(sample, NumberSNPs, color = organism, fill = organism)) +
  facet_wrap(~organism, scales="free_x","free_y")+
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)
  )

# TYPE snps per strain
Invivolution.snps.count %>% left_join(barcodes) %>% 
  filter(!is.na(TYPE)) %>%
  ggplot() +
  geom_col(aes(TYPE, NumberSNPs, color = pulsotype, fill = pulsotype)) +
  facet_wrap(~sample) +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)
  )

# POS snps per strain
# Invivolution.snps %>% group_by(sample,POS) %>% summarise(N=n()) %>% 
#   ggplot() +
#   geom_col(aes(POS, N)) +
#   facet_wrap(~sample) +
#   theme_bw() +
#   theme(
#     panel.background = element_blank(),
#     panel.grid = element_blank(),
#     aspect.ratio = 1,
#     #legend.position = "none",
#     strip.background = element_blank(),
#     axis.text.x = element_text(angle = 45,  hjust = 1)
#   )

  # Invivolution.snps %>%
  #   ggplot(aes(x = sample, y = GENE, color = sample, fill = sample)) +
  #   geom_tile() +
  #   facet_wrap(~organism,scales="free_x") +
  #   theme_bw() +
  #   theme(
  #     panel.background = element_blank(),
  #     panel.grid = element_blank(),
  #     aspect.ratio = 1,
  #     #legend.position = "none",
  #     strip.background = element_blank(),
  #     axis.text.x = element_text(angle = 45,  hjust = 1)
  #   )

knitr::opts_chunk$set(warning = FALSE, message = FALSE) 

# SNIPPY CON LOS READS LIMPIOS CALIDAD 20

# Colis contra C3A1 como referencia Y Klebsiellas contra C1C1 como referencia
  
  Ec.snps <- read.delim2("~/MEGA/Tesis/In_vivolution/Bioinfo/Snippy/tabla_all_SNPS_Ec_20.txt")
  Kp.snps <- read.delim2("~/MEGA/Tesis/In_vivolution/Bioinfo/Snippy/tabla_all_SNPS_Kp_20.txt")
  
  
Invivolution.snps.20 = Ec.snps %>% rbind(Kp.snps) %>% 
    separate(filename.CHROM,
             into = c("fastq", "contig"),
             sep = ",") %>%
    separate(fastq, into = c("fastq", "cosa"), sep = "_snps") %>%
    select(-cosa) %>%
    filter(fastq != "filename")
  
Invivolution.snps.20 = Invivolution.snps.20 %>%  full_join(barcodes)
  
  # Limpiamos los SNPs de las referencias contra ellas mismas
  SNPs.referencias.20 <- Invivolution.snps.20 %>% 
    filter(sample == "C3A1" | sample == "C1C1")
  
  Invivolution.snps.20 <- Invivolution.snps.20 %>% 
    anti_join(SNPs.referencias.20, by = c("POS", "TYPE", "REF", "ALT"))
  
  Invivolution.snps.20 %>% 
    group_by(sample,LOCUS_TAG) %>% 
    summarise(N=n())

## # A tibble: 141 × 3
## # Groups:   sample [21]
##    sample LOCUS_TAG            N
##    <chr>  <chr>            <int>
##  1 C1A1   ""                   9
##  2 C1A1   "DNEPDMAL_04732"     1
##  3 C1A1   "DNEPDMAL_04743"     1
##  4 C1B1   ""                   8
##  5 C1B1   "DNEPDMAL_04732"     1
##  6 C1B1   "DNEPDMAL_04743"     1
##  7 C2C1   ""                  10
##  8 C2C1   "DNEPDMAL_00059"     1
##  9 C2C1   "DNEPDMAL_00565"     1
## 10 C2C1   "DNEPDMAL_01973"     1
## # … with 131 more rows

  Invivolution.snps.20 <- Invivolution.snps.20 %>% mutate( species = ifelse(grepl("coli", organism), yes = "Ec", 
                                                                         no = ifelse(grepl("pneumoniae", organism), yes = "Kp", 
                                                                                     no = NA)))
  Invivolution.snps.count.20 = Invivolution.snps.20 %>%
    group_by(sample, TYPE, organism) %>%
    summarise(NumberSNPs = n())
  
# Number snps per strain
  Invivolution.snps.count.20 %>%
    filter(!is.na(TYPE)) %>%
    ggplot() +
    geom_col(aes(sample, NumberSNPs, color = organism, fill = organism)) +
    facet_wrap(~organism, scales="free_x","free_y")+
    theme_bw() +
    theme(
      panel.background = element_blank(),
      panel.grid = element_blank(),
      aspect.ratio = 1,
      #legend.position = "none",
      strip.background = element_blank(),
      axis.text.x = element_text(angle = 45,  hjust = 1)
    )

# TYPE snps per strain
  Invivolution.snps.count.20 %>% left_join(barcodes) %>% 
    filter(!is.na(TYPE)) %>%
    ggplot() +
    geom_col(aes(TYPE, NumberSNPs, color = sample, fill = sample)) +
    facet_wrap(~organism) +
    theme_bw() +
    theme(
      panel.background = element_blank(),
      panel.grid = element_blank(),
      aspect.ratio = 1,
      #legend.position = "none",
      strip.background = element_blank(),
      axis.text.x = element_text(angle = 45,  hjust = 1)
    )

# POS snps per strain
Invivolution.snps.20 %>%
    ggplot(aes(x = sample, y = GENE, color = organism, fill = organism)) +
    geom_tile() +
    facet_wrap(~species,scales="free_x") +
    theme_bw() +
    theme(
      panel.background = element_blank(),
      panel.grid = element_blank(),
      aspect.ratio = 1,
      #legend.position = "none",
      strip.background = element_blank(),
      axis.text.x = element_text(angle = 45,  hjust = 1)
    )

# Tabla SNPs entre C3A1, C3B1, C4 y P1

table.1 = Invivolution.snps.20 %>% 
  filter(species == "Ec") %>% 
  filter(sample == "C3B1" | sample == "C4C1" | sample == "P1C1") %>% view()

write.table(table.1, file="~/MEGA/Tesis/In_vivolution/Bioinfo/SNPs_C3_C4_P1", 
             sep="\t", quote = F, row.names = T, col.names = T)

knitr::opts_chunk$set(warning = FALSE, message = FALSE) 

# RESULTADOS CON ENSAMBLADOS CERRADOS POR NANOPORE

## GENES DE RESISTENCIA
data <- read_excel("~/MEGA/Tesis/In_vivolution/Bioinfo/invivolution.xlsx", skip =)
barcodes <- read.delim2("~/MEGA/Tesis/In_vivolution/Bioinfo/strains_metadata.tsv") 

# plasmidfinder.nanopore <- plasmidfinder.nanopore %>%
  # select(strain, Plasmid, Contig) %>%
  # mutate(Plasmid = ifelse(Plasmid == "IncFIB(K)", yes = "IncFIB(K)/IncFII(K)", no = ifelse(Plasmid == "IncFII(K)", yes = "IncFIB(K)/IncFII(K)", no = Plasmid))) %>%
  # unique()

# Aquí solo están cerrados los plásmidos
atbresistance.nanopore <- read.delim2("~/MEGA/Tesis/In_vivolution/Bioinfo/tabla_abricate_nanopore.txt")

atbresistance.nanopore$filename..FILE =  str_remove(string = atbresistance.nanopore$filename..FILE, "_abricate_plsdb.tsv,/mnt/lustre/scratch/home/csanchez/Invivolution/pilon")
atbresistance.nanopore$filename..FILE =  str_remove(string = atbresistance.nanopore$filename..FILE, "_abricate_resfinder.tsv,/mnt/lustre/scratch/home/csanchez/Invivolution/pilon/")
atbresistance.nanopore$filename..FILE =  str_remove(string = atbresistance.nanopore$filename..FILE, "_pilon_round4.fasta")

atbresistance.nanopore <- atbresistance.nanopore %>% 
  filter(SEQUENCE != "SEQUENCE") %>% 
  separate(filename..FILE, into = c("nanopore_barcode", "cosa"), sep = "barcode") %>% 
  mutate(nanopore_barcode = cosa) %>% 
  select(-cosa)

atbresistance.nanopore$nanopore_barcode =  str_remove(string = atbresistance.nanopore$nanopore_barcode, "/")
atbresistance.nanopore$SEQUENCE =  str_remove(string = atbresistance.nanopore$SEQUENCE, "_pilon_pilon_pilon_pilon")
atbresistance.nanopore$nanopore_barcode = as.numeric(atbresistance.nanopore$nanopore_barcode)

atbresistance.nanopore <- atbresistance.nanopore %>% 
  mutate(nanopore_barcode = nanopore_barcode) %>% left_join(barcodes, by = "nanopore_barcode")

atbresistance.nanopore <- atbresistance.nanopore %>% filter(DATABASE != "plsdb") %>%   mutate(Contig = SEQUENCE)

#PLASMIDOS

plsdb.nanopore <- atbresistance.nanopore %>%
  filter(DATABASE == "plsdb")

plasmidfinder.nanopore <- read.delim2("~/MEGA/Tesis/In_vivolution/Bioinfo/tabla_plasmidfinder_nanopore.txt")

plasmidfinder.nanopore$filename.Database =  str_remove(string = plasmidfinder.nanopore$filename.Database, "_results_tab.tsv,enterobacteriales")

plasmidfinder.nanopore <- plasmidfinder.nanopore %>% 
  filter(Plasmid != "Plasmid") %>% 
  separate(filename.Database, into = c("cosa", "nanopore_barcode"), sep = ("barcode")) %>% 
  select(-cosa, -Note) 

plasmidfinder.nanopore$Contig = str_remove(string = plasmidfinder.nanopore$Contig, "_pilon_pilon_pilon_pilon") 

plasmidfinder.nanopore$nanopore_barcode = as.numeric(plasmidfinder.nanopore$nanopore_barcode)

plasmidfinder.nanopore <- plasmidfinder.nanopore %>% 
  right_join(barcodes, by = "nanopore_barcode") 
  
TOTAL.nanopore <- plasmidfinder.nanopore %>% left_join (atbresistance.nanopore, by=c("sample", "Contig")) %>% left_join(barcodes)

atbresistance.nanopore %>%
  ggplot(aes(x = sample, y = GENE, color = organism, fill = organism)) +
  geom_tile() +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)
  )

# Heatmap genes de resistencia
adj <- atbresistance.nanopore %>% select(sample, organism) %>% distinct()
adj <- column_to_rownames(adj, var = 'sample')

zz <- atbresistance.nanopore %>% select(GENE, sample) 
zz$value <- 1

zz_matrix <- zz %>% distinct() %>%  pivot_wider(names_from = GENE,
                                     values_from = value,
                                     values_fill = 0) %>% column_to_rownames(var = "sample")
f2 <- pheatmap::pheatmap(zz_matrix,
                   clustering_method = "ward.D2",
                   annotation_row = adj)

# Heatmap  x plasmido
adj <- plasmidfinder.nanopore %>% select(sample) %>% distinct()
adj <- column_to_rownames(adj, var = 'sample')

zz <- plasmidfinder.nanopore %>% select(Plasmid, sample) %>% filter(Plasmid != "NA")
zz$value <- 1

zz_matrix <- zz %>% distinct() %>%  pivot_wider(names_from = Plasmid,
                                     values_from = value,
                                     values_fill = 0) %>% column_to_rownames(var = "sample")
pheatmap::pheatmap(zz_matrix, clustering_method = "ward.D2")

# Heatmap  genes x plasmido
# adj <- plasmidfinder.nanopore %>% select(sample, organism) %>% distinct()
# adj <- column_to_rownames(adj, var = 'sample')
# 
# zz <- plasmidfinder.nanopore %>% select(GENE, Plasmid)
# 
# zz$value <- 1
# 
# zz_matrix <-
#   zz %>% distinct() %>%  pivot_wider(names_from = GENE,
#                                      values_from = value,
#                                      values_fill = 0) %>% column_to_rownames(var = "Plasmid")
# 
# pheatmap::pheatmap(zz_matrix)

TOTAL.nanopore %>%  
  filter(!is.na(GENE)) %>% 
ggplot(aes(x = Plasmid, y = GENE, color = organism, fill = organism)) +
  geom_point() +
  # facet_wrap(~Plasmid)+
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)
  )

# Geom_TILE plasmidos por cepas
TOTAL.nanopore %>% filter(!is.na(Plasmid)) %>% 
  ggplot(aes(x = sample, y = Plasmid, color = organism, fill = organism)) +
  geom_tile() +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)) +
  scale_fill_manual(values=c("#B9C0DA", "#998DA0", "#C4DACF")) +
  scale_color_manual(values=c("#B9C0DA", "#998DA0", "#C4DACF"))

TOTAL.nanopore %>% filter(!is.na(Plasmid)) %>% 
  ggplot(aes(x = sample, y = Plasmid, color = organism, fill = organism)) +
  geom_point(size = 5, stroke = 4) +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)) +
  scale_fill_manual(values=c("#B9C0DA", "#998DA0", "#C4DACF")) +
  scale_color_manual(values=c("#B9C0DA", "#998DA0", "#C4DACF"))

# PLOT Gen resistencia y plasmid por Contig

f1 <- TOTAL.nanopore %>% filter(!is.na(Plasmid)) %>% 
  ggplot(aes(x = Contig, y = Plasmid, color = cepa, fill = cepa)) +
  geom_tile() +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)) 
  # scale_fill_manual(values=c("#B9C0DA", "#998DA0", "#C4DACF")) +
  # scale_color_manual(values=c("#B9C0DA", "#998DA0", "#C4DACF")) 

f2 <- TOTAL.nanopore %>% filter(!is.na(Plasmid)) %>% filter(!is.na(PRODUCT)) %>% 
  ggplot(aes(x = Contig, y = PRODUCT, color = cepa, fill = cepa)) +
  geom_tile() +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)) 
  # scale_fill_manual(values=c("#B9C0DA", "#998DA0", "#C4DACF")) +
  # scale_color_manual(values=c("#B9C0DA", "#998DA0", "#C4DACF")) 

ggarrange(f1, f2)

knitr::opts_chunk$set(warning = FALSE, message = FALSE) 

# ANALISIS SNIPPY CON LOS CONTIGS DE LOS GENOMAS CERRADOS

# Leemos la tabla del snippy para las cepas con el plásmido

Invivolution.snps <- read.delim2("~/MEGA/Tesis/In_vivolution/Ensamblados/Snippy/compilation_tabs/tabla_all_SNPS_variantesInvivolution.txt")

Invivolution.snps = Invivolution.snps %>%
  separate(filename.CHROM,
           into = c("strain", "contig"),
           sep = ",") %>%
  separate(strain, into = c("strain", "cosa"), sep = "_") %>%
  select(-cosa) %>%
  filter(strain != "filename")

Invivolution.snps = Invivolution.snps %>%  full_join(data)

Invivolution.snps <- Invivolution.snps %>% separate(EFFECT, c("TYPE_EFFECT", "COSA"), sep ="_") 

Invivolution.snps.count = Invivolution.snps %>%
  group_by(strain, TYPE_EFFECT, species) %>%
  summarise(NumberSNPs = n())

# Number snps per strain
Invivolution.snps.count %>%
  filter(!is.na(TYPE_EFFECT)) %>%
  ggplot() +
  geom_col(aes(strain, NumberSNPs, color = species, fill = species)) +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)
  )

# TYPE snps per strain
Invivolution.snps.count %>%
  filter(!is.na(TYPE_EFFECT)) %>%
  ggplot() +
  geom_col(aes(TYPE_EFFECT, NumberSNPs, color = strain, fill = strain)) +
  facet_wrap(~strain) +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)
  )

# Genes de resistencia  presentes en el chr

atbresistance <- rename(atbresistance, sample = strain)
genes <- atbresistance %>% 
  select(GENE, sample, tratamiento)

genes.plasmid <- atbresistance.nanopore %>% 
  select(GENE, sample, organism)

genes.chr <- anti_join(genes, genes.plasmid) %>% filter(GENE != "GENE") 

genes.chr %>%
  ggplot(aes(x = sample, y = GENE,color = sample, fill = sample)) +
  geom_tile() +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    aspect.ratio = 1,
    #legend.position = "none",
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 45,  hjust = 1)
  )

knitr::opts_chunk$set(warning = FALSE, message = FALSE) 

# BLAST C2C1 vs C4C1

# Hacemos un BLAST para ver si el INC-L de C2C1 es el pOXA48 pero sin el gen de OXA-48

# Length = length contig
BLAST.C2vsC4 = read.delim(file = "~/MEGA/Tesis/In_vivolution/Bioinfo/Blast/C2C1vsC4C1_nanopore.blast", header = TRUE, sep = "", dec = ",")
BLAST.C5.2vsC4 = read.delim(file = "~/MEGA/Tesis/In_vivolution/Bioinfo/Blast/C5.2vsC4.blast", header = TRUE, sep = "", dec = ",")

# SNIPPY del pOXA-48 de las cepas C2C1 y C5.2C2 contra el pOXA-48 de C4C1

pOXA48.snps <-read.delim2("~/MEGA/Tesis/In_vivolution/Snippy/tabla_all_SNPS_pOXA48.txt")

pOXA48.snps = pOXA48.snps %>%
  separate(filename.CHROM,
           into = c("strain", "contig"),
           sep = ",") %>%
  separate(strain, into = c("strain", "cosa"), sep = "_") %>%
  select(-cosa) %>%
  filter(strain != "filename")

knitr::opts_chunk$set(warning = FALSE, message = FALSE) 

# Antibiogramas

Antibiogramas <- read.delim2("~/MEGA/Tesis/In_vivolution/Bioinfo/Antibiogramas.txt") 

Antibiogramas <- Antibiogramas %>% pivot_longer(names_to = "Antibiotic", values_to = "value", cols = c(FOS50:COL10))  %>% mutate(value=as.numeric(value))

Ec <- Antibiogramas %>% filter(Antibiotic == "AMC30" | Antibiotic == "CHL30" | Antibiotic == "CTX30") %>% filter(Strain == "C3A1"| Strain == "C3A1 BLEE" | Strain == "C3B1"| Strain == "C3B1BLEE" | Strain == "C4C1" | Strain == "C4 SIN BLEE" | Strain == "P1" | Strain == "P1 + OXA48" | Strain == "P8" | Strain == " P8 BLEE clon 1") %>% filter(Species =="E. coli") %>%
  ggplot()+
  geom_col(aes(x = Strain, y = value, color = Species, fill = Species)) +   
  facet_wrap(~Antibiotic, scales="free_x","free_y") +
  theme_bw() +
    theme(
      panel.background = element_blank(),
      panel.grid = element_blank(),
      aspect.ratio = 1,
      #legend.position = "none",
      strip.background = element_blank(),
      axis.text.x = element_text(angle = 45,  hjust = 1))


Ec + scale_fill_manual(values=c("#d1cfe2ff")) +
  scale_color_manual(values=c("#d1cfe2ff"))

Kp <- Antibiogramas %>% filter(Species =="K. pneumoniae") %>% filter(Antibiotic == "AMC30" | Antibiotic == "CHL30" | Antibiotic == "CTX30") %>% filter(Strain == "C1B1" | Strain == " C1B1 pOXA-48 clon 1" | Strain == "C5.1C1" | Strain == " C5.1C1 pOXA48") %>% 
  ggplot()+
  geom_col(aes(x = Strain, y = value, color = Species, fill = Species)) +   
  facet_wrap(~Antibiotic, scales="free_x","free_y") +
  theme_bw() +
    theme(
      panel.background = element_blank(),
      panel.grid = element_blank(),
      aspect.ratio = 1,
      #legend.position = "none",
      strip.background = element_blank(),
      axis.text.x = element_text(angle = 45,  hjust = 1))

Kp + scale_fill_manual(values=c("#7ec4cfff")) +
  scale_color_manual(values=c("#7ec4cfff"))

knitr::opts_chunk$set(warning = FALSE, message = FALSE) 

# CHORD DIAGRAM - TODAS LAS CEPAS

##Vamos a hacer un chord diagram de los datos de plasmidfinder con los contigs de illumina

# install.packages("circlize")
library(circlize)
library(viridisLite)

# Create an adjacency matrix: 

# data <- plasmidfinder %>% 
#   pivot_wider(names_from=strain, values_from= strain) %>% select(-Contig) 
# 
# data <- data %>% 
#    mutate(IncFI_IncFII = data$`IncFIB(K)/IncFII(K)`) %>% select(-`IncFIB(K)/IncFII(K)`) %>% 
#    mutate(IncL = ifelse(is.na(IncL), yes= 0, no= 1 )) %>% 
#    mutate(IncFI_IncFII = ifelse(is.na(IncFI_IncFII), yes= 0, no= 1 )) %>% 
#    mutate(`Col(MG828)` = ifelse(is.na(`Col(MG828)`), yes= 0, no= 1 )) %>%
#    mutate(IncQ1 = ifelse(is.na(IncQ1), yes= 0, no= 1 )) %>%
#    mutate(`IncFIB(AP001918)` = ifelse(is.na(`IncFIB(AP001918)`), yes= 0, no= 1 )) %>%
#    mutate(Col156 = ifelse(is.na(Col156), yes= 0, no= 1 )) %>%
#    mutate(pXuzhou21 = ifelse(is.na(pXuzhou21), yes= 0, no= 1 )) %>%
#    mutate(IncFII = ifelse(is.na(IncFII), yes= 0, no= 1 ))

# Guardar para cambiar en excel los nombres 
# write.table(data, file="~/MEGA/Tesis/In_vivolution/Bioinfo/data.txt", 
            # sep="\t", quote = F, row.names = T, col.names = T)

data <-read.delim2("~/MEGA/Tesis/In_vivolution/Bioinfo/data.txt")

data1= as.matrix(data %>% select(-strain))
rownames(data1) = data$strain

mycolor <- viridis(10, alpha = 1, begin = 0, end = 1, option = "D")
mycolor <- mycolor[sample(1:10)]

# Make the circular plot
chordDiagram(data1, annotationTrack = c("name", "grid"), transparency = 0.5)

#saving the plot (high definition)
dev.copy(jpeg,'plot.png', width=8, height=8, units="in", res=500)

## jpeg 
##    3

dev.off()

## png 
##   2

knitr::opts_chunk$set(warning = FALSE, message = FALSE) 

# CURVAS DE CRECIMIENTO

library(tidyverse)
library(readxl)
library(lattice)
library(deSolve)
library(growthrates)
library(caTools)
library(gridExtra)
library(flux)
library(ggplot2)
library(ggrepel)
library(ggsci)
library(dplyr)
library(tidyr)

#La carpeta dónde están las curvas
path_to_txt= "~/MEGA/Tesis/In_vivolution/curve_data//"

#donde guardar las vmax
path_to_Growthrates="~/MEGA/Tesis/In_vivolution/Growth_curves/"

#La carpeta dónde guardaremos el output
path_to_output="~/MEGA/Tesis/In_vivolution/Growth_curves/"

#La piedra de rosetta!
Rosetta <- read.delim("~/MEGA/Tesis/In_vivolution/Rosetta")

#Leer todos los archivos de la carpeta "path_to_txt"
file.list <- list.files(path =path_to_txt, full.names = F)
df.list <- lapply(paste0(path_to_txt, file.list), 
                  function(x)read.delim(x, header=T, nrows=133, dec=","))

#No me acuerdo que habia que poner en  (0, (133*10)-10, 10)        
attr(df.list, "names") <- file.list
df <- bind_rows(df.list, .id = "id") %>%
  mutate(Time=rep(seq(0, (133*10)-10, 10),length(file.list)))


df_test<-df %>% 
  #select -> para seleccionar columnas, en este caso - Optical density es para deseleccionar la Tª
  select( -`Optical.Density.600`) %>% 
  #mutate -> para cambiar la columna plate por id
  mutate(Plate=id) %>%
  select(-id) %>% 
  #gather(data, key, value) -> data is the dataframe you are working with; key is the name of the key column to create; value is the name of the value column to create.
  gather(-Time,-Plate,  key = Well, value = OD ) 

#Ahora tenemos una tabla (df_test) con la OD en función de la placa, los pocillos y el tiempo. La guardamos con el código de abajo

# write.table(df_test, file=paste0(path_to_output, "Curvas_en_formato_R"))

#Ahora juntamos la tabla df_test con la tabla Rosetta con la funcion left_join
curve_data <- df_test %>% 
  left_join(Rosetta %>% mutate(Plate=as.character(Plate), 
                                        Well=as.character(Well))) 

#Filtramos en la tabla curve_data los blancos para quitarlos y ponemos la OD como una variable numérica
curve_data <- curve_data %>% 
  filter(Replicate != "blanco") %>% 
  mutate(OD=as.numeric(OD))

#
if (file.exists(paste0(path_to_Growthrates, "GrowthRates_results"))){
 Growthrate_results<-read.table((paste0(path_to_Growthrates, "GrowthRates_results")), header=T) 
} else{
             #all_easylinear ->Determine maximum growth rates from log-linear part of the growth curve for a series of experiments
manysplits<- all_easylinear(OD~Time | Strain + Morfotipo +  Replicate + Plate,
                            data=curve_data)
# write.table(results(manysplits), paste0(path_to_Growthrates, "GrowthRates_results"))

Growthrate_results<-results(manysplits)
}


data_analysed<- curve_data %>% group_by(Strain, Morfotipo, Replicate, Plate) %>% 
  group_modify(~ as.data.frame(flux::auc(.x$Time, .x$OD))) %>%
  mutate(AUC=`flux::auc(.x$Time, .x$OD)`) %>% 
  select(-`flux::auc(.x$Time, .x$OD)`)%>% 
  ungroup() 

data_analysed_2<- data_analysed %>% 
  left_join(curve_data %>% 
  group_by(Strain, Morfotipo, Replicate, Plate) %>% 
  summarise(ODmax=max(OD, na.rm = T)))
  
data<-data_analysed_2 %>% 
  mutate(Replicate=as.numeric(Replicate),
         Plate=as.numeric(Plate)) %>% 
  left_join(Growthrate_results) %>% 
  left_join(Rosetta %>%  mutate(Replicate=as.numeric(Replicate),
                                Plate=as.numeric(Plate))) %>% 
  mutate(Species=ifelse(Pulsotype %in% c("A1", "A"), "Kpn","Eco")) %>% 
  mutate(Plasmid=ifelse(pOXA48=="yes", 
                        ifelse(pBlee=="yes", "pBlee+pOXA48", "pOXA48"), 
                        ifelse(pBlee=="yes", "pBlee",
                        ifelse(Inc.l=="yes", "INC-L", "Plasmid-free")))
  )

curve_data <- curve_data %>%
  mutate(Replicate=as.numeric(Replicate),
                                   Plate=as.numeric(Plate)) %>% 
  left_join(data %>% select(c("Strain", "Replicate", "Plate", "Plasmid", "Well", "Species")))
                      

data %>%
  # unite(Strain, Strain, Morfotipo) %>% 
  ggplot(aes(y=AUC, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="AUC")

data %>%
  unite(Strain, Strain, Morfotipo) %>% 
  ggplot(aes(y=mumax, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="Vmax")

data %>%
  ggplot(aes(y=lag, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="Lag")

plot(data$AUC, data$ODmax)

# Curvas por plasmido
curve_data %>% 
  unite(Strain, Strain, Morfotipo) %>% 
  # filter(Strain == "C3B_B" |Strain == "C3A_A" | Strain =="C4_A" | Strain =="P4_C" | Strain =="P8_A"| Strain =="C6A_A"| Strain =="C6B_B") %>% 
  group_by(Time, Strain,  Species, Plasmid) %>% 
  summarise(media=mean(OD), STDEV=sd(OD)) %>% 
  ungroup() %>% 
  ggplot()+
  geom_ribbon(aes(ymin=media-STDEV, ymax=media+STDEV, x=Time, fill=Strain), alpha=.2)+
  geom_line(aes(y=media, x=Time, color=Strain))+
  facet_grid(~Plasmid)+
  xlab("Time (min)")+
  ylab("OD (600nm)")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), aspect.ratio = 1,
        # legend.position = "none",
        strip.background = element_blank())

# Curvas por especie
curve_data %>% 
  unite(Strain, Strain, Morfotipo) %>% 
  # filter(Strain == "C3B_B" |Strain == "C3A_A" | Strain =="C4_A" | Strain =="P4_C" | Strain =="P8_A"| Strain =="C6A_A"| Strain =="C6B_B") %>% 
  group_by(Time, Strain,  Species, Plasmid) %>% 
  summarise(media=mean(OD), STDEV=sd(OD)) %>% 
  ungroup() %>% 
  ggplot()+
  geom_ribbon(aes(ymin=media-STDEV, ymax=media+STDEV, x=Time, fill=Strain), alpha=.2)+
  geom_line(aes(y=media, x=Time, color=Strain))+
  facet_grid(~Species)+
  xlab("Time (min)")+
  ylab("OD (600nm)")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), aspect.ratio = 1,
        # legend.position = "none",
        strip.background = element_blank())

# Correlación entre llevar pOXA y pBLEE y crecer mejor?
 
data_analysed_2$Replicate <- as.numeric(data_analysed_2$Replicate)
data_analysed_2$Plate <- as.numeric(data_analysed_2$Plate)
 
analisis_corrrelacion <- left_join(data_analysed_2, curve_data)

# cor.test(analisis_corrrelacion$AUC, analisis_corrrelacion$pOXA48)

knitr::opts_chunk$set(warning = FALSE, message = FALSE) 

# CURVAS DE CRECIMIENTO - CONJUGACIONES

#La carpeta dónde están las curvas
path_to_txt= "~/MEGA/Tesis/In_vivolution/curve_data_total_conj/"

#donde guardar las vmax
path_to_Growthrates="~/MEGA/Tesis/In_vivolution/Growth_curves/"

#La carpeta dónde guardaremos el output
path_to_output="~/MEGA/Tesis/In_vivolution/Growth_curves/"

#La piedra de rosetta!
Rosetta <- read.delim("~/MEGA/Tesis/In_vivolution/Rosetta_conj_total")

#Leer todos los archivos de la carpeta "path_to_txt"
file.list <- list.files(path =path_to_txt, full.names = F)
df.list <- lapply(paste0(path_to_txt, file.list), 
                  function(x)read.delim(x, header=T, nrows=133, dec=","))

#No me acuerdo que habia que poner en  (0, (133*10)-10, 10)        
attr(df.list, "names") <- file.list
df <- bind_rows(df.list, .id = "id") %>%
  mutate(Time=rep(seq(0, (133*10)-10, 10),length(file.list)))

df_test<-df %>% 
  #select -> para seleccionar columnas, en este caso - Optical density es para deseleccionar la Tª
  select( -`Optical.Density.600`) %>% 
  #mutate -> para cambiar la columna plate por id
  mutate(Plate=id) %>%
  select(-id) %>% 
  #gather(data, key, value) -> data is the dataframe you are working with; key is the name of the key column to create; value is the name of the value column to create.
  gather(-Time,-Plate,  key = Well, value = OD ) 

#Ahora tenemos una tabla (df_test) con la OD en función de la placa, los pocillos y el tiempo. La guardamos con el código de abajo

# write.table(df_test, file=paste0(path_to_output, "Curvas_en_formato_R"))

#Ahora juntamos la tabla df_test con la tabla Rosetta con la funcion left_join
curve_data <- df_test %>% 
  left_join(Rosetta %>% mutate(Plate=as.character(Plate), 
                                        Well=as.character(Well))) 

#Filtramos en la tabla curve_data los blancos para quitarlos y ponemos la OD como una variable numérica
curve_data <- curve_data %>% 
  filter(Replicate != "blanco") %>% 
  mutate(OD=as.numeric(OD))

#
if (file.exists(paste0(path_to_Growthrates, "GrowthRates_results"))){
 Growthrate_results<-read.table((paste0(path_to_Growthrates, "GrowthRates_results")), header=T) 
} else{
             #all_easylinear ->Determine maximum growth rates from log-linear part of the growth curve for a series of experiments
manysplits<- all_easylinear(OD~Time | Strain + Morfotipo +  Replicate + Plate,
                            data=curve_data)
# write.table(results(manysplits), paste0(path_to_Growthrates, "GrowthRates_results"))

Growthrate_results<-results(manysplits)
}


data_analysed<- curve_data %>% group_by(Strain, Morfotipo, Replicate, Plate) %>% 
  group_modify(~ as.data.frame(flux::auc(.x$Time, .x$OD))) %>%
  mutate(AUC=`flux::auc(.x$Time, .x$OD)`) %>% 
  select(-`flux::auc(.x$Time, .x$OD)`)%>% 
  ungroup() 

data_analysed_2<- data_analysed %>% 
  left_join(curve_data %>% 
  group_by(Strain, Morfotipo, Replicate, Plate) %>% 
  summarise(ODmax=max(OD, na.rm = T)))
  
data<-data_analysed_2 %>% 
  mutate(Replicate=as.numeric(Replicate),
         Plate=as.numeric(Plate)) %>% 
  left_join(Growthrate_results) %>% 
  left_join(Rosetta %>%  mutate(Replicate=as.numeric(Replicate),
                                Plate=as.numeric(Plate))) %>% 
  mutate(Species=ifelse(Pulsotype %in% c("A1", "A"), "Kpn","Eco")) %>% 
  mutate(Plasmid=ifelse(pOXA48=="yes", 
                        ifelse(pBlee=="yes", "pBlee+pOXA48", "pOXA48"), 
                        ifelse(pBlee=="yes", "pBlee",
                        ifelse(Inc.l=="yes", "INC-L", "Plasmid-free")))
  )

curve_data <- curve_data %>%
  mutate(Replicate=as.numeric(Replicate),
                                   Plate=as.numeric(Plate)) %>% 
  left_join(data %>% select(c("Strain", "Replicate", "Plate", "Plasmid", "Well", "Species")))

# CONJUGACIONES DEL DÍA 5/07/22
                      
# AUC Conjugaciones 5/07/22
data %>%
  filter(Plate==1) %>%  
  # unite(Strain, Strain, Morfotipo) %>% 
  ggplot(aes(y=AUC, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="AUC")

# Vmax Conjugaciones 5/07/22
data %>%
  filter(Plate==1) %>%  
  unite(Strain, Strain, Morfotipo) %>% 
  ggplot(aes(y=mumax, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="Vmax")

# LAG Conjugaciones 5/07/22
data %>%
  filter(Plate==1) %>%  
  ggplot(aes(y=lag, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="Lag")

# Curvas Conjugaciones 5/07/22 
curve_data %>% 
  filter(Plate==1) %>% 
  unite(Strain, Strain, Morfotipo) %>% 
  # filter(Strain == "C3B_B" |Strain == "C3A_A" | Strain =="C4_A" | Strain =="P4_C" | Strain =="P8_A"| Strain =="C6A_A"| Strain =="C6B_B") %>% 
  group_by(Time, Strain,  Species, Plasmid) %>% 
  summarise(media=mean(OD), STDEV=sd(OD)) %>% 
  ungroup() %>% 
  ggplot()+
  geom_ribbon(aes(ymin=media-STDEV, ymax=media+STDEV, x=Time, fill=Strain), alpha=.2)+
  geom_line(aes(y=media, x=Time, color=Strain))+
  # facet_grid(~Strain)+
  xlab("Time (min)")+
  ylab("OD (600nm)")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), aspect.ratio = 1,
        # legend.position = "none",
        strip.background = element_blank())

# Curvas Conjugaciones 5/07/22 por plásmido
curve_data %>% 
  filter(Plate==1) %>%  
  unite(Strain, Strain, Morfotipo) %>% 
  # filter(Strain == "C3B_B" |Strain == "C3A_A" | Strain =="C4_A" | Strain =="P4_C" | Strain =="P8_A"| Strain =="C6A_A"| Strain =="C6B_B") %>% 
  group_by(Time, Strain,  Species, Plasmid) %>% 
  summarise(media=mean(OD), STDEV=sd(OD)) %>% 
  ungroup() %>% 
  ggplot()+
  geom_ribbon(aes(ymin=media-STDEV, ymax=media+STDEV, x=Time, fill=Strain), alpha=.2)+
  geom_line(aes(y=media, x=Time, color=Strain))+
  facet_grid(~Plasmid)+
  xlab("Time (min)")+
  ylab("OD (600nm)")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), aspect.ratio = 1,
        # legend.position = "none",
        strip.background = element_blank())

# CONJUGACIONES DEL DÍA 21/10/22

# AUC Conjugaciones 21/10/22
data %>%
  filter(Plate==2) %>%  
  # unite(Strain, Strain, Morfotipo) %>% 
  ggplot(aes(y=AUC, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="AUC")

# Vmax Conjugaciones 21/10/22
data %>%
  filter(Plate==2) %>%  
  unite(Strain, Strain, Morfotipo) %>% 
  ggplot(aes(y=mumax, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="Vmax")

# LAG Conjugaciones 21/10/22
data %>%
  filter(Plate==2) %>%  
  ggplot(aes(y=lag, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="Lag")

# Curvas Conjugaciones 21/10/22 
curve_data %>% 
  filter(Plate==2) %>% 
  unite(Strain, Strain, Morfotipo) %>% 
  # filter(Strain == "C3B_B" |Strain == "C3A_A" | Strain =="C4_A" | Strain =="P4_C" | Strain =="P8_A"| Strain =="C6A_A"| Strain =="C6B_B") %>% 
  group_by(Time, Strain,  Species, Plasmid) %>% 
  summarise(media=mean(OD), STDEV=sd(OD)) %>% 
  ungroup() %>% 
  ggplot()+
  geom_ribbon(aes(ymin=media-STDEV, ymax=media+STDEV, x=Time, fill=Strain), alpha=.2)+
  geom_line(aes(y=media, x=Time, color=Strain))+
  facet_grid(~Species)+
  xlab("Time (min)")+
  ylab("OD (600nm)")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), aspect.ratio = 1,
        # legend.position = "none",
        strip.background = element_blank())

# Curvas Conjugaciones 21/10/22 por plásmido
curve_data %>% 
  filter(Plate==2) %>%  
  unite(Strain, Strain, Morfotipo) %>% 
  # filter(Strain == "C3B_B" |Strain == "C3A_A" | Strain =="C4_A" | Strain =="P4_C" | Strain =="P8_A"| Strain =="C6A_A"| Strain =="C6B_B") %>% 
  group_by(Time, Strain,  Species, Plasmid) %>% 
  summarise(media=mean(OD), STDEV=sd(OD)) %>% 
  ungroup() %>% 
  ggplot()+
  geom_ribbon(aes(ymin=media-STDEV, ymax=media+STDEV, x=Time, fill=Strain), alpha=.2)+
  geom_line(aes(y=media, x=Time, color=Strain))+
  facet_grid(~Plasmid~Species)+
  xlab("Time (min)")+
  ylab("OD (600nm)")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), aspect.ratio = 1,
        # legend.position = "none",
        strip.background = element_blank())

# CONJUGACIONES DEL DÍA 7/10/22
                      
# AUC Conjugaciones 7/10/22
data %>%
  filter(Plate==3) %>%  
  unite(Strain, Strain, Morfotipo) %>% 
  ggplot(aes(y=AUC, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="AUC")

# Vmax Conjugaciones 7/10/22
data %>%
  filter(Plate==3) %>%  
  unite(Strain, Strain, Morfotipo) %>% 
  ggplot(aes(y=mumax, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="Vmax")

# LAG Conjugaciones 7/10/22
data %>%
  filter(Plate==3) %>%
  unite(Strain, Strain, Morfotipo) %>% 
  ggplot(aes(y=lag, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="Lag")

# Curvas Conjugaciones 7/10/22 
curve_data %>% 
  filter(Plate==3) %>% 
  unite(Strain, Strain, Morfotipo) %>% 
  # filter(Strain == "C3B_B" |Strain == "C3A_A" | Strain =="C4_A" | Strain =="P4_C" | Strain =="P8_A"| Strain =="C6A_A"| Strain =="C6B_B") %>% 
  group_by(Time, Strain,  Species, Plasmid) %>% 
  summarise(media=mean(OD), STDEV=sd(OD)) %>% 
  ungroup() %>% 
  ggplot()+
  geom_ribbon(aes(ymin=media-STDEV, ymax=media+STDEV, x=Time, fill=Strain), alpha=.2)+
  geom_line(aes(y=media, x=Time, color=Strain))+
  # facet_grid(~Strain)+
  xlab("Time (min)")+
  ylab("OD (600nm)")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), aspect.ratio = 1,
        # legend.position = "none",
        strip.background = element_blank())

# Curvas Conjugaciones 7/10/22 por plásmido
curve_data %>% 
  filter(Plate==3) %>%  
  unite(Strain, Strain, Morfotipo) %>% 
  # filter(Strain == "C3B_B" |Strain == "C3A_A" | Strain =="C4_A" | Strain =="P4_C" | Strain =="P8_A"| Strain =="C6A_A"| Strain =="C6B_B") %>% 
  group_by(Time, Strain,  Species, Plasmid) %>% 
  summarise(media=mean(OD), STDEV=sd(OD)) %>% 
  ungroup() %>% 
  ggplot()+
  geom_ribbon(aes(ymin=media-STDEV, ymax=media+STDEV, x=Time, fill=Strain), alpha=.2)+
  geom_line(aes(y=media, x=Time, color=Strain))+
  facet_grid(~Plasmid)+
  xlab("Time (min)")+
  ylab("OD (600nm)")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), aspect.ratio = 1,
        # legend.position = "none",
        strip.background = element_blank())

# CONJUGACIONES DEL DÍA 2/11/22
                      
# AUC Conjugaciones 2/11/22
data %>%
  filter(Plate==4) %>%  
  unite(Strain, Strain, Morfotipo) %>% 
  ggplot(aes(y=AUC, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="AUC")

# Vmax Conjugaciones 2/11/22
data %>%
  filter(Plate==4) %>%  
  unite(Strain, Strain, Morfotipo) %>% 
  ggplot(aes(y=mumax, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="Vmax")

# LAG Conjugaciones 2/11/22
data %>%
  filter(Plate==4) %>%  
  ggplot(aes(y=lag, x=Strain, color=Plasmid, fill=Plasmid)) +
  geom_jitter(alpha=.4, width=.1)+
  geom_boxplot(alpha=.2)+
  facet_wrap(~Species, scale="free_x")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, 
        #legend.position = "none",
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45,  hjust=1))+
  labs(title="Lag")

# Curvas Conjugaciones 2/11/22 
curve_data %>% 
  filter(Plate==4) %>% 
  unite(Strain, Strain, Morfotipo) %>% 
  # filter(Strain == "C3B_B" |Strain == "C3A_A" | Strain =="C4_A" | Strain =="P4_C" | Strain =="P8_A"| Strain =="C6A_A"| Strain =="C6B_B") %>% 
  group_by(Time, Strain,  Species, Plasmid) %>% 
  summarise(media=mean(OD), STDEV=sd(OD)) %>% 
  ungroup() %>% 
  ggplot()+
  geom_ribbon(aes(ymin=media-STDEV, ymax=media+STDEV, x=Time, fill=Strain), alpha=.2)+
  geom_line(aes(y=media, x=Time, color=Strain))+
  facet_grid(~Species)+
  xlab("Time (min)")+
  ylab("OD (600nm)")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), aspect.ratio = 1,
        # legend.position = "none",
        strip.background = element_blank())

# Curvas Conjugaciones 2/11/22 por plásmido
curve_data %>% 
  filter(Plate==4) %>%  
  unite(Strain, Strain, Morfotipo) %>% 
  # filter(Strain == "C3B_B" |Strain == "C3A_A" | Strain =="C4_A" | Strain =="P4_C" | Strain =="P8_A"| Strain =="C6A_A"| Strain =="C6B_B") %>% 
  group_by(Time, Strain,  Species, Plasmid) %>% 
  summarise(media=mean(OD), STDEV=sd(OD)) %>% 
  ungroup() %>% 
  ggplot()+
  geom_ribbon(aes(ymin=media-STDEV, ymax=media+STDEV, x=Time, fill=Strain), alpha=.2)+
  geom_line(aes(y=media, x=Time, color=Strain))+
  facet_grid(~Plasmid~Species)+
  xlab("Time (min)")+
  ylab("OD (600nm)")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), aspect.ratio = 1,
        # legend.position = "none",
        strip.background = element_blank())

# Curvas B3914
curve_data %>% filter(Plate==2) %>% filter(Strain != "B3914 BLEE" ) %>% filter(Strain != "B3914 pOXA48" ) %>% filter(Strain != "B3914 BLEE pOXA48" ) %>% 
  filter(Strain != "blanco") %>% 
  unite(Strain, Strain, Morfotipo) %>% 
  group_by(Time, Strain,  Species, Plasmid) %>% 
  summarise(media=mean(OD), STDEV=sd(OD)) %>% 
  ggplot()+
  geom_ribbon(aes(ymin=media-STDEV, ymax=media+STDEV, x=Time, fill=Strain), alpha=.2)+
  geom_line(aes(y=media, x=Time, color=Strain))+
  facet_grid(~Species)+
  xlab("Time (min)")+
  ylab("OD (600nm)")+
  theme_bw()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), aspect.ratio = 1,
        # legend.position = "none",
        strip.background = element_blank())

# Correlación entre llevar pOXA y pBLEE y crecer mejor?
 
data_analysed_2$Replicate <- as.numeric(data_analysed_2$Replicate)
data_analysed_2$Plate <- as.numeric(data_analysed_2$Plate)
 
analisis_corrrelacion <- left_join(data_analysed_2, curve_data)

# cor.test(analisis_corrrelacion$AUC, analisis_corrrelacion$pOXA48)