Profiling of Antibiotic Resistance of Bacterial Species Recovered from Routine Clinical Isolates in Ethiopia.

Robert-Jan ten Hove¹
Melaku Tesfaye¹
Witold Frederik ten Hove²
Mesfin Nigussie¹
^1. International Clinical Laboratories, Addis Ababa, Ethiopia
^2. HAN University of Applied Sciences, Nijmegen, the Netherlands

This is a guide to explain the aggregation and visualisation of raw data extracted from the Polytech Laboratory Information Management System, at International Clinical Laboratories, Addis Ababa, Ethiopia. The guide starts with a piece of R-code, followed by an explanation.

# NOTE(review): hard-coded, machine-specific working directory. The relative
# file names used by this script ("antibiogram_raw.txt", "myco.csv",
# "newDF.csv") resolve against it, so adjust the path before running elsewhere.
setwd("~/Documents/workspace/R-project/Antibiogram")

# libraries
library(plyr)
library(dplyr)
library(tidyr)
library(stringr)
library(xlsx)
library(ggplot2)
library(knitr)
library(reshape2)
library(scales)
library(knitr)

The working directory needs to be set first, and I like to load all libraries together at the top.

# Read text file (file should be in the same working directory as this script)
# readLines() returns one character string per line of the file.
con <- file("antibiogram_raw.txt")
txtData <- tryCatch(
  readLines(con, warn = FALSE, skipNul = TRUE),
  finally = close(con)  # release the connection even when reading fails
)
head(txtData)

## [1] "Page 1/176\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                            
## [2] "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                      
## [3] "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                      
## [4] "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                      
## [5] "Sensitive/Resistant/Intermediate Count\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
## [6] "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"

The raw data was exported from the LIMS by a query, which resulted in 176 PDF pages. Using copy-paste, the data was then saved as a simple .txt file. This text file is read into R: first it is opened as a file connection (con) and then read as a vector of text lines (txtData).

# Clean up
# The rules are applied in order. A replacement of NA turns every matching
# line into NA, so that na.omit() can discard those lines at the end.
txtData <- local({
  rules <- list(
    c("\t{1,}", ";"),          # runs of tabs become one field separator
    c("^Page", NA),            # page headers -> NA
    c("Site:", ""),
    c("^;$", NA),              # lines holding only a separator -> NA
    c("^Sensitive.*", "New"),  # report title becomes the "New" marker
    c("^;", ""),
    c(";$", ""),
    c("S;R;I", "Data")         # column header becomes the "Data" marker
  )
  cleaned <- txtData
  for (rule in rules) {
    cleaned <- gsub(rule[1], rule[2], cleaned)
  }
  na.omit(cleaned)
})

With the gsub command, specific texts can be replaced (also by ‘nothing’:“”). The function na.omit(txtData) returns the object txtData with listwise deletion of missing values.

# Rename Antibiotics
# Every pattern is anchored at the start of the line. Where it ends in
# [a-z]{1,} it also consumes the following lower-case letters, so spelling
# variants of one drug name collapse onto the same abbreviation.
txtData <- local({
  codes <- c(
    "^Ampic[a-z]{1,}"     = "AMP10",
    "^Amox/K Clav'ate"    = "AMC30",
    "^Chloramp[a-z]{1,}"  = "C30",
    "^Ceftriax[a-z]{1,}"  = "CRO30",
    "^Cephaloph[a-z]{1,}" = "CF30",   # Cephalophin doesn't exist
    "^Cephaloth[a-z]{1,}" = "CF30",
    "^Ciproflo[a-z]{1,}"  = "CIP5",
    "^Clindam[a-z]{1,}"   = "CC2",
    "^Cefotax[a-z]{1,}"   = "CTX30",
    "^Cefurox[a-z]{1,}"   = "CXM30",
    "^Cefoxit[a-z]{1,}"   = "FOX30",
    "^Erythrom[a-z]{1,}"  = "E15",
    "^Gentam[a-z]{1,}"    = "GM10",
    "^Metroni[a-z]{1,}"   = "METR",
    "^Nalidixic Acid"     = "NA30",
    "^Norfloxa[a-z]{1,}"  = "NOR10",
    "^Nitrofur[a-z]{1,}"  = "FM300",
    "^Oxac[a-z]{1,}"      = "OX1",
    "^Penic[a-z]{1,}"     = "P10",
    "^Pyrazinamide{1,}"   = "PYR",
    "^Sulfameth[a-z]{1,}" = "SXT25",
    "^Tetrac[a-z]{1,}"    = "TE30",
    "^Vancom[a-z]{1,}"    = "VA30",
    "^Imip[a-z]{1,}"      = "IMP",
    "^Rifamp[a-z]{1,}"    = "RIF",
    "^Ethamb[a-z]{1,}"    = "ETB",
    "^Streptom[a-z]{1,}"  = "STREP"
  )
  renamed <- txtData
  for (pattern in names(codes)) {
    renamed <- gsub(pattern, codes[[pattern]], renamed)
  }
  renamed
})

Antibiotics are renamed to easy-to-use abbreviations. This also deals with the fact that antibiotic names are often misspelled, e.g. Ampicilline / Ampiciline / Ampicyline…

# Function to split vector into list
#
# vec: character vector to split.
# ind: marker value; every element equal to `ind` starts a new chunk and is
#      itself not part of any chunk.
#
# Returns a list with one vector per marker, holding the elements between
# that marker and the next one (or the end of `vec` for the last marker).
# Elements before the first marker are ignored, markers immediately followed
# by another marker (empty chunks) are skipped, and NULL is returned when no
# non-empty chunk exists.
VectorToList <- function(vec, ind) {
  splitIndex <- which(vec == ind)
  if (length(splitIndex) == 0) {
    return(NULL)
  }

  # First and last position of every chunk. The final chunk runs to the end
  # of the vector; looping only up to the second-to-last marker would drop it.
  starts <- splitIndex + 1
  ends <- c(splitIndex[-1] - 1, length(vec))

  # start > end means nothing lies between two markers; skipping those avoids
  # the descending sequence that `start:end` would otherwise build.
  keep <- starts <= ends
  if (!any(keep)) {
    return(NULL)
  }

  myList <- Map(function(from, to) vec[from:to], starts[keep], ends[keep])
  unname(myList)
}

Prepare a function which will split the vector (txtData) into a list (myList).

# Function to split vector into data frame
#
# vec: a list whose first element is the character vector of one record
#      (e.g. `txtList[m]`); only `vec[[1]]` is used.
# ind: marker value; every element after its first occurrence is treated as
#      a ";"-separated result row.
#
# Returns a data frame with one row per result line and four character
# columns (default names X1..X4); zero rows when nothing follows the marker.
# Stops with an error when the marker is missing or when a non-empty row
# does not have exactly four fields.
VectorToDF <- function(vec, ind) {
  record <- vec[[1]]
  marker <- which(record == ind)
  if (length(marker) == 0) {
    stop("marker '", ind, "' not found in record", call. = FALSE)
  }

  # Everything after the first marker. Negative seq_len() indexing gives an
  # empty selection when the marker is the last element, where `d:l` would
  # count backwards and pick up the marker itself.
  rows <- record[-seq_len(marker[1])]
  # Empty strings contribute no fields when split, so drop them up front.
  rows <- rows[nzchar(rows)]

  if (length(rows) == 0) {
    return(data.frame(matrix(character(0), ncol = 4),
                      stringsAsFactors = FALSE))
  }

  fields <- strsplit(rows, split = ";", fixed = TRUE)

  # A row with a different number of fields would shift every following
  # value into the wrong column when the matrix is filled row by row.
  bad <- which(lengths(fields) != 4)
  if (length(bad) > 0) {
    stop("result row(s) without exactly 4 fields: ",
         paste(rows[bad], collapse = " | "), call. = FALSE)
  }

  # stringsAsFactors = FALSE keeps the columns as text on every R version.
  myDf <- data.frame(
    matrix(unlist(fields), ncol = 4, byrow = TRUE),
    stringsAsFactors = FALSE
  )

  myDf
}

The next function is to split the vector and make a dataframe (myDF) from it.

# Select the entries of a list of records whose chosen field equals a value.
#
# lst:  list of records (each itself a list or vector).
# elnt: position or name of the field to inspect in every record (default 1).
# lkp:  value to look for; only the first item of the field is compared.
#
# Returns the sub-list of `lst` whose field matches `lkp`.
SelectListItem <- function(lst, elnt = 1, lkp) {
  Filter(function(entry) entry[[elnt]][[1]] == lkp, lst)
}

Function to select elements from list.

# Create list of separate vectors: txtData is cut into chunks at the "New"
# marker lines, giving one vector per chunk
txtList <- VectorToList(txtData, "New")

‘txtList’ is created: a list of comprehensible chunks of data, one chunk per record.

# Create list of lists with results in data frame
# Each entry holds the first line of the chunk as `bacteria`, the second line
# as `source`, and the rows after the "Data" marker as a data frame with the
# columns Antibiotics, S, R and I.
newList <- vector("list", length(txtList))
for (m in seq_along(txtList)) {  # seq_along() does nothing for an empty list

  testresDf <- VectorToDF(txtList[m], "Data")
  colnames(testresDf) <- c("Antibiotics", "S", "R", "I")
  newList[[m]] <- list(bacteria = txtList[[m]][1], source = txtList[[m]][2], result = testresDf)

}

The function VectorToDF converts the result rows of every chunk in txtList into a data frame.

# Create data frame from list
# One row per bacterium / source / antibiotic. The per-record pieces are
# collected in a list and bound once at the end, instead of growing newDF
# with rbind() on every iteration.
if (length(newList) == 0) {
  stop("no records found: newList is empty", call. = FALSE)
}
dfParts <- vector("list", length(newList))
for (s in seq_along(newList)) {

  cntRows <- nrow(newList[[s]]$result)
  # Repeat the record-level labels once per result row.
  bact <- rep_len(newList[[s]]$bacteria, cntRows)
  srce <- rep_len(newList[[s]]$source, cntRows)
  dfParts[[s]] <- cbind(Bacteria = bact, Source = srce, newList[[s]]$result)
}
newDF <- do.call(rbind, dfParts)
newDF$Bacteria <- gsub("(^|[[:space:]])([[:alpha:]])", "\\1\\U\\2", newDF$Bacteria, perl=TRUE) # Change all first letters of bacteria names to upper case

Now to make a workable database.

# Correct Bacteria species
# The preceding step upper-cased the first letter of every word, so most
# substitutions below put the species epithet back in lower case; the others
# fix spelling variants or all-caps names. The substitutions run in order.
# NOTE(review): several patterns start with "?", a quantifier with nothing
# before it to repeat; how that is interpreted depends on the regex engine.
# Confirm these still match as intended (dropping the "?" looks equivalent).
newDF$Bacteria <- gsub("?Species", "species", newDF$Bacteria) 
newDF$Bacteria <- gsub("Actinomyces Viscosus", "Actinomyces viscosus", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Fragilis", "fragilis", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Braakii", "braakii", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Farmeri", "farmeri", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Freundii", "freundii", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Koseri", "koseri", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Cloacae", "cloacae", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Faecalis", "faecalis", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Coli", "coli", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Influenzae", "influenzae", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Biotype", "biotype", newDF$Bacteria) 
# All-caps variant with a different spelling is mapped onto the standard name
newDF$Bacteria <- gsub("HEMOPHILUS INFLUENZA", "Haemophilus influenzae", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Ornithinolytica", "ornithinolytica", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Oxytoca", "oxytoca", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Pneumoniae", "pneumoniae", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Terrigena", "terrigena", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Lacunata", "lacunata", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Morganii", "morganii", newDF$Bacteria) 
# Both spellings of the tuberculosis complex end up under one name
newDF$Bacteria <- gsub("Mycobacteria Tuberculosis Conplex", "Mycobacterium tuberculosis complex", newDF$Bacteria)
newDF$Bacteria <- gsub("Mycobacterium Tuberculosis Complex", "Mycobacterium tuberculosis complex",newDF$Bacteria)
newDF$Bacteria <- gsub("?Mirabilis", "mirabilis", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Penneri", "penneri", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Vulgaris", "vulgaris", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Aeruginosa", "aeruginosa", newDF$Bacteria) 
# The trailing ".*" also discards whatever follows "SALMONELLA GROUP"
newDF$Bacteria <- gsub("SALMONELLA GROUP.*", "Salmonella group", newDF$Bacteria) 
newDF$Bacteria <- gsub("?Paratyphi", "paratyphi", newDF$Bacteria) 
newDF$Bacteria <- gsub("?SALMONELLA PARATYPHI", "Salmonella paratyphi", newDF$Bacteria) 
newDF$Bacteria <- gsub("Boydii", "boydii", newDF$Bacteria)
newDF$Bacteria <- gsub("Dysenteriae", "dysenteriae", newDF$Bacteria) 
newDF$Bacteria <- gsub("SHIGELLA GROUP", "Shigella group", newDF$Bacteria) 
newDF$Bacteria <- gsub("Sonnei", "sonnei", newDF$Bacteria)
newDF$Bacteria <- gsub("Aureus", "aureus", newDF$Bacteria) 
newDF$Bacteria <- gsub("Lugdunensis", "lugdunensis", newDF$Bacteria) 
newDF$Bacteria <- gsub("Saprophyticus", "saprophyticus", newDF$Bacteria)
newDF$Bacteria <- gsub("Agalactiae", "agalactiae", newDF$Bacteria) 
newDF$Bacteria <- gsub("Streptococcus Anginosus/milleri", "Streptococcus anginosus/milleri", newDF$Bacteria)
newDF$Bacteria <- gsub("Pyogenes", "pyogenes", newDF$Bacteria)
# Moves the space from after the opening parenthesis to before it
newDF$Bacteria <- gsub("4\\(\\ Erwina species)", "4 (Erwina species)", newDF$Bacteria) 
newDF$Bacteria <- gsub("Viridans Streptococcus", "Viridans streptococci", newDF$Bacteria)
# Coagulase-negative staphylococci are abbreviated to CNS from here on
newDF$Bacteria <- gsub("Coagulase-negative Staphylococcus species", "CNS", newDF$Bacteria)

The bacteria species names contain many mistakes. They need to be corrected one by one.

## change the S / I / R counts into numeric
## Convert via as.character(): when a column is a factor, as.numeric() alone
## returns the internal level codes rather than the printed counts. The extra
## step is harmless when the column is already character.
newDF$S <- as.numeric(as.character(newDF$S))
newDF$I <- as.numeric(as.character(newDF$I))
newDF$R <- as.numeric(as.character(newDF$R))
## sum all cultures (Sensitive + Intermediate + Resistant)
newDF$N <- newDF$S + newDF$I + newDF$R

The numbers under S, I & R are not yet numeric (they are stored as ‘factors’ or text), so they are converted to numeric with the command ‘as.numeric’. ‘N’ is the total number of antibiograms (S + I + R).

###  Tuberculosis and its drugs
# Keep the Mycobacterium tuberculosis complex rows in their own data frame
# and show how many antibiograms (N) they add up to.
myco <- newDF[grepl("Mycobacterium tuberculosis complex", newDF$Bacteria), ]
sum(myco$N)

## [1] 334

write.csv(myco, "myco.csv")
# Drop the Mycobacterium rows with a logical mask. With `-grep()` a pattern
# that matches nothing yields x[-integer(0), ], which removes every row.
newDF <- newDF[!grepl("Mycobacterium tuberculosis complex", newDF$Bacteria), ]
sum(newDF$N)

## [1] 15989

First a separate dataframe is made for the Mycobacterium species. Next, the Mycobacterium rows are removed from the main dataframe by using the command ‘-grep’. The ‘sum’ commands show that 334 Mycobacterium antibiograms were removed, leaving 15989 antibiograms.

# Cleaning up Source
## Rename 'W' and 'WOUND' as 'Wound'
## 'WOUND' is replaced wherever it occurs. A bare 'W' is replaced only when
## it stands alone as a word: a plain fixed replacement of "W" would rewrite
## every capital W, including those inside other labels.
newDF$Source <- gsub("WOUND", "Wound", newDF$Source, fixed = TRUE)
newDF$Source <- gsub("\\bW\\b", "Wound", newDF$Source, perl = TRUE)

# Relabel the remaining sources: literal (fixed) replacements applied in
# order, turning the upper-case labels into sentence case and correcting two
# misspelled labels on the way.
newDF$Source <- local({
  relabel <- c(
    "BODY FLUID"      = "Body fluid",
    "BLOOD-PEADITRIC" = "Blood-pediatric",
    "BLOOD-AEROBIC"   = "Blood-aerobic",
    "BLOOD-ANAEROBIC" = "Blood-anaerobic",
    "EAR CULTURE"     = "Ear culture",
    "NASAL DISCHARGE" = "Nasal discharge",
    "PUS"             = "Pus",
    "ROUTINE"         = "Routine",
    "SPUTUM"          = "Sputum",
    "STOOL"           = "Stool",
    "Thorat culture"  = "Throat culture",
    "EYE CULTURE"     = "Eye culture"
  )
  src <- newDF$Source
  for (old in names(relabel)) {
    src <- gsub(old, relabel[[old]], src, fixed = TRUE)
  }
  src
})

Correct the spelling of the ‘Source’ labels and change them from upper case to sentence case.

### Merge Blood-anaerobic with Blood-aerobic to simply 'Blood'
newDF$Source <- gsub(
  "Blood-anaerobic", "Blood",
  gsub("Blood-aerobic", "Blood", newDF$Source, fixed = TRUE),
  fixed = TRUE
)

### Merge "Throat culture" with "Throat swab" to simply "Throat"
newDF$Source <- gsub(
  "Throat culture", "Throat",
  gsub("Throat swab", "Throat", newDF$Source, fixed = TRUE),
  fixed = TRUE
)

‘Blood-aerobic’ and ‘Blood-anaerobic’ are merged into simply ‘Blood’. The same is done for ‘Throat swab’ and ‘Throat culture’, which become ‘Throat’.

### Aggregate double sources and antibiotics.
# Sum S, I, R and N over all rows that share the same Bacteria, Source and
# Antibiotics, then print the grand total of N.
newDF <- aggregate(cbind(S,I,R,N)~Bacteria+Source+Antibiotics, data=newDF, sum, na.rm=TRUE)
sum(newDF$N)

## [1] 15989

Many lines now contain the same bacterium species with the same source and antibiotic. These are grouped together, with their S, I and R counts summed up.

## Export data
# Written with the write.csv() defaults to "newDF.csv", relative to the
# current working directory.
write.csv(newDF, "newDF.csv")

An intermediate raw-data file is made.

newDF
Bacteria	Source	Antibiotics	S	I	R	N
CNS	Blood	AMC30	11	3	6	20
Citrobacter braakii	Blood	AMC30	4	2	2	8
Enterobacter cloacae	Blood	AMC30	1	1	3	5
Escherichia coli	Blood	AMC30	7	14	21	42
Group C Streptococcus	Blood	AMC30	1	1	3	5
Klebsiella pneumoniae	Blood	AMC30	2	2	5	9
Pseudomonas species	Blood	AMC30	1	3	3	7
Staphylococcus aureus	Blood	AMC30	10	4	2	16
Staphylococcus lugdunensis	Blood	AMC30	4	2	6	12
Streptococcus pyogenes (Group A)	Blood	AMC30	4	2	2	8
Actinomyces viscosus	Blood-pediatric	AMC30	2	1	1	4
Bacteroides fragilis	Blood-pediatric	AMC30	2	1	1	4
CNS	Blood-pediatric	AMC30	8	1	1	10
Escherichia coli	Blood-pediatric	AMC30	3	2	3	8
Group A Streptococcus	Blood-pediatric	AMC30	2	1	1	4
Klebsiella oxytoca	Blood-pediatric	AMC30	2	2	6	10
Pseudomonas species	Blood-pediatric	AMC30	2	2	6	10
The dataframe (first 17 rows).

# Re-read the exported file and keep only the data columns, which drops the
# row-name column that write.csv() adds as the first field.
# The subset is taken from the freshly read newDF2; subsetting newDF here
# would throw away the result of read.csv().
newDF2 <- read.csv("newDF.csv", sep = ",", stringsAsFactors = FALSE)
newDF2 <- subset(newDF2, select = c(Bacteria, Source, Antibiotics, S, I, R, N))

To start with a fresh new Dataset

### Aggregate on Species.
# Total number of antibiograms (N) per entry in the Bacteria column.
DFspecies <- aggregate(N ~ Bacteria, data = newDF2, FUN = sum, na.rm = TRUE)

Bacteria species with number of antibiograms
Bacteria	N
Acinetobacter species	54
Actinomyces viscosus	37
Bacteroides fragilis	8
Beta Hemolytic Non-group A	30
CNS	2585
Citrobacter braakii	64
Citrobacter farmeri	33
Citrobacter freundii	39
Citrobacter koseri	40
Enterobacter cloacae	140
Enterobacter species	68
Enterococcus faecalis	619
Enterococcus species	50
Escherichia coli	2931
Group A Streptococcus	128
Group B Streptococcus	224
Group C Streptococcus	113
Group F Streptococcus	38
Group G Streptococcus	42
Haemophilus influenzae	47
This gives an overview of the number of antibiograms per bacteria species (only the first 20 rows are shown).

##### Group species together
##### \\s = whitespace    [a-z] = any lower-case letter   {1,} = repeat previous 1 or more times
##### The substitutions run in order; later ones rely on the names produced by earlier ones.
# put together Citrobacter species (C. braakii, farmeri, freundii & koseri)
newDF2$Bacteria <- gsub("Citrobacter\\s[a-z]{1,}", "Citrobacter spp.", newDF2$Bacteria) 
# put together Enterobacter cloacae & species
newDF2$Bacteria <- gsub("Enterobacter\\s[a-z]{1,}", "Enterobacter spp.", newDF2$Bacteria) 
# put together Enterococcus faecalis & species
newDF2$Bacteria <- gsub("Enterococcus\\s[a-z]{1,}", "Enterococcus spp.", newDF2$Bacteria) 
# put together Klebsiella ornithinolytica, oxytoca, pneumoniae, terrigena & species
newDF2$Bacteria <- gsub("Klebsiella\\s[a-z]{1,}", "Klebsiella spp.", newDF2$Bacteria) 
# put together Proteus mirabilis, penneri, vulgaris & species
newDF2$Bacteria <- gsub("Proteus\\s[a-z]{1,}", "Proteus spp.", newDF2$Bacteria) 
# put together Pseudomonas aeruginosa & species
newDF2$Bacteria <- gsub("Pseudomonas\\s[a-z]{1,}", "Pseudomonas spp.", newDF2$Bacteria) 
# put together Salmonella group, paratyphi A & B & species
# (the second line strips the " A" / " B" left behind by the first one)
newDF2$Bacteria <- gsub("Salmonella\\s[a-z]{1,}", "Salmonella spp.", newDF2$Bacteria) 
newDF2$Bacteria <- gsub("Salmonella spp.\\s[AB]", "Salmonella spp.", newDF2$Bacteria) 
# put together Shigella boydii, dysenteriae, group A1, sonnei & species
# (the second line strips the " A1" left behind by the first one)
newDF2$Bacteria <- gsub("Shigella\\s[a-z]{1,}", "Shigella spp.", newDF2$Bacteria) 
newDF2$Bacteria <- gsub("Shigella spp.\\sA1", "Shigella spp.", newDF2$Bacteria)
# put together Staphylococcus lugdunensis & saprophyticus as CNS (keep S. aureus separate)
newDF2$Bacteria <- gsub("Staphylococcus lugdunensis", "CNS", newDF2$Bacteria) 
newDF2$Bacteria <- gsub("Staphylococcus saprophyticus", "CNS", newDF2$Bacteria)
# Rename Group [A-Z] Streptococcus as 'Streptococcus Group [A-Z]'
newDF2$Bacteria <- gsub("Group\\sA\\sStreptococcus", "Streptococcus Group A", newDF2$Bacteria) 
newDF2$Bacteria <- gsub("Group\\sB\\sStreptococcus", "Streptococcus Group B", newDF2$Bacteria) 
newDF2$Bacteria <- gsub("Group\\sC\\sStreptococcus", "Streptococcus Group C", newDF2$Bacteria) 
newDF2$Bacteria <- gsub("Group\\sF\\sStreptococcus", "Streptococcus Group F", newDF2$Bacteria) 
newDF2$Bacteria <- gsub("Group\\sG\\sStreptococcus", "Streptococcus Group G", newDF2$Bacteria) 
# NOTE(review): "[:(:]", "[:):]" and "[:/:]" are bracket expressions that match
# ':' as well as the bracket or slash; presumably "[(]", "[)]" and "/" were
# intended. Confirm.
newDF2$Bacteria <- gsub("Streptococcus\\sagalactiae\\s[:(:]Group\\sB[:):]", "Streptococcus Group B", newDF2$Bacteria) 
newDF2$Bacteria <- gsub("Streptococcus\\sanginosus[:/:]milleri", "Streptococcus Group F", newDF2$Bacteria) 
newDF2$Bacteria <- gsub("Streptococcus\\spyogenes\\s[:(:]Group\\sA[:):]", "Streptococcus Group A", newDF2$Bacteria)
# Put together Streptococcus species & Viridans
newDF2$Bacteria <- gsub("Streptococcus\\sspecies", "Streptococcus spp.", newDF2$Bacteria) 
newDF2$Bacteria <- gsub("Viridans\\sstreptococci", "Streptococcus spp.", newDF2$Bacteria) 
# In case needed, put all Streptococcus Group [A-Z] together as Beta-hemolytic Streptococci
# NOTE(review): this line is not conditional; it always runs and merges the groups.
newDF2$Bacteria <- gsub("Streptococcus\\sGroup\\s[A-Z]", "Beta-hemolytic Streptococci", newDF2$Bacteria)

Rather than keeping many small groups of individual bacteria species, the species are merged together at genus level.

# aggregate new groups.
# Sum S, I, R and N per Bacteria / Source / Antibiotics combination; naming
# the grouping list gives the key columns their final names directly.
newDF2 <- aggregate(
  newDF2[, c("S", "I", "R", "N")],
  by = list(
    Bacteria = newDF2$Bacteria,
    Source = newDF2$Source,
    Antibiotics = newDF2$Antibiotics
  ),
  FUN = "sum"
)

Rows belonging to the same bacteria genus are merged together with the aggregate command. New names are put in the header.

# n = sensitive + resistant results (intermediates are not counted)
newDF2$n <- with(newDF2, S + R)

The sum is calculated for sensitive plus resistant antibiograms.

# When calculating %resistance, leave out the intermediates.
# Share of resistant results among sensitive + resistant ones, in percent,
# rounded to three significant digits. signif() rounds the numbers directly
# instead of formatting them as text and parsing the text back.
newDF2$Resis <- signif(newDF2$R / (newDF2$S + newDF2$R) * 100, digits = 3)
sum(newDF2$N)

## [1] 15989

sum(newDF2$n)

## [1] 13073

For this study, the resistance rate is calculated from the sum of sensitive plus resistant cultures, leaving the intermediate out.

# Keep only groups with more than 6 (i.e. at least 7) sensitive + resistant results
newDF3 <- newDF2[newDF2$n > 6,]

After leaving out the intermediates, groups that contain 6 or fewer cultures are also left out. The threshold of 6 is admittedly somewhat arbitrary. However, after checking all the group sizes, it was observed that a particularly large proportion of ‘small groups’ was removed when requiring 7 or more cultures per group.

### new dataset without SOURCE, aggregated again ####
# Sum S, R and n per Bacteria / Antibiotics combination (Source is dropped);
# naming the grouping list gives the key columns their final names directly.
Plot7 <- aggregate(
  newDF3[, c("S", "R", "n")],
  by = list(Bacteria = newDF3$Bacteria, Antibiotics = newDF3$Antibiotics),
  FUN = "sum"
)

# % resistance rounded to three significant digits; signif() rounds the
# numbers directly instead of formatting them as text and parsing them back.
Plot7$Resis <- signif(Plot7$R / Plot7$n * 100, digits = 3)

sum(newDF3$n)

## [1] 10684

sum(Plot7$n)

## [1] 10684

Now the dataset is aggregated per bacteria genus and antibiotic (ignoring the source), ready to be plotted with antibiotics on the y-axis and bacteria genera on the x-axis.

### MAKE PLOT
# One point per bacteria group / antibiotic pair: colour encodes the
# % resistance (green = low, red = high) and point size encodes n.
Plot7g <- ggplot(Plot7) +
  geom_point(aes(x = Bacteria, y = Antibiotics, colour = Resis, size = n)) +
  scale_colour_gradient(name = "% resistance", low = "green", high = "red") +
  scale_size(name = "sample size", range = c(2, 9)) +
  labs(x = "Bacteria groups", y = "Antibiotic") +
  theme(
    text = element_text(size = 12),
    axis.text.x = element_text(angle = 60, hjust = 1, size = 12)
  )

Plot7g

And plotting the aggregated dataset.