Salmonella isolates 2019-2023

2024-05-01

Linoy.z

{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE)

# Remove every object from the global environment before the analysis starts.
# NOTE(review): this also wipes anything else held in the session the script is
# run from; starting a fresh R session gives a clean workspace without that
# side effect.
rm (list=ls())
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Organizing & cleaning the Salmonella data file

First, I will load the data, which is stored as a CSV file

# Make the CSV directory the working directory.
# Remember to give only the directory path here, without a file name at the end.
setwd("G:/.shortcut-targets-by-id/1OCnVJ6euOfHZdOlKqGKkb1ToC8FQqs_Z/Linoy Zeman/Data files/Poultry/CSVs")

# Load the isolates table. Remember to first save the file as
# "CSV UTF-8 (comma delimited) (*.csv)".
# The values listed in `na.strings` are read as missing (NA).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
Salmonella <- read.csv(
  "G:/.shortcut-targets-by-id/1OCnVJ6euOfHZdOlKqGKkb1ToC8FQqs_Z/Linoy Zeman/Data files/Poultry/CSVs/Sal_019-023_updt.csv",
  header = TRUE,
  na.strings = c(""," ","na",NA," ")
)

Organizing the columns

I will reorder the columns so that the data is easier to read.
I will rename the columns so that they follow one basic naming pattern that is common to the whole data set.

## Changing the column names
# Each row is c(<name in the CSV>, <name used from here on>). A name that is
# not present in the data is simply skipped.
colnames(Salmonella) <- local({
  renames <- rbind(
    c("region", "Region"),
    c("Farm.kinde", "Farm_kinde"),
    c("settlement.lab", "Lab_region"),
    c("performing..lab", "Performing_lab"),
    c("test.kinde", "Test_kinde"),
    c("chicken.kinde", "Poultry"),
    c("Poultry.branches", "Poultry_branches"),
    c("generation", "Generation"),
    c("breed", "Breed")
  )
  current <- colnames(Salmonella)
  for (i in seq_len(nrow(renames))) {
    current[current == renames[i, 1]] <- renames[i, 2]
  }
  current
})

## Changing the column locations
# The moves are applied one after another, in this order; each step places one
# column relative to a column that has already been positioned.
Salmonella <- Salmonella %>%
  relocate("Region", .before = "Farm_kinde") %>%
  relocate("Farm_kinde", .before = "Lab_region") %>%
  relocate("Performing_lab", .after = "Lab_region") %>%
  relocate("Test_kinde", .after = "Performing_lab") %>%
  relocate("Poultry", .after = "Test_kinde") %>%
  relocate("Branch_line", .after = "Poultry") %>%
  relocate("Poultry_branches", .after = "Branch_line") %>%
  relocate("Generation", .after = "Poultry_branches") %>%
  relocate("Breed", .after = "Generation")

Translate the content of the important columns from Hebrew to English.

## Region
# Keep the original Hebrew values in `temp` so the translation can be
# cross-checked in the summary table below.
Salmonella$temp <- Salmonella$Region

# Each row is c(<Hebrew pattern>, <English replacement>); the substitutions are
# applied in the listed order.
Salmonella$Region <- local({
  dictionary <- rbind(
    c("צפון", "North"),
    c("השפלה וההר", "Shfela"),
    c("העמקים", "The valleys"),
    c("המרכז", "Central"),
    c("הדרום", "South")
  )
  translated <- Salmonella$Region
  for (i in seq_len(nrow(dictionary))) {
    translated <- sub(dictionary[i, 1], dictionary[i, 2], translated)
  }
  translated
})

unique(Salmonella$Region)

## [1] "North"       "Shfela"      "The valleys" "South"       "Central"

# Original value vs. translated value, with counts and percentage of all rows.
Temp.1_Region <- Salmonella %>%
  group_by(temp, Region) %>%
  summarise(n = n(), per = round(100 * n / length(Salmonella$Isolate), 2))
print(Temp.1_Region)

## # A tibble: 5 × 4
## # Groups:   temp [5]
##   temp       Region          n   per
##   <chr>      <chr>       <int> <dbl>
## 1 הדרום      South        1781  10.5
## 2 המרכז      Central      2856  16.8
## 3 העמקים     The valleys  5581  32.9
## 4 השפלה וההר Shfela       2605  15.4
## 5 צפון       North        4133  24.4

## Settlement lab (Lab_region)
# Keep the original Hebrew values in `temp` for the cross-check table below.
Salmonella$temp <- Salmonella$Lab_region

# Each row is c(<Hebrew pattern>, <English replacement>), applied in order.
# The long form ("lab" + direction) is listed before the short form so that the
# short pattern only sees values the long one did not already translate.
Salmonella$Lab_region <- local({
  dictionary <- rbind(
    c("מעבדה דרומית", "Southern"),
    c("דרומית", "Southern"),
    c("מעבדה צפונית", "Northern"),
    c("צפונית", "Northern"),
    c("מפקח ארצי", "National_inspector")
  )
  translated <- Salmonella$Lab_region
  for (i in seq_len(nrow(dictionary))) {
    translated <- sub(dictionary[i, 1], dictionary[i, 2], translated)
  }
  translated
})

unique(Salmonella$Lab_region)

## [1] "Northern"           "Southern"           "National_inspector"
## [4] NA

# Original value vs. translated value, with counts and percentage of all rows.
Temp.2_Lab_region <- Salmonella %>%
  group_by(temp, Lab_region) %>%
  summarise(n = n(), per = round(100 * n / length(Salmonella$Isolate), 2))
print(Temp.2_Lab_region)

## # A tibble: 6 × 4
## # Groups:   temp [6]
##   temp         Lab_region             n   per
##   <chr>        <chr>              <int> <dbl>
## 1 דרומית       Southern            5981 35.3 
## 2 מעבדה דרומית Southern              87  0.51
## 3 מעבדה צפונית Northern             151  0.89
## 4 מפקח ארצי    National_inspector    13  0.08
## 5 צפונית       Northern           10723 63.2 
## 6 <NA>         <NA>                   1  0.01

## Performing lab
# Keep the original Hebrew values in `temp` for the cross-check table below.
Salmonella$temp <- Salmonella$Performing_lab

# Each row is c(<Hebrew pattern>, <English replacement>), applied in order.
# The English labels carry no leading/trailing whitespace, so later comparisons
# and joins on this column do not have to account for a stray space.
Salmonella$Performing_lab <- local({
  dictionary <- rbind(
    c("מעבדה דרומית", "Southern"),
    c("מעבדה צפונית", "Northern"),
    c("חטיבה לעופות, מכון הוטרינרי", "The_Veterinary_Institute")
  )
  translated <- Salmonella$Performing_lab
  for (i in seq_len(nrow(dictionary))) {
    translated <- sub(dictionary[i, 1], dictionary[i, 2], translated)
  }
  translated
})

unique(Salmonella$Performing_lab)

## [1] "Southern"                 "Northern"                
## [3] "The_Veterinary_Institute"

# Original value vs. translated value, with counts and percentage of all rows.
Temp.3_Performing_lab <- Salmonella %>%
  group_by(temp, Performing_lab) %>%
  summarise(n = n(), per = round(100 * n / length(Salmonella$Isolate), 2))
print(Temp.3_Performing_lab)

## # A tibble: 3 × 4
## # Groups:   temp [3]
##   temp                        Performing_lab               n   per
##   <chr>                       <chr>                    <int> <dbl>
## 1 חטיבה לעופות, מכון הוטרינרי The_Veterinary_Institute     2  0.01
## 2 מעבדה דרומית                Southern                 16394 96.7 
## 3 מעבדה צפונית                Northern                   560  3.3

## Test kind
# Keep the original Hebrew values in `temp` for the cross-check table below.
Salmonella$temp <- Salmonella$Test_kinde

# Each row is c(<Hebrew pattern>, <English replacement>).
# The order matters: the patterns are unanchored, so a longer phrase must be
# translated before any shorter phrase it contains (e.g. the "dragged swab ..."
# variants before plain "dragged swab", and "dust swab" before "dust").
Salmonella$Test_kinde <- local({
  dictionary <- rbind(
    c("אברים פנימיים", "Internal_organs"),
    c("אחר", "Other"),
    c("אפרוחים", "Chicks"),
    c("חיידק", "Bacterium"),
    c("חיתולים", "Diapers"),
    c("כבד", "Liver"),
    c("לב", "Heart"),
    c("לשלשת", "Secretions"),
    c("מח עצם", "B_marrow"),
    c("מטוש אבק", "Dust_swab"),
    c("מטוש נגרר לול ריק", "Dragged_swab_roost-empty"),
    c("מטוש נגרר לול", "Dragged.swab"),
    c("מטוש נגרר מגש בקיעה", "Dragged_swab_Hatching_tray"),
    c("מטוש נגרר מדגריה", "Dragged.swab.hatchery"),
    c("מטוש נגרר", "Dragged_swab"),
    c("מעבדה למחלות עופות", "Poultry_diseases_lab"),
    c("פרק", "Joint"),
    c("ריאה", "Lung"),
    c("תערובת", "Feed"),
    c("אבק", "Dust")
  )
  translated <- Salmonella$Test_kinde
  for (i in seq_len(nrow(dictionary))) {
    translated <- sub(dictionary[i, 1], dictionary[i, 2], translated)
  }
  translated
})

unique(Salmonella$Test_kinde)

##  [1] "Dragged.swab"               "Dust_swab"                 
##  [3] "Dust"                       "Dragged_swab_roost-empty"  
##  [5] "Dragged_swab"               "Chicks"                    
##  [7] "Dragged.swab.hatchery"      "Dragged_swab_Hatching_tray"
##  [9] "Internal_organs"            "B_marrow"                  
## [11] "Lung"                       "Diapers"                   
## [13] "Bacterium"                  "Secretions"                
## [15] "Heart"                      "Feed"                      
## [17] "Other"                      "Liver"                     
## [19] "Joint"                      "Poultry_diseases_lab"

# Original value vs. translated value, with counts and percentage of all rows.
Temp.4_Test_kinde <- Salmonella %>%
  group_by(temp, Test_kinde) %>%
  summarise(n = n(), per = round(100 * n / length(Salmonella$Isolate), 2))
# n = 123 raises the row limit so the whole table is printed.
print((Temp.4_Test_kinde), n = 123)

## # A tibble: 20 × 4
## # Groups:   temp [20]
##    temp                Test_kinde                     n   per
##    <chr>               <chr>                      <int> <dbl>
##  1 אבק                 Dust                         663  3.91
##  2 אברים פנימיים       Internal_organs              381  2.25
##  3 אחר                 Other                          2  0.01
##  4 אפרוחים             Chicks                       327  1.93
##  5 חיידק               Bacterium                    400  2.36
##  6 חיתולים             Diapers                       31  0.18
##  7 כבד                 Liver                          2  0.01
##  8 לב                  Heart                          4  0.02
##  9 לשלשת               Secretions                     6  0.04
## 10 מח עצם              B_marrow                      49  0.29
## 11 מטוש אבק            Dust_swab                    672  3.96
## 12 מטוש נגרר           Dragged_swab                1699 10.0 
## 13 מטוש נגרר לול       Dragged.swab               11147 65.7 
## 14 מטוש נגרר לול ריק   Dragged_swab_roost-empty     624  3.68
## 15 מטוש נגרר מגש בקיעה Dragged_swab_Hatching_tray   400  2.36
## 16 מטוש נגרר מדגריה    Dragged.swab.hatchery        544  3.21
## 17 מעבדה למחלות עופות  Poultry_diseases_lab           1  0.01
## 18 פרק                 Joint                          1  0.01
## 19 ריאה                Lung                           2  0.01
## 20 תערובת              Feed                           1  0.01

## Chicken kind -> Poultry
# Keep the original Hebrew values in `temp` for the cross-check table below.
Salmonella$temp <- Salmonella$Poultry

# Each row is c(<Hebrew pattern>, <English replacement>), applied in order.
Salmonella$Poultry <- local({
  dictionary <- rbind(
    c("תרנגולות", "Chickens"),
    c("ברווזים", "Ducks"),
    c("הודים", "Turkey"),
    c("יונים", "Pigeons"),
    c("שלווים", "Quail")
  )
  translated <- Salmonella$Poultry
  for (i in seq_len(nrow(dictionary))) {
    translated <- sub(dictionary[i, 1], dictionary[i, 2], translated)
  }
  translated
})

unique(Salmonella$Poultry)

## [1] "Chickens" "Turkey"   "Ducks"    "Pigeons"  "Quail"

# Original value vs. translated value, with counts and percentage of all rows.
Temp.5_Poultry <- Salmonella %>%
  group_by(temp, Poultry) %>%
  summarise(n = n(), per = round(100 * n / length(Salmonella$Isolate), 2))
print(Temp.5_Poultry)

## # A tibble: 5 × 4
## # Groups:   temp [5]
##   temp     Poultry      n   per
##   <chr>    <chr>    <int> <dbl>
## 1 ברווזים  Ducks       62  0.37
## 2 הודים    Turkey    2576 15.2 
## 3 יונים    Pigeons      2  0.01
## 4 שלווים   Quail        3  0.02
## 5 תרנגולות Chickens 14313 84.4

## Branch_line
# Keep the original Hebrew values in `temp` for the cross-check table below.
Salmonella$temp <- Salmonella$Branch_line

# Each row is c(<Hebrew regex>, <English replacement>), applied in order.
# "Breeding", "heavy breeding" and "light breeding" are all united into
# "Breeding": the optional group matches the " heavy" / " light" suffix in the
# same step, so no half-translated value is ever produced. (In a regular
# expression `*` is a repetition operator, not a wildcard, so it is not used.)
Salmonella$Branch_line <- local({
  dictionary <- rbind(
    c("הטלה", "Layers"),
    c("נוי", "Ornaments"),
    c("פטום", "Broilers"),
    c("רבייה( כבדה| קלה)?", "Breeding"),
    c("שלווים ביצי מאכל", "Quail")
  )
  translated <- Salmonella$Branch_line
  for (i in seq_len(nrow(dictionary))) {
    translated <- sub(dictionary[i, 1], dictionary[i, 2], translated)
  }
  translated
})

unique(Salmonella$Branch_line)

## [1] "Layers"    "Breeding"  "Broilers"  "Ornaments" "Quail"

# Original value vs. translated value, with counts and percentage of all rows.
Temp.6_Branch_line <- Salmonella %>%
  group_by(temp, Branch_line) %>%
  summarise(n = n(), per = round(100 * n / length(Salmonella$Isolate), 2))
print(Temp.6_Branch_line)

## # A tibble: 7 × 4
## # Groups:   temp [7]
##   temp             Branch_line     n   per
##   <chr>            <chr>       <int> <dbl>
## 1 הטלה             Layers       5856 34.5 
## 2 נוי              Ornaments       5  0.03
## 3 פטום             Broilers     3056 18.0 
## 4 רבייה            Breeding     1933 11.4 
## 5 רבייה כבדה       Breeding     5857 34.5 
## 6 רבייה קלה        Breeding      248  1.46
## 7 שלווים ביצי מאכל Quail           1  0.01

## Poultry branches
# Keep the original Hebrew values in `temp` for the cross-check table below.
Salmonella$temp <- Salmonella$Poultry_branches

# Each row is c(<Hebrew pattern>, <English replacement>), applied in order.
# The patterns are written out literally; `*` is a regex repetition operator,
# not a wildcard, so it is not appended to any of them.
Salmonella$Poultry_branches <- local({
  dictionary <- rbind(
    c("ברווזים - פיטום", "Broilers"),
    c("הודים - פיטום", "Broilers"),
    c("עופות - שונים", "Poultry_various"),
    c("שלווים - ביצי מאכל", "Quail_eggs"),
    c("תרנגולות - הטלה", "Layers"),
    c("תרנגולות - פיטום", "Broilers"),
    c("תרנגולות - רביה קלה", "Light_breeders"),
    c("תרנגולות - רבייה כבדה", "Heavy_breeders")
  )
  translated <- Salmonella$Poultry_branches
  for (i in seq_len(nrow(dictionary))) {
    translated <- sub(dictionary[i, 1], dictionary[i, 2], translated)
  }
  translated
})

unique(Salmonella$Poultry_branches)

## [1] "Layers"          "Heavy_breeders"  "Light_breeders"  "Broilers"       
## [5] NA                "Poultry_various" "Quail_eggs"

# Original value vs. translated value, with counts and percentage of all rows.
Temp.7_Poultry_branches <- Salmonella %>%
  group_by(temp, Poultry_branches) %>%
  summarise(n = n(), per = round(100 * n / length(Salmonella$Isolate), 2))
print(Temp.7_Poultry_branches)

## # A tibble: 9 × 4
## # Groups:   temp [9]
##   temp                  Poultry_branches     n   per
##   <chr>                 <chr>            <int> <dbl>
## 1 ברווזים - פיטום       Broilers             6  0.04
## 2 הודים - פיטום         Broilers           700  4.13
## 3 עופות - שונים         Poultry_various     42  0.25
## 4 שלווים - ביצי מאכל    Quail_eggs           1  0.01
## 5 תרנגולות - הטלה       Layers            5859 34.6 
## 6 תרנגולות - פיטום      Broilers          2351 13.9 
## 7 תרנגולות - רביה קלה   Light_breeders     245  1.44
## 8 תרנגולות - רבייה כבדה Heavy_breeders    5857 34.5 
## 9 <NA>                  <NA>              1895 11.2

## Farm kind
# Keep the original Hebrew values in `temp` for the cross-check table below.
Salmonella$temp <- Salmonella$Farm_kinde

# Each row is c(<Hebrew pattern>, <English replacement>), applied in order.
Salmonella$Farm_kinde <- local({
  dictionary <- rbind(
    c("אחר", "Other"),
    c("הודונים", "Turkey-young"),
    c("חופש", "Freedom"),
    c("מדגריה", "Hatchery"),
    c("פרגיות", "Pullets"),
    c("רבייה הטלה", "Breeders")
  )
  translated <- Salmonella$Farm_kinde
  for (i in seq_len(nrow(dictionary))) {
    translated <- sub(dictionary[i, 1], dictionary[i, 2], translated)
  }
  translated
})

unique(Salmonella$Farm_kinde)

## [1] "Other"        "Breeders"     "Freedom"      "Hatchery"     "Pullets"     
## [6] "Turkey-young"

# Original value vs. translated value, with counts and percentage of all rows.
Temp.8_Farm_kinde <- Salmonella %>%
  group_by(temp, Farm_kinde) %>%
  summarise(n = n(), per = round(100 * n / length(Salmonella$Isolate), 2))
print(Temp.8_Farm_kinde)

## # A tibble: 6 × 4
## # Groups:   temp [6]
##   temp       Farm_kinde       n   per
##   <chr>      <chr>        <int> <dbl>
## 1 אחר        Other         8011 47.2 
## 2 הודונים    Turkey-young   487  2.87
## 3 חופש       Freedom        299  1.76
## 4 מדגריה     Hatchery       597  3.52
## 5 פרגיות     Pullets       1606  9.47
## 6 רבייה הטלה Breeders      5956 35.1

## Generation
# Keep the original Hebrew values in `temp` for the cross-check table below.
Salmonella$temp <- Salmonella$Generation

# Each row is c(<Hebrew pattern>, <English replacement>), applied in order.
Salmonella$Generation <- local({
  dictionary <- rbind(
    c("אמהות", "Mothers"),
    c("מסחרי", "Industrial"),
    c("סבתות", "Grandmothers")
  )
  translated <- Salmonella$Generation
  for (i in seq_len(nrow(dictionary))) {
    translated <- sub(dictionary[i, 1], dictionary[i, 2], translated)
  }
  translated
})

unique(Salmonella$Generation)

## [1] "Industrial"   "Mothers"      "Grandmothers"

# Original value vs. translated value, with counts and percentage of all rows.
Temp.9_Generation <- Salmonella %>%
  group_by(temp, Generation) %>%
  summarise(n = n(), per = round(100 * n / length(Salmonella$Isolate), 2))
print(Temp.9_Generation)

## # A tibble: 3 × 4
## # Groups:   temp [3]
##   temp  Generation       n   per
##   <chr> <chr>        <int> <dbl>
## 1 אמהות Mothers       7244 42.7 
## 2 מסחרי Industrial    9259 54.6 
## 3 סבתות Grandmothers   453  2.67

## Breed
# Keep the original values in `temp` for the cross-check table below.
Salmonella$temp <- Salmonella$Breed

# Each row is c(<regex pattern>, <replacement>), applied in order.
# * A replacement of NA turns every matching value into a missing value
#   ("---" and "other or unknown").
# * Only the parentheses are escaped: they are the only regex metacharacters in
#   these patterns. Letters, spaces and "-" are matched literally as written.
# * " {2}" matches the two consecutive spaces that appear in the two
#   "Hyline W36" / "Hyline CV24" source values.
Salmonella$Breed <- local({
  dictionary <- rbind(
    c("---", NA),
    c("אביר \\(Abir\\)", "Abir"),
    c("אחר או לא ידוע \\(Unknown\\)", NA),
    c("ביוטי \\(BUT\\)", "Beauti"),
    c("ברווזים Pekin", "Pekin"),
    c("דיקלב \\(DeKalb\\)", "DeKalb"),
    c("היבריד \\(Hybrid\\)", "Hybrid"),
    c("היילין \\(Hyline W80\\)", "Hyline_W80"),
    c("היליין {2}\\(Hyline W36\\)", "Hyline_W36"),
    c("היליין {2}\\(Hyline CV24\\)", "Hyline_CV24"),
    c("יותר מאחד", "Multiple"),
    c("לומן \\(Lohman\\)", "Lohman"),
    c("ניקולס \\(Nicholas\\)", "Nicholas"),
    c("קוב \\(Cobb\\)", "Cobb"),
    c("פקין \\(PAKIN\\)", "Pekin"),
    c("רוס \\(Ross\\)", "Ross"),
    c("הברד \\(Hubbard\\)", "Hubbard")
  )
  translated <- Salmonella$Breed
  for (i in seq_len(nrow(dictionary))) {
    translated <- sub(dictionary[i, 1], dictionary[i, 2], translated)
  }
  translated
})

unique(Salmonella$Breed)

##  [1] "Lohman"      "Hyline_W36"  "Ross"        "Hyline_W80"  "DeKalb"     
##  [6] "Multiple"    NA            "Beauti"      "Hybrid"      "Cobb"       
## [11] "Nicholas"    "Abir"        "Pekin"       "Hyline_CV24" "Hubbard"

# Original value vs. translated value, with counts and percentage of all rows.
Temp.10_Breed <- Salmonella %>%
  group_by(temp, Breed) %>%
  summarise(n = n(), per = round(100 * n / length(Salmonella$Isolate), 2))
print(Temp.10_Breed)

## # A tibble: 17 × 4
## # Groups:   temp [17]
##    temp                     Breed           n   per
##    <chr>                    <chr>       <int> <dbl>
##  1 ---                      <NA>          131  0.77
##  2 אביר (Abir)              Abir           35  0.21
##  3 אחר או לא ידוע (Unknown) <NA>          291  1.72
##  4 ביוטי (BUT)              Beauti       1952 11.5 
##  5 ברווזים Pekin            Pekin           4  0.02
##  6 דיקלב (DeKalb)           DeKalb        379  2.24
##  7 הברד (Hubbard)           Hubbard        16  0.09
##  8 היבריד (Hybrid)          Hybrid        370  2.18
##  9 היילין (Hyline W80)      Hyline_W80    623  3.67
## 10 היליין  (Hyline CV24)    Hyline_CV24     3  0.02
## 11 היליין  (Hyline W36)     Hyline_W36    518  3.05
## 12 יותר מאחד                Multiple      869  5.13
## 13 לומן (Lohman)            Lohman       4321 25.5 
## 14 ניקולס (Nicholas)        Nicholas       47  0.28
## 15 פקין (PAKIN)             Pekin          10  0.06
## 16 קוב (Cobb)               Cobb          743  4.38
## 17 רוס (Ross)               Ross         6644 39.2

Unite the duplicate serovars

Create a new column that contains a copy of the data in the “Serotype” column, so that we keep a copy of the original values while working freely on the data.
Unite all of the duplicates:

# Keep an untouched copy of the serotype names for the cross-check table below.
Salmonella$temp.serotype <- Salmonella$Serotype

# Each row is c(<regex pattern>, <replacement>), applied in order.
# * "\\(" and "\\)" escape the parentheses so they are matched as literal
#   characters instead of acting as regex grouping operators.
# * "Virginia" and "Virginia/Muenchen" are both united into "Muenchen" in one
#   step (the "/Muenchen" suffix is optional). No other serotype name is
#   touched, so a "/" inside an unrelated serotype is left exactly as it is.
Salmonella$Serotype <- local({
  dictionary <- rbind(
    c("Enteritidis \\(מעבדה מועצה\\)", "Enteritidis"),
    c("Typhimurium \\(מעבדה מועצה\\)", "Typhimurium"),
    c("Virginia(/Muenchen)?", "Muenchen")
  )
  united <- Salmonella$Serotype
  for (i in seq_len(nrow(dictionary))) {
    united <- sub(dictionary[i, 1], dictionary[i, 2], united)
  }
  united
})
# NOTE(review): some serotype names carry trailing whitespace (see the sorted
# list printed below); they are left as they are here — confirm whether they
# should be trimmed before the merge with the serogroup index.

# To check ourselves, build a table with two columns — the untouched copy
# ("temp.serotype") and the column we changed ("Serotype") — and compare them.
Temp.11_Serotype <- Salmonella %>%
  group_by(Serotype, temp.serotype) %>%
  summarise(n = n(), per = round(100 * n / length(Salmonella$Isolate), 2))
sort(unique((Salmonella$Serotype)))

##   [1] "13,23:i:-"                               
##   [2] "16:b:-"                                  
##   [3] "16:lv:-"                                 
##   [4] "18:z4,z23:-"                             
##   [5] "28:l,v:-"                                
##   [6] "3,10:y:-"                                
##   [7] "30:y:-"                                  
##   [8] "4,12:-:1,7"                              
##   [9] "4,12:b:-"                                
##  [10] "4,12:e,h:1,2,5"                          
##  [11] "4,12:Rough"                              
##  [12] "4,12:y:- "                               
##  [13] "4,5,12:i:-"                              
##  [14] "4,5,12:rough:1,2"                        
##  [15] "42:b:e,n,x,z15"                          
##  [16] "47:b:e,n,x,z15"                          
##  [17] "6,7:f,g,t:-"                             
##  [18] "6,8:eh:-"                                
##  [19] "8,20:-:z6"                               
##  [20] "8,20:i:- (Kentucky)"                     
##  [21] "9,12:lv:-"                               
##  [22] "9,46:rough"                              
##  [23] "Abony"                                   
##  [24] "Adamstown"                               
##  [25] "Afula"                                   
##  [26] "Agona"                                   
##  [27] "Alachua"                                 
##  [28] "Altona"                                  
##  [29] "Anatum"                                  
##  [30] "Auto agglutination"                      
##  [31] "Bardo"                                   
##  [32] "Blockley"                                
##  [33] "Bonn"                                    
##  [34] "Bovismorbificans"                        
##  [35] "Braenderup"                              
##  [36] "Brancaster"                              
##  [37] "Brandenburg"                             
##  [38] "Bredeney"                                
##  [39] "Cerro"                                   
##  [40] "Charity"                                 
##  [41] "Chomedey"                                
##  [42] "Coeln"                                   
##  [43] "Concord"                                 
##  [44] "Corvalis"                                
##  [45] "Cotham"                                  
##  [46] "Cubana"                                  
##  [47] "Degania"                                 
##  [48] "Dublin"                                  
##  [49] "Eastbourne"                              
##  [50] "Edinburg"                                
##  [51] "Emek"                                    
##  [52] "Enteritidis"                             
##  [53] "Falkensee"                               
##  [54] "Freetown"                                
##  [55] "Fresno"                                  
##  [56] "Frintrop"                                
##  [57] "Give"                                    
##  [58] "Goldcoast"                               
##  [59] "Group B"                                 
##  [60] "Group C"                                 
##  [61] "Group D"                                 
##  [62] "Group E"                                 
##  [63] "Group G"                                 
##  [64] "Hadar"                                   
##  [65] "Haifa"                                   
##  [66] "Halle"                                   
##  [67] "Hato (o:4)"                              
##  [68] "Havana"                                  
##  [69] "Herzliya"                                
##  [70] "Hessarek"                                
##  [71] "Hindmarsh"                               
##  [72] "Hvittingfoss"                            
##  [73] "I 9,46:HME pos (unknown h Ag)"           
##  [74] "Idikan"                                  
##  [75] "II21:z:-"                                
##  [76] "IIIa 48:z4,z23,z32:- or IIIa 48:z4,z23:-"
##  [77] "IIIb 21:-:z"                             
##  [78] "IIIb 21:z10:z "                          
##  [79] "IIIb 35:z52:e,n,x,z15"                   
##  [80] "IIIb 38:I,v:z53"                         
##  [81] "IIIb 40:k:z"                             
##  [82] "IIIb 47:c:e,n,x,z15"                     
##  [83] "IIIb 48:i:z35"                           
##  [84] "IIIb 50:I,-:e,n,x,z15"                   
##  [85] "IIIb 50:I,v:e,n,x,z15"                   
##  [86] "IIIb 61:z52:z53"                         
##  [87] "IIIb38:l,v:z35"                          
##  [88] "IIIb53:z52:z53"                          
##  [89] "IIIb58:z52:z"                            
##  [90] "IIIb61:i:z53"                            
##  [91] "Ilala"                                   
##  [92] "Illb40:l,z13:z53"                        
##  [93] "Indiana"                                 
##  [94] "Infantis"                                
##  [95] "Irumu"                                   
##  [96] "Isangi"                                  
##  [97] "Istanbul"                                
##  [98] "IV 50:z:z35 (O:50)"                      
##  [99] "Java"                                    
## [100] "Kedougou"                                
## [101] "Kentucky"                                
## [102] "Khami  II47:b:e,n,x,zl5"                 
## [103] "Kotbus"                                  
## [104] "Larochelle"                              
## [105] "Lexington"                               
## [106] "Liverpool"                               
## [107] "Livingstone"                             
## [108] "Llandoff"                                
## [109] "Manhattan"                               
## [110] "Matopeni  "                              
## [111] "Mbandaka"                                
## [112] "Meleagridis"                             
## [113] "Mikawasima"                              
## [114] "Mishmarhaemek"                           
## [115] "Montevideo"                              
## [116] "Morehead"                                
## [117] "Muenchen"                                
## [118] "Nachshonim"                              
## [119] "Newport"                                 
## [120] "Nima"                                    
## [121] "Ohio"                                    
## [122] "Oranienburg"                             
## [123] "Orion"                                   
## [124] "Oslo"                                    
## [125] "Ouakam"                                  
## [126] "Pensacola"                               
## [127] "Polyvalent Minus"                        
## [128] "Polyvalent Plus"                         
## [129] "Reading"                                 
## [130] "Richmond"                                
## [131] "Rissen"                                  
## [132] "rough"                                   
## [133] "Rough:b:1,2"                             
## [134] "Rough:b:rough"                           
## [135] "Rough:d:-"                               
## [136] "Rough:d:1,2"                             
## [137] "Rough:eh:1,2"                            
## [138] "Rough:f,g,t:-"                           
## [139] "Rough:f,g:-"                             
## [140] "rough:i:1,5"                             
## [141] "rough:r:1,5"                             
## [142] "Rough:r:z6"                              
## [143] "Rough:v:1,7"                             
## [144] "Rough:y:1,5"                             
## [145] "Rough:z10:e,n,x"                         
## [146] "Rubislaw"                                
## [147] "Saint-Paul"                              
## [148] "Salford"                                 
## [149] "Schwarzengrund"                          
## [150] "Senftenberg"                             
## [151] "Sharon"                                  
## [152] "SII40:Z4Z24:Z39"                         
## [153] "Soerenga"                                
## [154] "Sofia"                                   
## [155] "Stanley"                                 
## [156] "Tennessee"                               
## [157] "Typhimurium"                             
## [158] "Uganda"                                  
## [159] "Virchow"                                 
## [160] "Vitkin"                                  
## [161] "Wangata"                                 
## [162] "Widemarsh"                               
## [163] "Yoruba"

Next, I will create a new column that will contain the serogroup without NA values

# Create a new column ("sero.group.temp") holding a copy of the values of the
# "Serologic_group_0" column.
Salmonella[["sero.group.temp"]] <- Salmonella[["Serologic_group_0"]]

Loading the serologic group vs. serotype index that I’ve created separately

# Load the serotype -> serogroup index. The values listed in `na.strings` are
# read as missing (NA). `TRUE` is spelled out because `T` can be reassigned.
Sero_index <- read.csv(
  "G:/.shortcut-targets-by-id/1OCnVJ6euOfHZdOlKqGKkb1ToC8FQqs_Z/Linoy Zeman/Data files/Poultry/CSVs/Index_serotyp.VS.sero-group.final.csv",
  header = TRUE,
  na.strings = c(""," ","na",NA," ")
)
# Rename the index columns so they can be told apart from the columns of the
# isolates table after the two are merged.
colnames(Sero_index)[colnames(Sero_index) == "Serotype"] <- "serotype_ind"
colnames(Sero_index)[colnames(Sero_index) == "Sero.Group"] <- "Sero.Group_ind"
sort(unique((Sero_index$serotype_ind)))

##  [1] "Afula"            "Agona"            "Altona"           "Anatum"          
##  [5] "Bardo"            "Blockley"         "Bonn"             "Bovismorbificans"
##  [9] "Braenderup"       "Brancaster"       "Brandenburg"      "Bredeney"        
## [13] "Cerro"            "Charity"          "Chomedey"         "Coeln"           
## [17] "Concord"          "Corvalis"         "Cubana"           "Dublin"          
## [21] "Eastbourne"       "Edinburg"         "Emek"             "Enteritidis"     
## [25] "Falkensee"        "Fresno"           "Frintrop"         "Give"            
## [29] "Goldcoast"        "Group B"          "Group C"          "Group D"         
## [33] "Group E"          "Group G"          "Group I"          "Hadar"           
## [37] "Haifa"            "Havana"           "Hindmarsh"        "Idikan"          
## [41] "Infantis"         "Irumu"            "Isangi"           "Istanbul"        
## [45] "Kedougou"         "Kentucky"         "Larochelle"       "Lexington"       
## [49] "Liverpool"        "Livingstone"      "Llandoff"         "Manhattan"       
## [53] "Mbandaka"         "Meleagridis"      "Mikawasima"       "Mishmarhaemek"   
## [57] "Montevideo"       "Muenchen"         "Nachshonim"       "Newport"         
## [61] "Ohio"             "Oranienburg"      "Orion"            "Oslo"            
## [65] "Ouakam"           "Reading"          "Richmond"         "Rissen"          
## [69] "Saint-Paul"       "Schwarzengrund"   "Senftenberg"      "Tennessee"       
## [73] "Typhimurium"      "Uganda"           "Virchow"          "Wangata"         
## [77] "Yoruba"

# Attach the index serogroup to every isolate (all isolates are kept; those
# whose serotype is not in the index get NA), put the index serogroup right
# before "Serotype", and drop the original serologic-group column.
Salmonella_merged <- Salmonella %>%
  merge(Sero_index, by.x = "Serotype", by.y = "serotype_ind", all.x = TRUE) %>%
  relocate("Serologic_group_0", .before = "Serotype") %>%
  relocate("Sero.Group_ind", .before = "Serotype") %>%
  subset(select = -Serologic_group_0)

# Serogroup vs. serotype, with counts and percentage of all merged rows.
Temp.12_merged <- Salmonella_merged %>%
  group_by(Sero.Group_ind, Serotype) %>%
  summarise(n = n(), per = round(100 * n / length(Salmonella_merged$Isolate), 2))

## `summarise()` has grouped output by 'Sero.Group_ind'. You can override using
## the `.groups` argument.

# Drop two columns that are not used in the rest of the analysis.
Salmonella_merged <- Salmonella_merged %>%
  subset(select = -sent.to.m..of.health.for.diagnosis) %>%
  subset(select = -icpi.ivpi)

Adding a “Year” column

# Parse the visit date (day/month/year) and keep only the four-digit year, as
# text; then place the new column right before the date column.
Salmonella_merged$Year <- format(
  as.Date(Salmonella_merged$Date.of.test.visit, format = "%d/%m/%Y"),
  "%Y"
)
Salmonella_merged <- Salmonella_merged %>%
  relocate("Year", .before = "Date.of.test.visit")

# Load the human serotype prevalence table. The values listed in `na.strings`
# are read as missing (NA). `TRUE` is spelled out because `T` can be reassigned.
Sal_Human <- read.csv(
  "G:/.shortcut-targets-by-id/1OCnVJ6euOfHZdOlKqGKkb1ToC8FQqs_Z/Linoy Zeman/Data files/Human/human_serotype_pervalence_3Y.csv",
  header = TRUE,
  na.strings = c(""," ","na",NA," ")
)
# Rename the key column so it can be told apart from the poultry "Serotype".
colnames(Sal_Human)[colnames(Sal_Human) == "Serotype"] <- "serotype_H"

Create a new data frame with the relevant information

# Keep only the columns needed for the summaries, drop isolates that have no
# usable serotype, and give the serogroup column its final name.
# An isolate is dropped when its serotype is missing, or when the serotype text
# contains a serogroup-only label ("Group B/C/D/E/G/I") or
# "Auto agglutination".
# The filter refers to the column by its bare name, so it always tests the rows
# that are flowing through the pipe rather than a separate copy of the table.
Salmonella_Relevant <- Salmonella_merged %>%
  select(Year, Isolate, Sero.Group_ind, Serotype, Branch_line) %>%
  filter(
    !is.na(Serotype),
    !grepl("Group [BCDEGI]|Auto agglutination", Serotype)
  ) %>%
  rename(Sero.Group = Sero.Group_ind)

# Summary by year: number of isolates and their share (percent, one decimal)
# of all rows in Salmonella_Relevant, followed by a "Total" row holding the
# column sums.
Isolates.by.year_p <- Salmonella_Relevant %>%
  group_by(Year) %>%
  summarise(No_of_Isolates = n()) %>%
  mutate(
    Percent = round(No_of_Isolates / nrow(Salmonella_Relevant) * 100, 1)
  ) %>%
  rbind(data.frame(Year = "Total", t(colSums(.[, -1]))))
print(Isolates.by.year_p)

## # A tibble: 6 × 3
##   Year  No_of_Isolates Percent
##   <chr>          <dbl>   <dbl>
## 1 2019             447    11.3
## 2 2020             753    19  
## 3 2021             958    24.2
## 4 2022             941    23.7
## 5 2023             864    21.8
## 6 Total           3963   100

# Working copy of the human prevalence table.
Isolates.by.year_H <- Sal_Human

# Summary by serotype: number of isolates and their share (percent, one
# decimal) of all rows in Salmonella_Relevant, largest share first.
Isolates.by.Serotype_P <- Salmonella_Relevant %>%
  group_by(Serotype) %>%
  summarise(No_of_Isolates = n()) %>%
  mutate(
    Percent = round(No_of_Isolates / nrow(Salmonella_Relevant) * 100, 1)
  ) %>%
  arrange(desc(Percent))
print(Isolates.by.Serotype_P)

## # A tibble: 157 × 3
##    Serotype         No_of_Isolates Percent
##    <chr>                     <int>   <dbl>
##  1 Muenchen                    927    23.4
##  2 Enteritidis                 391     9.9
##  3 Typhimurium                 354     8.9
##  4 Bredeney                    291     7.3
##  5 Infantis                    136     3.4
##  6 Polyvalent Minus            119     3  
##  7 Brancaster                  113     2.9
##  8 Polyvalent Plus             100     2.5
##  9 Kentucky                     87     2.2
## 10 Montevideo                   83     2.1
## # ℹ 147 more rows

# Poultry vs. human serotype comparison.
# Full outer join on the serotype name: a serotype present in only one of the
# two tables is kept, with NA in the columns that come from the other table.
# Only the key columns are passed as by.x / by.y; extra unnamed strings here
# would bind to merge()'s `by` and `all` parameters and be silently ignored.
Isolates.by.Serotype_P.VS.H <- merge(
  Isolates.by.Serotype_P, Isolates.by.year_H,
  by.x = "Serotype",
  by.y = "serotype_H",
  all = TRUE
)
colnames(Isolates.by.Serotype_P.VS.H)[colnames(Isolates.by.Serotype_P.VS.H) == "Percent"] <- "Poultry"
colnames(Isolates.by.Serotype_P.VS.H)[colnames(Isolates.by.Serotype_P.VS.H) == "Av_prevelance.3Y."] <- "Human"
colnames(Isolates.by.Serotype_P.VS.H)[colnames(Isolates.by.Serotype_P.VS.H) == "No_of_Isolates"] <- "No.of.Isolates_Poultry"

# Highest human prevalence first; serotypes without a human value go last.
Isolates.by.Serotype_P.VS.H <- Isolates.by.Serotype_P.VS.H[order(-Isolates.by.Serotype_P.VS.H$Human), ]

# Serotypes with more than 20 poultry isolates. Taken before the "Total" row
# is appended so that the totals are never selected as if they were a serotype.
The.Main.Serotype.Prev_H.vs.P <- subset(Isolates.by.Serotype_P.VS.H, No.of.Isolates_Poultry > 20)

# Append the "Total" rows. na.rm = TRUE is needed for the merged table: the
# outer join leaves NA for unmatched serotypes, and a plain colSums() would
# turn every affected total into NA.
Isolates.by.Serotype_P.VS.H <- rbind(
  Isolates.by.Serotype_P.VS.H,
  data.frame(
    Serotype = "Total",
    t(colSums(Isolates.by.Serotype_P.VS.H[, -1], na.rm = TRUE))
  )
)
Isolates.by.Serotype_P <- rbind(
  Isolates.by.Serotype_P,
  data.frame(Serotype = "Total", t(colSums(Isolates.by.Serotype_P[, -1])))
)

# Summary by serogroup: number of isolates and their share (percent, one
# decimal) of all rows in Salmonella_Relevant, largest share first, followed
# by a "Total" row holding the column sums.
Isolates.by.Sero.Group_P <- Salmonella_Relevant %>%
  group_by(Sero.Group) %>%
  summarise(No_of_Isolates = n()) %>%
  mutate(
    Percent = round(No_of_Isolates / nrow(Salmonella_Relevant) * 100, 1)
  ) %>%
  arrange(desc(Percent)) %>%
  rbind(data.frame(Sero.Group = "Total", t(colSums(.[, -1]))))
print(Isolates.by.Sero.Group_P)

## # A tibble: 8 × 3
##   Sero.Group No_of_Isolates Percent
##   <chr>               <dbl>   <dbl>
## 1 Group C              1623    41  
## 2 Group B               912    23  
## 3 <NA>                  670    16.9
## 4 Group D               475    12  
## 5 Group E               147     3.7
## 6 Group G               112     2.8
## 7 Group I                24     0.6
## 8 Total                3963   100

# Summary by branch line: number of isolates and their share (percent, one
# decimal) of all rows in Salmonella_Relevant, largest share first, followed
# by a "Total" row holding the column sums.
Isolates.by.Branch_line <- Salmonella_Relevant %>%
  group_by(Branch_line) %>%
  summarise(No_of_Isolates = n()) %>%
  mutate(
    Percent = round(No_of_Isolates / nrow(Salmonella_Relevant) * 100, 1)
  ) %>%
  arrange(desc(Percent)) %>%
  rbind(data.frame(Branch_line = "Total", t(colSums(.[, -1]))))
print(Isolates.by.Branch_line)

## # A tibble: 5 × 3
##   Branch_line No_of_Isolates Percent
##   <chr>                <dbl>   <dbl>
## 1 Layers                2108    53.2
## 2 Breeding              1531    38.6
## 3 Broilers               321     8.1
## 4 Ornaments                3     0.1
## 5 Total                 3963   100

# Isolates per (branch line, serotype) pair. Percent is the pair's share of
# ALL rows in Salmonella_Relevant (not of its branch line), one decimal,
# largest share first. The result stays grouped by Branch_line.
Serotype_By_Branch.line <- Salmonella_Relevant %>%
  group_by(Branch_line, Serotype) %>%
  summarise(No_of_Isolates = n()) %>%
  mutate(
    Percent = round(No_of_Isolates / nrow(Salmonella_Relevant) * 100, 1)
  ) %>%
  arrange(desc(Percent))
print(Serotype_By_Branch.line)

## # A tibble: 239 × 4
## # Groups:   Branch_line [4]
##    Branch_line Serotype    No_of_Isolates Percent
##    <chr>       <chr>                <int>   <dbl>
##  1 Breeding    Muenchen               582    14.7
##  2 Layers      Enteritidis            372     9.4
##  3 Layers      Muenchen               332     8.4
##  4 Layers      Typhimurium            236     6  
##  5 Breeding    Bredeney               188     4.7
##  6 Layers      Infantis               102     2.6
##  7 Broilers    Bredeney                88     2.2
##  8 Breeding    Typhimurium             85     2.1
##  9 Breeding    Hadar                   77     1.9
## 10 Layers      Montevideo              76     1.9
## # ℹ 229 more rows

# Reorder the rows by branch line; rows of the same branch line keep their
# existing relative order (largest share first).
Serotype_By_Branch.line_Sorted <- Serotype_By_Branch.line[
  order(Serotype_By_Branch.line$Branch_line), ,
  drop = FALSE
]
print(Serotype_By_Branch.line_Sorted)

## # A tibble: 239 × 4
## # Groups:   Branch_line [4]
##    Branch_line Serotype    No_of_Isolates Percent
##    <chr>       <chr>                <int>   <dbl>
##  1 Breeding    Muenchen               582    14.7
##  2 Breeding    Bredeney               188     4.7
##  3 Breeding    Typhimurium             85     2.1
##  4 Breeding    Hadar                   77     1.9
##  5 Breeding    Brancaster              59     1.5
##  6 Breeding    Kentucky                52     1.3
##  7 Breeding    Orion                   42     1.1
##  8 Breeding    Rough:d:1,2             40     1  
##  9 Breeding    Infantis                33     0.8
## 10 Breeding    Kedougou                26     0.7
## # ℹ 229 more rows

# Isolates per (branch line, serogroup) pair. Percent is the pair's share of
# ALL rows in Salmonella_Relevant (not of its branch line), one decimal,
# largest share first. The result stays grouped by Branch_line.
Sero.Group_By_Branch.line <- Salmonella_Relevant %>%
  group_by(Branch_line, Sero.Group) %>%
  summarise(No_of_Isolates = n()) %>%
  mutate(
    Percent = round(No_of_Isolates / nrow(Salmonella_Relevant) * 100, 1)
  ) %>%
  arrange(desc(Percent))
print(Sero.Group_By_Branch.line)

## # A tibble: 22 × 4
## # Groups:   Branch_line [4]
##    Branch_line Sero.Group No_of_Isolates Percent
##    <chr>       <chr>               <int>   <dbl>
##  1 Breeding    Group C               828    20.9
##  2 Layers      Group C               779    19.7
##  3 Layers      Group D               442    11.2
##  4 Layers      Group B               390     9.8
##  5 Layers      <NA>                  374     9.4
##  6 Breeding    Group B               358     9  
##  7 Breeding    <NA>                  175     4.4
##  8 Broilers    Group B               161     4.1
##  9 Broilers    <NA>                  121     3.1
## 10 Breeding    Group E                82     2.1
## # ℹ 12 more rows

# Reorder the rows by branch line; rows of the same branch line keep their
# existing relative order (largest share first).
Sero.Group_By_Branch.line_Sorted <- Sero.Group_By_Branch.line[
  order(Sero.Group_By_Branch.line$Branch_line), ,
  drop = FALSE
]
print(Sero.Group_By_Branch.line_Sorted)

## # A tibble: 22 × 4
## # Groups:   Branch_line [4]
##    Branch_line Sero.Group No_of_Isolates Percent
##    <chr>       <chr>               <int>   <dbl>
##  1 Breeding    Group C               828    20.9
##  2 Breeding    Group B               358     9  
##  3 Breeding    <NA>                  175     4.4
##  4 Breeding    Group E                82     2.1
##  5 Breeding    Group G                55     1.4
##  6 Breeding    Group D                25     0.6
##  7 Breeding    Group I                 8     0.2
##  8 Broilers    Group B               161     4.1
##  9 Broilers    <NA>                  121     3.1
## 10 Broilers    Group C                16     0.4
## # ℹ 12 more rows