Salmonella isolates 2019-2023
2024-05-01
# Setup (chunk options in the source document: r setup, include=FALSE).
# Echo the code of every chunk in the knitted report.
knitr::opts_chunk$set(echo = TRUE)
# `rm(list = ls())` was removed: it only clears the global environment (not
# loaded packages or options) and silently wipes the workspace of anyone who
# sources this file. Start from a fresh R session instead.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Organizing & cleaning the Salmonella data file
First, I will start by loading the data from a CSV file
# Directory holding the poultry CSV files.
# Remember: this must be the directory path only, without a file name at the end.
poultry_csv_dir <- "G:/.shortcut-targets-by-id/1OCnVJ6euOfHZdOlKqGKkb1ToC8FQqs_Z/Linoy Zeman/Data files/Poultry/CSVs"
setwd(poultry_csv_dir)

# Load the isolate table; empty / blank / "na" cells are read as NA.
# Remember to first save the file as "CSV UTF-8 (comma delimited) (*.csv)".
Salmonella <- read.csv(
  file.path(poultry_csv_dir, "Sal_019-023_updt.csv"),
  header = TRUE,
  na.strings = c("", " ", "na", NA, " ")
)
Organizing the columns
- I will reorder the columns into a more logical order for looking at the data.
- I will rename the columns to a basic naming pattern that is common to the whole data set.
## Changing the column names
# One lookup (new name = old name) instead of nine separate assignments.
# `any_of()` skips any old name that is not present in the table.
Salmonella <- rename(
  Salmonella,
  any_of(c(
    Region           = "region",
    Farm_kinde       = "Farm.kinde",
    Lab_region       = "settlement.lab",
    Performing_lab   = "performing..lab",
    Test_kinde       = "test.kinde",
    Poultry          = "chicken.kinde",
    Poultry_branches = "Poultry.branches",
    Generation       = "generation",
    Breed            = "breed"
  ))
)
## Changing the column locations
# The moves are applied one after another, so each step sees the column
# order produced by the previous step.
Salmonella <- Salmonella %>%
  relocate("Region", .before = "Farm_kinde") %>%
  relocate("Farm_kinde", .before = "Lab_region") %>%
  relocate("Performing_lab", .after = "Lab_region") %>%
  relocate("Test_kinde", .after = "Performing_lab") %>%
  relocate("Poultry", .after = "Test_kinde") %>%
  relocate("Branch_line", .after = "Poultry") %>%
  relocate("Poultry_branches", .after = "Branch_line") %>%
  relocate("Generation", .after = "Poultry_branches") %>%
  relocate("Breed", .after = "Generation")
Translate the content of the important columns from Hebrew to English.
## Region
# Keep the original (Hebrew) values in `temp` so the translation can be checked.
Salmonella$temp <- Salmonella$Region

Salmonella$Region <- sub("צפון", "North", Salmonella$Region)
Salmonella$Region <- sub("השפלה וההר", "Shfela", Salmonella$Region)
Salmonella$Region <- sub("העמקים", "The valleys", Salmonella$Region)
Salmonella$Region <- sub("המרכז", "Central", Salmonella$Region)
Salmonella$Region <- sub("הדרום", "South", Salmonella$Region)

unique(Salmonella$Region)
## [1] "North" "Shfela" "The valleys" "South" "Central"
# Original vs. translated value, with row count and percent of all rows.
Temp.1_Region <- Salmonella %>%
  group_by(temp, Region) %>%
  summarise(
    n = n(),
    per = round(100 * n / nrow(Salmonella), 2),
    .groups = "drop_last"
  )
print(Temp.1_Region)
## # A tibble: 5 × 4
## # Groups: temp [5]
## temp Region n per
## <chr> <chr> <int> <dbl>
## 1 הדרום South 1781 10.5
## 2 המרכז Central 2856 16.8
## 3 העמקים The valleys 5581 32.9
## 4 השפלה וההר Shfela 2605 15.4
## 5 צפון North 4133 24.4
## settlement lab
# Keep the original (Hebrew) values in `temp` so the translation can be checked.
Salmonella$temp <- Salmonella$Lab_region

# The longer "lab ..." forms are replaced before the bare forms they contain.
Salmonella$Lab_region <- sub("מעבדה דרומית", "Southern", Salmonella$Lab_region)
Salmonella$Lab_region <- sub("דרומית", "Southern", Salmonella$Lab_region)
Salmonella$Lab_region <- sub("מעבדה צפונית", "Northern", Salmonella$Lab_region)
Salmonella$Lab_region <- sub("צפונית", "Northern", Salmonella$Lab_region)
Salmonella$Lab_region <- sub("מפקח ארצי", "National_inspector", Salmonella$Lab_region)

unique(Salmonella$Lab_region)
## [1] "Northern" "Southern" "National_inspector"
## [4] NA
# Original vs. translated value, with row count and percent of all rows.
Temp.2_Lab_region <- Salmonella %>%
  group_by(temp, Lab_region) %>%
  summarise(
    n = n(),
    per = round(100 * n / nrow(Salmonella), 2),
    .groups = "drop_last"
  )
print(Temp.2_Lab_region)
## # A tibble: 6 × 4
## # Groups: temp [6]
## temp Lab_region n per
## <chr> <chr> <int> <dbl>
## 1 דרומית Southern 5981 35.3
## 2 מעבדה דרומית Southern 87 0.51
## 3 מעבדה צפונית Northern 151 0.89
## 4 מפקח ארצי National_inspector 13 0.08
## 5 צפונית Northern 10723 63.2
## 6 <NA> <NA> 1 0.01
## performing lab
# Keep the original (Hebrew) values in `temp` so the translation can be checked.
Salmonella$temp <- Salmonella$Performing_lab

Salmonella$Performing_lab <- sub("מעבדה דרומית", "Southern", Salmonella$Performing_lab)
Salmonella$Performing_lab <- sub("מעבדה צפונית", "Northern", Salmonella$Performing_lab)
# The label previously ended with a stray trailing space
# ("The_Veterinary_Institute "), which makes exact matching on it fail.
Salmonella$Performing_lab <- sub("חטיבה לעופות, מכון הוטרינרי", "The_Veterinary_Institute", Salmonella$Performing_lab)

unique(Salmonella$Performing_lab)
## [1] "Southern" "Northern"
## [3] "The_Veterinary_Institute "
# Original vs. translated value, with row count and percent of all rows.
Temp.3_Performing_lab <- Salmonella %>%
  group_by(temp, Performing_lab) %>%
  summarise(
    n = n(),
    per = round(100 * n / nrow(Salmonella), 2),
    .groups = "drop_last"
  )
print(Temp.3_Performing_lab)
## # A tibble: 3 × 4
## # Groups: temp [3]
## temp Performing_lab n per
## <chr> <chr> <int> <dbl>
## 1 חטיבה לעופות, מכון הוטרינרי "The_Veterinary_Institute " 2 0.01
## 2 מעבדה דרומית "Southern" 16394 96.7
## 3 מעבדה צפונית "Northern" 560 3.3
## test kind
# Keep the original (Hebrew) values in `temp` so the translation can be checked.
Salmonella$temp <- Salmonella$Test_kinde

# Order matters: sub() replaces the first match anywhere in the string, so
# longer phrases must be translated before the shorter phrases they contain
# (e.g. the "dragged swab ..." variants before plain "dragged swab",
# "dust swab" before "dust").
# NOTE(review): "Dragged.swab" and "Dragged_swab" are two different source
# categories whose labels differ only by the separator - consider renaming one.
Salmonella$Test_kinde <- sub("אברים פנימיים", "Internal_organs", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("אחר", "Other", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("אפרוחים", "Chicks", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("חיידק", "Bacterium", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("חיתולים", "Diapers", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("כבד", "Liver", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("לב", "Heart", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("לשלשת", "Secretions", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("מח עצם", "B_marrow", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("מטוש אבק", "Dust_swab", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("מטוש נגרר לול ריק", "Dragged_swab_roost-empty", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("מטוש נגרר לול", "Dragged.swab", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("מטוש נגרר מגש בקיעה", "Dragged_swab_Hatching_tray", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("מטוש נגרר מדגריה", "Dragged.swab.hatchery", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("מטוש נגרר", "Dragged_swab", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("מעבדה למחלות עופות", "Poultry_diseases_lab", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("פרק", "Joint", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("ריאה", "Lung", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("תערובת", "Feed", Salmonella$Test_kinde)
Salmonella$Test_kinde <- sub("אבק", "Dust", Salmonella$Test_kinde)

unique(Salmonella$Test_kinde)
## [1] "Dragged.swab" "Dust_swab"
## [3] "Dust" "Dragged_swab_roost-empty"
## [5] "Dragged_swab" "Chicks"
## [7] "Dragged.swab.hatchery" "Dragged_swab_Hatching_tray"
## [9] "Internal_organs" "B_marrow"
## [11] "Lung" "Diapers"
## [13] "Bacterium" "Secretions"
## [15] "Heart" "Feed"
## [17] "Other" "Liver"
## [19] "Joint" "Poultry_diseases_lab"
# Original vs. translated value, with row count and percent of all rows.
Temp.4_Test_kinde <- Salmonella %>%
  group_by(temp, Test_kinde) %>%
  summarise(
    n = n(),
    per = round(100 * n / nrow(Salmonella), 2),
    .groups = "drop_last"
  )
print(Temp.4_Test_kinde, n = 123)
## # A tibble: 20 × 4
## # Groups: temp [20]
## temp Test_kinde n per
## <chr> <chr> <int> <dbl>
## 1 אבק Dust 663 3.91
## 2 אברים פנימיים Internal_organs 381 2.25
## 3 אחר Other 2 0.01
## 4 אפרוחים Chicks 327 1.93
## 5 חיידק Bacterium 400 2.36
## 6 חיתולים Diapers 31 0.18
## 7 כבד Liver 2 0.01
## 8 לב Heart 4 0.02
## 9 לשלשת Secretions 6 0.04
## 10 מח עצם B_marrow 49 0.29
## 11 מטוש אבק Dust_swab 672 3.96
## 12 מטוש נגרר Dragged_swab 1699 10.0
## 13 מטוש נגרר לול Dragged.swab 11147 65.7
## 14 מטוש נגרר לול ריק Dragged_swab_roost-empty 624 3.68
## 15 מטוש נגרר מגש בקיעה Dragged_swab_Hatching_tray 400 2.36
## 16 מטוש נגרר מדגריה Dragged.swab.hatchery 544 3.21
## 17 מעבדה למחלות עופות Poultry_diseases_lab 1 0.01
## 18 פרק Joint 1 0.01
## 19 ריאה Lung 2 0.01
## 20 תערובת Feed 1 0.01
## Chicken kind -> Poultry
# Keep the original (Hebrew) values in `temp` so the translation can be checked.
Salmonella$temp <- Salmonella$Poultry

Salmonella$Poultry <- sub("תרנגולות", "Chickens", Salmonella$Poultry)
Salmonella$Poultry <- sub("ברווזים", "Ducks", Salmonella$Poultry)
Salmonella$Poultry <- sub("הודים", "Turkey", Salmonella$Poultry)
Salmonella$Poultry <- sub("יונים", "Pigeons", Salmonella$Poultry)
Salmonella$Poultry <- sub("שלווים", "Quail", Salmonella$Poultry)

unique(Salmonella$Poultry)
## [1] "Chickens" "Turkey" "Ducks" "Pigeons" "Quail"
# Original vs. translated value, with row count and percent of all rows.
Temp.5_Poultry <- Salmonella %>%
  group_by(temp, Poultry) %>%
  summarise(
    n = n(),
    per = round(100 * n / nrow(Salmonella), 2),
    .groups = "drop_last"
  )
print(Temp.5_Poultry)
## # A tibble: 5 × 4
## # Groups: temp [5]
## temp Poultry n per
## <chr> <chr> <int> <dbl>
## 1 ברווזים Ducks 62 0.37
## 2 הודים Turkey 2576 15.2
## 3 יונים Pigeons 2 0.01
## 4 שלווים Quail 3 0.02
## 5 תרנגולות Chickens 14313 84.4
## Branch_line
# Keep the original (Hebrew) values in `temp` so the translation can be checked.
Salmonella$temp <- Salmonella$Branch_line

Salmonella$Branch_line <- sub("הטלה", "Layers", Salmonella$Branch_line)
Salmonella$Branch_line <- sub("נוי", "Ornaments", Salmonella$Branch_line)
Salmonella$Branch_line <- sub("פטום", "Broilers", Salmonella$Branch_line)
# Heavy and light breeding are both collapsed into "Breeding". The two-word
# forms are translated first so that the plain word does not leave a
# half-translated value behind (the earlier pattern also carried a stray
# regex "*" that only made the last letter optional).
Salmonella$Branch_line <- sub("רבייה כבדה", "Breeding", Salmonella$Branch_line)
Salmonella$Branch_line <- sub("רבייה קלה", "Breeding", Salmonella$Branch_line)
Salmonella$Branch_line <- sub("רבייה", "Breeding", Salmonella$Branch_line)
Salmonella$Branch_line <- sub("שלווים ביצי מאכל", "Quail", Salmonella$Branch_line)

unique(Salmonella$Branch_line)
## [1] "Layers" "Breeding" "Broilers" "Ornaments" "Quail"
# Original vs. translated value, with row count and percent of all rows.
Temp.6_Branch_line <- Salmonella %>%
  group_by(temp, Branch_line) %>%
  summarise(
    n = n(),
    per = round(100 * n / nrow(Salmonella), 2),
    .groups = "drop_last"
  )
print(Temp.6_Branch_line)
## # A tibble: 7 × 4
## # Groups: temp [7]
## temp Branch_line n per
## <chr> <chr> <int> <dbl>
## 1 הטלה Layers 5856 34.5
## 2 נוי Ornaments 5 0.03
## 3 פטום Broilers 3056 18.0
## 4 רבייה Breeding 1933 11.4
## 5 רבייה כבדה Breeding 5857 34.5
## 6 רבייה קלה Breeding 248 1.46
## 7 שלווים ביצי מאכל Quail 1 0.01
## Poultry branches
# Keep the original (Hebrew) values in `temp` so the translation can be checked.
Salmonella$temp <- Salmonella$Poultry_branches

# The first pattern used to end with a stray regex "*", which only made its
# last letter optional; it is now matched as written.
Salmonella$Poultry_branches <- sub("ברווזים - פיטום", "Broilers", Salmonella$Poultry_branches)
Salmonella$Poultry_branches <- sub("הודים - פיטום", "Broilers", Salmonella$Poultry_branches)
Salmonella$Poultry_branches <- sub("עופות - שונים", "Poultry_various", Salmonella$Poultry_branches)
Salmonella$Poultry_branches <- sub("שלווים - ביצי מאכל", "Quail_eggs", Salmonella$Poultry_branches)
Salmonella$Poultry_branches <- sub("תרנגולות - הטלה", "Layers", Salmonella$Poultry_branches)
Salmonella$Poultry_branches <- sub("תרנגולות - פיטום", "Broilers", Salmonella$Poultry_branches)
Salmonella$Poultry_branches <- sub("תרנגולות - רביה קלה", "Light_breeders", Salmonella$Poultry_branches)
Salmonella$Poultry_branches <- sub("תרנגולות - רבייה כבדה", "Heavy_breeders", Salmonella$Poultry_branches)

unique(Salmonella$Poultry_branches)
## [1] "Layers" "Heavy_breeders" "Light_breeders" "Broilers"
## [5] NA "Poultry_various" "Quail_eggs"
# Original vs. translated value, with row count and percent of all rows.
Temp.7_Poultry_branches <- Salmonella %>%
  group_by(temp, Poultry_branches) %>%
  summarise(
    n = n(),
    per = round(100 * n / nrow(Salmonella), 2),
    .groups = "drop_last"
  )
print(Temp.7_Poultry_branches)
## # A tibble: 9 × 4
## # Groups: temp [9]
## temp Poultry_branches n per
## <chr> <chr> <int> <dbl>
## 1 ברווזים - פיטום Broilers 6 0.04
## 2 הודים - פיטום Broilers 700 4.13
## 3 עופות - שונים Poultry_various 42 0.25
## 4 שלווים - ביצי מאכל Quail_eggs 1 0.01
## 5 תרנגולות - הטלה Layers 5859 34.6
## 6 תרנגולות - פיטום Broilers 2351 13.9
## 7 תרנגולות - רביה קלה Light_breeders 245 1.44
## 8 תרנגולות - רבייה כבדה Heavy_breeders 5857 34.5
## 9 <NA> <NA> 1895 11.2
## Farm kind
# Keep the original (Hebrew) values in `temp` so the translation can be checked.
Salmonella$temp <- Salmonella$Farm_kinde

Salmonella$Farm_kinde <- sub("אחר", "Other", Salmonella$Farm_kinde)
Salmonella$Farm_kinde <- sub("הודונים", "Turkey-young", Salmonella$Farm_kinde)
Salmonella$Farm_kinde <- sub("חופש", "Freedom", Salmonella$Farm_kinde)
Salmonella$Farm_kinde <- sub("מדגריה", "Hatchery", Salmonella$Farm_kinde)
Salmonella$Farm_kinde <- sub("פרגיות", "Pullets", Salmonella$Farm_kinde)
Salmonella$Farm_kinde <- sub("רבייה הטלה", "Breeders", Salmonella$Farm_kinde)

unique(Salmonella$Farm_kinde)
## [1] "Other" "Breeders" "Freedom" "Hatchery" "Pullets"
## [6] "Turkey-young"
# Original vs. translated value, with row count and percent of all rows.
Temp.8_Farm_kinde <- Salmonella %>%
  group_by(temp, Farm_kinde) %>%
  summarise(
    n = n(),
    per = round(100 * n / nrow(Salmonella), 2),
    .groups = "drop_last"
  )
print(Temp.8_Farm_kinde)
## # A tibble: 6 × 4
## # Groups: temp [6]
## temp Farm_kinde n per
## <chr> <chr> <int> <dbl>
## 1 אחר Other 8011 47.2
## 2 הודונים Turkey-young 487 2.87
## 3 חופש Freedom 299 1.76
## 4 מדגריה Hatchery 597 3.52
## 5 פרגיות Pullets 1606 9.47
## 6 רבייה הטלה Breeders 5956 35.1
## generation
# Keep the original (Hebrew) values in `temp` so the translation can be checked.
Salmonella$temp <- Salmonella$Generation

Salmonella$Generation <- sub("אמהות", "Mothers", Salmonella$Generation)
Salmonella$Generation <- sub("מסחרי", "Industrial", Salmonella$Generation)
Salmonella$Generation <- sub("סבתות", "Grandmothers", Salmonella$Generation)

unique(Salmonella$Generation)
## [1] "Industrial" "Mothers" "Grandmothers"
# Original vs. translated value, with row count and percent of all rows.
Temp.9_Generation <- Salmonella %>%
  group_by(temp, Generation) %>%
  summarise(
    n = n(),
    per = round(100 * n / nrow(Salmonella), 2),
    .groups = "drop_last"
  )
print(Temp.9_Generation)
## # A tibble: 3 × 4
## # Groups: temp [3]
## temp Generation n per
## <chr> <chr> <int> <dbl>
## 1 אמהות Mothers 7244 42.7
## 2 מסחרי Industrial 9259 54.6
## 3 סבתות Grandmothers 453 2.67
## breed
# Keep the original values in `temp` so the translation can be checked.
Salmonella$temp <- Salmonella$Breed

# "\\" escapes regex metacharacters (parentheses, "-", spaces) so they are
# matched literally. With `NA` as the replacement, sub() turns every matching
# element into NA ("---" and "Unknown" are treated as missing).
Salmonella$Breed <- sub("\\-\\-\\-", NA, Salmonella$Breed)
Salmonella$Breed <- sub("אביר \\(Abir\\)", "Abir", Salmonella$Breed)
Salmonella$Breed <- sub("אחר או לא ידוע \\(Unknown\\)", NA, Salmonella$Breed)
Salmonella$Breed <- sub("ביוטי \\(BUT\\)", "Beauti", Salmonella$Breed)
Salmonella$Breed <- sub("ברווזים\\ Pekin", "Pekin", Salmonella$Breed)
Salmonella$Breed <- sub("דיקלב \\(DeKalb\\)", "DeKalb", Salmonella$Breed)
Salmonella$Breed <- sub("היבריד \\(Hybrid\\)", "Hybrid", Salmonella$Breed)
Salmonella$Breed <- sub("היילין \\(Hyline W80\\)", "Hyline_W80", Salmonella$Breed)
Salmonella$Breed <- sub("היליין\\ \\ \\(Hyline W36\\)", "Hyline_W36", Salmonella$Breed)
Salmonella$Breed <- sub("היליין\\ \\ \\(Hyline CV24\\)", "Hyline_CV24", Salmonella$Breed)
# The pattern used to contain a backslash before a Hebrew letter; escaping an
# ordinary letter is not portable across regex engines, so it is now plain.
Salmonella$Breed <- sub("יותר מאחד", "Multiple", Salmonella$Breed)
Salmonella$Breed <- sub("לומן \\(Lohman\\)", "Lohman", Salmonella$Breed)
Salmonella$Breed <- sub("ניקולס \\(Nicholas\\)", "Nicholas", Salmonella$Breed)
Salmonella$Breed <- sub("קוב \\(Cobb\\)", "Cobb", Salmonella$Breed)
Salmonella$Breed <- sub("פקין \\(PAKIN\\)", "Pekin", Salmonella$Breed)
Salmonella$Breed <- sub("רוס \\(Ross\\)", "Ross", Salmonella$Breed)
Salmonella$Breed <- sub("הברד \\(Hubbard\\)", "Hubbard", Salmonella$Breed)

unique(Salmonella$Breed)
## [1] "Lohman" "Hyline_W36" "Ross" "Hyline_W80" "DeKalb"
## [6] "Multiple" NA "Beauti" "Hybrid" "Cobb"
## [11] "Nicholas" "Abir" "Pekin" "Hyline_CV24" "Hubbard"
# Original vs. translated value, with row count and percent of all rows.
Temp.10_Breed <- Salmonella %>%
  group_by(temp, Breed) %>%
  summarise(
    n = n(),
    per = round(100 * n / nrow(Salmonella), 2),
    .groups = "drop_last"
  )
print(Temp.10_Breed)
## # A tibble: 17 × 4
## # Groups: temp [17]
## temp Breed n per
## <chr> <chr> <int> <dbl>
## 1 --- <NA> 131 0.77
## 2 אביר (Abir) Abir 35 0.21
## 3 אחר או לא ידוע (Unknown) <NA> 291 1.72
## 4 ביוטי (BUT) Beauti 1952 11.5
## 5 ברווזים Pekin Pekin 4 0.02
## 6 דיקלב (DeKalb) DeKalb 379 2.24
## 7 הברד (Hubbard) Hubbard 16 0.09
## 8 היבריד (Hybrid) Hybrid 370 2.18
## 9 היילין (Hyline W80) Hyline_W80 623 3.67
## 10 היליין (Hyline CV24) Hyline_CV24 3 0.02
## 11 היליין (Hyline W36) Hyline_W36 518 3.05
## 12 יותר מאחד Multiple 869 5.13
## 13 לומן (Lohman) Lohman 4321 25.5
## 14 ניקולס (Nicholas) Nicholas 47 0.28
## 15 פקין (PAKIN) Pekin 10 0.06
## 16 קוב (Cobb) Cobb 743 4.38
## 17 רוס (Ross) Ross 6644 39.2
Unite the duplicate serovars
- Create a new column that holds the data from the “Serotype” column, so that we have a copy of the column that we can work on freely.
- Unite all of the duplicates:
# Keep an untouched copy of the serotype column for comparison.
Salmonella$temp.serotype <- Salmonella$Serotype

# "\\" escapes characters that have a special meaning in a regular expression
# (here the parentheses), telling R to search for the character itself.
# The earlier patterns had a stray "*" after the serotype name, which only
# made its last letter optional; it has been removed.
Salmonella$Serotype <- sub("Enteritidis \\(מעבדה מועצה\\)", "Enteritidis", Salmonella$Serotype)
Salmonella$Serotype <- sub("Typhimurium \\(מעבדה מועצה\\)", "Typhimurium", Salmonella$Serotype)

# "Virginia" is united with "Muenchen". A combined "Virginia/Muenchen" entry
# becomes "Muenchen/Muenchen" after the first step and is then collapsed.
# Matching the exact combined string (instead of replacing the first "/" in
# every serotype) leaves all other serotype names untouched.
Salmonella$Serotype <- sub("Virginia", "Muenchen", Salmonella$Serotype, fixed = TRUE)
Salmonella$Serotype <- sub("Muenchen/Muenchen", "Muenchen", Salmonella$Serotype, fixed = TRUE)

# To check ourselves: a "temp" data frame with the original column
# (temp.serotype) next to the edited column (Serotype), to compare them.
Temp.11_Serotype <- Salmonella %>%
  group_by(Serotype, temp.serotype) %>%
  summarise(
    n = n(),
    per = round(100 * n / nrow(Salmonella), 2),
    .groups = "drop_last"
  )
sort(unique(Salmonella$Serotype))
## [1] "13,23:i:-"
## [2] "16:b:-"
## [3] "16:lv:-"
## [4] "18:z4,z23:-"
## [5] "28:l,v:-"
## [6] "3,10:y:-"
## [7] "30:y:-"
## [8] "4,12:-:1,7"
## [9] "4,12:b:-"
## [10] "4,12:e,h:1,2,5"
## [11] "4,12:Rough"
## [12] "4,12:y:- "
## [13] "4,5,12:i:-"
## [14] "4,5,12:rough:1,2"
## [15] "42:b:e,n,x,z15"
## [16] "47:b:e,n,x,z15"
## [17] "6,7:f,g,t:-"
## [18] "6,8:eh:-"
## [19] "8,20:-:z6"
## [20] "8,20:i:- (Kentucky)"
## [21] "9,12:lv:-"
## [22] "9,46:rough"
## [23] "Abony"
## [24] "Adamstown"
## [25] "Afula"
## [26] "Agona"
## [27] "Alachua"
## [28] "Altona"
## [29] "Anatum"
## [30] "Auto agglutination"
## [31] "Bardo"
## [32] "Blockley"
## [33] "Bonn"
## [34] "Bovismorbificans"
## [35] "Braenderup"
## [36] "Brancaster"
## [37] "Brandenburg"
## [38] "Bredeney"
## [39] "Cerro"
## [40] "Charity"
## [41] "Chomedey"
## [42] "Coeln"
## [43] "Concord"
## [44] "Corvalis"
## [45] "Cotham"
## [46] "Cubana"
## [47] "Degania"
## [48] "Dublin"
## [49] "Eastbourne"
## [50] "Edinburg"
## [51] "Emek"
## [52] "Enteritidis"
## [53] "Falkensee"
## [54] "Freetown"
## [55] "Fresno"
## [56] "Frintrop"
## [57] "Give"
## [58] "Goldcoast"
## [59] "Group B"
## [60] "Group C"
## [61] "Group D"
## [62] "Group E"
## [63] "Group G"
## [64] "Hadar"
## [65] "Haifa"
## [66] "Halle"
## [67] "Hato (o:4)"
## [68] "Havana"
## [69] "Herzliya"
## [70] "Hessarek"
## [71] "Hindmarsh"
## [72] "Hvittingfoss"
## [73] "I 9,46:HME pos (unknown h Ag)"
## [74] "Idikan"
## [75] "II21:z:-"
## [76] "IIIa 48:z4,z23,z32:- or IIIa 48:z4,z23:-"
## [77] "IIIb 21:-:z"
## [78] "IIIb 21:z10:z "
## [79] "IIIb 35:z52:e,n,x,z15"
## [80] "IIIb 38:I,v:z53"
## [81] "IIIb 40:k:z"
## [82] "IIIb 47:c:e,n,x,z15"
## [83] "IIIb 48:i:z35"
## [84] "IIIb 50:I,-:e,n,x,z15"
## [85] "IIIb 50:I,v:e,n,x,z15"
## [86] "IIIb 61:z52:z53"
## [87] "IIIb38:l,v:z35"
## [88] "IIIb53:z52:z53"
## [89] "IIIb58:z52:z"
## [90] "IIIb61:i:z53"
## [91] "Ilala"
## [92] "Illb40:l,z13:z53"
## [93] "Indiana"
## [94] "Infantis"
## [95] "Irumu"
## [96] "Isangi"
## [97] "Istanbul"
## [98] "IV 50:z:z35 (O:50)"
## [99] "Java"
## [100] "Kedougou"
## [101] "Kentucky"
## [102] "Khami II47:b:e,n,x,zl5"
## [103] "Kotbus"
## [104] "Larochelle"
## [105] "Lexington"
## [106] "Liverpool"
## [107] "Livingstone"
## [108] "Llandoff"
## [109] "Manhattan"
## [110] "Matopeni "
## [111] "Mbandaka"
## [112] "Meleagridis"
## [113] "Mikawasima"
## [114] "Mishmarhaemek"
## [115] "Montevideo"
## [116] "Morehead"
## [117] "Muenchen"
## [118] "Nachshonim"
## [119] "Newport"
## [120] "Nima"
## [121] "Ohio"
## [122] "Oranienburg"
## [123] "Orion"
## [124] "Oslo"
## [125] "Ouakam"
## [126] "Pensacola"
## [127] "Polyvalent Minus"
## [128] "Polyvalent Plus"
## [129] "Reading"
## [130] "Richmond"
## [131] "Rissen"
## [132] "rough"
## [133] "Rough:b:1,2"
## [134] "Rough:b:rough"
## [135] "Rough:d:-"
## [136] "Rough:d:1,2"
## [137] "Rough:eh:1,2"
## [138] "Rough:f,g,t:-"
## [139] "Rough:f,g:-"
## [140] "rough:i:1,5"
## [141] "rough:r:1,5"
## [142] "Rough:r:z6"
## [143] "Rough:v:1,7"
## [144] "Rough:y:1,5"
## [145] "Rough:z10:e,n,x"
## [146] "Rubislaw"
## [147] "Saint-Paul"
## [148] "Salford"
## [149] "Schwarzengrund"
## [150] "Senftenberg"
## [151] "Sharon"
## [152] "SII40:Z4Z24:Z39"
## [153] "Soerenga"
## [154] "Sofia"
## [155] "Stanley"
## [156] "Tennessee"
## [157] "Typhimurium"
## [158] "Uganda"
## [159] "Virchow"
## [160] "Vitkin"
## [161] "Wangata"
## [162] "Widemarsh"
## [163] "Yoruba"
Next, I will create a new column that will contain the serogroup without NA values
# Create a new column ("sero.group.temp") holding a copy of the
# "Serologic_group_0" column values.
Salmonella$sero.group.temp <- Salmonella$Serologic_group_0
Loading the serologic group vs. serotype index that I created separately
# Load the serotype -> serologic group index; empty / blank / "na" cells are
# read as NA.
Sero_index <- read.csv(
  "G:/.shortcut-targets-by-id/1OCnVJ6euOfHZdOlKqGKkb1ToC8FQqs_Z/Linoy Zeman/Data files/Poultry/CSVs/Index_serotyp.VS.sero-group.final.csv",
  header = TRUE,
  na.strings = c("", " ", "na", NA, " ")
)
# The column names are changed so the index columns can be told apart from
# the columns of the isolate table.
colnames(Sero_index)[colnames(Sero_index) == "Serotype"] <- "serotype_ind"
colnames(Sero_index)[colnames(Sero_index) == "Sero.Group"] <- "Sero.Group_ind"
sort(unique(Sero_index$serotype_ind))
## [1] "Afula" "Agona" "Altona" "Anatum"
## [5] "Bardo" "Blockley" "Bonn" "Bovismorbificans"
## [9] "Braenderup" "Brancaster" "Brandenburg" "Bredeney"
## [13] "Cerro" "Charity" "Chomedey" "Coeln"
## [17] "Concord" "Corvalis" "Cubana" "Dublin"
## [21] "Eastbourne" "Edinburg" "Emek" "Enteritidis"
## [25] "Falkensee" "Fresno" "Frintrop" "Give"
## [29] "Goldcoast" "Group B" "Group C" "Group D"
## [33] "Group E" "Group G" "Group I" "Hadar"
## [37] "Haifa" "Havana" "Hindmarsh" "Idikan"
## [41] "Infantis" "Irumu" "Isangi" "Istanbul"
## [45] "Kedougou" "Kentucky" "Larochelle" "Lexington"
## [49] "Liverpool" "Livingstone" "Llandoff" "Manhattan"
## [53] "Mbandaka" "Meleagridis" "Mikawasima" "Mishmarhaemek"
## [57] "Montevideo" "Muenchen" "Nachshonim" "Newport"
## [61] "Ohio" "Oranienburg" "Orion" "Oslo"
## [65] "Ouakam" "Reading" "Richmond" "Rissen"
## [69] "Saint-Paul" "Schwarzengrund" "Senftenberg" "Tennessee"
## [73] "Typhimurium" "Uganda" "Virchow" "Wangata"
## [77] "Yoruba"
# Attach the serologic group from the index to every isolate (left join on
# the serotype name; isolates without an index entry get NA).
# `incomparables = NA` stops isolates with a missing serotype from being
# matched to any index row whose serotype is also missing.
Salmonella_merged <- merge(
  Salmonella, Sero_index,
  by.x = "Serotype",
  by.y = "serotype_ind",
  all.x = TRUE,
  incomparables = NA
)
# Put the index group next to the serotype and drop the original group
# column. (That column used to be relocated just before being dropped, which
# had no effect on the final column order.)
Salmonella_merged <- relocate(Salmonella_merged, "Sero.Group_ind", .before = "Serotype")
Salmonella_merged <- subset(Salmonella_merged, select = -Serologic_group_0)

# Count and percent of isolates per (serologic group, serotype) pair.
Temp.12_merged <- Salmonella_merged %>%
  group_by(Sero.Group_ind, Serotype) %>%
  summarise(
    n = n(),
    per = round(100 * n / nrow(Salmonella_merged), 2),
    .groups = "drop_last"
  )
## `summarise()` has grouped output by 'Sero.Group_ind'. You can override using
## the `.groups` argument.
# Drop two columns that are not used further on.
Salmonella_merged <- subset(
  Salmonella_merged,
  select = -c(sent.to.m..of.health.for.diagnosis, icpi.ivpi)
)
Adding a “Year” column
# Year of the test visit: parse the visit date as day/month/year and keep
# only the four-digit year (as text). Values that do not parse become NA.
Salmonella_merged$Year <- format(
  as.Date(Salmonella_merged$Date.of.test.visit, format = "%d/%m/%Y"),
  "%Y"
)
Salmonella_merged <- relocate(Salmonella_merged, "Year", .before = "Date.of.test.visit")
# Load the human serotype prevalence table; empty / blank / "na" cells are
# read as NA.
Sal_Human <- read.csv(
  "G:/.shortcut-targets-by-id/1OCnVJ6euOfHZdOlKqGKkb1ToC8FQqs_Z/Linoy Zeman/Data files/Human/human_serotype_pervalence_3Y.csv",
  header = TRUE,
  na.strings = c("", " ", "na", NA, " ")
)
# Rename the key column so it can be told apart from the poultry "Serotype".
colnames(Sal_Human)[colnames(Sal_Human) == "Serotype"] <- "serotype_H"
Create a new data frame with the relevant information
# Keep only the columns needed for the summaries, and only isolates with a
# usable serotype: rows whose serotype is missing, contains a bare group
# label ("Group B/C/D/E/G/I") or contains "Auto agglutination" are removed.
# grepl() returns FALSE for NA, so the NA check is kept explicit.
Salmonella_Relevant <- Salmonella_merged %>%
  select(Year, Isolate, Sero.Group_ind, Serotype, Branch_line) %>%
  filter(
    !is.na(Serotype),
    !grepl("Group B|Group C|Group D|Group E|Group G|Group I|Auto agglutination", Serotype)
  ) %>%
  rename(Sero.Group = Sero.Group_ind)
# Year summary table:
# number of isolates per year and their share (percent, 1 decimal) of all
# rows in Salmonella_Relevant.
Isolates.by.year_p <- Salmonella_Relevant %>%
  group_by(Year) %>%
  summarize(
    No_of_Isolates = n(),
    Percent = round(n() / nrow(Salmonella_Relevant) * 100, 1)
  )
# Append a "Total" row holding the column sums of the numeric columns.
Isolates.by.year_p <- rbind(
  Isolates.by.year_p,
  data.frame(Year = "Total", t(colSums(Isolates.by.year_p[, -1])))
)
print(Isolates.by.year_p)
## # A tibble: 6 × 3
## Year No_of_Isolates Percent
## <chr> <dbl> <dbl>
## 1 2019 447 11.3
## 2 2020 753 19
## 3 2021 958 24.2
## 4 2022 941 23.7
## 5 2023 864 21.8
## 6 Total 3963 100
# Working copy of the human prevalence table (Sal_Human).
Isolates.by.year_H <- Sal_Human
# Serotype summary table:
# number of isolates per serotype and their share (percent, 1 decimal) of all
# rows in Salmonella_Relevant, sorted from the most to the least common.
Isolates.by.Serotype_P <- Salmonella_Relevant %>%
  group_by(Serotype) %>%
  summarize(
    No_of_Isolates = n(),
    Percent = round(n() / nrow(Salmonella_Relevant) * 100, 1)
  )
Isolates.by.Serotype_P <- Isolates.by.Serotype_P[order(-Isolates.by.Serotype_P$Percent), ]
print(Isolates.by.Serotype_P)
## # A tibble: 157 × 3
## Serotype No_of_Isolates Percent
## <chr> <int> <dbl>
## 1 Muenchen 927 23.4
## 2 Enteritidis 391 9.9
## 3 Typhimurium 354 8.9
## 4 Bredeney 291 7.3
## 5 Infantis 136 3.4
## 6 Polyvalent Minus 119 3
## 7 Brancaster 113 2.9
## 8 Polyvalent Plus 100 2.5
## 9 Kentucky 87 2.2
## 10 Montevideo 83 2.1
## # ℹ 147 more rows
# Poultry vs. human prevalence per serotype (full outer join on the serotype
# name, so serotypes found in only one source are kept with NA for the other).
# The call used to carry two extra unnamed strings ("Percent",
# "Av_prevelance.3Y."); they were bound to merge()'s `by` and `all` arguments
# and had no effect only because by.x/by.y/all.x/all.y were all given.
Isolates.by.Serotype_P.VS.H <- merge(
  Isolates.by.Serotype_P, Isolates.by.year_H,
  by.x = "Serotype",
  by.y = "serotype_H",
  all.x = TRUE, all.y = TRUE
)
colnames(Isolates.by.Serotype_P.VS.H)[colnames(Isolates.by.Serotype_P.VS.H) == "Percent"] <- "Poultry"
colnames(Isolates.by.Serotype_P.VS.H)[colnames(Isolates.by.Serotype_P.VS.H) == "Av_prevelance.3Y."] <- "Human"
colnames(Isolates.by.Serotype_P.VS.H)[colnames(Isolates.by.Serotype_P.VS.H) == "No_of_Isolates"] <- "No.of.Isolates_Poultry"

# Sort by human prevalence, highest first (rows with NA go last).
Isolates.by.Serotype_P.VS.H <- Isolates.by.Serotype_P.VS.H[order(-Isolates.by.Serotype_P.VS.H$Human), ]
# Append a "Total" row. na.rm = TRUE: after the outer join, serotypes present
# in only one source hold NA, which would otherwise turn every total into NA.
Isolates.by.Serotype_P.VS.H <- rbind(
  Isolates.by.Serotype_P.VS.H,
  data.frame(Serotype = "Total", t(colSums(Isolates.by.Serotype_P.VS.H[, -1], na.rm = TRUE)))
)
Isolates.by.Serotype_P <- rbind(
  Isolates.by.Serotype_P,
  data.frame(Serotype = "Total", t(colSums(Isolates.by.Serotype_P[, -1])))
)
# Main serotypes: more than 20 poultry isolates.
# NOTE(review): the "Total" row added above also passes this filter whenever
# the total exceeds 20 - confirm that it is meant to stay in this table.
The.Main.Serotype.Prev_H.vs.P <- subset(Isolates.by.Serotype_P.VS.H, No.of.Isolates_Poultry > 20)
# Sero.Group summary table:
# number of isolates per serologic group and their share (percent, 1 decimal)
# of all rows in Salmonella_Relevant, sorted from the most to the least common.
Isolates.by.Sero.Group_P <- Salmonella_Relevant %>%
  group_by(Sero.Group) %>%
  summarize(
    No_of_Isolates = n(),
    Percent = round(n() / nrow(Salmonella_Relevant) * 100, 1)
  )
Isolates.by.Sero.Group_P <- Isolates.by.Sero.Group_P[order(-Isolates.by.Sero.Group_P$Percent), ]
# Append a "Total" row holding the column sums of the numeric columns.
Isolates.by.Sero.Group_P <- rbind(
  Isolates.by.Sero.Group_P,
  data.frame(Sero.Group = "Total", t(colSums(Isolates.by.Sero.Group_P[, -1])))
)
print(Isolates.by.Sero.Group_P)
## # A tibble: 8 × 3
## Sero.Group No_of_Isolates Percent
## <chr> <dbl> <dbl>
## 1 Group C 1623 41
## 2 Group B 912 23
## 3 <NA> 670 16.9
## 4 Group D 475 12
## 5 Group E 147 3.7
## 6 Group G 112 2.8
## 7 Group I 24 0.6
## 8 Total 3963 100
# Branch_line summary table:
# number of isolates per branch line and their share (percent, 1 decimal) of
# all rows in Salmonella_Relevant, sorted from the most to the least common.
Isolates.by.Branch_line <- Salmonella_Relevant %>%
  group_by(Branch_line) %>%
  summarize(
    No_of_Isolates = n(),
    Percent = round(n() / nrow(Salmonella_Relevant) * 100, 1)
  )
Isolates.by.Branch_line <- Isolates.by.Branch_line[order(-Isolates.by.Branch_line$Percent), ]
# Append a "Total" row holding the column sums of the numeric columns.
Isolates.by.Branch_line <- rbind(
  Isolates.by.Branch_line,
  data.frame(Branch_line = "Total", t(colSums(Isolates.by.Branch_line[, -1])))
)
print(Isolates.by.Branch_line)
## # A tibble: 5 × 3
## Branch_line No_of_Isolates Percent
## <chr> <dbl> <dbl>
## 1 Layers 2108 53.2
## 2 Breeding 1531 38.6
## 3 Broilers 321 8.1
## 4 Ornaments 3 0.1
## 5 Total 3963 100
# Isolates per (branch line, serotype) pair; Percent is the share of ALL rows
# in Salmonella_Relevant (not of the branch line), rounded to 1 decimal.
# .groups = "drop_last" keeps the result grouped by Branch_line, which is
# what summarize() does by default, without the informational message.
Serotype_By_Branch.line <- Salmonella_Relevant %>%
  group_by(Branch_line, Serotype) %>%
  summarize(
    No_of_Isolates = n(),
    Percent = round(n() / nrow(Salmonella_Relevant) * 100, 1),
    .groups = "drop_last"
  )
# Most common combinations first.
Serotype_By_Branch.line <- Serotype_By_Branch.line[order(-Serotype_By_Branch.line$Percent), ]
print(Serotype_By_Branch.line)
## # A tibble: 239 × 4
## # Groups: Branch_line [4]
## Branch_line Serotype No_of_Isolates Percent
## <chr> <chr> <int> <dbl>
## 1 Breeding Muenchen 582 14.7
## 2 Layers Enteritidis 372 9.4
## 3 Layers Muenchen 332 8.4
## 4 Layers Typhimurium 236 6
## 5 Breeding Bredeney 188 4.7
## 6 Layers Infantis 102 2.6
## 7 Broilers Bredeney 88 2.2
## 8 Breeding Typhimurium 85 2.1
## 9 Breeding Hadar 77 1.9
## 10 Layers Montevideo 76 1.9
## # ℹ 229 more rows
# Same table ordered by branch line. order() keeps tied rows in their current
# order, so the existing ordering inside each branch line is preserved.
# order() indexing is used instead of sort_by(), which exists only in
# R >= 4.4, and matches the sorting style used for the other tables.
Serotype_By_Branch.line_Sorted <- Serotype_By_Branch.line[order(Serotype_By_Branch.line$Branch_line), ]
print(Serotype_By_Branch.line_Sorted)
## # A tibble: 239 × 4
## # Groups: Branch_line [4]
## Branch_line Serotype No_of_Isolates Percent
## <chr> <chr> <int> <dbl>
## 1 Breeding Muenchen 582 14.7
## 2 Breeding Bredeney 188 4.7
## 3 Breeding Typhimurium 85 2.1
## 4 Breeding Hadar 77 1.9
## 5 Breeding Brancaster 59 1.5
## 6 Breeding Kentucky 52 1.3
## 7 Breeding Orion 42 1.1
## 8 Breeding Rough:d:1,2 40 1
## 9 Breeding Infantis 33 0.8
## 10 Breeding Kedougou 26 0.7
## # ℹ 229 more rows
# Isolates per (branch line, serologic group) pair; Percent is the share of
# ALL rows in Salmonella_Relevant, rounded to 1 decimal.
# .groups = "drop_last" keeps the result grouped by Branch_line, which is
# what summarize() does by default, without the informational message.
Sero.Group_By_Branch.line <- Salmonella_Relevant %>%
  group_by(Branch_line, Sero.Group) %>%
  summarize(
    No_of_Isolates = n(),
    Percent = round(n() / nrow(Salmonella_Relevant) * 100, 1),
    .groups = "drop_last"
  )
# Most common combinations first.
Sero.Group_By_Branch.line <- Sero.Group_By_Branch.line[order(-Sero.Group_By_Branch.line$Percent), ]
print(Sero.Group_By_Branch.line)
## # A tibble: 22 × 4
## # Groups: Branch_line [4]
## Branch_line Sero.Group No_of_Isolates Percent
## <chr> <chr> <int> <dbl>
## 1 Breeding Group C 828 20.9
## 2 Layers Group C 779 19.7
## 3 Layers Group D 442 11.2
## 4 Layers Group B 390 9.8
## 5 Layers <NA> 374 9.4
## 6 Breeding Group B 358 9
## 7 Breeding <NA> 175 4.4
## 8 Broilers Group B 161 4.1
## 9 Broilers <NA> 121 3.1
## 10 Breeding Group E 82 2.1
## # ℹ 12 more rows
# Same table ordered by branch line. order() keeps tied rows in their current
# order, so the existing ordering inside each branch line is preserved.
# order() indexing is used instead of sort_by(), which exists only in
# R >= 4.4, and matches the sorting style used for the other tables.
Sero.Group_By_Branch.line_Sorted <- Sero.Group_By_Branch.line[order(Sero.Group_By_Branch.line$Branch_line), ]
print(Sero.Group_By_Branch.line_Sorted)
## # A tibble: 22 × 4
## # Groups: Branch_line [4]
## Branch_line Sero.Group No_of_Isolates Percent
## <chr> <chr> <int> <dbl>
## 1 Breeding Group C 828 20.9
## 2 Breeding Group B 358 9
## 3 Breeding <NA> 175 4.4
## 4 Breeding Group E 82 2.1
## 5 Breeding Group G 55 1.4
## 6 Breeding Group D 25 0.6
## 7 Breeding Group I 8 0.2
## 8 Broilers Group B 161 4.1
## 9 Broilers <NA> 121 3.1
## 10 Broilers Group C 16 0.4
## # ℹ 12 more rows