The following file connects to an Elasticsearch server and creates visuals using the R package ggplot2. When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library("elastic")
library(ggplot2)
# Keep strings as character vectors rather than converting them to factors.
options(stringsAsFactors = FALSE)
# Open a connection to the Elasticsearch server using the package defaults.
connect()
## transport: http
## host: 127.0.0.1
## port: 9200
## path: NULL
## username: NULL
## password: <secret>
## errors: simple
## headers (names): NULL
# Ping the server and print the information it reports about itself.
ping()
## $name
## [1] "lKOYePO"
##
## $cluster_name
## [1] "elasticsearch_priyankaangadi"
##
## $cluster_uuid
## [1] "CJh7Ht53TnGKjsF1ldLSbg"
##
## $version
## $version$number
## [1] "6.0.0"
##
## $version$build_hash
## [1] "8f0685b"
##
## $version$build_date
## [1] "2017-11-10T18:41:22.859Z"
##
## $version$build_snapshot
## [1] FALSE
##
## $version$lucene_version
## [1] "7.0.1"
##
## $version$minimum_wire_compatibility_version
## [1] "5.6.0"
##
## $version$minimum_index_compatibility_version
## [1] "5.0.0"
##
##
## $tagline
## [1] "You Know, for Search"
Let us explore the data and see the features
# Run an unfiltered search against the "diabetes" index and keep the response.
# NOTE(review): a search without `size` presumably returns only the first page
# of hits, not every document -- confirm against the Elasticsearch defaults.
out <- Search(index = "diabetes")
# Show the first matching document (metadata plus its `_source` fields).
head(out[["hits"]][["hits"]][[1]])
## $`_index`
## [1] "diabetes"
##
## $`_type`
## [1] "doc"
##
## $`_id`
## [1] "19"
##
## $`_score`
## [1] 1
##
## $`_source`
## $`_source`$encounter_id
## [1] 89682
##
## $`_source`$patient_nbr
## [1] 107389323
##
## $`_source`$race
## [1] "AfricanAmerican"
##
## $`_source`$gender
## [1] "Male"
##
## $`_source`$age
## [1] "[70-80)"
##
## $`_source`$weight
## [1] ""
##
## $`_source`$admission_type_id
## NULL
##
## $`_source`$discharge_disposition_id
## [1] "Expired at home. Medicaid only, hospice."
##
## $`_source`$admission_source_id
## [1] " Not Mapped"
##
## $`_source`$time_in_hospital
## [1] 5
##
## $`_source`$payer_code
## [1] ""
##
## $`_source`$medical_specialty
## [1] ""
##
## $`_source`$num_lab_procedures
## [1] 35
##
## $`_source`$num_procedures
## [1] 5
##
## $`_source`$num_medications
## [1] 23
##
## $`_source`$number_outpatient
## [1] 0
##
## $`_source`$number_emergency
## [1] 0
##
## $`_source`$number_inpatient
## [1] 0
##
## $`_source`$diag_1
## [1] "402"
##
## $`_source`$diag_2
## [1] "425"
##
## $`_source`$diag_3
## [1] "416"
##
## $`_source`$number_diagnoses
## [1] 9
##
## $`_source`$max_glu_serum
## [1] "None"
##
## $`_source`$A1Cresult
## [1] "None"
##
## $`_source`$metformin
## [1] "No"
##
## $`_source`$repaglinide
## [1] "No"
##
## $`_source`$nateglinide
## [1] "No"
##
## $`_source`$chlorpropamide
## [1] "No"
##
## $`_source`$glimepiride
## [1] "No"
##
## $`_source`$acetohexamide
## [1] "No"
##
## $`_source`$glipizide
## [1] "No"
##
## $`_source`$glyburide
## [1] "No"
##
## $`_source`$tolbutamide
## [1] "No"
##
## $`_source`$pioglitazone
## [1] "No"
##
## $`_source`$rosiglitazone
## [1] "No"
##
## $`_source`$acarbose
## [1] "No"
##
## $`_source`$miglitol
## [1] "No"
##
## $`_source`$troglitazone
## [1] "No"
##
## $`_source`$tolazamide
## [1] "No"
##
## $`_source`$examide
## [1] "No"
##
## $`_source`$citoglipton
## [1] "No"
##
## $`_source`$insulin
## [1] "Steady"
##
## $`_source`$`glyburide-metformin`
## [1] "No"
##
## $`_source`$`glipizide-metformin`
## [1] "No"
##
## $`_source`$`glimepiride-pioglitazone`
## [1] "No"
##
## $`_source`$`metformin-rosiglitazone`
## [1] "No"
##
## $`_source`$`metformin-pioglitazone`
## [1] "No"
##
## $`_source`$change
## [1] "No"
##
## $`_source`$diabetesMed
## [1] "Yes"
cat("Examining the structure of the result:")
## Examining the structure of the result:
# Top-level element names of the search response.
print(names(out))
## [1] "took" "timed_out" "_shards" "hits"
cat("Examining the structure of the each element:")
## Examining the structure of the each element:
# Names nested inside each top-level element (NULL where there are none).
lapply(out, function(element) names(element))
## $took
## NULL
##
## $timed_out
## NULL
##
## $`_shards`
## [1] "total" "successful" "skipped" "failed"
##
## $hits
## [1] "total" "max_score" "hits"
# Get the feature (field) names of the data.
# The names of a hit's `_source` list are already the bare field names, so no
# prefix has to be stripped. The earlier gsub("_source", "", ...) call also
# removed "_source" from inside "admission_source_id", reporting it as
# "admission_id".
AllHits <- out$hits$hits
names(AllHits[[1]][["_source"]])
## [1] "encounter_id" "patient_nbr"
## [3] "race" "gender"
## [5] "age" "weight"
## [7] "admission_type_id" "discharge_disposition_id"
## [9] "admission_source_id" "time_in_hospital"
## [11] "payer_code" "medical_specialty"
## [13] "num_lab_procedures" "num_procedures"
## [15] "num_medications" "number_outpatient"
## [17] "number_emergency" "number_inpatient"
## [19] "diag_1" "diag_2"
## [21] "diag_3" "number_diagnoses"
## [23] "max_glu_serum" "A1Cresult"
## [25] "metformin" "repaglinide"
## [27] "nateglinide" "chlorpropamide"
## [29] "glimepiride" "acetohexamide"
## [31] "glipizide" "glyburide"
## [33] "tolbutamide" "pioglitazone"
## [35] "rosiglitazone" "acarbose"
## [37] "miglitol" "troglitazone"
## [39] "tolazamide" "examide"
## [41] "citoglipton" "insulin"
## [43] "glyburide-metformin" "glipizide-metformin"
## [45] "glimepiride-pioglitazone" "metformin-rosiglitazone"
## [47] "metformin-pioglitazone" "change"
## [49] "diabetesMed"
# Fetch document 10, requesting only the `admission_type_id` stored field.
# NOTE(review): `found` in the response reports that the document exists; it
# does not by itself show that the field is present -- confirm before relying
# on this as a feature-existence check.
docs_get(
  index = "diabetes",
  type = "doc",
  id = 10,
  fields = "admission_type_id"
)
## http://127.0.0.1:9200/diabetes/doc/10?stored_fields=admission_type_id
## $`_index`
## [1] "diabetes"
##
## $`_type`
## [1] "doc"
##
## $`_id`
## [1] "10"
##
## $`_version`
## [1] 1
##
## $found
## [1] TRUE
# Retrieve the field mappings for the "doc" type of the "diabetes" index.
Res <- mapping_get(index = "diabetes", type = "doc")
# Print the mapping definitions of the first four properties.
print(Res[["diabetes"]][["mappings"]][["doc"]][["properties"]][1:4])
## $A1Cresult
## $A1Cresult$type
## [1] "text"
##
## $A1Cresult$fields
## $A1Cresult$fields$keyword
## $A1Cresult$fields$keyword$type
## [1] "keyword"
##
## $A1Cresult$fields$keyword$ignore_above
## [1] 256
##
##
##
##
## $acarbose
## $acarbose$type
## [1] "text"
##
## $acarbose$fields
## $acarbose$fields$keyword
## $acarbose$fields$keyword$type
## [1] "keyword"
##
## $acarbose$fields$keyword$ignore_above
## [1] 256
##
##
##
##
## $acetohexamide
## $acetohexamide$type
## [1] "text"
##
## $acetohexamide$fields
## $acetohexamide$fields$keyword
## $acetohexamide$fields$keyword$type
## [1] "keyword"
##
## $acetohexamide$fields$keyword$ignore_above
## [1] 256
##
##
##
##
## $admission_source_id
## $admission_source_id$type
## [1] "text"
##
## $admission_source_id$fields
## $admission_source_id$fields$keyword
## $admission_source_id$fields$keyword$type
## [1] "keyword"
##
## $admission_source_id$fields$keyword$ignore_above
## [1] 256
# Specific data can be retrieved as well: query the "doc" type of the index.
Result <- Search(index = "diabetes", type = "doc")
# Total hit count reported by the server for this search.
Total <- Result[["hits"]][["total"]]
cat(paste0("Total number of documents in the row :", Total))
## Total number of documents in the row :95000
# Report the hit count for the wildcard query-string search "Nephrolo*".
cat(
  paste0(
    "Total Number of Patients in Nephrology inpatient dept : ",
    Search(index = "diabetes", q = "Nephrolo*")$hits$total
  )
)
## Total Number of Patients in Nephrology inpatient dept : 1588
# Terms whose hit counts will be compared.
WordsToSearch <- c("Caucasian", "AfricanAmerican", "Other")

# Return the total number of hits in the "diabetes" index ("doc" type) for a
# query-string search on `word`.
SearchWords <- function(word) {
  response <- Search(index = "diabetes", type = "doc", q = word)
  response$hits$total
}
# Build the plotting data directly as a data frame. Going through cbind()
# coerced the counts to character, which only converted back correctly while
# strings were not being turned into factors.
Bar <- data.frame(
  Words = WordsToSearch,
  Val = vapply(
    WordsToSearch,
    function(word) as.numeric(SearchWords(word)),
    numeric(1),
    USE.NAMES = FALSE
  ),
  stringsAsFactors = FALSE
)
# Bar chart of the hit count for each search term.
p <- ggplot(data = Bar, aes(x = Words, y = Val)) +
  geom_bar(stat = "identity", fill = "steelblue")
p + labs(y = "Count")
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.