R Markdown

The following file connects to an Elasticsearch server and creates visuals using the R package ggplot2. When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library("elastic")
library(ggplot2)
# Keep strings as character vectors (not factors) when data frames are built.
options(stringsAsFactors = FALSE)
connect() # Connect to the Elasticsearch server; the settings used are echoed below
## transport:  http 
## host:       127.0.0.1 
## port:       9200 
## path:       NULL 
## username:   NULL 
## password:   <secret> 
## errors:     simple 
## headers (names):  NULL
ping() # Ping the server; the response lists node name, cluster and version details
## $name
## [1] "lKOYePO"
## 
## $cluster_name
## [1] "elasticsearch_priyankaangadi"
## 
## $cluster_uuid
## [1] "CJh7Ht53TnGKjsF1ldLSbg"
## 
## $version
## $version$number
## [1] "6.0.0"
## 
## $version$build_hash
## [1] "8f0685b"
## 
## $version$build_date
## [1] "2017-11-10T18:41:22.859Z"
## 
## $version$build_snapshot
## [1] FALSE
## 
## $version$lucene_version
## [1] "7.0.1"
## 
## $version$minimum_wire_compatibility_version
## [1] "5.6.0"
## 
## $version$minimum_index_compatibility_version
## [1] "5.0.0"
## 
## 
## $tagline
## [1] "You Know, for Search"

R Markdown

Let us explore the data and see the features

# Run an unfiltered search against the "diabetes" index and show the first
# returned hit (its metadata plus the `_source` document).
# NOTE(review): with no `size` argument this presumably returns only the
# server's default page of hits rather than every document -- confirm.
out <- Search(index = "diabetes")
head(out$hits$hits[[1]])
## $`_index`
## [1] "diabetes"
## 
## $`_type`
## [1] "doc"
## 
## $`_id`
## [1] "19"
## 
## $`_score`
## [1] 1
## 
## $`_source`
## $`_source`$encounter_id
## [1] 89682
## 
## $`_source`$patient_nbr
## [1] 107389323
## 
## $`_source`$race
## [1] "AfricanAmerican"
## 
## $`_source`$gender
## [1] "Male"
## 
## $`_source`$age
## [1] "[70-80)"
## 
## $`_source`$weight
## [1] ""
## 
## $`_source`$admission_type_id
## NULL
## 
## $`_source`$discharge_disposition_id
## [1] "Expired at home. Medicaid only, hospice."
## 
## $`_source`$admission_source_id
## [1] " Not Mapped"
## 
## $`_source`$time_in_hospital
## [1] 5
## 
## $`_source`$payer_code
## [1] ""
## 
## $`_source`$medical_specialty
## [1] ""
## 
## $`_source`$num_lab_procedures
## [1] 35
## 
## $`_source`$num_procedures
## [1] 5
## 
## $`_source`$num_medications
## [1] 23
## 
## $`_source`$number_outpatient
## [1] 0
## 
## $`_source`$number_emergency
## [1] 0
## 
## $`_source`$number_inpatient
## [1] 0
## 
## $`_source`$diag_1
## [1] "402"
## 
## $`_source`$diag_2
## [1] "425"
## 
## $`_source`$diag_3
## [1] "416"
## 
## $`_source`$number_diagnoses
## [1] 9
## 
## $`_source`$max_glu_serum
## [1] "None"
## 
## $`_source`$A1Cresult
## [1] "None"
## 
## $`_source`$metformin
## [1] "No"
## 
## $`_source`$repaglinide
## [1] "No"
## 
## $`_source`$nateglinide
## [1] "No"
## 
## $`_source`$chlorpropamide
## [1] "No"
## 
## $`_source`$glimepiride
## [1] "No"
## 
## $`_source`$acetohexamide
## [1] "No"
## 
## $`_source`$glipizide
## [1] "No"
## 
## $`_source`$glyburide
## [1] "No"
## 
## $`_source`$tolbutamide
## [1] "No"
## 
## $`_source`$pioglitazone
## [1] "No"
## 
## $`_source`$rosiglitazone
## [1] "No"
## 
## $`_source`$acarbose
## [1] "No"
## 
## $`_source`$miglitol
## [1] "No"
## 
## $`_source`$troglitazone
## [1] "No"
## 
## $`_source`$tolazamide
## [1] "No"
## 
## $`_source`$examide
## [1] "No"
## 
## $`_source`$citoglipton
## [1] "No"
## 
## $`_source`$insulin
## [1] "Steady"
## 
## $`_source`$`glyburide-metformin`
## [1] "No"
## 
## $`_source`$`glipizide-metformin`
## [1] "No"
## 
## $`_source`$`glimepiride-pioglitazone`
## [1] "No"
## 
## $`_source`$`metformin-rosiglitazone`
## [1] "No"
## 
## $`_source`$`metformin-pioglitazone`
## [1] "No"
## 
## $`_source`$change
## [1] "No"
## 
## $`_source`$diabetesMed
## [1] "Yes"
# Show the top-level element names of the search response.
cat("Examining the structure of the result:")
## Examining the structure of the result:
print(names(out))
## [1] "took"      "timed_out" "_shards"   "hits"
# Show the names found one level down, inside each top-level element.
cat("Examining the structure of the each element:")
## Examining the structure of the each element:
lapply(out, names)
## $took
## NULL
## 
## $timed_out
## NULL
## 
## $`_shards`
## [1] "total"      "successful" "skipped"    "failed"    
## 
## $hits
## [1] "total"     "max_score" "hits"
# Get the feature (field) names of the data from the first hit's `_source`.
# The names are used as-is: stripping the text "_source" from them would also
# corrupt any field whose own name contains it (e.g. "admission_source_id"
# would be reported as "admission_id").
AllHits <- out$hits$hits
names(AllHits[[1]][["_source"]])
##  [1] "encounter_id"             "patient_nbr"             
##  [3] "race"                     "gender"                  
##  [5] "age"                      "weight"                  
##  [7] "admission_type_id"        "discharge_disposition_id"
##  [9] "admission_source_id"      "time_in_hospital"        
## [11] "payer_code"               "medical_specialty"       
## [13] "num_lab_procedures"       "num_procedures"          
## [15] "num_medications"          "number_outpatient"       
## [17] "number_emergency"         "number_inpatient"        
## [19] "diag_1"                   "diag_2"                  
## [21] "diag_3"                   "number_diagnoses"        
## [23] "max_glu_serum"            "A1Cresult"               
## [25] "metformin"                "repaglinide"             
## [27] "nateglinide"              "chlorpropamide"          
## [29] "glimepiride"              "acetohexamide"           
## [31] "glipizide"                "glyburide"               
## [33] "tolbutamide"              "pioglitazone"            
## [35] "rosiglitazone"            "acarbose"                
## [37] "miglitol"                 "troglitazone"            
## [39] "tolazamide"               "examide"                 
## [41] "citoglipton"              "insulin"                 
## [43] "glyburide-metformin"      "glipizide-metformin"     
## [45] "glimepiride-pioglitazone" "metformin-rosiglitazone" 
## [47] "metformin-pioglitazone"   "change"                  
## [49] "diabetesMed"
# Fetch document 10 from diabetes/doc, requesting only the stored field
# `admission_type_id`.
# NOTE(review): `found` in the response reports that the document exists; it
# does not by itself confirm that the field is present -- verify.
docs_get(
  index = "diabetes", type = "doc", id = 10,
  fields = "admission_type_id"
)
## http://127.0.0.1:9200/diabetes/doc/10?stored_fields=admission_type_id
## $`_index`
## [1] "diabetes"
## 
## $`_type`
## [1] "doc"
## 
## $`_id`
## [1] "10"
## 
## $`_version`
## [1] 1
## 
## $found
## [1] TRUE
# Retrieve the mapping for diabetes/doc and print the first four field
# definitions from its `properties` list.
Res <- mapping_get(index = "diabetes", type = "doc")
print(Res$diabetes$mappings$doc$properties[1:4])
## $A1Cresult
## $A1Cresult$type
## [1] "text"
## 
## $A1Cresult$fields
## $A1Cresult$fields$keyword
## $A1Cresult$fields$keyword$type
## [1] "keyword"
## 
## $A1Cresult$fields$keyword$ignore_above
## [1] 256
## 
## 
## 
## 
## $acarbose
## $acarbose$type
## [1] "text"
## 
## $acarbose$fields
## $acarbose$fields$keyword
## $acarbose$fields$keyword$type
## [1] "keyword"
## 
## $acarbose$fields$keyword$ignore_above
## [1] 256
## 
## 
## 
## 
## $acetohexamide
## $acetohexamide$type
## [1] "text"
## 
## $acetohexamide$fields
## $acetohexamide$fields$keyword
## $acetohexamide$fields$keyword$type
## [1] "keyword"
## 
## $acetohexamide$fields$keyword$ignore_above
## [1] 256
## 
## 
## 
## 
## $admission_source_id
## $admission_source_id$type
## [1] "text"
## 
## $admission_source_id$fields
## $admission_source_id$fields$keyword
## $admission_source_id$fields$keyword$type
## [1] "keyword"
## 
## $admission_source_id$fields$keyword$ignore_above
## [1] 256
# Can retrieve specific data.
# Report the total hit count for an unfiltered search of diabetes/doc.
Result <- Search(index = "diabetes", type = "doc")
Total <- Result$hits$total
cat(paste0("Total number of documents in the row :", Total))
## Total number of documents in the row :95000
# Report the total hit count for the wildcard query "Nephrolo*".
cat(paste0(
  "Total Number of Patients in Nephrology inpatient dept : ",
  Search(index = "diabetes", q = "Nephrolo*")$hits$total
))
## Total Number of Patients in Nephrology inpatient dept : 1588

Creating example visuals

# Query terms to count hits for; each term becomes one bar in the chart below.
# (They look like values of the `race` field, but the query is not restricted
# to that field.)
WordsToSearch <- c("Caucasian","AfricanAmerican","Other")

# Return the total hit count Elasticsearch reports for a query string.
#
# word: query string passed as `q` to Search() against index "diabetes",
#       type "doc".
# Returns the `hits$total` element of the search response.
SearchWords <- function(word) {
  response <- Search(index = "diabetes", type = "doc", q = word)
  response$hits$total
}

# Build the chart data: one Elasticsearch hit count per search term.
# vapply() returns a plain numeric vector (and fails loudly if a query does
# not yield exactly one count), and data.frame() keeps that column numeric.
# Going through cbind() would coerce the counts to character, and converting
# them back is only correct while strings are not turned into factors.
Bar <- data.frame(
  Words = WordsToSearch,
  Val = vapply(WordsToSearch, SearchWords, numeric(1), USE.NAMES = FALSE),
  stringsAsFactors = FALSE
)
# One bar per search term; bar height is taken directly from Val.
p <- ggplot(data = Bar, aes(x = Words, y = Val)) +
  geom_bar(stat = "identity", fill = "steelblue")
p + labs(y = "Count")

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.