The Division of Trauma & Acute Care Surgery at Loma Linda University Medical Center is a hub of top-of-the-line medical care as well as clinical research.

This is an example of an approach to exploring the database of clinical trials maintained by the National Library of Medicine (NLM) of the National Institutes of Health (NIH). This tool facilitates the exploration and visualization of clinical trials recognized by the government and reported at ClinicalTrials.gov.

The rest of the document shows a step-by-step approach to the process of data mining and visualizing this information.

Searching and datamining the site.

Initial subsetting and downloading from website.

The following table shows the first 6 studies of a total of 137 found after subsetting across the whole ClinicalTrials.gov dataset for matches with the terms “acute AND care AND surgery”.

Once the selection is located and downloaded, the next step is to extract and keep only the locations in the United States.

library(rclinicaltrials)
# Download the trials matching the search parameters below. The request asks
# for 200 records, but when more than 100 are requested ClinicalTrials.gov
# only returns the first 100.
a <- clinicaltrials_download(
  query = c(
    "term=acute AND care AND surgery",
    "recr=Open",
    "type=Intr",
    "cntry1=NA%3AUS"
  ),
  count = 200,
  include_results = TRUE
)
Count is too large (>100), only returning top 100 results. Use query and count = NULL to return all results
# Take the locations table from the downloaded study information and keep
# only the rows whose country is the United States.
b <- a[["study_information"]][["locations"]]
c <- b[b[["address.country"]] %in% "United States", ]
# Preview the first rows of the United States locations:
head(c)

Extracting address and getting rid of the rest.

library(plyr)
# Keep columns 2, 3 and 5 of the United States locations and build a single
# "city,state,country" string for every row.
d <- c[c(2, 3, 5)]
d[["address"]] <- paste(
  d[["address.city"]],
  d[["address.state"]],
  d[["address.country"]],
  sep = ","
)
# Frequency table: how many rows share each address.
e <- count(d, vars = "address")
# Put the most frequent addresses first (descending frequency).
e <- e[order(-e[["freq"]]), ]
# Show the ten most frequent addresses:
head(e, n = 10)

Visualization

Geolocation of cities

Using the Google Maps API to obtain latitude and longitude coordinates from city names and states.

library(ggmap)

# NOTE(review): recent ggmap releases appear to require a Google API key to be
# registered (register_google()) before geocode() can be used -- confirm for
# the installed version.

# Start from the per-address frequency table.
geocoded <- data.frame(e)

# Drop the ",United States" suffix, since all the addresses are in the same
# country. gsub() is vectorised, so no row-by-row loop is needed, and this
# form also behaves correctly when the table has no rows.
geocoded$address <- gsub(",United States", "", geocoded$address, fixed = TRUE)

# Pre-allocate the result columns instead of growing them inside the loop.
n_addresses <- nrow(geocoded)
geocoded$lon <- rep(NA_real_, n_addresses)
geocoded$lat <- rep(NA_real_, n_addresses)
geocoded$geoAddress <- rep(NA_character_, n_addresses)

# Query the Google geocoder once per address and store the longitude, the
# latitude and the address string it reports, so that the reported address can
# be compared with the one that was sent.
# seq_len() makes the loop a no-op for an empty table (1:0 would run twice).
for (i in seq_len(n_addresses)) {
  result <- geocode(geocoded$address[i], output = "latlona", source = "google")
  geocoded$lon[i] <- as.numeric(result[[1]])
  geocoded$lat[i] <- as.numeric(result[[2]])
  geocoded$geoAddress[i] <- as.character(result[[3]])
}

Visualizing results in a map

Getting the map background with the right zoom.

# Attach the plotting packages.
library(ggplot2)
library(ggmap)
# Fetch the background map: a colour terrain map of the United States from
# Google at zoom level 4.
map <- get_map(
  location = "united states",
  zoom = 4,
  maptype = "terrain",
  source = "google",
  color = "color",
  force = TRUE
)
# Draw the bare background map with axis lines, text, ticks and titles removed.
ggmap(map) +
  labs(x = "", y = "") +
  theme(
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    plot.margin = unit(c(0, 0, -1, -1), "lines")
  )

Mapping frequency of trials by city as density.

# Strip the ",United States" suffix from `e` so that its addresses match the
# ones stored in `geocoded`. gsub() is vectorised, so no row loop is needed and
# an empty table is handled correctly.
e$address <- gsub(",United States", "", e$address, fixed = TRUE)
# Attach the coordinates to the frequency table. Both inputs carry a `freq`
# column, so the merged table exposes them as `freq.x` and `freq.y`.
g <- merge(e, geocoded, by = "address")
# One point per address, coloured and sized by its frequency.
# show.legend is a layer argument: inside aes() it would be treated as an
# unknown aesthetic and ignored.
ggmap(map) +
  geom_point(
    aes(x = lon, y = lat, colour = freq.x),
    data = g,
    alpha = 0.5,
    na.rm = TRUE,
    size = g$freq.x * 0.8,
    show.legend = TRUE
  ) +
  scale_color_gradient(low = "green", high = "red") +
  theme(
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    plot.margin = unit(c(0, 0, -1, -1), "lines")
  ) +
  xlab("") +
  ylab("")

Mapping the heatmap for the same trials.

# Strip the ",United States" suffix from `d` so that its addresses match the
# ones stored in `geocoded`. gsub() is vectorised, so no row loop is needed and
# an empty table is handled correctly.
d$address <- gsub(",United States", "", d$address, fixed = TRUE)
# Attach coordinates to every location row; addresses that occur several
# times in `d` therefore contribute several rows to the density estimate.
h <- merge(d, geocoded, by = "address")
# Contour lines plus filled density polygons drawn over the background map.
ggmap(map) +
  geom_density2d(data = h, aes(x = lon, y = lat), size = 0.3) +
  stat_density2d(
    data = h,
    aes(fill = ..level.., alpha = ..level..),
    geom = "polygon",
    bins = 15
  ) +
  scale_fill_gradient(low = "green", high = "red") +
  # guide = "none" hides the alpha legend; guide = FALSE is deprecated.
  scale_alpha(range = c(0.1, 0.3), guide = "none") +
  theme(
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    plot.margin = unit(c(0, 0, -1, -1), "lines")
  ) +
  xlab("") +
  ylab("")

Isolating trials in California

# Keep only the United States locations whose state is California, then show
# the first ten columns of the result.
cali <- c[c[["address.state"]] %in% "California", ]
cali[, seq_len(10)]
LS0tCnRpdGxlOiAiVmlzdWFsaXphdGlvbiBvZiBDcml0aWNhbCBDYXJlIFJlbGF0ZWQgQ2xpbmljYWwgVHJpYWxzIGluIHRoZSBVU0EiCm91dHB1dDogaHRtbF9ub3RlYm9vawphdXRob3I6ICJHZW5lciBKIEF2aWxlcy1Sb2RyaWd1ZXoiCi0tLQoKVGhlIERpdmlzaW9uIG9mIFRyYXVtYSAmIEFjdXRlIENhcmUgU3VyZ2VyeSBhdCBMb21hIExpbmRhIFVuaXZlcnNpdHkgTWVkaWNhbCBDZW50ZXIgaXMgYSBodWIgb2YgdG9wIG9mIHRoZSBsaW5lIG1lZGljYWwgY2FyZSBhcyB3ZWxsIGFzIGNsaW5pY2FsIHJlc2VhcmNoLgoKVGhpcyBpcyBhbiBleGFtcGxlIG9mIGFuIGFwcHJveGltYXRpb24gdG8gdGhlIGRhdGFiYXNlIG9mIGNsaW5pY2FsIHRyaWFscyBtYWludGFpbmVkIGJ5IHRoZSBOYXRpb25hbCBMaWJyYXJ5IG9mIE1lZGljaW5lIChOTE0pIG9mIHRoZSBOYXRpb25hbCBJbnN0aXR1dGVzIG9mIEhlYWx0aCAoTklIKS4gVGhpcyB0b29sIGZhY2lsaXRhdGVzIHRoZSBleHBsb3JhdGlvbiBhbmQgdmlzdWFsaXphdGlvbiBvZiBjbGluaWNhbCB0cmlhbHMgcmVjb2duaXplZCBieSB0aGUgZ292ZXJubWVudCBhbmQgcmVwb3J0ZWQgYXQgW0NsaW5pY2FsVHJpYWxzLmdvdl0oaHR0cHM6Ly9jbGluaWNhbHRyaWFscy5nb3YvKS4KClRoZSByZXN0IG9mIHRoZSBkb2N1bWVudCB3aWxsIHNob3cgYSBzdGVwIGJ5IHN0ZXAgYXBwcm94aW1hdGlvbiB0byB0aGUgcHJvY2VzcyBvZiBkYXRhbWluaW5nIGFuZCB2aXN1YWxpemF0aW9uIG9mIHRoZSBpbmZvcm1hdGlvbi4KCiMjIFNlYXJjaGluZyBhbmQgZGF0YW1pbmluZyB0aGUgc2l0ZS4KCiMjIyBJbml0aWFsIHN1YnNldHRpbmcgYW5kIGRvd25sb2FkaW5nIGZyb20gd2Vic2l0ZS4KClRoZSBmb2xsb3dpbmcgdGFibGUgc2hvd3MgdGhlIGZpcnN0IDYgc3R1ZGllcyBvZiBhIHRvdGFsIG9mIGByIGRpbShjKVsxXWAgZm91bmQgYWZ0ZXIgc3Vic2V0dGluZyBhY2Nyb3NzIHRoZSB3aG9sZSAqQ2xpbmljYWxUcmlhbHMuZ292KiBkYXRhc2V0IGZvciBtYWNoZXMgd2l0aCB0aGUgdGVybXMgIiphY3V0ZSogQU5EICpjYXJlKiBBTkQgKnN1cmdlcnkqIi4KCk9uY2UgdGhlIHNlbGVjdGlvbiBpcyBsb2NhdGVkIGFuZCBkb3dubG9hZGVkLCB0aGUgbmV4dCBzdGVwIGlzIHRvIGV4dHJhY3QgYW5kIGtlZXAgb25seSB0aGUgbG9jYXRpb25zIGluIHRoZSBVbml0ZWQgU3RhdGVzLgpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPVRSVUUsIHBhZ2VkLnByaW50PVRSVUV9CmxpYnJhcnkocmNsaW5pY2FsdHJpYWxzKQojIFdpdGggdGhlIHNlYXJjaCBwYXJhbWV0ZXJzLCBkb3dubG9hZCB0aGUgZGF0YS4gIFRoZSBzZWFyY2ggaXMgbWVhdG4gdG8gZG93bmxvYWQgbGVzcyB0aGFuIDEwMCwgaWYgdGhlcmUgYXJlIG1vcmUgdGhhbiAxMDAgcmVzdWx0cywgQ2xpbmNhbCBUcmlhbHMuZ292IHdpbGwgb25seSBnaXZlIHRoZSBmaXJzdCAxMDAKYSA8LSBjbGluaWNhbHRyaWFsc19kb3dubG9hZChxdWVyeSA9IGMoJ3Rlcm09
YWN1dGUgQU5EIGNhcmUgQU5EIHN1cmdlcnknLCdyZWNyPU9wZW4nLCAndHlwZT1JbnRyJywgJ2NudHJ5MT1OQSUzQVVTJyksIGNvdW50ID0gMjAwLCBpbmNsdWRlX3Jlc3VsdHMgPSBUUlVFKQpgYGAKYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1UUlVFLCBwYWdlZC5wcmludD1UUlVFfQojIFdlIHdhbnQgdG8gZXh0cmFjdCBhbGwgdGhlIGxvY2F0aW9ucyBpbiB0aGUgVW5pdGVkIFN0YXRlcyB0byAKYiA8LSBhWzFdJHN0dWR5X2luZm9ybWF0aW9uJGxvY2F0aW9ucwpjIDwtIGJbd2hpY2goYiRhZGRyZXNzLmNvdW50cnk9PSdVbml0ZWQgU3RhdGVzJyksIF0KCiMgRmlyc3QgZml2ZSByZXN1bHRzIG9mIHRoZSBzZWFyY2ggKGFib3V0IDE1MDArIGxvY2F0aW9ucyk6CmhlYWQoYykKYGBgCgojIyMgRXh0cmFjdGluZyBhZGRyZXNzIGFuZCBnZXR0aW5nIHJpZCBvZiB0aGUgcmVzdC4KYGBge3J9CmQgPC0gY1tjKDIsIDMsIDUpXQpkJGFkZHJlc3MgPC0gcGFzdGUoZCRhZGRyZXNzLmNpdHksIGQkYWRkcmVzcy5zdGF0ZSwgZCRhZGRyZXNzLmNvdW50cnksc2VwPSIsIikKCiMgU3VtYXJ5IG9mIHJlc3VsdHMgd2l0aCBhIGZyZXF1ZW5jeSAoY2l0aWVzIHdpdGggbW9yZSB0aGFuIG9uZSBjbGluaWNhbCB0cmlhbCkKbGlicmFyeShwbHlyKQplIDwtIGNvdW50KGQsICdhZGRyZXNzJykKIyB0aGVuIHNvcnQgYXNzZW5kaW5nIGp1c3QgdG8gbGlzdCB0aGVzZSByZXN1bHRzCmUgPC0gZVtvcmRlcigtZSRmcmVxKSxdCgojIEhpZ2hlc3Qgb2NjdXJlbmNlIDIwIHJlc3VsdHMgZnJvbSB0aGUgc2VhcmNoOgpoZWFkKGUsIDEwKQpgYGAKCiMjIFZpc3VhbGl6YXRpb24KCiMjIyBHZW9sb2NhdGlvbiBvZiBjaXRpZXMKCiMjIyMgVXNpbmcgdGhlIFtHb29nbGUgTWFwcyBBUEldKGh0dHBzOi8vY2xvdWQuZ29vZ2xlLmNvbS9tYXBzLXBsYXRmb3JtLykgdG8gb2J0YWluIGxhdGl0dWQgYW5kIGxvbmdpdHVkZSBjb29yZGluYXRlcyBmcm9tIGNpdHkgbmFtZXMgYW5kIHN0YXRlcy4KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRSwgcGFnZWQucHJpbnQ9RkFMU0V9CmxpYnJhcnkoZ2dtYXApCgoKZ2VvY29kZWQgPC0gZGF0YS5mcmFtZShlKQoKIyBMb29wIHRocm91Z2ggdGhlIGFkZHJlc3NlcyB0byBnZXQgcmlkIG9mICJVbml0ZWQgU3RhdGVzIiBzaW5jZSBhbGwgdGhlIGFkZHJlc3NlcyBhcmUgaW4gdGhlIHNhbWUgY291bnRyeS4KZm9yKGkgaW4gMTpucm93KGdlb2NvZGVkKSkKewogICMgUHJpbnQoIldvcmtpbmcuLi4iKQogIHZlY3RvclRleHQgPC0gZ2VvY29kZWQkYWRkcmVzc1tpXQogIHZlY3RvclRleHQgPC0gZ3N1YigiLFVuaXRlZCBTdGF0ZXMiLCIiLCB2ZWN0b3JUZXh0KQogIGdlb2NvZGVkJGFkZHJlc3NbaV0gPC12ZWN0b3JUZXh0CiAgCn0KCiMgTG9vcCB0aHJvdWdoIHRoZSBhZGRyZXNzZXNzIHRvIGFkZCBjb2x1bW5zIG9mIGxhdGl0dWRlLCBsb25naXR1ZGUgYW5kIGFkZHJlc3Nlc3MgYWNjb3JkaW5nIHRvIEdPT0dMRSBNQVBT
IEFQSSB0byBjb21wYXJlLgoKZm9yKGkgaW4gMTpucm93KGdlb2NvZGVkKSkKewogICMgUHJpbnQoIldvcmtpbmcuLi4iKQogIHJlc3VsdCA8LSBnZW9jb2RlKGdlb2NvZGVkJGFkZHJlc3NbaV0sIG91dHB1dCA9ICJsYXRsb25hIiwgc291cmNlID0gImdvb2dsZSIpCiAgZ2VvY29kZWQkbG9uW2ldIDwtIGFzLm51bWVyaWMocmVzdWx0WzFdKQogIGdlb2NvZGVkJGxhdFtpXSA8LSBhcy5udW1lcmljKHJlc3VsdFsyXSkKICBnZW9jb2RlZCRnZW9BZGRyZXNzW2ldIDwtIGFzLmNoYXJhY3RlcihyZXN1bHRbM10pCn0KYGBgCgojIyMgVmlzdWFsaXppbmcgcmVzdWx0cyBpbiBhIG1hcAoKCkdldHRpbmcgdGhlIG1hcCBiYWNrZ3JvdW5kIHdpdGggdGhlIHJpZ2h0IHpvb20uCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIHBhZ2VkLnByaW50PUZBTFNFfQojIGxvYWQgdGhlIHJlcXVpcmVkIGxpYnJhcmllcwpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoZ2dtYXApCiMgZG93bmxvYWQgdGhlIG1hcCBiYWNrZ3JvdW5kIGltYWdlcwptYXA8LWdldF9tYXAobG9jYXRpb249J3VuaXRlZCBzdGF0ZXMnLCB6b29tPTQsIG1hcHR5cGUgPSAidGVycmFpbiIsCiAgICAgICAgICAgICBzb3VyY2U9J2dvb2dsZScsY29sb3I9J2NvbG9yJywgZm9yY2U9VFJVRSkKCmdnbWFwKG1hcCkgKwogIHRoZW1lKGF4aXMubGluZSA9IGVsZW1lbnRfYmxhbmsoKSwKICAgICAgICBheGlzLnRleHQgPSBlbGVtZW50X2JsYW5rKCksCiAgICAgICAgYXhpcy50aWNrcyA9IGVsZW1lbnRfYmxhbmsoKSwKICAgICAgICBwbG90Lm1hcmdpbiA9IHVuaXQoYygwLCAwLCAtMSwgLTEpLCAnbGluZXMnKSkgKwogIHhsYWIoJycpICsKICB5bGFiKCcnKQpgYGAKCiMjIyBNYXBwaW5nIGZyZXF1ZW5jeSBvZiB0cmlhbHMgYnkgY2l0eSBhcyBkZW5zaXR5LgpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFLCBwYWdlZC5wcmludD1GQUxTRX0KIyBHZXR0aW5nIHJpZCBvZiAiVW5pdGVkIFN0YXRlcyIgaW4gZSB0byBtYXRjaCBhZGRyZXNzIGluICJnZW9jb2RlZCIKZm9yKGkgaW4gMTpucm93KGUpKQp7CiAgIyBQcmludCgiV29ya2luZy4uLiIpCiAgdmVjdG9yVGV4dCA8LSBlJGFkZHJlc3NbaV0KICB2ZWN0b3JUZXh0IDwtIGdzdWIoIixVbml0ZWQgU3RhdGVzIiwiIiwgdmVjdG9yVGV4dCkKICBlJGFkZHJlc3NbaV0gPC12ZWN0b3JUZXh0CiAgCn0KCmcgPC0gbWVyZ2UoZSwgZ2VvY29kZWQsYnk9ImFkZHJlc3MiKQoKZ2dtYXAobWFwKSArIGdlb21fcG9pbnQoCiAgYWVzKHg9bG9uLCB5PWxhdCwgc2hvd19ndWlkZSA9IFRSVUUsIGNvbG91cj1mcmVxLngpLCAKICBkYXRhPWcsIGFscGhhPS41LCBuYS5ybSA9IFQsIHNpemUgPSBnJGZyZXEueCowLjgpICArIAogIHNjYWxlX2NvbG9yX2dyYWRpZW50KGxvdz0iZ3JlZW4iLCBoaWdoPSJyZWQiKSArCiAgIHRoZW1lKGF4aXMubGluZSA9IGVsZW1lbnRfYmxhbmsoKSwKICAgICAgICBheGlzLnRleHQgPSBlbGVtZW50X2Js
YW5rKCksCiAgICAgICAgYXhpcy50aWNrcyA9IGVsZW1lbnRfYmxhbmsoKSwKICAgICAgICBwbG90Lm1hcmdpbiA9IHVuaXQoYygwLCAwLCAtMSwgLTEpLCAnbGluZXMnKSkgKwogIHhsYWIoJycpICsKICB5bGFiKCcnKQpgYGAKCiMjIyBNYXBpbmcgdGhlIEhlYXRtYXAgZm9yIHRoZSBzYW1lIHRyaWFscy4KYGBge3J9CmZvcihpIGluIDE6bnJvdyhkKSkKewogICMgUHJpbnQoIldvcmtpbmcuLi4iKQogIHZlY3RvclRleHQgPC0gZCRhZGRyZXNzW2ldCiAgdmVjdG9yVGV4dCA8LSBnc3ViKCIsVW5pdGVkIFN0YXRlcyIsIiIsIHZlY3RvclRleHQpCiAgZCRhZGRyZXNzW2ldIDwtdmVjdG9yVGV4dAogIAp9CgpoIDwtIG1lcmdlKGQsIGdlb2NvZGVkLGJ5PSJhZGRyZXNzIikKCgoKCmdnbWFwKG1hcCkgKyBnZW9tX2RlbnNpdHkyZChkYXRhID0gaCwgIGFlcyh4ID0gbG9uLCB5ID0gbGF0KSwgc2l6ZSA9IDAuMykrCiAgc3RhdF9kZW5zaXR5MmQoZGF0YT1oLCBhZXMoZmlsbCA9IC4ubGV2ZWwuLiwgYWxwaGEgPSAuLmxldmVsLi4pLCBnZW9tPSJwb2x5Z29uIiwgYmlucz0xNSkgKwogIHNjYWxlX2ZpbGxfZ3JhZGllbnQobG93ID0gImdyZWVuIiwgaGlnaCA9ICJyZWQiKSsKICBzY2FsZV9hbHBoYShyYW5nZSA9IGMoMC4xLCAwLjMpLCBndWlkZSA9IEZBTFNFKSArCiAgdGhlbWUoYXhpcy5saW5lID0gZWxlbWVudF9ibGFuaygpLAogICAgICAgIGF4aXMudGV4dCA9IGVsZW1lbnRfYmxhbmsoKSwKICAgICAgICBheGlzLnRpY2tzID0gZWxlbWVudF9ibGFuaygpLAogICAgICAgIHBsb3QubWFyZ2luID0gdW5pdChjKDAsIDAsIC0xLCAtMSksICdsaW5lcycpKSArCiAgeGxhYignJykgKwogIHlsYWIoJycpCmBgYAoKIyMjIElzb2xhdGluZyB0cmlhbHMgaW4gQ2FsaWZvcm5pYQpgYGB7cn0KY2FsaSA8LSBzdWJzZXQoYyxjJGFkZHJlc3Muc3RhdGU9PSJDYWxpZm9ybmlhIikKCmNhbGlbLDE6MTBdCmBgYAoK