Project: Kaggle Predictive Medicine Competition as Motivation

Code Adapted from: Paco Nathan

Mental Model for Problem Approach

Query and Visualize data with Elasticsearch


from elasticsearch import Elasticsearch
from Bio import Entrez
import json
import csv

#es = Elasticsearch()

#es.indices.create(index='pubdemo1', ignore=400)


def search(query, retmax=2000):
    """Search PubMed for ``query`` and return the parsed ESearch response.

    Args:
        query: Search term, passed to Entrez ESearch as ``term``.
        retmax: Maximum number of record IDs to request. Defaults to 2000,
            the value that used to be hard-coded here.

    Returns:
        The object ``Entrez.read`` builds from the XML response. The
        commented-out usage in this file reads ``results['IdList']`` from it.
    """
    # Contact address sent along with the request (see Biopython Entrez docs).
    Entrez.email = 'dixonrj@vcu.edu'
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax=str(retmax),
                            retmode='xml',
                            term=query)
    try:
        return Entrez.read(handle)
    finally:
        # Close the response handle even when parsing raises, so it is
        # not left open for the rest of the session.
        handle.close()


def fetch_details(id_list):
    """Fetch the PubMed records for the given IDs and return them parsed.

    Args:
        id_list: Iterable of PubMed IDs (strings or ints), for example the
            ``IdList`` entry of a ``search`` result.

    Returns:
        The object ``Entrez.read`` builds from the EFetch XML response. The
        commented-out usage in this file reads ``papers['PubmedArticle']``.
    """
    # str() lets callers pass integer IDs as well as strings.
    ids = ','.join(str(pmid) for pmid in id_list)
    # Contact address sent along with the request (see Biopython Entrez docs).
    Entrez.email = 'dixonrj@vcu.edu'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    try:
        return Entrez.read(handle)
    finally:
        # Close the response handle even when parsing raises.
        handle.close()


# Query the PubMed API and return the results as JSON
#results = search('bone marrow transplant')
#id_list = results['IdList']
#papers = fetch_details(id_list)

#for i, paper in enumerate(papers['PubmedArticle']):
#    print("{}) {}".format(i+1, paper['MedlineCitation']['Article']['ArticleTitle']))

#json_data = json.dumps(papers)


#Write to JSON
#json.dump(papers, open('data/infile.json', 'w'))


#Write to CSV
#f = csv.writer(open('data/pubTest1.csv', 'w', encoding='utf-8'))

# Write CSV Header
#f.writerow(json_data[0].keys())

#for x in json_data:
#    f.writerow(x.values())


#Post to Elasticsearch
#for i, paper in enumerate(papers['PubmedArticle']):
#    es.index("pubdemo", "pubs", json_data)

Kibana Visualization

Query the PubMed API. The demo uses 30 publications; the full project uses 3,000.


import json
from Bio import Entrez


def search(query, retmax=30):
    """Search PubMed for ``query`` and return the parsed ESearch response.

    Demo-sized variant: by default only 30 record IDs are requested.

    Args:
        query: Search term, passed to Entrez ESearch as ``term``.
        retmax: Maximum number of record IDs to request. Defaults to 30,
            the value that used to be hard-coded here.

    Returns:
        The object ``Entrez.read`` builds from the XML response. The
        commented-out usage in this file reads ``results['IdList']`` from it.
    """
    # Contact address sent along with the request (see Biopython Entrez docs).
    Entrez.email = 'dixonrj@vcu.edu'
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax=str(retmax),
                            retmode='xml',
                            term=query)
    try:
        return Entrez.read(handle)
    finally:
        # Close the response handle even when parsing raises, so it is
        # not left open for the rest of the session.
        handle.close()


def fetch_details(id_list):
    """Fetch the PubMed records for the given IDs and return them parsed.

    Args:
        id_list: Iterable of PubMed IDs (strings or ints), for example the
            ``IdList`` entry of a ``search`` result.

    Returns:
        The object ``Entrez.read`` builds from the EFetch XML response. The
        code further down this file reads a ``'PubmedArticle'`` list from
        the JSON dump of such a result.
    """
    # str() lets callers pass integer IDs as well as strings.
    ids = ','.join(str(pmid) for pmid in id_list)
    # Contact address sent along with the request (see Biopython Entrez docs).
    Entrez.email = 'dixonrj@vcu.edu'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    try:
        return Entrez.read(handle)
    finally:
        # Close the response handle even when parsing raises.
        handle.close()

# Query the PubMed API and return the results as JSON
#results = search('bone marrow transplant')
#id_list = results['IdList']
#papers = fetch_details(id_list)

#json_data = json.dumps(papers)

#print(json_data)
#json.dump(papers, open('data/infile_demo.json', 'w'))

Here we open the file produced by the API query and reformat it for the PyTextRank algorithm.


import json
import csv

# Load the raw PubMed records saved by the API query. The context manager
# closes the input file as soon as it has been parsed.
with open("data/infile_demo.json", "rb") as sourcefile:
    json_data = json.load(sourcefile)

# Reduce every article to its PMID and abstract text.
summaries = []
for article in json_data['PubmedArticle']:
    try:
        citation = article['MedlineCitation']
        summary = {
            'id': citation['PMID'],
            'text': citation['Article']['Abstract']['AbstractText'],
        }
    except KeyError:
        # Deliberate best-effort: a record missing any of these keys
        # (for instance one without an abstract) is skipped.
        continue
    summaries.append(summary)

# Closing the output file guarantees the JSON is flushed to disk before
# the next stage reads it.
with open('data/demo_outfile.json', 'w') as outfile:
    json.dump(summaries, outfile)


#Write to CSV
# newline='' is what the csv module asks for on files given to csv.writer;
# without it, platforms that translate line endings get blank rows.
# NOTE(review): this file lands in the working directory, unlike the other
# outputs under data/ -- confirm that is intended.
with open('pubTest_demo.csv', 'w', encoding='utf-8', newline='') as csvfile:
    f = csv.writer(csvfile)

    # Write CSV Header. Every summary dict is built above with exactly these
    # two keys, so the header no longer depends on summaries[0] existing.
    f.writerow(['id', 'text'])

    for x in summaries:
        f.writerow([x['id'], x['text']])

PyTextRank requires JSON in a very specific format and is not designed for multiple documents.

As a future exercise, load all of the JSON into a pandas DataFrame and then use a map/apply function to run PyTextRank on each JSON document.

For the purposes of this project, I manually edited demo_outfile.json into suz2.json for the demonstration.

Stage 1:

import pytextrank



#json_file = "data/suz.json"
#json_file = "data/suz1.json"
# Input document for this run; the two disabled lines above are other inputs.
json_file = "data/suz2.json"

# Stage 1: parse the input document and store one pretty-printed record
# per line in path_stage1.
path_stage1 = "data/o1.json"

with open(path_stage1, 'w') as f:
    doc_iter = pytextrank.json_iter(json_file)
    for graf in pytextrank.parse_doc(doc_iter):
        stage1_record = pytextrank.pretty_print(graf._asdict())
        f.write("%s\n" % stage1_record)
        # Echo the parsed result to stdout as well.
        print(pytextrank.pretty_print(graf))
## ["24289469", "db6f24673e21c1c083eba95239fa6fa31506b432", [[0, "The", "the", "DT", 0, 0], [1, "development", "development", "NN", 1, 1], [0, "of", "of", "IN", 0, 2], [0, "genetically", "genetically", "RB", 0, 3], [2, "modified", "modify", "VBN", 1, 4], [3, "pigs", "pig", "NNS", 1, 5], [0, ",", ",", ".", 0, 6], [0, "which", "which", "WDT", 0, 7], [4, "lack", "lack", "VBP", 1, 8], [0, "the", "the", "DT", 0, 9], [5, "expression", "expression", "NN", 1, 10], [0, "of", "of", "IN", 0, 11], [6, "alpha", "alpha", "NN", 1, 12], [0, "1-3", "1-3", "CD", 0, 13], [7, "galactosyl", "galactosyl", "NN", 1, 14], [8, "transferase", "transferase", "NN", 1, 15], [0, ",", ",", ".", 0, 16], [0, "(", "(", ".", 0, 17], [9, "GalT-KO", "galt-ko", "NNP", 1, 18], [3, "pigs", "pig", "NNS", 1, 19], [0, ")", ")", ".", 0, 20], [10, "has", "have", "VBZ", 1, 21], [11, "facilitated", "facilitate", "VBN", 1, 22], [0, "the", "the", "DT", 0, 23], [12, "xenogeneic", "xenogeneic", "NN", 1, 24], [13, "transplantation", "transplantation", "NN", 1, 25], [0, "of", "of", "IN", 0, 26], [14, "porcine", "porcine", "JJ", 1, 27], [15, "organs", "organ", "NNS", 1, 28], [0, "and", "and", "CC", 0, 29], [16, "tissues", "tissue", "NNS", 1, 30], [0, "into", "into", "IN", 0, 31], [17, "primates", "primate", "NNS", 1, 32], [0, "by", "by", "IN", 0, 33], [18, "avoiding", "avoid", "VBG", 1, 34], [19, "hyperacute", "hyperacute", "NN", 1, 35], [20, "rejection", "rejection", "NN", 1, 36], [21, "due", "due", "JJ", 1, 37], [0, "to", "to", "IN", 0, 38], [22, "pre-existing", "pre-exist", "NN", 1, 39], [23, "antibodies", "antibody", "NNS", 1, 40], [0, "against", "against", "IN", 0, 41], [0, "the", "the", "DT", 0, 42], [24, "Gal", "gal", "NNP", 1, 43], [25, "epitope", "epitope", "NN", 1, 44], [0, ".", ".", ".", 0, 45]]]
## ["24289469", "275720b71866399ab97c199ff7ad526be15831ee", [[0, "However", "however", "RB", 0, 46], [0, ",", ",", ".", 0, 47], [23, "antibodies", "antibody", "NNS", 1, 48], [0, "against", "against", "IN", 0, 49], [26, "other", "other", "JJ", 1, 50], [27, "antigens", "antigen", "NNS", 1, 51], [0, "(", "(", ".", 0, 52], [28, "anti-non-Gal", "anti-non-gal", "JJ", 1, 53], [23, "antibodies", "antibody", "NNS", 1, 54], [0, ")", ")", ".", 0, 55], [0, ",", ",", ".", 0, 56], [29, "are", "be", "VBP", 1, 57], [30, "found", "find", "VBN", 1, 58], [0, "at", "at", "IN", 0, 59], [31, "varying", "vary", "VBG", 1, 60], [32, "levels", "level", "NNS", 1, 61], [0, "in", "in", "IN", 0, 62], [0, "the", "the", "DT", 0, 63], [33, "pre-transplant", "pre-transplant", "NN", 1, 64], [34, "sera", "sera", "NN", 1, 65], [0, "of", "of", "IN", 0, 66], [35, "most", "most", "JJS", 1, 67], [17, "primates", "primate", "NNS", 1, 68], [0, ".", ".", ".", 0, 69]]]
## ["24289469", "07ae503eba64062d3b2a633825934b2119282921", [[0, "We", "we", "PRP", 0, 70], [10, "have", "have", "VBP", 1, 71], [0, "previously", "previously", "RB", 0, 72], [30, "found", "find", "VBN", 1, 73], [0, "that", "that", "IN", 0, 74], [36, "baboons", "baboon", "NNS", 1, 75], [0, "with", "with", "IN", 0, 76], [37, "high", "high", "JJ", 1, 77], [32, "levels", "level", "NNS", 1, 78], [0, "of", "of", "IN", 0, 79], [33, "pre-transplant", "pre-transplant", "NN", 1, 80], [28, "anti-non-Gal", "anti-non-gal", "JJ", 1, 81], [38, "IgG", "igg", "NNP", 1, 82], [0, ",", ",", ".", 0, 83], [39, "conditioned", "condition", "VBN", 1, 84], [0, "with", "with", "IN", 0, 85], [0, "a", "a", "DT", 0, 86], [40, "non-myeloablative", "non-myeloablative", "NN", 1, 87], [41, "conditioning", "conditioning", "NN", 1, 88], [42, "regimen", "regimen", "NN", 1, 89], [0, ",", ",", ".", 0, 90], [43, "failed", "fail", "VBD", 1, 91], [0, "to", "to", "IN", 0, 92], [44, "engraft", "engraft", "VB", 1, 93], [45, "following", "follow", "VBG", 1, 94], [46, "pig-to-baboon", "pig-to-baboon", "NN", 1, 95], [47, "bone", "bone", "NN", 1, 96], [48, "marrow", "marrow", "NN", 1, 97], [13, "transplantation", "transplantation", "NN", 1, 98], [0, "(", "(", ".", 0, 99], [49, "Xenotransplantation", "xenotransplantation", "NNP", 1, 100], [0, ",", ",", ".", 0, 101], [0, "17", "17", "CD", 0, 102], [0, ",", ",", ".", 0, 103], [0, "2010", "2010", "CD", 0, 104], [0, "and", "and", "CC", 0, 105], [0, "300", "300", "CD", 0, 106], [0, ")", ")", ".", 0, 107], [0, ".", ".", ".", 0, 108]]]
## ["24289469", "ecb606286bd18e97862951f873d046878a288965", [[0, "Two", "two", "CD", 0, 109], [36, "baboons", "baboon", "NNS", 1, 110], [0, "with", "with", "IN", 0, 111], [50, "low", "low", "JJ", 1, 112], [32, "levels", "level", "NNS", 1, 113], [0, "of", "of", "IN", 0, 114], [33, "pre-transplant", "pre-transplant", "NN", 1, 115], [28, "anti-non-Gal", "anti-non-gal", "JJ", 1, 116], [38, "IgG", "igg", "NNP", 1, 117], [0, ",", ",", ".", 0, 118], [39, "conditioned", "condition", "VBN", 1, 119], [0, "with", "with", "IN", 0, 120], [0, "the", "the", "DT", 0, 121], [51, "same", "same", "JJ", 1, 122], [42, "regimen", "regimen", "NN", 1, 123], [0, ",", ",", ".", 0, 124], [52, "showed", "show", "VBD", 1, 125], [14, "porcine", "porcine", "JJ", 1, 126], [47, "bone", "bone", "NN", 1, 127], [48, "marrow", "marrow", "NN", 1, 128], [53, "progenitors", "progenitor", "NNS", 1, 129], [0, "at", "at", "IN", 0, 130], [54, "28\\xa0days", "28\\xa0day", "NNS", 1, 131], [45, "following", "follow", "VBG", 1, 132], [13, "transplantation", "transplantation", "NN", 1, 133], [0, ",", ",", ".", 0, 134], [55, "suggesting", "suggest", "VBG", 1, 135], [56, "engraftment", "engraftment", "NN", 1, 136], [0, ".", ".", ".", 0, 137]]]
## ["24289469", "ad1e96a5f3a0e171da4904aaa655d3f64739938a", [[0, "These", "these", "DT", 0, 138], [36, "baboons", "baboon", "NNS", 1, 139], [0, "also", "also", "RB", 0, 140], [52, "showed", "show", "VBD", 1, 141], [57, "evidence", "evidence", "NN", 1, 142], [0, "of", "of", "IN", 0, 143], [58, "donor-specific", "donor-specific", "NN", 1, 144], [59, "hyporesponsiveness", "hyporesponsiveness", "NN", 1, 145], [0, ".", ".", ".", 0, 146]]]
## ["24289469", "7cfdd227c62fcc840752efcb8c271e9ba7a562ad", [[0, "This", "this", "DT", 0, 147], [60, "observation", "observation", "NN", 1, 148], [61, "led", "lead", "VBD", 1, 149], [0, "us", "us", "PRP", 0, 150], [0, "to", "to", "TO", 0, 151], [62, "investigate", "investigate", "VB", 1, 152], [0, "the", "the", "DT", 0, 153], [63, "hypothesis", "hypothesis", "NN", 1, 154], [0, "that", "that", "IN", 0, 155], [64, "selecting", "select", "VBG", 1, 156], [0, "for", "for", "IN", 0, 157], [36, "baboon", "baboon", "NN", 1, 158], [65, "recipients", "recipient", "NNS", 1, 159], [0, "with", "with", "IN", 0, 160], [50, "low", "low", "JJ", 1, 161], [33, "pre-transplant", "pre-transplant", "NN", 1, 162], [28, "anti-non-Gal", "anti-non-gal", "JJ", 1, 163], [38, "IgG", "igg", "NNP", 1, 164], [32, "levels", "level", "NNS", 1, 165], [0, "might", "might", "MD", 0, 166], [66, "improve", "improve", "VB", 1, 167], [56, "engraftment", "engraftment", "NN", 1, 168], [32, "levels", "level", "NNS", 1, 169], [45, "following", "follow", "VBG", 1, 170], [9, "GalT-KO", "galt-ko", "NNP", 1, 171], [46, "pig-to-baboon", "pig-to-baboon", "NN", 1, 172], [47, "bone", "bone", "NN", 1, 173], [48, "marrow", "marrow", "NN", 1, 174], [13, "transplantation", "transplantation", "NN", 1, 175], [0, ".", ".", ".", 0, 176], [0, "'", "'", ".", 0, 177], [0, ",", ",", ".", 0, 178], [0, "'", "'", ".", 0, 179], [0, "Five", "five", "CD", 0, 180], [36, "baboons", "baboon", "NNS", 1, 181], [0, ",", ",", ".", 0, 182], [0, "with", "with", "IN", 0, 183], [50, "low", "low", "JJ", 1, 184], [33, "pre-transplant", "pre-transplant", "NN", 1, 185], [28, "anti-non-Gal", "anti-non-gal", "JJ", 1, 186], [38, "IgG", "igg", "NNP", 1, 187], [32, "levels", "level", "NNS", 1, 188], [0, ",", ",", ".", 0, 189], [67, "received", "receive", "VBD", 1, 190], [13, "transplantation", "transplantation", "NN", 1, 191], [0, "of", "of", "IN", 0, 192], [47, "bone", "bone", "NN", 1, 193], [48, "marrow", "marrow", "NN", 1, 194], [68, "cells", "cell", 
##   "NNS", 1, 195], [0, "(", "(", ".", 0, 196], [0, "1-5\\xa0\u00d7\\xa010(9", "1-5\\xa0\u00d7\\xa010(9", "CD", 0, 197], [0, ")", ")", ".", 0, 198], [0, "/kg", "/kg", "CD", 0, 199], [0, "of", "of", "IN", 0, 200], [65, "recipient", "recipient", "NN", 1, 201], [69, "weight", "weight", "NN", 1, 202], [0, ")", ")", ".", 0, 203], [0, "from", "from", "IN", 0, 204], [9, "GalT-KO", "galt-ko", "NNP", 1, 205], [3, "pigs", "pig", "NNS", 1, 206], [0, ".", ".", ".", 0, 207]]]
## ["24289469", "577bb4861f1f067de332861ee385be7112cdb9fc", [[0, "They", "they", "PRP", 0, 208], [67, "received", "receive", "VBD", 1, 209], [0, "a", "a", "DT", 0, 210], [0, "non-myeloablative", "non-myeloablative", "AFX", 0, 211], [41, "conditioning", "conditioning", "NN", 1, 212], [42, "regimen", "regimen", "NN", 1, 213], [70, "consisting", "consist", "VBG", 1, 214], [0, "of", "of", "IN", 0, 215], [71, "low-dose", "low-dose", "JJ", 1, 216], [72, "total", "total", "JJ", 1, 217], [73, "body", "body", "NN", 1, 218], [74, "irradiation", "irradiation", "NN", 1, 219], [0, "(", "(", ".", 0, 220], [75, "TBI", "tbi", "NNP", 1, 221], [0, ")", ")", ".", 0, 222], [0, "(", "(", ".", 0, 223], [0, "150\\xa0cGy", "150\\xa0cgy", "CD", 0, 224], [0, ")", ")", ".", 0, 225], [0, ",", ",", ".", 0, 226], [76, "thymic", "thymic", "JJ", 1, 227], [74, "irradiation", "irradiation", "NN", 1, 228], [0, "(", "(", ".", 0, 229], [77, "700\\xa0cGy", "700\\xa0cgy", "NNP", 1, 230], [0, ")", ")", ".", 0, 231], [0, ",", ",", ".", 0, 232], [78, "anti", "anti", "JJ", 1, 233], [0, "-", "-", ".", 0, 234], [79, "thymocyte", "thymocyte", "NN", 1, 235], [80, "globulin", "globulin", "NN", 1, 236], [0, "(", "(", ".", 0, 237], [81, "ATG", "atg", "NNP", 1, 238], [0, ")", ")", ".", 0, 239], [0, ",", ",", ".", 0, 240], [0, "and", "and", "CC", 0, 241], [82, "tacrolimus", "tacrolimus", "NN", 1, 242], [0, ".", ".", ".", 0, 243]]]
## ["24289469", "00bef883e6498b8a82939703e33becdcb8afbe29", [[0, "In", "in", "IN", 0, 244], [83, "addition", "addition", "NN", 1, 245], [0, ",", ",", ".", 0, 246], [0, "two", "two", "CD", 0, 247], [36, "baboons", "baboon", "NNS", 1, 248], [67, "received", "receive", "VBD", 1, 249], [84, "Rituximab", "rituximab", "NNP", 1, 250], [0, "and", "and", "CC", 0, 251], [85, "Bortezomib", "bortezomib", "NNP", 1, 252], [0, "(", "(", ".", 0, 253], [86, "Velcade", "velcade", "NNP", 1, 254], [0, ")", ")", ".", 0, 255], [87, "treatment", "treatment", "NN", 1, 256], [0, "as", "as", "RB", 0, 257], [0, "well", "well", "RB", 0, 258], [0, "as", "as", "IN", 0, 259], [88, "extra-corporeal", "extra-corporeal", "JJ", 1, 260], [89, "immunoadsorption", "immunoadsorption", "NN", 1, 261], [90, "using", "use", "VBG", 1, 262], [9, "GalT-KO", "galt-ko", "NNP", 1, 263], [3, "pig", "pig", "NN", 1, 264], [91, "livers", "liver", "NNS", 1, 265], [0, ".", ".", ".", 0, 266]]]
## ["24289469", "f0192980a054d9a01312015a01dcfafe8b5bc91e", [[47, "Bone", "bone", "NNP", 1, 267], [48, "marrow", "marrow", "NN", 1, 268], [56, "engraftment", "engraftment", "NN", 1, 269], [29, "was", "be", "VBD", 1, 270], [92, "assessed", "assess", "VBN", 1, 271], [0, "by", "by", "IN", 0, 272], [93, "porcine-specific", "porcine-specific", "JJ", 1, 273], [94, "PCR", "pcr", "NN", 1, 274], [0, "on", "on", "IN", 0, 275], [95, "colony", "colony", "NN", 1, 276], [96, "forming", "form", "VBG", 1, 277], [97, "units", "unit", "NNS", 1, 278], [0, "(", "(", ".", 0, 279], [98, "CFU", "cfu", "NNP", 1, 280], [0, ")", ")", ".", 0, 281], [0, "of", "of", "IN", 0, 282], [99, "day", "day", "NN", 1, 283], [0, "28", "28", "CD", 0, 284], [47, "bone", "bone", "NN", 1, 285], [48, "marrow", "marrow", "NN", 1, 286], [100, "aspirates", "aspirate", "NNS", 1, 287], [0, ".", ".", ".", 0, 288]]]
## ["24289469", "286f8d1b9055998edddd7572d58cfe40910bf44b", [[0, "Anti-non-Gal", "anti-non-gal", "RB", 0, 289], [23, "antibody", "antibody", "NN", 1, 290], [32, "levels", "level", "NNS", 1, 291], [29, "were", "be", "VBD", 1, 292], [92, "assessed", "assess", "VBN", 1, 293], [0, "by", "by", "IN", 0, 294], [101, "serum", "serum", "NN", 1, 295], [102, "binding", "bind", "VBG", 1, 296], [0, "toward", "toward", "IN", 0, 297], [9, "GalT-KO", "galt-ko", "NNP", 1, 298], [103, "PBMC", "pbmc", "NNP", 1, 299], [90, "using", "use", "VBG", 1, 300], [104, "flow", "flow", "NN", 1, 301], [105, "cytometry", "cytometry", "NN", 1, 302], [0, "(", "(", ".", 0, 303], [106, "FACS", "facs", "NNP", 1, 304], [0, ")", ")", ".", 0, 305], [0, ".", ".", ".", 0, 306]]]
## ["24289469", "2b68747c683e60fb9742cd8bc2020be975583033", [[107, "Peripheral", "peripheral", "NNP", 1, 307], [108, "macro-chimerism", "macro-chimerism", "NN", 1, 308], [29, "was", "be", "VBD", 1, 309], [109, "measured", "measure", "VBN", 1, 310], [0, "by", "by", "IN", 0, 311], [110, "FACS", "fac", "NNS", 1, 312], [90, "using", "use", "VBG", 1, 313], [3, "pig", "pig", "NN", 1, 314], [0, "and", "and", "CC", 0, 315], [111, "baboon-specific", "baboon-specific", "NN", 1, 316], [23, "antibodies", "antibody", "NNS", 1, 317], [0, "and", "and", "CC", 0, 318], [36, "baboon", "baboon", "NN", 1, 319], [112, "anti-pig", "anti-pig", "JJ", 1, 320], [113, "cellular", "cellular", "NN", 1, 321], [114, "responses", "response", "NNS", 1, 322], [29, "were", "be", "VBD", 1, 323], [92, "assessed", "assess", "VBN", 1, 324], [0, "by", "by", "IN", 0, 325], [115, "mixed", "mixed", "JJ", 1, 326], [116, "lymphocyte", "lymphocyte", "JJ", 1, 327], [117, "reactions", "reaction", "NNS", 1, 328], [0, "(", "(", ".", 0, 329], [118, "MLR", "mlr", "NNP", 1, 330], [0, ")", ")", ".", 0, 331], [0, ".", ".", ".", 0, 332], [0, "'", "'", ".", 0, 333], [0, ",", ",", ".", 0, 334], [0, "'", "'", ".", 0, 335], [0, "As", "as", "IN", 0, 336], [0, "previously", "previously", "RB", 0, 337], [119, "reported", "report", "VBN", 1, 338], [0, ",", ",", ".", 0, 339], [0, "two", "two", "CD", 0, 340], [0, "of", "of", "IN", 0, 341], [0, "five", "five", "CD", 0, 342], [36, "baboons", "baboon", "NNS", 1, 343], [120, "demonstrated", "demonstrate", "VBD", 1, 344], [121, "detectable", "detectable", "JJ", 1, 345], [47, "bone", "bone", "NN", 1, 346], [48, "marrow", "marrow", "NN", 1, 347], [56, "engraftment", "engraftment", "NN", 1, 348], [0, "at", "at", "IN", 0, 349], [0, "4\\xa0weeks", "4\\xa0weeks", "CD", 0, 350], [0, "after", "after", "IN", 0, 351], [13, "transplantation", "transplantation", "NN", 1, 352], [0, ".", ".", ".", 0, 353]]]
## ["24289469", "440a08683f61fd3a09121920f81d881c0fd2927a", [[56, "Engraftment", "engraftment", "NNP", 1, 354], [29, "was", "be", "VBD", 1, 355], [122, "associated", "associate", "VBN", 1, 356], [0, "with", "with", "IN", 0, 357], [4, "lack", "lack", "NN", 1, 358], [0, "of", "of", "IN", 0, 359], [0, "an", "an", "DT", 0, 360], [123, "increase", "increase", "NN", 1, 361], [0, "in", "in", "IN", 0, 362], [28, "anti-non-Gal", "anti-non-gal", "JJ", 1, 363], [38, "IgG", "igg", "NNP", 1, 364], [32, "levels", "level", "NNS", 1, 365], [0, "as", "as", "RB", 0, 366], [0, "well", "well", "RB", 0, 367], [0, "as", "as", "IN", 0, 368], [113, "cellular", "cellular", "JJ", 1, 369], [59, "hyporesponsiveness", "hyporesponsiveness", "NN", 1, 370], [0, "toward", "toward", "IN", 0, 371], [3, "pig", "pig", "NN", 1, 372], [0, ".", ".", ".", 0, 373]]]
## ["24289469", "54b52e5d0cc2452cd601c125ec22b698ed7dbd81", [[0, "Three", "three", "CD", 0, 374], [124, "subsequent", "subsequent", "JJ", 1, 375], [36, "baboons", "baboon", "NNS", 1, 376], [0, "with", "with", "IN", 0, 377], [0, "similarly", "similarly", "RB", 0, 378], [50, "low", "low", "JJ", 1, 379], [32, "levels", "level", "NNS", 1, 380], [0, "of", "of", "IN", 0, 381], [125, "pre", "pre", "NN", 1, 382], [0, "-", "-", ".", 0, 383], [126, "existing", "exist", "VBG", 1, 384], [28, "anti-non-Gal", "anti-non-gal", "JJ", 1, 385], [38, "IgG", "igg", "NNP", 1, 386], [52, "showed", "show", "VBD", 1, 387], [0, "no", "no", "DT", 0, 388], [56, "engraftment", "engraftment", "NN", 1, 389], [0, "and", "and", "CC", 0, 390], [0, "an", "an", "DT", 0, 391], [123, "increase", "increase", "NN", 1, 392], [0, "in", "in", "IN", 0, 393], [28, "anti-non-Gal", "anti-non-gal", "JJ", 1, 394], [38, "IgG", "igg", "NNP", 1, 395], [23, "antibody", "antibody", "NN", 1, 396], [32, "levels", "level", "NNS", 1, 397], [45, "following", "follow", "VBG", 1, 398], [13, "transplantation", "transplantation", "NN", 1, 399], [0, ".", ".", ".", 0, 400]]]
## ["24289469", "8bb168b477337a6664c9cbc1a3f4c1da7ebb6d6f", [[107, "Peripheral", "peripheral", "JJ", 1, 401], [127, "macrochimerism", "macrochimerism", "NN", 1, 402], [29, "was", "be", "VBD", 1, 403], [0, "only", "only", "RB", 0, 404], [128, "seen", "see", "VBN", 1, 405], [0, "for", "for", "IN", 0, 406], [0, "a", "a", "DT", 0, 407], [129, "few", "few", "JJ", 1, 408], [99, "days", "day", "NNS", 1, 409], [45, "following", "follow", "VBG", 1, 410], [13, "transplantation", "transplantation", "NN", 1, 411], [0, "regardless", "regardless", "RB", 0, 412], [0, "of", "of", "IN", 0, 413], [23, "antibody", "antibody", "NN", 1, 414], [1, "development", "development", "NN", 1, 415], [0, ".", ".", ".", 0, 416], [0, "'", "'", ".", 0, 417], [0, ",", ",", ".", 0, 418], [0, "'", "'", ".", 0, 419], [64, "Selecting", "select", "VBG", 1, 420], [0, "for", "for", "IN", 0, 421], [36, "baboon", "baboon", "NN", 1, 422], [65, "recipients", "recipient", "NNS", 1, 423], [0, "with", "with", "IN", 0, 424], [50, "low", "low", "JJ", 1, 425], [32, "levels", "level", "NNS", 1, 426], [0, "of", "of", "IN", 0, 427], [33, "pre-transplant", "pre-transplant", "NN", 1, 428], [28, "anti-non-Gal", "anti-non-gal", "JJ", 1, 429], [38, "IgG", "igg", "NNP", 1, 430], [130, "did", "do", "VBD", 1, 431], [0, "not", "not", "RB", 0, 432], [131, "ensure", "ensure", "VB", 1, 433], [47, "bone", "bone", "NN", 1, 434], [48, "marrow", "marrow", "NN", 1, 435], [56, "engraftment", "engraftment", "NN", 1, 436], [0, ".", ".", ".", 0, 437]]]
## ["24289469", "7ce84c923d3349cc1ec3b3ffa90c62add5516f57", [[132, "Failure", "failure", "NN", 1, 438], [0, "to", "to", "IN", 0, 439], [44, "engraft", "engraft", "NN", 1, 440], [29, "was", "be", "VBD", 1, 441], [122, "associated", "associate", "VBN", 1, 442], [0, "with", "with", "IN", 0, 443], [0, "an", "an", "DT", 0, 444], [123, "increase", "increase", "NN", 1, 445], [0, "in", "in", "IN", 0, 446], [28, "anti-non-Gal", "anti-non-gal", "JJ", 1, 447], [38, "IgG", "igg", "NNP", 1, 448], [32, "levels", "level", "NNS", 1, 449], [45, "following", "follow", "VBG", 1, 450], [13, "transplantation", "transplantation", "NN", 1, 451], [0, ".", ".", ".", 0, 452]]]
## ["24289469", "a8a0169e5a17701fa83b54ed7e4be706476482b5", [[0, "These", "these", "DT", 0, 453], [133, "results", "result", "NNS", 1, 454], [55, "suggest", "suggest", "VBP", 1, 455], [0, "that", "that", "IN", 0, 456], [134, "anti-non-Gal-IgG", "anti-non-gal-igg", "JJ", 1, 457], [29, "is", "be", "VBZ", 1, 458], [135, "likely", "likely", "JJ", 1, 459], [136, "involved", "involve", "VBN", 1, 460], [0, "in", "in", "IN", 0, 461], [137, "early", "early", "JJ", 1, 462], [47, "bone", "bone", "NN", 1, 463], [48, "marrow", "marrow", "NN", 1, 464], [20, "rejection", "rejection", "NN", 1, 465], [0, "and", "and", "CC", 0, 466], [0, "that", "that", "DT", 0, 467], [138, "successful", "successful", "JJ", 1, 468], [139, "strategies", "strategy", "NNS", 1, 469], [0, "for", "for", "IN", 0, 470], [140, "combating", "combat", "VBG", 1, 471], [28, "anti-non-Gal", "anti-non-gal", "JJ", 1, 472], [38, "IgG", "igg", "NNP", 1, 473], [1, "development", "development", "NN", 1, 474], [0, "may", "may", "MD", 0, 475], [141, "allow", "allow", "VB", 1, 476], [142, "better", "better", "JJR", 1, 477], [56, "engraftment", "engraftment", "NN", 1, 478], [0, ".", ".", ".", 0, 479]]]
## ["24289469", "209db2c0ca7ee73c556d24d81be8b34d97fb674d", [[0, "Since", "since", "IN", 0, 480], [56, "engraftment", "engraftment", "NN", 1, 481], [29, "was", "be", "VBD", 1, 482], [0, "only", "only", "RB", 0, 483], [50, "low", "low", "JJ", 1, 484], [0, "and", "and", "CC", 0, 485], [143, "transient", "transient", "JJ", 1, 486], [0, "regardless", "regardless", "RB", 0, 487], [0, "of", "of", "IN", 0, 488], [23, "antibody", "antibody", "NN", 1, 489], [1, "development", "development", "NN", 1, 490], [0, ",", ",", ".", 0, 491], [144, "innate", "innate", "JJ", 1, 492], [145, "immune", "immune", "NN", 1, 493], [0, ",", ",", ".", 0, 494], [0, "or", "or", "CC", 0, 495], [146, "species", "specie", "NNS", 1, 496], [147, "compatibility", "compatibility", "NN", 1, 497], [148, "mechanisms", "mechanism", "NNS", 1, 498], [0, "will", "will", "MD", 0, 499], [0, "likely", "likely", "RB", 0, 500], [0, "also", "also", "RB", 0, 501], [149, "need", "need", "VB", 1, 502], [0, "to", "to", "TO", 0, 503], [29, "be", "be", "VB", 1, 504], [150, "addressed", "address", "VBN", 1, 505], [0, "to", "to", "TO", 0, 506], [151, "achieve", "achieve", "VB", 1, 507], [152, "long", "long", "JJ", 1, 508], [153, "term", "term", "NN", 1, 509], [56, "engraftment", "engraftment", "NN", 1, 510], [0, ".", ".", ".", 0, 511]]]

Stage 2

import pytextrank

# File locations for the pipeline stages. path_stage0 is not used by the
# code in this cell.
path_stage0 = "data/demo_outfile.json"
path_stage1 = "data/o1.json"
path_stage2 = "data/o2.json"

# Stage 2: rank the stage-1 output, render the ranked graph, then store one
# pretty-printed key phrase per line in path_stage2.
graph, ranks = pytextrank.text_rank(path_stage1)
pytextrank.render_ranks(graph, ranks)

with open(path_stage2, 'w') as f:
    for rl in pytextrank.normalize_key_phrases(path_stage1, ranks):
        stage2_record = pytextrank.pretty_print(rl._asdict())
        f.write("%s\n" % stage2_record)
        # Echo the ranked phrase to stdout as well.
        print(pytextrank.pretty_print(rl))
## ["xenogeneic transplantation", 0.033661803235213554, [12, 13], "np", 1]
## ["engraftment", 0.03314572477199031, [56], "np", 3]
## ["pig-to-baboon bone marrow transplantation", 0.029617060945844317, [46, 47, 48, 13], "np", 1]
## ["pre-existing antibodies", 0.027146696578949355, [22, 23], "np", 1]
## ["high levels", 0.026143826504640966, [37, 32], "np", 1]
## ["galt-ko pigs", 0.020815563632529996, [9, 3], "np", 2]
## ["anti-non-gal antibodies", 0.020813603751696462, [28, 23], "np", 1]
## ["low pre-transplant anti-non-gal igg levels", 0.01962545047044707, [50, 33, 28, 38, 32], "np", 2]
## ["engraftment levels", 0.016572862385995157, [56, 32], "np", 1]
## ["marrow", 0.014808530472922158, [48], "nn", 9]
## ["porcine bone marrow progenitors", 0.014808530472922158, [14, 47, 48, 53], "np", 1]
## ["antibody", 0.013573348289474678, [23], "nn", 8]
## ["be", 0.01333290063596773, [29], "vb", 11]
## ["bone", 0.01311388151243413, [47], "nn", 9]
## ["levels", 0.013071913252320483, [32], "nns", 12]
## ["low levels", 0.013071913252320483, [50, 32], "np", 3]
## ["most primates", 0.011708664942470559, [35, 17], "np", 1]
## ["following", 0.011349450470622809, [45], "vbg", 6]
## ["hyperacute rejection", 0.011227449378217517, [19, 20], "np", 1]
## ["transplantation", 0.01122060107840452, [13], "np", 5]
## ["bone marrow engraftment", 0.011048574923996772, [47, 48, 56], "np", 2]
## ["baboon recipients", 0.010737394440877412, [36, 65], "np", 2]
## ["a few days", 0.010452544030804227, [0, 129, 99], "np", 1]
## ["galt-ko", 0.010407781816264998, [9], "np", 2]
## ["galt-ko pig-to-baboon bone marrow transplantation", 0.009872353648614772, [9, 46, 47, 48, 13], "np", 1]
## ["igg", 0.009812725235223535, [38], "nnp", 10]
## ["anti-non-gal igg levels", 0.009812725235223535, [28, 38, 32], "np", 2]
## ["pre-transplant sera", 0.009551166675731042, [33, 34], "np", 1]
## ["cfu", 0.009296339169599438, [98], "np", 1]
## ["galt-ko pig livers", 0.009166359399088058, [9, 3, 91], "np", 1]
## ["porcine organs", 0.009124576964142228, [14, 15], "np", 1]
## ["antibody levels", 0.009048898859649785, [23, 32], "np", 1]
## ["facs", 0.00903217942046866, [106], "np", 1]
## ["mlr", 0.008391697539496645, [118], "np", 1]
## ["detectable bone marrow engraftment", 0.008286431192997578, [121, 47, 48, 56], "np", 1]
## ["compatibility mechanisms", 0.007766138766669359, [147, 148], "np", 1]
## ["antibody development", 0.007441389529252127, [23, 1], "np", 2]
## ["bone marrow cells", 0.007404265236461079, [47, 48, 68], "np", 1]
## ["atg", 0.007327162594329261, [81], "np", 1]
## ["assessed", 0.0070843413238611385, [92], "vbn", 3]
## ["xenotransplantation", 0.006891045367680502, [49], "np", 1]
## ["galactosyl transferase", 0.00688155943933944, [7, 8], "np", 1]
## ["baboon-specific antibodies", 0.006786674144737339, [111, 23], "np", 1]
## ["species", 0.006645487865211458, [146], "np", 1]
## ["better engraftment", 0.006629144954398063, [142, 56], "np", 1]
## ["anti-non-gal igg", 0.006541816823482357, [28, 38], "np", 1]
## ["units", 0.006309034798549533, [97], "np", 1]
## ["low-dose total body irradiation", 0.006280275626570015, [71, 72, 73, 74], "np", 1]
## ["flow cytometry", 0.006249205634991044, [104, 105], "np", 1]
## ["innate immune", 0.006198540808138186, [144, 145], "np", 1]
## ["pig", 0.006110906266058705, [3], "np", 1]
## ["donor-specific hyporesponsiveness", 0.0060030695393377695, [58, 59], "np", 1]
## ["28\\xa0days", 0.0058808886382407406, [54], "np", 1]
## ["primates", 0.005854332471235279, [17], "nns", 2]
## ["tbi", 0.00574684416954047, [75], "np", 1]
## ["non-myeloablative conditioning regimen", 0.00570059048877115, [40, 41, 42], "np", 1]
## ["rejection", 0.005613724689108758, [20], "nn", 2]
## ["early bone marrow rejection", 0.005613724689108758, [137, 47, 48, 20], "np", 1]
## ["colony", 0.0055879910160115814, [95], "np", 1]
## ["mixed lymphocyte reactions", 0.0055879910160115814, [115, 116, 117], "np", 1]
## ["long term engraftment", 0.005524287461998386, [152, 153, 56], "np", 1]
## ["anti-non-gal igg antibody levels", 0.005429339315789872, [28, 38, 23, 32], "np", 1]
## ["baboons", 0.005368697220438706, [36], "np", 3]
## ["increase", 0.005356239129979068, [123], "nn", 3]
## ["porcine-specific pcr", 0.0053301573642717875, [93, 94], "np", 1]
## ["galt-ko pbmc", 0.005251189543072789, [9, 103], "np", 2]
## ["days", 0.005226272015402113, [99], "nns", 2]
## ["few days", 0.005226272015402113, [129, 99], "np", 1]
## ["thymocyte globulin", 0.005182433621659398, [79, 80], "np", 1]
## ["bone marrow aspirates", 0.004936176824307386, [47, 48, 100], "np", 1]
## ["pre-transplant", 0.004775583337865521, [33], "np", 3]
## ["700\\xa0cgy", 0.004633271426865554, [77], "np", 1]
## ["serum", 0.00457773520403939, [101], "np", 1]
## ["successful strategies", 0.0045584141816457854, [138, 139], "np", 1]
## ["pre-existing", 0.0043666343220573, [22], "nn", 1]
## ["using", 0.0042854040560253055, [90], "vbg", 3]
## ["tissues", 0.004229579038259492, [16], "nns", 1]
## ["other antigens", 0.004156151883297225, [26, 27], "np", 1]
## ["rituximab and bortezomib", 0.0041220928292107105, [84, 0, 85], "np", 1]
## ["lack", 0.004100241652999701, [4], "nn", 2]
## ["hyperacute", 0.0040773362362084625, [19], "nn", 1]
## ["pig-to-baboon", 0.00405453005708657, [46], "nn", 2]
## ["facs", 0.004053629704769318, [110], "np", 2]
## ["anti-non-gal igg development", 0.003925090094089414, [28, 38, 1], "np", 1]
## ["mechanisms", 0.0038830693833346794, [148], "nns", 1]
## ["gal epitope", 0.0038572860130719395, [24, 25], "np", 1]
## ["development", 0.0037206947646260635, [1], "nn", 4]
## ["suggest", 0.003647051439659811, [55], "vbp", 2]
## ["subsequent baboons", 0.0035791314802924704, [124, 36], "np", 1]
## ["associated", 0.003568404393669542, [122], "vbn", 2]
## ["transferase", 0.00344077971966972, [8], "nn", 1]
## ["anti-pig cellular responses", 0.003372694026763056, [112, 113, 114], "np", 1]
## ["same regimen", 0.003362997454589551, [51, 42], "np", 1]
## ["showed", 0.00331759313334249, [52], "vbd", 3]
## ["livers", 0.00331190274959894, [91], "nns", 1]
## ["engraft", 0.0032716606376440543, [44], "np", 2]
## ["organs", 0.003252865120027248, [15], "nns", 1]
## ["avoiding", 0.0031772001273580068, [18], "vbg", 1]
## ["irradiation", 0.0031401378132850074, [74], "nn", 2]
## ["thymic irradiation", 0.0031401378132850074, [76, 74], "np", 1]
## ["cytometry", 0.003124602817495522, [105], "nn", 1]
## ["immune", 0.003099270404069093, [145], "nn", 1]
## ["progenitors", 0.0030194279366508184, [53], "nns", 1]
## ["forming", 0.00300857725703402, [96], "vbg", 1]
## ["hyporesponsiveness", 0.0030015347696688848, [59], "nn", 2]
## ["cellular hyporesponsiveness", 0.0030015347696688848, [113, 59], "np", 1]
## ["gal", 0.002916298549454395, [24], "np", 1]
## ["regimen", 0.002850295244385575, [42], "nn", 3]
## ["reactions", 0.0027939955080057907, [117], "nns", 1]
## ["have", 0.0027915725788409816, [10], "vbp", 2]
## ["facilitated", 0.0027781278902852796, [11], "vbn", 1]
## ["evidence", 0.0027739083608650113, [57], "np", 1]
## ["treatment", 0.0027634358903628874, [87], "nn", 1]
## ["flow", 0.002757222797754126, [104], "nn", 1]
## ["alpha", 0.002755341327796325, [6], "nn", 1]
## ["pcr", 0.0026650786821358938, [94], "nn", 1]
## ["binding", 0.0026650786821358938, [102], "vbg", 1]
## ["xenogeneic", 0.0026602132901830046, [12], "nn", 1]
## ["involved", 0.0026449400061074515, [136], "vbn", 1]
## ["pbmc", 0.0026255947715363944, [103], "nnp", 1]
## ["globulin", 0.002591216810829699, [80], "nn", 1]
## ["selecting", 0.0025546953869046676, [64], "vbg", 2]
## ["baboon-specific", 0.0025076137223822635, [111], "nn", 1]
## ["velcade", 0.002440644285586526, [86], "nnp", 1]
## ["expression", 0.0024312388226921393, [5], "nn", 1]
## ["combating", 0.002362027172405099, [140], "vbg", 1]
## ["conditioned", 0.002336076336226733, [39], "vbn", 2]
## ["cells", 0.0022437889034017857, [68], "nns", 1]
## ["aspirates", 0.0022437889034017857, [100], "nns", 1]
## ["recipients", 0.002231012336327477, [65], "nns", 3]
## ["recipient weight", 0.002231012336327477, [65, 69], "np", 1]
## ["hypothesis", 0.002194975092938351, [63], "nn", 1]
## ["compatibility", 0.0020998099934317524, [147], "nn", 1]
## ["antigens", 0.0020780759416486124, [27], "nns", 1]
## ["found", 0.002076112694923989, [30], "vbn", 2]
## ["bortezomib", 0.0020610464146053552, [85], "nnp", 1]
## ["term", 0.0020561766556330408, [153], "nn", 1]
## ["body", 0.001997903461014818, [73], "nn", 1]
## ["achieve", 0.0019558956272194583, [151], "vb", 1]
## ["epitope", 0.0019286430065359697, [25], "nn", 1]
## ["conditioning regimen", 0.0019001968295903832, [41, 42], "np", 1]
## ["ensure", 0.00188551674693394, [131], "vb", 1]
## ["received", 0.0018850978046258064, [67], "vbd", 3]
## ["varying", 0.0018734653403589562, [31], "vbg", 1]
## ["galactosyl", 0.001859861692474501, [7], "nn", 1]
## ["thymocyte", 0.0018184816235827877, [79], "nn", 1]
## ["did", 0.0017830500007326735, [130], "vbd", 1]
## ["investigate", 0.0017718847176174035, [62], "vb", 1]
## ["extra-corporeal immunoadsorption", 0.0017678779993414344, [88, 89], "np", 1]
## ["allow", 0.0017605572046914648, [141], "vb", 1]
## ["donor-specific", 0.0017482308528969572, [58], "nn", 1]
## ["measured", 0.0017448466493150791, [109], "vbn", 1]
## ["conditioning", 0.001723737069192804, [41], "nn", 2]
## ["non-myeloablative", 0.0016814987272947754, [40], "nn", 1]
## ["strategies", 0.0016574445968852841, [139], "nns", 1]
## ["improve", 0.0016569000659714763, [66], "vb", 1]
## ["responses", 0.0016320983117784436, [114], "nns", 1]
## ["seen", 0.0016277570531285331, [128], "vbn", 1]
## ["macrochimerism", 0.001611758537759373, [127], "np", 1]
## ["peripheral macro-chimerism", 0.001611758537759373, [107, 108], "np", 1]
## ["addressed", 0.0015779923467293799, [150], "vbn", 1]
## ["failed", 0.0015395513091008685, [43], "vbd", 1]
## ["consisting", 0.0015395513091008685, [70], "vbg", 1]
## ["pre", 0.0015182337061987498, [125], "nn", 1]
## ["cellular", 0.0013949401212865723, [113], "nn", 1]
## ["observation", 0.0013775793453862806, [60], "np", 1]
## ["results", 0.0013775793453862806, [133], "np", 1]
## ["existing", 0.0013340243311133566, [126], "vbg", 1]
## ["modified", 0.0013215589794041743, [2], "vbn", 1]
## ["led", 0.001274237653625871, [61], "vbd", 1]
## ["rituximab", 0.0012374388052191633, [84], "np", 1]
## ["weight", 0.0010680808153894804, [69], "nn", 1]
## ["sera", 0.000959406668965877, [34], "nn", 1]
## ["demonstrated", 0.0009169874113474707, [120], "vbd", 1]
## ["immunoadsorption", 0.0008839389996707172, [89], "nn", 1]
## ["macro-chimerism", 0.0008058792688796865, [108], "nn", 1]
## ["addition", 0.0006887896726931403, [83], "nn", 1]
## ["peripheral", 0.0006887896726931403, [107], "nnp", 1]
## ["failure", 0.0006887896726931403, [132], "nn", 1]
## ["need", 0.0006887896726931403, [149], "vb", 1]
## ["2010", 0.0, [0], "np", 2]

Visualize Word Graph Networks

import pytextrank
import networkx as nx
import pylab as plt

# Path to the stage-1 output file; it is the input the graph code below reads.
path_stage1 = "data/o1.json"

# Visualize Network (commented out in this write-up).
# When enabled, these lines obtain a graph and its ranks from
# pytextrank.text_rank, draw the graph with node labels through networkx,
# and display the figure via pylab.
#graph, ranks = pytextrank.text_rank(path_stage1)
#nx.draw(graph, with_labels=True)
#plt.show()



Stage 3

import pytextrank

# Stage 3: score the sentences of the stage-1 parse against a kernel built
# from the stage-2 output, and save one serialized record per sentence.
path_stage1 = "data/o1.json"
path_stage2 = "data/o2.json"
path_stage3 = "data/o3.json"

kernel = pytextrank.rank_kernel(path_stage2)

with open(path_stage3, 'w') as out_file:
    for sentence in pytextrank.top_sentences(kernel, path_stage1):
        # Serialize the record once; the identical text is written to the
        # stage-3 file (newline-terminated) and echoed to stdout.
        record = pytextrank.pretty_print(sentence._asdict())
        out_file.write(record + "\n")
        print(record)
## {"dist": 0.04237277961916629, "idx": 3, "text": "Two baboons with low levels of pre-transplant anti-non-Gal IgG , conditioned with the same regimen , showed porcine bone marrow progenitors at 28\\xa0days following transplantation , suggesting engraftment ."}
## {"dist": 0.039406144097468364, "idx": 12, "text": "Three subsequent baboons with similarly low levels of pre - existing anti-non-Gal IgG showed no engraftment and an increase in anti-non-Gal IgG antibody levels following transplantation ."}
## {"dist": 0.03829803426218381, "idx": 5, "text": "This observation led us to investigate the hypothesis that selecting for baboon recipients with low pre-transplant anti-non-Gal IgG levels might improve engraftment levels following GalT-KO pig-to-baboon bone marrow transplantation . ' , ' Five baboons , with low pre-transplant anti-non-Gal IgG levels , received transplantation of bone marrow cells ( 1-5\\xa0\u00d7\\xa010(9 ) /kg of recipient weight ) from GalT-KO pigs ."}
## {"dist": 0.0367332846868216, "idx": 13, "text": "Peripheral macrochimerism was only seen for a few days following transplantation regardless of antibody development . ' , ' Selecting for baboon recipients with low levels of pre-transplant anti-non-Gal IgG did not ensure bone marrow engraftment ."}
## {"dist": 0.035933761870284915, "idx": 2, "text": "We have previously found that baboons with high levels of pre-transplant anti-non-Gal IgG , conditioned with a non-myeloablative conditioning regimen , failed to engraft following pig-to-baboon bone marrow transplantation ( Xenotransplantation , 17 , 2010 and 300 ) ."}
## {"dist": 0.03442086823589334, "idx": 14, "text": "Failure to engraft was associated with an increase in anti-non-Gal IgG levels following transplantation ."}
## {"dist": 0.033570204513693754, "idx": 11, "text": "Engraftment was associated with lack of an increase in anti-non-Gal IgG levels as well as cellular hyporesponsiveness toward pig ."}
## {"dist": 0.027933646603512163, "idx": 8, "text": "Bone marrow engraftment was assessed by porcine-specific PCR on colony forming units ( CFU ) of day 28 bone marrow aspirates ."}
## {"dist": 0.027641470233678328, "idx": 1, "text": "However , antibodies against other antigens ( anti-non-Gal antibodies ) , are found at varying levels in the pre-transplant sera of most primates ."}
## {"dist": 0.026693457480180618, "idx": 10, "text": "Peripheral macro-chimerism was measured by FACS using pig and baboon-specific antibodies and baboon anti-pig cellular responses were assessed by mixed lymphocyte reactions ( MLR ) . ' , ' As previously reported , two of five baboons demonstrated detectable bone marrow engraftment at 4\\xa0weeks after transplantation ."}
## {"dist": 0.026115121605505193, "idx": 9, "text": "Anti-non-Gal antibody levels were assessed by serum binding toward GalT-KO PBMC using flow cytometry ( FACS ) ."}
## {"dist": 0.025770635264117357, "idx": 15, "text": "These results suggest that anti-non-Gal-IgG is likely involved in early bone marrow rejection and that successful strategies for combating anti-non-Gal IgG development may allow better engraftment ."}
## {"dist": 0.019052642142826214, "idx": 0, "text": "The development of genetically modified pigs , which lack the expression of alpha 1-3 galactosyl transferase , ( GalT-KO pigs ) has facilitated the xenogeneic transplantation of porcine organs and tissues into primates by avoiding hyperacute rejection due to pre-existing antibodies against the Gal epitope ."}
## {"dist": 0.015513783573889927, "idx": 16, "text": "Since engraftment was only low and transient regardless of antibody development , innate immune , or species compatibility mechanisms will likely also need to be addressed to achieve long term engraftment ."}
## {"dist": 0.012420271257558829, "idx": 7, "text": "In addition , two baboons received Rituximab and Bortezomib ( Velcade ) treatment as well as extra-corporeal immunoadsorption using GalT-KO pig livers ."}
## {"dist": 0.008981084787826625, "idx": 4, "text": "These baboons also showed evidence of donor-specific hyporesponsiveness ."}
## {"dist": 0.006365404863360709, "idx": 6, "text": "They received a non-myeloablative conditioning regimen consisting of low-dose total body irradiation ( TBI ) ( 150\\xa0cGy ) , thymic irradiation ( 700\\xa0cGy ) , anti - thymocyte globulin ( ATG ) , and tacrolimus ."}

Stage 4

import pytextrank

path_stage2 = "data/o2.json"
path_stage3 = "data/o3.json"


# Stage 4: summarize the document as a short excerpt plus its key phrases.

# set() drops duplicate phrases; note that set iteration order is arbitrary,
# so the keyword order may differ from run to run.
phrases = ", ".join(set(pytextrank.limit_keyphrases(path_stage2, phrase_limit=25)))

# Each item unpacks as (sentence text, index); sorting on the index puts the
# selected sentences back in index order before they are joined.
sent_iter = sorted(pytextrank.limit_sentences(path_stage3, word_limit=50), key=lambda x: x[1])

s = [pytextrank.make_sentence(sent_text) for sent_text, _idx in sent_iter]

# Build and print the summary once, after every sentence has been collected.
# Doing this inside the loop re-printed the growing excerpt and the full
# keyword list per sentence, and printed nothing when no sentence qualified.
graf_text = " ".join(s)
print("**excerpts:** %s\n\n**keywords:** %s" % (graf_text, phrases,))

print(phrases)
## **excerpts:** Two baboons with low levels of pre-transplant anti-non-Gal IgG, conditioned with the same regimen, showed porcine bone marrow progenitors at 28\xa0days following transplantation, suggesting engraftment.
## 
## **keywords:** xenogeneic transplantation, levels, most primates, antibody, a few days, galt-ko pig-to-baboon bone marrow transplantation, marrow, low pre-transplant anti-non-gal igg levels, high levels, engraftment levels, low levels, transplantation, baboon recipients, anti-non-gal igg levels, anti-non-gal antibodies, engraftment, hyperacute rejection, porcine bone marrow progenitors, pig-to-baboon bone marrow transplantation, bone marrow engraftment, galt-ko pigs, galt-ko, pre-transplant sera, igg, bone, pre-existing antibodies
## xenogeneic transplantation, levels, most primates, antibody, a few days, galt-ko pig-to-baboon bone marrow transplantation, marrow, low pre-transplant anti-non-gal igg levels, high levels, engraftment levels, low levels, transplantation, baboon recipients, anti-non-gal igg levels, anti-non-gal antibodies, engraftment, hyperacute rejection, porcine bone marrow progenitors, pig-to-baboon bone marrow transplantation, bone marrow engraftment, galt-ko pigs, galt-ko, pre-transplant sera, igg, bone, pre-existing antibodies

NEXT STEPS

  1. Wrap this workbook's pytextrank stages in a single reusable function.
  2. Load outfile.json into a Python dictionary, then apply additional processing to format the text and remove bad characters.
  3. Use an apply function to run the pytextrank function over each JSON document.
  4. Use the keywords produced in Stage 4 to train the model and make predictions.
    • Sort by frequency and Class
    • Test
    • Evaluate the model

Full project posted here: Jerome’s GitHub repo