05/31/2020
##r chunk
library(reticulate)
##python chunk
import nltk
import spacy
from spacy import displacy
from __future__ import unicode_literals, print_function
import plac
import random
from pathlib import Path
Modify grammar1 from the lecture notes to account for the following sentences:
# Production rules for the toy context-free grammar, in the text format that
# nltk.CFG.fromstring parses. S is the first rule listed; Det lists both
# "the" and "The".
_GRAMMAR1_RULES = """
S -> NP VP
VP -> V NP | V NP PP | V PP |V PP PP
PP -> P NP
V -> "saw" | "ate" | "walked"
NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
Det -> "a" | "an" | "the" | "my" | "The"
N -> "man" | "dog" | "cat" | "telescope" | "park" | "food"
P -> "in" | "on" | "by" | "with"
"""
grammar1 = nltk.CFG.fromstring(_GRAMMAR1_RULES)
# RecursiveDescentParser and ShiftReduceParser.
# Build one parser of each kind over grammar1 so that their results on the
# same sentences can be compared.
RD_parser = nltk.RecursiveDescentParser(grammar1)
SR_parser = nltk.ShiftReduceParser(grammar1)
# Parse a short transitive sentence with both parsers and print every tree
# each one yields. Lines starting with "##" are the recorded output.
Input = 'The dog ate the food'.split()
print(Input)
## ['The', 'dog', 'ate', 'the', 'food']
for tree in RD_parser.parse(Input):
    print(tree)
## (S (NP (Det The) (N dog)) (VP (V ate) (NP (Det the) (N food))))
for tree in SR_parser.parse(Input):
    print(tree)
## (S (NP (Det The) (N dog)) (VP (V ate) (NP (Det the) (N food))))
# Parse a sentence with two prepositional phrases. The recorded output shows
# the recursive-descent parser yielding two trees: one attaching "in the park"
# to "the cat", and one attaching both PPs to the verb phrase.
Input2 = 'The dog walked by the cat in the park'.split()
print(Input2)
## ['The', 'dog', 'walked', 'by', 'the', 'cat', 'in', 'the', 'park']
for tree in RD_parser.parse(Input2):
    print(tree)
## (S
## (NP (Det The) (N dog))
## (VP
## (V walked)
## (PP
## (P by)
## (NP (Det the) (N cat) (PP (P in) (NP (Det the) (N park)))))))
## (S
## (NP (Det The) (N dog))
## (VP
## (V walked)
## (PP (P by) (NP (Det the) (N cat)))
## (PP (P in) (NP (Det the) (N park)))))
# NOTE(review): this loop repeats the recursive-descent parse just above and
# prints the same two trees again -- confirm whether the duplicate is intended.
for tree in RD_parser.parse(Input2):
    print(tree)
## (S
## (NP (Det The) (N dog))
## (VP
## (V walked)
## (PP
## (P by)
## (NP (Det the) (N cat) (PP (P in) (NP (Det the) (N park)))))))
## (S
## (NP (Det The) (N dog))
## (VP
## (V walked)
## (PP (P by) (NP (Det the) (N cat)))
## (PP (P in) (NP (Det the) (N park)))))
# NOTE(review): no output was recorded for the shift-reduce run below;
# presumably the parser found no complete parse for Input2 -- confirm.
for tree in SR_parser.parse(Input2):
    print(tree)
## Data
# Training examples for the dependency parser. Each entry is a
# (text, annotations) pair; "heads" and "deps" are parallel lists with one
# entry per token (7 entries for the first sentence, 9 for the second).
# Presumably heads[i] is the index of the token that token i attaches to and
# deps[i] is the label of that arc -- confirm against the spaCy training docs.
TRAIN_DATA = [
    # Sentence 1
    (
        "WHAT ARE Kerin Pccord LOOKING AT?",
        {
            "heads": [4, 0, 1, 4, 5, 5, 6],
            # Dependency types
            "deps": ["nsubj", "cc", "det", "compound", "nsubj", "ROOT", "punct"],
        },
    ),
    # Sentence 2
    # NOTE(review): entry 5 is labelled "ROOT" but its head is 6, while entry 6
    # is its own head yet labelled "compound" -- confirm the intended annotation.
    (
        "CHECKING IS READY FOR ALL THOSE GUESTS NOW.",
        {
            "heads": [3, 0, 3, 5, 5, 6, 6, 5, 5],
            # Dependency types
            "deps": [
                "nsubj",
                "cc",
                "verb",
                "nmod",
                "conj",
                "ROOT",
                "compound",
                "nsubj",
                "punct",
            ],
        },
    ),
]
# Start from a blank English pipeline and add a fresh dependency parser to it.
# NOTE(review): create_pipe / add_pipe(component) / begin_training look like
# the spaCy v2 API -- confirm the installed spaCy version before upgrading.
nlp = spacy.blank('en')
parser = nlp.create_pipe('parser')
nlp.add_pipe(parser, first=True)
# Register every dependency label that occurs in the training annotations
# with the parser before training begins.
for _, annotations in TRAIN_DATA:
    for dep in annotations.get('deps', []):
        parser.add_label(dep)
optimizer = nlp.begin_training()
# Train the parser for n_iter passes over TRAIN_DATA. Each pass shuffles the
# examples in place, updates the model one example at a time, and prints the
# losses accumulated during that pass (30 printed lines for 30 passes).
n_iter = 30
for itn in range(n_iter):
    random.shuffle(TRAIN_DATA)
    # Fresh dict per pass so the printed value covers this pass only.
    losses = {}
    for text, annotations in TRAIN_DATA:
        nlp.update([text], [annotations], sgd=optimizer, losses=losses)
    print(losses)
## {'parser': 8.134955611079931}
## {'parser': 7.991602323949337}
## {'parser': 7.793125256896019}
## {'parser': 6.795093823224306}
## {'parser': 5.666830036789179}
## {'parser': 5.226629305630922}
## {'parser': 6.708837806072552}
## {'parser': 5.634589904831955}
## {'parser': 5.114576845895499}
## {'parser': 3.7603552383370697}
## {'parser': 3.1893526348867454}
## {'parser': 2.8198586363287177}
## {'parser': 2.021109055400302}
## {'parser': 1.2504578033622238}
## {'parser': 0.6491885765735788}
## {'parser': 0.43006787499462007}
## {'parser': 0.02420258096071848}
## {'parser': 0.00584799695292304}
## {'parser': 0.0015191255707414086}
## {'parser': 0.0005105559188849362}
## {'parser': 5.535496564101905e-05}
## {'parser': 2.7150608380266394e-05}
## {'parser': 1.7168184453311697e-05}
## {'parser': 3.5332352368230846e-06}
## {'parser': 2.055418188422198e-06}
## {'parser': 7.172803732273612e-07}
## {'parser': 1.8336525899878206e-07}
## {'parser': 8.541792580196717e-08}
## {'parser': 4.434963011662159e-08}
## {'parser': 3.2653880947829984e-08}
## Test your dependency model on a similar tweet.
# Run the trained pipeline on an unseen sentence and list, for every token,
# its text, its predicted dependency label and the text of its head token.
test_text = "WE enjoy making the cakes."
doc = nlp(test_text)
dependencies = [(token.text, token.dep_, token.head.text) for token in doc]
print('Dependencies', dependencies)
## Dependencies [('WE', 'ROOT', 'WE'), ('enjoy', 'verb', 'making'), ('making', 'cc', 'WE'), ('the', 'conj', 'cakes'), ('cakes', 'cc', 'WE'), ('.', 'cc', 'cakes')]
# Render the parsed sentence with displaCy, overriding the distance,
# arrow_stroke and arrow_width options. The recorded output that follows is
# the SVG markup string produced by this call.
render_options = {'distance': 30, 'arrow_stroke': 2, 'arrow_width': 6}
displacy.render(doc, options=render_options)
## '<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" id="f40c29a9497f4298af64d140cffc7d0c-0" class="displacy" width="200" height="182.0" direction="ltr" style="max-width: none; height: 182.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr">\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="92.0">\n <tspan class="displacy-word" fill="currentColor" x="50">WE</tspan>\n <tspan class="displacy-tag" dy="2em" fill="currentColor" x="50"></tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="92.0">\n <tspan class="displacy-word" fill="currentColor" x="80">enjoy</tspan>\n <tspan class="displacy-tag" dy="2em" fill="currentColor" x="80"></tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="92.0">\n <tspan class="displacy-word" fill="currentColor" x="110">making</tspan>\n <tspan class="displacy-tag" dy="2em" fill="currentColor" x="110"></tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="92.0">\n <tspan class="displacy-word" fill="currentColor" x="140">the</tspan>\n <tspan class="displacy-tag" dy="2em" fill="currentColor" x="140"></tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="92.0">\n <tspan class="displacy-word" fill="currentColor" x="170">cakes.</tspan>\n <tspan class="displacy-tag" dy="2em" fill="currentColor" x="170"></tspan>\n</text>\n\n<g class="displacy-arrow">\n <path class="displacy-arc" id="arrow-f40c29a9497f4298af64d140cffc7d0c-0-0" stroke-width="2px" d="M100,47.0 C100,32.0 100.0,32.0 100.0,47.0" fill="none" stroke="currentColor"/>\n <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n <textPath xlink:href="#arrow-f40c29a9497f4298af64d140cffc7d0c-0-0" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">verb</textPath>\n </text>\n <path 
## class="displacy-arrowhead" d="M100,49.0 L96,41.0 104,41.0" fill="currentColor"/>\n</g>\n\n<g class="displacy-arrow">\n <path class="displacy-arc" id="arrow-f40c29a9497f4298af64d140cffc7d0c-0-1" stroke-width="2px" d="M70,47.0 C70,17.0 105.0,17.0 105.0,47.0" fill="none" stroke="currentColor"/>\n <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n <textPath xlink:href="#arrow-f40c29a9497f4298af64d140cffc7d0c-0-1" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">cc</textPath>\n </text>\n <path class="displacy-arrowhead" d="M105.0,49.0 L109.0,41.0 101.0,41.0" fill="currentColor"/>\n</g>\n\n<g class="displacy-arrow">\n <path class="displacy-arc" id="arrow-f40c29a9497f4298af64d140cffc7d0c-0-2" stroke-width="2px" d="M160,47.0 C160,32.0 160.0,32.0 160.0,47.0" fill="none" stroke="currentColor"/>\n <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n <textPath xlink:href="#arrow-f40c29a9497f4298af64d140cffc7d0c-0-2" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">conj</textPath>\n </text>\n <path class="displacy-arrowhead" d="M160,49.0 L156,41.0 164,41.0" fill="currentColor"/>\n</g>\n\n<g class="displacy-arrow">\n <path class="displacy-arc" id="arrow-f40c29a9497f4298af64d140cffc7d0c-0-3" stroke-width="2px" d="M70,47.0 C70,2.0 170.0,2.0 170.0,47.0" fill="none" stroke="currentColor"/>\n <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n <textPath xlink:href="#arrow-f40c29a9497f4298af64d140cffc7d0c-0-3" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">cc</textPath>\n </text>\n <path class="displacy-arrowhead" d="M170.0,49.0 L174.0,41.0 166.0,41.0" fill="currentColor"/>\n</g>\n</svg>'