Libraries / R Setup

library(reticulate)
library(wordnet)
library(qdap)
## Loading required package: qdapDictionaries
## Loading required package: qdapRegex
## Loading required package: qdapTools
## Loading required package: RColorBrewer
## Registered S3 methods overwritten by 'qdap':
##   method               from
##   t.DocumentTermMatrix tm  
##   t.TermDocumentMatrix tm
## 
## Attaching package: 'qdap'
## The following object is masked from 'package:wordnet':
## 
##     synonyms
## The following object is masked from 'package:base':
## 
##     Filter
##python chunk
from nltk.corpus import wordnet as wn
import pandas as pd
from nltk.corpus import wordnet_ic
import spacy
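
If the WordNet corpora or the spaCy English model are not installed yet, they can be fetched once up front. This is a minimal setup sketch; the download calls are only needed the first time and assume an internet connection.

##python chunk
import nltk
nltk.download('wordnet')      # WordNet synsets, lemmas, and relations
nltk.download('wordnet_ic')   # information content files (ic-semcor.dat, ic-brown.dat)
# the spaCy model is installed from the command line:
# python -m spacy download en_core_web_sm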

Synsets

##python chunk
basement_sets_py = wn.synsets("basement")
print(basement_sets_py)
## [Synset('basement.n.01'), Synset('basement.n.02')]
basement_df = pd.DataFrame([
  {"Synset": each_synset,
   "Part of Speech": each_synset.pos(),
   "Definition": each_synset.definition(),
   "Lemmas": each_synset.lemma_names(),
   "Examples": each_synset.examples()}
  for each_synset in basement_sets_py])

basement_df
##                     Synset Part of Speech  ...              Lemmas Examples
## 0  Synset('basement.n.01')              n  ...  [basement, cellar]       []
## 1  Synset('basement.n.02')              n  ...          [basement]       []
## 
## [2 rows x 5 columns]
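
The lookup can also be restricted to a part of speech, or a single synset can be pulled directly by name. A short sketch using the basement.n.01 name from the output above:

##python chunk
wn.synsets("basement", pos=wn.NOUN)          # restrict the search to noun senses
wn.synset("basement.n.01").lemma_names()     # fetch one synset directly by its name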

Nyms

##python chunk
synset = wn.synsets("basement")
synset
## [Synset('basement.n.01'), Synset('basement.n.02')]
basement_syn = synset[0]

basement_syn.hypernyms()
## [Synset('floor.n.02')]
basement_syn.hyponyms()
## [Synset('cellarage.n.02')]
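
Hypernyms and hyponyms are not the only relations WordNet exposes; holonyms, meronyms, and lemma-level antonyms follow the same pattern. A brief sketch (the "good" lookup is only an illustration, not part of the basement example):

##python chunk
basement_syn.part_holonyms()    # wholes that a basement is a part of
basement_syn.root_hypernyms()   # top of the noun hierarchy (entity.n.01)
# antonyms are stored on lemmas rather than on synsets
wn.synsets("good")[0].lemmas()[0].antonyms()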

Similarity

##python chunk
cellar = wn.synsets("cellar")
cellar[0].definition()
## 'the lowermost portion of a structure partly or wholly below ground level; often used for storage'
cellar = cellar[0]

vault = wn.synsets("vault")
vault[0].definition()
## 'a burial chamber (usually underground)'
vault = vault[0]

# load the SemCor information content file used by the JCN and Lin measures
semcor_ic = wordnet_ic.ic('ic-semcor.dat')
# "cellar" maps to the same synset as basement.n.01, so JCN returns its 1e+300 cap
cellar.jcn_similarity(basement_syn, semcor_ic)
## 1e+300
basement_syn.jcn_similarity(vault, semcor_ic)
## 0.08578163163699828
cellar.lin_similarity(basement_syn, semcor_ic)
## 1.0
basement_syn.lin_similarity(vault, semcor_ic)
## 0.4034803086377192
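
The JCN and Lin scores both depend on the SemCor information content file; WordNet also provides taxonomy-only measures that need no corpus counts. A quick sketch with the same synsets:

##python chunk
cellar.path_similarity(basement_syn)    # shortest-path measure; 1.0 because these are the same synset
basement_syn.wup_similarity(vault)      # Wu-Palmer measure based on the depth of the common ancestor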

NER Tagging

##python chunk
# load the pre-trained English model (replaced on the next line)
nlp = spacy.load("en_core_web_sm")
# start from a blank English pipeline so the custom NER is trained from scratch
nlp = spacy.blank("en")
##python chunk
# create a fresh NER component and append it to the end of the pipeline (spaCy v2 API)
ner = nlp.create_pipe('ner')

nlp.add_pipe(ner, last=True)
##python chunk
training_data = [

  (u"These Media Posts will serve as notification to the United States Congress that should Iran strike any U.S. person or target, the United States will quickly & fully strike back, & perhaps in a disproportionate manner. Such legal notice is not required, but is given nevertheless!",
   {'entities': [(87, 91, 'COUNTRY')]}),

  (u"The United States just spent Two Trillion Dollars on Military Equipment. We are the biggest and by far the BEST in the World! If Iran attacks an American Base, or any American, we will be sending some of that brand new beautiful equipment their way...and without hesitation!",
   {'entities': [(0, 17, 'COUNTRY')]}),

  (u"They attacked us, & we hit back. If they attack again, which I would strongly advise them not to do, we will hit them harder than they have ever been hit before! https://t.co/qI5RfWsSCH",
   {'entities': [(162, 185, 'URL')]}),

  (u"Nobody but Donald Trump will save Israel. You are wasting your time with these politicians and political clowns. Best!",
   {'entities': [(11, 23, 'PERSON'), (34, 40, 'COUNTRY')]}),

  (u"The talks between the U.S. and Iran are going on forever,  WORLD'S LONGEST NEGOTIATION. Obama has no idea what he is doing - incompetent!",
   {'entities': [(88, 93, 'PERSON')]})
]
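
The entity annotations are character offsets into each tweet, so it is worth checking that every (start, end) pair slices out the intended string before training. A small sanity-check sketch:

##python chunk
for text, annotations in training_data:
    for start, end, label in annotations['entities']:
        print(label, "->", text[start:end])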
##python chunk
# register the custom entity labels with the NER component
# (nlp.entity is the spaCy v2 shorthand for the 'ner' pipe created above)
nlp.entity.add_label('PERSON')
nlp.entity.add_label('COUNTRY')
nlp.entity.add_label('URL')
##python chunk
import random

# initialize the model weights and get an optimizer back
optimizer = nlp.begin_training()

# run through the training data 20 times, shuffling the order each pass
for i in range(20):
    random.shuffle(training_data)
    for text, annotations in training_data:
        nlp.update([text], [annotations], sgd=optimizer)

Using your NER Tagger

##python chunk
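# A minimal sketch of applying the freshly trained pipeline: run new text through
# nlp and read the predicted spans off doc.ents. The sentence below is made up for
# illustration, and with only five training tweets the predictions may be rough.
example = nlp(u"Donald Trump said Iran will not attack the United States.")
for ent in example.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)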