To tag those words not already identified as boosted or attenuated, the subset containing all words except the activation- and negation-tagged ones was parsed with the Stanford Grammatical Dependency Parser. As before, the code was written in Python, so the chunk below includes it as comments only.
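The dependency parsing call itself does not appear in the chunk; the lines below give a minimal sketch of how it can be invoked from Python via NLTK's CoreNLPDependencyParser interface to Stanford CoreNLP. The server URL and the example sentence are assumptions for illustration only, not part of the original pipeline.
#from nltk.parse.corenlp import CoreNLPDependencyParser
## assumes a Stanford CoreNLP server is running locally, started e.g. with:
## java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000
#dep_parser = CoreNLPDependencyParser(url='http://localhost:9000')
#parse, = dep_parser.raw_parse('The room was not particularly clean')
#for governor, relation, dependent in parse.triples():
#    print(governor, relation, dependent)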
DATA PREPROCESSING
#1. Importing the required libraries
#import os
#import numpy as np
#import pandas as pd
#import nltk
#from nltk.corpus import stopwords
#from nltk.tokenize import word_tokenize, sent_tokenize
#2. Downloading the NLTK resources used below
#nltk.download('punkt')
#nltk.download('stopwords')
#nltk.download('averaged_perceptron_tagger')
#3. Reading the data
#os.chdir("/Users/lisaherzog/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Feature Set3/Input")
#Data = pd.read_excel('6. POS Set.xlsx')
#stop_words = set(stopwords.words('english'))
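As a quick illustration of the tokenisation and stop-word filtering applied below (the sample sentence is invented for demonstration):
#example = "The staff was friendly and the room was very clean"
#tokens = word_tokenize(example)
#filtered = [w for w in tokens if w not in stop_words]
## filtered -> ['The', 'staff', 'friendly', 'room', 'clean']
Note that the NLTK stop-word list is lower-case, so the capitalised sentence-initial "The" survives the filter; lower-casing the tokens first would remove it as well.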
CONVERT DATAFRAME TO LIST
#Text = Data['POS_Text'].tolist()
TAGGING
POS tagging was performed with NLTK's pos_tag function and the tagged texts were stored as “TaggedText”.
#TaggedText = [None]*1000
#for i in range(0, 1000):
#    txt = Text[i]
#    tagged = []
#    for sentence in sent_tokenize(txt):
#        wordList = nltk.word_tokenize(sentence)
#        wordList = [w for w in wordList if w not in stop_words]
#        tagged.extend(nltk.pos_tag(wordList))
#    TaggedText[i] = tagged
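Each entry of TaggedText is a list of (word, tag) tuples and can be spot-checked as follows; the output shown is indicative only, as the exact tags depend on the tagger model.
#print(TaggedText[0][:5])
## e.g. [('Great', 'JJ'), ('location', 'NN'), ('near', 'IN'), ('city', 'NN'), ('center', 'NN')]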