The following document describes the feature extraction process for feature set F5. Since the extraction code was written in Python, the following example presents the Python code as text. In order to include the Python code in this document, each line was turned into a comment. Regular expressions (REGEX) were used to extract feature set F5. In order to protect the copyright of the provided REGEX patterns, the following code is only exemplary:
In case a given sentence matches the REGEX pattern, “TRUE” is returned for this sentence (see below):
(Sentence) MATCHES “(SAMPLE_REGULAR_EXPRESSION).*” => TRUE
nrRegex = 7
#def negative_match(line, regex):
#print line
#pattern = re.compile(regex)
#m = pattern.match(line)
#if m:
#print ("negative match") #, m.groups()
#return 1
#else:
#return 0
#def positive_match(line, regex):
# type: (object, object) -> object
#print line
#pattern = re.compile(regex)
#m = pattern.match(line)
#if m:
#print ("positive match") #, m.groups()
#return 1
#else:
#return 0
#def match(line, totalResult):
#result = [0] * nrRegex
#result[0] = positive_match(line, r".*(SAMPLE REGEX).*")
#result[1] = positive_match(line, r".*(SAMPLE REGEX).*")
#result[2] = negative_match(line, r".*(SAMPLE REGEX).*")
#result[3] = negative_match(line, r".*(SAMPLE REGEX).*")
#result[4] = negative_match(line, r".*(SAMPLE REGEX).*")
#result[5] = negative_match(line, r".*(SAMPLE REGEX).*")
#result[6] = negative_match(line, r".*(SAMPLE REGEX).*")
#for i in range(0,nrRegex,1):
#totalResult[i] += result[i]
#return (result, totalResult)
#all_files = fm.load('resources') # fm.load('resources','w') will open in write mode
#all_files = ['file.csv']
#outputFile = 'result.csv'
#def readCSV(fileName, outputFile, totalResult):
#with open(fileName, 'r') as csvfile:
#readCSV = csv.reader(csvfile, delimiter=',')
#for row in readCSV:
#id = row[0]
#nr = row[1]
#text = row[2]
#(result, totalResult) = match(text, totalResult)
#outputFile.writerow((id, nr, "", result[0], result[1], result[2], result[3] , result[4], result[5], result[6]))
#return totalResult
#with open(outputFile, 'w') as csvOutFile:
#writer = csv.writer(csvOutFile, delimiter=',', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
#writer.writerow(('nr','id','text','p1','p2','n1','n2','n3','n4','n5'))
#totalResult = [0] * nrRegex
#for f in all_files:
#if (-1 == f.find(".csv")):
#readNonCSV(f, writer, totalResult)
#else:
#totalResult = readCSV('resources/' + f, writer, totalResult)
#write the total results
# writer.writerow(('-1', '-1', '', totalResult[0], totalResult[1], totalResult[2], totalResult[3], totalResult[4], totalResult[5], totalResult[6]))