e@0
|
1 #!/usr/bin/env python3
|
e@0
|
2 # -*- coding: utf-8 -*-
|
e@0
|
3 """
|
e@0
|
4 Created on Sun Apr 1 14:05:17 2018
|
e@0
|
5
|
e@0
|
6 @author: Emmanouil Theofanis Chourdakis
|
e@0
|
7 """
|
e@0
|
8
|
e@0
|
9 from pypeg2 import *
|
e@0
|
10 import re
|
e@0
|
11
|
e@0
|
def var_generator(T):
    """Infinite generator of numbered variable names: "T1", "T2", "T3", ..."""
    counter = 1
    while True:
        yield "{}{}".format(T, counter)
        counter += 1
|
e@0
|
17
|
e@0
|
18
|
e@0
|
19
|
e@0
|
def l_label_generator(T):
    """Infinite generator of line labels: "<TLINE1>", "<TLINE2>", ..."""
    counter = 1
    while True:
        yield "<{}LINE{}>".format(T, counter)
        counter += 1
|
e@0
|
26
|
e@0
|
# Regular expressions for the pieces of a brat-style annotation line.
# Raw strings fix the invalid "\;" escape of the original (a SyntaxWarning
# on modern Python); ';' and ':' need no escaping in a regex, so the
# matched languages are unchanged.
annot_var = re.compile(r"[A-Z][0-9]+")                      # variable id, e.g. "T1"
annot_pos = re.compile(r"[0-9]+ [0-9]+(;[0-9]+ [0-9]+)*")   # "start end[;start end...]"
annot_label = re.compile(r'[A-Za-z0-9_]+')                  # bare label name
label_var_tuple = re.compile(r'[A-Za-z0-9_]+:[A-Z][0-9]+')  # "label:VAR"
|
e@0
|
31
|
e@0
|
class AnnotationType(Keyword):
    """pypeg2 keyword enum of the entity/trigger types found in annotation files."""
    grammar = Enum(K("Place"),
                   K("Character"),
                   K("Character_Line"),
                   K("Motion"),
                   K("Motion_Signal"),
                   K("Says"),
                   K("Spatial_Signal"))
|
e@0
|
40
|
e@0
|
class AttributeType(Keyword):
    """pypeg2 keyword enum of the token attribute names."""
    grammar = Enum(K("Age"), K("Gender"))
|
e@0
|
43
|
e@0
|
class AnnotationTuple:
    """Parses one annotation line: VAR TYPE POSITIONS then the rest of the line."""
    grammar = attr('variable',annot_var),\
              attr('type',AnnotationType),\
              attr('idx',annot_pos),\
              attr('annotation',restline)
|
e@0
|
49
|
e@0
|
50
|
e@0
|
class AttributeTuple:
    """Parses one attribute line: VAR TYPE TARGET-VAR then the rest of the line."""
    grammar = attr('variable', annot_var),\
              attr('type',AttributeType), \
              attr('target', annot_var), \
              attr('annotation', restline)
|
e@0
|
56
|
e@0
|
class VarArg:
    """One 'label:VAR' argument of a relation line."""
    grammar = attr('label', annot_label), ':', attr('target', annot_var)
|
e@0
|
59
|
e@0
|
class VarArgs(List):
    """One or more VarArg items."""
    grammar = some(VarArg)
|
e@0
|
62
|
e@0
|
class RelationTuple:
    """Parses one relation line: VAR followed by its 'label:VAR' arguments."""
    grammar = attr('variable', annot_var),\
              attr('args', VarArgs)
|
e@0
|
66
|
e@0
|
class AnnotLine(List):
    """A single annotation-file line: annotation, attribute, or relation tuple."""
    grammar = [AnnotationTuple, AttributeTuple, RelationTuple]
|
e@0
|
69
|
e@0
|
class AnnotationFile(List):
    """A whole annotation file: one or more AnnotLine entries."""
    grammar = some(AnnotLine)
|
e@0
|
72
|
e@0
|
def get_tokens_by_label(label, sent_tokens, sent_labels):
    """Group *sent_tokens* into contiguous spans tagged with IOB label *label*.

    Args:
        label: entity label without the IOB prefix (e.g. 'Character').
        sent_tokens: the tokens of one sentence.
        sent_labels: IOB tags ('B-x' / 'I-x' / other) aligned with sent_tokens.

    Returns:
        A list of spans, each a list of consecutive tokens forming one entity.
    """
    tokens = []
    blabel = "B-{}".format(label)
    ilabel = 'I-{}'.format(label)

    tok_ = []
    for n, l in enumerate(sent_labels):
        if l == blabel:
            # A new span starts; flush any span in progress.
            if len(tok_) > 0:
                tokens.append(tok_)
            tok_ = [sent_tokens[n]]
        elif l == ilabel:
            tok_.append(sent_tokens[n])
        else:
            # Any other tag terminates the current span.
            if len(tok_) > 0:
                tokens.append(tok_)
            tok_ = []

    # BUGFIX: flush the final span; previously a span reaching the end of
    # the sentence was silently dropped.
    if len(tok_) > 0:
        tokens.append(tok_)

    return tokens
|
e@0
|
92
|
e@0
|
def get_token_head(span):
    """Return the syntactic head of *span*: the token whose parent lies
    outside the span (or the sentence root, which heads itself)."""
    inside = {t.i for t in span}
    node = span[0]
    # Climb the dependency tree while the parent is still inside the span.
    while node.head.i in inside:
        if node == node.head:
            # Sentence root points to itself; stop to avoid looping forever.
            return node
        node = node.head
    return node
|
e@0
|
103
|
e@0
|
104
|
e@0
|
def get_min_dep_path(a, b, doc, LCA):
    """Shortest dependency path between doc[a] and doc[b] through their
    lowest common ancestor, encoded as "dir|dep::dir|dep::..." — 'up' steps
    from a to the LCA, then 'down' steps from the LCA to b.

    Returns "<UND>" when the LCA matrix reports no common ancestor (-1).
    """
    lca_idx = LCA[a, b]

    if lca_idx == -1:
        # e.g. tokens from different sentences share no ancestor.
        return "<UND>"

    lca = doc[lca_idx]

    def climb(idx, direction):
        # Collect (direction, dep-label) steps from doc[idx] up to the LCA.
        steps = []
        node = doc[idx]
        while node != lca:
            if node.head != None:
                steps.append((direction, node.dep_))
            node = node.head
        return steps

    upward = climb(a, 'up')
    downward = climb(b, 'down')
    downward.reverse()  # present the b-side top-down (LCA -> b)

    return "::".join("{}|{}".format(d, dep) for d, dep in upward + downward)
|
e@0
|
def get_dep_with_head(tok):
    """Walk from *tok* to the sentence root.

    Returns (dep-label, root-lemma) when *tok* sits exactly one arc below
    the root, otherwise (None, root-lemma).
    """
    labels = []
    node = tok
    while node.head != node:
        labels.append(node.dep_)
        node = node.head
    if len(labels) == 1:
        return labels[0], node.lemma_
    return None, node.lemma_
|
e@0
|
145
|
e@0
|
def var_generator(T):
    """Yield "T1", "T2", ... forever.

    NOTE(review): identical duplicate of var_generator defined earlier in
    this file -- consider removing one copy.
    """
    n = 0
    while True:
        n += 1
        yield "{}{}".format(T, n)
|
e@0
|
151
|
e@0
|
def get_dep_with_head(tok):
    """If *tok* is a direct child of the sentence root, return its
    (dep-label, root-lemma); otherwise return (None, root-lemma).

    NOTE(review): identical duplicate of get_dep_with_head defined earlier
    in this file -- consider removing one copy.
    """
    chain = []
    current = tok
    while current.head != current:
        chain.append(current.dep_)
        current = current.head
    root_lemma = current.lemma_
    if len(chain) == 1:
        return chain[0], root_lemma
    else:
        return None, root_lemma
|
e@0
|
162
|
e@0
|
class Document:
    """Wraps a spaCy ``Doc`` for annotation and feature extraction.

    Holds the raw text, per-sentence ``Token`` wrappers, the doc's
    lowest-common-ancestor matrix, and the gold/predicted ``Relation``
    objects extracted from it.
    """

    def __init__(self, doc):
        # ``doc`` is a parsed spaCy Doc.
        self.doc = doc
        self.LCA = doc.get_lca_matrix()
        self.text = doc.text
        self.sentences = [str(s) for s in doc.sents]

        self.tokens = []            # flat list of Token wrappers
        self.token_sentences = []   # the same tokens, grouped per sentence

        self.relations = []         # Relation objects (gold and/or predicted)

        # Wrap every spaCy token: tok.i is its doc-level index, n its
        # sentence-level index.
        for m, sent in enumerate(doc.sents):
            tlist = []
            for n, tok in enumerate(sent):
                token = Token(tok, doc, tok.i, sent, n)
                tlist.append(token)
            self.token_sentences.append(tlist)
            self.tokens += tlist

    def add_token(self, token, doc, doc_idx, sent, sent_idx, label='NONE'):
        """Wrap *token* in a Token and append it to ``self.tokens``."""
        token = Token(token, doc, doc_idx, sent, sent_idx, label)
        self.tokens.append(token)

    def add_relation(self, trigger, arg1, arg2, label):
        """Record a labelled relation between two token spans and a trigger span."""
        self.relations.append(Relation(arg1, arg2, trigger, self.LCA, label))

    def find_tokens(self, start, end):
        """Return all tokens whose character span lies within [start, end]."""
        tokens = []
        for tok in self.tokens:
            if tok.start >= start and tok.end <= end:
                tokens.append(tok)

        return tokens

    def assign_label_to_tokens(self, start, end, label):
        """IOB-tag the tokens in [start, end]: 'B-label' first, 'I-label' after."""
        tokens = self.find_tokens(start, end)
        for n, token in enumerate(tokens):
            if n == 0:
                IOB = 'B'
            else:
                IOB = 'I'

            token.set_label('{}-{}'.format(IOB, label))

    def assign_label_to_tokens_by_matching_lemma(self, lemma, label):
        """Mark every token whose lemma equals *lemma* as a one-token 'B-label' span."""
        for t in self.tokens:
            if t.token.lemma_ == lemma:
                t.label = 'B-{}'.format(label)

    def assign_attribute_to_tokens(self, start, end, label, attribute):
        """Set attribute *label* -> *attribute* on every token in [start, end]."""
        tokens = self.find_tokens(start, end)
        for n, token in enumerate(tokens):
            token.set_attribute(label, attribute)

    def get_token_features_labels(self):
        """Return (features, labels): per-sentence lists of token feature
        dicts and the tokens' IOB labels."""
        features = []
        labels = []

        for sentence in self.token_sentences:
            sentence_features = []
            sentence_labels = []

            for token in sentence:
                sentence_features.append(token.get_feature_vector())
                sentence_labels.append(token.label)

            features.append(sentence_features)
            labels.append(sentence_labels)

        return features, labels

    def get_token_features_attributes(self, label):
        """Like get_token_features_labels, but the label sequence comes from
        each token's attribute *label* ('O' when the attribute is unset)."""
        features = []
        labels = []

        for sentence in self.token_sentences:
            sentence_features = []
            sentence_labels = []

            for token in sentence:
                sentence_features.append(token.get_feature_vector())
                if label in token.attributes:
                    sentence_labels.append(token.attributes[label])
                else:
                    sentence_labels.append('O')

            features.append(sentence_features)
            labels.append(sentence_labels)

        return features, labels

    def get_gold_relation_feature_labels(self):
        """Return (features, labels) for the gold relations stored on the doc."""
        features = []
        labels = []
        for r in self.relations:
            feat = r.get_feature_vector()
            label = r.label

            features.append(feat)
            labels.append(label)

        return features, labels

    def get_candidate_relation_feature_labels(self):
        """Return (features, labels) for all candidate relations (gold label
        where a candidate matches a gold relation, 'NONE' otherwise)."""
        features = []
        labels = []

        candidate_relations = self.get_candidate_relations()
        for r in candidate_relations:
            feat = r.get_feature_vector()
            label = r.label

            features.append(feat)
            labels.append(label)

        return features, labels

    def get_tokens_with_label(self, label):
        """Per sentence, group tokens into contiguous B-/I- spans for *label*.

        Returns a list with one entry per sentence; each entry is a list of
        token spans (lists of Token).
        NOTE(review): a span that runs to the very end of a sentence is
        dropped because the accumulator is never flushed after the loop --
        confirm whether this is intended.
        """

        blabel = "B-{}".format(label)
        ilabel = 'I-{}'.format(label)

        tokens = []

        for I in range(len(self.token_sentences)):
            tokens_ = []
            sent_tokens = self.token_sentences[I]
            sent_labels = [t.label for t in sent_tokens]

            tok_ = []
            for n,l in enumerate(sent_labels):
                if l == blabel:
                    if len(tok_) > 0:
                        tokens_.append(tok_)
                    tok_ = [sent_tokens[n]]
                elif l == ilabel:
                    tok_.append(sent_tokens[n])
                else:
                    if len(tok_)>0:
                        tokens_.append(tok_)
                    tok_ = []
            tokens.append(tokens_)

        return tokens

    def get_candidate_relations(self):
        """Enumerate candidate relations sentence by sentence.

        Candidates: (Character, Place) pairs around each Spatial_Signal
        trigger, and (Character, Character_Line) pairs around each Says
        trigger. Candidates matching a stored gold relation inherit its
        label; the rest keep the default 'NONE'.
        """
        candidate_relations = []

        characters = self.get_tokens_with_label('Character')
        places = self.get_tokens_with_label('Place')
        spatial_signals = self.get_tokens_with_label('Spatial_Signal')
        say_words = self.get_tokens_with_label('Says')
        character_lines = self.get_tokens_with_label('Character_Line')

        for I in range(len(spatial_signals)):
            for sp in spatial_signals[I]:
                for ch in characters[I]:
                    for pl in places[I]:
                        rel = Relation(ch, pl, sp, self.LCA)
                        candidate_relations.append(rel)

        for I in range(len(say_words)):
            for sw in say_words[I]:
                for ch in characters[I]:
                    for cl in character_lines[I]:
                        rel = Relation(ch, cl, sw, self.LCA)
                        candidate_relations.append(rel)

        # Transfer gold labels onto matching candidates.
        for cr in candidate_relations:
            for r in self.relations:
                if cr == r:
                    cr.label = r.label

        return candidate_relations

    def predict_relations(self, model):
        """Classify every candidate relation with *model*; keep those whose
        predicted label is not 'NONE' on ``self.relations``."""
        relations = self.get_candidate_relations()

        for n, r in enumerate(relations):
            f = r.get_feature_vector()
            label = model.predict([f])[0]
            if label != 'NONE':
                r.label = label
                self.relations.append(r)

    def __str__(self):
        return self.text
|
e@0
|
353
|
e@0
|
class Relation:
    """A (trigger, arg1, arg2) relation between token spans, with a label.

    ``arg1``/``arg2``/``trigger`` are lists of Token wrappers; ``lca`` is the
    document's lowest-common-ancestor matrix (spaCy ``Doc.get_lca_matrix()``).
    """

    def __init__(self, arg1, arg2, trigger, lca, label='NONE'):
        self.arg1 = arg1
        self.arg2 = arg2
        self.trigger = trigger
        self.doc = trigger[0].doc
        self.LCA = lca
        self.label = label

    def __repr__(self):
        return "<{}| trigger: {}, arg1: {}, arg2: {}>".format(self.label, self.trigger, self.arg1, self.arg2)

    def __eq__(self, other):
        # NOTE(review): compares only the overlapping prefix of each span
        # (min length), so spans of different lengths with a common prefix
        # compare equal -- kept as-is for compatibility with gold matching.
        return all([self.arg1[n].text == other.arg1[n].text for n in range(min(len(self.arg1), len(other.arg1)))]) \
            and all([self.arg2[n].text == other.arg2[n].text for n in range(min(len(self.arg2), len(other.arg2)))]) \
            and all([self.trigger[n].text == other.trigger[n].text for n in range(min(len(self.trigger), len(other.trigger)))])

    def get_feature_vector(self):
        """Build the relation's feature dict (keys are the feature ids the
        relation classifier was designed around)."""
        rf = {}

        # Syntactic head of each span.
        arg1 = get_token_head([t.token for t in self.arg1])
        arg2 = get_token_head([t.token for t in self.arg2])
        trigger = get_token_head([t.token for t in self.trigger])

        # Entity types, stripped of the IOB prefix.
        arg1_type = self.arg1[0].label.replace('B-', '')
        arg2_type = self.arg2[0].label.replace('B-', '')

        rf['10'] = arg1_type + '::' + arg2_type

        # Position of each argument head relative to the trigger head.
        # BUGFIX: these were left unassigned (NameError) when the trigger
        # and the argument share the same head index; default to 'same'.
        arg1_direction = 'same'
        arg2_direction = 'same'
        if trigger.i < arg1.i:
            arg1_direction = 'right'
        if trigger.i > arg1.i:
            arg1_direction = 'left'

        if trigger.i < arg2.i:
            arg2_direction = 'right'
        if trigger.i > arg2.i:
            arg2_direction = 'left'

        rf['12.1'] = arg1_direction
        rf['12.2'] = arg2_direction
        rf['13'] = arg1_direction + '::' + arg2_direction

        # Trigger lexical features.
        rf['1'] = trigger.text.lower()
        rf['2'] = trigger.lemma_
        rf['3'] = trigger.pos_
        rf['4'] = rf['2'] + '::' + rf['3']
        rf['11'] = rf['10'] + '::' + rf['2']
        rf['14'] = rf['13'] + '::' + rf['2']

        # Per-argument features.
        arg_types = [arg1_type, arg2_type]
        for i, token in enumerate([arg1, arg2]):
            rf['5.{}'.format(i)] = token.text.lower()
            rf['6.{}'.format(i)] = token.lemma_
            rf['7.{}'.format(i)] = token.pos_
            rf['8.{}'.format(i)] = token.lemma_ + '::' + token.pos_
            # BUGFIX: was always arg1_type for both i=0 and i=1.
            rf['9.{}'.format(i)] = arg_types[i]
            rf['17.{}'.format(i)] = get_min_dep_path(token.i, trigger.i, self.doc, self.LCA)
            # NOTE(review): '20' is overwritten on each pass, so it ends up
            # holding the arg2 path length -- key kept for compatibility.
            rf['20'] = len(rf['17.{}'.format(i)].split('::'))

            # BUGFIX: the token distance was computed from arg1 for both
            # arguments; use the current argument's head instead.
            rf['22.{}'.format(i)] = max(token.i, trigger.i) - min(token.i, trigger.i)

        rf['18'] = rf['17.0'] + '::' + rf['17.1']

        deppath = get_min_dep_path(arg1.i, arg2.i, self.doc, self.LCA)
        rf['19'] = deppath
        rf['23'] = rf['22.0'] + rf['22.1']

        return rf
|
e@0
|
428
|
e@0
|
class Token:
    """Wrapper around a spaCy token carrying an IOB label and attributes.

    Keeps the owning doc/sentence, the token's doc-level and sentence-level
    indices, its character offsets, and builds the feature dict used for
    sequence labelling.
    """
    def __init__(self, token, doc, doc_idx, sent, sent_idx, label='O'):
        self.token = token
        self.text = token.text
        self.doc = doc
        self.doc_idx = doc_idx      # index of the token in the whole doc
        self.sent = sent
        self.sent_idx = sent_idx    # index of the token within its sentence
        self.attributes = {}        # e.g. {'Gender': ...} set via set_attribute

        self.label = label
        # Character offsets of the token within the document text.
        self.start = self.token.idx
        self.end = self.token.idx + len(self.token)

    def __repr__(self):
        return "[{} -> {}]".format(repr(self.token), self.label)

    def set_label(self, label):
        """Overwrite the token's IOB label."""
        self.label = label

    def set_attribute(self, label, value):
        """Attach attribute *label* -> *value* (e.g. Age/Gender) to the token."""
        self.attributes[label] = value

    def get_feature_vector(self):
        """Build the per-token feature dict.

        Features: text/lemma/POS/NER over a 5-token window (F.1-F.4),
        lemma::POS and NER::POS over a 3-token window (F.5-F.6), POS and
        raw-string bigrams (F.10-F.11), dependency-with-head (F.7-F.8),
        and the token's word-vector components (F.9_*).
        """

        def find_ngrams(input_list, n):
            # Sliding n-grams over a list, as an iterator of tuples.
            return zip(*[input_list[i:] for i in range(n)])

        # Stores the feature dictionary
        feat_dict = {}

        #1. Create token spans

        # 5 token span centred on this token (clipped at sentence bounds)
        large_span = self.sent[max(0, self.sent_idx - 2):min(len(self.sent), self.sent_idx + 3)]

        # 3 token span
        short_span = self.sent[max(0, self.sent_idx - 1):min(len(self.sent), self.sent_idx + 2)]

        for i, t in enumerate(large_span):
            feat_dict['F.1_{}'.format(i)] = t.text
            feat_dict['F.2_{}'.format(i)] = t.lemma_
            feat_dict['F.3_{}'.format(i)] = t.pos_
            feat_dict['F.4_{}'.format(i)] = t.ent_type_

        for i, t in enumerate(short_span):
            feat_dict['F.5_{}'.format(i)] = "::".join([t.lemma_, t.pos_])
            feat_dict['F.6_{}'.format(i)] = "::".join([t.ent_type_, t.pos_])

        ngrams = find_ngrams([t.pos_ for t in large_span], 2) # POS bigrams
        for i, ng in enumerate(ngrams):
            feat_dict['F.10_{}'.format(i)] = " ".join(ng)

        ngrams = find_ngrams([t.text for t in short_span], 2) # Raw-string bigrams
        for i, ng in enumerate(ngrams):
            feat_dict['F.11_{}'.format(i)] = " ".join(ng)

        # Get dependency with head if it exists (only set when the token is
        # a direct child of the sentence root).
        dirdep, headlemma = get_dep_with_head(self.token)
        if dirdep is not None:
            feat_dict['F.7'] = dirdep
            feat_dict['F.8'] = "::".join([dirdep, headlemma])

        # Word-embedding components (presumably GloVe via the spaCy model --
        # depends on the loaded pipeline).
        vector = self.token.vector
        for i in range(len(vector)):
            feat_dict['F.9_{}'.format(i)] = vector[i]

        return feat_dict
|
e@0
|
501
|
e@0
|
class Character:
    """Named-entity record for a story character, with optional age/gender."""

    def __init__(self, name, age='none', gender='none'):
        # 'none' marks an attribute that was never annotated.
        self.name, self.age, self.gender = name, age, gender

    def __repr__(self):
        template = "<CHARACTER name='{}' age='{}' gender='{}'>"
        return template.format(self.name, self.age, self.gender)

    def __eq__(self, other):
        # Characters match on case-insensitive name only.
        return other.name.lower() == self.name.lower()
|
e@0
|
516
|
e@0
|
class Place:
    """Named-entity record for a place mention."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        template = "<PLACE name='{}'>"
        return template.format(self.name)

    def __eq__(self, other):
        # Places match on case-insensitive name.
        return other.name.lower() == self.name.lower()
|
e@0
|
527
|
e@0
|
class Sayword:
    """Named-entity record for a speech-introducing word (e.g. 'said')."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        template = "<SAYWORD name='{}'>"
        return template.format(self.name)

    def __eq__(self, other):
        # Saywords match on case-insensitive name.
        return other.name.lower() == self.name.lower()
|
e@0
|
538
|
e@0
|
class CharacterLine:
    """Named-entity record for a line of dialogue spoken by a character."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        template = "<CHARACTER_LINE name='{}'>"
        return template.format(self.name)

    def __eq__(self, other):
        # Lines match on case-insensitive text.
        return other.name.lower() == self.name.lower()
|
e@0
|
549
|
e@0
|
class SpatialSignal:
    """Named-entity record for a spatial-relation signal word (e.g. 'in')."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        template = "<SPATIAL_SIGNAL name='{}'>"
        return template.format(self.name)

    def __eq__(self, other):
        # Signals match on case-insensitive name.
        return other.name.lower() == self.name.lower()
|
e@0
|
560
|
e@0
|
561
|
e@0
|
562
|
e@0
|
563 |