from-my-pen-to-your-ears-supplementary-material: demo/text2ann.py~ annotate

annotate demo/text2ann.py~ @ 1:eb3b846ae0ef tip

second commit

author	Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date	Wed, 16 May 2018 18:13:41 +0100
parents	4dad87badb0c
children

rev	line source
e@0	1 #!/usr/bin/env python3
e@0	2 # -*- coding: utf-8 -*-
e@0	3 """
e@0	4 Created on Sat Apr 28 14:17:15 2018
e@0	5
e@0	6 @author: Emmanouil Theofanis Chourdakis
e@0	7
e@0	8 Takes a .txt story and annotates it based on:
e@0	9
e@0	10 characters,
e@0	11 places,
e@0	12 saywords,
e@0	13 character_lines,
e@0	14 spatial_indicators,
e@0	15
e@0	16 @output:
e@0	17 .ann file with the same name
e@0	18
e@0	19 """
e@0	20
e@0	21 import argparse
e@0	22 from sklearn.externals import joblib
e@0	23 import ner
e@0	24 import spacy
e@0	25
e@0	26
def annotate(text, model, character_lut, saywords_lut):
    """
    Annotate the entities found in ``text`` using the model in ``model``.

    Work in progress: at present the function only loads the spaCy 'en'
    pipeline and runs it over ``text``. ``model``, ``character_lut`` and
    ``saywords_lut`` are accepted but not used yet, and nothing is
    returned.
    """

    # Build the spaCy document for the input text
    pipeline = spacy.load('en')
    document = pipeline(text)

    # Parse using LUTs

    # *- Characters
e@0	40
e@0	41
e@0	42
e@0	43
e@0	44
e@0	45
e@0	46
def load_lut(path):
    """
    Read a look-up-table file with one entry per line.

    Blank (whitespace-only) lines are skipped and surrounding whitespace
    is removed from every entry, so stray spaces or carriage returns in
    the file cannot end up inside an entry.

    :param path: path of the text file to read (decoded as UTF-8)
    :return: list of non-empty, stripped lines in file order
    """
    with open(path, encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]


def parse_character_lut(lines):
    """
    Build the character look-up table from ``name:attr1,attr2`` lines.

    Recognised attributes are a gender (any attribute containing 'male',
    which therefore also covers 'female') and an age ('young' or 'old').
    Matching is case-insensitive and ignores surrounding whitespace; the
    value is stored as written in the file, minus that whitespace.
    Unrecognised attributes are ignored.

    A line without ':' is treated as a character with no attributes, and
    only the first ':' separates the name from the attributes. Lines with
    an empty name are skipped.

    :param lines: iterable of strings, one character description each
    :return: dict mapping character name to a dict that may hold the keys
             'gender' and 'age'
    """
    character_lut = {}  # Stores character attributes indexed by name

    for line in lines:
        # partition() never raises, unlike unpacking the result of split(':')
        name, _, attributes = line.partition(':')
        name = name.strip()
        if not name:
            continue

        entry = {}
        for attribute in attributes.split(','):
            attribute = attribute.strip()
            lowered = attribute.lower()
            if 'male' in lowered:
                entry['gender'] = attribute
            elif lowered in ('young', 'old'):
                entry['age'] = attribute

        character_lut[name] = entry

    return character_lut


def main():
    """
    Command-line entry point.

    Parses the arguments, loads the story text, the model and the
    look-up tables, and passes them to ``annotate``.

    :return: whatever ``annotate`` returns
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument('input_path', help='.txt file to parse')
    argparser.add_argument('model_path', help='.pkl file containing model')
    argparser.add_argument('--say-lut', help='.txt file with list of saywords')
    argparser.add_argument('--char-lut', help='.txt file with known characters')
    argparser.add_argument('--place-lut', help='.txt file with known places')

    args = argparser.parse_args()

    # Load text file
    with open(args.input_path, encoding='utf-8') as f:
        text = f.read()

    # Load model file
    # NOTE(security): joblib.load unpickles the file and can execute
    # arbitrary code -- only load model files from a trusted source.
    model = joblib.load(args.model_path)

    # Load saywords (fall back to the default file when no option is given)
    saylut = load_lut(args.say_lut or 'saywords.txt')

    # Load places LUT
    # Not consumed by annotate() yet; it is still read so that a missing
    # places file is reported exactly as before.
    placelut = load_lut(args.place_lut or 'places.txt')

    # Load character LUT, one character per line
    character_lut = parse_character_lut(
        load_lut(args.char_lut or 'characters.txt'))

    return annotate(text, model, character_lut, saylut)


if __name__ == "__main__":
    annotation_dict = main()
e@0	112
e@0	113
e@0	114
e@0	115
e@0	116

Mercurial > hg > from-my-pen-to-your-ears-supplementary-material

annotate demo/text2ann.py~ @ 1:eb3b846ae0ef tip