view demo.py @ 1:2082aeb1f1be tip

added demo and readme file
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Wed, 19 Dec 2018 06:51:16 +0000
parents
children
line wrap: on
line source
import clausiepy as cp
# Example sentence that the demo turns into a list of queries.
quote = "Crows are feeding on rubbish at a garbage dump."

# Extract clauses
clauses = cp.clausie(quote)

# Extract propositions
propositions = cp.extract_propositions(clauses)

# Proposition constituents, in the order they are joined into a query.
# The loop further down builds queries from progressively shorter
# prefixes of this tuple.
keys = ('subject', 'verb', 'indirect object', 'direct object', 'complement', 'adverb')

# For every proposition, remove auxiliary verb and construct queries
# (see paper)
# Query strings are collected here in first-seen order, without duplicates.
queries = []

# Labels compared against each token's ``dep_`` attribute; matching tokens
# are left out of the query text.
# NOTE(review): these look like spaCy dependency labels (auxiliary,
# determiner, preposition) -- confirm against what clausiepy returns.
_SKIP_AUX_DET = ('aux', 'det')
_SKIP_AUX_DET_PREP = ('aux', 'det', 'prep')

# Constituents that are also queried on their own, paired with the labels
# to drop when rendering them. Order matters: it fixes the order in which
# new queries are appended.
_SINGLE_KEY_FILTERS = (
    ('subject', _SKIP_AUX_DET),
    ('indirect object', _SKIP_AUX_DET_PREP),
    ('direct object', _SKIP_AUX_DET_PREP),
    ('adverb', _SKIP_AUX_DET_PREP),
)


def _render_parts(parts, skipped_deps):
    """Join the tokens of a proposition into a single query string.

    Args:
        parts: Iterable of token sequences as returned by
            ``cp.proposition_text``. Each token is read only through its
            ``text`` and ``dep_`` attributes.
        skipped_deps: Collection of ``dep_`` labels whose tokens are omitted.

    Returns:
        The remaining token texts separated by single spaces, or ``""`` when
        no token survives the filter.
    """
    rendered = []
    for part in parts:
        words = [tok.text for tok in part if tok.dep_ not in skipped_deps]
        # Skip parts that were empty or held only skipped tokens; otherwise
        # they would inject doubled or dangling spaces into the query.
        if words:
            rendered.append(" ".join(words))
    return " ".join(rendered)


def _add_query(text):
    """Append ``text`` to ``queries`` unless it is empty or already present."""
    if text and text not in queries:
        queries.append(text)


for prop in propositions:

    # Normal queries based on propositions with verbs: start with every
    # constituent in ``keys`` and drop the last one each round, stopping at
    # the two-constituent prefix.
    for n_keys in range(len(keys), 1, -1):
        chosen_keys = keys[:n_keys]
        # Keep only the chosen constituents this proposition actually has.
        propo = {key: prop[key] for key in chosen_keys if key in prop}
        _add_query(
            _render_parts(cp.proposition_text(propo, chosen_keys), _SKIP_AUX_DET)
        )

    # Subjects, objects and the adverb independently
    for single_key, skipped_deps in _SINGLE_KEY_FILTERS:
        _add_query(
            _render_parts(cp.proposition_text(prop, [single_key]), skipped_deps)
        )
        

# Print the heading followed by each collected query on its own line.
# With no queries, only the heading line is written.
print("Queries:", *queries, sep="\n")