# HG changeset patch # User Emmanouil Theofanis Chourdakis # Date 1545202276 0 # Node ID 2082aeb1f1be1c92f8f094ffa30c262c03128fbe # Parent 56c43da2d64c8e76d80d2243f2df4f5ea403ce1b added demo and readme file diff -r 56c43da2d64c -r 2082aeb1f1be README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,42 @@ +# Grammar Informed Sound Effect Retrieval for SoundScape Generation Demo + +## Requirements + +`spacy>=2.0.0` + +## Installation + +1. Install `clausiepy`: + +``` +git clone git@github.com:mmxgn/clausiepy.git +cd clausiepy +python3 setup.py build +python3 setup.py install [--user] +``` + +## Demo + +2. Edit `demo.py` to change the query if needed, then run it: + +``` +python3 demo.py + +Queries: +Crows feeding on rubbish +Crows feeding +Crows +rubbish +Crows feeding at garbage dump +garbage dump +``` + +## Citing paper +In the rare chance you are citing this work or use this in your work, please also cite + +``` +Del Corro Luciano, and Rainer Gemulla: "Clausie: clause-based open information extraction." +Proceedings of the 22nd international conference on World Wide Web. ACM, 2013. +``` + +since `clausiepy` is based on that paper. 
diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/HEAD --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/HEAD Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,1 @@ +ref: refs/heads/master diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/config --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/config Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,11 @@ +[core] + repositoryformatversion = 0 + filemode = true + bare = false + logallrefupdates = true +[remote "origin"] + url = git@github.com:mmxgn/clausiepy.git + fetch = +refs/heads/*:refs/remotes/origin/* +[branch "master"] + remote = origin + merge = refs/heads/master diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/description --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/description Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,1 @@ +Unnamed repository; edit this file 'description' to name the repository. diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/hooks/applypatch-msg.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/hooks/applypatch-msg.sample Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,15 @@ +#!/bin/sh +# +# An example hook script to check the commit log message taken by +# applypatch from an e-mail message. +# +# The hook should exit with non-zero status after issuing an +# appropriate message if it wants to stop the commit. The hook is +# allowed to edit the commit message file. +# +# To enable this hook, rename this file to "applypatch-msg". + +. git-sh-setup +commitmsg="$(git rev-parse --git-path hooks/commit-msg)" +test -x "$commitmsg" && exec "$commitmsg" ${1+"$@"} +: diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/hooks/commit-msg.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/hooks/commit-msg.sample Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,24 @@ +#!/bin/sh +# +# An example hook script to check the commit log message. 
+# Called by "git commit" with one argument, the name of the file +# that has the commit message. The hook should exit with non-zero +# status after issuing an appropriate message if it wants to stop the +# commit. The hook is allowed to edit the commit message file. +# +# To enable this hook, rename this file to "commit-msg". + +# Uncomment the below to add a Signed-off-by line to the message. +# Doing this in a hook is a bad idea in general, but the prepare-commit-msg +# hook is more suited to it. +# +# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p') +# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1" + +# This example catches duplicate Signed-off-by lines. + +test "" = "$(grep '^Signed-off-by: ' "$1" | + sort | uniq -c | sed -e '/^[ ]*1[ ]/d')" || { + echo >&2 Duplicate Signed-off-by lines. + exit 1 +} diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/hooks/fsmonitor-watchman.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/hooks/fsmonitor-watchman.sample Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,114 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use IPC::Open2; + +# An example hook script to integrate Watchman +# (https://facebook.github.io/watchman/) with git to speed up detecting +# new and modified files. +# +# The hook is passed a version (currently 1) and a time in nanoseconds +# formatted as a string and outputs to stdout all files that have been +# modified since the given time. Paths must be relative to the root of +# the working tree and separated by a single NUL. +# +# To enable this hook, rename this file to "query-watchman" and set +# 'git config core.fsmonitor .git/hooks/query-watchman' +# +my ($version, $time) = @ARGV; + +# Check the hook interface version + +if ($version == 1) { + # convert nanoseconds to seconds + $time = int $time / 1000000000; +} else { + die "Unsupported query-fsmonitor hook version '$version'.\n" . 
+ "Falling back to scanning...\n"; +} + +my $git_work_tree; +if ($^O =~ 'msys' || $^O =~ 'cygwin') { + $git_work_tree = Win32::GetCwd(); + $git_work_tree =~ tr/\\/\//; +} else { + require Cwd; + $git_work_tree = Cwd::cwd(); +} + +my $retry = 1; + +launch_watchman(); + +sub launch_watchman { + + my $pid = open2(\*CHLD_OUT, \*CHLD_IN, 'watchman -j --no-pretty') + or die "open2() failed: $!\n" . + "Falling back to scanning...\n"; + + # In the query expression below we're asking for names of files that + # changed since $time but were not transient (ie created after + # $time but no longer exist). + # + # To accomplish this, we're using the "since" generator to use the + # recency index to select candidate nodes and "fields" to limit the + # output to file names only. Then we're using the "expression" term to + # further constrain the results. + # + # The category of transient files that we want to ignore will have a + # creation clock (cclock) newer than $time_t value and will also not + # currently exist. + + my $query = <<" END"; + ["query", "$git_work_tree", { + "since": $time, + "fields": ["name"], + "expression": ["not", ["allof", ["since", $time, "cclock"], ["not", "exists"]]] + }] + END + + print CHLD_IN $query; + close CHLD_IN; + my $response = do {local $/; <CHLD_OUT>}; + + die "Watchman: command returned no output.\n" . + "Falling back to scanning...\n" if $response eq ""; + die "Watchman: command returned invalid output: $response\n" . 
+ "Falling back to scanning...\n" unless $response =~ /^\{/; + + my $json_pkg; + eval { + require JSON::XS; + $json_pkg = "JSON::XS"; + 1; + } or do { + require JSON::PP; + $json_pkg = "JSON::PP"; + }; + + my $o = $json_pkg->new->utf8->decode($response); + + if ($retry > 0 and $o->{error} and $o->{error} =~ m/unable to resolve root .* directory (.*) is not watched/) { + print STDERR "Adding '$git_work_tree' to watchman's watch list.\n"; + $retry--; + qx/watchman watch "$git_work_tree"/; + die "Failed to make watchman watch '$git_work_tree'.\n" . + "Falling back to scanning...\n" if $? != 0; + + # Watchman will always return all files on the first query so + # return the fast "everything is dirty" flag to git and do the + # Watchman query just to get it over with now so we won't pay + # the cost in git to look up each individual file. + print "/\0"; + eval { launch_watchman() }; + exit 0; + } + + die "Watchman: $o->{error}.\n" . + "Falling back to scanning...\n" if $o->{error}; + + binmode STDOUT, ":utf8"; + local $, = "\0"; + print @{$o->{files}}; +} diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/hooks/post-update.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/hooks/post-update.sample Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,8 @@ +#!/bin/sh +# +# An example hook script to prepare a packed repository for use over +# dumb transports. +# +# To enable this hook, rename this file to "post-update". + +exec git update-server-info diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/hooks/pre-applypatch.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/hooks/pre-applypatch.sample Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,14 @@ +#!/bin/sh +# +# An example hook script to verify what is about to be committed +# by applypatch from an e-mail message. +# +# The hook should exit with non-zero status after issuing an +# appropriate message if it wants to stop the commit. 
+# +# To enable this hook, rename this file to "pre-applypatch". + +. git-sh-setup +precommit="$(git rev-parse --git-path hooks/pre-commit)" +test -x "$precommit" && exec "$precommit" ${1+"$@"} +: diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/hooks/pre-commit.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/hooks/pre-commit.sample Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,49 @@ +#!/bin/sh +# +# An example hook script to verify what is about to be committed. +# Called by "git commit" with no arguments. The hook should +# exit with non-zero status after issuing an appropriate message if +# it wants to stop the commit. +# +# To enable this hook, rename this file to "pre-commit". + +if git rev-parse --verify HEAD >/dev/null 2>&1 +then + against=HEAD +else + # Initial commit: diff against an empty tree object + against=$(git hash-object -t tree /dev/null) +fi + +# If you want to allow non-ASCII filenames set this variable to true. +allownonascii=$(git config --bool hooks.allownonascii) + +# Redirect output to stderr. +exec 1>&2 + +# Cross platform projects tend to avoid non-ASCII filenames; prevent +# them from being added to the repository. We exploit the fact that the +# printable range starts at the space character and ends with tilde. +if [ "$allownonascii" != "true" ] && + # Note that the use of brackets around a tr range is ok here, (it's + # even required, for portability to Solaris 10's /usr/bin/tr), since + # the square bracket bytes happen to fall in the designated range. + test $(git diff --cached --name-only --diff-filter=A -z $against | + LC_ALL=C tr -d '[ -~]\0' | wc -c) != 0 +then + cat <<\EOF +Error: Attempt to add a non-ASCII file name. + +This can cause problems if you want to work with people on other platforms. + +To be portable it is advisable to rename the file. 
+ +If you know what you are doing you can disable this check using: + + git config hooks.allownonascii true +EOF + exit 1 +fi + +# If there are whitespace errors, print the offending file names and fail. +exec git diff-index --check --cached $against -- diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/hooks/pre-push.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/hooks/pre-push.sample Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,53 @@ +#!/bin/sh + +# An example hook script to verify what is about to be pushed. Called by "git +# push" after it has checked the remote status, but before anything has been +# pushed. If this script exits with a non-zero status nothing will be pushed. +# +# This hook is called with the following parameters: +# +# $1 -- Name of the remote to which the push is being done +# $2 -- URL to which the push is being done +# +# If pushing without using a named remote those arguments will be equal. +# +# Information about the commits which are being pushed is supplied as lines to +# the standard input in the form: +# +# <local ref> <local sha1> <remote ref> <remote sha1> +# +# This sample shows how to prevent push of commits where the log message starts +# with "WIP" (work in progress). 
+ +remote="$1" +url="$2" + +z40=0000000000000000000000000000000000000000 + +while read local_ref local_sha remote_ref remote_sha +do + if [ "$local_sha" = $z40 ] + then + # Handle delete + : + else + if [ "$remote_sha" = $z40 ] + then + # New branch, examine all commits + range="$local_sha" + else + # Update to existing branch, examine new commits + range="$remote_sha..$local_sha" + fi + + # Check for WIP commit + commit=`git rev-list -n 1 --grep '^WIP' "$range"` + if [ -n "$commit" ] + then + echo >&2 "Found WIP commit in $local_ref, not pushing" + exit 1 + fi + fi +done + +exit 0 diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/hooks/pre-rebase.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/hooks/pre-rebase.sample Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,169 @@ +#!/bin/sh +# +# Copyright (c) 2006, 2008 Junio C Hamano +# +# The "pre-rebase" hook is run just before "git rebase" starts doing +# its job, and can prevent the command from running by exiting with +# non-zero status. +# +# The hook is called with the following parameters: +# +# $1 -- the upstream the series was forked from. +# $2 -- the branch being rebased (or empty when rebasing the current branch). +# +# This sample shows how to prevent topic branches that are already +# merged to 'next' branch from getting rebased, because allowing it +# would result in rebasing already published history. + +publish=next +basebranch="$1" +if test "$#" = 2 +then + topic="refs/heads/$2" +else + topic=`git symbolic-ref HEAD` || + exit 0 ;# we do not interrupt rebasing detached HEAD +fi + +case "$topic" in +refs/heads/??/*) + ;; +*) + exit 0 ;# we do not interrupt others. + ;; +esac + +# Now we are dealing with a topic branch being rebased +# on top of master. Is it OK to rebase it? + +# Does the topic really exist? +git show-ref -q "$topic" || { + echo >&2 "No such branch $topic" + exit 1 +} + +# Is topic fully merged to master? 
+not_in_master=`git rev-list --pretty=oneline ^master "$topic"` +if test -z "$not_in_master" +then + echo >&2 "$topic is fully merged to master; better remove it." + exit 1 ;# we could allow it, but there is no point. +fi + +# Is topic ever merged to next? If so you should not be rebasing it. +only_next_1=`git rev-list ^master "^$topic" ${publish} | sort` +only_next_2=`git rev-list ^master ${publish} | sort` +if test "$only_next_1" = "$only_next_2" +then + not_in_topic=`git rev-list "^$topic" master` + if test -z "$not_in_topic" + then + echo >&2 "$topic is already up to date with master" + exit 1 ;# we could allow it, but there is no point. + else + exit 0 + fi +else + not_in_next=`git rev-list --pretty=oneline ^${publish} "$topic"` + /usr/bin/perl -e ' + my $topic = $ARGV[0]; + my $msg = "* $topic has commits already merged to public branch:\n"; + my (%not_in_next) = map { + /^([0-9a-f]+) /; + ($1 => 1); + } split(/\n/, $ARGV[1]); + for my $elem (map { + /^([0-9a-f]+) (.*)$/; + [$1 => $2]; + } split(/\n/, $ARGV[2])) { + if (!exists $not_in_next{$elem->[0]}) { + if ($msg) { + print STDERR $msg; + undef $msg; + } + print STDERR " $elem->[1]\n"; + } + } + ' "$topic" "$not_in_next" "$not_in_master" + exit 1 +fi + +<<\DOC_END + +This sample hook safeguards topic branches that have been +published from being rewound. + +The workflow assumed here is: + + * Once a topic branch forks from "master", "master" is never + merged into it again (either directly or indirectly). + + * Once a topic branch is fully cooked and merged into "master", + it is deleted. If you need to build on top of it to correct + earlier mistakes, a new topic branch is created by forking at + the tip of the "master". This is not strictly necessary, but + it makes it easier to keep your history simple. + + * Whenever you need to test or publish your changes to topic + branches, merge them into "next" branch. 
+ +The script, being an example, hardcodes the publish branch name +to be "next", but it is trivial to make it configurable via +$GIT_DIR/config mechanism. + +With this workflow, you would want to know: + +(1) ... if a topic branch has ever been merged to "next". Young + topic branches can have stupid mistakes you would rather + clean up before publishing, and things that have not been + merged into other branches can be easily rebased without + affecting other people. But once it is published, you would + not want to rewind it. + +(2) ... if a topic branch has been fully merged to "master". + Then you can delete it. More importantly, you should not + build on top of it -- other people may already want to + change things related to the topic as patches against your + "master", so if you need further changes, it is better to + fork the topic (perhaps with the same name) afresh from the + tip of "master". + +Let's look at this example: + + o---o---o---o---o---o---o---o---o---o "next" + / / / / + / a---a---b A / / + / / / / + / / c---c---c---c B / + / / / \ / + / / / b---b C \ / + / / / / \ / + ---o---o---o---o---o---o---o---o---o---o---o "master" + + +A, B and C are topic branches. + + * A has one fix since it was merged up to "next". + + * B has finished. It has been fully merged up to "master" and "next", + and is ready to be deleted. + + * C has not merged to "next" at all. + +We would want to allow C to be rebased, refuse A, and encourage +B to be deleted. + +To compute (1): + + git rev-list ^master ^topic next + git rev-list ^master next + + if these match, topic has not merged in next at all. + +To compute (2): + + git rev-list master..topic + + if this is empty, it is fully merged to "master". 
+ +DOC_END diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/hooks/pre-receive.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/hooks/pre-receive.sample Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,24 @@ +#!/bin/sh +# +# An example hook script to make use of push options. +# The example simply echoes all push options that start with 'echoback=' +# and rejects all pushes when the "reject" push option is used. +# +# To enable this hook, rename this file to "pre-receive". + +if test -n "$GIT_PUSH_OPTION_COUNT" +then + i=0 + while test "$i" -lt "$GIT_PUSH_OPTION_COUNT" + do + eval "value=\$GIT_PUSH_OPTION_$i" + case "$value" in + echoback=*) + echo "echo from the pre-receive-hook: ${value#*=}" >&2 + ;; + reject) + exit 1 + esac + i=$((i + 1)) + done +fi diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/hooks/prepare-commit-msg.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/hooks/prepare-commit-msg.sample Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,42 @@ +#!/bin/sh +# +# An example hook script to prepare the commit log message. +# Called by "git commit" with the name of the file that has the +# commit message, followed by the description of the commit +# message's source. The hook's purpose is to edit the commit +# message file. If the hook fails with a non-zero status, +# the commit is aborted. +# +# To enable this hook, rename this file to "prepare-commit-msg". + +# This hook includes three examples. The first one removes the +# "# Please enter the commit message..." help message. +# +# The second includes the output of "git diff --name-status -r" +# into the message, just before the "git status" output. It is +# commented because it doesn't cope with --amend or with squashed +# commits. +# +# The third example adds a Signed-off-by line to the message, that can +# still be edited. This is rarely a good idea. + +COMMIT_MSG_FILE=$1 +COMMIT_SOURCE=$2 +SHA1=$3 + +/usr/bin/perl -i.bak -ne 'print unless(m/^. 
Please enter the commit message/..m/^#$/)' "$COMMIT_MSG_FILE" + +# case "$COMMIT_SOURCE,$SHA1" in +# ,|template,) +# /usr/bin/perl -i.bak -pe ' +# print "\n" . `git diff --cached --name-status -r` +# if /^#/ && $first++ == 0' "$COMMIT_MSG_FILE" ;; +# *) ;; +# esac + +# SOB=$(git var GIT_COMMITTER_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p') +# git interpret-trailers --in-place --trailer "$SOB" "$COMMIT_MSG_FILE" +# if test -z "$COMMIT_SOURCE" +# then +# /usr/bin/perl -i.bak -pe 'print "\n" if !$first_line++' "$COMMIT_MSG_FILE" +# fi diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/hooks/update.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/hooks/update.sample Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,128 @@ +#!/bin/sh +# +# An example hook script to block unannotated tags from entering. +# Called by "git receive-pack" with arguments: refname sha1-old sha1-new +# +# To enable this hook, rename this file to "update". +# +# Config +# ------ +# hooks.allowunannotated +# This boolean sets whether unannotated tags will be allowed into the +# repository. By default they won't be. +# hooks.allowdeletetag +# This boolean sets whether deleting tags will be allowed in the +# repository. By default they won't be. +# hooks.allowmodifytag +# This boolean sets whether a tag may be modified after creation. By default +# it won't be. +# hooks.allowdeletebranch +# This boolean sets whether deleting branches will be allowed in the +# repository. By default they won't be. +# hooks.denycreatebranch +# This boolean sets whether remotely creating branches will be denied +# in the repository. By default this is allowed. +# + +# --- Command line +refname="$1" +oldrev="$2" +newrev="$3" + +# --- Safety check +if [ -z "$GIT_DIR" ]; then + echo "Don't run this script from the command line." 
>&2 + echo " (if you want, you could supply GIT_DIR then run" >&2 + echo " $0 )" >&2 + exit 1 +fi + +if [ -z "$refname" -o -z "$oldrev" -o -z "$newrev" ]; then + echo "usage: $0 " >&2 + exit 1 +fi + +# --- Config +allowunannotated=$(git config --bool hooks.allowunannotated) +allowdeletebranch=$(git config --bool hooks.allowdeletebranch) +denycreatebranch=$(git config --bool hooks.denycreatebranch) +allowdeletetag=$(git config --bool hooks.allowdeletetag) +allowmodifytag=$(git config --bool hooks.allowmodifytag) + +# check for no description +projectdesc=$(sed -e '1q' "$GIT_DIR/description") +case "$projectdesc" in +"Unnamed repository"* | "") + echo "*** Project description file hasn't been set" >&2 + exit 1 + ;; +esac + +# --- Check types +# if $newrev is 0000...0000, it's a commit to delete a ref. +zero="0000000000000000000000000000000000000000" +if [ "$newrev" = "$zero" ]; then + newrev_type=delete +else + newrev_type=$(git cat-file -t $newrev) +fi + +case "$refname","$newrev_type" in + refs/tags/*,commit) + # un-annotated tag + short_refname=${refname##refs/tags/} + if [ "$allowunannotated" != "true" ]; then + echo "*** The un-annotated tag, $short_refname, is not allowed in this repository" >&2 + echo "*** Use 'git tag [ -a | -s ]' for tags you want to propagate." >&2 + exit 1 + fi + ;; + refs/tags/*,delete) + # delete tag + if [ "$allowdeletetag" != "true" ]; then + echo "*** Deleting a tag is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/tags/*,tag) + # annotated tag + if [ "$allowmodifytag" != "true" ] && git rev-parse $refname > /dev/null 2>&1 + then + echo "*** Tag '$refname' already exists." >&2 + echo "*** Modifying a tag is not allowed in this repository." 
>&2 + exit 1 + fi + ;; + refs/heads/*,commit) + # branch + if [ "$oldrev" = "$zero" -a "$denycreatebranch" = "true" ]; then + echo "*** Creating a branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/heads/*,delete) + # delete branch + if [ "$allowdeletebranch" != "true" ]; then + echo "*** Deleting a branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/remotes/*,commit) + # tracking branch + ;; + refs/remotes/*,delete) + # delete tracking branch + if [ "$allowdeletebranch" != "true" ]; then + echo "*** Deleting a tracking branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + *) + # Anything else (is there anything else?) + echo "*** Update hook: unknown type of update to ref $refname of type $newrev_type" >&2 + exit 1 + ;; +esac + +# --- Finished +exit 0 diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/index Binary file clausiepy/.git/index has changed diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/info/exclude --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/info/exclude Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,6 @@ +# git ls-files --others --exclude-from=.git/info/exclude +# Lines that start with '#' are comments. 
+# For a project mostly in C, the following would be a good set of +# exclude patterns (uncomment them if you want to use them): +# *.[oa] +# *~ diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/logs/HEAD --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/logs/HEAD Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,1 @@ +0000000000000000000000000000000000000000 47ba6a8606d1e87515dd9d723cb760959f2a5f56 Emmanouil Theofanis Chourdakis 1545198089 +0000 clone: from git@github.com:mmxgn/clausiepy.git diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/logs/refs/heads/master --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/logs/refs/heads/master Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,1 @@ +0000000000000000000000000000000000000000 47ba6a8606d1e87515dd9d723cb760959f2a5f56 Emmanouil Theofanis Chourdakis 1545198089 +0000 clone: from git@github.com:mmxgn/clausiepy.git diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/logs/refs/remotes/origin/HEAD --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/logs/refs/remotes/origin/HEAD Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,1 @@ +0000000000000000000000000000000000000000 47ba6a8606d1e87515dd9d723cb760959f2a5f56 Emmanouil Theofanis Chourdakis 1545198089 +0000 clone: from git@github.com:mmxgn/clausiepy.git diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/objects/pack/pack-41a88d45adeee24113bd14d6c5f3dcaa68e31c43.idx Binary file clausiepy/.git/objects/pack/pack-41a88d45adeee24113bd14d6c5f3dcaa68e31c43.idx has changed diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/objects/pack/pack-41a88d45adeee24113bd14d6c5f3dcaa68e31c43.pack Binary file clausiepy/.git/objects/pack/pack-41a88d45adeee24113bd14d6c5f3dcaa68e31c43.pack has changed diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/packed-refs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/packed-refs Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,2 @@ +# pack-refs with: peeled fully-peeled sorted 
+47ba6a8606d1e87515dd9d723cb760959f2a5f56 refs/remotes/origin/master diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/refs/heads/master --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/refs/heads/master Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,1 @@ +47ba6a8606d1e87515dd9d723cb760959f2a5f56 diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/.git/refs/remotes/origin/HEAD --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/.git/refs/remotes/origin/HEAD Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,1 @@ +ref: refs/remotes/origin/master diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/README.md Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,89 @@ +# clausiepy +Implementation of the ClausIE information extraction system for python+spacy + +## Credits +While this is a re-implementation by me, original research work (and also the dictionaries) is attributed to Luciano Del Corro +and Rainer Gemulla. If you use it in your code please note that there are slight modifications in the code in order to make it work with the spacy dependency parser, and also cite: +``` +Del Corro Luciano, and Rainer Gemulla: "Clausie: clause-based open information extraction." +Proceedings of the 22nd international conference on World Wide Web. ACM, 2013. 
+``` +## Requirements +`spacy>=2.0.0` + +## Installation +``` +$ git clone https://github.com/mmxgn/clausiepy.git +$ cd clausiepy +$ python3 setup.py build +$ python3 setup.py install [--user] +``` + +## Usage + +### Python + +``` +$ ipython3 + +In [1]: import clausiepy as clausie +In [2]: clauses = clausie.clausie('Albert Einstein died in Princeton in 1955.') +In [3]: clauses +Out[3]: +[{'S': [Einstein], + 'V': [died], + 'O': [], + 'IO': [], + 'XCOMP': [], + 'C': [], + 'type': 'SV', + 'A?': [in, in]}] +In [4]: propositions = clausie.extract_propositions(clauses) +In [5]: clausie.print_propositions(propositions) +Out [5]: +([Einstein], [died], [], [], [], []) +([Einstein], [died], [], [], [], [in, Princeton]) +([Einstein], [died], [], [], [], [in, 1955]) +``` +Note that `clausie`, and `extract_propositions` here return dictionaries and lists of `spacy` span objects which you +can subsequently use however you like. + +### Problog + +Copy `problog/clausiepy_pl.py` at the same directory as your problog `.pl` files, include it +in your scripts with: + +``` +:- use_module('clausiepy_pl.py'). +``` + +And use it via the `clausie/7` predicate. An example can be seen in `problog/test_clausie.pl`: + +``` +:-use_module('clausiepy_pl.py'). + +query(clausie('Albert Einstein, a scientist of the 20th century, died in Princeton in 1955.', Subject, Verb, IndirectObject, DirectObject, Complement, Adverb)). 
+ +``` + +You can run it with: + +``` +problog test_clausie.pl +``` + +and get the output: + +``` + clausie('Albert Einstein, a scientist of the 20th century, died in Princeton in 1955.',Einstein,died,,,,): 1 + clausie('Albert Einstein, a scientist of the 20th century, died in Princeton in 1955.',Einstein,died,,,,in 1955): 1 + clausie('Albert Einstein, a scientist of the 20th century, died in Princeton in 1955.',Einstein,died,,,,in Princeton): 1 +clausie('Albert Einstein, a scientist of the 20th century, died in Princeton in 1955.',Einstein,is,,,a scientist of the 20th century,): 1 +``` + +The variables `Subject`, `Verb`, etc. are self explanatory. + + +## License + +This code is licensed under the [Creative Commons Attribution-ShareAlike 3.0 Unported License](https://creativecommons.org/licenses/by-sa/3.0/). diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/clausiepy/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/clausiepy/__init__.py Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,16 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Sep 25 17:57:27 2018 + +@author: Emmanouil Theofanis Chourdakis + +Reimplementation in spacy+python of: + +Del Corro Luciano, and Rainer Gemulla. +"Clausie: clause-based open information extraction." +Proceedings of the 22nd international conference on World Wide Web. ACM, 2013. 
+ +""" + +from .clausiepy import * \ No newline at end of file diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/clausiepy/__pycache__/__init__.cpython-37.pyc Binary file clausiepy/clausiepy/__pycache__/__init__.cpython-37.pyc has changed diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/clausiepy/__pycache__/clausiepy.cpython-37.pyc Binary file clausiepy/clausiepy/__pycache__/clausiepy.cpython-37.pyc has changed diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/clausiepy/clausiepy.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/clausiepy/clausiepy.py Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,638 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Sep 25 17:57:27 2018 + +@author: Emmanouil Theofanis Chourdakis + +Reimplementation in spacy+python of: + +Del Corro Luciano, and Rainer Gemulla. +"Clausie: clause-based open information extraction." +Proceedings of the 22nd international conference on World Wide Web. ACM, 2013. + +""" + +import os +dirpath = os.path.dirname(os.path.realpath(__file__)) + +# Load NLP model +import spacy +from spacy import displacy +nlp = spacy.load('en') + +# Dictionaries +dict_non_ext_copular = ['die', 'walk'] +dict_ext_copular = ['act', + 'appear', + 'be', + 'become', + 'come', + 'come out', + 'end up', + 'get', + 'go', + 'grow', + 'fall', + 'feel', + 'keep', + 'leave', + 'look', + 'prove', + 'remain', + 'seem', + 'smell', + 'sound', + 'stay', + 'taste', + 'turn', + 'turn up', + 'wind up', + 'live', + 'come', + 'go', + 'stand', + 'lie', + 'love', + 'do', + 'try'] + +dict_copular = ['act', + 'appear', + 'be', + 'become', + 'come', + 'come out', + 'end up', + 'get', + 'go', + 'grow', + 'fall', + 'feel', + 'keep', + 'leave', + 'look', + 'prove', + 'remain', + 'seem', + 'smell', + 'sound', + 'stay', + 'taste', + 'turn', + 'turn up', + 'wind up'] + +dict_complex_transitive = ['bring', + 'catch', + 'drive', + 'get', + 'keep', + 'lay', + 'lead', + 'place', + 'put', + 'set', + 'sit', + 'show', + 'stand', + 'slip', + 
'take'] + + +dict_ignore = ['so', 'then', 'thus', 'why', 'as', 'even'] + + +def translate_clause(clause): + """ Modifies clause so that relative clause indicators (whose, which, where) + are resolved before subsequent processing + """ + + for n, token in enumerate(clause['S']): + # If you have a "which" or a "whose", replace it with the token pointed + # by the relcl dependency of the antidescendant. + + if token.text.lower() in ['which', 'who']: + if token.head.dep_ == 'relcl': + clause['S'].remove(token) + clause['S'].insert(0, token.head.head) + + if 'A' in clause: + for n, token in enumerate(clause['A']): + if token.text.lower() in ['where']: + if token.head.dep_ == 'relcl': + clause['A'].remove(token) + clause['A'].insert(0, token.head.head.head) + + if 'A?' in clause: + for n, token in enumerate(clause['A?']): + if token.text.lower() in ['where']: + if token.head.dep_ == 'relcl': + clause['A?'].remove(token) + clause['A?'].insert(0, token.head.head.head) + + return clause + + +def empty_clause(): + return {'S':[], 'V':[], 'O':[], 'IO': [], 'XCOMP': [], 'A':[], 'C':[]} + +def has_object(clause): + return has_dobj(clause) or has_iobj(clause) + +def has_dobj(clause): + return len(clause['O']) > 0 + +def has_iobj(clause): + return len(clause['IO']) > 0 + +def has_complement(clause): + return len(clause['C']) > 0 or len(clause['XCOMP']) > 0 + +def has_candidate_adverbial(clause): + for verb in clause['V']: + for adv in clause['A']: + if adv in verb.subtree and adv.i > verb.i: + return True + + else: + return False + +def has_known_non_ext_copular(clause): + for verb in clause['V']: + if nlp(verb.text)[0].lemma_ in dict_non_ext_copular: + return True + else: + return False + +def has_known_ext_copular(clause): + for verb in clause['V']: + if nlp(verb.text)[0].lemma_ in dict_ext_copular: + return True + else: + return False + +def is_known_ext_copular(verb): + return nlp(verb.text)[0].lemma_ in dict_ext_copular + + +def is_known_copular(verb): + return str(verb) in 
dict_copular + +def has_potentially_complex_transitive(clause): + for verb in clause['V']: + if nlp(verb.text)[0].lemma_ in dict_complex_transitive: + return True + else: + return False + +def is_in_ignore_list(adverb): + return nlp(adverb.text)[0].lemma_ in dict_ignore + +def clausie(sent, conservative=True): + + def process_dependants(token, clause): + dependants = [c for c in token.head.subtree if c not in token.subtree] + for d in dependants: + if d.dep_ in ['dobj']: + clause['O'].append(d) + elif d.dep_ in ['iobj', 'dative']: + clause['IO'].append(d) + elif d.dep_ in ['ccomp', 'acomp', 'attr']: + clause['C'].append(d) + + elif d.dep_ in ['xcomp']: + if is_known_copular(d): + clause['XCOMP'].append(d.head) + else: + clause['O'].append(d) + elif d.dep_ in ['advmod', 'advcl', 'npadvmod']: + clause['A'].append(d) + elif d.dep_ in ['oprd'] and d.head in clause['V']: + clause['A'].append(d) + elif d.dep_ in ['prep']: + # Capture "prep_in(X, Y)". + # which is prep(X, in) and pobj(in, Y) + for c in d.children: + if c.dep_ == 'pobj': + # clause['A'].append(c) + clause['A'].append(d) + + doc = nlp(sent) + clauses = [] + + clause = empty_clause() + + # Check if root is not a verb + root = [t for t in doc if t.dep_ == 'ROOT'][0] + if root.pos_ != 'VERB': + doc = nlp("There is " + sent) + + # Subjects of a verb + for token in doc: + # print("{}({},{})".format(token.dep_, token.head, token)) + if token.dep_ in ['nsubj', 'nsubjpass', 'attr']: + clause['S'].append(token) + clause['V'].append(token.head) + + # Take dependants: + process_dependants(token, clause) + + clauses.append(translate_clause(clause)) + clause = empty_clause() + + elif token.dep_ in ['csubj']: + clause['S'].append(token) + clause['V'].append(token.head) + + # Take dependants: + dependants = [c for c in token.head.subtree if c not in token.subtree] + #dependants = token.head.children + for d in dependants: + if d.dep_ in ['dobj']: + clause['O'].append(d) + + clauses.append(translate_clause(clause)) + 
clause = empty_clause() + elif token.dep_ in ['appos']: + # Subjects without a verb + # E.g. Sam is my brother in: Sam, my brother. + clause['S'].append(token.head) + clause['V'].append(nlp('is')[0]) + clause['C'].append(token) + clauses.append(translate_clause(clause)) + clause = empty_clause() + elif token.dep_ in ['poss']: + # Subjects declaring possesion + # E.g. my brother: in: Sam, my brother. + toktext = token.text + if token.text.lower() == 'his': + clause['S'].append(nlp('he')[0]) + clause['V'].append(nlp('has')[0]) + elif token.text.lower() == 'her': + clause['S'].append(nlp('she')[0]) + clause['V'].append(nlp('has')[0]) + elif token.text.lower() == 'my': + clause['S'].append(nlp('I')[0]) + clause['V'].append(nlp('have')[0]) + elif token.text.lower() == 'its': + clause['S'].append(nlp('it')[0]) + clause['V'].append(nlp('has')[0]) + elif token.text.lower() == 'our': + clause['S'].append(nlp('we')[0]) + clause['V'].append(nlp('have')[0]) + elif token.text.lower() == 'your': + clause['S'].append(nlp('you')[0]) + clause['V'].append(nlp('have')[0]) + elif token.text.lower() == 'their': + clause['S'].append(nlp('they')[0]) + clause['V'].append(nlp('have')[0]) + else: + clause['S'].append(token) + clause['V'].append(nlp('has')[0]) + clause['O'].append(token.head) + clauses.append(translate_clause(clause)) + clause = empty_clause() + elif token.dep_ in ['acl']: + # Create a synthetic from participial modifiers (partmod). 
+ clause['S'].append(token.head) + new_sent = nlp("are {}".format(" ".join([t.text for t in token.subtree]))) + r = [t for t in new_sent if t.dep_ == 'ROOT'][0] + clause['V'].append(r) + + process_dependants(token, clause) + + + clauses.append(translate_clause(clause)) + clause = empty_clause() + + # Identify clause types + for clause in clauses: + type_ = 'OTHER' + if not has_object(clause): # Q1 + if has_complement(clause): #Q2 + type_ = 'SVC' + else: + # Q3 + if not has_candidate_adverbial(clause): + type_ = 'SV' + else: + # Q4 + if has_known_non_ext_copular(clause): + type_ = 'SV' + else: + # Q5 + if has_known_ext_copular(clause): + type_ = 'SVA' + else: + # Q6: Cases we want conservative or non-conservative estimation + if conservative: + type_ = 'SVA' + else: + type_ = 'SV' + + else: + # Q7 + if has_dobj(clause) and has_iobj(clause): + type_ = 'SVOO' + else: + # Q8 + if has_complement(clause): + type_ = 'SVOC' + else: + #Q9 + if not has_candidate_adverbial(clause) and has_dobj(clause): + type_ = 'SVO' + else: + # Q10 + if has_potentially_complex_transitive(clause): + type_ = 'SVOA' + else: + # Q11 + if conservative: + type_ = 'SVOA' + else: + type_ = 'SVO' + + clause['type'] = type_ + + if type_ in ['SVC', 'SVOO', 'SVOC', 'SV', 'SVO']: + clause['A?'] = clause['A'] + clause.pop('A', None) + + return clauses + +def append_conjugates(L): + for l in L: + if type(l) == str: + continue + for c in l.children: + if c.dep_ in ['conj']: + L.append(c) + +def extract_propositions(clauses): + propositions = [] + for clause in clauses: + + subjects = clause['S'] + append_conjugates(subjects) + + verbs = clause['V'] + append_conjugates(verbs) + + type_ = clause['type'] + + for s in subjects: + for v in verbs: + + if type(v) == str: + v = nlp(v)[0] + + prop = (s, v) + + if type_ in ['SV']: + if prop not in propositions: + if v.text in ['is' ,'are']: + propositions.append({'subject':s, 'verb':nlp("exists")[0]}) + else: + propositions.append({'subject': s, 'verb':v}) + + + 
adverbs = clause['A?'] + append_conjugates(adverbs) + for a in adverbs: + if not is_in_ignore_list(a): + prop = {'subject': s, 'verb':v, 'adverb':a} + if prop not in propositions: + propositions.append(prop) + elif type_ in ['SVO']: + objects = clause['O'] + append_conjugates(objects) + adverbs = clause['A?'] + append_conjugates(adverbs) + + for o in objects: + + + prop = {'subject': s, 'verb':v, 'direct object':o} + if prop not in propositions: + propositions.append(prop) + for a in adverbs: + if not is_in_ignore_list(a): + prop = {'subject': s, 'verb':v, 'direct object':o, 'adverb':a} + if prop not in propositions: + propositions.append(prop) + + # Extractions of form: + # AE had a faboulous hairstyle -> Hairstyle was faboulous + for c in o.children: + if c.dep_ == 'amod': + prop = {'subject': o, 'verb':[t for t in nlp('is')][0], 'complement':c} + if prop not in propositions: + propositions.append(prop) + + + elif type_ in ['SVA']: + adverbs = clause['A'] + append_conjugates(adverbs) + for a in adverbs: + if not is_in_ignore_list(a): + prop = {'subject': s, 'verb':v, 'adverb':a} + if prop not in propositions: + propositions.append(prop) + elif type_ in ['SVC']: + comp = clause['C'] + adverbs = clause['A?'] + append_conjugates(adverbs) + append_conjugates(comp) + for c in comp: + prop = {'subject': s, 'verb':v, 'complement':c} + if prop not in propositions: + propositions.append(prop) + for a in adverbs: + if not is_in_ignore_list(a): + prop = {'subject': s, 'verb':v, 'complement':c, 'adverb':a} + if prop not in propositions: + propositions.append(prop) + elif type_ in ['SVOO']: + dobjects = clause['O'] + iobjects = clause['IO'] + + append_conjugates(dobjects) + append_conjugates(iobjects) + adverbs = clause['A?'] + append_conjugates(adverbs) + + for io in iobjects: + for do in dobjects: + prop = {'subject': s, 'verb':v, 'indirect object':io, 'direct object':do} + if prop not in propositions: + propositions.append(prop) + + for a in adverbs: + if not 
is_in_ignore_list(a): + for do in dobjects: + prop = {'subject': s, 'verb':v, 'indirect object':io, 'direct object':do, 'adverb': a} + if prop not in propositions: + propositions.append(prop) + + elif type_ in ['SVOA']: + dobjects = clause['O'] + append_conjugates(dobjects) + adverbs = clause['A'] + append_conjugates(adverbs) + + for a in adverbs: + if not is_in_ignore_list(a): + for do in dobjects: + prop = {'subject': s, 'verb':v, 'direct object':do, 'adverb': a} + if prop not in propositions: + propositions.append(prop) + elif type_ in ['SVOC']: + dobjects = clause['O'] + append_conjugates(dobjects) + + comp = clause['C'] + append_conjugates(comp) + + adverbs = clause['A?'] + append_conjugates(adverbs) + + for c in comp: + for do in dobjects: + prop = {'subject': s, 'verb':v, 'direct object':do, 'complement': c} + + if prop not in propositions: + propositions.append(prop) + + for a in adverbs: + if not is_in_ignore_list(a): + for do in dobjects: + prop = {'subject': s, 'verb':v, 'direct object':do, 'complement':c, 'adverb': a} + if prop not in propositions: + propositions.append(prop) + return propositions + +def get_conj_text(token): + L = [token] + token_old = None + while token_old != token: + token_old = token + for c in token.children: + if c.dep_ in ['cc', 'punct']: + L.append(c) + if c.dep_ == 'conj': + L.append(c) + token = c + break + + return " ".join([t.text for t in L]) + +def proposition_text(prop): + + # subject = [t for t in prop['subject'].children if t.dep_ in ['det', 'amod']] + [prop['subject']] + subject = [t for t in prop['subject'].lefts] + [prop['subject']] + + # Add of the + for t in prop['subject'].rights: + if t.dep_ in ['prep']: + subject += [d for d in t.subtree] + + + + if 'indirect object' in prop: + indirect_object = [t for t in prop['indirect object'].children if t.dep_ in ['det', 'amod', 'compound']] + [prop['indirect object']] + else: + indirect_object = [] + + if 'direct object' in prop: + direct_object = [t for t in 
prop['direct object'].children if t.dep_ in ['det', 'amod', 'compound']] + [prop['direct object']] + else: + direct_object = [] + + if 'complement' in prop: + complement = [t for t in prop['complement'].subtree] + else: + complement = [] + + if 'adverb' in prop: + adv = prop['adverb'] + if adv.dep_ == 'pobj' and adv.head.dep_ =='prep': + # Prepositional phrase + adverb = [adv.head] + [t for t in prop['adverb'].subtree] + elif adv.dep_ == 'advmod' and adv.head.dep_ == 'npadvmod': + adverb = [t for t in adv.head.subtree] + else: + adverb = [t for t in prop['adverb'].subtree] + else: + adverb = [] + + verb_aux = [p for p in prop['verb'].lefts if p.dep_ in ['aux', 'auxpass']] + verb = verb_aux+[prop['verb']] + + return subject , verb , indirect_object , direct_object , complement , adverb + +def proposition_text_str(prop): + """ Like proposition_text(prop) but returns a string isntead """ + L = proposition_text(prop) + + str_list = [] + + for l in L: + if len(l)>0: + str_list += l + + return " ".join([t.text for t in str_list]) + " ." + +def print_propositions(plist): + for prop in plist: + text = proposition_text(prop) + print(text) + +if __name__ == "__main__": + + from util import * + + print("Testing with various sentences") + sentences = [ + "Bell , a telecommunication company based in Los Angeles , makes and distributes electronic , computer and building products", + "AE died.", + "AE remained in Princeton.", + "AE is smart.", + "AE has won the Nobel Prize.", + "RSAS gave AE the Nobel Prize.", + "The doorman showed AE to his office .", + "AE declared the meeting open .", + "AE died in Princeton in 1955 .", + "AE remained in Princeton until his death .", + "AE is a scientist of the 20th century .", + "AE has won the Nobel Prize in 1921 .", + "In 1921, AE has won the Nobel Prize . 
", + "Nicolas Cage graciously ate and enjoyed the blue fruit and the yellow steak.", + "A bull was feeding in a meadow until a lion approached the bull", + "The attack of the lion caused the death of the bull.", + "Some crows are eating rubbish at a garbage dump.", + "AE knocked the door three times.", + "All crows have a beak.", + "AE had a faboulous hairstyle.", + ] + + for sent in sentences: + print("Sentence:") + print(sent) + print("Dependencies:") + tree_from_doc(nlp(sent)).show() + print("Clauses:") + clauses = clausie(sent) + print() + for clause in clauses: + print("\t{}".format(clause)) + print() + print("Propositions:") + propositions = extract_propositions(clauses) + for prop in propositions: + print(proposition_text_str(prop)) + #print_propositions(propositions) + + print("-----") + diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/clausiepy/dictionaries/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/clausiepy/dictionaries/README.md Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,2 @@ +Those were taken from minie: +https://github.com/rgemulla/minie diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/clausiepy/dictionaries/dict-adverbs-ignore.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/clausiepy/dictionaries/dict-adverbs-ignore.txt Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,6 @@ +so +then +thus +why +as +even \ No newline at end of file diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/clausiepy/dictionaries/dict-adverbs-include.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/clausiepy/dictionaries/dict-adverbs-include.txt Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,5 @@ +hardly +barely +scarcely +seldom +rarely \ No newline at end of file diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/clausiepy/dictionaries/dict-complex-transitive.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/clausiepy/dictionaries/dict-complex-transitive.txt Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,15 @@ +bring +catch 
+drive +get +keep +lay +lead +place +put +set +sit +show +stand +slip +take \ No newline at end of file diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/clausiepy/dictionaries/dict-copular.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/clausiepy/dictionaries/dict-copular.txt Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,25 @@ +act +appear +be +become +come +come out +end up +get +go +grow +fall +feel +keep +leave +look +prove +remain +seem +smell +sound +stay +taste +turn +turn up +wind up \ No newline at end of file diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/clausiepy/dictionaries/dict-ext-copular.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/clausiepy/dictionaries/dict-ext-copular.txt Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,33 @@ +act +appear +be +become +come +come out +end up +get +go +grow +fall +feel +keep +leave +look +prove +remain +seem +smell +sound +stay +taste +turn +turn up +wind up +live +come +go +stand +lie +love +do +try \ No newline at end of file diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/clausiepy/dictionaries/dict-not-ext-copular.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/clausiepy/dictionaries/dict-not-ext-copular.txt Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,2 @@ +die +walk \ No newline at end of file diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/clausiepy/util.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/clausiepy/util.py Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,26 @@ +from pprint import pprint +from treelib import Node, Tree + +def tree_from_token(root): + def add_children(root, tree): + for c in root.children: + tree.create_node("{}:{}/{}".format(c.dep_,c,c.pos_), hash(c), parent=hash(root), data=c) + add_children(c, tree) + + tree = Tree() + tree.create_node("{}/{}".format(root, root.pos_), hash(root), data=root) + add_children(root, tree) + return tree + + + +def tree_from_doc(doc): + # 1. 
Find root + root = [t for t in doc if t.dep_ == 'ROOT'][0] + return tree_from_token(root) + +def tree_from_annotation(annot): + doc = nlp(annot[0]) + labels = annot[1]['entities'] + root = [t for t in doc if t.dep_ == 'ROOT'][0] + return tree_from_token_with_labels(root, labels) diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/problog/clausiepy_pl.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/problog/clausiepy_pl.py Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Sep 26 13:38:38 2018 + +@author: Emmanouil Theofanis Chourdakis + +Problog module for extracting information from a sentence using clausiepy + +""" + +from problog.extern import problog_export_nondet + +import clausiepy as cl + +def remove_apostrophe(string): + # Remove "'"S + if string[0] == "'": + string = string[1:] + if string[-1] == "'": + string = string[:-1] + + return string + +@problog_export_nondet('+str', '-str', '-str', '-str', '-str', '-str', '-str') +def clausie(sent): + + sent = remove_apostrophe(sent) + + clauses = cl.clausie(sent) + + propositions = cl.extract_propositions(clauses) + + result = [] + for proposition in propositions: + ptext = cl.proposition_text(proposition) + + prop = [] + + for p in ptext: + prop.append(" ".join([pp.text for pp in p])) + + result.append(tuple(prop)) + + return result \ No newline at end of file diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/problog/test_clausie.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/problog/test_clausie.pl Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,5 @@ +:-use_module('clausiepy_pl.py'). + +query(clausie('Albert Einstein, a scientist of the 20th century, died in Princeton in 1955.', Subject, Verb, IndirectObject, DirectObject, Complement, Adverb)). 
+ + diff -r 56c43da2d64c -r 2082aeb1f1be clausiepy/setup.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clausiepy/setup.py Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Sep 25 18:21:51 2018 + +@author: Emmanouil Theofanis Chourdakis +""" + + + +from setuptools import setup, find_packages +setup( + name="clausiepy", + version="0.0.1", + packages=find_packages(), + #scripts=['clausiepy/clausiepy.py', 'clausiepy/__init__.py'], + install_requires=['spacy>=2.0.0'], + + author="Emmanouil Theofanis Chourdakis", + author_email="e.t.chourdakis@qmul.ac.uk", + description="A reimplementation of ClausIE Information Extraction System in python", + url="https://github.com/mmxgn/clausiepy", + keywords="openie clausie information extraction", + include_package_data=True, + +) diff -r 56c43da2d64c -r 2082aeb1f1be demo.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/demo.py Wed Dec 19 06:51:16 2018 +0000 @@ -0,0 +1,58 @@ +import clausiepy as cp +quote="Crows are feeding on rubbish at a garbage dump." 
+ +# Extract clauses +clauses = cp.clausie(quote) + +# Extract propositions +propositions = cp.extract_propositions(clauses) + +queries = [] + +# For every proposition, remove auxiliary verb and construct queries +# (see paper) + +keys = ('subject', 'verb', 'indirect object', 'direct object', 'complement', 'adverb') + +queries = [] + +for prop in propositions: + + # Normal queries based on propositions with verbs + for L in range(len(keys), 1, -1): + chosen_keys = keys[:L] + propo = {} + for key in chosen_keys: + if key in prop: + propo[key] = prop[key] + p0 = cp.proposition_text(propo, chosen_keys) + prop_text = " ".join(" ".join([p1.text for p1 in p if p1.dep_ not in ['aux', 'det']]) for p in p0 if len(p) > 0) + if len(prop_text) > 0 and prop_text not in queries: + queries.append(prop_text) + + # Subjects and objects independently + p0 = cp.proposition_text(prop, ['subject']) + prop_text = " ".join(" ".join([p1.text for p1 in p if p1.dep_ not in ['aux', 'det']]) for p in p0 if len(p) > 0) + if len(prop_text) > 0 and prop_text not in queries: + queries.append(prop_text) + + p0 = cp.proposition_text(prop, ['indirect object']) + prop_text = " ".join(" ".join([p1.text for p1 in p if p1.dep_ not in ['aux', 'det', 'prep']]) for p in p0 if len(p) > 0) + if len(prop_text) > 0 and prop_text not in queries: + queries.append(prop_text) + + p0 = cp.proposition_text(prop, ['direct object']) + prop_text = " ".join(" ".join([p1.text for p1 in p if p1.dep_ not in ['aux', 'det', 'prep']]) for p in p0 if len(p) > 0) + if len(prop_text) > 0 and prop_text not in queries: + queries.append(prop_text) + + # Adverb + p0 = cp.proposition_text(prop, ['adverb']) + prop_text = " ".join(" ".join([p1.text for p1 in p if p1.dep_ not in ['aux', 'det', 'prep']]) for p in p0 if len(p) > 0) + if len(prop_text) > 0 and prop_text not in queries: + queries.append(prop_text) + + +print("Queries:") +for query in queries: + print(query)