chartparse Module (chartparse.py)


"""Will be a chartparser and related NLP tools.  Not at all working yet.
(Chapter 22)"""

class Grammar:
    def __init__(self, __start__='S', **rules):
        pass

class Edge:
    def __init__(self):
        pass
    def __repr__(self):
        return '[%(start)d,%(end)d %(lhs)s -> %(found)s . %(remains)s]' \
               % vars(self)

class Chart:
    def __init__(self, grammar): self.grammar = grammar

    def parse(self, words):
        self.ends_at = [[] for i in range(len(words)+1)]
        self.add_edge(Edge(0, 0, '_S_', [], [self.grammar.start]),
                      'initializer')
        for v in range(len(words)):
            self.scanner(v, words[v])
        return self

    def scanner(self, j, word):
        "Add edges everywhere word is expected."
        for cat in grammar.categories_for(word):
            for edge in self.ends_at[j]:
                if unify(cat, edge.expects):
                    self.add_edge(Edge(j, j+1, cat, [word], []), 'scanner')

    def predictor(self, edge):
        "Add edges saying what we expect to see here."
        for rule in self.grammar.rewrites_for(edge.expects):
            self.add_edge(Edge(edge.end, edge.end, rule.lhs, [], rule.rhs),
                          'predictor')

    def completer(self, edge):
        "Use this edge to extend any edges in the chart."
        for old_edge in self.end_at[edge.start]:
            b = unify(edge.lhs, old_edge.expects, old_edge.bindings)
            if b:
                self.add_edge(Edge(old_edge.start, edge.end, old_edge,lhs,
                                [edge] + [old_edge.found], '???'), 'completer')

    def add_edge(self, edge):
        "Put edge into chart, and complete or predict as appropriate."
        if edge not in self.ends_at[edge.end]:
            pass # ???? this is not done ...


"""Module to generate random sentences from a grammar.  The grammar
consists of entries that can be written as S = 'NP VP | S and S',
which gets translated to {'S': [['NP', 'VP'], ['S', 'and', 'S']]}, and
means that one of the top-level lists will be chosen at random, and
then each element of the second-level list will be rewritten; if it is
not in the grammar it rewrites as itself.  The functions rewrite and
rewrite_tree take as input a list of words and an accumulator (empty
list) to which the results are appended.  The function generate and
generate_tree are convenient interfaces to rewrite and rewrite_tree
that accept a string (which defaults to 'S') as input."""

import random

def Grammar(**grammar):
    """Create a dictionary mapping symbols to alternatives."""
    for k in grammar.keys():
        grammar[k] = [alt.strip().split(' ') for alt in grammar[k].split('|')]
    return grammar

grammar = Grammar(
    S = 'NP VP',
    NP = 'Art N',
    VP = 'V NP',
    Art = 'the | a',
    N = 'man | ball | woman | table',
    V = 'hit | took | saw | liked'
    )

def rewrite(words, into):
    "Replace each word with a random entry in grammar (recursively)."
    for word in words:
        if grammar.has_key(word):
            rewrite(random.choice(grammar[word]), into)
        else:
            into.append(word)
    return into

def rewrite_tree(words, into):
    "Replace the list of words into a random tree, chosen from grammar."
    for word in words:
        if grammar.has_key(word):
            into.append({word: rewrite_tree(random.choice(grammar[word]), [])})
        else:
            into.append(word)
    return into

def generate(str='S'):
    "Replace each word in str by a random entry in grammar (recursively)."
    return ' '.join(rewrite(str.split(' '), []))

def generate_tree(cat='S'):
    "Use grammar to rewrite the category cat"
    return rewrite_tree([cat], [])



# Copyright: Peter Norvig, 2002.
# AIMA: Python Code, Example Output.
# Python.org: Tutorial, Language Ref, Libraries.