#!/usr/bin/env python # coding=utf-8 # # Copyright (C) 2012 Matthias Braun, # IPD Snelting, Karlsruhe Institute for Technology # # Usage: Scroll down to the "Example" section and enter your own grammar, # start the script with "python". Output is using Unicode characters, # so make sure your terminal can display them, also writes vcg-graph # files which can be viewed with ycomp. EOF_CHAR = "#" # Part1: Grammars def hash_list(list): hashed_elements = map(hash, list) return reduce(lambda x, y:x^y, hashed_elements, 0) class Production(tuple): def __new__(cls, (left, right)): return tuple.__new__(cls, (left, right)) def __hash__(self): return hash(self[0]) ^ hash_list(self[1]) class Grammar: def __init__(self, terminals, nonterminals, productions, start): self.terminals = terminals self.nonterminals = nonterminals if not isinstance(productions[0], Production): productions = map(Production, productions) self.productions = productions self.start = start def __repr__(self): res = "Terminals: %s\nNonTerminals: %s\n" % (self.terminals, self.nonterminals) res += "Productions:\n" for (left,right) in self.productions: res += "\t%s →" % left if len(right) == 0: res += " ε" for e in right: res += " %s" % e res += "\n" res += "Start: %s" % self.start return res def augment(grammar): newStart = grammar.start while newStart in grammar.nonterminals or newStart in grammar.terminals: newStart += "'" return Grammar( terminals = grammar.terminals, nonterminals = grammar.nonterminals + [newStart], productions = [ (newStart, [grammar.start]) ] + grammar.productions, start = newStart) # Part2: Items, Itemsets class Item(tuple): def __new__(cls, (pos, production)): return tuple.__new__(cls, (pos, production)) def __str__(self): pos = self[0] (left, right) = self[1] res = "%s →" % left for i in range(0, len(right)+1): res += " " if i == pos: res += "⋅" if i < len(right): res += str(right[i]) return res def __hash__(self): pos = self[0] (left, right) = self[1] return hash(pos) ^ hash(left) ^ hash_list(right) def afterDot(self): pos = self[0] (left, right) = self[1] if pos >= len(right): return None return right[pos] class ItemSet(set): def __new__(cls, init = []): return set.__new__(cls, init) def __hash__(self): res = 0 for e in self: res ^= hash(e) return res def __str__(self): res = "" for item in self: res += "%s\n" % str(item) return res def closure(grammar, itemset): added = True while added: newset = ItemSet() added = False for item in itemset: e = item.afterDot() if e not in grammar.nonterminals: continue for production in grammar.productions: if e != production[0]: continue newitem = Item((0, production)) if newitem in itemset: continue newset.add(newitem) added = True if added: itemset = newset.union(itemset) return itemset def itemset_str(itemset): res = "" for item in itemset: res += "%s\n" % str(item) return res # Part3: LR(0)-Automaton def escape_str(string): res = "" for c in string: if c == "\"": res += "\\\"" else: res += c return res def possible_reductions(grammar, itemset): res = set() for (pos, (left,right)) in closure(grammar, itemset): if pos >= len(right): res.add( Production( (left,right) ) ) return res class Automaton: def __init__(self, grammar): # A state is an set of items, we use a dict() instead of a set() here # because strangely python doesn't allow to get items back from the set self.states = dict() # States in their creation order (for more intuitive output) self.ordered_states = [] # A connection is a (from_state, terminal/nonterminal, to_state) tuple self.transitions = set() self.grammar = grammar self._build(grammar) def _build(self, grammar): firststate = ItemSet() for (left,right) in grammar.productions: if left != grammar.start: continue firststate.add(Item( (0, (left,right)) )) self.states[firststate] = firststate self.ordered_states.append(firststate) # Create transitions between states unprocessed = set( [firststate] ) while len(unprocessed) > 0: from_state = unprocessed.pop() from_state_closure = closure(grammar, from_state) for t in grammar.terminals + grammar.nonterminals: newstate = ItemSet() for item in from_state_closure: if item.afterDot() != t: continue pos = item[0] production = item[1] newstate.add(Item( (pos+1, production) )) if len(newstate) == 0: continue to_state = self.states.get(newstate) if to_state == None: to_state = newstate self.states[newstate] = newstate self.ordered_states.append(newstate) unprocessed.add(newstate) connection = (from_state, t, to_state) if connection not in self.transitions: self.transitions.add(connection) def is_accepting(self, state): accept = False for r in possible_reductions(self.grammar, state): if r[0] == self.grammar.start: accept = True break return accept def __str__(self): names = dict() nextnum = 0 res = "" for state in self.ordered_states: name = names.get(state) if name == None: name = "I%s" % (nextnum) nextnum += 1 names[state] = name res += "State %s:\n" % name state_closure = closure(self.grammar, state) res += str(state_closure) res += "Transitions:\n" for (from_state, t, to_state) in self.transitions: if from_state != state: continue to_name = names.get(to_state) if to_name == None: to_name = "I%s" % (nextnum) nextnum += 1 names[to_state] = to_name res += "%s → %s\n" % (t, to_name) if self.is_accepting(state): res += "%s → accept\n" % EOF_CHAR return res def write_vcg(self, out): out.write("graph: {\n") out.write("display_edge_labels: yes\n") out.write("layoutalgorithm: mindepth //$ \"orthogonal\"\n") out.write("graph: {\n") out.write("\ttitle: \"LR(0) automaton\"\n") out.write("\tinfo1: \"Grammar:\n%s\"\n" % escape_str(str(self.grammar))) names = dict() nextnum = 0 res = "" out.write("\tnode: {\n") out.write("\t\ttitle: \"accept\"\n") out.write("\t\tlabel: \"accept\"\n") out.write("\t\tcolor: blue\n") out.write("\t}\n") for state in self.ordered_states: name = names.get(state) if name == None: name = "I%s" % (nextnum) nextnum += 1 names[state] = name out.write("\tnode: {\n") out.write("\t\ttitle: \"%s\"\n" % name) state_closure = closure(self.grammar, state) out.write("\t\tlabel: \"State %s\n%s\"\n" % (name, escape_str(str(state_closure)))) out.write("\t}\n") if self.is_accepting(state): out.write("\tedge: {\n") out.write("\t\tsourcename: \"%s\"\n" % name) out.write("\t\ttargetname: \"accept\"\n") out.write("\t\tlabel: \"%s\"\n" % escape_str(EOF_CHAR)) out.write("\t}\n") for (from_state, t, to_state) in self.transitions: out.write("\tedge: {\n") from_name = names[from_state] to_name = names[to_state] out.write("\t\tsourcename: \"%s\"\n" % from_name) out.write("\t\ttargetname: \"%s\"\n" % to_name) out.write("\t\tlabel: \"%s\"\n" % escape_str(str(t))) out.write("\t}\n") out.write("}") out.write("}") # Example if __name__ == "__main__": G = Grammar( terminals = [ "+", "(", ")", "*", "a" ], nonterminals = [ "S" ], productions = [ ( "S", [ "S", "+", "S" ] ), ( "S", [ "S", "S" ] ), ( "S", [ "(", "S", ")" ] ), ( "S", [ "S", "*" ] ), ( "S", [ "a" ] ), ], start = "S" ) print "=== Grammar ===" print G print "=== Augmented Grammar ===" G_prime = augment(G) print G_prime print "=== LR(0)-Automaton ===" automaton = Automaton(G_prime) print automaton outname = "automaton.vcg" print "Write vcg graph to '%s' (view with ycomp)\n" % outname out = open(outname, "w") automaton.write_vcg(out) out.close()