#!/usr/bin/env python
# coding=utf-8
#
# Copyright (C) 2012 Matthias Braun,
#                    IPD Snelting, Karlsruhe Institute for Technology
#
# Usage: Scroll down to the "Example" section and enter your own grammar,
#        start the script with "python". Output is using Unicode characters,
#        so make sure your terminal can display them, also writes vcg-graph
#        files which can be viewed with ycomp.

EOF_CHAR = "#"

# Part1: Grammars
def hash_list(list):
	hashed_elements = map(hash, list)
	return reduce(lambda x, y:x^y, hashed_elements, 0)

class Production(tuple):
	def __new__(cls, (left, right)):
		return tuple.__new__(cls, (left, right))
	def __hash__(self):
		return hash(self[0]) ^ hash_list(self[1])

class Grammar:
	def __init__(self, terminals, nonterminals, productions, start):
		self.terminals    = terminals
		self.nonterminals = nonterminals
		if not isinstance(productions[0], Production):
			productions = map(Production, productions)
		self.productions  = productions
		self.start        = start

	def __repr__(self):
		res = "Terminals: %s\nNonTerminals: %s\n" % (self.terminals, self.nonterminals)
		res += "Productions:\n"
		for (left,right) in self.productions:
			res += "\t%s →" % left
			if len(right) == 0:
				res += " ε"
			for e in right:
				res += " %s" % e
			res += "\n"
		res += "Start: %s" % self.start
		return res

def augment(grammar):
	newStart = grammar.start
	while newStart in grammar.nonterminals or newStart in grammar.terminals:
		newStart += "'"
	return Grammar(
		terminals    = grammar.terminals,
		nonterminals = grammar.nonterminals + [newStart],
		productions  = [ (newStart, [grammar.start]) ] + grammar.productions,
		start        = newStart)

# Part2: Items, Itemsets
class Item(tuple):
	def __new__(cls, (pos, production)):
		return tuple.__new__(cls, (pos, production))

	def __str__(self):
		pos           = self[0]
		(left, right) = self[1]
		res = "%s →" % left
		for i in range(0, len(right)+1):
			res += " "
			if i == pos:
				res += "⋅"
			if i < len(right):
				res += str(right[i])
		return res

	def __hash__(self):
		pos           = self[0]
		(left, right) = self[1]
		return hash(pos) ^ hash(left) ^ hash_list(right)

	def afterDot(self):
		pos           = self[0]
		(left, right) = self[1]
		if pos >= len(right):
			return None
		return right[pos]

class ItemSet(set):
	def __new__(cls, init = []):
		return set.__new__(cls, init)

	def __hash__(self):
		res = 0
		for e in self:
			res ^= hash(e)
		return res

	def __str__(self):
		res = ""
		for item in self:
			res += "%s\n" % str(item)
		return res

def closure(grammar, itemset):
	added = True
	while added:
		newset = ItemSet()
		added = False
		for item in itemset:
			e = item.afterDot()
			if e not in grammar.nonterminals:
				continue
			for production in grammar.productions:
				if e != production[0]:
					continue
				newitem = Item((0, production))
				if newitem in itemset:
					continue
				newset.add(newitem)
				added = True
		if added:
			itemset = newset.union(itemset)
	return itemset

def itemset_str(itemset):
	res = ""
	for item in itemset:
		res += "%s\n" % str(item)
	return res

# Part3: LR(0)-Automaton
def escape_str(string):
	res = ""
	for c in string:
		if c == "\"":
			res += "\\\""
		else:
			res += c
	return res

def possible_reductions(grammar, itemset):
	res = set()
	for (pos, (left,right)) in closure(grammar, itemset):
		if pos >= len(right):
			res.add( Production( (left,right) ) )
	return res

class Automaton:
	def __init__(self, grammar):
		# A state is an set of items, we use a dict() instead of a set() here
		# because strangely python doesn't allow to get items back from the set
		self.states      = dict()
		# States in their creation order (for more intuitive output)
		self.ordered_states = []
		# A connection is a (from_state, terminal/nonterminal, to_state) tuple
		self.transitions = set()
		self.grammar     = grammar
		self._build(grammar)

	def _build(self, grammar):
		firststate = ItemSet()
		for (left,right) in grammar.productions:
			if left != grammar.start:
				continue
			firststate.add(Item( (0, (left,right)) ))
		self.states[firststate] = firststate
		self.ordered_states.append(firststate)

		# Create transitions between states
		unprocessed = set( [firststate] )
		while len(unprocessed) > 0:
			from_state         = unprocessed.pop()
			from_state_closure = closure(grammar, from_state)
			for t in grammar.terminals + grammar.nonterminals:
				newstate = ItemSet()
				for item in from_state_closure:
					if item.afterDot() != t:
						continue
					pos        = item[0]
					production = item[1]
					newstate.add(Item( (pos+1, production) ))
				if len(newstate) == 0:
					continue
				to_state = self.states.get(newstate)
				if to_state == None:
					to_state = newstate
					self.states[newstate] = newstate
					self.ordered_states.append(newstate)
					unprocessed.add(newstate)
				connection = (from_state, t, to_state)
				if connection not in self.transitions:
					self.transitions.add(connection)

	def is_accepting(self, state):
		accept = False
		for r in possible_reductions(self.grammar, state):
			if r[0] == self.grammar.start:
				accept = True
				break
		return accept

	def __str__(self):
		names = dict()
		nextnum = 0
		res = ""
		for state in self.ordered_states:
			name = names.get(state)
			if name == None:
				name = "I%s" % (nextnum)
				nextnum += 1
				names[state] = name

			res += "State %s:\n" % name
			state_closure = closure(self.grammar, state)
			res += str(state_closure)
			res += "Transitions:\n"
			for (from_state, t, to_state) in self.transitions:
				if from_state != state:
					continue
				to_name = names.get(to_state)
				if to_name == None:
					to_name = "I%s" % (nextnum)
					nextnum += 1
					names[to_state] = to_name

				res += "%s → %s\n" % (t, to_name)
			if self.is_accepting(state):
				res += "%s → accept\n" % EOF_CHAR

		return res

	def write_vcg(self, out):
		out.write("graph: {\n")
		out.write("display_edge_labels: yes\n")
		out.write("layoutalgorithm: mindepth //$ \"orthogonal\"\n")
		out.write("graph: {\n")
		out.write("\ttitle: \"LR(0) automaton\"\n")
		out.write("\tinfo1: \"Grammar:\n%s\"\n" % escape_str(str(self.grammar)))

		names = dict()
		nextnum = 0
		res = ""
		out.write("\tnode: {\n")
		out.write("\t\ttitle: \"accept\"\n")
		out.write("\t\tlabel: \"accept\"\n")
		out.write("\t\tcolor: blue\n")
		out.write("\t}\n")

		for state in self.ordered_states:
			name = names.get(state)
			if name == None:
				name = "I%s" % (nextnum)
				nextnum += 1
				names[state] = name

			out.write("\tnode: {\n")
			out.write("\t\ttitle: \"%s\"\n" % name)
			state_closure = closure(self.grammar, state)
			out.write("\t\tlabel: \"State %s\n%s\"\n" % (name, escape_str(str(state_closure))))
			out.write("\t}\n")

			if self.is_accepting(state):
				out.write("\tedge: {\n")
				out.write("\t\tsourcename: \"%s\"\n" % name)
				out.write("\t\ttargetname: \"accept\"\n")
				out.write("\t\tlabel: \"%s\"\n" % escape_str(EOF_CHAR))
				out.write("\t}\n")

		for (from_state, t, to_state) in self.transitions:
			out.write("\tedge: {\n")
			from_name = names[from_state]
			to_name = names[to_state]
			out.write("\t\tsourcename: \"%s\"\n" % from_name)
			out.write("\t\ttargetname: \"%s\"\n" % to_name)
			out.write("\t\tlabel: \"%s\"\n" % escape_str(str(t)))
			out.write("\t}\n")
		out.write("}")
		out.write("}")

# Example
if __name__ == "__main__":
	G = Grammar(
		terminals    = [ "+", "(", ")", "*", "a" ],
		nonterminals = [ "S" ],
		productions  = [
			( "S", [ "S", "+", "S" ] ),
			( "S", [ "S", "S" ] ),
			( "S", [ "(", "S", ")" ] ),
			( "S", [ "S", "*" ] ),
			( "S", [ "a" ] ),
		],
		start        = "S"
	)

	print "=== Grammar ==="
	print G

	print "=== Augmented Grammar ==="
	G_prime = augment(G)
	print G_prime

	print "=== LR(0)-Automaton ==="
	automaton = Automaton(G_prime)
	print automaton

	outname = "automaton.vcg"
	print "Write vcg graph to '%s' (view with ycomp)\n" % outname
	out = open(outname, "w")
	automaton.write_vcg(out)
	out.close()