Source code for cfpq_data.grammars.generators.dyck_grammar
"""Returns a Dyck grammar that generates a Dyck language of the balanced strings with parentheses of different types."""
import logging
from typing import List, Tuple
from pyformlang.cfg import CFG, Production, Variable, Terminal
__all__ = ["dyck_grammar"]
[docs]
def dyck_grammar(
types: List[Tuple[str, str]],
*,
eps: bool = True,
start_symbol: Variable = Variable("S"),
) -> CFG:
"""Returns a Dyck grammar that generates a Dyck language [1]_ of the balanced strings with parentheses of given types.
Parameters
----------
types : List[Tuple[str, str]]
List of pairs $(op_i, cp_i)$ with opening and closing parentheses for each type.
eps : bool
Whether the empty string belongs to the language generated by the grammar.
start_symbol : Variable
Start symbol of the grammar.
Examples
--------
>>> from cfpq_data import *
>>> cfg = dyck_grammar([("a", "b"), ("c", "d")])
>>> cfg_to_text(cfg)
'S -> \\nS -> a S b S\\nS -> c S d S'
Returns
-------
cfg : CFG
A Dyck context-free grammar.
References
----------
.. [1] https://en.wikipedia.org/wiki/Dyck_language
"""
variables = {start_symbol}
productions = set()
terminals = set()
if eps:
productions.add(Production(start_symbol, []))
else:
for op, cp in types:
productions.add(Production(start_symbol, [Terminal(op), Terminal(cp)]))
for op, cp in types:
terminals.update([Terminal(op), Terminal(cp)])
productions.add(
Production(
start_symbol, [Terminal(op), start_symbol, Terminal(cp), start_symbol]
)
)
cfg = CFG(
variables=variables,
terminals=terminals,
productions=productions,
start_symbol=start_symbol,
)
logging.info(f"Create a Dyck {cfg=} with {types=}, {eps=}")
return cfg