Source code for cfpq_data.grammars.generators.dyck_grammar

"""Returns a Dyck grammar that generates a Dyck language of the balanced strings with parentheses of different types."""
import logging
from typing import List, Tuple
from pyformlang.cfg import CFG, Production, Variable, Terminal

__all__ = ["dyck_grammar"]


[docs] def dyck_grammar( types: List[Tuple[str, str]], *, eps: bool = True, start_symbol: Variable = Variable("S"), ) -> CFG: """Returns a Dyck grammar that generates a Dyck language [1]_ of the balanced strings with parentheses of given types. Parameters ---------- types : List[Tuple[str, str]] List of pairs $(op_i, cp_i)$ with opening and closing parentheses for each type. eps : bool Whether the empty string belongs to the language generated by the grammar. start_symbol : Variable Start symbol of the grammar. Examples -------- >>> from cfpq_data import * >>> cfg = dyck_grammar([("a", "b"), ("c", "d")]) >>> cfg_to_text(cfg) 'S -> \\nS -> a S b S\\nS -> c S d S' Returns ------- cfg : CFG A Dyck context-free grammar. References ---------- .. [1] https://en.wikipedia.org/wiki/Dyck_language """ variables = {start_symbol} productions = set() terminals = set() if eps: productions.add(Production(start_symbol, [])) else: for op, cp in types: productions.add(Production(start_symbol, [Terminal(op), Terminal(cp)])) for op, cp in types: terminals.update([Terminal(op), Terminal(cp)]) productions.add( Production( start_symbol, [Terminal(op), start_symbol, Terminal(cp), start_symbol] ) ) cfg = CFG( variables=variables, terminals=terminals, productions=productions, start_symbol=start_symbol, ) logging.info(f"Create a Dyck {cfg=} with {types=}, {eps=}") return cfg