Source code for cfpq_data.grammars.converters.cfg

"""Create a context-free grammar from different formats."""
import logging
import re

from pyformlang.cfg import CFG, Variable, Production, Terminal
from pyformlang.regular_expression import Regex
from pyformlang.rsa import RecursiveAutomaton as RSA

__all__ = [
    "cfg_from_cnf",
    "cfg_from_regex",
    "cfg_from_rsa",
]


[docs] def cfg_from_cnf(cnf: CFG) -> CFG: """Create a context-free grammar [2]_ from given context-free grammar in Chomsky normal form [1]_. Parameters ---------- cnf : CFG Context free grammar in Chomsky normal form. Examples -------- >>> from cfpq_data import * >>> gr = cnf_from_text("S -> a b") >>> cfg = cfg_from_cnf(gr) >>> cfg_to_text(cfg) 'S -> a#CNF# b#CNF#\\na#CNF# -> a\\nb#CNF# -> b' Returns ------- cfg : CFG Context-free grammar. References ---------- .. [1] https://en.wikipedia.org/wiki/Chomsky_normal_form .. [2] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions """ cfg = CFG.from_text(cnf.to_text(), cnf.start_symbol) logging.info(f"Create {cfg=} from {cnf=}") return cfg
[docs] def cfg_from_regex(regex: Regex, *, start_symbol: Variable = Variable("S")) -> CFG: """Create a context-free grammar [1]_ from given regular expression [2]_. Parameters ---------- regex : Regex Regular expression. start_symbol : string Start symbol of a context-free grammar. Examples -------- >>> from cfpq_data import * >>> expr = regex_from_text("a*") >>> cfg = cfg_from_regex(expr) >>> cfg_to_text(cfg) 'S -> \\nS -> A0\\nS -> S S\\nA0 -> a' Returns ------- cfg : CFG Context-free grammar. References ---------- .. [1] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions .. [2] https://en.wikipedia.org/wiki/Regular_expression#Formal_definition """ cfg = regex.to_cfg(start_symbol) logging.info(f"Create {cfg=} from {regex=}") return cfg
[docs] def cfg_from_rsa(rsa: RSA) -> CFG: """Create a context-free grammar [1]_ from given Recursive State Automaton [2]_. Parameters ---------- rsa : RSA Recursive State Automaton. Examples -------- >>> from cfpq_data import * >>> gr = rsa_from_text("S -> (a | b)*") >>> cfg = cfg_from_rsa(gr) >>> cfg_to_text(cfg) 'S -> \\nS -> a S\\nS -> b S' Returns ------- cfg : CFG Context-free grammar. References ---------- .. [1] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions .. [2] Alur R., Etessami K., Yannakakis M. (2001) Analysis of Recursive State Machines. In: Berry G., Comon H., Finkel A. (eds) Computer Aided Verification. CAV 2001. Lecture Notes in Computer Science, vol 2102. Springer, Berlin, Heidelberg. https://doi.org/10.1007/3-540-44585-4_18 """ variables = set() terminals = set() productions = set() for symbol in rsa.labels: dfa = rsa.get_box(symbol).dfa variables.add(Variable(symbol.value)) naming = {dfa.start_state: Variable(symbol.value)} for state in dfa.states: if state not in naming: naming[state] = Variable(f"S{len(naming)}") variables.add(naming[state]) if state in dfa.final_states: productions.add(Production(naming[state], [])) for v, label, to in dfa._transition_function.get_edges(): if label.value == label.value.lower(): try: label_value = re.search('"TER:(.*)"', label.value).group(1) except AttributeError: label_value = label.value terminals.add(Terminal(label_value)) production_label = Terminal(label_value) else: try: label_value = re.search('"VAR:(.*)"', label.value).group(1) except AttributeError: label_value = label.value production_label = Variable(label_value) productions.add(Production(naming[v], [production_label, naming[to]])) cfg = CFG( variables=variables, terminals=terminals, start_symbol=Variable(rsa.initial_label.value), productions=productions, ) logging.info(f"Create {cfg=} from {rsa=}") return cfg