"""Module defining the parser class reading the pud file."""
import logging
from collections.abc import Sequence
from pathlib import Path
from ..processor.grammar import Grammar, TokenList
from ..tokens.statements import Define, FromImport
from ..tokens.statements import Grammar as GrammarStmt
from ..tokens.statements import Import
from ..tokens.token import BaseToken
from ..tokens.util import INDENTATION_RE
from .util import DEFAULT_TOKENS
COMMENT_CHAR = "#"
INDENT_SPACES = 4
logger = logging.getLogger(__name__)
type Syntax = list[Define | FromImport | Import | Grammar]
[docs]
class Compiler:
"""Base Compiler class."""
source_path: Path
def __init__(self, tokens: Sequence[type[BaseToken]] | None = None) -> None:
"""Init of Compiler class.
:param tokens: Token classes needed to compile. If is None use default tokens.
"""
default_tokens: Sequence[type[BaseToken]] = DEFAULT_TOKENS
if tokens is None:
tokens = default_tokens
self.tokens = tokens
def _parse_indent(self, line: str, lineno: int) -> int:
"""Get indentation of a line."""
indent = INDENTATION_RE.match(line)
if not indent:
return 0
count = len(indent.group(0))
if line.startswith("\t"):
return count
if (count / INDENT_SPACES).is_integer():
return int(count / INDENT_SPACES)
raise IndentationError(f"Invalid amount of spaces in line {lineno}")
def _parse_line(self, line: str, lineno: int) -> BaseToken:
"""Read statement or function from a line."""
line = line.strip()
for token in self.tokens:
if token.matches(line):
return token.from_string(line, lineno)
raise SyntaxError(f"Invalid statement in line {lineno}")
def _parse_syntax(
self, content: str, indent: int = 0, skip_to: int = 0
) -> tuple[TokenList, int]:
"""Produce syntax list from syntax file content.
:param content: Content of the file to compile.
:param indent: Indentation level to start from.
:param skip_to: Line to start from.
:return: Tuple with syntax and last line number.
"""
syntax: TokenList = []
for i, line in enumerate(content.splitlines(True)):
if not line.strip() or i < skip_to or line.strip().startswith(COMMENT_CHAR):
continue
lineno = i + 1
obj = self._parse_line(line, lineno)
new_indent = self._parse_indent(line, lineno)
if indent == new_indent:
syntax.append(obj)
continue
if new_indent > indent:
last_obj = syntax.pop()
if isinstance(last_obj, tuple):
raise SyntaxError(f"Unexpected indentation in line {lineno}")
sub_syntax, skip_to = self._parse_syntax(content, new_indent, i)
syntax.append((last_obj, sub_syntax))
continue
if new_indent < indent:
return syntax, i
return syntax, len(content.splitlines())
def _import(self, path: str) -> Syntax:
"""Parse the syntax from another file.
:param path: Path of the file to import.
"""
logger.debug("Importing %s...", path)
if hasattr(self, "source_path"):
base_dir = self.source_path.parent
else:
raise ImportError("Can not import without a source file.")
import_file = base_dir / f"{path}.pud"
try:
syntax = self.compile_file(import_file)
except FileNotFoundError as e:
raise ImportError(f"No file {import_file}") from e
return [
obj for obj in syntax if not isinstance(obj, Grammar) or obj.name != "input"
]
def _from_import(self, importobj: str, importpath: str) -> Define | Grammar:
"""Get grammar or define statement from another file.
:param importobj: Name of the grammar or variable to import.
:param importpath: Path of the .pud file.
:returns: The grammar or define statement.
:raises ImportError: If no grammar or variable exists with the given name.
"""
for token in self._import(importpath):
if isinstance(token, Grammar) and token.name == importobj:
return token
if isinstance(token, Define) and token.values[0].value == importobj:
return token
raise ImportError(
f"No grammar or variable with name '{importobj}' in {importpath}.pud"
)
def _compile_syntax(self, syntax: TokenList) -> Syntax:
"""Convert some statements into models for better execution.
:param syntax: Syntax to convert.
:returns: The converted Syntax.
"""
def create_grammar(
token: GrammarStmt, sub_tokens: TokenList, inherits: str = ""
) -> Grammar:
"""Create a grammar object from a token.
:param token: The grammar statement.
:param sub_tokens: Tokens in the grammar.
:param inherits: Optional name of inherited grammar if not value in token.
:returns: A Grammar object.
"""
if len(token.values) == 2:
inherits = token.values[1].value
return Grammar(token.lineno, token.values[0].value, sub_tokens, inherits)
new_syntax: Syntax = []
sub_tokens: TokenList = []
for token in syntax:
if isinstance(token, tuple):
token, sub_tokens = token
match token:
case Define():
new_syntax.append(token)
case FromImport():
importpath = token.values[0].value
importobj = token.values[1].value
new_syntax.append(self._from_import(importobj, importpath))
case GrammarStmt():
new_syntax.append(create_grammar(token, sub_tokens))
case Import():
importpath = token.values[0].value
new_syntax.extend(self._import(importpath))
case _:
raise SyntaxError(
f"Invalid statement outside grammar in line {token.lineno}"
)
return new_syntax
[docs]
def compile(self, content: str) -> Syntax:
"""Produce executable syntax object from syntax file.
:param content: Content of the syntax file.
:return: Tuple with syntax and last line number.
"""
syntax, lines = self._parse_syntax(content)
logger.debug("Parsed %s lines", lines)
return self._compile_syntax(syntax)
[docs]
def compile_file(self, file: Path, encoding: str = "utf-8") -> Syntax:
"""Produce executable syntax object from pud file.
:param file: Path of the syntax file.
:return: Tuple with syntax and last line number.
"""
logger.debug("Compiling %s", file)
self.source_path = file
with open(file, "r", encoding=encoding) as f:
content = f.read()
return self.compile(content)