Source code for graphtransliterator.transliterators.bundled

from collections import OrderedDict
from graphtransliterator.core import GraphTransliterator, CoverageTransliterator
import os
import sys
import yaml


class Bundled(CoverageTransliterator, GraphTransliterator):
    """
    Subclass of GraphTransliterator used for bundled Graph Transliterator.

    The transliterator's settings are read from a file that lives next to the
    module defining the subclass and is named ``<name>.yaml`` or
    ``<name>.json``, where ``<name>`` is the value of :attr:`name`.
    """

    @property
    def directory(self):
        """Directory of bundled transliterator, used to load settings."""
        return self._module_dir()

    @property
    def name(self):
        """Name of bundled transliterator, e.g. 'Example'"""
        return self._module_name()

    def _module_dir(self, **kwargs):
        """Returns directory of module. Overwritten during testing."""
        return os.path.dirname(sys.modules[self.__module__].__file__)

    def _module_name(self):
        """Returns name of module. Overwritten during testing."""
        return self.__module__

    def _init_from(self, method=None, **kwargs):
        """Initialize from easy-reading YAML or from JSON.

        Parameters
        ----------
        method: `str` (`json` or `yaml`)
            File format to load; also used as the settings file extension.
        **kwargs
            Forwarded to the GraphTransliterator loader. ``coverage`` and
            ``check_ambiguity`` are additionally read here.

        Raises
        ------
        ValueError
            If `method` is not ``"yaml"`` or ``"json"``.
        """
        # Fail early with a clear message instead of a TypeError on string
        # concatenation (method=None) or an UnboundLocalError on `gt` below.
        if method not in ("yaml", "json"):
            raise ValueError(
                "Unknown method {!r}; expected 'yaml' or 'json'.".format(method)
            )

        filename = os.path.join(self.directory, self.name + "." + method)

        # Create GraphTransliterator using factory
        if method == "yaml":
            gt = GraphTransliterator.from_yaml_file(filename, **kwargs)
        else:
            # Pin the encoding so loading does not depend on the platform's
            # default locale encoding.
            with open(filename, "r", encoding="utf-8") as f:
                gt = GraphTransliterator.loads(f.read(), **kwargs)

        # Select coverage superclass, if coverage set.
        if kwargs.get("coverage"):
            _super = CoverageTransliterator
        else:
            _super = GraphTransliterator

        # Re-run the selected superclass initializer on this instance using
        # the settings of the freshly loaded transliterator.
        _super.__init__(
            self,
            gt._tokens,
            gt._rules,
            gt._whitespace,
            onmatch_rules=gt._onmatch_rules,
            metadata=gt._metadata,
            ignore_errors=gt._ignore_errors,
            check_ambiguity=kwargs.get("check_ambiguity", False),
            onmatch_rules_lookup=gt._onmatch_rules_lookup,
            tokens_by_class=gt._tokens_by_class,
            graph=gt._graph,
            tokenizer_pattern=gt._tokenizer_pattern,
            graphtransliterator_version=gt._graphtransliterator_version,
            coverage=kwargs.get("coverage", True),
        )

    def from_YAML(self, check_ambiguity=True, coverage=True, **kwargs):
        """Initialize from bundled YAML file (best for development).

        Parameters
        ----------
        check_ambiguity: `bool`,
            Should ambiguity be checked. Default is `True.`
        coverage: `bool`
            Should test coverage be checked. Default is `True`.

        Returns
        -------
        Bundled
            This instance, so calls can be chained.
        """
        self._init_from(
            method="yaml", check_ambiguity=check_ambiguity, coverage=coverage, **kwargs
        )
        return self

    def from_JSON(self, check_ambiguity=False, coverage=False, **kwargs):
        """Initialize from bundled JSON file (best for speed).

        Parameters
        ----------
        check_ambiguity: `bool`,
            Should ambiguity be checked. Default is `False.`
        coverage: `bool`
            Should test coverage be checked. Default is `False`.

        Returns
        -------
        Bundled
            This instance, so calls can be chained.
        """
        self._init_from(
            method="json", check_ambiguity=check_ambiguity, coverage=coverage, **kwargs
        )
        # Return self for parity with from_YAML.
        return self

    @classmethod
    def new(cls, method="json", **kwargs):
        """Return a new class instance from method (json/yaml).

        Parameters
        ----------
        method: `str` (`json` or `yaml`)
            How to load bundled transliterator, JSON or YAML.

        Raises
        ------
        AssertionError
            If `method` is neither ``"json"`` nor ``"yaml"``.
        """
        # Raised explicitly (rather than via `assert`) so the check still runs
        # under `python -O`; the exception type is kept for compatibility.
        if method not in ("json", "yaml"):
            raise AssertionError("Unknown method.")

        # Bypass __init__: the chosen loader initializes the instance.
        new_ = cls.__new__(cls)
        if method == "json":
            new_.from_JSON(**kwargs)
        else:
            new_.from_YAML(**kwargs)
        return new_

    @property
    def yaml_tests_filen(self):
        """`str`: Path of the YAML tests file, `tests/NAME_tests.yaml` inside
        the transliterator's directory."""
        return os.path.join(self.directory, "tests", "{}_tests.yaml".format(self.name))

    def load_yaml_tests(self):
        """Load YAML tests as a `dict` of {`str`: `str`}.

        Assumes tests are found in subdirectory `tests` of module with name
        `NAME_tests.yaml`, e.g.
        `source_to_target/tests/source_to_target_tests.yaml`.
        Keys and values are converted to `str`.
        """
        test_file = self.yaml_tests_filen
        # Test files may contain raw Unicode (generate_yaml_tests dumps with
        # allow_unicode=True), so do not rely on the locale's default encoding.
        with open(test_file, "r", encoding="utf-8") as f:
            return {str(k): str(i) for k, i in yaml.safe_load(f).items()}

    def run_tests(self, transliteration_tests):
        """Run transliteration tests.

        Parameters
        ----------
        transliteration_tests: `dict` of {`str`:`str`}
            Dictionary of test from source -> correct target.

        Raises
        ------
        AssertionError
            On the first test whose transliteration differs from its target.
        """
        for source, target in transliteration_tests.items():
            source = str(source)
            target = str(target)
            # Transliterate once and reuse the result for both the comparison
            # and the error message.
            result = self.transliterate(source)
            # Explicit raise so failures are still reported under `python -O`.
            if result != target:
                raise AssertionError(
                    'Transliteration error: "{}" -> "{}"; should -> "{}"'.format(
                        source, result, target
                    )
                )

    def run_yaml_tests(self):
        """Run YAML tests in MODULE/tests/MODULE_tests.yaml

        Returns `True` if every test passes; a failing test raises instead.
        """
        transliteration_tests = self.load_yaml_tests()
        self.run_tests(transliteration_tests)
        return True

    def generate_yaml_tests(self, file=None):
        """Generates YAML tests with complete coverage.

        Uses the first token in a class as a sample. Assumes for onmatch rules
        that the first sample token in a class has a unique production, which
        may not be the case. These should be checked and edited.

        Parameters
        ----------
        file
            Currently unused; kept for interface compatibility.

        Returns
        -------
        `str`
            YAML document mapping each sample input to its transliteration.
        """
        tests = OrderedDict()

        def sample_token(token_class):
            """Return first token in token class."""
            tokens_in_class = self.tokens_by_class[token_class]
            return list(tokens_in_class)[0]

        for rule in self.rules:
            # Context attributes may be empty/None, hence the `or ()` guards.
            parts = [sample_token(cls_) for cls_ in (rule.prev_classes or ())]
            parts.extend(rule.prev_tokens or ())
            parts.extend(rule.tokens)
            parts.extend(rule.next_tokens or ())
            parts.extend(sample_token(cls_) for cls_ in (rule.next_classes or ()))
            input_ = "".join(parts)
            tests[input_] = self.transliterate(input_)

        if self.onmatch_rules:
            for rule in self.onmatch_rules:
                # Sample the preceding classes followed by the *following*
                # classes (previously prev_classes was sampled twice).
                parts = [sample_token(cls_) for cls_ in (rule.prev_classes or ())]
                parts.extend(
                    sample_token(cls_) for cls_ in (rule.next_classes or ())
                )
                input_ = "".join(parts)
                tests[input_] = self.transliterate(input_)

        return yaml.dump(dict(tests), allow_unicode=True)