Source code for graphtransliterator.graphs

# -*- coding: utf-8 -*-
from collections import UserDict, UserList
from .exceptions import IncompleteGraphCoverageException
import logging

"""
Graph data structure used in Graph Transliterator.
"""
logger = logging.getLogger("graphtransliterator")


[docs] class DirectedGraph: """ A very basic dictionary- and list-based directed graph. Nodes are a list of dictionaries of node data. Edges are nested dictionaries keyed from the head -> tail -> edge properties. An edge list is maintained. Can be exported as a dictionary. Attributes ---------- node : `list` of `dict` List of node data edge : `dict` of {`int`: `dict` of {`int`: `dict`}} Mapping from head to tail of edge, holding edge data edge_list : `list` of `tuple` of (`int`, `int`) List of head and tail of each edge Examples ------- .. jupyter-execute:: from graphtransliterator import DirectedGraph DirectedGraph() """ __slots__ = "edge", "node", "edge_list" def __init__(self, node=None, edge=None, edge_list=None): self.node = node if node else [] self.edge = edge if edge else {} if edge: if not edge_list: edge_list = [ tuple([head_key, tail_key]) for head_key, _ in edge.items() for tail_key in _.keys() ] self.edge_list = sorted(edge_list) # sorts for string comparison else: self.edge_list = []
[docs] def add_edge(self, head, tail, edge_data=None): """ Add an edge to a graph and return its attributes as dict. Parameters ---------- head: `int` Index of head of edge tail: `int` Index of tail of edge edge_data: `dict`, default {} Edge data Returns ------- dict Data of created edge Raises ------ ValueError Invalid ``head`` or ``tail``, or ``edge_data`` is not a dict. Examples -------- .. jupyter-execute:: g = DirectedGraph() g.add_node() .. jupyter-execute:: g.add_node() .. jupyter-execute:: g.add_edge(0,1, {'data_key_1': 'some edge data here'}) .. jupyter-execute:: g.edge """ if edge_data is None: edge_data = {} if type(head) is not int: raise ValueError("Edge head is not an integer: %s." % head) if type(tail) is not int: raise ValueError("Edge tail is not an integer: %s." % tail) if head < 0 or head >= len(self.node): raise ValueError("Head index of edge not in graph: %s." % head) if tail < 0 or tail >= len(self.node): raise ValueError("Tail index of edge not in graph: %s." % tail) if type(edge_data) is not dict: raise ValueError("Edge data is not a dict: %s." % edge_data) if head not in self.edge: self.edge[head] = {} self.edge[head][tail] = edge_data self.edge_list.append(tuple([head, tail])) return self.edge[head][tail]
[docs] def add_node(self, node_data=None): """Create node and return (`int`, `dict`) of node key and object. Parameters ---------- node_data: `dict`, default {} Data to be stored in created node Returns ------- `tuple` of (`int`, `dict`) Index of created node and its data Raises ------ ValueError ``node_data`` is not a ``dict`` Examples -------- .. jupyter-execute:: g = DirectedGraph() g.add_node() .. jupyter-execute:: g.add_node({'datakey1': 'data value'}) .. jupyter-execute:: g.node """ if node_data is None: node_data = {} if type(node_data) is not dict: raise ValueError("node_data must be a dict: %s" % node_data) node_key = len(self.node) self.node.append(node_data) return node_key, self.node[node_key]
class VisitLoggingList(UserList): """Visit logging list.""" def __init__(self, initlist=None): super().__init__(initlist) self.visited = set() def __getitem__(self, i): self.visited.add(i) return self.data[i] def clear_visited(self): self.visited.clear() class VisitLoggingDict(UserDict): """Visit logging dict.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.visited = set() def __getitem__(self, i): self.visited.add(i) return self.data[i] def clear_visited(self): self.visited.clear()
[docs] class VisitLoggingDirectedGraph(DirectedGraph): """A DirectedGraph that logs visits to all nodes and edges. Used to measure the coverage of tests for bundled transliterators.""" def _add_visit_logging(self): self.node = VisitLoggingList(self.node) # Edges are stored {HEAD: {TAIL: {DATA}} # We only care about {TAIL: DATA} for checking coverage. # First, convert all {TAIL: {DATA}} to VisitLoggingDict # (This avoids settings {HEAD: {...}} to visited for head in self.edge.keys(): self.edge[head] = VisitLoggingDict(self.edge[head]) # Second, convert {HEAD: {...}} to VisitLoggingDict self.edge = VisitLoggingDict(self.edge) def __init__(self, graph): """Initialize from existing graph.""" super().__init__(edge=graph.edge, node=graph.node, edge_list=graph.edge_list) self._add_visit_logging()
[docs] def clear_visited(self): """Clear all visited attributes on nodes and edges.""" self.node.visited.clear() for head in self.edge.keys(): self.edge[head].clear_visited() self.edge.clear_visited()
[docs] def check_coverage(self, raise_exception=True): """Checks that all nodes and edges are visited. Parameters ---------- raise_exception: `bool`, default Raise IncompleteGraphCoverageException (default, `True`) Returns ------- Raises ------ IncompleteGraphCoverageException Not all nodes/edges of a graph have been visited.""" errors = False # Check node coverage missed_nodes = [] for node_id in range(len(self.node)): if node_id not in self.node.visited: errors = True missed_nodes.append(node_id) node_data = self.node.data[node_id] # access data so not set to visited logger.warning( "Node {} [{}] has not been visited.".format(node_id, node_data) ) # Check edge coverage missed_edges = [] for head in self.edge.keys(): for tail in self.edge.data[head].keys(): # Access data to not mark visited if tail not in self.edge.data[head].visited: errors = True missed_edges.append((head, tail)) logger.warning( "Edge ({},{}) [{}] has not been visited.".format( head, tail, self.edge.data[head].data[tail] ) ) # Add a comprehensives error message. if errors and raise_exception: error_msgs = [] if missed_nodes: error_msgs.append("Missed nodes: " + str(missed_nodes)) if missed_edges: error_msgs.append("Missed edges: " + str(missed_edges)) raise IncompleteGraphCoverageException("; ".join(error_msgs)) okay = not errors return okay