|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Tangle class to encapsulate all tangle-related data and reduce function parameters. |
| 4 | +""" |
| 5 | + |
| 6 | +from typing import Optional, Dict, Set, List, Tuple |
| 7 | +import networkx as nx |
| 8 | +from .node_id_mapper import NodeIdMapper |
| 9 | + |
| 10 | + |
class Tangle:
    """
    Encapsulates all information about a tangle region in an assembly graph.

    A tangle represents a complex region in the assembly graph that requires
    specialized traversal to resolve structural variants or repetitive sequences.

    Core structural attributes (initialized in __init__):
        nodes (set): Oriented node IDs within the tangle (includes RC nodes as negatives)
        nor_nodes (set): Non-oriented/absolute node IDs (nodes without orientation)
        boundary_nodes (dict): Maps incoming boundary nodes → matching outgoing boundary nodes
        original_graph (nx.DiGraph): Original GFA graph with node attributes
        dual_graph (nx.DiGraph): Dual graph where nodes=junctions, edges=original nodes
        node_id_mapper (NodeIdMapper): Bidirectional mapping between node IDs and string names

    Solution/derived attributes (added progressively during processing):
        cleaned_tips (list): Tip nodes removed during pre-processing
        coverage_dict (dict): Node ID → coverage value mapping
        median_unique_coverage (float): Estimated coverage for unique nodes
        coverage_range (tuple/list): [low, high] coverage bounds for unique nodes
        multiplicities (dict): Node ID → multiplicity value (from MIP solution)
        multi_graph (nx.MultiDiGraph): Multiplied dual graph after MIP solution
        detected_coverage (float): Final calculated unique coverage from MIP

    The constructor only stores what it is given; the derived attributes start
    out empty (containers) or as None (scalars and graphs) until later code
    assigns them.
    """

    def __init__(
        self,
        nodes: Set[int],
        nor_nodes: Set[int],
        boundary_nodes: Dict[int, int],
        original_graph: "nx.DiGraph",
        dual_graph: "nx.DiGraph",
        node_id_mapper: "NodeIdMapper",
    ) -> None:
        """
        Initialize a Tangle with core structural information.

        The arguments are stored by reference (no copies are made), so later
        mutation of the passed-in containers is visible through this object.

        Args:
            nodes (set): Oriented node IDs within the tangle
            nor_nodes (set): Non-oriented/absolute node IDs
            boundary_nodes (dict): Maps incoming → outgoing boundary nodes
            original_graph (nx.DiGraph): Original GFA graph
            dual_graph (nx.DiGraph): Dual graph representation
            node_id_mapper (NodeIdMapper): Node ID to name mapper
        """
        # Core structural data
        self.nodes: Set[int] = nodes
        self.nor_nodes: Set[int] = nor_nodes
        self.boundary_nodes: Dict[int, int] = boundary_nodes
        self.original_graph: nx.DiGraph = original_graph
        self.dual_graph: nx.DiGraph = dual_graph
        self.node_id_mapper: NodeIdMapper = node_id_mapper

        # Solution/derived attributes (to be set later)
        # These will be populated progressively as the pipeline executes.
        # Fresh containers are created per instance so tangles never share state.
        self.cleaned_tips: List[int] = []
        self.coverage_dict: Dict[int, float] = {}
        self.median_unique_coverage: Optional[float] = None
        self.coverage_range: Optional[List[float]] = None
        self.multiplicities: Dict[int, int] = {}
        self.multi_graph: Optional[nx.MultiDiGraph] = None
        self.detected_coverage: Optional[float] = None

    def __repr__(self) -> str:
        """Return a compact summary (sizes only) instead of dumping whole graphs."""
        return (
            f"{type(self).__name__}("
            f"nodes={len(self.nodes)}, "
            f"nor_nodes={len(self.nor_nodes)}, "
            f"boundary_nodes={len(self.boundary_nodes)})"
        )
0 commit comments