Skip to content

Commit 44fb4bb

Browse files
missing file added
1 parent 4a0104b commit 44fb4bb

1 file changed

Lines changed: 64 additions & 0 deletions

File tree

src/tangle.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Tangle class to encapsulate all tangle-related data and reduce function parameters.
4+
"""
5+
6+
from typing import Optional, Dict, Set, List, Tuple
7+
import networkx as nx
8+
from .node_id_mapper import NodeIdMapper
9+
10+
11+
class Tangle:
    """
    Container for everything known about one tangle region of an assembly graph.

    A tangle is a complex region of the assembly graph that needs specialized
    traversal to resolve structural variants or repetitive sequences. Keeping
    its data on a single object lets functions accept one argument instead of
    many.

    Structural attributes (supplied to __init__):
        nodes (set): Oriented node IDs in the tangle; reverse-complement
            nodes are included as negative IDs.
        nor_nodes (set): Absolute node IDs, i.e. without orientation.
        boundary_nodes (dict): Incoming boundary node → matching outgoing
            boundary node.
        original_graph (nx.DiGraph): The original GFA graph, including node
            attributes.
        dual_graph (nx.DiGraph): Dual graph whose nodes are junctions and
            whose edges are the original nodes.
        node_id_mapper (NodeIdMapper): Two-way mapping between node IDs and
            string names.

    Derived attributes (start empty/None, filled in during processing):
        cleaned_tips (list): Tip nodes removed during pre-processing.
        coverage_dict (dict): Node ID → coverage value.
        median_unique_coverage (float): Estimated coverage of unique nodes.
        coverage_range (tuple/list): [low, high] coverage bounds for unique
            nodes.
        multiplicities (dict): Node ID → multiplicity (from the MIP solution).
        multi_graph (nx.MultiDiGraph): Multiplied dual graph after the MIP
            solution.
        detected_coverage (float): Final unique coverage calculated by the MIP.
    """

    def __init__(self, nodes, nor_nodes, boundary_nodes, original_graph, dual_graph, node_id_mapper):
        """
        Record the structural inputs and reset every derived field.

        The arguments are stored as given (no copies are made).

        Args:
            nodes (set): Oriented node IDs within the tangle.
            nor_nodes (set): Non-oriented/absolute node IDs.
            boundary_nodes (dict): Incoming → outgoing boundary node mapping.
            original_graph (nx.DiGraph): Original GFA graph.
            dual_graph (nx.DiGraph): Dual graph representation.
            node_id_mapper (NodeIdMapper): Node ID to name mapper.
        """
        # --- Structure: handed in by the caller ---
        self.nodes: Set[int] = nodes
        self.nor_nodes: Set[int] = nor_nodes
        self.boundary_nodes: Dict[int, int] = boundary_nodes
        self.original_graph: nx.DiGraph = original_graph
        self.dual_graph: nx.DiGraph = dual_graph
        self.node_id_mapper: NodeIdMapper = node_id_mapper

        # --- Results: placeholders until later pipeline stages set them ---
        # Containers are created per instance so tangles never share state.
        self.cleaned_tips: List[int] = list()
        self.coverage_dict: Dict[int, float] = dict()
        self.median_unique_coverage: Optional[float] = None
        self.coverage_range: Optional[List[float]] = None
        self.multiplicities: Dict[int, int] = dict()
        self.multi_graph: Optional[nx.MultiDiGraph] = None
        self.detected_coverage: Optional[float] = None

0 commit comments

Comments
 (0)