-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtransform.py
More file actions
32 lines (25 loc) · 1.36 KB
/
transform.py
File metadata and controls
32 lines (25 loc) · 1.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import os
from typing import Optional
class Transform:
    """Parent class for transforms that sets up default file locations.

    On construction the instance records the default node/edge column
    headers, resolves the input and output base directories, derives the
    per-source output file paths, and creates the per-source output
    directory on disk if it does not already exist.

    Class attributes:
        DEFAULT_INPUT_DIR: input base directory used when none is given.
        DEFAULT_OUTPUT_DIR: output base directory used when none is given.
    """

    DEFAULT_INPUT_DIR = os.path.join('data', 'raw')
    DEFAULT_OUTPUT_DIR = os.path.join('data', 'transformed')

    def __init__(self, source_name: str, input_dir: Optional[str] = None,
                 output_dir: Optional[str] = None, nlp: bool = False) -> None:
        """Initialise default headers, directories and output file paths.

        Args:
            source_name: name of the source; used as the name of the
                sub-directory created under the output base directory.
            input_dir: input base directory. Any falsy value (``None`` or
                an empty string) selects ``DEFAULT_INPUT_DIR``.
            output_dir: output base directory. Any falsy value (``None`` or
                an empty string) selects ``DEFAULT_OUTPUT_DIR``.
            nlp: accepted for interface compatibility; this base class does
                not read or store it.

        Side effects:
            Creates ``<output base dir>/<source_name>`` (including missing
            parents); an already existing directory is not an error.
        """
        # default columns, can be appended to or overwritten as necessary
        self.source_name = source_name
        self.node_header = ['id', 'name', 'category']
        self.edge_header = ['subject', 'edge_label', 'object', 'relation',
                            'provided_by']

        # default dirs: fall back to the class-level defaults on falsy input
        self.input_base_dir = input_dir or self.DEFAULT_INPUT_DIR
        self.output_base_dir = output_dir or self.DEFAULT_OUTPUT_DIR
        self.output_dir = os.path.join(self.output_base_dir, source_name)

        # default filenames, all inside the per-source output directory
        self.output_node_file = os.path.join(self.output_dir, "nodes.tsv")
        self.output_edge_file = os.path.join(self.output_dir, "edges.tsv")
        self.output_json_file = os.path.join(self.output_dir,
                                             "nodes_edges.json")
        # the subset terms file lives in the input base dir, not per source
        self.subset_terms_file = os.path.join(self.input_base_dir,
                                              "subset_terms.tsv")

        # make sure the output location exists before anything writes to it
        os.makedirs(self.output_dir, exist_ok=True)

    def run(self, data_file: Optional[str] = None) -> None:
        """Run the transform; a no-op in this parent class.

        Args:
            data_file: optional data file argument; ignored here.
                Presumably subclasses override this method and interpret
                it -- confirm against the concrete transforms.
        """
        pass