Skip to content

Commit bd91ce6

Browse files
committed
cleaning
1 parent d642034 commit bd91ce6

6 files changed

Lines changed: 8 additions & 24 deletions

File tree

python/mlcroissant/mlcroissant/_src/core/optional.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -91,11 +91,6 @@ def librosa(cls) -> types.ModuleType: # pylint: disable=invalid-name
9191
"""Cached librosa module."""
9292
return _try_import("librosa", package_name="librosa")
9393

94-
@cached_class_property
95-
def orjson(cls) -> types.ModuleType: # pylint: disable=invalid-name
96-
"""Cached orjson module."""
97-
return _try_import("orjson", package_name="orjson")
98-
9994
@cached_class_property
10095
def scipy(cls) -> types.ModuleType: # pylint: disable=invalid-name
10196
"""Cached scipy module."""

python/mlcroissant/mlcroissant/_src/operation_graph/operations/parse_json.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,6 @@
99
from mlcroissant._src.structure_graph.nodes.field import Field
1010
from mlcroissant._src.structure_graph.nodes.source import FileProperty
1111

12-
try:
13-
orjson = deps.orjson
14-
except ModuleNotFoundError:
15-
orjson = None
16-
1712

1813
def _unwrap_single_item(value: Any) -> Any:
1914
"""Unwraps a single-item list to its value, or returns the value as is."""
@@ -99,7 +94,7 @@ def parse(self, fh: TextIO) -> pd.DataFrame:
9994
"""
10095
# Load entire JSON file (could be a list or a single dict).
10196
raw = fh.read()
102-
data = orjson.loads(raw) if orjson else json.loads(raw)
97+
data = json.loads(raw)
10398

10499
# Always treat as list of records.
105100
records = data if isinstance(data, list) else [data]
@@ -137,7 +132,7 @@ def raw(self, fh: TextIO) -> pd.DataFrame:
137132
"""
138133
# Raw JSON fallback: one‐cell DataFrame.
139134
raw = fh.read()
140-
content = orjson.loads(raw) if orjson else json.loads(raw)
135+
content = json.loads(raw)
141136
return pd.DataFrame({FileProperty.content: [content]})
142137

143138

@@ -183,9 +178,9 @@ def __init__(self, fields, validate_fhir: bool = False):
183178

184179
# Add FHIR validator if needed
185180
if validate_fhir:
186-
from mlcroissant._src.operation_graph.operations.fhir_validator import FhirValidator
181+
from mlcroissant._src.operation_graph.operations import fhir_validator
187182

188-
self.fhir_validator = FhirValidator(validate_fhir=True)
183+
self.fhir_validator = fhir_validator.FhirValidator(validate_fhir=True)
189184
else:
190185
self.fhir_validator = None
191186

@@ -211,7 +206,7 @@ def parse(self, fh):
211206
line = line.strip()
212207
if not line:
213208
continue
214-
rec = orjson.loads(line) if orjson else json.loads(line)
209+
rec = json.loads(line)
215210
# Optional FHIR validation
216211
if self.fhir_validator:
217212
rec = self.fhir_validator.validate_resource(rec)

python/mlcroissant/mlcroissant/_src/operation_graph/operations/parse_json_test.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,16 +56,14 @@ def test_jsonreader_parse():
5656

5757

5858
def test_jsonreader_parse_deep():
59-
import orjson
60-
6159
# Test nested JSONPath ($.level1.level2[*].value)
6260
field = create_test_field(
6361
source=Source(extract=Extract(json_path="$.level1.level2[*].value"))
6462
)
6563
fields = (field,)
6664
json_obj = {"level1": {"level2": [{"value": 100}, {"value": 200}]}}
6765
expected_df = pd.DataFrame({"$.level1.level2[*].value": [[100, 200]]})
68-
raw_str = orjson.dumps(json_obj).decode("utf-8")
66+
raw_str = json.dumps(json_obj)
6967
fh = io.StringIO(raw_str)
7068
reader = JsonReader(fields=fields)
7169
df = reader.parse(fh)

python/mlcroissant/mlcroissant/_src/operation_graph/operations/read.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,7 @@ def _read_file_content(
188188
and os.getenv("CROISSANT_VALIDATE_FHIR", "").lower()
189189
in ("1", "true")
190190
)
191-
reader = JsonlReader(
192-
self.fields, validate_fhir=validate_fhir
193-
)
191+
reader = JsonlReader(self.fields, validate_fhir=validate_fhir)
194192
if reading_method == ReadingMethod.JSON:
195193
return reader.parse(file)
196194
return reader.raw(file)

python/mlcroissant/mlcroissant/_src/structure_graph/nodes/source.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
from rdflib.namespace import SDO
77

88
from mlcroissant._src.core import constants
9-
from mlcroissant._src.core.optional import deps
109
from mlcroissant._src.core import dataclasses as mlc_dataclasses
1110
from mlcroissant._src.core.context import CroissantVersion
11+
from mlcroissant._src.core.optional import deps
1212
from mlcroissant._src.core.uuid import formatted_uuid_to_json
1313
from mlcroissant._src.core.uuid import uuid_from_jsonld
1414
from mlcroissant._src.structure_graph.base_node import Node

python/mlcroissant/pyproject.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ dependencies = [
2828
"pandas",
2929
"pandas-stubs",
3030
"python-dateutil",
31-
"orjson",
3231
"rdflib",
3332
"requests",
3433
"scipy",
@@ -104,7 +103,6 @@ module = [
104103
"jsonpath_rw",
105104
"librosa",
106105
"networkx",
107-
"orjson",
108106
"pandas",
109107
"pillow",
110108
"scipy",

0 commit comments

Comments
 (0)