Skip to content

Commit f85e71c

Browse files
committed
Release version 1.0.16
- Multi-provider AI support (OpenAI, OpenRouter, Ollama, LM Studio, Perplexity) - Structured AI output with JSON Schema - Flexible AI configuration via env vars, config files, and CLI args - Enhanced error handling and code quality improvements - Fixed AI response parsing and service initialization bugs
1 parent 8936d50 commit f85e71c

3 files changed

Lines changed: 156 additions & 10 deletions

File tree

CHANGELOG.md

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [1.0.16] - 2025-12-12
11+
1012
### Added
13+
- **Multi-provider AI support**: Added support for OpenAI, OpenRouter, Ollama, LM Studio, and Perplexity APIs
14+
- **Structured AI output**: Replaced fragile text parsing with JSON Schema-based structured output for reliable AI responses
15+
- **Flexible AI configuration**: Support for environment variables, config files (`undatum.yaml` or `~/.undatum/config.yaml`), and CLI arguments with proper precedence
16+
- **AI provider factory**: New `get_ai_service()` function for easy provider instantiation
17+
- **Enhanced error handling**: Proper exception classes (`AIServiceError`, `AIConfigurationError`, `AIAPIError`) with clear error messages
18+
- **CLI arguments for AI**: Added `--ai-provider`, `--ai-model`, and `--ai-base-url` options to `analyze` command
19+
- **Configuration management**: New `undatum/ai/config.py` module for unified configuration handling
20+
- **Backward compatibility**: Old `get_fields_info()` and `get_description()` functions maintained for compatibility
1121
- Improved overall code quality and Pylint score
1222
- Better error handling and resource management
1323

1424
### Changed
25+
- **AI system refactoring**: Completely refactored AI documentation system from Perplexity-only to multi-provider architecture
26+
- **Structured responses**: All AI providers now use JSON Schema (`response_format: json_object`) instead of parsing CSV from markdown code blocks
27+
- **Provider architecture**: Implemented abstract base class `AIService` with concrete provider implementations
1528
- Improved code quality: fixed indentation, trailing whitespace, and formatting issues
1629
- Refactored file operations to use `with` statements for better resource management
1730
- Updated string formatting to use f-strings and lazy logging
1831
- Fixed dangerous default arguments in function signatures
1932
- Improved type hints and code documentation
33+
- Updated `analyze` command to accept AI provider configuration
34+
- Updated `schemer` command to use new AI service interface
2035

2136
### Fixed
2237
- Fixed critical bug: added missing `_process_json_data` function in analyzer module
@@ -26,6 +41,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2641
- Fixed dictionary iteration patterns (removed unnecessary `.keys()` calls)
2742
- Fixed `isinstance()` calls to use tuple syntax for better performance
2843
- Improved file handling with proper context managers
44+
- **Fixed fragile AI response parsing**: Replaced error-prone text extraction with proper JSON parsing
45+
- **Fixed AI service initialization**: Added proper error handling and fallback when AI service fails to initialize
2946

3047
## [1.0.15] - 2024-XX-XX
3148

@@ -99,7 +116,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99116
### Added
100117
- First public release on PyPI and updated GitHub code
101118

102-
[Unreleased]: https://github.com/datacoon/undatum/compare/v1.0.15...HEAD
119+
[Unreleased]: https://github.com/datacoon/undatum/compare/v1.0.16...HEAD
120+
[1.0.16]: https://github.com/datacoon/undatum/compare/v1.0.15...v1.0.16
103121
[1.0.15]: https://github.com/datacoon/undatum/compare/v1.0.14...v1.0.15
104122
[1.0.14]: https://github.com/datacoon/undatum/compare/v1.0.13...v1.0.14
105123
[1.0.13]: https://github.com/datacoon/undatum/compare/v1.0.12...v1.0.13

pyproject.toml

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
[build-system]
2+
requires = ["setuptools>=65.5.0", "wheel"]
3+
build-backend = "setuptools.build_meta"
4+
5+
[project]
6+
name = "undatum"
7+
version = "1.0.16"
8+
description = "A powerful command-line tool for data processing and analysis"
9+
readme = "README.md"
10+
requires-python = ">=3.8"
11+
license = {text = "MIT"}
12+
authors = [
13+
{name = "Ivan Begtin", email = "ivan@begtin.tech"}
14+
]
15+
keywords = ["json", "jsonl", "csv", "bson", "cli", "dataset", "data-processing"]
16+
classifiers = [
17+
"Development Status :: 5 - Production/Stable",
18+
"Programming Language :: Python",
19+
"Programming Language :: Python :: 3 :: Only",
20+
"Programming Language :: Python :: 3.8",
21+
"Environment :: Console",
22+
"Intended Audience :: Developers",
23+
"Intended Audience :: System Administrators",
24+
"License :: OSI Approved :: MIT License",  # must agree with `license = {text = "MIT"}`
25+
"Topic :: Software Development",
26+
"Topic :: System :: Networking",
27+
"Topic :: Terminals",
28+
"Topic :: Text Processing",
29+
"Topic :: Utilities"
30+
]
31+
32+
[project.scripts]
33+
undatum = "undatum.__main__:main"
34+
data = "undatum.__main__:main"
35+
36+
[tool.black]
37+
line-length = 100
38+
target-version = ['py38']
39+
include = '\.pyi?$'
40+
extend-exclude = '''
41+
/(
42+
# directories
43+
\.eggs
44+
| \.git
45+
| \.hg
46+
| \.mypy_cache
47+
| \.tox
48+
| \.venv
49+
| build
50+
| dist
51+
)/
52+
'''
53+
54+
[tool.ruff]
55+
line-length = 100
56+
target-version = "py38"
57+
# Rule selection is a linter setting: Ruff deprecates top-level `select`/`ignore`
# under [tool.ruff] in favour of the [tool.ruff.lint] table.
[tool.ruff.lint]
select = [
58+
"E", # pycodestyle errors
59+
"W", # pycodestyle warnings
60+
"F", # pyflakes
61+
"I", # isort
62+
"B", # flake8-bugbear
63+
"C4", # flake8-comprehensions
64+
"UP", # pyupgrade
65+
]
66+
ignore = [
67+
"E501", # line too long (handled by black)
68+
"B008", # do not perform function calls in argument defaults
69+
"C901", # too complex
70+
]
71+
72+
# isort options are linter settings; [tool.ruff.isort] is the deprecated path.
[tool.ruff.lint.isort]
73+
known-first-party = ["undatum"]
74+
75+
[tool.mypy]
76+
python_version = "3.8"
77+
warn_return_any = true
78+
warn_unused_configs = true
79+
disallow_untyped_defs = false
80+
disallow_incomplete_defs = false
81+
check_untyped_defs = true
82+
no_implicit_optional = true
83+
warn_redundant_casts = true
84+
warn_unused_ignores = true
85+
warn_no_return = true
86+
ignore_missing_imports = true
87+
88+
[[tool.mypy.overrides]]
89+
module = [
90+
"chardet.*",
91+
"click.*",
92+
"dictquery.*",
93+
"jsonlines.*",
94+
"openpyxl.*",
95+
"orjson.*",
96+
"pandas.*",
97+
"pymongo.*",
98+
"qddate.*",
99+
"tabulate.*",
100+
"validators.*",
101+
"xlrd.*",
102+
"xmltodict.*",
103+
"rich.*",
104+
"duckdb.*",
105+
"pyzstd.*",
106+
"pydantic.*",
107+
"typer.*",
108+
]
109+
ignore_missing_imports = true
110+
111+
[tool.pylint.messages_control]
112+
max-line-length = 100
113+
disable = [
114+
"too-few-public-methods",
115+
"too-many-arguments",
116+
"too-many-locals",
117+
"too-many-branches",
118+
"too-many-statements",
119+
"too-many-instance-attributes",
120+
"too-many-positional-arguments",
121+
]
122+
123+
[tool.pytest.ini_options]
124+
testpaths = ["tests"]
125+
python_files = ["test_*.py", "*_test.py"]
126+
python_classes = ["Test*"]
127+
python_functions = ["test_*"]
128+
addopts = "-v --tb=short"

undatum/__init__.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
# -*- coding: utf8 -*-
2-
"""
3-
undatum: a command-line tool for data processing. Brings CSV simplicity to JSON lines and BSON
4-
5-
"""
6-
7-
__version__ = "1.0.15"
8-
__author__ = 'Ivan Begtin'
9-
__licence__ = 'MIT'
1+
# -*- coding: utf8 -*-
2+
"""
3+
undatum: a command-line tool for data processing. Brings CSV simplicity to JSON lines and BSON
4+
5+
"""
6+
7+
__version__ = "1.0.16"
8+
__author__ = 'Ivan Begtin'
9+
__licence__ = 'MIT'

0 commit comments

Comments
 (0)