jobs:
  quality:
    runs-on: ubuntu-latest
    # Raised from 30 to 60 to accommodate the added frontend, Maven and
    # Gradle gates introduced in this change.
    timeout-minutes: 60

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Full history is required so the "Detect changed files" step can
          # diff against the PR base / previous push head.
          fetch-depth: 0

      - name: Setup Java 21
        uses: actions/setup-java@v4
# NOTE(review): lines elided in the captured diff at this point — the rest of
# the setup-java step and the start of the setup-python step are not visible.
2931 with :
3032 python-version : " 3.11"
3133
34+ - name : Setup Node 20
35+ uses : actions/setup-node@v4
36+ with :
37+ node-version : " 20"
38+ cache : npm
39+ cache-dependency-path : |
40+ web/datapillar-studio/package-lock.json
41+ web/datapillar-web-site/package-lock.json
42+
43+ - name : Detect changed files
44+ id : changed
45+ shell : bash
46+ run : |
47+ set -euo pipefail
48+
49+ if [[ "${{ github.event_name }}" == "pull_request" ]]; then
50+ base_sha="${{ github.event.pull_request.base.sha }}"
51+ else
52+ base_sha="${{ github.event.before }}"
53+ fi
54+ head_sha="${{ github.sha }}"
55+
56+ if [[ -z "$base_sha" || "$base_sha" =~ ^0+$ ]]; then
57+ base_sha="$(git rev-list --max-parents=0 HEAD)"
58+ fi
59+
60+ mapfile -t changed_files < <(git diff --name-only "$base_sha" "$head_sha")
61+ if [[ ${#changed_files[@]} -eq 0 ]]; then
62+ mapfile -t changed_files < <(git ls-files)
63+ fi
64+
65+ printf '%s\n' "${changed_files[@]}" > /tmp/changed_files.txt
66+
67+ echo "base_sha=$base_sha" >> "$GITHUB_OUTPUT"
68+ echo "head_sha=$head_sha" >> "$GITHUB_OUTPUT"
69+
70+ {
71+ echo "files<<EOF"
72+ cat /tmp/changed_files.txt
73+ echo "EOF"
74+ } >> "$GITHUB_OUTPUT"
75+
76+ if grep -q '^datapillar-gravitino/' /tmp/changed_files.txt; then
77+ echo "has_gravitino=true" >> "$GITHUB_OUTPUT"
78+ else
79+ echo "has_gravitino=false" >> "$GITHUB_OUTPUT"
80+ fi
81+
82+ if grep -Eq '^web/datapillar-studio/' /tmp/changed_files.txt; then
83+ echo "has_web_studio=true" >> "$GITHUB_OUTPUT"
84+ else
85+ echo "has_web_studio=false" >> "$GITHUB_OUTPUT"
86+ fi
87+
88+ if grep -Eq '^web/datapillar-web-site/' /tmp/changed_files.txt; then
89+ echo "has_web_site=true" >> "$GITHUB_OUTPUT"
90+ else
91+ echo "has_web_site=false" >> "$GITHUB_OUTPUT"
92+ fi
93+
94+ if grep -Eq '^(datapillar-ai|datapillar-oneagentic|datapillar-airflow-plugin)/.*\.py$' /tmp/changed_files.txt; then
95+ echo "has_python_changes=true" >> "$GITHUB_OUTPUT"
96+ else
97+ echo "has_python_changes=false" >> "$GITHUB_OUTPUT"
98+ fi
99+
32100 - name : Validate local scripts syntax
33101 run : |
34102 bash -n scripts/start-local-all.sh
35103 bash -n scripts/stop-local-all.sh
104+ bash -n scripts/check-gravitino-rpc-contract-consistency.sh
105+ bash -n datapillar-gravitino/scripts/merge-overlay-conf.sh
106+ bash -n datapillar-gravitino/scripts/check-overlay-decoupling.sh
107+
108+ - name : Gravitino RPC contract guards
109+ run : |
110+ bash scripts/check-gravitino-rpc-contract-consistency.sh
36111
37- - name : Python syntax smoke check (datapillar-ai)
38- run : python -m compileall -q datapillar-ai/src
112+ - name : Gravitino decoupling guards
113+ run : |
114+ cd datapillar-gravitino
115+ ./scripts/check-overlay-decoupling.sh
39116
40- - name : Maven test-compile (core modules)
117+ - name : Self-module naming and weak-exception gate
118+ env :
119+ BASE_SHA : ${{ steps.changed.outputs.base_sha }}
120+ HEAD_SHA : ${{ steps.changed.outputs.head_sha }}
121+ run : |
122+ python3 - <<'PY'
123+ import ast
124+ import os
125+ import re
126+ import subprocess
127+ from pathlib import Path
128+
129+ changed_files = [
130+ line.strip()
131+ for line in Path('/tmp/changed_files.txt').read_text(encoding='utf-8').splitlines()
132+ if line.strip()
133+ ]
134+
135+ self_prefixes = (
136+ 'datapillar-ai/',
137+ 'datapillar-oneagentic/',
138+ 'datapillar-airflow-plugin/',
139+ 'datapillar-common/',
140+ 'datapillar-auth/',
141+ 'datapillar-studio-service/',
142+ 'datapillar-api-gateway/',
143+ 'datapillar-openlineage/',
144+ 'datapillar-distribution/',
145+ 'web/datapillar-studio/',
146+ 'web/datapillar-web-site/',
147+ )
148+
149+ def in_scope(path: str) -> bool:
150+ if not path.startswith(self_prefixes):
151+ return False
152+ if path.startswith('datapillar-gravitino/'):
153+ return False
154+ if '/tests/' in path or '/test/' in path or '/src/test/' in path:
155+ return False
156+ if '/.venv/' in path or '/.uv-cache/' in path:
157+ return False
158+ return True
159+
160+ java_pattern = re.compile(
161+ r'^(?:\s*@.*\n)*\s*(?:public|protected|private|default)\s+(?:static\s+)?(?:final\s+)?(?:synchronized\s+)?(?:<[^>]+>\s+)?[A-Za-z0-9_<>,\[\].?]+\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(',
162+ re.M,
163+ )
164+ py_except = re.compile(r'\bexcept\s+Exception(?:\s+as\s+\w+)?\s*:')
165+ java_catch = re.compile(r'\bcatch\s*\(\s*Exception\s+\w+\s*\)')
166+
167+ redline_hits = []
168+ review_hits = []
169+ weak_hits = []
170+
171+ for path in changed_files:
172+ if not in_scope(path):
173+ continue
174+ file_path = Path(path)
175+ if not file_path.exists():
176+ continue
177+ suffix = file_path.suffix
178+ if suffix not in {'.py', '.java'}:
179+ continue
180+
181+ source = file_path.read_text(encoding='utf-8', errors='ignore')
182+
183+ if suffix == '.py':
184+ try:
185+ tree = ast.parse(source, filename=path)
186+ except Exception:
187+ continue
188+ for node in ast.walk(tree):
189+ if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
190+ continue
191+ name = node.name
192+ length = len(name)
193+ line = getattr(node, 'lineno', 1)
194+ if length > 32:
195+ redline_hits.append(f'{path}:{line}:{name} ({length})')
196+ elif 25 <= length <= 32:
197+ review_hits.append(f'{path}:{line}:{name} ({length})')
198+ else:
199+ for match in java_pattern.finditer(source):
200+ name = match.group(1)
201+ length = len(name)
202+ line = source.count('\n', 0, match.start()) + 1
203+ if length > 32:
204+ redline_hits.append(f'{path}:{line}:{name} ({length})')
205+ elif 25 <= length <= 32:
206+ review_hits.append(f'{path}:{line}:{name} ({length})')
207+
208+ diff = subprocess.run(
209+ ['git', 'diff', '--unified=0', '--no-color', os.environ['BASE_SHA'], os.environ['HEAD_SHA'], '--', path],
210+ capture_output=True,
211+ text=True,
212+ check=False,
213+ ).stdout.splitlines()
214+
215+ for line in diff:
216+ if not line.startswith('+') or line.startswith('+++'):
217+ continue
218+ payload = line[1:]
219+ if py_except.search(payload):
220+ weak_hits.append(
221+ f'{path}:New except Exception, need specific exception handling.'
222+ )
223+ if java_catch.search(payload):
224+ weak_hits.append(
225+ f'{path}:New catch(Exception), need specific exception handling.'
226+ )
227+
228+ if review_hits:
229+ print('Naming review tips (25-32):')
230+ for item in sorted(set(review_hits)):
231+ print(item)
232+
233+ if redline_hits:
234+ print('Naming gate failed: function/method name length must not exceed 32.')
235+ for item in sorted(set(redline_hits)):
236+ print(item)
237+ raise SystemExit(1)
238+
239+ if weak_hits:
240+ print('Exception handling gate failed: weak exception handling mode is forbidden.')
241+ for item in sorted(set(weak_hits)):
242+ print(item)
243+ raise SystemExit(1)
244+ PY
245+
246+ - name : Install Python quality tools
247+ if : steps.changed.outputs.has_python_changes == 'true'
248+ run : |
249+ python -m pip install --upgrade pip
250+ pip install ruff black
251+
252+ - name : Frontend lockfile and artifact gate
253+ if : steps.changed.outputs.has_web_studio == 'true' || steps.changed.outputs.has_web_site == 'true'
254+ run : |
255+ python3 - <<'PY'
256+ from pathlib import Path
257+ import subprocess
258+
259+ modules = ["web/datapillar-studio", "web/datapillar-web-site"]
260+ errors: list[str] = []
261+
262+ for module in modules:
263+ lock_candidates = [
264+ Path(module) / "package-lock.json",
265+ Path(module) / "pnpm-lock.yaml",
266+ Path(module) / "yarn.lock",
267+ ]
268+ existing = [str(path) for path in lock_candidates if path.exists()]
269+ if len(existing) != 1:
270+ errors.append(
271+ f"{module}:There must be exactly one lock file(package-lock/pnpm-lock/yarn.lock), current={existing}"
272+ )
273+
274+ tracked_files = subprocess.run(
275+ ["git", "ls-files"],
276+ capture_output=True,
277+ text=True,
278+ check=True,
279+ ).stdout.splitlines()
280+ tracked_tsbuildinfo = [
281+ path
282+ for path in tracked_files
283+ if path.endswith(".tsbuildinfo")
284+ and (
285+ path.startswith("web/datapillar-studio/")
286+ or path.startswith("web/datapillar-web-site/")
287+ )
288+ ]
289+ if tracked_tsbuildinfo:
290+ errors.append(
291+ f"Frontend build artifacts must not be tracked: {tracked_tsbuildinfo}"
292+ )
293+
294+ if errors:
295+ print("Frontend gate failed:")
296+ for item in errors:
297+ print(item)
298+ raise SystemExit(1)
299+ PY
300+
301+ - name : Python quality checks (self modules)
302+ if : steps.changed.outputs.has_python_changes == 'true'
303+ run : |
304+ set -euo pipefail
305+ mapfile -t py_files < <(grep -E '^(datapillar-ai|datapillar-oneagentic|datapillar-airflow-plugin)/.*\.py$' /tmp/changed_files.txt \
306+ | grep -Ev '/tests?/|/\.venv/|/\.uv-cache/' \
307+ || true)
308+ if [[ ${#py_files[@]} -eq 0 ]]; then
309+ echo "No changed python source files, skip."
310+ exit 0
311+ fi
312+ ruff check "${py_files[@]}"
313+ black --check "${py_files[@]}"
314+ python -m py_compile "${py_files[@]}"
315+
316+ - name : Maven test (self modules)
41317 run : |
42318 mvn -q \
43- -pl datapillar-common,datapillar-auth,datapillar-studio-service,datapillar-api-gateway,datapillar-openlineage \
319+ -pl datapillar-common,datapillar-auth,datapillar-studio-service,datapillar-api-gateway,datapillar-openlineage,datapillar-distribution \
44320 -am \
45- -DskipTests \
46- test-compile
321+ test
322+
323+ - name : Frontend gate (datapillar-studio)
324+ if : steps.changed.outputs.has_web_studio == 'true'
325+ run : |
326+ cd web/datapillar-studio
327+ npm ci
328+ npm run lint
329+ npm run type-check
330+ npm run test
331+
332+ - name : Frontend gate (datapillar-web-site)
333+ if : steps.changed.outputs.has_web_site == 'true'
334+ run : |
335+ cd web/datapillar-web-site
336+ npm ci
337+ npm run lint
338+ npm run type-check
339+ npm run test
340+
341+ - name : Gravitino native gate
342+ if : steps.changed.outputs.has_gravitino == 'true'
343+ run : |
344+ cd datapillar-gravitino
345+ ./gradlew check
0 commit comments