|
|
|
@@ -42,11 +42,14 @@ def run_doc_parser(docx_path: str, output_dir: str) -> str | None:
|
|
|
|
|
|
|
|
|
|
print(f"[1/3] Parsing document: {docx_path}")
|
|
|
|
|
result = parse_document(docx_path, output_dir, dry_run=False)
|
|
|
|
|
parsed_path = result.get("output")
|
|
|
|
|
if parsed_path and os.path.isfile(parsed_path):
|
|
|
|
|
# parse_document returns {source, sections, image_sources, image_analysis}
|
|
|
|
|
# Output is saved as <basename>_parsed.json in output_dir
|
|
|
|
|
basename = os.path.splitext(os.path.basename(docx_path))[0]
|
|
|
|
|
parsed_path = os.path.join(output_dir, f"{basename}_parsed.json")
|
|
|
|
|
if os.path.isfile(parsed_path):
|
|
|
|
|
print(f" → {parsed_path}")
|
|
|
|
|
return parsed_path
|
|
|
|
|
print(" ✗ doc_parser failed to produce output", file=sys.stderr)
|
|
|
|
|
print(f" [FAIL] doc_parser output not found: {parsed_path}", file=sys.stderr)
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -55,10 +58,11 @@ def run_doc_parser(docx_path: str, output_dir: str) -> str | None:
|
|
|
|
|
|
|
|
|
|
def run_ir_pipeline(parsed_path: str) -> str | None:
|
|
|
|
|
"""Run the ir_generation steps. Returns path to ir_final.json or None."""
|
|
|
|
|
config.set_input_file(parsed_path)
|
|
|
|
|
os.makedirs(config.PROJECT_OUTPUT, exist_ok=True)
|
|
|
|
|
os.makedirs(config.IR_OUTPUT, exist_ok=True)
|
|
|
|
|
os.makedirs(config.FINAL_OUTPUT, exist_ok=True)
|
|
|
|
|
env = os.environ.copy()
|
|
|
|
|
env["IR_INPUT_JSON"] = parsed_path
|
|
|
|
|
|
|
|
|
|
steps = [
|
|
|
|
|
("step1_semantic_index.py", "Semantic Index"),
|
|
|
|
@@ -72,7 +76,7 @@ def run_ir_pipeline(parsed_path: str) -> str | None:
|
|
|
|
|
for script, label in steps:
|
|
|
|
|
script_path = PROJECT_ROOT / "skills" / "ir_generation_skill" / script
|
|
|
|
|
if not script_path.exists():
|
|
|
|
|
print(f" ✗ Missing: {script}", file=sys.stderr)
|
|
|
|
|
print(f" [FAIL] Missing: {script}", file=sys.stderr)
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
print(f" Running {script} ({label})...")
|
|
|
|
@@ -80,28 +84,29 @@ def run_ir_pipeline(parsed_path: str) -> str | None:
|
|
|
|
|
[sys.executable, str(script_path)],
|
|
|
|
|
cwd=str(PROJECT_ROOT),
|
|
|
|
|
capture_output=True, text=True,
|
|
|
|
|
env=env,
|
|
|
|
|
)
|
|
|
|
|
if result.returncode != 0:
|
|
|
|
|
print(f" ✗ {script} failed (exit {result.returncode})", file=sys.stderr)
|
|
|
|
|
print(f" [FAIL] {script} failed (exit {result.returncode})", file=sys.stderr)
|
|
|
|
|
print(result.stderr[-500:], file=sys.stderr)
|
|
|
|
|
else:
|
|
|
|
|
# Print last line of stdout for brief progress
|
|
|
|
|
lines = result.stdout.strip().split("\n")
|
|
|
|
|
last = lines[-1] if lines else "done"
|
|
|
|
|
print(f" ✓ {label}: {last[:120]}")
|
|
|
|
|
print(f" [OK] {label}: {last[:120]}")
|
|
|
|
|
|
|
|
|
|
if os.path.isfile(config.IR_FINAL_JSON):
|
|
|
|
|
print(f" → {config.IR_FINAL_JSON}")
|
|
|
|
|
return config.IR_FINAL_JSON
|
|
|
|
|
|
|
|
|
|
print(" ✗ IR generation did not produce ir_final.json", file=sys.stderr)
|
|
|
|
|
print(" [FAIL] IR generation did not produce ir_final.json", file=sys.stderr)
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── Stage 3: Acceptance Tests ────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_acceptance_tests() -> int:
|
|
|
|
|
def run_acceptance_tests(parsed_json_path: str) -> int:
|
|
|
|
|
"""Run QE acceptance tests. Returns pytest exit code."""
|
|
|
|
|
print("[3/3] Running QE acceptance tests...")
|
|
|
|
|
|
|
|
|
@@ -111,7 +116,7 @@ def run_acceptance_tests() -> int:
|
|
|
|
|
sys.executable, "-m", "pytest", str(test_dir),
|
|
|
|
|
"-v", "--run-acceptance",
|
|
|
|
|
"--ir-path", config.IR_FINAL_JSON,
|
|
|
|
|
"--parsed-path", config.INPUT_JSON,
|
|
|
|
|
"--parsed-path", parsed_json_path,
|
|
|
|
|
"--tb=short",
|
|
|
|
|
],
|
|
|
|
|
cwd=str(PROJECT_ROOT),
|
|
|
|
@@ -141,7 +146,7 @@ def main():
|
|
|
|
|
out_dir = args.output_dir or str(PROJECT_ROOT / "output")
|
|
|
|
|
parsed_path = run_doc_parser(docx, out_dir)
|
|
|
|
|
if not parsed_path:
|
|
|
|
|
print("\n✗ Pipeline blocked at Stage 1 (doc_parser)", file=sys.stderr)
|
|
|
|
|
print("\n[FAIL] Pipeline blocked at Stage 1 (doc_parser)", file=sys.stderr)
|
|
|
|
|
# Create tracking issue for dev-agent
|
|
|
|
|
_maybe_create_blocking_issue("doc_parser", f"Input: {docx}")
|
|
|
|
|
sys.exit(1)
|
|
|
|
@@ -157,15 +162,15 @@ def main():
|
|
|
|
|
# Stage 2: IR generation
|
|
|
|
|
ir_path = run_ir_pipeline(parsed_path)
|
|
|
|
|
if not ir_path:
|
|
|
|
|
print("\n✗ Pipeline blocked at Stage 2 (ir_generation)", file=sys.stderr)
|
|
|
|
|
print("\n[FAIL] Pipeline blocked at Stage 2 (ir_generation)", file=sys.stderr)
|
|
|
|
|
_maybe_create_blocking_issue("ir_generation", f"Parsed: {parsed_path}")
|
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
|
|
print(f"\n✓ Pipeline complete: {ir_path}")
|
|
|
|
|
print(f"\n[OK] Pipeline complete: {ir_path}")
|
|
|
|
|
|
|
|
|
|
# Stage 3: Acceptance tests
|
|
|
|
|
if args.test:
|
|
|
|
|
exit_code = run_acceptance_tests()
|
|
|
|
|
exit_code = run_acceptance_tests(parsed_path)
|
|
|
|
|
sys.exit(exit_code)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|