simplify jobs script

add script to test steps in ai-hub
2026-06-01 16:54:06 -04:00 · 2026-06-01 16:53:45 -04:00
5 changed files with 387 additions and 58 deletions
--- a/examples/ai-hub/README.md
+++ b/examples/ai-hub/README.md
@@ -0,0 +1,117 @@
+# Qualcomm AI Hub Example
+
+This example takes the ONNX model produced by the SageMaker training example and runs the Qualcomm AI Hub upload workflow:
+
+1. Quantize
+2. Compile
+3. Validate
+4. Profile
+5. Download the compiled artifact
+
+## Prerequisites
+
+Run the training example first and wait for it to complete:
+
+```bash
+bash examples/training/run_training.sh --config config.yaml --wait
+```
+
+If the dataset is already uploaded to S3, use:
+
+```bash
+bash examples/training/run_training.sh --config config.yaml --skip-upload --wait
+```
+
+The training artifact must contain a static-shape `model.onnx`. The training example exports an input named `input` with shape `1x3x160x160`.
+
+Your `config.yaml` must include AI Hub settings:
+
+```yaml
+aihub:
+  device: Samsung Galaxy S25 (Family)
+  target_runtime: tflite
+  input_specs:
+    input: [[1, 3, 160, 160], float32]
+  output_dir: build/qai-hub
+```
+
+You also need local Qualcomm AI Hub SDK authentication configured.
+
+## Prepare Inputs
+
+AI Hub does not consume the raw JPG training images directly. It needs NumPy tensors that match the ONNX model input shape and preprocessing.
+
+Generate calibration and validation inputs:
+
+```bash
+uv run python examples/ai-hub/prepare_inputs.py
+```
+
+This writes:
+
+```text
+examples/training/data/aihub_calibration/*.npy
+examples/training/data/inputs.npz
+```
+
+The script applies the same image preprocessing used by the training example:
+
+- resize to `160x160`
+- convert to channel-first `1x3x160x160`
+- normalize with ImageNet mean and standard deviation
+
+Useful options:
+
+```bash
+uv run python examples/ai-hub/prepare_inputs.py \
+  --dataset-dir examples/training/data/flower_photos_sagemaker \
+  --calibration-dir examples/training/data/aihub_calibration \
+  --input-file examples/training/data/inputs.npz \
+  --samples 16
+```
+
+## Run AI Hub
+
+After training completes and inputs are prepared:
+
+```bash
+bash examples/ai-hub/run_ai_hub.sh --config config.yaml
+```
+
+By default, the script uses the last SageMaker training job recorded in `.qc-cli.json`. It downloads that job's `model.tar.gz`, extracts `model.onnx`, runs the AI Hub workflow, and downloads the compiled artifact.
+
+To use a specific training job:
+
+```bash
+bash examples/ai-hub/run_ai_hub.sh \
+  --config config.yaml \
+  --from-job qc-cli-YYYYMMDD-HHMMSS
+```
+
+To resume from a later Workbench step (`--from-step` accepts `quantize`, `compile`, `validate`, or `profile`; the default is `quantize`):
+
+```bash
+bash examples/ai-hub/run_ai_hub.sh \
+  --config config.yaml \
+  --from-step validate
+```
+
+To skip downloading the compiled artifact:
+
+```bash
+bash examples/ai-hub/run_ai_hub.sh \
+  --config config.yaml \
+  --skip-download
+```
+
+## Troubleshooting
+
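+If AI Hub reports dynamic input shapes, the model was most likely exported by an older training source that declared a dynamic batch axis. Rerun training with the current training source, which exports `model.onnx` without `dynamic_axes`. AI Hub quantization requires the exported ONNX model to use static input shapes.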
+
+If `run_ai_hub.sh` reports missing calibration or input files, run:
+
+```bash
+uv run python examples/ai-hub/prepare_inputs.py
+```
+
+If validation fails with a missing input name, make sure `config.yaml` and the generated `.npz` both use `input` as the input name.
--- a/examples/ai-hub/prepare_inputs.py
+++ b/examples/ai-hub/prepare_inputs.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+"""Prepare Qualcomm AI Hub calibration and validation inputs for the training example."""
+
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+import numpy as np
+from PIL import Image
+
+
+IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png"}
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--dataset-dir",
+        type=Path,
+        default=Path("examples/training/data/flower_photos_sagemaker"),
+        help="ImageFolder-style dataset used for training.",
+    )
+    parser.add_argument(
+        "--calibration-dir",
+        type=Path,
+        default=Path("examples/training/data/aihub_calibration"),
+        help="Directory where .npy calibration samples will be written.",
+    )
+    parser.add_argument(
+        "--input-file",
+        type=Path,
+        default=Path("examples/training/data/inputs.npz"),
+        help="Validation .npz input file for qc-cli ai-hub validate.",
+    )
+    parser.add_argument("--input-name", default="input", help="ONNX input name.")
+    parser.add_argument("--image-size", type=int, default=160, help="Square image size used by training.")
+    parser.add_argument("--samples", type=int, default=16, help="Number of calibration samples to write.")
+    return parser.parse_args()
+
+
+def preprocess_image(path: Path, image_size: int) -> np.ndarray:
+    image = Image.open(path).convert("RGB").resize((image_size, image_size), Image.Resampling.BILINEAR)
+    array = np.asarray(image, dtype=np.float32) / 255.0
+    array = np.transpose(array, (2, 0, 1))
+    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)[:, None, None]
+    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)[:, None, None]
+    return ((array - mean) / std)[None, ...].astype("float32")
+
+
+def main() -> None:
+    args = parse_args()
+    images = sorted(p for p in args.dataset_dir.rglob("*") if p.suffix.lower() in IMAGE_EXTENSIONS)
+    if not images:
+        raise SystemExit(f"No images found under {args.dataset_dir}")
+    if args.samples < 1:
+        raise SystemExit("--samples must be at least 1")
+
+    args.calibration_dir.mkdir(parents=True, exist_ok=True)
+    args.input_file.parent.mkdir(parents=True, exist_ok=True)
+
+    sample_count = min(args.samples, len(images))
+    prepared = []
+    for index, image_path in enumerate(images[:sample_count]):
+        sample = preprocess_image(image_path, args.image_size)
+        np.save(args.calibration_dir / f"sample_{index:03d}.npy", sample)
+        prepared.append(sample)
+
+    np.savez(args.input_file, **{args.input_name: prepared[0]})
+    print(f"Wrote {sample_count} calibration samples to {args.calibration_dir}")
+    print(f"Wrote validation input to {args.input_file}")
+
+
+if __name__ == "__main__":
+    main()
--- a/examples/ai-hub/run_ai_hub.sh
+++ b/examples/ai-hub/run_ai_hub.sh
@@ -0,0 +1,156 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+CONFIG_PATH="config.yaml"
+CALIBRATION_PATH="examples/training/data/aihub_calibration"
+INPUT_FILE="examples/training/data/inputs.npz"
+FROM_STEP="quantize"
+FROM_JOB=""
+MODEL_S3_URI=""
+ONNX_PATH=""
+INPUT_NAME=""
+DOWNLOAD=true
+OUTPUT_PATH=""
+
+usage() {
+  cat <<EOF
+Usage: $0 [options]
+
+Options:
+  --config PATH              Path to qc-cli config file. Default: config.yaml
+  --calibration PATH         Calibration .npz file or directory of .npy samples.
+                              Default: ${CALIBRATION_PATH}
+  --input-file PATH          Validation .npz or .npy inputs. Default: ${INPUT_FILE}
+  --from-step STEP           Resume upload from: quantize, compile, validate, profile.
+                              Default: ${FROM_STEP}
+  --from-job NAME            SageMaker training job whose model artifact should upload.
+                              Defaults to the last training job in local qc-cli state.
+  --model-s3-uri URI         S3 URI of model.tar.gz to upload.
+  --onnx-path PATH           Local ONNX path or ONNX path inside extracted artifact.
+  --input-name NAME          Input name for .npy validation files.
+  --skip-download            Do not download the compiled AI Hub artifact after upload.
+  --output PATH              Destination file for ai-hub download.
+  -h, --help                 Show this help.
+EOF
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --config)
+      CONFIG_PATH="$2"
+      shift 2
+      ;;
+    --calibration)
+      CALIBRATION_PATH="$2"
+      shift 2
+      ;;
+    --input-file)
+      INPUT_FILE="$2"
+      shift 2
+      ;;
+    --from-step)
+      FROM_STEP="$2"
+      shift 2
+      ;;
+    --from-job)
+      FROM_JOB="$2"
+      shift 2
+      ;;
+    --model-s3-uri)
+      MODEL_S3_URI="$2"
+      shift 2
+      ;;
+    --onnx-path)
+      ONNX_PATH="$2"
+      shift 2
+      ;;
+    --input-name)
+      INPUT_NAME="$2"
+      shift 2
+      ;;
+    --skip-download)
+      DOWNLOAD=false
+      shift
+      ;;
+    --output)
+      OUTPUT_PATH="$2"
+      shift 2
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "Unknown option: $1" >&2
+      usage >&2
+      exit 1
+      ;;
+  esac
+done
+
+if [[ ! -f "${CONFIG_PATH}" ]]; then
+  echo "Config not found: ${CONFIG_PATH}" >&2
+  exit 1
+fi
+
+case "${FROM_STEP}" in
+  quantize|compile|validate|profile)
+    ;;
+  *)
+    echo "--from-step must be one of: quantize, compile, validate, profile" >&2
+    exit 1
+    ;;
+esac
+
+if [[ ! -e "${CALIBRATION_PATH}" ]]; then
+  echo "Calibration path not found: ${CALIBRATION_PATH}" >&2
+  echo "Pass --calibration with a .npz file or directory of .npy samples." >&2
+  exit 1
+fi
+
+if [[ ! -f "${INPUT_FILE}" ]]; then
+  echo "Input file not found: ${INPUT_FILE}" >&2
+  echo "Pass --input-file with a validation .npz or .npy file." >&2
+  exit 1
+fi
+
+run() {
+  echo "+ $*"
+  "$@"
+}
+
+UPLOAD_ARGS=(
+  "${CALIBRATION_PATH}"
+  "${INPUT_FILE}"
+  --from-step "${FROM_STEP}"
+  --config "${CONFIG_PATH}"
+)
+
+if [[ -n "${FROM_JOB}" ]]; then
+  UPLOAD_ARGS+=(--from-job "${FROM_JOB}")
+fi
+
+if [[ -n "${MODEL_S3_URI}" ]]; then
+  UPLOAD_ARGS+=(--model-s3-uri "${MODEL_S3_URI}")
+fi
+
+if [[ -n "${ONNX_PATH}" ]]; then
+  UPLOAD_ARGS+=(--onnx-path "${ONNX_PATH}")
+fi
+
+if [[ -n "${INPUT_NAME}" ]]; then
+  UPLOAD_ARGS+=(--input-name "${INPUT_NAME}")
+fi
+
+run uv run qc-cli ai-hub upload "${UPLOAD_ARGS[@]}"
+
+if [[ "${DOWNLOAD}" == false ]]; then
+  exit 0
+fi
+
+DOWNLOAD_ARGS=(--config "${CONFIG_PATH}")
+if [[ -n "${OUTPUT_PATH}" ]]; then
+  DOWNLOAD_ARGS+=(--output "${OUTPUT_PATH}")
+fi
+
+run uv run qc-cli ai-hub download "${DOWNLOAD_ARGS[@]}"
--- a/examples/training/source/train.py
+++ b/examples/training/source/train.py
@@ -126,10 +126,6 @@ def export_onnx(model: nn.Module, model_dir: Path, image_size: int) -> None:
        do_constant_folding=True,
        input_names=["input"],
        output_names=["logits"],
-        dynamic_axes={
-            "input": {0: "batch_size"},
-            "logits": {0: "batch_size"},
-        },
    )


--- a/src/qualcomm/aihub_jobs.py
+++ b/src/qualcomm/aihub_jobs.py
@@ -1,32 +1,26 @@
 from pathlib import Path
-from typing import Any
+from typing import Any, TypedDict
+
+import qai_hub.hub as hub
+from qai_hub.client import CompileJob, Device, InferenceJob, Model, ProfileJob, QuantizeDtype, QuantizeJob


-def _hub() -> Any:
-    import qai_hub as hub
-
-    return hub
+class ModelJobResult(TypedDict):
+    job: CompileJob | QuantizeJob
+    job_id: str
+    model: Model
+    model_id: str


-def _id(obj: Any) -> str:
-    for attr in ("model_id", "job_id", "id"):
-        value = getattr(obj, attr, None)
-        if value:
-            return str(value)
-    return str(obj)
+class InferenceJobResult(TypedDict):
+    job: InferenceJob
+    job_id: str
+    outputs: Any


-def _target_model(job: Any) -> Any:
-    if hasattr(job, "get_target_model"):
-        return job.get_target_model()
-    model = getattr(job, "target_model", None)
-    if model is not None:
-        return model
-    return job
-
-
-def get_model(model_id: str) -> Any:
-    return _hub().get_model(model_id)
+class ProfileJobResult(TypedDict):
+    job: ProfileJob
+    job_id: str


 def _dataset_entries(inputs: dict[str, Any]) -> dict[str, list[Any]]:
@@ -41,8 +35,7 @@ def submit_compile_job(
    options: str | None = None,
    job_name: str | None = None,
    model_name: str | None = None,
-) -> dict[str, Any]:
-    hub = _hub()
+) -> ModelJobResult:
    compile_options = f"--target_runtime {target_runtime}"
    if options:
        compile_options = f"{compile_options} {options}"
@@ -52,22 +45,22 @@ def submit_compile_job(
        model_arg = str(model)
    elif isinstance(model, str):
        candidate = Path(model)
-        model_arg = model if candidate.exists() or candidate.suffix else get_model(model)
+        model_arg = model if candidate.exists() or candidate.suffix else hub.get_model(model)

    if model_name and isinstance(model_arg, str) and Path(model_arg).exists():
        model_arg = hub.upload_model(model_arg, name=model_name)

    job = hub.submit_compile_job(
        model=model_arg,
-        device=hub.Device(device_name),
+        device=Device(device_name),
        name=job_name,
        input_specs=input_specs,
        options=compile_options,
    )
-    target_model = _target_model(job)
+    target_model = job.get_target_model()
    if target_model is None:
-        raise RuntimeError(f"Compile job {_id(job)} did not produce a target model.")
-    return {"job": job, "job_id": _id(job), "model": target_model, "model_id": _id(target_model)}
+        raise RuntimeError(f"Compile job {job.job_id} did not produce a target model.")
+    return {"job": job, "job_id": str(job.job_id), "model": target_model, "model_id": str(target_model.model_id)}


 def submit_inference_job(
@@ -76,18 +69,17 @@ def submit_inference_job(
    inputs: dict[str, Any],
    output_dir: str | Path,
    job_name: str | None = None,
-) -> dict[str, Any]:
-    hub = _hub()
+) -> InferenceJobResult:
    job = hub.submit_inference_job(
-        model=get_model(model_id),
-        device=hub.Device(device_name),
+        model=hub.get_model(model_id),
+        device=Device(device_name),
        inputs=_dataset_entries(inputs),
        name=job_name,
    )
    out = Path(output_dir)
    out.mkdir(parents=True, exist_ok=True)
    data = job.download_output_data(str(out))
-    return {"job": job, "job_id": _id(job), "outputs": data}
+    return {"job": job, "job_id": str(job.job_id), "outputs": data}


 def submit_profile_job(
@@ -95,15 +87,14 @@ def submit_profile_job(
    device_name: str,
    options: str | None = None,
    job_name: str | None = None,
-) -> dict[str, Any]:
-    hub = _hub()
+) -> ProfileJobResult:
    job = hub.submit_profile_job(
-        model=get_model(model_id),
-        device=hub.Device(device_name),
+        model=hub.get_model(model_id),
+        device=Device(device_name),
        name=job_name,
        options=options or "",
    )
-    return {"job": job, "job_id": _id(job)}
+    return {"job": job, "job_id": str(job.job_id)}


 def submit_quantize_job(
@@ -112,33 +103,27 @@ def submit_quantize_job(
    options: str | None = None,
    job_name: str | None = None,
    model_name: str | None = None,
-) -> dict[str, Any]:
-    hub = _hub()
+) -> ModelJobResult:
    model_arg = str(model)
    if model_name and Path(model_arg).exists():
        model_arg = hub.upload_model(model_arg, name=model_name)
    job = hub.submit_quantize_job(
        model=model_arg,
        calibration_data=_dataset_entries(calibration_data),
-        weights_dtype=hub.QuantizeDtype.INT8,
-        activations_dtype=hub.QuantizeDtype.INT8,
+        weights_dtype=QuantizeDtype.INT8,
+        activations_dtype=QuantizeDtype.INT8,
        name=job_name,
        options=options or "",
    )
-    target_model = _target_model(job)
+    target_model = job.get_target_model()
    if target_model is None:
-        raise RuntimeError(f"Quantize job {_id(job)} did not produce a target model.")
-    return {"job": job, "job_id": _id(job), "model": target_model, "model_id": _id(target_model)}
+        raise RuntimeError(f"Quantize job {job.job_id} did not produce a target model.")
+    return {"job": job, "job_id": str(job.job_id), "model": target_model, "model_id": str(target_model.model_id)}


 def download_model(model_id: str, output_path: str | Path) -> str:
    dest = Path(output_path)
    dest.parent.mkdir(parents=True, exist_ok=True)
-    model = get_model(model_id)
-    if hasattr(model, "download"):
+    model = hub.get_model(model_id)
    result = model.download(str(dest))
    return str(result or dest)
-    if hasattr(model, "download_model"):
-        result = model.download_model(str(dest))
-        return str(result or dest)
-    raise RuntimeError("AI Hub model object does not expose a download method.")
Author	SHA1	Message	Date
slalom	b411be7904	simplify jobs script	2026-06-01 16:54:06 -04:00
slalom	090be14a6a	add script to test steps in ai-hub	2026-06-01 16:53:45 -04:00