#!/usr/bin/env bash
#
# Checks qc-cli infrastructure status, optionally uploads the flower-photos
# dataset directory, starts a training job via `qc-cli train start`, and then
# either prints the job status once or polls until the job reports
# Completed / Failed / Stopped.
#
# NOTE(review): the copy of this script that was reviewed had lost its line
# breaks and the text between the start of usage()'s here-document and the
# fallback arm of the option parser. The usage text and the option names below
# (--config, --dataset-dir, --wait, --skip-upload, --poll-seconds, -h/--help)
# are reconstructed from the variable names and the surviving fragment —
# confirm them against the original before merging.
set -euo pipefail

CONFIG_PATH="config.yaml"
DATASET_DIR="examples/training/data/flower_photos_sagemaker"
WAIT=false
SKIP_UPLOAD=false
POLL_SECONDS=60

# Print the command-line help to stdout; callers redirect to stderr on error.
usage() {
  cat <<'EOF'
Usage: [options]

Options:
  --config PATH        qc-cli config file (default: config.yaml)
  --dataset-dir DIR    dataset directory to upload
                       (default: examples/training/data/flower_photos_sagemaker)
  --skip-upload        do not upload the dataset before training
  --wait               poll until the training job finishes
  --poll-seconds N     seconds between status polls with --wait (default: 60)
  -h, --help           show this help and exit
EOF
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --config)
      CONFIG_PATH="${2:?--config requires a value}"
      shift 2
      ;;
    --dataset-dir)
      DATASET_DIR="${2:?--dataset-dir requires a value}"
      shift 2
      ;;
    --poll-seconds)
      POLL_SECONDS="${2:?--poll-seconds requires a value}"
      shift 2
      ;;
    --wait)
      WAIT=true
      shift
      ;;
    --skip-upload)
      SKIP_UPLOAD=true
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown argument: $1" >&2
      usage >&2
      exit 1
      ;;
  esac
done

if [[ ! -f "${CONFIG_PATH}" ]]; then
  echo "Config not found: ${CONFIG_PATH}" >&2
  exit 1
fi

# The dataset directory is only required when it is actually going to be
# uploaded.
if [[ "${SKIP_UPLOAD}" == false && ! -d "${DATASET_DIR}" ]]; then
  echo "Dataset not found: ${DATASET_DIR}" >&2
  echo "Run: bash examples/training/download_flower_photos.sh" >&2
  exit 1
fi

# Echo a command (prefixed with "+ ") to stdout, then execute it.
# Arguments: the command and its arguments.
# Returns:   the exit status of the executed command.
run() {
  echo "+ $*"
  "$@"
}

run uv run qc-cli infra status --config "${CONFIG_PATH}"

if [[ "${SKIP_UPLOAD}" == false ]]; then
  run uv run qc-cli upload "${DATASET_DIR}" --config "${CONFIG_PATH}"
fi

# Capture the output of `train start` so the job name can be parsed from it
# while it is still shown to the user.
TRAIN_OUTPUT_FILE="$(mktemp)"
trap 'rm -f "${TRAIN_OUTPUT_FILE}"' EXIT

run uv run qc-cli train start --config "${CONFIG_PATH}" | tee "${TRAIN_OUTPUT_FILE}"

# grep exits 1 when nothing matches; with `set -e` and `pipefail` that would
# terminate the script right here, before the explicit error message below.
# `|| true` leaves JOB_NAME empty instead so the check can report the problem.
JOB_NAME="$(grep -Eo 'qc-cli-[0-9]{8}-[0-9]{6}' "${TRAIN_OUTPUT_FILE}" | tail -n 1 || true)"
if [[ -z "${JOB_NAME}" ]]; then
  echo "Could not find training job name in qc-cli output." >&2
  exit 1
fi

echo "Submitted SageMaker training job: ${JOB_NAME}"

if [[ "${WAIT}" == false ]]; then
  run uv run qc-cli train status "${JOB_NAME}" --config "${CONFIG_PATH}"
  exit 0
fi

# Poll until the status output contains a terminal state.
# The checks read from a here-string rather than `printf | grep -q`: under
# `pipefail`, grep -q exiting early can kill the writer with SIGPIPE and make
# a matching pipeline look like a failure.
while true; do
  STATUS_OUTPUT="$(uv run qc-cli train status "${JOB_NAME}" --config "${CONFIG_PATH}")"
  echo "${STATUS_OUTPUT}"

  if grep -q 'Status:.*Completed' <<<"${STATUS_OUTPUT}"; then
    echo "Training completed successfully."
    exit 0
  fi

  if grep -q 'Status:.*Failed' <<<"${STATUS_OUTPUT}"; then
    echo "Training failed." >&2
    exit 1
  fi

  if grep -q 'Status:.*Stopped' <<<"${STATUS_OUTPUT}"; then
    echo "Training stopped." >&2
    exit 1
  fi

  sleep "${POLL_SECONDS}"
done