Add command to start SageMaker training

include sample training
This commit is contained in:
2026-05-25 16:48:31 -04:00
parent 62ffe163e8
commit 0e728cc193
13 changed files with 796 additions and 5 deletions

View File

@@ -0,0 +1,40 @@
#!/usr/bin/env bash
#
# Download the TensorFlow flower_photos archive, extract it, and copy its
# class folders into a separate "flower_photos_sagemaker" directory.
#
# Usage: <this-script> [DEST_DIR]
#   DEST_DIR  Working directory for the archive, the extracted files and the
#             prepared dataset. Defaults to examples/training/data.
#
# Requires tar plus either curl or wget. Exits 0 without downloading when the
# prepared dataset directory already exists.
set -euo pipefail

readonly DATASET_URL="https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
readonly DEST_DIR="${1:-examples/training/data}"
readonly ARCHIVE_PATH="${DEST_DIR}/flower_photos.tgz"
readonly RAW_DATASET_DIR="${DEST_DIR}/flower_photos"
readonly DATASET_DIR="${DEST_DIR}/flower_photos_sagemaker"
# The dataset is assembled here and renamed to DATASET_DIR only once every
# class has been copied. The "already exists" check below relies on
# DATASET_DIR never being present in a half-populated state.
readonly STAGING_DIR="${DATASET_DIR}.partial"
readonly -a CLASS_NAMES=("daisy" "dandelion" "roses" "sunflowers" "tulips")

# Remove the staging directory. Installed as an EXIT trap, so it also runs
# when a command fails under `set -e`.
cleanup_staging() {
  rm -rf -- "${STAGING_DIR:?}"
}

mkdir -p -- "${DEST_DIR}"

if [[ -d "${DATASET_DIR}" ]]; then
  echo "Dataset already exists: ${DATASET_DIR}"
  echo "Use this path with run_training.py:"
  echo " ${DATASET_DIR}"
  exit 0
fi

echo "Downloading TensorFlow flower_photos dataset..."
if command -v curl >/dev/null 2>&1; then
  # -f: fail on HTTP errors instead of saving the error page as the archive.
  curl -fL "${DATASET_URL}" -o "${ARCHIVE_PATH}"
elif command -v wget >/dev/null 2>&1; then
  wget -O "${ARCHIVE_PATH}" "${DATASET_URL}"
else
  echo "Either curl or wget is required." >&2
  exit 1
fi

echo "Extracting dataset..."
tar -xzf "${ARCHIVE_PATH}" -C "${DEST_DIR}"

echo "Preparing SageMaker directory layout..."
trap cleanup_staging EXIT
# Discard leftovers from an earlier interrupted run before starting over.
cleanup_staging
mkdir -p -- "${STAGING_DIR}"
for class_name in "${CLASS_NAMES[@]}"; do
  cp -R -- "${RAW_DATASET_DIR}/${class_name}" "${STAGING_DIR}/${class_name}"
done
# Publish the finished dataset in a single rename.
mv -- "${STAGING_DIR}" "${DATASET_DIR}"

echo "Dataset ready: ${DATASET_DIR}"
find "${DATASET_DIR}" -mindepth 1 -maxdepth 1 -type d -print | sort