41 lines
1.2 KiB
Bash
Executable File
41 lines
1.2 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Download the TensorFlow flower_photos archive and copy its class folders
# into a separate directory for use with run_training.py.
#
# Usage:
#   <this script> [DEST_DIR]
#
# Arguments:
#   DEST_DIR  Working directory for the archive, the extracted tree and the
#             prepared dataset. Defaults to examples/training/data.
#
# Produces:
#   ${DEST_DIR}/flower_photos.tgz         downloaded archive
#   ${DEST_DIR}/flower_photos/            tree the archive is expected to unpack to
#   ${DEST_DIR}/flower_photos_sagemaker/  one sub-directory per entry of CLASS_NAMES
#
# Exit status:
#   0 when the dataset is ready (or was already present), non-zero otherwise.
#
# Requires: tar, plus either curl or wget.

set -euo pipefail

readonly DATASET_URL="https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
readonly DEST_DIR="${1:-examples/training/data}"
readonly ARCHIVE_PATH="${DEST_DIR}/flower_photos.tgz"
readonly RAW_DATASET_DIR="${DEST_DIR}/flower_photos"
readonly DATASET_DIR="${DEST_DIR}/flower_photos_sagemaker"
readonly CLASS_NAMES=("daisy" "dandelion" "roses" "sunflowers" "tulips")

# Scratch locations. Work happens here and is renamed into place only once it
# is complete, so ARCHIVE_PATH and DATASET_DIR never hold half-written data.
readonly ARCHIVE_TMP="${ARCHIVE_PATH}.partial"
readonly STAGING_DIR="${DATASET_DIR}.partial"

#######################################
# Remove scratch files left by this run or by an earlier interrupted one.
# Globals:   ARCHIVE_TMP, STAGING_DIR (read)
# Arguments: none
#######################################
cleanup() {
  rm -rf -- "${ARCHIVE_TMP}" "${STAGING_DIR}"
}

#######################################
# Fetch a URL into a file using curl, or wget when curl is not installed.
# Arguments: $1 - URL to download
#            $2 - path of the file to write
# Outputs:   the downloader's own progress output; an error on stderr when
#            neither tool is available
# Returns:   exits the script with status 1 when neither tool is available;
#            a downloader failure aborts the script through `set -e`
#######################################
download_archive() {
  local url=$1
  local out=$2

  if command -v curl >/dev/null 2>&1; then
    # -f: treat HTTP errors (e.g. 404) as failures instead of saving the
    # error page as if it were the archive.
    curl -fL "${url}" -o "${out}"
  elif command -v wget >/dev/null 2>&1; then
    wget -O "${out}" "${url}"
  else
    echo "Either curl or wget is required." >&2
    exit 1
  fi
}

#######################################
# Download, extract and arrange the dataset under DEST_DIR.
# Globals:   all constants defined above (read)
# Arguments: none
# Outputs:   progress messages and the list of prepared class directories
#######################################
main() {
  mkdir -p -- "${DEST_DIR}"

  # DATASET_DIR only ever appears through the final rename below, so its
  # presence means a previous run finished successfully.
  if [[ -d "${DATASET_DIR}" ]]; then
    echo "Dataset already exists: ${DATASET_DIR}"
    echo "Use this path with run_training.py:"
    echo " ${DATASET_DIR}"
    exit 0
  fi

  # Start from a clean slate and make sure scratch data is removed on exit.
  cleanup
  trap cleanup EXIT

  echo "Downloading TensorFlow flower_photos dataset..."
  download_archive "${DATASET_URL}" "${ARCHIVE_TMP}"
  mv -f -- "${ARCHIVE_TMP}" "${ARCHIVE_PATH}"

  echo "Extracting dataset..."
  tar -xzf "${ARCHIVE_PATH}" -C "${DEST_DIR}"

  echo "Preparing SageMaker directory layout..."
  mkdir -p -- "${STAGING_DIR}"
  local class_name
  for class_name in "${CLASS_NAMES[@]}"; do
    if [[ ! -d "${RAW_DATASET_DIR}/${class_name}" ]]; then
      echo "Expected class directory not found: ${RAW_DATASET_DIR}/${class_name}" >&2
      exit 1
    fi
    cp -R -- "${RAW_DATASET_DIR}/${class_name}" "${STAGING_DIR}/${class_name}"
  done

  # Publish the finished tree in one step; an interrupted run therefore never
  # leaves a partial DATASET_DIR that a later run would mistake for complete.
  mv -- "${STAGING_DIR}" "${DATASET_DIR}"

  echo "Dataset ready: ${DATASET_DIR}"
  find "${DATASET_DIR}" -mindepth 1 -maxdepth 1 -type d -print | sort
}

main "$@"