Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions roboflow/adapters/rfapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,49 @@ def _save_annotation_error(response):
return AnnotationSaveError(str(responsejson), status_code=response.status_code)


# ---------------------------------------------------------------------------
# Zip upload endpoints
# ---------------------------------------------------------------------------


def init_zip_upload(api_key, workspace_url, project_url, split=None, tags=None, batch_name=None) -> dict:
    """POST /{ws}/{proj}/upload/zip — initialize a zip upload and get a signed URL.

    Args:
        api_key: Roboflow API key, sent as a query parameter.
        workspace_url: workspace slug.
        project_url: project slug.
        split: optional dataset split for the uploaded batch.
        tags: optional list of tags to apply to the uploaded batch.
        batch_name: optional human-readable batch name (sent as ``batchName``).

    Returns:
        Parsed JSON response (expected to contain ``taskId`` and ``signedUrl``).

    Raises:
        RoboflowError: if the server responds with a status other than 200/201.
    """
    url = f"{API_URL}/{workspace_url}/{project_url}/upload/zip"
    body: Dict[str, Union[str, List[str]]] = {}
    # Only send keys the caller actually supplied so the server applies its defaults.
    if split is not None:
        body["split"] = split
    if tags is not None:
        body["tags"] = tags
    if batch_name is not None:
        body["batchName"] = batch_name
    # Bounded timeout so a hung API call cannot block the upload flow forever
    # (upload_zip_to_signed_url already bounds its request the same way).
    response = requests.post(url, params={"api_key": api_key}, json=body, timeout=60)
    if response.status_code not in (200, 201):
        raise RoboflowError(response.text)
    return response.json()


def upload_zip_to_signed_url(signed_url, zip_path) -> None:
    """PUT the zip file to the GCS signed URL returned by init_zip_upload.

    Streams the archive straight from disk; raises RoboflowError on any
    non-success response.
    """
    headers = {"Content-Type": "application/zip"}
    # 60s to connect, up to an hour to stream a large archive.
    timeouts = (60, 3600)
    with open(zip_path, "rb") as zip_file:
        response = requests.put(signed_url, data=zip_file, headers=headers, timeout=timeouts)
    if not response.ok:
        raise RoboflowError(f"Zip upload to signed URL failed ({response.status_code}): {response.text}")


def get_zip_upload_status(api_key, workspace_url, task_id) -> dict:
    """GET /{ws}/upload/zip/{task_id} — poll status of an async zip upload."""
    endpoint = f"{API_URL}/{workspace_url}/upload/zip/{task_id}"
    response = requests.get(endpoint, params={"api_key": api_key})
    # Success path first; anything else surfaces the server's error text.
    if response.status_code == 200:
        return response.json()
    raise RoboflowError(response.text)


# ---------------------------------------------------------------------------
# Phase 2: Annotation batch & job endpoints
# ---------------------------------------------------------------------------
Expand Down
36 changes: 33 additions & 3 deletions roboflow/cli/handlers/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ def upload_image(
retries: Annotated[int, typer.Option("-r", "--retries", help="Retry failed uploads N times")] = 0,
labelmap: Annotated[Optional[str], typer.Option(help="Path to labelmap file")] = None,
is_prediction: Annotated[bool, typer.Option("--is-prediction", help="Mark upload as prediction")] = False,
zip_upload: Annotated[
bool,
typer.Option("--zip-upload", help="Zip the directory client-side and use the async zip upload flow"),
] = False,
no_wait: Annotated[
bool,
typer.Option("--no-wait", help="Zip flow: return immediately with task_id instead of polling"),
] = False,
) -> None:
"""Upload an image file or import a directory."""
args = ctx_to_args(
Expand All @@ -44,6 +52,8 @@ def upload_image(
retries=retries,
labelmap=labelmap,
is_prediction=is_prediction,
zip_upload=zip_upload,
no_wait=no_wait,
)
_handle_upload(args)

Expand Down Expand Up @@ -191,7 +201,7 @@ def _handle_upload(args): # noqa: ANN001
return

path = args.path
if os.path.isdir(path):
if os.path.isdir(path) or (os.path.isfile(path) and path.lower().endswith(".zip")):
_handle_upload_directory(args, api_key, path)
elif os.path.isfile(path):
_handle_upload_single(args, api_key, path)
Expand Down Expand Up @@ -262,20 +272,40 @@ def _handle_upload_directory(args, api_key: str, path: str) -> None: # noqa: AN
return

retries = getattr(args, "retries", None) or getattr(args, "num_retries", 0) or 0
tag_raw = getattr(args, "tag", None)
tags = [t.strip() for t in tag_raw.split(",") if t.strip()] if tag_raw else None
wait = not getattr(args, "no_wait", False)

try:
workspace.upload_dataset(
result = workspace.upload_dataset(
dataset_path=path,
project_name=args.project,
num_workers=args.concurrency,
batch_name=getattr(args, "batch", None),
num_retries=retries,
is_prediction=getattr(args, "is_prediction", False),
use_zip_upload=getattr(args, "zip_upload", False),
split=getattr(args, "split", None),
tags=tags,
wait=wait,
)
except Exception as exc:
output_error(args, str(exc))
return

# Count files uploaded (approximate via image extensions)
if isinstance(result, dict):
status = result.get("status", "unknown")
data = {
"status": status,
"task_id": result.get("task_id") or result.get("taskId"),
"path": path,
"project": args.project,
"result": result,
}
output(args, data, text=f"Imported {path} to {args.project} (zip upload, status={status})")
return

# Per-image fallback — count files via image extensions
count = 0
image_exts = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"}
for root, _dirs, files in os.walk(path):
Expand Down
123 changes: 115 additions & 8 deletions roboflow/core/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import json
import os
import sys
import tempfile
import time
import zipfile
from typing import Any, Dict, Generator, List, Optional

import requests
Expand Down Expand Up @@ -298,35 +300,94 @@
batch_name=None,
num_retries=0,
is_prediction=False,
):
*,
use_zip_upload: bool = False,
tags: Optional[List[str]] = None,
split: Optional[str] = None,
wait: bool = True,
poll_interval: float = 5.0,
poll_timeout: float = 3600.0,
) -> Optional[dict]:
"""
Upload a dataset to Roboflow.

A `.zip` ``dataset_path`` or ``use_zip_upload=True`` routes to the
server's async zip upload flow. Everything else (directory inputs by
default) keeps the legacy per-image flow.

Args:
dataset_path (str): path to the dataset
dataset_path (str): path to the dataset directory or a `.zip` file.
project_name (str): name of the project
num_workers (int): number of workers to use for parallel uploads
num_workers (int): number of workers to use for parallel uploads (per-image flow only)
dataset_format (str): format of the dataset (`voc`, `yolov8`, `yolov5`)
project_license (str): license of the project (set to `private` for private projects, only available for paid customers)
project_type (str): type of the project (only `object-detection` is supported)
batch_name (str, optional): name of the batch to upload the images to. Defaults to an automatically generated value.
num_retries (int, optional): number of times to retry uploading an image if the upload fails. Defaults to 0.
is_prediction (bool, optional): whether the annotations provided in the dataset are predictions and not ground truth. Defaults to False.
""" # noqa: E501 // docs
from roboflow.util import folderparser
from roboflow.util.image_utils import load_labelmap
use_zip_upload (bool, optional): opt-in to the zip flow for a directory input (the SDK zips it client-side). Ignored when dataset_path is already a `.zip`.
tags (list[str], optional): zip flow only — tags to apply to the uploaded batch.
split (str, optional): zip flow only — dataset split for the uploaded batch.
wait (bool, optional): zip flow only — poll for processing completion. Defaults to True.
poll_interval (float, optional): zip flow only — seconds between status polls.
poll_timeout (float, optional): zip flow only — total seconds to wait before timing out.

Returns:
dict | None: zip flow returns the final/pending status dict; per-image flow returns None.
""" # noqa: E501 // docs
if dataset_format != "NOT_USED":
print("Warning: parameter 'dataset_format' is deprecated and will be removed in a future release")
project, created = self._get_or_create_project(
project_id=project_name, license=project_license, type=project_type
)
is_classification = project.type == "classification"
parsed_dataset = folderparser.parsefolder(dataset_path, is_classification=is_classification)
if created:
print(f"Created project {project.id}")

Check failure

Code scanning / CodeQL

Clear-text logging of sensitive information High

This expression logs
sensitive data (password)
as clear text.
else:
print(f"Uploading to existing project {project.id}")

Check failure

Code scanning / CodeQL

Clear-text logging of sensitive information High

This expression logs
sensitive data (password)
as clear text.

is_zip_file = dataset_path.lower().endswith(".zip") and os.path.isfile(dataset_path)
use_zip_flow = is_zip_file or use_zip_upload
if use_zip_flow and is_prediction:
raise RoboflowError(
"Zip upload flow does not support is_prediction=True. "
"Call upload_dataset without use_zip_upload for prediction uploads."
)

if use_zip_flow:
project_slug = project.id.rsplit("/")[1]
temp_zip = None
try:
if dataset_path.lower().endswith(".zip") and os.path.isfile(dataset_path):
zip_path = dataset_path
else:
zip_path = temp_zip = _zip_directory(dataset_path)
print(f"Zipped {dataset_path} -> {zip_path}")

init = rfapi.init_zip_upload(
self.__api_key,
self.url,
project_slug,
split=split,
tags=tags,
batch_name=batch_name,
)
print(f"Uploading zip to Roboflow (task_id={init['taskId']})...")

Check failure

Code scanning / CodeQL

Clear-text logging of sensitive information High

This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
Comment thread
digaobarbosa marked this conversation as resolved.
Dismissed
rfapi.upload_zip_to_signed_url(init["signedUrl"], zip_path)

if not wait:
print(f"Zip uploaded; not waiting for processing. task_id={init['taskId']}")

Check failure

Code scanning / CodeQL

Clear-text logging of sensitive information High

This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
Comment thread
digaobarbosa marked this conversation as resolved.
Dismissed
return {"task_id": init["taskId"], "status": "pending"}

return _poll_zip_status(self.__api_key, self.url, init["taskId"], poll_interval, poll_timeout)
finally:
if temp_zip and os.path.exists(temp_zip):
os.unlink(temp_zip)

from roboflow.util import folderparser
from roboflow.util.image_utils import load_labelmap

is_classification = project.type == "classification"
parsed_dataset = folderparser.parsefolder(dataset_path, is_classification=is_classification)
images = parsed_dataset["images"]

location = parsed_dataset["location"]
Expand Down Expand Up @@ -434,6 +495,8 @@
with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
list(executor.map(_upload, images))

return None

def _get_or_create_project(self, project_id, license: str = "MIT", type: str = "object-detection"):
try:
existing_project = self.project(project_id)
Expand Down Expand Up @@ -1271,3 +1334,47 @@
json_value = {"name": self.name, "url": self.url, "projects": projects}

return json.dumps(json_value, indent=2)


def _zip_directory(src_dir: str) -> str:
"""Zip src_dir into a temp file, skipping hidden and macOS-junk entries."""
fd, zip_path = tempfile.mkstemp(suffix=".zip", prefix="roboflow-upload-")
os.close(fd)
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
for root, dirs, files in os.walk(src_dir):
dirs[:] = [d for d in dirs if not d.startswith(".") and d != "__MACOSX"]
for name in files:
if name.startswith(".") or name == "Thumbs.db":
continue
abs_path = os.path.join(root, name)
rel = os.path.relpath(abs_path, src_dir)
zf.write(abs_path, arcname=rel)
return zip_path


def _poll_zip_status(
    api_key: str,
    workspace_url: str,
    task_id: str,
    poll_interval: float,
    poll_timeout: float,
) -> dict:
    """Poll the async zip-upload task until it completes, fails, or times out.

    Args:
        api_key: Roboflow API key.
        workspace_url: workspace slug the task belongs to.
        task_id: task identifier returned by init_zip_upload.
        poll_interval: seconds to sleep between status requests.
        poll_timeout: total seconds to wait before raising.

    Returns:
        The final status dict (its ``status`` is ``completed`` or ``failed``).

    Raises:
        RoboflowError: if the deadline passes before the task reaches a
            terminal state.
    """
    deadline = time.monotonic() + poll_timeout
    last_progress = None
    while True:
        status = rfapi.get_zip_upload_status(api_key, workspace_url, task_id)
        state = status.get("status")
        progress = (status.get("progress") or {}).get("current")
        # Only print when progress actually advances, to keep output quiet.
        if progress is not None and progress != last_progress:
            print(f"  zip-upload progress: {progress}")
            last_progress = progress
        if state in {"completed", "failed"}:
            return status
        if time.monotonic() >= deadline:
            raise RoboflowError(
                f"Zip upload polling timed out after {poll_timeout}s "
                f"(task_id={task_id}, last_status={state}). "
                f"Call Workspace.upload_dataset(..., wait=False) and poll with "
                f"rfapi.get_zip_upload_status to check later."
            )
        time.sleep(poll_interval)
Loading
Loading