diff --git a/docs/cli.md b/docs/cli.md index 022184f..d10ee9e 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -221,3 +221,51 @@ $ flowbio samples upload-multiplexed --reads1 ./mux_R1.fastq.gz \ --annotation ./sheet.xlsx --json {"data_ids": ["mux_1"], "annotation_id": "ann_1", "warnings": []} ``` + +### `samples batch-template` + +Emit a sample-sheet template for a sample type, to fill in and feed to +`samples upload-batch`. + +``` +flowbio samples batch-template --sample-type TYPE [-o PATH | --output PATH] +``` + +Run `flowbio samples batch-template --help` for the full option list. The sample +type decides which metadata columns are marked required. It is validated against +the available types up front: an unrecognised type fails with a usage error +(exit `2`) listing the valid identifiers. + +**Sample-sheet schema** — the columns, in order: + +- The reserved columns `name`, `reads1`, `reads2`, `project`, `organism` + (`name` and `reads1` are always required; `reads1`/`reads2` are reads file + paths). +- One column per metadata attribute, keyed by its **identifier**. An attribute is + required when it is globally required or required for the chosen sample type. +- A `<identifier>__annotation` companion column immediately after each attribute + that permits a free-text annotation. + +There is **no** `sample_type` column — the type is supplied via `--sample-type` +to both this command and `upload-batch`. This CSV is distinct from the annotation +sheet produced by `samples annotation-template`. + +**Output** — human: the CSV header row on stdout (or written to `--output`), plus +a summary of required-vs-optional columns on stderr. `--json`: a per-column +descriptor list on stdout (`name`, `kind` of `reserved`/`metadata`/`annotation`, +`required`, closed-value `options` or `null`, and `description`) and **no CSV** — +so an agent can build rows directly. 
+ +**Exit codes** — `0` success; `2` missing `--sample-type`, or an unknown sample +type (the error lists the available types); `3` authentication failure; +otherwise the standard mapping above. + +**Example** + +```bash +$ flowbio samples batch-template --sample-type rna_seq +name,reads1,reads2,project,organism,cell_type,source,source__annotation + +$ flowbio samples batch-template --sample-type rna_seq --json +[{"name": "name", "kind": "reserved", "required": true, "options": null, "description": "..."}, ...] +``` diff --git a/flowbio/cli/_data.py b/flowbio/cli/_data.py index b50d461..10987cc 100644 --- a/flowbio/cli/_data.py +++ b/flowbio/cli/_data.py @@ -30,6 +30,7 @@ def register( upload.add_argument( "path", metavar="PATH", + type=Path, help="Local file to upload.", ) upload.add_argument( @@ -58,7 +59,7 @@ def _upload_command(args: argparse.Namespace, client: Client, output: Output) -> :returns: :attr:`ExitCode.SUCCESS` on success. """ data = client.data.upload_data( - existing_file(Path(args.path)), + existing_file(args.path), filename=args.filename, data_type=args.data_type, is_directory=args.directory, diff --git a/flowbio/cli/_samples.py b/flowbio/cli/_samples.py index a415435..f9ebc41 100644 --- a/flowbio/cli/_samples.py +++ b/flowbio/cli/_samples.py @@ -2,19 +2,24 @@ A thin wrapper over :attr:`Client.samples `: it parses the command line, merges metadata supplied as ``key=value`` pairs and/or a JSON -object, calls the library, and renders the result. The ``--sample-type`` is sent -as-is and validated server-side, not pre-checked by the CLI. +object, calls the library, and renders the result. Most commands send +``--sample-type`` as-is for server-side validation; ``batch-template`` is the +exception, pre-checking the type against the available types up front. 
""" from __future__ import annotations import argparse import json +from dataclasses import dataclass from pathlib import Path +from typing import Literal from flowbio.cli._exit_codes import CliUsageError, ExitCode from flowbio.cli._files import existing_file from flowbio.cli._output import Output, format_issue +from flowbio.cli._types import JsonValue from flowbio.v2.client import Client +from flowbio.v2.samples import MetadataAttribute, SampleTypeId def register( @@ -46,6 +51,15 @@ def register( "server-side demultiplexing." ), )) + _configure_batch_template(verbs.add_parser( + "batch-template", + parents=[global_parent], + help="Emit a sample-sheet template for a sample type.", + description=( + "Emit a CSV sample-sheet header (or a per-column descriptor under " + "--json) for use with 'samples upload-batch'." + ), + )) def _configure_upload(upload: argparse.ArgumentParser) -> None: @@ -60,17 +74,20 @@ def _configure_upload(upload: argparse.ArgumentParser) -> None: "--sample-type", required=True, metavar="TYPE", + type=SampleTypeId, help="Sample type identifier (sent as-is; validated server-side).", ) upload.add_argument( "--reads1", required=True, metavar="PATH", + type=Path, help="First reads file.", ) upload.add_argument( "--reads2", metavar="PATH", + type=Path, help="Second reads file (makes the sample paired-end).", ) upload.add_argument( @@ -104,6 +121,7 @@ def _configure_annotation_template(annotation_template: argparse.ArgumentParser) "--sample-type", default="generic", metavar="TYPE", + type=SampleTypeId, help=( "Sample type identifier (sent as-is; validated server-side). " "Defaults to 'generic' (base columns common to all types)." 
@@ -114,6 +132,7 @@ def _configure_annotation_template(annotation_template: argparse.ArgumentParser) "--output", required=True, metavar="PATH", + type=Path, help="File to write the .xlsx workbook to (the template is binary).", ) @@ -126,17 +145,20 @@ def _configure_upload_multiplexed(upload_multiplexed: argparse.ArgumentParser) - "--reads1", required=True, metavar="PATH", + type=Path, help="First multiplexed reads file.", ) upload_multiplexed.add_argument( "--reads2", metavar="PATH", + type=Path, help="Second multiplexed reads file (makes the upload paired-end).", ) upload_multiplexed.add_argument( "--annotation", required=True, metavar="PATH", + type=Path, help="Completed annotation sheet (obtained via `annotation-template`).", ) upload_multiplexed.add_argument( @@ -146,6 +168,25 @@ def _configure_upload_multiplexed(upload_multiplexed: argparse.ArgumentParser) - ) +def _configure_batch_template(batch_template: argparse.ArgumentParser) -> None: + batch_template.set_defaults( + command_parser=batch_template, handler=_batch_template_command, + ) + batch_template.add_argument( + "--sample-type", + required=True, + metavar="TYPE", + type=SampleTypeId, + help="Sample type the template is built for (decides required columns).", + ) + batch_template.add_argument( + "-o", "--output", + metavar="PATH", + type=Path, + help="Write the CSV template to this file instead of stdout.", + ) + + def _upload_command(args: argparse.Namespace, client: Client, output: Output) -> ExitCode: """Upload a single sample and report its identifier. @@ -155,9 +196,9 @@ def _upload_command(args: argparse.Namespace, client: Client, output: Output) -> :returns: :attr:`ExitCode.SUCCESS` on success. 
""" metadata = _merge_metadata(args.metadata, args.metadata_json) - data = {"reads1": existing_file(Path(args.reads1))} + data = {"reads1": existing_file(args.reads1)} if args.reads2 is not None: - data["reads2"] = existing_file(Path(args.reads2)) + data["reads2"] = existing_file(args.reads2) sample = client.samples.upload_sample( name=args.name, sample_type=args.sample_type, @@ -180,7 +221,7 @@ def _annotation_template_command( :param output: The result/error renderer. :returns: :attr:`ExitCode.SUCCESS` on success. """ - destination = Path(args.output) + destination = args.output template = client.samples.get_annotation_template(args.sample_type) try: destination.write_bytes(template) @@ -209,12 +250,12 @@ def _upload_multiplexed_command( :param output: The result/error renderer. :returns: :attr:`ExitCode.SUCCESS` on success. """ - reads = {"reads1": existing_file(Path(args.reads1))} + reads = {"reads1": existing_file(args.reads1)} if args.reads2 is not None: - reads["reads2"] = existing_file(Path(args.reads2)) + reads["reads2"] = existing_file(args.reads2) upload = client.samples.upload_multiplexed_data( reads=reads, - annotation=existing_file(Path(args.annotation)), + annotation=existing_file(args.annotation), ignore_warnings=not args.reject_warnings, ) if upload.warnings: @@ -233,6 +274,113 @@ def _upload_multiplexed_command( return ExitCode.SUCCESS +@dataclass(frozen=True) +class _TemplateColumn: + """One column of a sample-sheet template, in CSV order.""" + + name: str + kind: Literal["reserved", "metadata", "annotation"] + required: bool + options: list[str] | None + description: str + + @property + def descriptor(self) -> dict[str, JsonValue]: + return { + "name": self.name, + "kind": self.kind, + "required": self.required, + "options": self.options, + "description": self.description, + } + + +_RESERVED_COLUMNS = ( + _TemplateColumn("name", "reserved", True, None, "Unique sample name (no spaces)."), + _TemplateColumn("reads1", "reserved", True, None, "Path 
to the first reads file."), + _TemplateColumn("reads2", "reserved", False, None, "Path to the second reads file (paired-end)."), + _TemplateColumn("project", "reserved", False, None, "Project identifier to assign the sample to."), + _TemplateColumn("organism", "reserved", False, None, "Organism identifier to associate with the sample."), +) + + +def _batch_template_command( + args: argparse.Namespace, client: Client, output: Output, +) -> ExitCode: + """Emit a sample-sheet template for the chosen sample type. + + :param args: Parsed command-line arguments. + :param client: The authenticated Flow client. + :param output: The result/error renderer. + :returns: :attr:`ExitCode.SUCCESS` on success. + """ + _check_sample_type(client, args.sample_type) + columns = _template_columns( + client.samples.get_metadata_attributes(), args.sample_type, + ) + header = ",".join(column.name for column in columns) + if args.output is not None: + try: + args.output.write_text(f"{header}\n") + except OSError as error: + raise CliUsageError( + f"Could not write sample-sheet template to {args.output}: {error}", + ) from error + output.emit_advisory(f"Wrote sample-sheet template to {args.output}") + if output.json_mode or args.output is None: + output.emit_result(header, [column.descriptor for column in columns]) + output.emit_advisory(_required_summary(columns)) + return ExitCode.SUCCESS + + +def _check_sample_type(client: Client, sample_type: SampleTypeId) -> None: + identifiers = [sample.identifier for sample in client.samples.get_types()] + if sample_type not in identifiers: + raise CliUsageError( + f"Unknown sample type '{sample_type}'. 
" + f"Available types: {', '.join(sorted(identifiers))}", + ) + + +def _template_columns( + attributes: list[MetadataAttribute], sample_type: SampleTypeId, +) -> list[_TemplateColumn]: + columns = list(_RESERVED_COLUMNS) + for attribute in attributes: + required = ( + attribute.required or sample_type in attribute.required_for_sample_types + ) + columns.append( + _TemplateColumn( + name=attribute.identifier, + kind="metadata", + required=required, + options=attribute.options, + description=attribute.description, + ), + ) + if attribute.allow_annotation: + columns.append( + _TemplateColumn( + name=f"{attribute.identifier}__annotation", + kind="annotation", + required=False, + options=None, + description=f"Free-text annotation for {attribute.identifier}.", + ), + ) + return columns + + +def _required_summary(columns: list[_TemplateColumn]) -> str: + required = [column.name for column in columns if column.required] + optional = [column.name for column in columns if not column.required] + return ( + f"Required columns: {', '.join(required)}\n" + f"Optional columns: {', '.join(optional)}" + ) + + def _merge_metadata( pairs: list[str] | None, json_text: str | None, ) -> dict[str, str]: diff --git a/flowbio/v2/__init__.py b/flowbio/v2/__init__.py index 126bca9..90e6d56 100644 --- a/flowbio/v2/__init__.py +++ b/flowbio/v2/__init__.py @@ -36,7 +36,15 @@ from flowbio.v2.client import Client, ClientConfig from flowbio.v2.data import Data from flowbio.v2.exceptions import AnnotationValidationError -from flowbio.v2.samples import MetadataAttribute, MultiplexedUpload, Organism, Project, Sample, SampleType +from flowbio.v2.samples import ( + MetadataAttribute, + MultiplexedUpload, + Organism, + Project, + Sample, + SampleType, + SampleTypeId, +) __all__ = [ "AnnotationValidationError", @@ -49,6 +57,7 @@ "Project", "Sample", "SampleType", + "SampleTypeId", "TokenCredentials", "UsernamePasswordCredentials", ] diff --git a/flowbio/v2/samples.py b/flowbio/v2/samples.py index 
bf9ebf2..fd888ed 100644 --- a/flowbio/v2/samples.py +++ b/flowbio/v2/samples.py @@ -28,7 +28,7 @@ from collections.abc import Sequence from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, NewType from pydantic import BaseModel, Field @@ -43,6 +43,11 @@ from flowbio.v2._uploads import ChunkedUploader +SampleTypeId = NewType("SampleTypeId", str) +"""The identifier of a sample type (e.g. ``"RNA-Seq"``), as listed by +:meth:`SampleResource.get_types`.""" + + class SampleType(BaseModel, frozen=True): """A type of sample that can be uploaded to the Flow platform. @@ -53,7 +58,7 @@ class SampleType(BaseModel, frozen=True): print(f"{st.identifier}: {st.name}") """ - identifier: str = Field(description="Unique identifier for this sample type.") + identifier: SampleTypeId = Field(description="Unique identifier for this sample type.") name: str = Field(description="Human-readable display name.") description: str = Field(description="Explanation of what this sample type represents.") @@ -74,12 +79,16 @@ class MetadataAttribute(BaseModel, frozen=True): name: str = Field(description="Human-readable display name.") description: str = Field(description="Explanation of what this attribute represents.") required: bool = Field(description="Whether this attribute is required at sample creation.") - required_for_sample_types: list[str] = Field( + required_for_sample_types: list[SampleTypeId] = Field( description="Sample type identifiers for which this attribute is required at creation.", ) options: list[str] | None = Field( description="The list of valid values, or ``None`` if any value is accepted.", ) + allow_annotation: bool = Field( + default=False, + description="Whether this attribute permits a free-text annotation companion value.", + ) class Project(BaseModel, frozen=True): @@ -146,7 +155,7 @@ def __init__(self, transport: HttpTransport, uploader: ChunkedUploader) -> None: def upload_sample( self, name: str, - sample_type: str, + 
sample_type: SampleTypeId, data: dict[str, Path], metadata: dict[str, str] | None = None, project_id: str | None = None, @@ -298,7 +307,9 @@ def upload_multiplexed_data( warnings=warnings, ) - def get_annotation_template(self, sample_type: str = "generic") -> bytes: + def get_annotation_template( + self, sample_type: SampleTypeId = SampleTypeId("generic"), + ) -> bytes: """Download an annotation sheet template for multiplexed uploads. Annotation sheets are spreadsheets that describe multiple samples @@ -390,7 +401,7 @@ def get_metadata_attributes(self) -> list[MetadataAttribute]: def _create_metadata_attribute(self, item: dict) -> MetadataAttribute: item["required_for_sample_types"] = [ - link["sample_type_identifier"] + SampleTypeId(link["sample_type_identifier"]) for link in item.get("sample_type_links", []) if link.get("required") ] @@ -440,7 +451,7 @@ def _ordered_files(data: dict[str, Path]) -> list[tuple[str, Path]]: @staticmethod def _build_sample_fields( name: str, - sample_type: str, + sample_type: SampleTypeId, metadata: dict[str, str] | None, project_id: str | None, organism_id: str | None, diff --git a/specs/001-flowbio-cli/contracts/samples-batch-template.md b/specs/001-flowbio-cli/contracts/samples-batch-template.md index 650c33a..8928bd1 100644 --- a/specs/001-flowbio-cli/contracts/samples-batch-template.md +++ b/specs/001-flowbio-cli/contracts/samples-batch-template.md @@ -24,7 +24,11 @@ then one column per metadata attribute, each followed by a **no** `sample_type` column. Data sourced from `client.samples.get_metadata_attributes()` (and its -`allows_annotation`, `options`, `required`, `required_for_sample_types`). +`allow_annotation`, `options`, `required`, `required_for_sample_types`). + +The `--sample-type` is validated against `client.samples.get_types()` before any +template is produced; an unrecognised type is a usage error listing the available +identifiers. 
## Output @@ -43,8 +47,9 @@ Data sourced from `client.samples.get_metadata_attributes()` (and its ## Exit codes -`0` success; `2` missing `--sample-type`; `4` unknown sample type (if surfaced by -the lookup); standard mapping otherwise. +`0` success; `2` missing `--sample-type`, or an unknown sample type (validated +against `get_types()`, the error lists the available identifiers); standard +mapping otherwise. ## Acceptance mapping diff --git a/specs/001-flowbio-cli/data-model.md b/specs/001-flowbio-cli/data-model.md index 8e0e4f8..71aab4e 100644 --- a/specs/001-flowbio-cli/data-model.md +++ b/specs/001-flowbio-cli/data-model.md @@ -27,7 +27,7 @@ Existing fields: `identifier`, `name`, `description`, `required`, | Field | Type | Description | |-------|------|-------------| -| `allows_annotation` | `bool` | Whether this attribute permits a free-text annotation companion. Drives the `<identifier>__annotation` columns and JSON descriptors in `batch-template`. | +| `allow_annotation` | `bool` | Whether this attribute permits a free-text annotation companion. Drives the `<identifier>__annotation` columns and JSON descriptors in `batch-template`. | Rules: - Populated from the `/samples/metadata` response in `_create_metadata_attribute`. @@ -114,7 +114,7 @@ Per-row pre-flight validation (FR-028), all errors collected before any upload: - `name` contains no spaces; - values for closed-option attributes are within `options`; - a `<identifier>__annotation` is set only when `<identifier>`'s value is set **and** the - attribute `allows_annotation`; + attribute `allow_annotation`; - empty cells omitted (not sent as empty values). ### `BatchResult` — `flowbio/cli/_samples.py` @@ -158,7 +158,7 @@ list (no CSV): Column order: reserved columns first (`name`, `reads1`, `reads2`, `project`, `organism`), then one column per metadata attribute, each followed by its -`<identifier>__annotation` column when `allows_annotation`. There is **no** +`<identifier>__annotation` column when `allow_annotation`. There is **no** `sample_type` column. 
### `AnnotationTemplate` result — `flowbio/cli/_samples.py` diff --git a/specs/001-flowbio-cli/tasks.md b/specs/001-flowbio-cli/tasks.md index ced85a9..b063e7f 100644 --- a/specs/001-flowbio-cli/tasks.md +++ b/specs/001-flowbio-cli/tasks.md @@ -139,14 +139,14 @@ Single-project layout (per plan.md): CLI under `flowbio/cli/` (every module `_`- ### Tests for User Story 3 (MANDATORY — write first) ⚠️ -- [ ] T025 [P] [US3] Write a failing test in `tests/unit/v2/test_samples.py` for the additive `MetadataAttribute.allows_annotation` field: defaults to `False` when the payload omits the key, and is populated from the `/samples/metadata` response (data-model.md §MetadataAttribute, FR-019/FR-024) -- [ ] T027 [P] [US3] Write failing tests in `tests/unit/cli/test_samples.py` covering US3 scenarios 1–6: CSV header of reserved columns (`name,reads1,reads2,project,organism`) then one column per metadata attribute, with `__annotation` after each annotation-enabled attribute, and no `sample_type` column; required/optional summary on stderr without `--json`; `-o/--output PATH` writes to file; `--json` per-column descriptor list (name, kind, required, options, description) with no CSV; missing `--sample-type` → exit 2 (contracts/samples-batch-template.md) +- [X] T025 [P] [US3] Write a failing test in `tests/unit/v2/test_samples.py` for the additive `MetadataAttribute.allow_annotation` field: defaults to `False` when the payload omits the key, and is populated from the `/samples/metadata` response (data-model.md §MetadataAttribute, FR-019/FR-024) +- [X] T027 [P] [US3] Write failing tests in `tests/unit/cli/test_samples.py` covering US3 scenarios 1–6: CSV header of reserved columns (`name,reads1,reads2,project,organism`) then one column per metadata attribute, with `__annotation` after each annotation-enabled attribute, and no `sample_type` column; required/optional summary on stderr without `--json`; `-o/--output PATH` writes to file; `--json` per-column descriptor list (name, kind, 
required, options, description) with no CSV; missing `--sample-type` → exit 2 (contracts/samples-batch-template.md) ### Implementation for User Story 3 -- [ ] T026 [US3] Implement the additive `allows_annotation: bool = False` field on `MetadataAttribute` and populate it in `_create_metadata_attribute` in `flowbio/v2/samples.py` (additive, backwards-compatible — the feature's only library change) (depends on T025) -- [ ] T028 [US3] Implement the `batch-template` handler and `BatchTemplate` descriptor in `flowbio/cli/_samples.py` (column ordering, `required` derived from `required` OR chosen type in `required_for_sample_types`, `--json` descriptors, `-o/--output` writing, summary to stderr) sourced from `client.samples.get_metadata_attributes()`, and register the subcommand via the `register()` in `flowbio/cli/_samples.py` (wired into `flowbio/cli/_parser.py`) with `help=`/description text on every argument (FR-003, SC-008) (FR-024, FR-025, FR-026) -- [ ] T029 [US3] Document `batch-template` and the sample-sheet schema (reserved columns, metadata-identifier columns, annotation companions) in `docs/cli.md` (FR-041, FR-042) +- [X] T026 [US3] Implement the additive `allow_annotation: bool = False` field on `MetadataAttribute` and populate it in `_create_metadata_attribute` in `flowbio/v2/samples.py` (additive, backwards-compatible — the feature's only library change) (depends on T025) +- [X] T028 [US3] Implement the `batch-template` handler and `BatchTemplate` descriptor in `flowbio/cli/_samples.py` (column ordering, `required` derived from `required` OR chosen type in `required_for_sample_types`, `--json` descriptors, `-o/--output` writing, summary to stderr) sourced from `client.samples.get_metadata_attributes()`, and register the subcommand via the `register()` in `flowbio/cli/_samples.py` (wired into `flowbio/cli/_parser.py`) with `help=`/description text on every argument (FR-003, SC-008) (FR-024, FR-025, FR-026) +- [X] T029 [US3] Document `batch-template` and the 
sample-sheet schema (reserved columns, metadata-identifier columns, annotation companions) in `docs/cli.md` (FR-041, FR-042) **Checkpoint**: Template generation works independently and defines the contract consumed by batch upload. @@ -165,7 +165,7 @@ Single-project layout (per plan.md): CLI under `flowbio/cli/` (every module `_`- ### Implementation for User Story 4 -- [ ] T031 [US4] Implement `flowbio/cli/_sheet.py`: `SampleSheet`/`SheetRow` CSV parsing, relative-path resolution, empty-cell omission, the FR-028 per-row pre-flight validation collecting all errors (using `MetadataAttribute.allows_annotation` from T026), and the non-CSV → USAGE rejection (depends on T030, T026) +- [ ] T031 [US4] Implement `flowbio/cli/_sheet.py`: `SampleSheet`/`SheetRow` CSV parsing, relative-path resolution, empty-cell omission, the FR-028 per-row pre-flight validation collecting all errors (using `MetadataAttribute.allow_annotation` from T026), and the non-CSV → USAGE rejection (depends on T030, T026) - [ ] T033 [US4] Implement the `upload-batch` handler and `BatchResult` in `flowbio/cli/_samples.py` (parse + validate all rows before any upload; `--skip-invalid`; sequential upload reusing the T018 single-sample path; default-continue vs `--stop-on-error`; exit code: all uploaded→0, pre-flight invalid without `--skip-invalid`→2, any upload failure→1) and register the subcommand via the `register()` in `flowbio/cli/_samples.py` (wired into `flowbio/cli/_parser.py`) with `help=`/description text on every argument (FR-003, SC-008) (FR-027…FR-032) - [ ] T034 [US4] Document `upload-batch` (validation behaviour, `--skip-invalid`/`--stop-on-error`, `--json` shape, worked example) in `docs/cli.md` (FR-041, FR-042) @@ -198,8 +198,8 @@ Single-project layout (per plan.md): CLI under `flowbio/cli/` (every module `_`- - **US1 (P1)**: Foundational only — first runnable command (validates the foundation). - **US2 (P2)**: Foundational only. Adds the metadata-parsing + single-sample path. 
- **US5 (P5)**: Foundational only. Wraps the **existing** `get_annotation_template` and `upload_multiplexed_data` methods — no library change. Independent of the other sample commands, hence sequenced before US3/US4. -- **US3 (P3)**: Foundational + the additive `MetadataAttribute.allows_annotation` field (T025/T026). Independent of US1/US2/US5. -- **US4 (P4)**: Reuses the US2 single-sample upload path (T018) and the US3 sheet contract + `allows_annotation` field (T026); needs `_sheet.py`. Strongest cross-story coupling — sequenced last. +- **US3 (P3)**: Foundational + the additive `MetadataAttribute.allow_annotation` field (T025/T026). Independent of US1/US2/US5. +- **US4 (P4)**: Reuses the US2 single-sample upload path (T018) and the US3 sheet contract + `allow_annotation` field (T026); needs `_sheet.py`. Strongest cross-story coupling — sequenced last. ### Within Each User Story @@ -244,7 +244,7 @@ Task T011: "Implement flowbio/cli/_progress.py" ### Incremental Delivery -Foundation → US1 (MVP) → US2 → US5 → US3 → US4 → Polish. US5 is sequenced ahead of US3/US4 because it is independent and needs no library change; US3 (and the additive `allows_annotation` field) precedes US4, which reuses both the single-sample path and the sheet contract. Each story is independently testable and adds value without breaking earlier ones. +Foundation → US1 (MVP) → US2 → US5 → US3 → US4 → Polish. US5 is sequenced ahead of US3/US4 because it is independent and needs no library change; US3 (and the additive `allow_annotation` field) precedes US4, which reuses both the single-sample path and the sheet contract. Each story is independently testable and adds value without breaking earlier ones. 
--- diff --git a/tests/unit/cli/test_samples.py b/tests/unit/cli/test_samples.py index 8364370..f14c80b 100644 --- a/tests/unit/cli/test_samples.py +++ b/tests/unit/cli/test_samples.py @@ -479,3 +479,184 @@ def test_annotation_validation_errors_in_json_error_document( assert result.exit_code == 5 assert result.stdout == "" assert json.loads(result.stderr)["errors"] == errors + + +METADATA_URL = f"{DEFAULT_BASE_URL}/samples/metadata" +RESERVED_HEADER = "name,reads1,reads2,project,organism" + + +def _mock_metadata() -> None: + respx.get(f"{DEFAULT_BASE_URL}/samples/types").mock( + return_value=httpx.Response(HTTPStatus.OK, json=[ + { + "identifier": "rna_seq", + "name": "RNA-Seq", + "description": "RNA sequencing.", + }, + ]), + ) + respx.get(METADATA_URL).mock( + return_value=httpx.Response(HTTPStatus.OK, json=[ + { + "identifier": "cell_type", + "name": "Cell Type", + "description": "The cell type", + "required": False, + "required_for_public": False, + "all_sample_types": False, + "allow_user_terms": False, + "regex_validator": None, + "has_options": True, + "allow_annotation": False, + "sample_type_links": [ + {"sample_type_identifier": "rna_seq", "required": True}, + ], + }, + { + "identifier": "source", + "name": "Source", + "description": "Sample source", + "required": False, + "required_for_public": False, + "all_sample_types": True, + "allow_user_terms": False, + "regex_validator": None, + "has_options": False, + "allow_annotation": True, + "sample_type_links": [], + }, + ]), + ) + respx.get(f"{METADATA_URL}/cell_type/options").mock( + return_value=httpx.Response( + HTTPStatus.OK, + json={"options": [{"value": "Neuron"}, {"value": "Fibroblast"}]}, + ), + ) + + +class TestSamplesBatchTemplate: + + @respx.mock + def test_csv_header_orders_reserved_then_metadata_with_annotation_companion( + self, run_cli, + ) -> None: + _mock_metadata() + + result = run_cli( + "samples", "batch-template", "--sample-type", "rna_seq", + "--token", TOKEN, + ) + + assert 
result.exit_code == 0 + assert result.stdout.strip() == ( + f"{RESERVED_HEADER},cell_type,source,source__annotation" + ) + assert "sample_type" not in result.stdout + + @respx.mock + def test_summary_of_required_columns_on_stderr_without_json( + self, run_cli, + ) -> None: + _mock_metadata() + + result = run_cli( + "samples", "batch-template", "--sample-type", "rna_seq", + "--token", TOKEN, + ) + + assert "cell_type" in result.stderr + assert "source" in result.stderr + + @respx.mock + def test_output_flag_writes_csv_to_file(self, run_cli, tmp_path: Path) -> None: + _mock_metadata() + destination = tmp_path / "template.csv" + + result = run_cli( + "samples", "batch-template", "--sample-type", "rna_seq", + "-o", str(destination), "--token", TOKEN, + ) + + assert result.exit_code == 0 + assert destination.read_text().splitlines()[0] == ( + f"{RESERVED_HEADER},cell_type,source,source__annotation" + ) + assert "cell_type" not in result.stdout + + @respx.mock + def test_json_emits_column_descriptors_and_no_csv(self, run_cli) -> None: + _mock_metadata() + + result = run_cli( + "samples", "batch-template", "--sample-type", "rna_seq", + "--json", "--token", TOKEN, + ) + + assert result.exit_code == 0 + descriptors = json.loads(result.stdout) + assert result.stdout.count("\n") == 1 + by_name = {column["name"]: column for column in descriptors} + assert by_name["name"]["kind"] == "reserved" + assert by_name["name"]["required"] is True + assert by_name["cell_type"]["kind"] == "metadata" + assert by_name["cell_type"]["required"] is True + assert by_name["cell_type"]["options"] == ["Neuron", "Fibroblast"] + assert by_name["source__annotation"]["kind"] == "annotation" + assert by_name["source"]["required"] is False + assert "sample_type" not in by_name + + @respx.mock + def test_json_with_output_writes_csv_file_and_emits_descriptors( + self, run_cli, tmp_path: Path, + ) -> None: + _mock_metadata() + destination = tmp_path / "template.csv" + + result = run_cli( + "samples", 
"batch-template", "--sample-type", "rna_seq", + "--json", "-o", str(destination), "--token", TOKEN, + ) + + assert result.exit_code == 0 + assert destination.read_text().splitlines()[0] == ( + f"{RESERVED_HEADER},cell_type,source,source__annotation" + ) + descriptors = json.loads(result.stdout) + assert [column["name"] for column in descriptors][:5] == [ + "name", "reads1", "reads2", "project", "organism", + ] + + @respx.mock + def test_unwritable_output_path_is_usage_error( + self, run_cli, tmp_path: Path, + ) -> None: + _mock_metadata() + destination = tmp_path / "does-not-exist" / "template.csv" + + result = run_cli( + "samples", "batch-template", "--sample-type", "rna_seq", + "-o", str(destination), "--token", TOKEN, + ) + + assert result.exit_code == 2 + assert "Traceback" not in result.stderr + + @respx.mock + def test_unknown_sample_type_is_usage_error_listing_types(self, run_cli) -> None: + _mock_metadata() + + result = run_cli( + "samples", "batch-template", "--sample-type", "bogus", + "--token", TOKEN, + ) + + assert result.exit_code == 2 + assert "bogus" in result.stderr + assert "rna_seq" in result.stderr + assert result.stdout == "" + + def test_missing_sample_type_is_usage_error(self, run_cli) -> None: + result = run_cli("samples", "batch-template", "--token", TOKEN) + + assert result.exit_code == 2 diff --git a/tests/unit/v2/test_samples.py b/tests/unit/v2/test_samples.py index 708ef91..cc42ad7 100644 --- a/tests/unit/v2/test_samples.py +++ b/tests/unit/v2/test_samples.py @@ -200,6 +200,55 @@ def test_populates_required_for_sample_types_from_links(self) -> None: assert result[0].required_for_sample_types == ["rna_seq", "atac_seq"] + @respx.mock + def test_populates_allow_annotation_from_payload(self) -> None: + respx.get(f"{DEFAULT_BASE_URL}/samples/metadata").mock( + return_value=httpx.Response(200, json=[ + { + "identifier": "source", + "name": "Source", + "description": "Sample source", + "required": False, + "required_for_public": False, + 
"all_sample_types": True, + "allow_user_terms": False, + "regex_validator": None, + "has_options": False, + "allow_annotation": True, + "sample_type_links": [], + }, + ]), + ) + + client = Client() + result = client.samples.get_metadata_attributes() + + assert result[0].allow_annotation is True + + @respx.mock + def test_allow_annotation_defaults_to_false_when_absent(self) -> None: + respx.get(f"{DEFAULT_BASE_URL}/samples/metadata").mock( + return_value=httpx.Response(200, json=[ + { + "identifier": "scientist", + "name": "Scientist", + "description": "Who ran it", + "required": False, + "required_for_public": False, + "all_sample_types": True, + "allow_user_terms": False, + "regex_validator": None, + "has_options": False, + "sample_type_links": [], + }, + ]), + ) + + client = Client() + result = client.samples.get_metadata_attributes() + + assert result[0].allow_annotation is False + class TestGetOwnedProjects: