Skip to content
48 changes: 48 additions & 0 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,51 @@ $ flowbio samples upload-multiplexed --reads1 ./mux_R1.fastq.gz \
--annotation ./sheet.xlsx --json
{"data_ids": ["mux_1"], "annotation_id": "ann_1", "warnings": []}
```

### `samples batch-template`

Emit a sample-sheet template for a sample type, to fill in and feed to
`samples upload-batch`.

```
flowbio samples batch-template --sample-type TYPE [-o PATH | --output PATH]
```

Run `flowbio samples batch-template --help` for the full option list. The sample
type decides which metadata columns are marked required. It is validated against
the available types up front: an unrecognised type fails with a usage error
(exit `2`) listing the valid identifiers.

**Sample-sheet schema** — the columns, in order:

- The reserved columns `name`, `reads1`, `reads2`, `project`, `organism`
(`name` and `reads1` are always required; `reads1`/`reads2` are reads file
paths).
- One column per metadata attribute, keyed by its **identifier**. An attribute is
required when it is globally required or required for the chosen sample type.
- A `<identifier>__annotation` companion column immediately after each attribute
that permits a free-text annotation.

There is **no** `sample_type` column — the type is supplied via `--sample-type`
to both this command and `upload-batch`. This CSV is distinct from the annotation
sheet produced by `samples annotation-template`.

**Output** — human: the CSV header row on stdout (or written to `--output`), plus
a summary of required-vs-optional columns on stderr. `--json`: a per-column
descriptor list on stdout (`name`, `kind` of `reserved`/`metadata`/`annotation`,
`required`, closed-value `options` or `null`, and `description`) and **no CSV** on
stdout — so an agent can build rows directly. If `--output` is also given, the CSV
header is still written to that file.

**Exit codes** — `0` success; `2` missing `--sample-type`, an unknown sample
type (the error lists the available types), or an `--output` path that cannot be
written; `3` authentication failure; otherwise the standard mapping above.

**Example**

```bash
$ flowbio samples batch-template --sample-type rna_seq
name,reads1,reads2,project,organism,cell_type,source,source__annotation

$ flowbio samples batch-template --sample-type rna_seq --json
[{"name": "name", "kind": "reserved", "required": true, "options": null, "description": "..."}, ...]
```
3 changes: 2 additions & 1 deletion flowbio/cli/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def register(
upload.add_argument(
"path",
metavar="PATH",
type=Path,
help="Local file to upload.",
)
upload.add_argument(
Expand Down Expand Up @@ -58,7 +59,7 @@ def _upload_command(args: argparse.Namespace, client: Client, output: Output) ->
:returns: :attr:`ExitCode.SUCCESS` on success.
"""
data = client.data.upload_data(
existing_file(Path(args.path)),
existing_file(args.path),
filename=args.filename,
data_type=args.data_type,
is_directory=args.directory,
Expand Down
164 changes: 156 additions & 8 deletions flowbio/cli/_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,24 @@

A thin wrapper over :attr:`Client.samples <flowbio.v2.Client.samples>`: it parses
the command line, merges metadata supplied as ``key=value`` pairs and/or a JSON
object, calls the library, and renders the result. The ``--sample-type`` is sent
as-is and validated server-side, not pre-checked by the CLI.
object, calls the library, and renders the result. Most commands send
``--sample-type`` as-is for server-side validation; ``batch-template`` is the
exception, pre-checking the type against the available types up front.
"""
from __future__ import annotations

import argparse
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Literal

from flowbio.cli._exit_codes import CliUsageError, ExitCode
from flowbio.cli._files import existing_file
from flowbio.cli._output import Output, format_issue
from flowbio.cli._types import JsonValue
from flowbio.v2.client import Client
from flowbio.v2.samples import MetadataAttribute, SampleTypeId


def register(
Expand Down Expand Up @@ -46,6 +51,15 @@ def register(
"server-side demultiplexing."
),
))
_configure_batch_template(verbs.add_parser(
"batch-template",
parents=[global_parent],
help="Emit a sample-sheet template for a sample type.",
description=(
"Emit a CSV sample-sheet header (or a per-column descriptor under "
"--json) for use with 'samples upload-batch'."
),
))


def _configure_upload(upload: argparse.ArgumentParser) -> None:
Expand All @@ -60,17 +74,20 @@ def _configure_upload(upload: argparse.ArgumentParser) -> None:
"--sample-type",
required=True,
metavar="TYPE",
type=SampleTypeId,
help="Sample type identifier (sent as-is; validated server-side).",
)
upload.add_argument(
"--reads1",
required=True,
metavar="PATH",
type=Path,
help="First reads file.",
)
upload.add_argument(
"--reads2",
metavar="PATH",
type=Path,
help="Second reads file (makes the sample paired-end).",
)
upload.add_argument(
Expand Down Expand Up @@ -104,6 +121,7 @@ def _configure_annotation_template(annotation_template: argparse.ArgumentParser)
"--sample-type",
default="generic",
metavar="TYPE",
type=SampleTypeId,
help=(
"Sample type identifier (sent as-is; validated server-side). "
"Defaults to 'generic' (base columns common to all types)."
Expand All @@ -114,6 +132,7 @@ def _configure_annotation_template(annotation_template: argparse.ArgumentParser)
"--output",
required=True,
metavar="PATH",
type=Path,
help="File to write the .xlsx workbook to (the template is binary).",
)

Expand All @@ -126,17 +145,20 @@ def _configure_upload_multiplexed(upload_multiplexed: argparse.ArgumentParser) -
"--reads1",
required=True,
metavar="PATH",
type=Path,
help="First multiplexed reads file.",
)
upload_multiplexed.add_argument(
"--reads2",
metavar="PATH",
type=Path,
help="Second multiplexed reads file (makes the upload paired-end).",
)
upload_multiplexed.add_argument(
"--annotation",
required=True,
metavar="PATH",
type=Path,
help="Completed annotation sheet (obtained via `annotation-template`).",
)
upload_multiplexed.add_argument(
Expand All @@ -146,6 +168,25 @@ def _configure_upload_multiplexed(upload_multiplexed: argparse.ArgumentParser) -
)


def _configure_batch_template(batch_template: argparse.ArgumentParser) -> None:
    """Attach the ``batch-template`` defaults and options to its sub-parser.

    :param batch_template: The parser created for the ``batch-template`` verb.
    """
    # Record the parser itself and the handler as parsed-namespace defaults.
    batch_template.set_defaults(command_parser=batch_template)
    batch_template.set_defaults(handler=_batch_template_command)

    add_option = batch_template.add_argument
    add_option(
        "--sample-type",
        type=SampleTypeId,
        metavar="TYPE",
        required=True,
        help="Sample type the template is built for (decides required columns).",
    )
    add_option(
        "-o",
        "--output",
        type=Path,
        metavar="PATH",
        help="Write the CSV template to this file instead of stdout.",
    )


def _upload_command(args: argparse.Namespace, client: Client, output: Output) -> ExitCode:
"""Upload a single sample and report its identifier.

Expand All @@ -155,9 +196,9 @@ def _upload_command(args: argparse.Namespace, client: Client, output: Output) ->
:returns: :attr:`ExitCode.SUCCESS` on success.
"""
metadata = _merge_metadata(args.metadata, args.metadata_json)
data = {"reads1": existing_file(Path(args.reads1))}
data = {"reads1": existing_file(args.reads1)}
if args.reads2 is not None:
data["reads2"] = existing_file(Path(args.reads2))
data["reads2"] = existing_file(args.reads2)
sample = client.samples.upload_sample(
name=args.name,
sample_type=args.sample_type,
Expand All @@ -180,7 +221,7 @@ def _annotation_template_command(
:param output: The result/error renderer.
:returns: :attr:`ExitCode.SUCCESS` on success.
"""
destination = Path(args.output)
destination = args.output
template = client.samples.get_annotation_template(args.sample_type)
try:
destination.write_bytes(template)
Expand Down Expand Up @@ -209,12 +250,12 @@ def _upload_multiplexed_command(
:param output: The result/error renderer.
:returns: :attr:`ExitCode.SUCCESS` on success.
"""
reads = {"reads1": existing_file(Path(args.reads1))}
reads = {"reads1": existing_file(args.reads1)}
if args.reads2 is not None:
reads["reads2"] = existing_file(Path(args.reads2))
reads["reads2"] = existing_file(args.reads2)
upload = client.samples.upload_multiplexed_data(
reads=reads,
annotation=existing_file(Path(args.annotation)),
annotation=existing_file(args.annotation),
ignore_warnings=not args.reject_warnings,
)
if upload.warnings:
Expand All @@ -233,6 +274,113 @@ def _upload_multiplexed_command(
return ExitCode.SUCCESS


@dataclass(frozen=True)
class _TemplateColumn:
    """A single column of a sample-sheet template.

    Instances are immutable and are kept in the order the columns appear in
    the CSV header.
    """

    # Header text of the column.
    name: str
    # Which group of columns this one belongs to.
    kind: Literal["reserved", "metadata", "annotation"]
    # Whether the column is reported as required.
    required: bool
    # Permitted values for the column, or None when none are given.
    options: list[str] | None
    # Human-readable explanation of the column.
    description: str

    @property
    def descriptor(self) -> dict[str, JsonValue]:
        """Return the column as a field-name to value mapping, in field order."""
        exported = ("name", "kind", "required", "options", "description")
        return {field_name: getattr(self, field_name) for field_name in exported}


# Fixed leading columns of every sample sheet, in header order. Each entry is
# (column name, required?, description); none of them has a closed option set.
_RESERVED_COLUMNS = tuple(
    _TemplateColumn(column_name, "reserved", is_required, None, summary)
    for column_name, is_required, summary in (
        ("name", True, "Unique sample name (no spaces)."),
        ("reads1", True, "Path to the first reads file."),
        ("reads2", False, "Path to the second reads file (paired-end)."),
        ("project", False, "Project identifier to assign the sample to."),
        ("organism", False, "Organism identifier to associate with the sample."),
    )
)


def _batch_template_command(
    args: argparse.Namespace, client: Client, output: Output,
) -> ExitCode:
    """Emit a sample-sheet template for the chosen sample type.

    The sample type is checked against the types reported by the client before
    any column is built. The comma-joined header is written to ``--output``
    when one is given. The result (header plus per-column descriptors) is
    emitted when no output file was requested or when JSON mode is on, and a
    required/optional column summary is emitted as an advisory in every case.

    :param args: Parsed command-line arguments.
    :param client: The authenticated Flow client.
    :param output: The result/error renderer.
    :returns: :attr:`ExitCode.SUCCESS` on success.
    :raises CliUsageError: If the sample type is unknown or the output file
        cannot be written.
    """
    _check_sample_type(client, args.sample_type)
    columns = _template_columns(
        client.samples.get_metadata_attributes(), args.sample_type,
    )
    header = ",".join(column.name for column in columns)
    if args.output is not None:
        try:
            # Pin the encoding so the file does not depend on the machine's
            # locale: with the locale default, a non-ASCII identifier could
            # raise UnicodeEncodeError, which the OSError handler below would
            # not catch.
            args.output.write_text(f"{header}\n", encoding="utf-8")
        except OSError as error:
            raise CliUsageError(
                f"Could not write sample-sheet template to {args.output}: {error}",
            ) from error
        output.emit_advisory(f"Wrote sample-sheet template to {args.output}")
    # NOTE(review): emit_result presumably renders the header in human mode and
    # the descriptor list under --json -- confirm against Output.
    if output.json_mode or args.output is None:
        output.emit_result(header, [column.descriptor for column in columns])
    output.emit_advisory(_required_summary(columns))
    return ExitCode.SUCCESS


def _check_sample_type(client: Client, sample_type: SampleTypeId) -> None:
    """Reject a sample type that the client does not list.

    :param client: The authenticated Flow client, queried for its sample types.
    :param sample_type: The identifier supplied on the command line.
    :raises CliUsageError: If ``sample_type`` is not among the listed
        identifiers; the message names the available ones in sorted order.
    """
    known = [listed.identifier for listed in client.samples.get_types()]
    if sample_type in known:
        return
    available = ", ".join(sorted(known))
    raise CliUsageError(
        f"Unknown sample type '{sample_type}'. "
        f"Available types: {available}",
    )


def _template_columns(
    attributes: list[MetadataAttribute], sample_type: SampleTypeId,
) -> list[_TemplateColumn]:
    """Build the ordered column list for a sample-sheet template.

    The reserved columns come first. Each attribute then contributes one
    metadata column and, when it allows annotation, an
    ``<identifier>__annotation`` column directly after it.

    :param attributes: The metadata attributes to turn into columns.
    :param sample_type: The sample type the template is built for; an
        attribute counts as required when it is required outright or lists
        this type among the sample types it is required for.
    :returns: The template columns in header order.
    """
    result = [*_RESERVED_COLUMNS]
    for attribute in attributes:
        is_required = (
            attribute.required or sample_type in attribute.required_for_sample_types
        )
        metadata_column = _TemplateColumn(
            attribute.identifier,
            "metadata",
            is_required,
            attribute.options,
            attribute.description,
        )
        result.append(metadata_column)
        if not attribute.allow_annotation:
            continue
        # The annotation companion is always optional and has no option set.
        annotation_column = _TemplateColumn(
            f"{attribute.identifier}__annotation",
            "annotation",
            False,
            None,
            f"Free-text annotation for {attribute.identifier}.",
        )
        result.append(annotation_column)
    return result


def _required_summary(columns: list[_TemplateColumn]) -> str:
    """Summarise which columns are required and which are optional.

    :param columns: The template columns, in header order.
    :returns: Two lines, ``Required columns: ...`` then
        ``Optional columns: ...``, each listing names comma-separated in
        their original order.
    """
    names_by_required: dict[bool, list[str]] = {True: [], False: []}
    for column in columns:
        names_by_required[bool(column.required)].append(column.name)
    required_line = f"Required columns: {', '.join(names_by_required[True])}"
    optional_line = f"Optional columns: {', '.join(names_by_required[False])}"
    return "\n".join((required_line, optional_line))


def _merge_metadata(
pairs: list[str] | None, json_text: str | None,
) -> dict[str, str]:
Expand Down
11 changes: 10 additions & 1 deletion flowbio/v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,15 @@
from flowbio.v2.client import Client, ClientConfig
from flowbio.v2.data import Data
from flowbio.v2.exceptions import AnnotationValidationError
from flowbio.v2.samples import MetadataAttribute, MultiplexedUpload, Organism, Project, Sample, SampleType
from flowbio.v2.samples import (
MetadataAttribute,
MultiplexedUpload,
Organism,
Project,
Sample,
SampleType,
SampleTypeId,
)

__all__ = [
"AnnotationValidationError",
Expand All @@ -49,6 +57,7 @@
"Project",
"Sample",
"SampleType",
"SampleTypeId",
"TokenCredentials",
"UsernamePasswordCredentials",
]
Loading
Loading