Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions src/regvelo/datasets/_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# Remote data URLs
# Module-level Google Drive download links for the datasets exposed by this module.
url_adata = "https://drive.google.com/uc?id=1Nzq1F6dGw-nR9lhRLfZdHOG7dcYq7P0i&export=download"
# NOTE(review): unlike url_adata, this link ends in "download/" (trailing slash) — confirm that is intentional.
url_grn = "https://drive.google.com/uc?id=1ci_gCwdgGlZ0xSn6gSa_-LlIl9-aDa1c&export=download/"
# Passed as ``backup_url`` by ``zebrafish_perturb`` when the local .h5ad file is missing.
# NOTE(review): the query string carries authuser/uuid/at values that look session-specific —
# confirm the link remains valid for anonymous downloads over time.
url_perturb = "https://drive.usercontent.google.com/download?id=1CKADDykadX0CLjYGPgc3YXA-AGRSaA1k&export=download&authuser=1&confirm=t&uuid=37eebdae-5f53-4e53-8d60-a5f52b528720&at=ALBwUgk1uT7pQx7RpXiGyK2yFpBI:1777810924901"

# Murine dataset links (processed / normalized variants); presumably consumed by ``murine_nc`` —
# the usage is not visible in this part of the file, verify against the loader.
url_adata_murine_processed = "https://drive.usercontent.google.com/download?id=19bNQfW3jMKEEjpjNdUkVd7KDTjJfqxa5&export=download&authuser=1&confirm=t&uuid=4fdf3051-229b-4ce2-b644-cb390424570a&at=APcmpoxgcuZ5r6m6Fb6N_2Og6tEO:1745354679573"
url_adata_murine_normalized = "https://drive.usercontent.google.com/download?id=1xy2FNYi6Y2o_DzXjRmmCtARjoZ97Ro_w&export=download&authuser=1&confirm=t&uuid=12cf5d23-f549-48d9-b7ec-95411a58589f&at=APcmpoyexgouf243lNygF9yRUkmi:1745997349046"
Expand Down Expand Up @@ -63,6 +64,33 @@ def zebrafish_grn(file_path: str | Path = "data/zebrafish_nc/prior_GRN.csv") ->
grn.to_csv(file_path)
return grn

def zebrafish_perturb(file_path: str | Path = "data/zebrafish_nc/perturbseq_all.h5ad") -> AnnData:
    r"""Fetch the zebrafish neural crest (NC) Perturb-seq dataset.

    The dataset holds single-cell CRISPR screening (Perturb-seq) measurements
    of zebrafish NC cells: 12,393 cells and 27,599 genes collected across
    9 pooled samples. sgRNAs target 22 transcription factors, among them

    - ETS-family members (*fli1a*, *elk3*, *erf*, *etv2*, *ets1*, *elf1*),
    - pigment regulators (*mitfa*, *tfec*, *tfeb*), and
    - further NC regulators (*nr2f2*, *nr2f5*, *rarga*, *rxraa*, *smarcc1a*,
      *ebf3a*),

    together with combinatorial double or triple knockouts. Unperturbed
    control cells and negative-control cells are part of the data as well.

    The cell type annotations cover the major stages of NC development:

    - Neural plate border (NPB) progenitors (``NPB_hox``, ``NPB_nohox``),
    - Migratory NC cells across axial levels (``mNC_arch1``, ``mNC_arch2``,
      ``mNC_head_mesenchymal``, ``mNC_hox34``, ``mNC_nohox``, ``mNC_trunk``,
      ``mNC_vagal``),
    - Differentiating NC cells (``dNC_hox34``, ``dNC_nohox``),
    - Pigment cell lineages (``Pigment``, ``Pigment_1``,
      ``Pigment_gch2_high``), and
    - Mutant populations (``Mutant``, ``Mutant_hox23``).

    PCA, PHATE, and t-SNE embeddings are stored on the object, along with
    imputed latent time and terminal state assignments.

    Parameters
    ----------
    file_path
        Location of the local copy of the dataset. If no file is found there,
        it is downloaded from the remote URL.

    Returns
    -------
    Annotated data object of zebrafish NC Perturb-seq cells.
    """
    # `url_perturb` serves as the fallback download source for a missing local file.
    return read(file_path, backup_url=url_perturb, sparse=True, cache=True)

def murine_nc(data_type: Literal["preprocessed", "normalized", "velocyto"] = "preprocessed") -> AnnData:
r"""Load mouse neural crest single-cell RNA-seq dataset (subset of Qiu et al.).

Expand Down
Loading