From 82c4526366f45704647b5649558fd0c9906b0183 Mon Sep 17 00:00:00 2001 From: Olusegun Durojaye Date: Tue, 19 May 2026 21:06:36 +0100 Subject: [PATCH 1/3] Add NVIDIA NeMo Text Processing template --- examples/nemo-text-processing/.gitignore | 4 + .../nemo-text-processing/.saturn/saturn.json | 19 + examples/nemo-text-processing/README.md | 98 ++++ .../nemo_text_processing_demo.ipynb | 435 ++++++++++++++++++ examples/nemo-text-processing/start.sh | 23 + 5 files changed, 579 insertions(+) create mode 100644 examples/nemo-text-processing/.gitignore create mode 100644 examples/nemo-text-processing/.saturn/saturn.json create mode 100644 examples/nemo-text-processing/README.md create mode 100644 examples/nemo-text-processing/nemo_text_processing_demo.ipynb create mode 100644 examples/nemo-text-processing/start.sh diff --git a/examples/nemo-text-processing/.gitignore b/examples/nemo-text-processing/.gitignore new file mode 100644 index 00000000..c92e4a4d --- /dev/null +++ b/examples/nemo-text-processing/.gitignore @@ -0,0 +1,4 @@ +startup.log +__pycache__/ +*.pyc +.ipynb_checkpoints/ diff --git a/examples/nemo-text-processing/.saturn/saturn.json b/examples/nemo-text-processing/.saturn/saturn.json new file mode 100644 index 00000000..017c9cad --- /dev/null +++ b/examples/nemo-text-processing/.saturn/saturn.json @@ -0,0 +1,19 @@ +{ + "name": "example-nemo-text-processing", + "image_uri": "public.ecr.aws/saturncloud/saturn-python:2025.05.01", + "description": "NVIDIA NeMo Text Processing β€” normalize text for TTS and ASR pipelines. 
Demonstrates Text Normalization (writtenβ†’spoken) and Inverse Text Normalization (spokenβ†’written) across 15 languages.", + "working_directory": "/home/jovyan/examples/examples/nemo-text-processing", + "start_script": "bash start.sh", + "git_repositories": [ + { + "url": "https://github.com/saturncloud/examples", + "path": "/home/jovyan/examples" + } + ], + "jupyter_server": { + "disk_space": "10Gi", + "instance_type": "large", + "auto_shutoff": "1 hour" + }, + "version": "2022.01.06" +} diff --git a/examples/nemo-text-processing/README.md b/examples/nemo-text-processing/README.md new file mode 100644 index 00000000..5e7fa753 --- /dev/null +++ b/examples/nemo-text-processing/README.md @@ -0,0 +1,98 @@ +# πŸ“ NVIDIA NeMo Text Processing + +### **Overview** + +This template demonstrates [NVIDIA NeMo Text Processing](https://github.com/NVIDIA/NeMo-text-processing) β€” a library for normalizing text in speech AI pipelines. It covers both directions of text conversion used in production ASR and TTS systems. + +* **Hardware:** CPU Large (2 cores, 16 GB RAM) β€” no GPU required +* **Python:** 3.10+ +* **Use Case:** Speech AI preprocessing β€” TTS pipelines, ASR post-processing, multilingual text normalization + +--- + +### **What it does** + +| Operation | Direction | Example | +|-----------|-----------|---------| +| **Text Normalization (TN)** | Written β†’ Spoken | `"$4.99"` β†’ `"four dollars and ninety nine cents"` | +| **Inverse Text Normalization (ITN)** | Spoken β†’ Written | `"three thirty p m"` β†’ `"3:30 p.m."` | + +Both operations are powered by **Weighted Finite-State Transducers (WFST)** β€” fast, rule-based grammars that require no model download and no GPU. + +--- + +### **Tech Stack** + +* **NeMo Text Processing (`nemo_text_processing`):** Core normalization library from NVIDIA. +* **Pynini / OpenFst:** WFST engine that powers the grammar rules. 
+* **15 languages supported:** English, German, Spanish, French, Hungarian, Swedish, Mandarin, Arabic, Italian, Armenian, Japanese, Hindi, Korean, Vietnamese, Portuguese. + +--- + +## 🪐 Using on Saturn Cloud + +### 1. Create the workspace from the template + +In Saturn Cloud, go to **New Resource → Workspace → Templates** and select **NeMo Text Processing**. + +### 2. Start the workspace + +Click **Start**. The startup script installs `nemo_text_processing` automatically. This takes 3–5 minutes on first start — watch progress in the **Logs** panel. + +### 3. Open the notebook + +Once the workspace shows **Running**, click **JupyterLab**. Open `nemo_text_processing_demo.ipynb` from the file browser and run the cells top to bottom. + +--- + +## 🛠️ Local Setup + +```bash +pip install nemo_text_processing +``` + +Then open `nemo_text_processing_demo.ipynb` in JupyterLab. + +> **Note:** pip install requires Linux x86_64. On macOS or Windows use conda: +> ```bash +> conda create --name nemo_tn python=3.10 +> conda activate nemo_tn +> conda install -c conda-forge pynini +> pip install nemo_text_processing +> ``` + +--- + +## 📓 Notebook contents + +The demo notebook covers the following topics: + +1. **Verify Installation** — confirms the library is ready +2. **Text Normalization** — numbers, dates, times, abbreviations, measurements in English +3. **Multilingual TN** — same operations in German and Spanish +4. **Inverse Text Normalization** — convert ASR output back to written form +5. **Batch Processing** — normalize a list of texts with `normalize_list()` +6. **TTS / ASR Pipeline Examples** — end-to-end pre/post-processing scenarios +7.
**Try It Yourself** β€” sandbox cells to test your own text + +--- + +## βš™οΈ Changing the language + +Pass any supported language code to the `Normalizer` or `InverseNormalizer`: + +```python +normalizer = Normalizer(input_case='cased', lang='de') # German +normalizer = Normalizer(input_case='cased', lang='es') # Spanish +normalizer = Normalizer(input_case='cased', lang='zh') # Mandarin +``` + +Supported codes: `en` `de` `es` `fr` `hu` `sv` `zh` `ar` `it` `hy` `ja` `hi` `ko` `vi` `pt` + +--- + +## πŸ”— Resources + +* **NeMo Text Processing repo:** [github.com/NVIDIA/NeMo-text-processing](https://github.com/NVIDIA/NeMo-text-processing) +* **Official tutorials:** [github.com/NVIDIA/NeMo-text-processing/tree/main/tutorials](https://github.com/NVIDIA/NeMo-text-processing/tree/main/tutorials) +* **Saturn Cloud:** [saturncloud.io](https://saturncloud.io/) diff --git a/examples/nemo-text-processing/nemo_text_processing_demo.ipynb b/examples/nemo-text-processing/nemo_text_processing_demo.ipynb new file mode 100644 index 00000000..ce9de1c5 --- /dev/null +++ b/examples/nemo-text-processing/nemo_text_processing_demo.ipynb @@ -0,0 +1,435 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NVIDIA NeMo Text Processing\n", + "\n", + "This notebook demonstrates two core operations from [NVIDIA NeMo Text Processing](https://github.com/NVIDIA/NeMo-text-processing):\n", + "\n", + "| Operation | Direction | Use case |\n", + "|-----------|-----------|----------|\n", + "| **Text Normalization (TN)** | Written β†’ Spoken | Prepare text before Text-to-Speech (TTS) |\n", + "| **Inverse Text Normalization (ITN)** | Spoken β†’ Written | Clean up text after Automatic Speech Recognition (ASR) |\n", + "\n", + "**Supported languages:** English, German, Spanish, French, Hungarian, Swedish, Mandarin, Arabic, Italian, Armenian, Japanese, Hindi, Korean, Vietnamese, Portuguese\n", + "\n", + "**Hardware:** CPU only β€” no GPU required\n", + "\n", + "---\n", + "Run 
each cell in order. The first section verifies the installation." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Verify Installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import importlib.util  # find_spec lives in the importlib.util submodule; import it explicitly\n", + "\n", + "if importlib.util.find_spec(\"nemo_text_processing\") is None:\n", + " print(\"Installing nemo_text_processing — this takes 3-5 minutes...\")\n", + " import subprocess, sys  # sys.executable = the interpreter this kernel is running on\n", + " subprocess.run([sys.executable, \"-m\", \"pip\", \"install\", \"--quiet\", \"nemo_text_processing\"], check=True)\n", + " print(\"Done.\")\n", + "else:\n", + " print(\"nemo_text_processing is installed — ready to go.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 2. Text Normalization (TN) — Written → Spoken\n", + "\n", + "Text Normalization converts written symbols into how they would be read aloud. \n", + "This is the preprocessing step required before feeding text into a TTS model."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from nemo_text_processing.text_normalization.normalize import Normalizer\n", + "\n", + "# Create an English normalizer\n", + "normalizer = Normalizer(input_case='cased', lang='en')\n", + "print(\"English normalizer ready.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Numbers\n", + "examples = [\n", + " \"12 kg\",\n", + " \"The temperature is -5Β°C\",\n", + " \"$4.99\",\n", + " \"1,000,000 people\",\n", + " \"Chapter 7\",\n", + "]\n", + "\n", + "print(\"=== Numbers ===\")\n", + "for text in examples:\n", + " result = normalizer.normalize(text, verbose=False)\n", + " print(f\" {text!r:35} β†’ {result!r}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Dates and times\n", + "examples = [\n", + " \"The meeting is on 05/12/2024\",\n", + " \"See you at 3:30pm\",\n", + " \"The event starts at 9:00 AM\",\n", + " \"Born on January 1st, 1990\",\n", + "]\n", + "\n", + "print(\"=== Dates and Times ===\")\n", + "for text in examples:\n", + " result = normalizer.normalize(text, verbose=False)\n", + " print(f\" {text!r:45} β†’ {result!r}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Abbreviations, measurements and symbols\n", + "examples = [\n", + " \"Dr. 
Smith will see you now\",\n", + " \"The speed limit is 60 mph\",\n", + " \"Mix 2 tbsp of sugar\",\n", + " \"The file is 3.5 GB\",\n", + " \"She scored 95% on the exam\",\n", + "]\n", + "\n", + "print(\"=== Abbreviations and Measurements ===\")\n", + "for text in examples:\n", + " result = normalizer.normalize(text, verbose=False)\n", + " print(f\" {text!r:40} β†’ {result!r}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.1 Multilingual Text Normalization\n", + "\n", + "Swap the `lang` parameter to normalize text in any of the 15 supported languages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# German\n", + "normalizer_de = Normalizer(input_case='cased', lang='de')\n", + "\n", + "examples_de = [\n", + " \"Die Temperatur betrΓ€gt -3Β°C\",\n", + " \"Das kostet 12,50 €\",\n", + " \"Kapitel 5\",\n", + "]\n", + "\n", + "print(\"=== German (de) ===\")\n", + "for text in examples_de:\n", + " result = normalizer_de.normalize(text, verbose=False)\n", + " print(f\" {text!r:35} β†’ {result!r}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Spanish\n", + "normalizer_es = Normalizer(input_case='cased', lang='es')\n", + "\n", + "examples_es = [\n", + " \"El precio es $25.99\",\n", + " \"Tiene 3 hijos\",\n", + " \"La reuniΓ³n es el 15/06/2024\",\n", + "]\n", + "\n", + "print(\"=== Spanish (es) ===\")\n", + "for text in examples_es:\n", + " result = normalizer_es.normalize(text, verbose=False)\n", + " print(f\" {text!r:35} β†’ {result!r}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 3. Inverse Text Normalization (ITN) β€” Spoken β†’ Written\n", + "\n", + "Inverse Text Normalization converts spoken-form text back into written conventions. \n", + "This is the post-processing step applied to raw ASR output to make it readable." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer\n", + "\n", + "inverse_normalizer = InverseNormalizer(lang='en')\n", + "print(\"English inverse normalizer ready.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Numbers and money\n", + "examples = [\n", + " \"twelve kilograms\",\n", + " \"four dollars and ninety nine cents\",\n", + " \"one million people\",\n", + " \"negative five degrees\",\n", + " \"ninety five percent\",\n", + "]\n", + "\n", + "print(\"=== Numbers and Money (ITN) ===\")\n", + "for text in examples:\n", + " result = inverse_normalizer.inverse_normalize(text, verbose=False)\n", + " print(f\" {text!r:45} β†’ {result!r}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Dates and times\n", + "examples = [\n", + " \"january first two thousand and twenty four\",\n", + " \"three thirty p m\",\n", + " \"nine o'clock in the morning\",\n", + "]\n", + "\n", + "print(\"=== Dates and Times (ITN) ===\")\n", + "for text in examples:\n", + " result = inverse_normalizer.inverse_normalize(text, verbose=False)\n", + " print(f\" {text!r:50} β†’ {result!r}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Measurements\n", + "examples = [\n", + " \"sixty miles per hour\",\n", + " \"three point five gigabytes\",\n", + " \"two tablespoons of sugar\",\n", + "]\n", + "\n", + "print(\"=== Measurements (ITN) ===\")\n", + "for text in examples:\n", + " result = inverse_normalizer.inverse_normalize(text, verbose=False)\n", + " print(f\" {text!r:40} β†’ {result!r}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.1 Multilingual Inverse Text Normalization" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# German ITN\n", + "inverse_normalizer_de = InverseNormalizer(lang='de')\n", + "\n", + "examples_de = [\n", + " \"zwΓΆlf kilogramm\",\n", + " \"fΓΌnftes kapitel\",\n", + "]\n", + "\n", + "print(\"=== German ITN ===\")\n", + "for text in examples_de:\n", + " result = inverse_normalizer_de.inverse_normalize(text, verbose=False)\n", + " print(f\" {text!r:35} β†’ {result!r}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 4. Batch Processing\n", + "\n", + "Use `normalize_list()` to process multiple texts efficiently with parallel jobs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "texts = [\n", + " \"The package weighs 2.5 kg and costs $19.99\",\n", + " \"Dr. Johnson called at 4:15pm on 03/22/2024\",\n", + " \"The speed limit changes from 30 mph to 60 mph\",\n", + " \"She scored 98.5% and ranked 1st in her class\",\n", + " \"Add 3 tbsp of flour and 1.5 cups of milk\",\n", + "]\n", + "\n", + "results = normalizer.normalize_list(texts, verbose=False, n_jobs=1)\n", + "\n", + "print(\"=== Batch Text Normalization ===\")\n", + "for original, normalized in zip(texts, results):\n", + " print(f\" IN: {original}\")\n", + " print(f\" OUT: {normalized}\")\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 5. TTS and ASR Pipeline Example\n", + "\n", + "This shows how TN and ITN fit into real speech pipelines." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Simulated TTS pipeline: normalize text before synthesizing speech\n", + "tts_inputs = [\n", + " \"The meeting is scheduled for 14:30 on 12/05/2024\",\n", + " \"The total bill is $1,250.75 including 8.5% tax\",\n", + " \"Dr. 
Chen will present at 9am in Room 3B\",\n", + "]\n", + "\n", + "print(\"=== TTS Pre-processing (TN) ===\")\n", + "print(\"Normalizing text before sending to a TTS model:\\n\")\n", + "for text in tts_inputs:\n", + " normalized = normalizer.normalize(text, verbose=False)\n", + " print(f\" Raw text : {text}\")\n", + " print(f\" TTS input: {normalized}\")\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Simulated ASR pipeline: clean up raw transcript from speech recognition\n", + "asr_outputs = [\n", + " \"the package costs twenty five dollars and ninety nine cents\",\n", + " \"call me back at three forty five p m\",\n", + " \"the file is two point three gigabytes\",\n", + "]\n", + "\n", + "print(\"=== ASR Post-processing (ITN) ===\")\n", + "print(\"Cleaning up raw ASR transcript:\\n\")\n", + "for text in asr_outputs:\n", + " normalized = inverse_normalizer.inverse_normalize(text, verbose=False)\n", + " print(f\" ASR output : {text}\")\n", + " print(f\" Cleaned up : {normalized}\")\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 6. Try It Yourself\n", + "\n", + "Modify the text below and run the cell to test your own inputs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ── Text Normalization ────────────────────────────────────────────────────────\n", + "my_text = \"Change this to any text you want to normalize\"\n", + "my_lang = \"en\" # Options: en, de, es, fr, hu, sv, zh, ar, it, hy, ja, hi, ko, vi, pt\n", + "\n", + "n = Normalizer(input_case='cased', lang=my_lang)\n", + "print(f\"TN [{my_lang}]: {my_text!r}\")\n", + "print(f\" β†’ {n.normalize(my_text, verbose=False)!r}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ── Inverse Text Normalization ────────────────────────────────────────────────\n", + "my_spoken_text = \"change this to any spoken text you want to convert back\"\n", + "my_itn_lang = \"en\" # Options: en, de, es, fr, hu, sv, zh, ar, it, hy, ja, hi, ko, vi, pt\n", + "\n", + "inv_n = InverseNormalizer(lang=my_itn_lang)\n", + "print(f\"ITN [{my_itn_lang}]: {my_spoken_text!r}\")\n", + "print(f\" β†’ {inv_n.inverse_normalize(my_spoken_text, verbose=False)!r}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/nemo-text-processing/start.sh b/examples/nemo-text-processing/start.sh new file mode 100644 index 00000000..96aee819 --- /dev/null +++ b/examples/nemo-text-processing/start.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# NeMo Text Processing β€” install dependencies on Saturn Cloud workspace startup +set -euo pipefail + +LOG="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/startup.log" +log() { echo "[$(date '+%H:%M:%S')] $*" | tee -a "$LOG"; } + +log "Starting NeMo Text Processing setup..." 
+ +# ─── Check if already installed ─────────────────────────────────────────────── +if python -c "import nemo_text_processing" &>/dev/null; then + log "nemo_text_processing already installed — OK" + log "Open nemo_text_processing_demo.ipynb in JupyterLab to get started." + exit 0 +fi + +# ─── Install (python -m pip targets the interpreter checked above) ──────────── +log "Installing nemo_text_processing (this takes 3-5 minutes on first run)..." +python -m pip install --quiet nemo_text_processing &>>"$LOG" \ + || { log "ERROR: installation failed — check startup.log"; exit 1; } + +log "Installation complete." +log "Open nemo_text_processing_demo.ipynb in JupyterLab to get started." From b4d8c412d893fbcb9ad543b65e818d9a185f657f Mon Sep 17 00:00:00 2001 From: Olusegun Durojaye Date: Wed, 20 May 2026 00:37:26 +0100 Subject: [PATCH 2/3] Add branch reference for testing deployment --- examples/nemo-text-processing/.saturn/saturn.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/nemo-text-processing/.saturn/saturn.json b/examples/nemo-text-processing/.saturn/saturn.json index 017c9cad..b7706f5c 100644 --- a/examples/nemo-text-processing/.saturn/saturn.json +++ b/examples/nemo-text-processing/.saturn/saturn.json @@ -7,7 +7,9 @@ "git_repositories": [ { "url": "https://github.com/saturncloud/examples", - "path": "/home/jovyan/examples" + "path": "/home/jovyan/examples", + "reference": "nemo-text-processing", + "reference_type": "branch" } ], "jupyter_server": { From c65f92debc3d71a411b8a35bad8ec91c4db686e2 Mon Sep 17 00:00:00 2001 From: Olusegun Durojaye Date: Wed, 20 May 2026 01:52:33 +0100 Subject: [PATCH 3/3] =?UTF-8?q?Remove=20test=20branch=20reference=20from?= =?UTF-8?q?=20recipe=20=E2=80=94=20defaults=20to=20main?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/nemo-text-processing/.saturn/saturn.json | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git
a/examples/nemo-text-processing/.saturn/saturn.json b/examples/nemo-text-processing/.saturn/saturn.json index b7706f5c..017c9cad 100644 --- a/examples/nemo-text-processing/.saturn/saturn.json +++ b/examples/nemo-text-processing/.saturn/saturn.json @@ -7,9 +7,7 @@ "git_repositories": [ { "url": "https://github.com/saturncloud/examples", - "path": "/home/jovyan/examples", - "reference": "nemo-text-processing", - "reference_type": "branch" + "path": "/home/jovyan/examples" } ], "jupyter_server": {