diff --git a/dataset_configs/english/hifitts2/config_22khz.yaml b/dataset_configs/english/hifitts2/config_22khz.yaml
new file mode 100644
index 00000000..93506582
--- /dev/null
+++ b/dataset_configs/english/hifitts2/config_22khz.yaml
@@ -0,0 +1,62 @@
+documentation: |
+  HiFiTTS-2 22kHz
+  ###############
+  
+  This config can be used to download the audio data for HiFiTTS-2 22kHz.
+  
+  1. Downloads HiFiTTS-2 audio from LibriVox.
+  2. Outputs a new manifest in which LibriVox audiobook chapters which could not be downloaded (e.g. because they
+     were removed from the website) are removed.
+
+  **Required arguments**.
+
+  * **workspace_dir**: specify the workspace folder where all audio files and manifests will be stored.
+
+  Note that you can customize any part of this config either directly or from command-line.
+ 
+  **Output format**.
+
+  This config outputs 2 manifest files:
+
+  * ``${workspace_dir}/errors_22khz.json`` - entries from the input chapters file which failed to download from LibriVox.
+  * ``${workspace_dir}/manifest_filtered_22khz.json`` - input manifest file without utterances from failed chapters.
+
+processors_to_run: all
+workspace_dir: ???  # required: folder where all audio files and manifests are stored
+manifest_filename: manifest_22khz.json  # utterance manifest that gets filtered by RemovedFailedChapters
+output_filename: manifest_filtered_22khz.json  # filtered utterance manifest written by RemovedFailedChapters
+chapter_filename: chapters_22khz.json  # chapter manifest used as input to DownloadHiFiTTS2
+error_filename: errors_22khz.json  # chapters which failed to download, written by DownloadHiFiTTS2
+audio_dir_name: audio_22khz  # root folder for the utterance files
+chapter_audio_dir_name: chapters  # root folder for the downloaded chapter files
+sample_rate: 22050  # sample rate to use for the utterance files
+delete_chapter_files: true  # delete each chapter file after it has been processed
+exit_on_error: false  # if true, stop the whole run when a single chapter download fails
+use_dask: false
+max_workers: 8
+chunksize: 50
+
+input_manifest_file: ${workspace_dir}/${manifest_filename}
+chapter_file: ${workspace_dir}/${chapter_filename}
+error_file: ${workspace_dir}/${error_filename}
+audio_dir: ${workspace_dir}/${audio_dir_name}
+chapter_dir: ${workspace_dir}/${chapter_audio_dir_name}
+final_manifest: ${workspace_dir}/${output_filename}
+
+processors:
+  - _target_: sdp.processors.DownloadHiFiTTS2
+    audio_dir: ${audio_dir}
+    chapter_dir: ${chapter_dir}
+    sample_rate: ${sample_rate}
+    delete_chapter_files: ${delete_chapter_files}
+    exit_on_error: ${exit_on_error}
+    input_manifest_file: ${chapter_file}  # the chapter manifest, not the utterance manifest
+    output_manifest_file: ${error_file}  # failed chapters, consumed by the next processor
+    use_dask: ${use_dask}
+    max_workers: ${max_workers}
+    chunksize: ${chunksize}
+
+  - _target_: sdp.processors.RemovedFailedChapters
+    input_manifest_file: ${input_manifest_file}
+    output_manifest_file: ${final_manifest}
+    error_file: ${error_file}
diff --git a/dataset_configs/english/hifitts2/config_44khz.yaml b/dataset_configs/english/hifitts2/config_44khz.yaml
new file mode 100644
index 00000000..33d79f98
--- /dev/null
+++ b/dataset_configs/english/hifitts2/config_44khz.yaml
@@ -0,0 +1,62 @@
+documentation: |
+  HiFiTTS-2 44kHz
+  ##################
+  
+  This config can be used to download the audio data for HiFiTTS-2 44kHz.
+  
+  1. Downloads HiFiTTS-2 audio from LibriVox.
+  2. Outputs a new manifest in which LibriVox audiobook chapters which could not be downloaded (e.g. because they
+     were removed from the website) are removed.
+
+  **Required arguments**.
+
+  * **workspace_dir**: specify the workspace folder where all audio files and manifests will be stored.
+
+  Note that you can customize any part of this config either directly or from command-line.
+ 
+  **Output format**.
+
+  This config outputs 2 manifest files:
+
+  * ``${workspace_dir}/errors_44khz.json`` - entries from the input chapters file which failed to download from LibriVox.
+  * ``${workspace_dir}/manifest_filtered_44khz.json`` - input manifest file without utterances from failed chapters.
+
+processors_to_run: all
+workspace_dir: ???  # required: folder where all audio files and manifests are stored
+manifest_filename: manifest_44khz.json  # utterance manifest that gets filtered by RemovedFailedChapters
+output_filename: manifest_filtered_44khz.json  # filtered utterance manifest written by RemovedFailedChapters
+chapter_filename: chapters_44khz.json  # chapter manifest used as input to DownloadHiFiTTS2
+error_filename: errors_44khz.json  # chapters which failed to download, written by DownloadHiFiTTS2
+audio_dir_name: audio_44khz  # root folder for the utterance files
+chapter_audio_dir_name: chapters  # root folder for the downloaded chapter files
+sample_rate: 44100  # sample rate to use for the utterance files
+delete_chapter_files: true  # delete each chapter file after it has been processed
+exit_on_error: false  # if true, stop the whole run when a single chapter download fails
+use_dask: false
+max_workers: 8
+chunksize: 50
+
+input_manifest_file: ${workspace_dir}/${manifest_filename}
+chapter_file: ${workspace_dir}/${chapter_filename}
+error_file: ${workspace_dir}/${error_filename}
+audio_dir: ${workspace_dir}/${audio_dir_name}
+chapter_dir: ${workspace_dir}/${chapter_audio_dir_name}
+final_manifest: ${workspace_dir}/${output_filename}
+
+processors:
+  - _target_: sdp.processors.DownloadHiFiTTS2
+    audio_dir: ${audio_dir}
+    chapter_dir: ${chapter_dir}
+    sample_rate: ${sample_rate}
+    delete_chapter_files: ${delete_chapter_files}
+    exit_on_error: ${exit_on_error}
+    input_manifest_file: ${chapter_file}  # the chapter manifest, not the utterance manifest
+    output_manifest_file: ${error_file}  # failed chapters, consumed by the next processor
+    use_dask: ${use_dask}
+    max_workers: ${max_workers}
+    chunksize: ${chunksize}
+
+  - _target_: sdp.processors.RemovedFailedChapters
+    input_manifest_file: ${input_manifest_file}
+    output_manifest_file: ${final_manifest}
+    error_file: ${error_file}
diff --git a/dataset_configs/english/hifitts2/config_bandwidth.yaml b/dataset_configs/english/hifitts2/config_bandwidth.yaml
new file mode 100644
index 00000000..15c219bf
--- /dev/null
+++ b/dataset_configs/english/hifitts2/config_bandwidth.yaml
@@ -0,0 +1,44 @@
+documentation: |
+  HiFiTTS-2 Bandwidth Estimation
+  ##############################
+  
+  This config contains the bandwidth estimation code used for HiFiTTS and HiFiTTS-2.
+  This config can be used to estimate bandwidth for any dataset. For HiFiTTS-2 bandwidth
+  was estimated using the first 30 seconds of every audiobook chapter, but the estimate is still
+  reasonably accurate if run over a shorter duration or with individual utterances.
+
+  **Required arguments**.
+
+  * **workspace_dir**: The workspace folder where all audio files and manifests are stored.
+  * **audio_dir_name**: Folder in workspace containing audio files to estimate bandwidth of.
+  * **input_manifest_filename**: Manifest file in workspace containing relative paths to audio.
+ 
+  **Output format**.
+  
+  This config outputs a single manifest with the following field(s):
+
+  * **bandwidth (int)**: Estimated bandwidth of the audio file.
+
+processors_to_run: all
+workspace_dir: ???  # required: folder where all audio files and manifests are stored
+audio_dir_name: ???  # required: folder in the workspace containing the audio files
+input_manifest_filename: ???  # required: manifest in the workspace with relative audio paths
+output_manifest_filename: manifest_bandwidth.json  # output manifest with the added bandwidth field
+audio_key: audio_filepath  # manifest key with the relative audio paths
+use_dask: false
+max_workers: 1
+chunksize: 1
+
+input_manifest_file: ${workspace_dir}/${input_manifest_filename}
+final_manifest: ${workspace_dir}/${output_manifest_filename}
+audio_dir: ${workspace_dir}/${audio_dir_name}
+
+processors:
+  - _target_: sdp.processors.EstimateBandwidth
+    input_manifest_file: ${input_manifest_file}
+    output_manifest_file: ${final_manifest}
+    audio_dir: ${audio_dir}
+    input_audio_key: ${audio_key}
+    use_dask: ${use_dask}
+    max_workers: ${max_workers}
+    chunksize: ${chunksize}
diff --git a/docs/src/sdp/api.rst b/docs/src/sdp/api.rst
index c285c3b8..bfa2bc62 100644
--- a/docs/src/sdp/api.rst
+++ b/docs/src/sdp/api.rst
@@ -116,12 +116,24 @@ HuggingFace Datasets
 .. autodata:: sdp.processors.CreateInitialManifestHuggingFace
    :annotation:
 
+
 YTC Datasets
 ''''''''''''
 
 .. autodata:: sdp.processors.datasets.ytc.create_initial_manifest.CreateInitialManifestYTC
    :annotation:
 
+
+HiFiTTS-2
+''''''''''''''''''''
+
+.. autodata:: sdp.processors.DownloadHiFiTTS2
+   :annotation:
+
+.. autodata:: sdp.processors.RemovedFailedChapters
+   :annotation:
+
+
 Lhotse processors
 #################
 
@@ -151,6 +163,9 @@ used in the downstream processing for additional enhancement or filtering.
 .. autodata:: sdp.processors.ASRTransformers
    :annotation:
 
+.. autodata:: sdp.processors.EstimateBandwidth
+   :annotation:
+
 .. autodata:: sdp.processors.tts.pyannote.PyAnnoteDiarizationAndOverlapDetection
    :annotation:
 
@@ -166,7 +181,6 @@ used in the downstream processing for additional enhancement or filtering.
 .. autodata:: sdp.processors.tts.metrics.BandwidthEstimationProcessor
    :annotation:
 
-
 Text-only processors
 ####################
 
diff --git a/docs/src/sdp/existing_configs.rst b/docs/src/sdp/existing_configs.rst
index 233a05dd..5be69922 100644
--- a/docs/src/sdp/existing_configs.rst
+++ b/docs/src/sdp/existing_configs.rst
@@ -366,6 +366,13 @@ Armenian Toloka
    `config <https://github.com/NVIDIA/NeMo-speech-data-processor/blob/main/dataset_configs/armenian/toloka/pipeline_get_final_res.yaml>`__ |
    :doc:`documentation <config-docs/armenian/toloka/pipeline_get_final_res>`
 
+.. toctree::
+   :hidden:
+
+   config-docs/armenian/toloka/pipeline_start
+   config-docs/armenian/toloka/pipeline_validate_answers
+   config-docs/armenian/toloka/pipeline_get_final_res
+
 YouTube Commons (YTC)
 ~~~~~~~~~~~~~~~~~~~~~~
 
@@ -377,8 +384,26 @@ YouTube Commons (YTC)
 .. toctree::
    :hidden:
 
-   config-docs/armenian/toloka/pipeline_start
-   config-docs/armenian/toloka/pipeline_validate_answers
-   config-docs/armenian/toloka/pipeline_get_final_res
-
    config-docs/tts/ytc/config
+
+HiFiTTS-2
+~~~~~~~~~~~~~~~~~~~~~~~
+
+**Dataset link:** TODO
+
+* **22kHz**:
+   `config <https://github.com/NVIDIA/NeMo-speech-data-processor/blob/main/dataset_configs/english/hifitts2/config_22khz.yaml>`__ |
+   :doc:`documentation <config-docs/english/hifitts2/config_22khz>`
+* **44kHz**:
+   `config <https://github.com/NVIDIA/NeMo-speech-data-processor/blob/main/dataset_configs/english/hifitts2/config_44khz.yaml>`__ |
+   :doc:`documentation <config-docs/english/hifitts2/config_44khz>`
+* **Bandwidth Estimation**:
+   `config <https://github.com/NVIDIA/NeMo-speech-data-processor/blob/main/dataset_configs/english/hifitts2/config_bandwidth.yaml>`__ |
+   :doc:`documentation <config-docs/english/hifitts2/config_bandwidth>`
+
+.. toctree::
+   :hidden:
+
+   config-docs/english/hifitts2/config_22khz
+   config-docs/english/hifitts2/config_44khz
+   config-docs/english/hifitts2/config_bandwidth
diff --git a/sdp/processors/__init__.py b/sdp/processors/__init__.py
index df860331..c3ff70b6 100644
--- a/sdp/processors/__init__.py
+++ b/sdp/processors/__init__.py
@@ -24,6 +24,8 @@
 from sdp.processors.datasets.fleurs.create_initial_manifest import (
     CreateInitialManifestFleurs,
 )
+from sdp.processors.datasets.hifitts2.download_dataset import DownloadHiFiTTS2
+from sdp.processors.datasets.hifitts2.remove_failed_chapters import RemovedFailedChapters
 from sdp.processors.datasets.uzbekvoice.create_initial_manifest import (
     CreateInitialManifestUzbekvoice,
 )
@@ -127,6 +129,7 @@
     MakeLettersUppercaseAfterPeriod,
 )
 from sdp.processors.nemo.asr_inference import ASRInference
+from sdp.processors.nemo.estimate_bandwidth import EstimateBandwidth
 from sdp.processors.nemo.pc_inference import PCInference
 from sdp.processors.toloka.accept_if import AcceptIfWERLess
 from sdp.processors.toloka.create_pool import CreateTolokaPool
diff --git a/sdp/processors/datasets/hifitts2/__init__.py b/sdp/processors/datasets/hifitts2/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/sdp/processors/datasets/hifitts2/download_dataset.py b/sdp/processors/datasets/hifitts2/download_dataset.py
new file mode 100644
index 00000000..6e965e32
--- /dev/null
+++ b/sdp/processors/datasets/hifitts2/download_dataset.py
@@ -0,0 +1,147 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import json
+import librosa
+from pathlib import Path
+import soundfile as sf
+import time
+import urllib.error
+import urllib.request
+
+from sdp.logging import logger
+from sdp.processors.base_processor import BaseParallelProcessor, DataEntry
+
+
+class DownloadHiFiTTS2(BaseParallelProcessor):
+    """
+    Downloads HiFiTTS-2 dataset to local machine. Unsegmented audiobook chapters are first downloaded at a
+    48 kHz from LibriVox. Each chapter is then split into segmented utterance files based on precomputed
+    offsets and durations.
+
+    To reduce disk use, the chapter files can be optionally deleted after they are segmented.
+
+    Metadata for chapters which fail to download due to network errors are stored in an output manifest file,
+    which can be given as input to this processor to attempt the downloads again.
+
+    Args:
+        audio_dir (str): Root directory where utterance files will be saved.
+        chapter_dir (str): Root directory where audiobook chapter files will be saved.
+        sample_rate (int): Sample rate to use for utterance files.
+        delete_chapter_files (bool): Whether to delete each chapter file after it is done being processed.
+        exit_on_error (bool): Whether to terminate the entire processor script if a single chapter downlaod fails.
+        num_retries (int): Number of times to retry chapter download after encountering intermittent HTTP errors.
+
+    Returns:
+        Utterance files are stored under 'audio_dir' and chapter files are downloaded under 'chapter_dir'.
+        
+        If exit_on_error is False, then an output manifest will be saved with manifest entries that fail to downlaod,
+        with error information stored under the 'error_code' and 'error_reason' fields.
+
+    Example:
+        .. code-block:: yaml
+
+            - _target_: sdp.processors.DownloadHiFiTTS2
+              input_manifest_file: ${workspace_dir}/manifest_22khz.json
+              output_manifest_file: ${workspace_dir}/errors_22khz.json
+              audio_dir: ${workspace_dir}/audio_22khz
+              chapter_dir: ${workspace_dir}/chapters
+              max_workers: 8
+    """
+
+    def __init__(
+        self,
+        audio_dir: str,
+        chapter_dir: str,
+        sample_rate: int = 22050,
+        delete_chapter_files: bool = True,
+        exit_on_error: bool = False,
+        num_retries: int = 5,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.audio_dir = Path(audio_dir)
+        self.chapter_dir = Path(chapter_dir)
+        self.sample_rate = sample_rate
+        self.delete_chapter_files = delete_chapter_files
+        self.exit_on_error = exit_on_error
+        self.num_retries = num_retries
+
+    def prepare(self):
+        # Create output directory structure
+        with open(self.input_manifest_file, "rt", encoding="utf-8") as fin:
+            dirs = set()
+            for line in fin:
+                row = json.loads(line)
+                audio_filepath = Path(row["utterances"][0]["audio_filepath"])
+                chapter_dir = audio_filepath.parent
+                dirs.add(chapter_dir)
+
+        for dir in dirs:
+            audio_dir = self.audio_dir / dir
+            chapter_dir = self.chapter_dir / dir
+            audio_dir.mkdir(exist_ok=True, parents=True)
+            chapter_dir.mkdir(exist_ok=True, parents=True)
+
+        return
+
+    def process_dataset_entry(self, data_entry):
+        url = data_entry["url"]
+        chapter_filepath = data_entry["chapter_filepath"]
+        utterances = data_entry["utterances"]
+
+        chapter_path = self.chapter_dir / chapter_filepath
+        for i in range(1, self.num_retries + 1):
+            try:
+                urllib.request.urlretrieve(url=url, filename=chapter_path)
+                break
+            except (urllib.error.HTTPError, urllib.error.URLError) as http_error:
+                error_msg = f"Encountered HTTP error when downloading {url}: {http_error}"
+                logger.warning(error_msg)
+
+                error_code = getattr(http_error, "code", 0)
+                if (not error_code or str(error_code).startswith("5")) and i < self.num_retries:
+                    logger.info(f"Retry {i} for url {url}")
+                    time.sleep(10)
+                    continue
+
+                if self.exit_on_error:
+                    raise RuntimeError(error_msg)
+
+                error_data = {
+                    "url": url,
+                    "chapter_filepath": chapter_filepath,
+                    "error_code": error_code,
+                    "error_reason": http_error.reason,
+                    "utterances": utterances,
+                }
+                return [DataEntry(data=error_data)]
+
+        chapter_audio, sr = librosa.load(path=chapter_path, sr=self.sample_rate)
+
+        for utt in utterances:
+            audio_filepath = utt["audio_filepath"]
+            audio_path = self.audio_dir / audio_filepath
+            offset = utt["offset"]
+            dur = utt["duration"]
+            start_sample = librosa.time_to_samples(offset, sr=sr)
+            end_sample = librosa.time_to_samples(offset + dur, sr=sr)
+            audio = chapter_audio[start_sample:end_sample]
+            sf.write(file=audio_path, data=audio, samplerate=int(sr))
+
+        if self.delete_chapter_files:
+            chapter_path.unlink()
+
+        return []
diff --git a/sdp/processors/datasets/hifitts2/remove_failed_chapters.py b/sdp/processors/datasets/hifitts2/remove_failed_chapters.py
new file mode 100644
index 00000000..b4cd5a8b
--- /dev/null
+++ b/sdp/processors/datasets/hifitts2/remove_failed_chapters.py
@@ -0,0 +1,66 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import json
+from pathlib import Path
+from tqdm import tqdm
+
+from sdp.processors.base_processor import BaseProcessor
+from sdp.utils.common import load_manifest
+
+
+class RemovedFailedChapters(BaseProcessor):
+    """
+    Removes all utterances in the input chapter file from the input manifest. This processor is expected to be
+    run using the file output by the DownloadHiFiTTS2 containing failed chapter downloads.
+
+    Args:
+        error_file (str): Path to file with chapter download errors.
+
+    Returns:
+        This outputs a manifest which is the same as its input manifest but with utterances in 'error_file' removed.
+
+    Example:
+        .. code-block:: yaml
+
+            - _target_: sdp.processors.RemovedFailedChapters
+              input_manifest_file: ${workspace_dir}/manifest_22khz.json
+              output_manifest_file: ${workspace_dir}/manifest_filtered_22khz.json
+              error_file: ${workspace_dir}/errors_22khz.json
+    """
+
+    def __init__(
+        self,
+        error_file: str,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.error_file = Path(error_file)
+
+    def process(self):
+        chapter_rows = load_manifest(self.error_file)
+        audio_files_to_remove = set()
+        for chapter_row in chapter_rows:
+            for utt_list in chapter_row["utterances"]:
+                audio_files_to_remove.add(utt_list["audio_filepath"])
+
+        rows = load_manifest(Path(self.input_manifest_file))
+        with open(self.output_manifest_file, "w", encoding="utf-8") as output_f:
+            for row in tqdm(rows):
+                if row["audio_filepath"] in audio_files_to_remove:
+                    continue
+
+                output_line = f"{json.dumps(row, ensure_ascii=False)}\n"
+                output_f.write(output_line)
diff --git a/sdp/processors/nemo/estimate_bandwidth.py b/sdp/processors/nemo/estimate_bandwidth.py
new file mode 100644
index 00000000..38b261e7
--- /dev/null
+++ b/sdp/processors/nemo/estimate_bandwidth.py
@@ -0,0 +1,87 @@
+import librosa
+import numpy as np
+from pathlib import Path
+
+from sdp.processors.base_processor import BaseParallelProcessor, DataEntry
+
+
+class EstimateBandwidth(BaseParallelProcessor):
+    """
+    Adds estimated bandwidth to each utterance in the input manifest file.
+
+    Args:
+        audio_dir (str): Root directory where audio files are stored.
+        input_audio_key (str): Manifest key with relative audio paths.
+        output_bandwidth_key (str): Manifest key to store estimated bandwidth in.
+        max_seconds (float): The maximum length of audio to use for bandwidth estimation.
+            By default, uses the first 30 seconds.
+        sample_rate (int): Sample rate to resample audio to before doing bandwidth estimation.
+            Defaults to 44100, upsampling the input audio as needed.
+        n_fft (int): Number of FFT bins to use for bandwidth estimation. Defaults to 512.
+        hop_length (int): Audio frame hop length to use for bandwidth estimation.
+            Defaults to 441, corresponding to 0.01 seconds for 44100 sample rate.
+        top_db (float): top_db treshhold to use for bandwidth estimation.
+        frequency_threshold (float): Bandwidth estimation finds the highest frequency with mean power spectrum that is
+            within 'frequency_threshold' dB of its peak power. Defaults to -50 dB.
+
+    Returns:
+        This processor estimates the bandwidth of the audio file in the`input_audio_key` field and saves the estimate
+            in the output_bandwidth_key` field.
+
+    Example:
+        .. code-block:: yaml
+
+            - _target_: sdp.processors.EstimateBandwidth
+              input_manifest_file: ${workspace_dir}/manifest.json
+              output_manifest_file: ${workspace_dir}/manifest_bandwidth.json
+              audio_dir: ${workspace_dir}/audio_22khz
+              max_workers: 8
+    """
+
+    def __init__(
+        self,
+        audio_dir: str,
+        input_audio_key: str = "audio_filepath",
+        output_bandwidth_key: str = "bandwidth",
+        max_seconds: float = 30.0,
+        sample_rate: int = 44100,
+        n_fft: int = 512,
+        hop_length: int = 441,
+        top_db: float = 100.0,
+        frequency_threshold: float = -50.0,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.audio_directory = Path(audio_dir)
+        self.input_audio_key = input_audio_key
+        self.output_bandwidth_key = output_bandwidth_key
+        self.max_seconds = max_seconds
+        self.sample_rate = sample_rate
+        self.n_fft = n_fft
+        self.hop_length = hop_length
+        self.top_db = top_db
+        self.frequency_threshold = frequency_threshold
+
+    def _estimate_bandwidth(self, audio, sample_rate):
+        spec = librosa.stft(y=audio, n_fft=self.n_fft, hop_length=self.hop_length, window="blackmanharris")
+        power_spec = np.abs(spec) ** 2
+        power_spec = np.mean(power_spec, axis=1)
+        power_spec = librosa.power_to_db(power_spec, ref=self.n_fft, top_db=self.top_db)
+
+        bandwidth = 0
+        peak = np.max(power_spec)
+        freq_width = sample_rate / self.n_fft
+        for idx in range(len(power_spec) - 1, -1, -1):
+            if power_spec[idx] - peak > self.frequency_threshold:
+                bandwidth = idx * freq_width
+                break
+
+        return bandwidth
+
+    def process_dataset_entry(self, data_entry):
+        audio_filename = data_entry[self.input_audio_key]
+        audio_file = self.audio_directory / audio_filename
+        audio, sr = librosa.load(path=audio_file, sr=self.sample_rate, duration=self.max_seconds)
+        bandwidth = self._estimate_bandwidth(audio=audio, sample_rate=sr)
+        data_entry[self.output_bandwidth_key] = int(bandwidth)
+        return [DataEntry(data=data_entry)]
diff --git a/tests/prepare_test_data/prepare_hifitts2_data.py b/tests/prepare_test_data/prepare_hifitts2_data.py
new file mode 100644
index 00000000..9a83fad1
--- /dev/null
+++ b/tests/prepare_test_data/prepare_hifitts2_data.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Copies HiFiTTS-2 manifests and audio into a new directory with fewer entries."""
+
+import argparse
+import json
+import os
+from pathlib import Path
+import shutil
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser("Preparing HiFiTTS-2 test data")
+    parser.add_argument(
+        "--workspace_folder", required=True, type=Path, help="Path to workspace where dataset was downloaded."
+    )
+    parser.add_argument(
+        "--audio_folder", default="audio_22khz", type=Path, required=False, help="Name of root folder with audio."
+    )
+    parser.add_argument("--test_data_folder", required=True, type=Path, help="Where to place the prepared data")
+    parser.add_argument(
+        "--manifest_filename", default="manifest_22khz.json", type=str, required=False, help="Name of manifest manifest."
+    )
+    parser.add_argument(
+        "--chapters_filename", default="chapters_22khz.json", type=str, required=False, help="Name of chapter manifest."
+    )
+    parser.add_argument(
+        "--error_filename", default="errors_22khz.json", type=str, required=False, help="Name of chapter error manifest."
+    )
+    parser.add_argument("--num_entries", default=20, type=int, help="How many entries to keep from each manifest")
+
+    args = parser.parse_args()
+
+    files_to_copy = [args.manifest_filename, args.chapters_filename, args.error_filename]
+
+    os.makedirs(args.test_data_folder, exist_ok=True)
+    # Copy manifest files
+    for filename in files_to_copy:
+        input_path = args.workspace_folder / filename
+        output_path = args.test_data_folder / filename
+        with open(input_path, "r", encoding="utf-8") as input_f:
+            with open(output_path, "w", encoding="utf-8") as output_f:
+                for i, line in enumerate(input_f):
+                    if i >= args.num_entries:
+                        break
+                    output_f.write(line)
+
+    # Copy audio
+    manifest_path = args.test_data_folder / args.manifest_filename
+    input_audio_dir = args.workspace_folder / args.audio_folder
+    output_audio_dir = args.test_data_folder / args.audio_folder
+    with open(manifest_path, "r", encoding="utf-8") as input_f:
+        for i, line in enumerate(input_f):
+            if i >= args.num_entries:
+                break
+            row = json.loads(line)
+            audio_filepath = row["audio_filepath"]
+            input_path = input_audio_dir / audio_filepath
+            output_path = output_audio_dir / audio_filepath
+            output_path.parent.mkdir(exist_ok=True, parents=True)
+            shutil.copy(src=input_path, dst=output_path)
\ No newline at end of file
diff --git a/tests/test_cfg_end_to_end_tests.py b/tests/test_cfg_end_to_end_tests.py
index db0425e7..9d860ce9 100644
--- a/tests/test_cfg_end_to_end_tests.py
+++ b/tests/test_cfg_end_to_end_tests.py
@@ -36,8 +36,8 @@ class TestCase:
     """Class for keeping track of test cases."""
     config_path: str
     data_check_fn: Callable
-    # Fields in the manifest to ignore (can be set when non-deterministic processor was used)
     reference_manifest_filename: str = "test_data_reference.json"
+    # Fields in the manifest to ignore (can be set when non-deterministic processor was used)
     fields_to_ignore: List[str] = field(default_factory=list)
     processors_to_run: str = ""
 
@@ -232,19 +232,34 @@ def get_test_cases() -> List[Tuple[str, Callable]]:
             data_check_fn=partial(data_check_fn_generic, file_name="everyayah.hf")
         ),
         TestCase(
-            config_path=f"{DATASET_CONFIGS_ROOT}/armenian/toloka/pipeline_start.yaml", 
+            config_path=f"{DATASET_CONFIGS_ROOT}/armenian/toloka/pipeline_start.yaml",
             data_check_fn=data_check_fn_armenian_toloka_pipeline_start,
             fields_to_ignore=['source_filepath'],
             processors_to_run="2:14",
             reference_manifest_filename="pipeline_start/test_data_reference.json"
         ),
         TestCase(
-            config_path=f"{DATASET_CONFIGS_ROOT}/armenian/toloka/pipeline_get_final_res.yaml", 
+            config_path=f"{DATASET_CONFIGS_ROOT}/armenian/toloka/pipeline_get_final_res.yaml",
             data_check_fn=data_check_fn_armenian_toloka_pipeline_get_final_res,
             reference_manifest_filename="pipeline_get_final_res/test_data_reference.json",
             fields_to_ignore=['audio_filepath', 'duration'],
             processors_to_run="1:6"
-        )
+        ),
+        TestCase(
+            config_path=f"{DATASET_CONFIGS_ROOT}/english/hifitts2/config_22khz.yaml",
+            data_check_fn=partial(data_check_fn_generic, file_name="manifest_22khz.json"),
+            processors_to_run="1:2"
+        ),
+        TestCase(
+            config_path=f"{DATASET_CONFIGS_ROOT}/english/hifitts2/config_44khz.yaml",
+            data_check_fn=partial(data_check_fn_generic, file_name="manifest_44khz.json"),
+            processors_to_run="1:2"
+        ),
+        TestCase(
+            config_path=f"{DATASET_CONFIGS_ROOT}/english/hifitts2/config_bandwidth.yaml",
+            data_check_fn=partial(data_check_fn_generic, file_name="manifest_22khz.json"),
+            reference_manifest_filename="test_data_reference_bandwidth.json",
+        ),
     ]
 
 def get_test_names():
@@ -357,6 +372,18 @@ def test_configs(setup_data, tmp_path):
         # Set input_manifest_file for ASRFileCheck to use the existing manifest.json
         cfg.processors[1].input_manifest_file = (data_dir / "pipeline_get_final_res" / "manifest.json").as_posix()
 
+    if "english/hifitts2/config_22khz" in config_path:
+        cfg.processors[1].input_manifest_file = (data_dir / "manifest_22khz.json").as_posix()
+        cfg.processors[1].error_file = (data_dir / "errors_22khz.json").as_posix()
+
+    if "english/hifitts2/config_44khz" in config_path:
+        cfg.processors[1].input_manifest_file = (data_dir / "manifest_44khz.json").as_posix()
+        cfg.processors[1].error_file = (data_dir / "errors_44khz.json").as_posix()
+
+    if "english/hifitts2/config_bandwidth" in config_path:
+        cfg.processors[0].audio_dir = (data_dir / "audio_22khz").as_posix()
+        cfg.processors[0].input_manifest_file = (data_dir / "manifest_22khz.json").as_posix()
+
     run_processors(cfg)
     # additionally, let's test that final generated manifest matches the
     # reference file (ignoring the file paths and additional fields explicitly specified to ignore)