Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions __init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
from .modules.textInferenceInputsVideos import RunwareTextInferenceInputsVideos
from .modules.audioSections import RunwareAudioSections
from .modules.audioInferenceInputs import audioInferenceInputs
from .modules.audioInferenceReferenceVoices import RunwareAudioInferenceReferenceVoices
from .modules.audioSettings import RunwareAudioSettings
from .modules.audioSettingsVoiceModify import RunwareAudioSettingsVoiceModify
from .modules.providerSettings.elevenlabsProviderSettings import RunwareElevenLabsProviderSettings
Expand Down Expand Up @@ -191,6 +192,7 @@
"Runware Text Inference Inputs Videos": RunwareTextInferenceInputsVideos,
"Runware Audio Sections": RunwareAudioSections,
"Runware Audio Inference Inputs": audioInferenceInputs,
"Runware Audio Inference Inputs Reference Audio": RunwareAudioInferenceReferenceVoices,
"Runware Audio Inference Settings": RunwareAudioSettings,
"Runware Audio Inference Settings Voice Modify": RunwareAudioSettingsVoiceModify,
"Runware ElevenLabs Provider Settings": RunwareElevenLabsProviderSettings,
Expand Down
4 changes: 3 additions & 1 deletion clientlibs/main.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { app } from "../../scripts/app.js";
import { api } from "../../scripts/api.js";
import { promptEnhanceHandler, syncDimensionsNodeHandler, searchNodeHandler, APIKeyHandler, captionNodeHandler, saveTextHandler, mediaUUIDHandler, save3DFilepathHandler, videoTranscriptionHandler, videoOutputsHandler, handleCustomErrors, videoInferenceDimensionsHandler, videoModelSearchFilterHandler, audioModelSearchFilterHandler, textModelSearchFilterHandler, vectorizeModelSearchFilterHandler, vectorizeToggleHandler, useParameterToggleHandler, imageInferenceToggleHandler, imageInferenceAdvancedFeaturesToggleHandler, watermarkAdvancedFeatureToggleHandler, videoInferenceSpeechInputToggleHandler, regionalPromptingRegionsToggleHandler, upscalerToggleHandler, imageUpscalerSettingsToggleHandler, videoUpscalerToggleHandler, audioInferenceToggleHandler, audioInferenceSpeechToggleHandler, audioSettingsToggleHandler, textInferenceSettingsToggleHandler, videoSettingsToggleHandler, videoInferenceSettingsTtsToggleHandler, videoInferenceSettingsActiveSpeakerDetectionToggleHandler, videoInferenceSettingsActiveSpeakerBoundingBoxesToggleHandler, videoInferenceSettingsSegmentsToggleHandler, acceleratorOptionsToggleHandler, bytedanceProviderSettingsToggleHandler, xaiProviderSettingsToggleHandler, viduProviderSettingsToggleHandler, sourcefulProviderSettingsToggleHandler, sourcefulProviderSettingsFontsToggleHandler, threeDInferenceToggleHandler, threeDInferenceSettingsToggleHandler, threeDInferenceSettingsLatToggleHandler, threeDInferenceSettingsMeshClusterToggleHandler, ultralyticsProviderSettingsToggleHandler, openaiProviderSettingsToggleHandler, lightricksProviderSettingsToggleHandler, klingProviderSettingsToggleHandler, lumaProviderSettingsToggleHandler, briaProviderSettingsToggleHandler, pixverseProviderSettingsToggleHandler, alibabaProviderSettingsToggleHandler, mireloProviderSettingsToggleHandler, googleProviderSettingsToggleHandler, syncProviderSettingsToggleHandler, syncSegmentToggleHandler, settingsToggleHandler, outpaintSettingsToggleHandler, safetyInputsToggleHandler, 
imageInferenceSettingsColorPaletteToggleHandler, imageInferenceSettingsMoodboardsToggleHandler, audioInputToggleHandler, speechInputToggleHandler, briaProviderMaskToggleHandler, wanAnimateAdvancedFeatureSettingsToggleHandler, videoAdvancedFeatureInputsToggleHandler, audioInferenceInputsToggleHandler, audioInferenceSpeechVoicesToggleHandler, referenceVideosToggleHandler } from "./utils.js";
import { promptEnhanceHandler, syncDimensionsNodeHandler, searchNodeHandler, APIKeyHandler, captionNodeHandler, saveTextHandler, mediaUUIDHandler, save3DFilepathHandler, videoTranscriptionHandler, videoOutputsHandler, handleCustomErrors, videoInferenceDimensionsHandler, videoModelSearchFilterHandler, audioModelSearchFilterHandler, textModelSearchFilterHandler, vectorizeModelSearchFilterHandler, vectorizeToggleHandler, useParameterToggleHandler, imageInferenceToggleHandler, imageInferenceAdvancedFeaturesToggleHandler, watermarkAdvancedFeatureToggleHandler, videoInferenceSpeechInputToggleHandler, regionalPromptingRegionsToggleHandler, upscalerToggleHandler, imageUpscalerSettingsToggleHandler, videoUpscalerToggleHandler, audioInferenceToggleHandler, audioInferenceSpeechToggleHandler, audioSettingsToggleHandler, textInferenceSettingsToggleHandler, videoSettingsToggleHandler, videoInferenceSettingsTtsToggleHandler, videoInferenceSettingsActiveSpeakerDetectionToggleHandler, videoInferenceSettingsActiveSpeakerBoundingBoxesToggleHandler, videoInferenceSettingsSegmentsToggleHandler, acceleratorOptionsToggleHandler, bytedanceProviderSettingsToggleHandler, xaiProviderSettingsToggleHandler, viduProviderSettingsToggleHandler, sourcefulProviderSettingsToggleHandler, sourcefulProviderSettingsFontsToggleHandler, threeDInferenceToggleHandler, threeDInferenceSettingsToggleHandler, threeDInferenceSettingsLatToggleHandler, threeDInferenceSettingsMeshClusterToggleHandler, ultralyticsProviderSettingsToggleHandler, openaiProviderSettingsToggleHandler, lightricksProviderSettingsToggleHandler, klingProviderSettingsToggleHandler, lumaProviderSettingsToggleHandler, briaProviderSettingsToggleHandler, pixverseProviderSettingsToggleHandler, alibabaProviderSettingsToggleHandler, mireloProviderSettingsToggleHandler, googleProviderSettingsToggleHandler, syncProviderSettingsToggleHandler, syncSegmentToggleHandler, settingsToggleHandler, outpaintSettingsToggleHandler, safetyInputsToggleHandler, 
imageInferenceSettingsColorPaletteToggleHandler, imageInferenceSettingsMoodboardsToggleHandler, audioInputToggleHandler, speechInputToggleHandler, briaProviderMaskToggleHandler, wanAnimateAdvancedFeatureSettingsToggleHandler, videoAdvancedFeatureInputsToggleHandler, audioInferenceInputsToggleHandler, audioInferenceReferenceVoiceToggleHandler, audioInferenceSpeechVoicesToggleHandler, referenceVideosToggleHandler } from "./utils.js";
import { RUNWARE_NODE_TYPES, RUNWARE_NODE_PROPS, SEARCH_TERMS } from "./types.js";

const nodeInitList = [];
Expand Down Expand Up @@ -165,6 +165,8 @@ app.registerExtension({
videoAdvancedFeatureInputsToggleHandler(node);
} else if(nodeClass === RUNWARE_NODE_TYPES.AUDIOINFERENCEINPUTS) {
audioInferenceInputsToggleHandler(node);
} else if(nodeClass === RUNWARE_NODE_TYPES.AUDIOINFERENCEINPUTSREFERENCEAUDIO) {
audioInferenceReferenceVoiceToggleHandler(node);
} else if(nodeClass === RUNWARE_NODE_TYPES.REFERENCEVIDEOS ||
nodeClass === RUNWARE_NODE_TYPES.REFERENCEVIDEOS_LEGACY) {
referenceVideosToggleHandler(node);
Expand Down
5 changes: 5 additions & 0 deletions clientlibs/types.js
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ const RUNWARE_NODE_TYPES = {
TEXTINFERENCEINPUTSVIDEOS: "Runware Text Inference Inputs Videos",
AUDIOSECTIONS: "Runware Audio Sections",
AUDIOINFERENCEINPUTS: "Runware Audio Inference Inputs",
AUDIOINFERENCEINPUTSREFERENCEAUDIO: "Runware Audio Inference Inputs Reference Audio",
AUDIOSETTINGS: "Runware Audio Inference Settings",
AUDIOSETTINGSVOICEMODIFY: "Runware Audio Inference Settings Voice Modify",
PIXVERSEPROVIDERSETTINGS: "Runware Pixverse Provider Settings",
Expand Down Expand Up @@ -574,6 +575,10 @@ const RUNWARE_NODE_PROPS = {
bgColor: DEFAULT_BGCOLOR,
colorModeOnly: true,
},
[RUNWARE_NODE_TYPES.AUDIOINFERENCEINPUTSREFERENCEAUDIO]: {
bgColor: DEFAULT_BGCOLOR,
colorModeOnly: true,
},
[RUNWARE_NODE_TYPES.AUDIOSETTINGS]: {
bgColor: DEFAULT_BGCOLOR,
},
Expand Down
71 changes: 71 additions & 0 deletions clientlibs/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -1298,6 +1298,26 @@ function audioSettingsToggleHandler(settingsNode) {
const cfgIntervalStartWidget = settingsNode.widgets.find(w => w && w.name === "cfgIntervalStart");
const useCfgIntervalEndWidget = settingsNode.widgets.find(w => w && w.name === "useCfgIntervalEnd");
const cfgIntervalEndWidget = settingsNode.widgets.find(w => w && w.name === "cfgIntervalEnd");
const useNormalizeLoudnessWidget = settingsNode.widgets.find(w => w && w.name === "useNormalizeLoudness");
const normalizeLoudnessWidget = settingsNode.widgets.find(w => w && w.name === "normalizeLoudness");
const useTopPWidget = settingsNode.widgets.find(w => w && w.name === "useTopP");
const topPWidget = settingsNode.widgets.find(w => w && w.name === "topP");
const useChunkLengthWidget = settingsNode.widgets.find(w => w && w.name === "useChunkLength");
const chunkLengthWidget = settingsNode.widgets.find(w => w && w.name === "chunkLength");
const useMinChunkLengthWidget = settingsNode.widgets.find(w => w && w.name === "useMinChunkLength");
const minChunkLengthWidget = settingsNode.widgets.find(w => w && w.name === "minChunkLength");
const useNormalizeWidget = settingsNode.widgets.find(w => w && w.name === "useNormalize");
const normalizeWidget = settingsNode.widgets.find(w => w && w.name === "normalize");
const useLatencyWidget = settingsNode.widgets.find(w => w && w.name === "useLatency");
const latencyWidget = settingsNode.widgets.find(w => w && w.name === "latency");
const useMaxTokensWidget = settingsNode.widgets.find(w => w && w.name === "useMaxTokens");
const maxTokensWidget = settingsNode.widgets.find(w => w && w.name === "maxTokens");
const useRepetitionPenaltyWidget = settingsNode.widgets.find(w => w && w.name === "useRepetitionPenalty");
const repetitionPenaltyWidget = settingsNode.widgets.find(w => w && w.name === "repetitionPenalty");
const useConditionOnPreviousChunksWidget = settingsNode.widgets.find(w => w && w.name === "useConditionOnPreviousChunks");
const conditionOnPreviousChunksWidget = settingsNode.widgets.find(w => w && w.name === "conditionOnPreviousChunks");
const useEarlyStopThresholdWidget = settingsNode.widgets.find(w => w && w.name === "useEarlyStopThreshold");
const earlyStopThresholdWidget = settingsNode.widgets.find(w => w && w.name === "earlyStopThreshold");

function toggleWidgetState(useWidget, paramWidget, paramName) {
if (!useWidget || !paramWidget) return;
Expand Down Expand Up @@ -1338,6 +1358,16 @@ function audioSettingsToggleHandler(settingsNode) {
if (useTranscriptWidget && transcriptWidget) toggleWidgetState(useTranscriptWidget, transcriptWidget, "transcript");
if (useCfgIntervalStartWidget && cfgIntervalStartWidget) toggleWidgetState(useCfgIntervalStartWidget, cfgIntervalStartWidget, "cfgIntervalStart");
if (useCfgIntervalEndWidget && cfgIntervalEndWidget) toggleWidgetState(useCfgIntervalEndWidget, cfgIntervalEndWidget, "cfgIntervalEnd");
if (useNormalizeLoudnessWidget && normalizeLoudnessWidget) toggleWidgetState(useNormalizeLoudnessWidget, normalizeLoudnessWidget, "normalizeLoudness");
if (useTopPWidget && topPWidget) toggleWidgetState(useTopPWidget, topPWidget, "topP");
if (useChunkLengthWidget && chunkLengthWidget) toggleWidgetState(useChunkLengthWidget, chunkLengthWidget, "chunkLength");
if (useMinChunkLengthWidget && minChunkLengthWidget) toggleWidgetState(useMinChunkLengthWidget, minChunkLengthWidget, "minChunkLength");
if (useNormalizeWidget && normalizeWidget) toggleWidgetState(useNormalizeWidget, normalizeWidget, "normalize");
if (useLatencyWidget && latencyWidget) toggleWidgetState(useLatencyWidget, latencyWidget, "latency");
if (useMaxTokensWidget && maxTokensWidget) toggleWidgetState(useMaxTokensWidget, maxTokensWidget, "maxTokens");
if (useRepetitionPenaltyWidget && repetitionPenaltyWidget) toggleWidgetState(useRepetitionPenaltyWidget, repetitionPenaltyWidget, "repetitionPenalty");
if (useConditionOnPreviousChunksWidget && conditionOnPreviousChunksWidget) toggleWidgetState(useConditionOnPreviousChunksWidget, conditionOnPreviousChunksWidget, "conditionOnPreviousChunks");
if (useEarlyStopThresholdWidget && earlyStopThresholdWidget) toggleWidgetState(useEarlyStopThresholdWidget, earlyStopThresholdWidget, "earlyStopThreshold");
}

function textInferenceSettingsToggleHandler(settingsNode) {
Expand Down Expand Up @@ -3423,6 +3453,9 @@ function audioModelSearchFilterHandler(audioModelSearchNode) {
"Google": [
"google:gemini@3.1-flash-tts (Gemini 3.1 Flash TTS)",
],
"Fish": [
"fishaudio:s2.1@pro (Fish Audio S2.1 Pro)",
],
};

function filterModelList() {
Expand Down Expand Up @@ -4727,6 +4760,43 @@ function regionalPromptingRegionsToggleHandler(regionsNode) {
}
}

/**
 * Wires the "useReferenceVoiceN" switches of a reference-voice node to their
 * matching "audioN" and "textN" widgets (N = 1..4), so the dependent widgets
 * are enabled only while the switch value is strictly `true`.
 *
 * The handler is registered at most once per node; a marker property on the
 * node records that registration already happened.
 *
 * @param {object} referenceVoiceNode - Node whose `widgets` array is searched by widget name.
 */
function audioInferenceReferenceVoiceToggleHandler(referenceVoiceNode) {
    const registeredFlag = "_audioInferenceReferenceVoiceToggleHandlerRegistered";

    // Nothing to wire without widgets, and never wire the same node twice.
    if (!referenceVoiceNode?.widgets || referenceVoiceNode[registeredFlag]) return;
    referenceVoiceNode[registeredFlag] = true;

    // Look a widget up by its exact name, tolerating empty slots in the list.
    const widgetByName = (name) =>
        referenceVoiceNode.widgets.find((w) => w && w.name === name);

    // Keep `targetWidget` in sync with the boolean held by `switchWidget`.
    const bindToggle = (switchWidget, targetWidget) => {
        if (!switchWidget || !targetWidget) return;

        const refresh = () => {
            const isOn = switchWidget.value === true;
            toggleWidgetEnabled(targetWidget, isOn, referenceVoiceNode);

            // Mirror the state onto the backing DOM element when the widget has one.
            const element = targetWidget.options && targetWidget.options.element;
            if (element) {
                element.disabled = !isOn;
                element.style.opacity = isOn ? "1" : "0.5";
                element.style.pointerEvents = isOn ? "auto" : "none";
            }
            referenceVoiceNode.setDirtyCanvas(true);
        };

        // Re-apply shortly after every switch change, plus once after setup.
        appendWidgetCB(switchWidget, () => setTimeout(refresh, 50));
        setTimeout(refresh, 100);
    };

    for (const slot of [1, 2, 3, 4]) {
        const switchWidget = widgetByName(`useReferenceVoice${slot}`);
        // Resolve both dependants before binding, audio first and text second.
        const dependants = ["audio", "text"].map((prefix) => widgetByName(`${prefix}${slot}`));
        dependants.forEach((dependant) => bindToggle(switchWidget, dependant));
    }
}

function audioInferenceInputsToggleHandler(audioInputsNode) {
if (!audioInputsNode?.widgets) return;

Expand Down Expand Up @@ -5096,6 +5166,7 @@ export {
wanAnimateAdvancedFeatureSettingsToggleHandler,
videoAdvancedFeatureInputsToggleHandler,
audioInferenceInputsToggleHandler,
audioInferenceReferenceVoiceToggleHandler,
audioInferenceSpeechVoicesToggleHandler,
referenceVideosToggleHandler,
};
Expand Down
14 changes: 13 additions & 1 deletion modules/audioInferenceInputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,21 @@ def INPUT_TYPES(cls):
"tooltip": f"Audio URL or mediaUUID for the {ordinal} audio. Only used when 'Use Audios' is enabled.",
"default": "",
})

optionalInputs["Reference Voice"] = ("RUNWAREAUDIOINFERENCEREFERENCEVOICES", {
"tooltip": "Connect Runware Audio Inference Inputs Reference Audio for zero-shot voice cloning (inputs.referenceVoices).",
})

return {
"required": {},
"optional": optionalInputs
}

DESCRIPTION = "Configure custom inputs for Runware Audio Inference, including optional single or multiple audio URL/mediaUUID (inputs.audio or inputs.audios), and single or multiple video inputs for audio extraction or generation."
DESCRIPTION = (
"Configure custom inputs for Runware Audio Inference, including optional single or multiple audio URL/mediaUUID "
"(inputs.audio or inputs.audios), reference voice for cloning (inputs.referenceVoices), "
"and single or multiple video inputs for audio extraction or generation."
)
FUNCTION = "createInputs"
RETURN_TYPES = ("RUNWAREAUDIOINFERENCEINPUTS",)
RETURN_NAMES = ("Audio Inference Inputs",)
Expand All @@ -72,6 +80,7 @@ def createInputs(self, **kwargs) -> tuple[Dict[str, Any]]:
useVideo = kwargs.get("useVideo", False)
video = kwargs.get("Video", None)
useVideos = kwargs.get("useVideos", False)
referenceVoices = kwargs.get("Reference Voice", None)

inputs = {}

Expand Down Expand Up @@ -106,6 +115,9 @@ def createInputs(self, **kwargs) -> tuple[Dict[str, Any]]:

if len(videoList) > 0:
inputs["videos"] = videoList

if referenceVoices is not None and isinstance(referenceVoices, list) and len(referenceVoices) > 0:
inputs["referenceVoices"] = referenceVoices

return (inputs,)

Expand Down
74 changes: 74 additions & 0 deletions modules/audioInferenceReferenceVoices.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""
Runware Audio Inference Inputs Reference Audio node.
Builds inputs.referenceVoices for zero-shot voice cloning (up to 4 entries).
"""

from typing import Any, Dict, List

from .utils import runwareUtils as rwUtils


class RunwareAudioInferenceReferenceVoices:
    """Build inputs.referenceVoices[] for Fish Audio and other TTS models.

    The node exposes up to ``MAX_REFERENCE_VOICES`` slots. Each slot has an
    enable switch (``useReferenceVoice{i}``), a reference clip (``audio{i}``)
    and its transcript (``text{i}``). Enabled, fully populated slots are
    collected into a list of ``{"audio": ..., "text": ...}`` dictionaries.
    """

    # Number of reference-voice slots exposed by the node.
    MAX_REFERENCE_VOICES = 4

    @classmethod
    def INPUT_TYPES(cls):
        """Return the input declaration for every reference-voice slot.

        Returns:
            A dict with an empty ``"required"`` mapping and an ``"optional"``
            mapping holding three entries per slot, in the order
            ``useReferenceVoice{i}``, ``audio{i}``, ``text{i}``.
        """
        optional_inputs = {}
        for i in range(1, cls.MAX_REFERENCE_VOICES + 1):
            # NOTE(review): presumably yields a word such as "first" for the
            # tooltips; confirm against runwareUtils.getOrdinal.
            ordinal = rwUtils.getOrdinal(i)
            optional_inputs[f"useReferenceVoice{i}"] = ("BOOLEAN", {
                "default": False,
                "tooltip": f"Enable to include the {ordinal} reference voice in inputs.referenceVoices.",
            })
            optional_inputs[f"audio{i}"] = ("STRING", {
                "default": "",
                "tooltip": f"Reference audio clip ({ordinal}) as media UUID, URL, or base64. Required when enabled.",
            })
            optional_inputs[f"text{i}"] = ("STRING", {
                "multiline": True,
                "default": "",
                "tooltip": f"Transcript of the {ordinal} reference audio clip (1–1000 characters). Required when enabled.",
            })

        return {
            "required": {},
            "optional": optional_inputs,
        }

    RETURN_TYPES = ("RUNWAREAUDIOINFERENCEREFERENCEVOICES",)
    RETURN_NAMES = ("referenceVoices",)
    FUNCTION = "createReferenceVoices"
    CATEGORY = "Runware/Audio"
    DESCRIPTION = (
        "Configure inputs.referenceVoices for zero-shot voice cloning (up to 4 entries). "
        "Each entry: { \"audio\": \"<UUID/URL/base64>\", \"text\": \"<transcript>\" }. "
        "Connect to Runware Audio Inference Inputs."
    )

    def createReferenceVoices(self, **kwargs) -> tuple[List[Dict[str, Any]]]:
        """Collect the enabled reference voices into a single list.

        Args:
            **kwargs: Widget values keyed by ``useReferenceVoice{i}``,
                ``audio{i}`` and ``text{i}`` for ``i`` in
                ``1..MAX_REFERENCE_VOICES``. Missing keys are treated as
                disabled / empty.

        Returns:
            A one-element tuple containing the list of
            ``{"audio": str, "text": str}`` entries, in slot order. The list
            is empty when no slot is both enabled and fully populated.
        """
        reference_voices: List[Dict[str, Any]] = []

        for i in range(1, self.MAX_REFERENCE_VOICES + 1):
            if not kwargs.get(f"useReferenceVoice{i}", False):
                continue

            # ``or ""`` guards against explicit None values before stripping.
            audio = (kwargs.get(f"audio{i}") or "").strip()
            text = (kwargs.get(f"text{i}") or "").strip()

            # An enabled slot missing its clip or transcript is skipped rather
            # than sent half-populated.
            if not audio or not text:
                continue

            reference_voices.append({"audio": audio, "text": text})

        return (reference_voices,)


# Module-level registry: maps the node identifier string to the class that
# implements it.
NODE_CLASS_MAPPINGS = {
"RunwareAudioInferenceReferenceVoices": RunwareAudioInferenceReferenceVoices,
}

# Maps the same node identifier string to a human-readable title for the node.
NODE_DISPLAY_NAME_MAPPINGS = {
"RunwareAudioInferenceReferenceVoices": "Runware Audio Inference Inputs Reference Audio",
}
4 changes: 4 additions & 0 deletions modules/audioModelSearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ class RunwareAudioModelSearch:
"Google": [
"google:gemini@3.1-flash-tts (Gemini 3.1 Flash TTS)",
],
"Fish": [
"fishaudio:s2.1@pro (Fish Audio S2.1 Pro)",
],
}

MODEL_PROVIDERS = [
Expand All @@ -61,6 +64,7 @@ class RunwareAudioModelSearch:
"MiniMax",
"Inworld",
"Google",
"Fish",
]

@classmethod
Expand Down
Loading