From fda71f17f086686b1a0d8b9caef270449090042e Mon Sep 17 00:00:00 2001 From: yqzhishen Date: Sat, 2 May 2026 22:44:29 +0800 Subject: [PATCH 1/3] Support DiffSinger pitch local retaking --- OpenUtau.Core/DiffSinger/DiffSingerPitch.cs | 33 +++++++++++++++++-- .../DiffSinger/DiffSingerRenderer.cs | 33 +++++++++++++++++++ OpenUtau.Core/Editing/NoteBatchEdits.cs | 5 ++- OpenUtau.Core/Render/IRenderer.cs | 6 ++++ OpenUtau.Core/Util/Preferences.cs | 1 + OpenUtau/Strings/Strings.axaml | 1 + OpenUtau/Strings/Strings.zh-CN.axaml | 1 + OpenUtau/ViewModels/PreferencesViewModel.cs | 7 ++++ OpenUtau/Views/PreferencesDialog.axaml | 4 +++ 9 files changed, 87 insertions(+), 4 deletions(-) diff --git a/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs b/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs index 3cc20c0d7..896a034ae 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs @@ -27,6 +27,8 @@ public class DsPitch : IDisposable DiffSingerSpeakerEmbedManager speakerEmbedManager; const string PEXP = DiffSingerUtils.PEXP; + public float FrameMs => frameMs; + public DsPitch(string rootPath) { this.rootPath = rootPath; @@ -107,7 +109,7 @@ int PhonemeTokenize(string phoneme){ return token; } - public RenderPitchResult Process(RenderPhrase phrase){ + public RenderPitchResult Process(RenderPhrase phrase, HashSet retakeNoteIndexes = null, float[] existingPitch = null){ var startMs = phrase.phones[0].positionMs - DiffSingerUtils.GetHeadMs(frameMs); int headFrames = DiffSingerUtils.headFrames; int tailFrames = DiffSingerUtils.tailFrames; @@ -251,6 +253,29 @@ public RenderPitchResult Process(RenderPhrase phrase){ .ToList(); var pitch = Enumerable.Repeat(60f, totalFrames).ToArray(); var retake = Enumerable.Repeat(true, totalFrames).ToArray(); + if (retakeNoteIndexes != null && existingPitch != null) { + int frameOffset = 0; + for (int noteIdx = 0; noteIdx < note_dur.Count; noteIdx++) { + bool shouldRetake; + if (noteIdx == 0) { + shouldRetake = retakeNoteIndexes.Contains(0); + } else if (noteIdx == note_dur.Count - 1) { + shouldRetake = retakeNoteIndexes.Contains(phrase.notes.Length - 1); + } else { + shouldRetake = retakeNoteIndexes.Contains(noteIdx - 1); + } + for (int f = 0; f < note_dur[noteIdx]; f++) { + int fi = frameOffset + f; + if (fi < totalFrames) { + retake[fi] = shouldRetake; + } + } + frameOffset += note_dur[noteIdx]; + } + for (int i = 0; i < totalFrames && i < existingPitch.Length; i++) { + pitch[i] = existingPitch[i]; + } + } var pitchInputs = new List(); pitchInputs.Add(NamedOnnxValue.CreateFromTensor("encoder_out", encoder_out)); pitchInputs.Add(NamedOnnxValue.CreateFromTensor("note_midi", @@ -322,14 +347,16 @@ public RenderPitchResult Process(RenderPhrase phrase){ .Select(i=>(float)phrase.timeAxis.MsPosToTickPos(startMs + i*frameMs) - phrase.position) .Append((float)phrase.duration + 1) .ToArray(), - tones = pitch_out.Append(pitch_out[^1]).ToArray() + tones = pitch_out.Append(pitch_out[^1]).ToArray(), + retakeMask = retakeNoteIndexes != null ? retake.Append(retake[^1]).ToArray() : null, }; }else{ return new RenderPitchResult{ ticks = Enumerable.Range(0,totalFrames) .Select(i=>(float)phrase.timeAxis.MsPosToTickPos(startMs + i*frameMs) - phrase.position) .ToArray(), - tones = pitch_out + tones = pitch_out, + retakeMask = retakeNoteIndexes != null ? retake : null, }; } } diff --git a/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs b/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs index 7844eea55..669bdd55b 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs @@ -508,6 +508,39 @@ public RenderPitchResult LoadRenderedPitch(RenderPhrase phrase) { } } + public RenderPitchResult LoadRenderedPitch(RenderPhrase phrase, HashSet selectedNotePositions) { + if (!Preferences.Default.DiffSingerLocalRetaking) { + return LoadRenderedPitch(phrase); + } + DiffSingerSinger singer = (DiffSingerSinger) phrase.singer; + if (!singer.HasPitchPredictor) { + throw new Exception("This singer has no pitch predictor."); + } + var pitchPredictor = singer.getPitchPredictor()!; + var retakeNoteIndexes = new HashSet(); + for (int i = 0; i < phrase.notes.Length; i++) { + int absPos = phrase.position + phrase.notes[i].position; + if (selectedNotePositions.Contains(absPos)) { + retakeNoteIndexes.Add(i); + } + } + if (retakeNoteIndexes.Count == 0 || retakeNoteIndexes.Count == phrase.notes.Length) { + lock (pitchPredictor) { + return pitchPredictor.Process(phrase); + } + } + var frameMs = pitchPredictor.FrameMs; + int headFrames = DiffSingerUtils.headFrames; + int tailFrames = DiffSingerUtils.tailFrames; + var ph_dur = DiffSingerUtils.PaddedPhoneDurations(phrase, frameMs, headFrames, tailFrames); + int totalFrames = ph_dur.Sum(); + var existingPitch = DiffSingerUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, totalFrames, headFrames, tailFrames, + x => x * 0.01).Select(f => (float)f).ToArray(); + lock (pitchPredictor) { + return pitchPredictor.Process(phrase, retakeNoteIndexes, existingPitch); + } + } + public List LoadRenderedRealCurves(RenderPhrase phrase) { if (!Preferences.Default.DiffSingerTensorCache) { throw new Exception("Please enable DiffSinger tensor cache and re-render the phrase to display correct base curves."); diff --git a/OpenUtau.Core/Editing/NoteBatchEdits.cs b/OpenUtau.Core/Editing/NoteBatchEdits.cs index 817315430..9ad4f4b9b 100644 --- a/OpenUtau.Core/Editing/NoteBatchEdits.cs +++ b/OpenUtau.Core/Editing/NoteBatchEdits.cs @@ -488,7 +488,7 @@ public void RunAsync( var commands = new List(); for (int ph_i = phrases.Count() - 1; ph_i >= 0; ph_i--) { var phrase = phrases[ph_i]; - var result = renderer.LoadRenderedPitch(phrase); + var result = renderer.LoadRenderedPitch(phrase, positions); if (result == null) { continue; } @@ -502,6 +502,9 @@ public void RunAsync( if (result.tones[i] < 0) { continue; } + if (result.retakeMask != null && !result.retakeMask[i]) { + continue; + } int x = phrase.position - part.position + (int)result.ticks[i]; if (result.ticks[i] < 0) { if (i + 1 < result.ticks.Length && result.ticks[i + 1] > 0) { } else diff --git a/OpenUtau.Core/Render/IRenderer.cs b/OpenUtau.Core/Render/IRenderer.cs index a1d420cc1..4ecda7d4f 100644 --- a/OpenUtau.Core/Render/IRenderer.cs +++ b/OpenUtau.Core/Render/IRenderer.cs @@ -40,6 +40,11 @@ public class RenderPitchResult { /// Semitone values in MIDI scale. /// public float[] tones; + + /// + /// Per-frame mask indicating retaken frames. Null means full retake. + /// + public bool[] retakeMask; } public class RenderRealCurveResult { @@ -70,6 +75,7 @@ public interface IRenderer { RenderResult Layout(RenderPhrase phrase); Task Render(RenderPhrase phrase, Progress progress, int trackNo, CancellationTokenSource cancellation, bool isPreRender = false); RenderPitchResult LoadRenderedPitch(RenderPhrase phrase); + RenderPitchResult LoadRenderedPitch(RenderPhrase phrase, HashSet selectedNotePositions) { return LoadRenderedPitch(phrase); } List LoadRenderedRealCurves(RenderPhrase phrase) { return new List(0);} UExpressionDescriptor[] GetSuggestedExpressions(USinger singer, URenderSettings renderSettings); } diff --git a/OpenUtau.Core/Util/Preferences.cs b/OpenUtau.Core/Util/Preferences.cs index 46aa926f1..0cb0127f6 100644 --- a/OpenUtau.Core/Util/Preferences.cs +++ b/OpenUtau.Core/Util/Preferences.cs @@ -166,6 +166,7 @@ public class SerializablePreferences { public int DiffSingerStepsPitch = 10; public bool DiffSingerTensorCache = true; public bool DiffSingerLangCodeHide = false; + public bool DiffSingerLocalRetaking = false; public bool SkipRenderingMutedTracks = false; public string Language = string.Empty; public string? SortingOrder = null; diff --git a/OpenUtau/Strings/Strings.axaml b/OpenUtau/Strings/Strings.axaml index 7c08bab77..104d90967 100644 --- a/OpenUtau/Strings/Strings.axaml +++ b/OpenUtau/Strings/Strings.axaml @@ -627,6 +627,7 @@ Warning: this option removes custom presets. DiffSinger Render Steps for Acoustic DiffSinger Render Steps for Pitch DiffSinger Render Steps for Variance + DiffSinger Pitch Local Retaking GPU Machine Learning Runner Phase Compensation diff --git a/OpenUtau/Strings/Strings.zh-CN.axaml b/OpenUtau/Strings/Strings.zh-CN.axaml index 8c730fbaa..473153673 100644 --- a/OpenUtau/Strings/Strings.zh-CN.axaml +++ b/OpenUtau/Strings/Strings.zh-CN.axaml @@ -485,6 +485,7 @@ Syntax: prefix,suffix--> + DiffSinger 音高局部重录 机器学习运行器 相位修正 diff --git a/OpenUtau/ViewModels/PreferencesViewModel.cs b/OpenUtau/ViewModels/PreferencesViewModel.cs index 9db829b46..a740d45c4 100644 --- a/OpenUtau/ViewModels/PreferencesViewModel.cs +++ b/OpenUtau/ViewModels/PreferencesViewModel.cs @@ -121,6 +121,7 @@ public int SafeMaxThreadCount { [Reactive] public double DiffSingerDepth { get; set; } [Reactive] public bool DiffSingerTensorCache { get; set; } [Reactive] public bool DiffSingerLangCodeHide { get; set; } + [Reactive] public bool DiffSingerLocalRetaking { get; set; } // Advanced [Reactive] public bool RememberMid { get; set; } @@ -175,6 +176,7 @@ public PreferencesViewModel() { DiffSingerStepsPitch = Preferences.Default.DiffSingerStepsPitch; DiffSingerTensorCache = Preferences.Default.DiffSingerTensorCache; DiffSingerLangCodeHide = Preferences.Default.DiffSingerLangCodeHide; + DiffSingerLocalRetaking = Preferences.Default.DiffSingerLocalRetaking; SkipRenderingMutedTracks = Preferences.Default.SkipRenderingMutedTracks; ThemeName = Preferences.Default.ThemeName; PenPlusDefault = Preferences.Default.PenPlusDefault; @@ -398,6 +400,11 @@ public PreferencesViewModel() { Preferences.Default.DiffSingerLangCodeHide = useCache; Preferences.Save(); }); + this.WhenAnyValue(vm => vm.DiffSingerLocalRetaking) + .Subscribe(value => { + Preferences.Default.DiffSingerLocalRetaking = value; + Preferences.Save(); + }); this.WhenAnyValue(vm => vm.SkipRenderingMutedTracks) .Subscribe(skipRenderingMutedTracks => { Preferences.Default.SkipRenderingMutedTracks = skipRenderingMutedTracks; diff --git a/OpenUtau/Views/PreferencesDialog.axaml b/OpenUtau/Views/PreferencesDialog.axaml index e116bbe34..bbe07e8a4 100644 --- a/OpenUtau/Views/PreferencesDialog.axaml +++ b/OpenUtau/Views/PreferencesDialog.axaml @@ -329,6 +329,10 @@ + + + + From c54b388117befeba79f3f6d81e16602fc6f065d7 Mon Sep 17 00:00:00 2001 From: KakaruHayate Date: Fri, 5 Jun 2026 21:14:04 +0800 Subject: [PATCH 2/3] Extract retake mask + note-index helpers for testability Pull the per-frame retake mask construction (DsPitch.Process) and the absolute-tick -> phrase-local note-index mapping (LoadRenderedPitch overload) out into pure static helpers in DiffSingerRetake so the padding-shift indexing logic and the position lookup can be unit tested without instantiating RenderPhrase / a singer / ONNX. Co-Authored-By: Claude Opus 4.7 --- OpenUtau.Core/DiffSinger/DiffSingerPitch.cs | 20 +-- .../DiffSinger/DiffSingerRenderer.cs | 9 +- OpenUtau.Core/DiffSinger/DiffSingerRetake.cs | 57 ++++++++ .../Core/DiffSinger/DiffSingerRetakeTest.cs | 136 ++++++++++++++++++ 4 files changed, 199 insertions(+), 23 deletions(-) create mode 100644 OpenUtau.Core/DiffSinger/DiffSingerRetake.cs create mode 100644 OpenUtau.Test/Core/DiffSinger/DiffSingerRetakeTest.cs diff --git a/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs b/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs index 896a034ae..d892680ba 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs @@ -254,24 +254,8 @@ public RenderPitchResult Process(RenderPhrase phrase, HashSet retakeNoteInd var pitch = Enumerable.Repeat(60f, totalFrames).ToArray(); var retake = Enumerable.Repeat(true, totalFrames).ToArray(); if (retakeNoteIndexes != null && existingPitch != null) { - int frameOffset = 0; - for (int noteIdx = 0; noteIdx < note_dur.Count; noteIdx++) { - bool shouldRetake; - if (noteIdx == 0) { - shouldRetake = retakeNoteIndexes.Contains(0); - } else if (noteIdx == note_dur.Count - 1) { - shouldRetake = retakeNoteIndexes.Contains(phrase.notes.Length - 1); - } else { - shouldRetake = retakeNoteIndexes.Contains(noteIdx - 1); - } - for (int f = 0; f < note_dur[noteIdx]; f++) { - int fi = frameOffset + f; - if (fi < totalFrames) { - retake[fi] = shouldRetake; - } - } - frameOffset += note_dur[noteIdx]; - } + retake = DiffSingerRetake.BuildRetakeFrameMask( + note_dur, phrase.notes.Length, retakeNoteIndexes, totalFrames); for (int i = 0; i < totalFrames && i < existingPitch.Length; i++) { pitch[i] = existingPitch[i]; } diff --git a/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs b/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs index 669bdd55b..08739df57 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs @@ -517,13 +517,12 @@ public RenderPitchResult LoadRenderedPitch(RenderPhrase phrase, HashSet sel throw new Exception("This singer has no pitch predictor."); } var pitchPredictor = singer.getPitchPredictor()!; - var retakeNoteIndexes = new HashSet(); + var noteRelativePositions = new int[phrase.notes.Length]; for (int i = 0; i < phrase.notes.Length; i++) { - int absPos = phrase.position + phrase.notes[i].position; - if (selectedNotePositions.Contains(absPos)) { - retakeNoteIndexes.Add(i); - } + noteRelativePositions[i] = phrase.notes[i].position; } + var retakeNoteIndexes = DiffSingerRetake.MapSelectedPositionsToNoteIndexes( + phrase.position, noteRelativePositions, selectedNotePositions); if (retakeNoteIndexes.Count == 0 || retakeNoteIndexes.Count == phrase.notes.Length) { lock (pitchPredictor) { return pitchPredictor.Process(phrase); diff --git a/OpenUtau.Core/DiffSinger/DiffSingerRetake.cs b/OpenUtau.Core/DiffSinger/DiffSingerRetake.cs new file mode 100644 index 000000000..bb344739e --- /dev/null +++ b/OpenUtau.Core/DiffSinger/DiffSingerRetake.cs @@ -0,0 +1,57 @@ +using System.Collections.Generic; + +namespace OpenUtau.Core.DiffSinger { + public static class DiffSingerRetake { + public static HashSet MapSelectedPositionsToNoteIndexes( + int phrasePosition, + IReadOnlyList noteRelativePositions, + IReadOnlyCollection selectedAbsolutePositions) { + var result = new HashSet(); + if (selectedAbsolutePositions == null || selectedAbsolutePositions.Count == 0) { + return result; + } + var lookup = selectedAbsolutePositions as ISet ?? new HashSet(selectedAbsolutePositions); + for (int i = 0; i < noteRelativePositions.Count; i++) { + if (lookup.Contains(phrasePosition + noteRelativePositions[i])) { + result.Add(i); + } + } + return result; + } + + public static bool[] BuildRetakeFrameMask( + IReadOnlyList paddedNoteDurations, + int realNoteCount, + IReadOnlyCollection retakeNoteIndexes, + int totalFrames) { + var mask = new bool[totalFrames]; + if (retakeNoteIndexes == null || retakeNoteIndexes.Count == 0 || paddedNoteDurations.Count == 0) { + return mask; + } + var lookup = retakeNoteIndexes as ISet ?? new HashSet(retakeNoteIndexes); + int padded = paddedNoteDurations.Count; + int frameOffset = 0; + for (int noteIdx = 0; noteIdx < padded; noteIdx++) { + int realIdx; + if (noteIdx == 0) { + realIdx = 0; + } else if (noteIdx == padded - 1) { + realIdx = realNoteCount - 1; + } else { + realIdx = noteIdx - 1; + } + bool shouldRetake = lookup.Contains(realIdx); + int dur = paddedNoteDurations[noteIdx]; + for (int f = 0; f < dur; f++) { + int fi = frameOffset + f; + if (fi < totalFrames) { + mask[fi] = shouldRetake; + } + } + frameOffset += dur; + } + return mask; + } + } +} + diff --git a/OpenUtau.Test/Core/DiffSinger/DiffSingerRetakeTest.cs b/OpenUtau.Test/Core/DiffSinger/DiffSingerRetakeTest.cs new file mode 100644 index 000000000..b21a16360 --- /dev/null +++ b/OpenUtau.Test/Core/DiffSinger/DiffSingerRetakeTest.cs @@ -0,0 +1,136 @@ +using System.Collections.Generic; +using System.Linq; +using OpenUtau.Core.DiffSinger; +using Xunit; + +namespace OpenUtau.Core { + public class DiffSingerRetakeTest { + [Fact] + public void MapSelectedPositionsToNoteIndexes_PicksMatchingNotes() { + var noteRel = new[] { 0, 480, 960, 1440 }; + var selected = new HashSet { 100 + 480, 100 + 1440 }; + + var result = DiffSingerRetake.MapSelectedPositionsToNoteIndexes(100, noteRel, selected); + + Assert.Equal(new HashSet { 1, 3 }, result); + } + + [Fact] + public void MapSelectedPositionsToNoteIndexes_ReturnsEmptyWhenNoneSelected() { + var noteRel = new[] { 0, 480 }; + var result = DiffSingerRetake.MapSelectedPositionsToNoteIndexes(0, noteRel, new HashSet()); + Assert.Empty(result); + } + + [Fact] + public void MapSelectedPositionsToNoteIndexes_HandlesNullSelected() { + var noteRel = new[] { 0, 480 }; + var result = DiffSingerRetake.MapSelectedPositionsToNoteIndexes(0, noteRel, null); + Assert.Empty(result); + } + + [Fact] + public void BuildRetakeFrameMask_AllSelected_AllTrue() { + var paddedDurations = new[] { 2, 5, 5, 2 }; + var totalFrames = paddedDurations.Sum(); + var indexes = new HashSet { 0, 1 }; + + var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, 2, indexes, totalFrames); + + Assert.Equal(totalFrames, mask.Length); + Assert.All(mask, b => Assert.True(b)); + } + + [Fact] + public void BuildRetakeFrameMask_NoneSelected_AllFalse() { + var paddedDurations = new[] { 2, 5, 5, 2 }; + var totalFrames = paddedDurations.Sum(); + var indexes = new HashSet(); + + var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, 2, indexes, totalFrames); + + Assert.Equal(totalFrames, mask.Length); + Assert.All(mask, b => Assert.False(b)); + } + + [Fact] + public void BuildRetakeFrameMask_PartialSelected_RespectsHeadTailPaddingShift() { + // 3 real notes, padded with head + tail → 5 padded "note durations". + // Mapping: padded[0] → real 0 (head), padded[1] → real 0, padded[2] → real 1, + // padded[3] → real 2, padded[4] → real 2 (tail). + var paddedDurations = new[] { 2, 3, 4, 3, 2 }; // 14 frames total + int totalFrames = paddedDurations.Sum(); + var indexes = new HashSet { 1 }; // retake only middle real note + + var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, 3, indexes, totalFrames); + + // padded[0] (frames 0-1, head→real 0) → false + Assert.False(mask[0]); + Assert.False(mask[1]); + // padded[1] (frames 2-4, real 0) → false + Assert.False(mask[2]); + Assert.False(mask[4]); + // padded[2] (frames 5-8, real 1) → true + Assert.True(mask[5]); + Assert.True(mask[8]); + // padded[3] (frames 9-11, real 2) → false + Assert.False(mask[9]); + Assert.False(mask[11]); + // padded[4] (frames 12-13, tail→real 2) → false + Assert.False(mask[12]); + Assert.False(mask[13]); + } + + [Fact] + public void BuildRetakeFrameMask_FirstRealNoteSelected_HeadPadIncluded() { + // Selecting real note 0 should mark both head (padded[0]) and padded[1] frames. + var paddedDurations = new[] { 2, 3, 4, 3, 2 }; + int totalFrames = paddedDurations.Sum(); + var indexes = new HashSet { 0 }; + + var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, 3, indexes, totalFrames); + + Assert.True(mask[0]); + Assert.True(mask[1]); // head padded → real 0 + Assert.True(mask[2]); + Assert.True(mask[4]); // padded[1] → real 0 + Assert.False(mask[5]); // padded[2] → real 1, not selected + } + + [Fact] + public void BuildRetakeFrameMask_LastRealNoteSelected_TailPadIncluded() { + var paddedDurations = new[] { 2, 3, 4, 3, 2 }; + int totalFrames = paddedDurations.Sum(); + var indexes = new HashSet { 2 }; // last real note + + var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, 3, indexes, totalFrames); + + Assert.False(mask[8]); + Assert.True(mask[9]); // padded[3] → real 2 + Assert.True(mask[11]); + Assert.True(mask[12]); // padded[4] tail → real 2 + Assert.True(mask[13]); + } + + [Fact] + public void BuildRetakeFrameMask_ClampsFramesPastTotal() { + // paddedDurations sum to 10 but totalFrames is 8 (simulating FitDurationSum trim). + var paddedDurations = new[] { 2, 4, 4 }; + var indexes = new HashSet { 0 }; + + var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, 1, indexes, 8); + + Assert.Equal(8, mask.Length); + // Should not throw; frames past totalFrames silently dropped. + Assert.True(mask[0]); + Assert.True(mask[5]); + } + + [Fact] + public void BuildRetakeFrameMask_EmptyDurations_ReturnsAllFalse() { + var mask = DiffSingerRetake.BuildRetakeFrameMask(new int[0], 0, new HashSet { 0 }, 4); + Assert.Equal(4, mask.Length); + Assert.All(mask, b => Assert.False(b)); + } + } +} From 6e3289d72a81ad68bce3a92af433c2b367451d72 Mon Sep 17 00:00:00 2001 From: KakaruHayate Date: Sat, 6 Jun 2026 12:13:05 +0800 Subject: [PATCH 3/3] Address Copilot review feedback on pitch retake - Fix retake frame mask incorrectly indexing past gap-rest segments inserted in noteDurMsList; callers now supply a paddedToRealNoteIndex array kept in lockstep with the padded duration list. Gap rests follow the preceding real note for continuity. - Mark nullable parameters on Process / BuildRetakeFrameMask / MapSelectedPositionsToNoteIndexes and the retakeMask field. - Guard retakeMask bounds in NoteBatchEdits to tolerate shorter masks. - Move retake tests into OpenUtau.Core.DiffSinger namespace and add coverage for gap-rest mapping and the -1 sentinel. --- OpenUtau.Core/DiffSinger/DiffSingerPitch.cs | 17 +++-- OpenUtau.Core/DiffSinger/DiffSingerRetake.cs | 25 +++---- OpenUtau.Core/Editing/NoteBatchEdits.cs | 2 +- OpenUtau.Core/Render/IRenderer.cs | 2 +- .../Core/DiffSinger/DiffSingerRetakeTest.cs | 67 ++++++++++++++++--- 5 files changed, 83 insertions(+), 30 deletions(-) diff --git a/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs b/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs index d892680ba..d50b30007 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs @@ -109,7 +109,7 @@ int PhonemeTokenize(string phoneme){ return token; } - public RenderPitchResult Process(RenderPhrase phrase, HashSet retakeNoteIndexes = null, float[] existingPitch = null){ + public RenderPitchResult Process(RenderPhrase phrase, HashSet? retakeNoteIndexes = null, float[]? existingPitch = null){ var startMs = phrase.phones[0].positionMs - DiffSingerUtils.GetHeadMs(frameMs); int headFrames = DiffSingerUtils.headFrames; int tailFrames = DiffSingerUtils.tailFrames; @@ -186,21 +186,29 @@ public RenderPitchResult Process(RenderPhrase phrase, HashSet retakeNoteInd var noteDurMsList = new List(); var noteMidiList = new List(); var noteRestList = new List(); + //paddedToRealNoteIndex is kept in lockstep with noteDurMsList so the retake + //frame mask can map each padded segment to the real note it belongs to. + //Gap-rest segments inserted below follow the preceding real note. + var paddedToRealNoteIndex = new List(); //Head padding noteDurMsList.Add(Math.Max(0, phrase.notes[0].positionMs - startMs)); noteMidiList.Add(phrase.notes[0].adjustedTone); noteRestList.Add(true); + paddedToRealNoteIndex.Add(0); double prevNoteEndMs = phrase.notes[0].positionMs; - foreach (var note in phrase.notes) { + for (int realIdx = 0; realIdx < phrase.notes.Length; realIdx++) { + var note = phrase.notes[realIdx]; double gapMs = note.positionMs - prevNoteEndMs; if (gapMs > 0) { - //Insert a rest note for the gap + //Insert a rest note for the gap; associate it with the previous real note noteDurMsList.Add(gapMs); noteMidiList.Add(note.adjustedTone); noteRestList.Add(true); + paddedToRealNoteIndex.Add(realIdx - 1); } noteDurMsList.Add(note.durationMs); noteMidiList.Add(note.adjustedTone); + paddedToRealNoteIndex.Add(realIdx); //Slur notes follow the previous note's rest status if (note.lyric.StartsWith("+")) { noteRestList.Add(noteRestList[^1]); @@ -219,6 +227,7 @@ public RenderPitchResult Process(RenderPhrase phrase, HashSet retakeNoteInd noteDurMsList.Add(DiffSingerUtils.GetTailMs(frameMs)); noteMidiList.Add(phrase.notes[^1].adjustedTone); noteRestList.Add(true); + paddedToRealNoteIndex.Add(phrase.notes.Length - 1); //Set tone for each rest group using nearest non-rest note var note_rest = noteRestList; @@ -255,7 +264,7 @@ public RenderPitchResult Process(RenderPhrase phrase, HashSet retakeNoteInd var retake = Enumerable.Repeat(true, totalFrames).ToArray(); if (retakeNoteIndexes != null && existingPitch != null) { retake = DiffSingerRetake.BuildRetakeFrameMask( - note_dur, phrase.notes.Length, retakeNoteIndexes, totalFrames); + note_dur, paddedToRealNoteIndex, retakeNoteIndexes, totalFrames); for (int i = 0; i < totalFrames && i < existingPitch.Length; i++) { pitch[i] = existingPitch[i]; } diff --git a/OpenUtau.Core/DiffSinger/DiffSingerRetake.cs b/OpenUtau.Core/DiffSinger/DiffSingerRetake.cs index bb344739e..d5787906b 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerRetake.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerRetake.cs @@ -5,7 +5,7 @@ public static class DiffSingerRetake { public static HashSet MapSelectedPositionsToNoteIndexes( int phrasePosition, IReadOnlyList noteRelativePositions, - IReadOnlyCollection selectedAbsolutePositions) { + IReadOnlyCollection? selectedAbsolutePositions) { var result = new HashSet(); if (selectedAbsolutePositions == null || selectedAbsolutePositions.Count == 0) { return result; @@ -19,10 +19,13 @@ public static HashSet MapSelectedPositionsToNoteIndexes( return result; } + // paddedToRealNoteIndex must be the same length as paddedNoteDurations. + // Each entry is the real-note index the padded segment should follow for retake purposes, + // or -1 for a segment that is never retaken regardless of selection. public static bool[] BuildRetakeFrameMask( IReadOnlyList paddedNoteDurations, - int realNoteCount, - IReadOnlyCollection retakeNoteIndexes, + IReadOnlyList paddedToRealNoteIndex, + IReadOnlyCollection? retakeNoteIndexes, int totalFrames) { var mask = new bool[totalFrames]; if (retakeNoteIndexes == null || retakeNoteIndexes.Count == 0 || paddedNoteDurations.Count == 0) { @@ -31,17 +34,10 @@ public static bool[] BuildRetakeFrameMask( var lookup = retakeNoteIndexes as ISet ?? new HashSet(retakeNoteIndexes); int padded = paddedNoteDurations.Count; int frameOffset = 0; - for (int noteIdx = 0; noteIdx < padded; noteIdx++) { - int realIdx; - if (noteIdx == 0) { - realIdx = 0; - } else if (noteIdx == padded - 1) { - realIdx = realNoteCount - 1; - } else { - realIdx = noteIdx - 1; - } - bool shouldRetake = lookup.Contains(realIdx); - int dur = paddedNoteDurations[noteIdx]; + for (int segIdx = 0; segIdx < padded; segIdx++) { + int realIdx = paddedToRealNoteIndex[segIdx]; + bool shouldRetake = realIdx >= 0 && lookup.Contains(realIdx); + int dur = paddedNoteDurations[segIdx]; for (int f = 0; f < dur; f++) { int fi = frameOffset + f; if (fi < totalFrames) { @@ -54,4 +50,3 @@ public static bool[] BuildRetakeFrameMask( } } } - diff --git a/OpenUtau.Core/Editing/NoteBatchEdits.cs b/OpenUtau.Core/Editing/NoteBatchEdits.cs index 9ad4f4b9b..78a4d8b44 100644 --- a/OpenUtau.Core/Editing/NoteBatchEdits.cs +++ b/OpenUtau.Core/Editing/NoteBatchEdits.cs @@ -502,7 +502,7 @@ public void RunAsync( if (result.tones[i] < 0) { continue; } - if (result.retakeMask != null && !result.retakeMask[i]) { + if (result.retakeMask != null && i < result.retakeMask.Length && !result.retakeMask[i]) { continue; } int x = phrase.position - part.position + (int)result.ticks[i]; diff --git a/OpenUtau.Core/Render/IRenderer.cs b/OpenUtau.Core/Render/IRenderer.cs index 4ecda7d4f..9579a9171 100644 --- a/OpenUtau.Core/Render/IRenderer.cs +++ b/OpenUtau.Core/Render/IRenderer.cs @@ -44,7 +44,7 @@ public class RenderPitchResult { /// /// Per-frame mask indicating retaken frames. Null means full retake. /// - public bool[] retakeMask; + public bool[]? retakeMask; } public class RenderRealCurveResult { diff --git a/OpenUtau.Test/Core/DiffSinger/DiffSingerRetakeTest.cs b/OpenUtau.Test/Core/DiffSinger/DiffSingerRetakeTest.cs index b21a16360..88d1c4879 100644 --- a/OpenUtau.Test/Core/DiffSinger/DiffSingerRetakeTest.cs +++ b/OpenUtau.Test/Core/DiffSinger/DiffSingerRetakeTest.cs @@ -1,9 +1,8 @@ using System.Collections.Generic; using System.Linq; -using OpenUtau.Core.DiffSinger; using Xunit; -namespace OpenUtau.Core { +namespace OpenUtau.Core.DiffSinger { public class DiffSingerRetakeTest { [Fact] public void MapSelectedPositionsToNoteIndexes_PicksMatchingNotes() { @@ -32,10 +31,11 @@ public void MapSelectedPositionsToNoteIndexes_HandlesNullSelected() { [Fact] public void BuildRetakeFrameMask_AllSelected_AllTrue() { var paddedDurations = new[] { 2, 5, 5, 2 }; + var paddedToReal = new[] { 0, 0, 1, 1 }; var totalFrames = paddedDurations.Sum(); var indexes = new HashSet { 0, 1 }; - var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, 2, indexes, totalFrames); + var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, paddedToReal, indexes, totalFrames); Assert.Equal(totalFrames, mask.Length); Assert.All(mask, b => Assert.True(b)); @@ -44,10 +44,11 @@ public void BuildRetakeFrameMask_AllSelected_AllTrue() { [Fact] public void BuildRetakeFrameMask_NoneSelected_AllFalse() { var paddedDurations = new[] { 2, 5, 5, 2 }; + var paddedToReal = new[] { 0, 0, 1, 1 }; var totalFrames = paddedDurations.Sum(); var indexes = new HashSet(); - var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, 2, indexes, totalFrames); + var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, paddedToReal, indexes, totalFrames); Assert.Equal(totalFrames, mask.Length); Assert.All(mask, b => Assert.False(b)); @@ -59,10 +60,11 @@ public void BuildRetakeFrameMask_PartialSelected_RespectsHeadTailPaddingShift() // Mapping: padded[0] → real 0 (head), padded[1] → real 0, padded[2] → real 1, // padded[3] → real 2, padded[4] → real 2 (tail). var paddedDurations = new[] { 2, 3, 4, 3, 2 }; // 14 frames total + var paddedToReal = new[] { 0, 0, 1, 2, 2 }; int totalFrames = paddedDurations.Sum(); var indexes = new HashSet { 1 }; // retake only middle real note - var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, 3, indexes, totalFrames); + var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, paddedToReal, indexes, totalFrames); // padded[0] (frames 0-1, head→real 0) → false Assert.False(mask[0]); @@ -85,10 +87,11 @@ public void BuildRetakeFrameMask_PartialSelected_RespectsHeadTailPaddingShift() public void BuildRetakeFrameMask_FirstRealNoteSelected_HeadPadIncluded() { // Selecting real note 0 should mark both head (padded[0]) and padded[1] frames. var paddedDurations = new[] { 2, 3, 4, 3, 2 }; + var paddedToReal = new[] { 0, 0, 1, 2, 2 }; int totalFrames = paddedDurations.Sum(); var indexes = new HashSet { 0 }; - var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, 3, indexes, totalFrames); + var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, paddedToReal, indexes, totalFrames); Assert.True(mask[0]); Assert.True(mask[1]); // head padded → real 0 @@ -100,10 +103,11 @@ public void BuildRetakeFrameMask_FirstRealNoteSelected_HeadPadIncluded() { [Fact] public void BuildRetakeFrameMask_LastRealNoteSelected_TailPadIncluded() { var paddedDurations = new[] { 2, 3, 4, 3, 2 }; + var paddedToReal = new[] { 0, 0, 1, 2, 2 }; int totalFrames = paddedDurations.Sum(); var indexes = new HashSet { 2 }; // last real note - var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, 3, indexes, totalFrames); + var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, paddedToReal, indexes, totalFrames); Assert.False(mask[8]); Assert.True(mask[9]); // padded[3] → real 2 @@ -116,9 +120,10 @@ public void BuildRetakeFrameMask_LastRealNoteSelected_TailPadIncluded() { public void BuildRetakeFrameMask_ClampsFramesPastTotal() { // paddedDurations sum to 10 but totalFrames is 8 (simulating FitDurationSum trim). var paddedDurations = new[] { 2, 4, 4 }; + var paddedToReal = new[] { 0, 0, 0 }; var indexes = new HashSet { 0 }; - var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, 1, indexes, 8); + var mask = DiffSingerRetake.BuildRetakeFrameMask(paddedDurations, paddedToReal, indexes, 8); Assert.Equal(8, mask.Length); // Should not throw; frames past totalFrames silently dropped. @@ -128,9 +133,53 @@ public void BuildRetakeFrameMask_ClampsFramesPastTotal() { [Fact] public void BuildRetakeFrameMask_EmptyDurations_ReturnsAllFalse() { - var mask = DiffSingerRetake.BuildRetakeFrameMask(new int[0], 0, new HashSet { 0 }, 4); + var mask = DiffSingerRetake.BuildRetakeFrameMask( + new int[0], new int[0], new HashSet { 0 }, 4); Assert.Equal(4, mask.Length); Assert.All(mask, b => Assert.False(b)); } + + [Fact] + public void BuildRetakeFrameMask_GapRestFollowsPreviousNote() { + // Real notes [0, 1] with a gap between them. + // Padded layout: head→0, real 0, gap→0 (follows prev), real 1, tail→1. + var paddedDurations = new[] { 2, 3, 2, 4, 2 }; // 13 frames total + var paddedToReal = new[] { 0, 0, 0, 1, 1 }; + int totalFrames = paddedDurations.Sum(); + + // Select real note 0 only: head + real 0 + gap should retake; real 1 + tail should not. + var maskSelectFirst = DiffSingerRetake.BuildRetakeFrameMask( + paddedDurations, paddedToReal, new HashSet { 0 }, totalFrames); + Assert.True(maskSelectFirst[0]); // head + Assert.True(maskSelectFirst[4]); // real 0 + Assert.True(maskSelectFirst[5]); // gap (follows real 0) + Assert.True(maskSelectFirst[6]); // gap + Assert.False(maskSelectFirst[7]); // real 1 + Assert.False(maskSelectFirst[12]); // tail (follows real 1) + + // Select real note 1 only: gap stays unretaken (it follows real 0). + var maskSelectSecond = DiffSingerRetake.BuildRetakeFrameMask( + paddedDurations, paddedToReal, new HashSet { 1 }, totalFrames); + Assert.False(maskSelectSecond[5]); // gap not retaken + Assert.False(maskSelectSecond[6]); + Assert.True(maskSelectSecond[7]); // real 1 + Assert.True(maskSelectSecond[12]); // tail + } + + [Fact] + public void BuildRetakeFrameMask_SegmentMarkedMinusOne_NeverRetakes() { + // -1 in paddedToRealNoteIndex means "never retake this segment". + var paddedDurations = new[] { 2, 3, 2, 4, 2 }; + var paddedToReal = new[] { 0, 0, -1, 1, 1 }; // gap as -1 + int totalFrames = paddedDurations.Sum(); + + var mask = DiffSingerRetake.BuildRetakeFrameMask( + paddedDurations, paddedToReal, new HashSet { 0, 1 }, totalFrames); + + Assert.True(mask[4]); // real 0 + Assert.False(mask[5]); // gap (-1) + Assert.False(mask[6]); + Assert.True(mask[7]); // real 1 + } } }