@@ -790,29 +790,40 @@ int main(int argc, char ** argv) {
790790 prompt = build_lm_prompt (bpe, ace);
791791 }
792792 std::vector<int > uncond;
793- float fill_cfg = cfg_scale;
794- float fill_top_p = top_p;
795- int fill_top_k = top_k;
796- if (need_lyrics) {
797- // lyrics generation: free sampling, no CFG (matches original behavior)
798- fill_cfg = 1 .0f ;
799- fill_top_p = 1 .0f ;
800- fill_top_k = 0 ;
801- } else if (fill_cfg > 1 .0f ) {
793+
794+ // Disable CFG for ANY textual expansion (lyrics OR CoT reasoning),
795+ // as CFG distorts text logits and forces premature newlines.
796+ float fill_cfg = (need_lyrics || req.use_cot_caption ) ? 1 .0f : cfg_scale;
797+ float fill_top_p = top_p;
798+ int fill_top_k = top_k;
799+
800+ if (fill_cfg > 1 .0f ) {
802801 uncond = build_lm_prompt_uncond (bpe, ace, neg_prompt);
803802 }
804803
805804 fsm.reset ();
806- if (need_lyrics && use_fsm && ace.vocal_language != " unknown" && !ace.vocal_language .empty ()) {
807- fsm.force_language (bpe, ace.vocal_language );
805+ MetadataFSM * active_fsm = nullptr ;
806+
807+ if (use_fsm) {
808+ if (need_lyrics) {
809+ // Free text for lyrics. Only use FSM if strictly forcing language.
810+ if (ace.vocal_language != " unknown" && !ace.vocal_language .empty ()) {
811+ fsm.force_language (bpe, ace.vocal_language );
812+ active_fsm = &fsm;
813+ }
814+ } else {
815+ if (!req.use_cot_caption ) {
816+ active_fsm = &fsm;
817+ }
818+ }
808819 }
809820
810821 fprintf (stderr, " [Fill] lyrics=%s metas=%s | %zu tokens, CFG: %.2f, N=%d\n " , need_lyrics ? " generate" : " keep" ,
811822 has_all_metas ? " complete" : " fill gaps" , prompt.size (), fill_cfg, batch_size);
812823
813- auto phase1_texts = generate_phase1_batch (&model, &bpe, prompt, 2048 , temperature, fill_top_p, fill_top_k, seed,
814- batch_size, use_fsm ? &fsm : nullptr , need_lyrics, fill_cfg ,
815- uncond.empty () ? nullptr : &uncond, !need_lyrics);
824+ auto phase1_texts =
825+ generate_phase1_batch (&model, &bpe, prompt, 2048 , temperature, fill_top_p, fill_top_k, seed, batch_size ,
826+ active_fsm, need_lyrics, fill_cfg, uncond.empty () ? nullptr : &uncond, !need_lyrics);
816827
817828 parse_phase1_into_aces (phase1_texts, ace, aces, seed, " Fill" , need_lyrics, req.use_cot_caption );
818829
0 commit comments