diff --git a/llvm/test/CodeGen/AIE/aie2ps/conv2d-outer-loop.ll b/llvm/test/CodeGen/AIE/aie2ps/conv2d-outer-loop.ll index 10b3d045ad97..e3d0315d7b29 100644 --- a/llvm/test/CodeGen/AIE/aie2ps/conv2d-outer-loop.ll +++ b/llvm/test/CodeGen/AIE/aie2ps/conv2d-outer-loop.ll @@ -124,21 +124,21 @@ define void @conv2d_outer_loop(ptr noalias %ifm, ptr noalias %wts, ptr noalias % ; CHECK-NEXT: lda r26, [p3], m1 ; CHECK-NEXT: lda m1, [p3], #-8 ; CHECK-NEXT: lda dn1, [p3], #-8; movx crupsmode, #0 -; CHECK-NEXT: lda dj1, [p3], #12; movx crsrsmode, #0; mov r3, #2 -; CHECK-NEXT: lda dn5, [p3], #-8; lshl r5, r20, r3; mov m2, #32 -; CHECK-NEXT: lda dj5, [p3], m2; lshl r24, r24, r3 +; CHECK-NEXT: lda dj1, [p3], #12; movx crsrsmode, #0; mov r1, #2 +; CHECK-NEXT: lda dn5, [p3], #-8; lshl r5, r20, r1; mov m2, #32 +; CHECK-NEXT: lda dj5, [p3], m2; lshl r24, r24, r1 ; CHECK-NEXT: lda m2, [p3], #-8; add r24, r5, r24 ; CHECK-NEXT: lda dn2, [p3], #-8; geu r5, r16, r18 ; CHECK-NEXT: lda dj2, [p3], #12; movx r16, #0; mov dj3, #-158 ; CHECK-NEXT: lda dn6, [p3], #-8; sub r16, r16, r20; mov dc3, #0 -; CHECK-NEXT: lda.s8 r1, [p3, dj3]; lshl r16, r16, r3; mov p6, p5 +; CHECK-NEXT: lda.s8 r3, [p3, dj3]; lshl r16, r16, r1; mov p6, p5 ; CHECK-NEXT: mova r18, #9; movs dc6, dc3; lshl r20, r5, r28; mov m3, r24 ; CHECK-NEXT: mova r24, #256; movs dc2, dc3; lshl r18, r30, r18; mov dc5, dc3 ; CHECK-NEXT: mova r16, #7; paddb [p1], m3; or r8, r18, r24; vbcst.32 x0, r20; movs m3, r16 ; CHECK-NEXT: mova r10, #264; movs dc1, dc3; lshl r28, r26, r16; mov s0, r22 ; CHECK-NEXT: mova r16, #16; movs p2, p1; add r18, r26, #-1; mov r30, #63 ; CHECK-NEXT: padda [p2], m3; movs dc0, dc3; or r24, r0, r0; mov m3, r28 -; CHECK-NEXT: lda dj6, [p3, #0]; movs p3, p4; or r12, r8, r5; mov s1, r1 +; CHECK-NEXT: lda dj6, [p3, #0]; movs p3, p4; or r12, r8, r5; mov s1, r3 ; CHECK-NEXT: .LBB0_8: // %for.body.i68 ; CHECK-NEXT: // =>This Loop Header: Depth=1 ; CHECK-NEXT: // Child Loop BB0_9 Depth 2 diff --git 
a/llvm/test/CodeGen/AIE/aie2ps/schedule/postpipeliner/avgpool2d_bf16.ll b/llvm/test/CodeGen/AIE/aie2ps/schedule/postpipeliner/avgpool2d_bf16.ll index f1188407ff7c..a45174120c8e 100644 --- a/llvm/test/CodeGen/AIE/aie2ps/schedule/postpipeliner/avgpool2d_bf16.ll +++ b/llvm/test/CodeGen/AIE/aie2ps/schedule/postpipeliner/avgpool2d_bf16.ll @@ -30,9 +30,9 @@ define weak_odr dso_local void @_Z9avgpool2dILh1E8bfloat16Qsr5mllib5utilsE11is_o ; CHECK-LABEL: _Z9avgpool2dILh1E8bfloat16Qsr5mllib5utilsE11is_one_of_vIT0_ahS0_7float16EEvPS1_S3_R25avgpool2d_internal_paramsIS1_E: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: paddxm [sp], #64 -; CHECK-NEXT: st r10, [sp, #-52] // 4-byte Folded Spill -; CHECK-NEXT: lda.u8 r0, [p2, #0]; st p6, [sp, #-44]; mov m0, #46 // 4-byte Folded Spill -; CHECK-NEXT: mova m0, #-42; paddb [p2], m0; st p7, [sp, #-40] // 4-byte Folded Spill +; CHECK-NEXT: st p6, [sp, #-44] // 4-byte Folded Spill +; CHECK-NEXT: lda.u8 r0, [p2, #0]; st p7, [sp, #-40]; mov m0, #46 // 4-byte Folded Spill +; CHECK-NEXT: mova m0, #-42; paddb [p2], m0; st r10, [sp, #-52] // 4-byte Folded Spill ; CHECK-NEXT: lda.s16 r2, [p2], m0; st r8, [sp, #-60] // 4-byte Folded Spill ; CHECK-NEXT: st lr, [sp, #-64] // 4-byte Folded Spill ; CHECK-NEXT: st r9, [sp, #-56] // 4-byte Folded Spill diff --git a/llvm/test/TableGen/aie-variable-instr-itinerary.td b/llvm/test/TableGen/aie-variable-instr-itinerary.td index 7a1cf6f2478a..13ef0c8483c5 100644 --- a/llvm/test/TableGen/aie-variable-instr-itinerary.td +++ b/llvm/test/TableGen/aie-variable-instr-itinerary.td @@ -18,12 +18,60 @@ def II_MOV : InstrItinClass; def II_MOV_GPR : InstrItinClass; def II_MOV_PTR : InstrItinClass; def II_OTHER : InstrItinClass; +def II_ADD : InstrItinClass; +def II_ADD_GPR : InstrItinClass; +def II_ADD_PTR : InstrItinClass; -def AIEItineraries : ProcessorItineraries<[], [], [ +// STAGE tests: verify that the equivalence signature is based on stage field +// values (Cycles, Units, TimeInc, TM), not on stage record names. 
+// +// Two itineraries are equivalent iff every stage field matches. Note that +// TimeInc defines when the *next* stage begins, so the TimeInc of the last +// stage in a sequence is irrelevant. +// +// II_STAGE_A and II_STAGE_B have identical flat data, so they are equivalent +// even though each InstrStage instantiation creates a distinct anonymous record. +// +// The remaining pairs (CYC, FU, TINC, EMP) each test one kind of non-equiv- +// alence: different Cycles, different FuncUnits, different TimeInc on a +// non-last stage, and different empty-cycle counts respectively. +def FU_TEST : FuncUnit; +def FU_TEST2 : FuncUnit; + +def II_STAGE_A : InstrItinClass; +def II_STAGE_B : InstrItinClass; +def II_STAGE_CYC2 : InstrItinClass; +def II_STAGE_EMP1 : InstrItinClass; +def II_STAGE_EMP4 : InstrItinClass; +def II_STAGE_FU2 : InstrItinClass; +def II_STAGE_TINC0 : InstrItinClass; +def II_STAGE_TINC1 : InstrItinClass; + +def AIEItineraries : ProcessorItineraries<[FU_TEST, FU_TEST2], [], [ InstrItinData, InstrItinData, InstrItinData, - InstrItinData + InstrItinData, + InstrItinData, + InstrItinData, + InstrItinData, + // Equivalent pair: identical stage data (Cycles=1, FU_TEST, TimeInc=0). + InstrItinData], [1]>, + InstrItinData], [1]>, + // Non-equivalent: Cycles=2 vs Cycles=1. + InstrItinData], [1]>, + // Non-equivalent: FU_TEST2 vs FU_TEST. + InstrItinData], [1]>, + // Non-equivalent: TimeInc=0 vs TimeInc=1 on the *first* (non-last) stage. + // TimeInc defines when the next stage begins; it is meaningful only for + // stages that are followed by another stage. + InstrItinData, InstrStage<1, [FU_TEST], 1>], [1]>, + InstrItinData, InstrStage<1, [FU_TEST], 1>], [1]>, + // Non-equivalent: empty-cycle prefix with TimeInc=1 vs TimeInc=4. + // An empty-cycle stage (Cycles=0) uses TimeInc as a pure cycle-counter + // advance without consuming a functional unit. 
+ InstrItinData, InstrStage<1, [FU_TEST], 1>], [1]>, + InstrItinData, InstrStage<1, [FU_TEST], 1>], [1]> ]>; def AIESchedModel : SchedMachineModel { @@ -75,99 +123,234 @@ let Itinerary = II_MOV, ItineraryRegPairs = [ItinRegClassPair; } +// ADD instructions: II_ADD_GPR and II_ADD_PTR have different operand cycles, +// so they are non-equivalent itineraries and will be distinguished in the output. +class ADDInstruction : TestInstruction { + int Inst = 2; + let OutOperandList = (outs ); + let InOperandList = (ins GPR:$src1, GPR:$src2); +} + +let Itinerary = II_ADD in + def ADD_instr : ADDInstruction<>; + +let Itinerary = II_ADD, ItineraryRegPairs = [ItinRegClassPair]>] in + def ADD_instr_GPR : ADDInstruction<>; + +let Itinerary = II_ADD, ItineraryRegPairs = [ItinRegClassPair]>] in + def ADD_instr_PTR : ADDInstruction<>; + +let Itinerary = II_ADD, ItineraryRegPairs = [ItinRegClassPair]>, ItinRegClassPair]>] in + def ADD_instr_GPR_PTR : ADDInstruction<>; + +let Itinerary = II_ADD, ItineraryRegPairs = [ItinRegClassPair]>] in { + def ADD_instr_GPR_1 : ADDInstruction<>; + def ADD_instr_GPR_2 : ADDInstruction<>; +} + +// STAGE instructions: one per equivalence/non-equivalence scenario. +class STAGEInstruction : TestInstruction { + int Inst = 3; + let OutOperandList = (outs); + let InOperandList = (ins GPR:$val); +} + +// II_STAGE_A (Cycles=1) vs II_STAGE_CYC2 (Cycles=2): non-equivalent. +let Itinerary = II_STAGE_A, ItineraryRegPairs = [ + ItinRegClassPair]>, + ItinRegClassPair]>] in + def STAGE_cyc_instr : STAGEInstruction<>; + +// II_STAGE_EMP1 (TimeInc=1 on empty) vs II_STAGE_EMP4 (TimeInc=4): non-equiv. +let Itinerary = II_STAGE_EMP1, ItineraryRegPairs = [ + ItinRegClassPair]>, + ItinRegClassPair]>] in + def STAGE_emp_instr : STAGEInstruction<>; + +// II_STAGE_A ≡ II_STAGE_B (same stage data): both appear under II_STAGE_A. 
+let Itinerary = II_STAGE_A, ItineraryRegPairs = [ + ItinRegClassPair]>, + ItinRegClassPair]>] in + def STAGE_equiv_instr : STAGEInstruction<>; + +// II_STAGE_A (FU_TEST) vs II_STAGE_FU2 (FU_TEST2): non-equivalent. +let Itinerary = II_STAGE_A, ItineraryRegPairs = [ + ItinRegClassPair]>, + ItinRegClassPair]>] in + def STAGE_fu_instr : STAGEInstruction<>; + +// II_STAGE_TINC0 (first stage TimeInc=0) vs II_STAGE_TINC1 (TimeInc=1): non-equiv. +let Itinerary = II_STAGE_TINC0, ItineraryRegPairs = [ + ItinRegClassPair]>, + ItinRegClassPair]>] in + def STAGE_tinc_instr : STAGEInstruction<>; + +// II_ADD_GPR and II_ADD_PTR have distinct operand cycles ([2,1] vs [2,2]), +// so they appear as separate entries in OperandRCRequirements (non-equivalence). +// II_MOV_GPR and II_MOV_PTR have the same operand cycles ([1,1]), so they are +// equivalent and both appear under the II_MOV_GPR label (equivalence). + // CHECK: static constexpr llvm::OperandRCRequirement const OperandRCRequirements[] = { -// CHECK-NEXT: // II_MOV_GPR 0 +// CHECK-NEXT: // II_ADD_GPR 0 +// CHECK-NEXT: {0, &TestAIE::GPRRegClass}, +// CHECK-NEXT: // II_ADD_GPR 1 +// CHECK-NEXT: {0, &TestAIE::GPRRegClass}, +// CHECK-NEXT: // II_ADD_GPR 2 +// CHECK-NEXT: {0, &TestAIE::GPRRegClass}, +// CHECK-NEXT: // II_ADD_GPR 3 +// CHECK-NEXT: {0, &TestAIE::GPRRegClass}, +// CHECK-NEXT: // II_ADD_PTR 4 +// CHECK-NEXT: {0, &TestAIE::PTRRegClass}, +// CHECK-NEXT: // II_ADD_PTR 5 +// CHECK-NEXT: {0, &TestAIE::PTRRegClass}, +// CHECK-NEXT: // II_MOV_GPR 6 // CHECK-NEXT: {0, &TestAIE::GPRRegClass}, // CHECK-NEXT: {1, &TestAIE::PTRRegClass}, -// CHECK-NEXT: // II_MOV_GPR 2 +// CHECK-NEXT: // II_MOV_GPR 8 // CHECK-NEXT: {0, &TestAIE::GPRRegClass}, -// CHECK-NEXT: // II_MOV_GPR 3 +// CHECK-NEXT: // II_MOV_GPR 9 // CHECK-NEXT: {0, &TestAIE::GPRRegClass}, -// CHECK-NEXT: // II_MOV_GPR 4 +// CHECK-NEXT: // II_MOV_GPR 10 // CHECK-NEXT: {0, &TestAIE::GPRRegClass}, -// CHECK-NEXT: // II_MOV_GPR 5 +// CHECK-NEXT: // II_MOV_GPR 11 // 
CHECK-NEXT: {1, &TestAIE::GPRRegClass}, -// CHECK-NEXT: // II_MOV_PTR 6 +// CHECK-NEXT: // II_MOV_GPR 12 // CHECK-NEXT: {1, &TestAIE::PTRRegClass}, -// CHECK-NEXT: // II_MOV_GPR 7 +// CHECK-NEXT: // II_MOV_GPR 13 // CHECK-NEXT: {1, &TestAIE::GPRRegClass}, -// CHECK-NEXT: // II_MOV_PTR 8 +// CHECK-NEXT: // II_MOV_GPR 14 // CHECK-NEXT: {1, &TestAIE::PTRRegClass}, -// CHECK-NEXT: // II_MOV_GPR 9 +// CHECK-NEXT: // II_MOV_GPR 15 // CHECK-NEXT: {1, &TestAIE::GPRRegClass}, -// CHECK-NEXT: // II_MOV_PTR 10 +// CHECK-NEXT: // II_MOV_GPR 16 // CHECK-NEXT: {1, &TestAIE::PTRRegClass}, -// CHECK-NEXT: // II_MOV_GPR 11 +// CHECK-NEXT: // II_MOV_GPR 17 // CHECK-NEXT: {0, &TestAIE::GPRRegClass}, // CHECK-NEXT: {1, &TestAIE::PTRRegClass}, -// CHECK-NEXT: // II_MOV_PTR 13 +// CHECK-NEXT: // II_MOV_GPR 19 // CHECK-NEXT: {1, &TestAIE::GPRRegClass}, // CHECK-NEXT: {0, &TestAIE::PTRRegClass}, -// CHECK-NEXT: // II_MOV_GPR 15 +// CHECK-NEXT: // II_MOV_GPR 21 // CHECK-NEXT: {0, &TestAIE::GPRRegClass}, // CHECK-NEXT: {1, &TestAIE::PTRRegClass}, -// CHECK-NEXT: // II_MOV_PTR 17 +// CHECK-NEXT: // II_MOV_GPR 23 // CHECK-NEXT: {1, &TestAIE::GPRRegClass}, // CHECK-NEXT: {0, &TestAIE::PTRRegClass}, -// CHECK-NEXT: // II_MOV_GPR 19 +// CHECK-NEXT: // II_MOV_GPR 25 // CHECK-NEXT: {0, &TestAIE::GPRRegClass}, // CHECK-NEXT: {1, &TestAIE::PTRRegClass}, -// CHECK-NEXT: // II_MOV_PTR 21 +// CHECK-NEXT: // II_MOV_GPR 27 // CHECK-NEXT: {1, &TestAIE::GPRRegClass}, // CHECK-NEXT: {0, &TestAIE::PTRRegClass}, -// CHECK-NEXT: // II_MOV_GPR 23 +// CHECK-NEXT: // II_MOV_GPR 29 // CHECK-NEXT: {0, &TestAIE::PTRRegClass}, // CHECK-NEXT: {1, &TestAIE::GPRRegClass}, -// CHECK-NEXT: // II_MOV_PTR 25 +// CHECK-NEXT: // II_MOV_PTR 31 +// CHECK-NEXT: {0, &TestAIE::PTRRegClass}, +// CHECK-NEXT: // II_STAGE_A 32 +// CHECK-NEXT: {0, &TestAIE::GPRRegClass}, +// CHECK-NEXT: // II_STAGE_CYC2 33 +// CHECK-NEXT: {0, &TestAIE::PTRRegClass}, +// CHECK-NEXT: // II_STAGE_EMP1 34 +// CHECK-NEXT: {0, &TestAIE::GPRRegClass}, +// 
CHECK-NEXT: // II_STAGE_EMP4 35 +// CHECK-NEXT: {0, &TestAIE::PTRRegClass}, +// CHECK-NEXT: // II_STAGE_A 36 +// CHECK-NEXT: {0, &TestAIE::GPRRegClass}, +// CHECK-NEXT: // II_STAGE_A 37 +// CHECK-NEXT: {0, &TestAIE::PTRRegClass}, +// CHECK-NEXT: // II_STAGE_A 38 +// CHECK-NEXT: {0, &TestAIE::GPRRegClass}, +// CHECK-NEXT: // II_STAGE_FU2 39 +// CHECK-NEXT: {0, &TestAIE::PTRRegClass}, +// CHECK-NEXT: // II_STAGE_TINC0 40 +// CHECK-NEXT: {0, &TestAIE::GPRRegClass}, +// CHECK-NEXT: // II_STAGE_TINC1 41 // CHECK-NEXT: {0, &TestAIE::PTRRegClass}, // CHECK-NEXT: }; // CHECK-EMPTY: // CHECK-NEXT: static constexpr SchedVariantInfo const SchedVariants[] = { -// CHECK-NEXT: // MOV_instr_GP 0 -// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[0], 2)}, -// CHECK-NEXT: // MOV_instr_GPR 1 -// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[2], 1)}, -// CHECK-NEXT: // MOV_instr_GPR_1 2 -// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[3], 1)}, -// CHECK-NEXT: // MOV_instr_GPR_2 3 -// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[4], 1)}, -// CHECK-NEXT: // MOV_instr_GPR_PTR 4 -// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[5], 1)}, -// CHECK-NEXT: {TestAIE::Sched::II_MOV_PTR, llvm::ArrayRef(&OperandRCRequirements[6], 1)}, -// CHECK-NEXT: // MOV_instr_GPR_PTR_1 6 -// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[7], 1)}, -// CHECK-NEXT: {TestAIE::Sched::II_MOV_PTR, llvm::ArrayRef(&OperandRCRequirements[8], 1)}, -// CHECK-NEXT: // MOV_instr_GPR_PTR_2 8 +// CHECK-NEXT: // ADD_instr_GPR 0 +// CHECK-NEXT: {TestAIE::Sched::II_ADD_GPR, llvm::ArrayRef(&OperandRCRequirements[0], 1)}, +// CHECK-NEXT: // ADD_instr_GPR_1 1 +// CHECK-NEXT: {TestAIE::Sched::II_ADD_GPR, llvm::ArrayRef(&OperandRCRequirements[1], 1)}, +// CHECK-NEXT: // ADD_instr_GPR_2 2 +// CHECK-NEXT: {TestAIE::Sched::II_ADD_GPR, 
llvm::ArrayRef(&OperandRCRequirements[2], 1)}, +// CHECK-NEXT: // ADD_instr_GPR_PTR 3 +// CHECK-NEXT: {TestAIE::Sched::II_ADD_GPR, llvm::ArrayRef(&OperandRCRequirements[3], 1)}, +// CHECK-NEXT: {TestAIE::Sched::II_ADD_PTR, llvm::ArrayRef(&OperandRCRequirements[4], 1)}, +// CHECK-NEXT: // ADD_instr_PTR 5 +// CHECK-NEXT: {TestAIE::Sched::II_ADD_PTR, llvm::ArrayRef(&OperandRCRequirements[5], 1)}, +// CHECK-NEXT: // MOV_instr_GP 6 +// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[6], 2)}, +// CHECK-NEXT: // MOV_instr_GPR 7 +// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[8], 1)}, +// CHECK-NEXT: // MOV_instr_GPR_1 8 // CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[9], 1)}, -// CHECK-NEXT: {TestAIE::Sched::II_MOV_PTR, llvm::ArrayRef(&OperandRCRequirements[10], 1)}, -// CHECK-NEXT: // MOV_instr_GP_PG 10 -// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[11], 2)}, -// CHECK-NEXT: {TestAIE::Sched::II_MOV_PTR, llvm::ArrayRef(&OperandRCRequirements[13], 2)}, -// CHECK-NEXT: // MOV_instr_GP_PG_1 12 -// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[15], 2)}, -// CHECK-NEXT: {TestAIE::Sched::II_MOV_PTR, llvm::ArrayRef(&OperandRCRequirements[17], 2)}, -// CHECK-NEXT: // MOV_instr_GP_PG_2 14 +// CHECK-NEXT: // MOV_instr_GPR_2 9 +// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[10], 1)}, +// CHECK-NEXT: // MOV_instr_GPR_PTR 10 +// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[11], 1)}, +// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[12], 1)}, +// CHECK-NEXT: // MOV_instr_GPR_PTR_1 12 +// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[13], 1)}, +// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[14], 1)}, +// CHECK-NEXT: // MOV_instr_GPR_PTR_2 14 +// 
CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[15], 1)}, +// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[16], 1)}, +// CHECK-NEXT: // MOV_instr_GP_PG 16 +// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[17], 2)}, // CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[19], 2)}, -// CHECK-NEXT: {TestAIE::Sched::II_MOV_PTR, llvm::ArrayRef(&OperandRCRequirements[21], 2)}, -// CHECK-NEXT: // MOV_instr_PG 16 +// CHECK-NEXT: // MOV_instr_GP_PG_1 18 +// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[21], 2)}, // CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[23], 2)}, -// CHECK-NEXT: // MOV_instr_PTR 17 -// CHECK-NEXT: {TestAIE::Sched::II_MOV_PTR, llvm::ArrayRef(&OperandRCRequirements[25], 1)}, +// CHECK-NEXT: // MOV_instr_GP_PG_2 20 +// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[25], 2)}, +// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[27], 2)}, +// CHECK-NEXT: // MOV_instr_PG 22 +// CHECK-NEXT: {TestAIE::Sched::II_MOV_GPR, llvm::ArrayRef(&OperandRCRequirements[29], 2)}, +// CHECK-NEXT: // MOV_instr_PTR 23 +// CHECK-NEXT: {TestAIE::Sched::II_MOV_PTR, llvm::ArrayRef(&OperandRCRequirements[31], 1)}, +// CHECK-NEXT: // STAGE_cyc_instr 24 +// CHECK-NEXT: {TestAIE::Sched::II_STAGE_A, llvm::ArrayRef(&OperandRCRequirements[32], 1)}, +// CHECK-NEXT: {TestAIE::Sched::II_STAGE_CYC2, llvm::ArrayRef(&OperandRCRequirements[33], 1)}, +// CHECK-NEXT: // STAGE_emp_instr 26 +// CHECK-NEXT: {TestAIE::Sched::II_STAGE_EMP1, llvm::ArrayRef(&OperandRCRequirements[34], 1)}, +// CHECK-NEXT: {TestAIE::Sched::II_STAGE_EMP4, llvm::ArrayRef(&OperandRCRequirements[35], 1)}, +// CHECK-NEXT: // STAGE_equiv_instr 28 +// CHECK-NEXT: {TestAIE::Sched::II_STAGE_A, llvm::ArrayRef(&OperandRCRequirements[36], 1)}, +// CHECK-NEXT: 
{TestAIE::Sched::II_STAGE_A, llvm::ArrayRef(&OperandRCRequirements[37], 1)}, +// CHECK-NEXT: // STAGE_fu_instr 30 +// CHECK-NEXT: {TestAIE::Sched::II_STAGE_A, llvm::ArrayRef(&OperandRCRequirements[38], 1)}, +// CHECK-NEXT: {TestAIE::Sched::II_STAGE_FU2, llvm::ArrayRef(&OperandRCRequirements[39], 1)}, +// CHECK-NEXT: // STAGE_tinc_instr 32 +// CHECK-NEXT: {TestAIE::Sched::II_STAGE_TINC0, llvm::ArrayRef(&OperandRCRequirements[40], 1)}, +// CHECK-NEXT: {TestAIE::Sched::II_STAGE_TINC1, llvm::ArrayRef(&OperandRCRequirements[41], 1)}, // CHECK-NEXT: }; // CHECK-EMPTY: // CHECK-NEXT: static constexpr InstrVariantInfo const InstrVariantInfos[] = { -// CHECK-NEXT: {TestAIE::MOV_instr_GP, llvm::ArrayRef(&SchedVariants[0], 1)}, -// CHECK-NEXT: {TestAIE::MOV_instr_GPR, llvm::ArrayRef(&SchedVariants[1], 1)}, -// CHECK-NEXT: {TestAIE::MOV_instr_GPR_1, llvm::ArrayRef(&SchedVariants[2], 1)}, -// CHECK-NEXT: {TestAIE::MOV_instr_GPR_2, llvm::ArrayRef(&SchedVariants[3], 1)}, -// CHECK-NEXT: {TestAIE::MOV_instr_GPR_PTR, llvm::ArrayRef(&SchedVariants[4], 2)}, -// CHECK-NEXT: {TestAIE::MOV_instr_GPR_PTR_1, llvm::ArrayRef(&SchedVariants[6], 2)}, -// CHECK-NEXT: {TestAIE::MOV_instr_GPR_PTR_2, llvm::ArrayRef(&SchedVariants[8], 2)}, -// CHECK-NEXT: {TestAIE::MOV_instr_GP_PG, llvm::ArrayRef(&SchedVariants[10], 2)}, -// CHECK-NEXT: {TestAIE::MOV_instr_GP_PG_1, llvm::ArrayRef(&SchedVariants[12], 2)}, -// CHECK-NEXT: {TestAIE::MOV_instr_GP_PG_2, llvm::ArrayRef(&SchedVariants[14], 2)}, -// CHECK-NEXT: {TestAIE::MOV_instr_PG, llvm::ArrayRef(&SchedVariants[16], 1)}, -// CHECK-NEXT: {TestAIE::MOV_instr_PTR, llvm::ArrayRef(&SchedVariants[17], 1)}, +// CHECK-NEXT: {TestAIE::ADD_instr_GPR, llvm::ArrayRef(&SchedVariants[0], 1)}, +// CHECK-NEXT: {TestAIE::ADD_instr_GPR_1, llvm::ArrayRef(&SchedVariants[1], 1)}, +// CHECK-NEXT: {TestAIE::ADD_instr_GPR_2, llvm::ArrayRef(&SchedVariants[2], 1)}, +// CHECK-NEXT: {TestAIE::ADD_instr_GPR_PTR, llvm::ArrayRef(&SchedVariants[3], 2)}, +// CHECK-NEXT: 
{TestAIE::ADD_instr_PTR, llvm::ArrayRef(&SchedVariants[5], 1)}, +// CHECK-NEXT: {TestAIE::MOV_instr_GP, llvm::ArrayRef(&SchedVariants[6], 1)}, +// CHECK-NEXT: {TestAIE::MOV_instr_GPR, llvm::ArrayRef(&SchedVariants[7], 1)}, +// CHECK-NEXT: {TestAIE::MOV_instr_GPR_1, llvm::ArrayRef(&SchedVariants[8], 1)}, +// CHECK-NEXT: {TestAIE::MOV_instr_GPR_2, llvm::ArrayRef(&SchedVariants[9], 1)}, +// CHECK-NEXT: {TestAIE::MOV_instr_GPR_PTR, llvm::ArrayRef(&SchedVariants[10], 2)}, +// CHECK-NEXT: {TestAIE::MOV_instr_GPR_PTR_1, llvm::ArrayRef(&SchedVariants[12], 2)}, +// CHECK-NEXT: {TestAIE::MOV_instr_GPR_PTR_2, llvm::ArrayRef(&SchedVariants[14], 2)}, +// CHECK-NEXT: {TestAIE::MOV_instr_GP_PG, llvm::ArrayRef(&SchedVariants[16], 2)}, +// CHECK-NEXT: {TestAIE::MOV_instr_GP_PG_1, llvm::ArrayRef(&SchedVariants[18], 2)}, +// CHECK-NEXT: {TestAIE::MOV_instr_GP_PG_2, llvm::ArrayRef(&SchedVariants[20], 2)}, +// CHECK-NEXT: {TestAIE::MOV_instr_PG, llvm::ArrayRef(&SchedVariants[22], 1)}, +// CHECK-NEXT: {TestAIE::MOV_instr_PTR, llvm::ArrayRef(&SchedVariants[23], 1)}, +// CHECK-NEXT: {TestAIE::STAGE_cyc_instr, llvm::ArrayRef(&SchedVariants[24], 2)}, +// CHECK-NEXT: {TestAIE::STAGE_emp_instr, llvm::ArrayRef(&SchedVariants[26], 2)}, +// CHECK-NEXT: {TestAIE::STAGE_equiv_instr, llvm::ArrayRef(&SchedVariants[28], 2)}, +// CHECK-NEXT: {TestAIE::STAGE_fu_instr, llvm::ArrayRef(&SchedVariants[30], 2)}, +// CHECK-NEXT: {TestAIE::STAGE_tinc_instr, llvm::ArrayRef(&SchedVariants[32], 2)}, // CHECK-NEXT: }; // CHECK-EMPTY: // CHECK-NEXT: // Interface object providing access to the variant tables. diff --git a/llvm/utils/TableGen/AIEItineraryEquivalence.h b/llvm/utils/TableGen/AIEItineraryEquivalence.h new file mode 100644 index 000000000000..f9ec740970bc --- /dev/null +++ b/llvm/utils/TableGen/AIEItineraryEquivalence.h @@ -0,0 +1,254 @@ +//===- AIEItineraryEquivalence.h - Itinerary Equivalence Detection -*- C++ -*-// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utilities for detecting equivalent itineraries.
+// Two itineraries are equivalent if they have identical scheduling behavior,
+// i.e., the same Stages, OperandCycles and Bypasses.
+//
+// This is useful for:
+// - Consolidating schedule class IDs in VarItinerary tables
+// - Reducing table data by storing only one copy per equivalence class
+// - Optimizing lookup time
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_AIEITINERARYEQUIVALENCE_H
+#define LLVM_UTILS_TABLEGEN_AIEITINERARYEQUIVALENCE_H
+
+#include "Common/CodeGenSchedule.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/TableGen/Record.h"
+#include <cassert>
+#include <map>
+#include <vector>
+
+#define DEBUG_TYPE "aie-itinerary-equivalence"
+
+namespace llvm {
+
+/// Captures the scheduling-relevant fields of a single pipeline stage.
+///
+/// The stage record's name is a debug label and is excluded from comparison.
+/// What matters is the flat data: how many cycles the stage occupies, which
+/// functional units it requires, how many cycles elapse before the next stage
+/// begins (TimeInc), and whether it carries a special timing model marker such
+/// as Reserved.
+struct StageCycleInfo {
+  /// Length of the stage in machine cycles.
+  int Cycles = 0;
+
+  /// Functional unit names the stage can use.
+  std::vector<StringRef> UnitNames;
+
+  /// Cycles until the next stage starts. Compared for every stage, including
+  /// the last one, so signatures differing only there are kept distinct.
+  int TimeInc = 0;
+
+  /// Timing model record name (e.g., "NoItinerary", "Reserved").
+  StringRef TimingModelName;
+
+  bool operator==(const StageCycleInfo &Other) const {
+    return Cycles == Other.Cycles && TimeInc == Other.TimeInc &&
+           UnitNames == Other.UnitNames &&
+           TimingModelName == Other.TimingModelName;
+  }
+
+  bool operator<(const StageCycleInfo &Other) const {
+    if (Cycles != Other.Cycles)
+      return Cycles < Other.Cycles;
+    if (TimeInc != Other.TimeInc)
+      return TimeInc < Other.TimeInc;
+    if (UnitNames != Other.UnitNames)
+      return UnitNames < Other.UnitNames;
+    return TimingModelName < Other.TimingModelName;
+  }
+};
+
+/// Represents the scheduling signature of an itinerary - used to detect
+/// equivalent itineraries that have identical scheduling behavior.
+///
+/// Two itineraries with the same signature have identical scheduling
+/// characteristics and can be treated as equivalent for scheduling purposes.
+struct ItinerarySignature {
+  /// Per-stage scheduling data. Each entry captures the flat field values of
+  /// one stage (Cycles, functional units, TimeInc, timing model), which fully
+  /// determine its scheduling behavior. Stage record names are debug labels
+  /// and are not included.
+  std::vector<StageCycleInfo> Stages;
+
+  /// Operand cycle latencies.
+  std::vector<int64_t> OperandCycles;
+
+  /// Bypass identifiers - using bypass record names for comparison.
+  std::vector<StringRef> BypassNames;
+
+  bool operator<(const ItinerarySignature &Other) const {
+    if (Stages != Other.Stages)
+      return Stages < Other.Stages;
+    if (OperandCycles != Other.OperandCycles)
+      return OperandCycles < Other.OperandCycles;
+    return BypassNames < Other.BypassNames;
+  }
+};
+
+/// Computes the scheduling signature for an itinerary record.
+///
+/// \param ItinData The InstrItinData record to compute the signature for.
+///                 Must not be null.
+/// \returns The scheduling signature for the record.
+inline ItinerarySignature computeItinerarySignature(const Record *ItinData) {
+  assert(ItinData && "ItinData must not be null");
+
+  ItinerarySignature Sig;
+
+  // Extract per-stage scheduling data. For each stage we read the actual
+  // field values (Cycles, Units, TimeInc, TM) that determine scheduling
+  // behavior. The stage record's name is a debug label and is intentionally
+  // excluded from the signature.
+  if (ItinData->getValue("Stages")) {
+    for (const Record *Stage : ItinData->getValueAsListOfDefs("Stages")) {
+      StageCycleInfo Info;
+      Info.Cycles = static_cast<int>(Stage->getValueAsInt("Cycles"));
+      Info.TimeInc = static_cast<int>(Stage->getValueAsInt("TimeInc"));
+      if (Stage->getValue("Units")) {
+        for (const Record *Unit : Stage->getValueAsListOfDefs("Units"))
+          Info.UnitNames.push_back(Unit->getName());
+      }
+      if (Stage->getValue("TM")) {
+        const Record *TM = Stage->getValueAsDef("TM");
+        if (TM)
+          Info.TimingModelName = TM->getName();
+      }
+      Sig.Stages.push_back(std::move(Info));
+    }
+  }
+
+  // Extract OperandCycles.
+  if (ItinData->getValue("OperandCycles")) {
+    Sig.OperandCycles = ItinData->getValueAsListOfInts("OperandCycles");
+  }
+
+  // Extract Bypass information.
+  if (ItinData->getValue("Bypasses")) {
+    for (const Record *Bypass : ItinData->getValueAsListOfDefs("Bypasses")) {
+      Sig.BypassNames.push_back(Bypass->getName());
+    }
+  }
+
+  return Sig;
+}
+
+/// Manages equivalence relationships between itineraries.
+///
+/// This class builds and maintains a mapping from itinerary names to their
+/// canonical representative. Itineraries with identical signatures map to
+/// the same representative, which is the first itinerary encountered with
+/// that signature.
+class ItineraryEquivalenceMap {
+  /// Maps itinerary name to its representative (canonical) itinerary name.
+  std::map<StringRef, StringRef> EquivalenceMap;
+
+  /// Maps itinerary name to its signature.
+  std::map<StringRef, ItinerarySignature> ItineraryToSignature;
+
+  /// Maps signature to the representative itinerary name.
+  std::map<ItinerarySignature, StringRef> SignatureToRepresentative;
+
+  /// Statistics for debugging.
+  unsigned NumEquivalent = 0;
+  unsigned NumTotal = 0;
+
+public:
+  /// Build the equivalence map from scheduling models.
+  ///
+  /// \param SchedModels The scheduling models containing schedule classes.
+  /// \param ItinModel The processor model with itinerary definitions.
+  void build(const CodeGenSchedModels &SchedModels,
+             const CodeGenProcModel &ItinModel) {
+    // Process all schedule classes to build the equivalence map.
+    for (const CodeGenSchedClass &SchedClass : SchedModels.explicit_classes()) {
+      // Some schedule classes have no itinerary entry; skip them.
+      if (SchedClass.Index >= ItinModel.ItinDefList.size())
+        continue;
+      const Record *ItinData = ItinModel.ItinDefList[SchedClass.Index];
+      if (!ItinData)
+        continue;
+      addItinerary(SchedClass.Name, computeItinerarySignature(ItinData));
+    }
+
+    LLVM_DEBUG(dbgs() << "ItineraryEquivalenceMap: Found " << NumEquivalent
+                      << " equivalent itineraries out of " << NumTotal
+                      << " total\n");
+  }
+
+  /// Add an itinerary to the equivalence map.
+  ///
+  /// \param ItinName The name of the itinerary.
+  /// \param Sig The scheduling signature of the itinerary.
+  void addItinerary(StringRef ItinName, const ItinerarySignature &Sig) {
+    ++NumTotal;
+
+    // Store the signature for this itinerary.
+    ItineraryToSignature[ItinName] = Sig;
+
+    auto It = SignatureToRepresentative.find(Sig);
+    if (It == SignatureToRepresentative.end()) {
+      // This is the first itinerary with this signature - it becomes the
+      // representative.
+      SignatureToRepresentative[Sig] = ItinName;
+      EquivalenceMap[ItinName] = ItinName;
+    } else {
+      // Map this itinerary to the existing representative.
+      EquivalenceMap[ItinName] = It->second;
+      ++NumEquivalent;
+      LLVM_DEBUG(dbgs() << "Itinerary " << ItinName << " is equivalent to "
+                        << It->second << "\n");
+    }
+  }
+
+  /// Find the representative itinerary name for a given itinerary.
+  ///
+  /// \param ItinName The name of the itinerary to look up.
+  /// \returns The representative itinerary name, or ItinName if not found.
+  StringRef getRepresentative(StringRef ItinName) const {
+    auto It = EquivalenceMap.find(ItinName);
+    if (It != EquivalenceMap.end())
+      return It->second;
+    // If not found in the map, return the original name.
+    return ItinName;
+  }
+
+  /// Check if an itinerary is the representative of its equivalence class.
+  bool isRepresentative(StringRef ItinName) const {
+    auto It = EquivalenceMap.find(ItinName);
+    if (It != EquivalenceMap.end())
+      return It->first == It->second;
+    return true;
+  }
+
+  /// Get the number of equivalent (non-representative) itineraries found.
+  unsigned getNumEquivalent() const { return NumEquivalent; }
+
+  /// Get the total number of itineraries processed.
+  unsigned getNumTotal() const { return NumTotal; }
+
+  /// Get the number of unique equivalence classes (representatives).
+  unsigned getNumEquivalenceClasses() const {
+    return SignatureToRepresentative.size();
+  }
+};
+
+} // namespace llvm
+
+#undef DEBUG_TYPE
+
+#endif // LLVM_UTILS_TABLEGEN_AIEITINERARYEQUIVALENCE_H
diff --git a/llvm/utils/TableGen/AIEVariableInstrItineraryEmitter.cpp b/llvm/utils/TableGen/AIEVariableInstrItineraryEmitter.cpp
index edbf305a247f..a61ae15f4677 100644
--- a/llvm/utils/TableGen/AIEVariableInstrItineraryEmitter.cpp
+++ b/llvm/utils/TableGen/AIEVariableInstrItineraryEmitter.cpp
@@ -17,7 +17,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AIEItineraryEquivalence.h"
+#include "Common/CodeGenDAGPatterns.h"
 #include "Common/CodeGenInstruction.h"
+#include "Common/CodeGenRegisters.h"
+#include "Common/CodeGenSchedule.h"
 #include "Common/CodeGenTarget.h"
 #include "ConstTable.h"
 #include "llvm/Support/Debug.h"
@@ -25,6 +29,7 @@
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TGTimer.h"
 #include "llvm/TableGen/TableGenBackend.h"
+#include <set>
 
 using namespace llvm;
 
@@ 
-38,6 +43,9 @@ struct SchedVariantData { StringRef ItineraryName; // List of (OpIdx, RegClassName) pairs for this variant. std::vector> OperandRCs; + // Original index in the instruction's variant list, used to preserve + // ordering after consolidation. + unsigned OrigIdx = 0; }; // Structure to hold all variants for a single instruction. @@ -60,17 +68,351 @@ class AIEVariableInstrItineraryEmitter { void emitTables(raw_ostream &OS); void emitInterface(raw_ostream &OS); + // Consolidate variants by finding register class unions for variants with + // equivalent itineraries. + void consolidateVariants(); + + // Check if two register classes have compatible attributes (SpillSize, + // Allocatable, CopyCost, etc.) for consolidation. + static bool haveCompatibleAttributes(const CodeGenRegisterClass &A, + const CodeGenRegisterClass &B); + + // Find a register class whose members are exactly the union of the input + // classes and has compatible attributes. Returns nullptr if none exists. + const CodeGenRegisterClass * + findUnionClass(ArrayRef RCs); + + // Try to merge a group of Multiplicity variants in WorkSet at a given + // operand position. Tries all candidate subsets to find one with a valid + // union class. On success, mutates WorkSet and returns true. + bool tryMergeAtPosition(std::vector &WorkSet, + size_t BaseIdx, int DiffPos, + ArrayRef CandidateIndices, + unsigned Multiplicity, StringRef RepItin); + const RecordKeeper &Records; CodeGenTarget Target; + CodeGenDAGPatterns CDP; + const CodeGenSchedModels &SchedModels; + CodeGenRegBank &RegBank; + const CodeGenProcModel *ItinModel = nullptr; std::string CurrentNamespace; + + // Manages itinerary equivalence relationships for schedule class + // consolidation. 
+ ItineraryEquivalenceMap ItinEquivMap; }; } // namespace AIEVariableInstrItineraryEmitter::AIEVariableInstrItineraryEmitter( const RecordKeeper &R) - : Records(R), Target(R) { + : Records(R), Target(R), CDP(R), + SchedModels(CDP.getTargetInfo().getSchedModels()), + RegBank(Target.getRegBank()) { CurrentNamespace = Target.getName().str(); + + // Find the itinerary model and build the equivalence map. At most one + // processor model is expected to have itineraries (asserted below); the + // map is left unbuilt when no such model exists. + if (SchedModels.hasItineraries()) { + for (const CodeGenProcModel &ProcModel : SchedModels.procModels()) { + if (ProcModel.hasItineraries()) { + assert(ItinModel == nullptr && "Multiple Itin models"); + ItinModel = &ProcModel; + } + } + + if (ItinModel) + ItinEquivMap.build(SchedModels, *ItinModel); + } +} + +bool AIEVariableInstrItineraryEmitter::haveCompatibleAttributes( + const CodeGenRegisterClass &A, const CodeGenRegisterClass &B) { + // Check that all relevant attributes match: RSI, CopyCost, Allocatable, + // AllocationPriority and TSFlags. + // RSI contains SpillSize and SpillAlignment per HwMode. + if (A.RSI != B.RSI) + return false; + + if (A.CopyCost != B.CopyCost) + return false; + + if (A.Allocatable != B.Allocatable) + return false; + + if (A.AllocationPriority != B.AllocationPriority) + return false; + + if (A.TSFlags != B.TSFlags) + return false; + + return true; +} + +const CodeGenRegisterClass *AIEVariableInstrItineraryEmitter::findUnionClass( + ArrayRef RCs) { + if (RCs.empty()) + return nullptr; + + if (RCs.size() == 1) + return RCs[0]; + + // First check that all input classes have compatible attributes. Each one + // is compared against RCs[0]. + for (size_t I = 1; I < RCs.size(); ++I) { + if (!haveCompatibleAttributes(*RCs[0], *RCs[I])) { + LLVM_DEBUG(dbgs() << " Cannot consolidate: " << RCs[0]->getName() + << " and " << RCs[I]->getName() + << " have incompatible attributes\n"); + return nullptr; + } + } + + // Compute the union of all registers. + std::set UnionRegs; + for (const CodeGenRegisterClass *RC : RCs) { + for (const CodeGenRegister *Reg : RC->getMembers()) + UnionRegs.insert(Reg); + } + + // Search for a register class that: + // 1. 
Has exactly the union of registers. + // 2. Has compatible attributes with all input classes. + // The first matching class in RegBank.getRegClasses() order is returned. + for (const CodeGenRegisterClass &Candidate : RegBank.getRegClasses()) { + const auto &CandidateMembers = Candidate.getMembers(); + if (CandidateMembers.size() != UnionRegs.size()) + continue; + + const bool AllMembersMatch = + llvm::all_of(CandidateMembers, [&](const CodeGenRegister *Reg) { + return UnionRegs.find(Reg) != UnionRegs.end(); + }); + + if (!AllMembersMatch) + continue; + + if (!haveCompatibleAttributes(Candidate, *RCs[0])) + continue; + + LLVM_DEBUG(dbgs() << " Found union class: " << Candidate.getName() + << " for {"; + for (const auto *RC : RCs) dbgs() << RC->getName() << " "; + dbgs() << "}\n"); + + return &Candidate; + } + + return nullptr; +} + +bool AIEVariableInstrItineraryEmitter::tryMergeAtPosition( + std::vector &WorkSet, size_t BaseIdx, int DiffPos, + ArrayRef CandidateIndices, unsigned Multiplicity, + StringRef RepItin) { + const SchedVariantData &Base = WorkSet[BaseIdx]; + const size_t NumNeeded = Multiplicity - 1; + + // Try consecutive windows of NumNeeded candidates from the candidate list. + // The base variant is always part of the group, so each window supplies the + // remaining Multiplicity - 1 members. + for (size_t Start = 0; Start + NumNeeded <= CandidateIndices.size(); + ++Start) { + std::vector GroupIndices = {BaseIdx}; + for (size_t I = Start; I < Start + NumNeeded; ++I) + GroupIndices.push_back(CandidateIndices[I]); + + // Collect the register classes at the differing position. A window is + // skipped if any class name cannot be resolved to a register class. + std::vector RCs; + bool AllValid = true; + for (const size_t GI : GroupIndices) { + const StringRef RCName = WorkSet[GI].OperandRCs[DiffPos].second; + const Record *RCDef = Records.getDef(RCName); + if (!RCDef) { + AllValid = false; + break; + } + CodeGenRegisterClass *RC = RegBank.getRegClass(RCDef); + if (!RC) { + AllValid = false; + break; + } + RCs.push_back(RC); + } + + if (!AllValid) + continue; + + const CodeGenRegisterClass *UnionRC = findUnionClass(RCs); + if (!UnionRC) + continue; + + // Build the merged variant, preserving the original index of the base. 
+ SchedVariantData Merged; + Merged.ItineraryName = RepItin; + Merged.OrigIdx = Base.OrigIdx; + for (size_t K = 0; K < Base.OperandRCs.size(); ++K) { + if (static_cast(K) == DiffPos) { + Merged.OperandRCs.emplace_back(Base.OperandRCs[K].first, + UnionRC->getName()); + } else { + Merged.OperandRCs.push_back(Base.OperandRCs[K]); + } + } + + // Replace the base variant with the merged one. Base refers to this same + // element, so it must not be read as the pre-merge variant from here on. + WorkSet[GroupIndices[0]] = std::move(Merged); + + // Remove the other variants back to front so that the indices still to be + // erased stay valid. This relies on GroupIndices being in ascending order. + for (size_t RI = GroupIndices.size() - 1; RI >= 1; --RI) + WorkSet.erase(WorkSet.begin() + GroupIndices[RI]); + + return true; + } + + return false; +} + +void AIEVariableInstrItineraryEmitter::consolidateVariants() { + unsigned TotalVariantsBefore = 0; + unsigned TotalVariantsAfter = 0; + + LLVM_DEBUG(dbgs() << "ItineraryEquivalenceMap stats: " + << ItinEquivMap.getNumEquivalent() << " equivalent out of " + << ItinEquivMap.getNumTotal() << " total, " + << ItinEquivMap.getNumEquivalenceClasses() + << " equivalence classes\n"); + + for (InstrVariantData &InstrData : InstrVariants) { + TotalVariantsBefore += InstrData.Variants.size(); + + // Group variants by equivalence class. Use the global representative + // as the grouping key to ensure only genuinely equivalent itineraries + // are grouped together. + std::map> RepToVariantIndices; + for (size_t I = 0; I < InstrData.Variants.size(); ++I) { + const StringRef OrigItin = InstrData.Variants[I].ItineraryName; + const StringRef Rep = ItinEquivMap.getRepresentative(OrigItin); + RepToVariantIndices[Rep].push_back(I); + } + + std::vector ConsolidatedVariants; + + for (auto &[Rep, Indices] : RepToVariantIndices) { + // Use the first variant's itinerary as the instruction-local + // representative. 
+ const StringRef LocalRepItin = + InstrData.Variants[Indices[0]].ItineraryName; + if (Indices.size() == 1) { + ConsolidatedVariants.push_back( + std::move(InstrData.Variants[Indices[0]])); + continue; + } + + // Check if all variants have the same set of operand indices. Groups that + // do not are passed through below without any merging. + std::set ExpectedOpIndices; + for (const auto &[OpIdx, RCName] : + InstrData.Variants[Indices[0]].OperandRCs) + ExpectedOpIndices.insert(OpIdx); + + auto GetOpIndices = [&](size_t Idx) { + std::set OpIndices; + for (const auto &[OpIdx, RCName] : InstrData.Variants[Idx].OperandRCs) + OpIndices.insert(OpIdx); + return OpIndices; + }; + + const bool AllSameOperands = + llvm::all_of(ArrayRef(Indices).drop_front(), [&](size_t Idx) { + return GetOpIndices(Idx) == ExpectedOpIndices; + }); + + if (!AllSameOperands) { + for (size_t Idx : Indices) + ConsolidatedVariants.push_back(std::move(InstrData.Variants[Idx])); + continue; + } + + // Build a working set of variants to consolidate. + std::vector WorkSet; + for (size_t Idx : Indices) + WorkSet.push_back(InstrData.Variants[Idx]); + + // Try N-tuple consolidation for N = 2, then N = 4. + // After every successful merge the scan restarts from the first variant, + // because the merge erases elements of WorkSet. + for (unsigned Multiplicity : {2u, 4u}) { + bool Changed = true; + while (Changed) { + Changed = false; + if (WorkSet.size() < Multiplicity) + break; + + for (size_t I = 0; I < WorkSet.size() && !Changed; ++I) { + const SchedVariantData &Vi = WorkSet[I]; + + // Find later variants that differ from Vi in exactly one operand + // position, bucketed by that position. 
+ std::map> DiffPosToIndices; + for (size_t J = I + 1; J < WorkSet.size(); ++J) { + const SchedVariantData &Vj = WorkSet[J]; + if (Vi.OperandRCs.size() != Vj.OperandRCs.size()) + continue; + + int DifferingOpIdx = -1; + bool Compatible = true; + for (size_t K = 0; K < Vi.OperandRCs.size(); ++K) { + if (Vi.OperandRCs[K].first != Vj.OperandRCs[K].first) { + Compatible = false; + break; + } + if (Vi.OperandRCs[K].second != Vj.OperandRCs[K].second) { + if (DifferingOpIdx >= 0) { + Compatible = false; + break; + } + DifferingOpIdx = static_cast(K); + } + } + + if (Compatible && DifferingOpIdx >= 0) + DiffPosToIndices[DifferingOpIdx].push_back(J); + } + + for (auto &[DiffPos, Candidates] : DiffPosToIndices) { + if (Candidates.size() + 1 < Multiplicity) + continue; + if (!tryMergeAtPosition(WorkSet, I, DiffPos, Candidates, + Multiplicity, LocalRepItin)) + continue; + Changed = true; + break; + } + } + } + } + + LLVM_DEBUG(if (WorkSet.size() < Indices.size()) { + dbgs() << "Consolidated " << Indices.size() << " -> " << WorkSet.size() + << " variants for " << InstrData.InstrName + << " (rep: " << LocalRepItin << ")\n"; + }); + + // All variants in this equivalence group, merged or not, are emitted with + // the local representative itinerary. + for (auto &V : WorkSet) { + V.ItineraryName = LocalRepItin; + ConsolidatedVariants.push_back(std::move(V)); + } + } + + // Sort by original index to preserve the input ordering. Merged variants + // inherit the index of their first constituent, so they appear at the + // position of the earliest original variant. 
+ std::sort(ConsolidatedVariants.begin(), ConsolidatedVariants.end(), + [](const SchedVariantData &A, const SchedVariantData &B) { + return A.OrigIdx < B.OrigIdx; + }); + + InstrData.Variants = std::move(ConsolidatedVariants); + TotalVariantsAfter += InstrData.Variants.size(); + } + + LLVM_DEBUG(dbgs() << "Variant consolidation: " << TotalVariantsBefore + << " -> " << TotalVariantsAfter << " (" + << (TotalVariantsBefore - TotalVariantsAfter) + << " removed)\n"); } void AIEVariableInstrItineraryEmitter::emitTables(raw_ostream &OS) { @@ -122,6 +464,7 @@ void AIEVariableInstrItineraryEmitter::emitInterface(raw_ostream &OS) { << "VarItinInterfaceImpl = {\n"; OS << " InstrVariantInfos\n"; OS << "};\n\n"; + OS << "VarItinInterface " << CurrentNamespace << "InstrInfo::getVarItinInterface() const {\n"; OS << " return " << CurrentNamespace << "VarItinInterfaceImpl;\n"; @@ -175,9 +518,17 @@ void AIEVariableInstrItineraryEmitter::run(raw_ostream &OS) { InstrData.Variants.push_back(std::move(Variant)); } + // Assign original indices to preserve ordering after consolidation. + for (unsigned I = 0; I < InstrData.Variants.size(); ++I) + InstrData.Variants[I].OrigIdx = I; + InstrVariants.push_back(std::move(InstrData)); } + // Consolidate variants by finding register class unions for variants with + // equivalent itineraries and compatible register class attributes. + consolidateVariants(); + // Sort InstrVariants by opcode for binary search. std::sort(InstrVariants.begin(), InstrVariants.end(), [&](const InstrVariantData &A, const InstrVariantData &B) {