llvm · wdx727 · May 21, 2025 · May 21, 2025 · May 22, 2025 · May 22, 2025
diff --git a/llvm/include/llvm/CodeGen/MachineBlockHashInfo.h b/llvm/include/llvm/CodeGen/MachineBlockHashInfo.h
@@ -0,0 +1,108 @@
+#ifndef LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H
+#define LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+/// An object wrapping several components of a basic block hash. The combined
+/// (blended) hash is represented and stored as one uint64_t, while individual
+/// components are of smaller size (e.g., uint16_t or uint8_t).
+struct BlendedBlockHash {
+private:
+  static uint64_t combineHashes(uint16_t Hash1, uint16_t Hash2, uint16_t Hash3,
+                                uint16_t Hash4) {
+    uint64_t Hash = 0;
+
+    Hash |= uint64_t(Hash4);
+    Hash <<= 16;
+
+    Hash |= uint64_t(Hash3);
+    Hash <<= 16;
+
+    Hash |= uint64_t(Hash2);
+    Hash <<= 16;
+
+    Hash |= uint64_t(Hash1);
+
+    return Hash;
+  }
+
+  static void parseHashes(uint64_t Hash, uint16_t &Hash1, uint16_t &Hash2,
+                          uint16_t &Hash3, uint16_t &Hash4) {
+    Hash1 = Hash & 0xffff;
+    Hash >>= 16;
+
+    Hash2 = Hash & 0xffff;
+    Hash >>= 16;
+
+    Hash3 = Hash & 0xffff;
+    Hash >>= 16;
+
+    Hash4 = Hash & 0xffff;
+    Hash >>= 16;
+  }
+
+public:
+  explicit BlendedBlockHash() {}
+
+  explicit BlendedBlockHash(uint64_t CombinedHash) {
+    parseHashes(CombinedHash, Offset, OpcodeHash, InstrHash, NeighborHash);
+  }
+
+  /// Combine the blended hash into uint64_t.
+  uint64_t combine() const {
+    return combineHashes(Offset, OpcodeHash, InstrHash, NeighborHash);
+  }
+
+  /// Compute a distance between two given blended hashes. The smaller the
+  /// distance, the more similar two blocks are. For identical basic blocks,
+  /// the distance is zero.
+  uint64_t distance(const BlendedBlockHash &BBH) const {
+    assert(OpcodeHash == BBH.OpcodeHash &&
+           "incorrect blended hash distance computation");
+    uint64_t Dist = 0;
+    // Account for NeighborHash
+    Dist += NeighborHash == BBH.NeighborHash ? 0 : 1;
+    Dist <<= 16;
+    // Account for InstrHash
+    Dist += InstrHash == BBH.InstrHash ? 0 : 1;
+    Dist <<= 16;
+    // Account for Offset
+    Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset);
+    return Dist;
+  }
+
+  /// The offset of the basic block from the function start.
+  uint16_t Offset{0};
+  /// (Loose) Hash of the basic block instructions, excluding operands.
+  uint16_t OpcodeHash{0};
+  /// (Strong) Hash of the basic block instructions, including opcodes and
+  /// operands.
+  uint16_t InstrHash{0};
+  /// Hash of the (loose) basic block together with (loose) hashes of its
+  /// successors and predecessors.
+  uint16_t NeighborHash{0};
+};
+
+class MachineBlockHashInfo : public MachineFunctionPass {
+  DenseMap<unsigned, uint64_t> MBBHashInfo;
+
+public:
+  static char ID;
+  MachineBlockHashInfo();
+
+  StringRef getPassName() const override {
+    return "Basic Block Hash Compute";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+
+  uint64_t getMBBHash(const MachineBasicBlock &MBB);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
@@ -187,6 +187,7 @@ void initializeMIRCanonicalizerPass(PassRegistry &);
 void initializeMIRNamerPass(PassRegistry &);
 void initializeMIRPrintingPassPass(PassRegistry &);
 void initializeMachineBlockFrequencyInfoWrapperPassPass(PassRegistry &);
+void initializeMachineBlockHashInfoPass(PassRegistry&);
 void initializeMachineBlockPlacementLegacyPass(PassRegistry &);
 void initializeMachineBlockPlacementStatsLegacyPass(PassRegistry &);
 void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &);

diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
@@ -914,9 +914,10 @@ struct BBAddrMap {
     uint32_t Size = 0;   // Size of the basic block.
     Metadata MD = {false, false, false, false,
                    false}; // Metdata for this basic block.
+    uint64_t Hash = 0;     // Hash for this basic block.
 
-    BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, Metadata MD)
-        : ID(ID), Offset(Offset), Size(Size), MD(MD){};
+    BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, Metadata MD, uint64_t Hash)
+        : ID(ID), Offset(Offset), Size(Size), MD(MD), Hash(Hash){};
 
     bool operator==(const BBEntry &Other) const {
       return ID == Other.ID && Offset == Other.Offset && Size == Other.Size &&

diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h
@@ -162,6 +162,7 @@ struct BBAddrMapEntry {
     llvm::yaml::Hex64 AddressOffset;
     llvm::yaml::Hex64 Size;
     llvm::yaml::Hex64 Metadata;
+    llvm::yaml::Hex64 Hash;
   };
   uint8_t Version;
   llvm::yaml::Hex8 Feature;

diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -40,6 +40,7 @@
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockHashInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -451,6 +452,7 @@ const MCSection *AsmPrinter::getCurrentSection() const {
 void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   MachineFunctionPass::getAnalysisUsage(AU);
+  AU.addRequired<MachineBlockHashInfo>();
   AU.addRequired<MachineOptimizationRemarkEmitterPass>();
   AU.addRequired<GCModuleInfo>();
   AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
@@ -1479,6 +1481,8 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
       PrevMBBEndSymbol = MBBSymbol;
     }
 
+    auto MBHI = &getAnalysis<MachineBlockHashInfo>();
+
     if (!Features.OmitBBEntries) {
       // TODO: Remove this check when version 1 is deprecated.
       if (BBAddrMapVersion > 1) {
@@ -1498,6 +1502,8 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
       emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol);
       // Emit the Metadata.
       OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
+      // Emit the Hash.
+      OutStreamer->emitULEB128IntValue(MBHI->getMBBHash(MBB));
     }
 
     PrevMBBEndSymbol = MBB.getEndSymbol();

diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
@@ -108,6 +108,7 @@ add_llvm_component_library(LLVMCodeGen
   LowerEmuTLS.cpp
   MachineBasicBlock.cpp
   MachineBlockFrequencyInfo.cpp
+  MachineBlockHashInfo.cpp
   MachineBlockPlacement.cpp
   MachineBranchProbabilityInfo.cpp
   MachineCFGPrinter.cpp

diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
@@ -72,6 +72,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeMIRNamerPass(Registry);
   initializeMIRProfileLoaderPassPass(Registry);
   initializeMachineBlockFrequencyInfoWrapperPassPass(Registry);
+  initializeMachineBlockHashInfoPass(Registry);
   initializeMachineBlockPlacementLegacyPass(Registry);
   initializeMachineBlockPlacementStatsLegacyPass(Registry);
   initializeMachineCFGPrinterPass(Registry);

diff --git a/llvm/lib/CodeGen/MachineBlockHashInfo.cpp b/llvm/lib/CodeGen/MachineBlockHashInfo.cpp
@@ -0,0 +1,106 @@
+#include "llvm/CodeGen/MachineBlockHashInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+using OperandHashFuncTy = function_ref<uint64_t(uint64_t &, const MachineOperand &)>;
+
+uint64_t hashBlock(const MachineBasicBlock &MBB, OperandHashFuncTy OperandHashFunc) {
+  uint64_t Hash = 0;
+  for (const MachineInstr &MI : MBB) {
+    if (MI.isMetaInstruction())
+      continue;
+    // Ignore unconditional jumps
+    if (MI.isUnconditionalBranch())
+      continue;
+    Hash = hashing::detail::hash_16_bytes(Hash, MI.getOpcode());
+    for (const MachineOperand &MO : MI.operands()) {
+      if (MO.isReg() && MO.isDef() && MO.getReg().isVirtual())
+        continue;  // Skip virtual register defs.
+      Hash = OperandHashFunc(Hash, MO);
+    }
+  }
+  return Hash;
+}
+
+/// Hashing a 64-bit integer to a 16-bit one.
+uint16_t hash_64_to_16(const uint64_t Hash) {
+  uint16_t Res = (uint16_t)(Hash & 0xFFFF);
+  Res ^= (uint16_t)((Hash >> 16) & 0xFFFF);
+  Res ^= (uint16_t)((Hash >> 32) & 0xFFFF);
+  Res ^= (uint16_t)((Hash >> 48) & 0xFFFF);
+  return Res;
+}
+
+uint64_t hashInstOperand(uint64_t &Hash, const MachineOperand &Operand) {
+  return hashing::detail::hash_16_bytes(Hash, hash_value(Operand));
+}
+
+INITIALIZE_PASS(MachineBlockHashInfo, "machine-block-hash",
+                      "Machine Block Hash Analysis", true, true)
+
+char MachineBlockHashInfo::ID = 0;
+
+MachineBlockHashInfo::MachineBlockHashInfo() : MachineFunctionPass(ID) {
+  initializeMachineBlockHashInfoPass(*PassRegistry::getPassRegistry());
+}
+
+void MachineBlockHashInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineBlockHashInfo::runOnMachineFunction(MachineFunction &F) {
+  DenseMap<MachineBasicBlock *, BlendedBlockHash> BlendedHashes;
+  DenseMap<MachineBasicBlock *, uint64_t> OpcodeHashes;
+  uint16_t Offset = 0;
+  // Initialize hash components
+  for (MachineBasicBlock &MBB : F) {
+    BlendedBlockHash BlendedHash;
+    // offset of the machine basic block
+    BlendedHash.Offset = Offset;
+    Offset += MBB.size();
+    // Hashing opcodes
+    uint64_t OpcodeHash = hashBlock(MBB, [](uint64_t &Hash, const MachineOperand &Op) { return Hash; });
+    OpcodeHashes[&MBB] = OpcodeHash;
+    BlendedHash.OpcodeHash = hash_64_to_16(OpcodeHash);
+    // Hash complete instructions
+    uint64_t InstrHash = hashBlock(MBB, hashInstOperand);
+    BlendedHash.InstrHash = hash_64_to_16(InstrHash);
+    BlendedHashes[&MBB] = BlendedHash;
+  }
+
+  // Initialize neighbor hash
+  for (MachineBasicBlock &MBB : F) {
+    uint64_t Hash = OpcodeHashes[&MBB];
+    // Append hashes of successors
+    for (MachineBasicBlock *SuccMBB : MBB.successors()) {
+      uint64_t SuccHash = OpcodeHashes[SuccMBB];
+      Hash = hashing::detail::hash_16_bytes(Hash, SuccHash);
+    }
+    // Append hashes of predecessors
+    for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+      uint64_t PredHash = OpcodeHashes[PredMBB];
+      Hash = hashing::detail::hash_16_bytes(Hash, PredHash);
+    }
+    BlendedHashes[&MBB].NeighborHash = hash_64_to_16(Hash);
+  }
+
+  // Assign hashes
+  for (MachineBasicBlock &MBB : F) {
+    if (MBB.getBBID()) {
+      MBBHashInfo[MBB.getBBID()->BaseID] = BlendedHashes[&MBB].combine();
+    }
+  }
+
+  return false;
+}
+
+uint64_t MachineBlockHashInfo::getMBBHash(const MachineBasicBlock &MBB) {
+  if (MBB.getBBID()) {
+    return MBBHashInfo[MBB.getBBID()->BaseID];
+  }
+  return 0;
+}
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
@@ -873,6 +873,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
           uint32_t Offset = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
           uint32_t Size = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
           uint32_t MD = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
+          uint64_t Hash = readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr);
           if (Version >= 1) {
             // Offset is calculated relative to the end of the previous BB.
             Offset += PrevBBEndOffset;
@@ -884,7 +885,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
             MetadataDecodeErr = MetadataOrErr.takeError();
             break;
           }
-          BBEntries.push_back({ID, Offset, Size, *MetadataOrErr});
+          BBEntries.push_back({ID, Offset, Size, *MetadataOrErr, Hash});
         }
         TotalNumBlocks += BBEntries.size();
       }

diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -1502,6 +1502,7 @@ void ELFState<ELFT>::writeSectionContent(
         SHeader.sh_size += CBA.writeULEB128(BBE.AddressOffset);
         SHeader.sh_size += CBA.writeULEB128(BBE.Size);
         SHeader.sh_size += CBA.writeULEB128(BBE.Metadata);
+        SHeader.sh_size += CBA.writeULEB128(BBE.Hash);
       }
     }
     if (!PGOAnalyses)

diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -1882,6 +1882,7 @@ void MappingTraits<ELFYAML::BBAddrMapEntry::BBEntry>::mapping(
   IO.mapRequired("AddressOffset", E.AddressOffset);
   IO.mapRequired("Size", E.Size);
   IO.mapRequired("Metadata", E.Metadata);
+  IO.mapRequired("Hash", E.Hash);
 }
 
 void MappingTraits<ELFYAML::PGOAnalysisMapEntry>::mapping(

diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -85,6 +85,7 @@
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       Stack Frame Layout Analysis
 ; CHECK-NEXT:       Unpack machine instruction bundles
+; CHECK-NEXT:       Basic Block Hash Compute
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       AArch64 Assembly Printer

diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -245,6 +245,7 @@
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       Stack Frame Layout Analysis
 ; CHECK-NEXT:       Unpack machine instruction bundles
+; CHECK-NEXT:       Basic Block Hash Compute
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       AArch64 Assembly Printer

diff --git a/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll b/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll
@@ -62,6 +62,7 @@
 ; HOTNESS-NEXT: Freeing Pass 'Unpack machine instruction bundles'
 ; HOTNESS-NEXT: Executing Pass 'Verify generated machine code'
 ; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code'
+; HOTNESS-NEXT: Executing Pass 'Basic Block Hash Compute'
 ; HOTNESS-NEXT: Executing Pass 'Lazy Machine Block Frequency Analysis'
 ; HOTNESS-NEXT: Executing Pass 'Machine Optimization Remark Emitter'
 ; HOTNESS-NEXT: Building MachineBlockFrequencyInfo on the fly
@@ -107,6 +108,7 @@
 ; NO_HOTNESS-NEXT: Freeing Pass 'Unpack machine instruction bundles'
 ; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code'
 ; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code'
+; NO_HOTNESS-NEXT: Executing Pass 'Basic Block Hash Compute'
 ; NO_HOTNESS-NEXT: Executing Pass 'Lazy Machine Block Frequency Analysis'
 ; NO_HOTNESS-NEXT: Executing Pass 'Machine Optimization Remark Emitter'
 ; NO_HOTNESS-NEXT: Executing Pass 'AArch64 Assembly Printer'

diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -150,6 +150,7 @@
 ; GCN-O0-NEXT:        Machine Optimization Remark Emitter
 ; GCN-O0-NEXT:        Stack Frame Layout Analysis
 ; GCN-O0-NEXT:        Function register usage analysis
+; GCN-O0-NEXT:        Basic Block Hash Compute
 ; GCN-O0-NEXT:        AMDGPU Assembly Printer
 ; GCN-O0-NEXT:        Free MachineFunction
 
@@ -433,6 +434,7 @@
 ; GCN-O1-NEXT:        Machine Optimization Remark Emitter
 ; GCN-O1-NEXT:        Stack Frame Layout Analysis
 ; GCN-O1-NEXT:        Function register usage analysis
+; GCN-O1-NEXT:        Basic Block Hash Compute
 ; GCN-O1-NEXT:        AMDGPU Assembly Printer
 ; GCN-O1-NEXT:        Free MachineFunction
 
@@ -744,6 +746,7 @@
 ; GCN-O1-OPTS-NEXT:        Machine Optimization Remark Emitter
 ; GCN-O1-OPTS-NEXT:        Stack Frame Layout Analysis
 ; GCN-O1-OPTS-NEXT:        Function register usage analysis
+; GCN-O1-OPTS-NEXT:        Basic Block Hash Compute
 ; GCN-O1-OPTS-NEXT:        AMDGPU Assembly Printer
 ; GCN-O1-OPTS-NEXT:        Free MachineFunction
 
@@ -1061,6 +1064,7 @@
 ; GCN-O2-NEXT:        Machine Optimization Remark Emitter
 ; GCN-O2-NEXT:        Stack Frame Layout Analysis
 ; GCN-O2-NEXT:        Function register usage analysis
+; GCN-O2-NEXT:        Basic Block Hash Compute
 ; GCN-O2-NEXT:        AMDGPU Assembly Printer
 ; GCN-O2-NEXT:        Free MachineFunction
 
@@ -1391,6 +1395,7 @@
 ; GCN-O3-NEXT:        Machine Optimization Remark Emitter
 ; GCN-O3-NEXT:        Stack Frame Layout Analysis
 ; GCN-O3-NEXT:        Function register usage analysis
+; GCN-O3-NEXT:        Basic Block Hash Compute
 ; GCN-O3-NEXT:        AMDGPU Assembly Printer
 ; GCN-O3-NEXT:        Free MachineFunction