Skip to content

Adding Matching and Inference Functionality to Propeller-Patch 1 #140886

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 108 additions & 0 deletions llvm/include/llvm/CodeGen/MachineBlockHashInfo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#ifndef LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H
#define LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H

#include "llvm/CodeGen/MachineFunctionPass.h"

namespace llvm {

/// An object wrapping several components of a basic block hash. The combined
/// (blended) hash is represented and stored as one uint64_t, while individual
/// components are of smaller size (e.g., uint16_t or uint8_t).
struct BlendedBlockHash {
private:
static uint64_t combineHashes(uint16_t Hash1, uint16_t Hash2, uint16_t Hash3,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A lot of the code in this file repeats the implementation in BOLT (which has actually been improved since the original commits), e.g., https://github.com/llvm/llvm-project/blob/main/bolt/lib/Profile/StaleProfileMatching.cpp

Do you think there is a way to unify the implementations? (totally understand if not, since there might be some specifics)

uint16_t Hash4) {
uint64_t Hash = 0;

Hash |= uint64_t(Hash4);
Hash <<= 16;

Hash |= uint64_t(Hash3);
Hash <<= 16;

Hash |= uint64_t(Hash2);
Hash <<= 16;

Hash |= uint64_t(Hash1);

return Hash;
}

static void parseHashes(uint64_t Hash, uint16_t &Hash1, uint16_t &Hash2,
uint16_t &Hash3, uint16_t &Hash4) {
Hash1 = Hash & 0xffff;
Hash >>= 16;

Hash2 = Hash & 0xffff;
Hash >>= 16;

Hash3 = Hash & 0xffff;
Hash >>= 16;

Hash4 = Hash & 0xffff;
Hash >>= 16;
}

public:
explicit BlendedBlockHash() {}

explicit BlendedBlockHash(uint64_t CombinedHash) {
parseHashes(CombinedHash, Offset, OpcodeHash, InstrHash, NeighborHash);
}

/// Combine the blended hash into uint64_t.
uint64_t combine() const {
return combineHashes(Offset, OpcodeHash, InstrHash, NeighborHash);
}

/// Compute a distance between two given blended hashes. The smaller the
/// distance, the more similar two blocks are. For identical basic blocks,
/// the distance is zero.
uint64_t distance(const BlendedBlockHash &BBH) const {
assert(OpcodeHash == BBH.OpcodeHash &&
"incorrect blended hash distance computation");
uint64_t Dist = 0;
// Account for NeighborHash
Dist += NeighborHash == BBH.NeighborHash ? 0 : 1;
Dist <<= 16;
// Account for InstrHash
Dist += InstrHash == BBH.InstrHash ? 0 : 1;
Dist <<= 16;
// Account for Offset
Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset);
return Dist;
}

/// The offset of the basic block from the function start.
uint16_t Offset{0};
/// (Loose) Hash of the basic block instructions, excluding operands.
uint16_t OpcodeHash{0};
/// (Strong) Hash of the basic block instructions, including opcodes and
/// operands.
uint16_t InstrHash{0};
/// Hash of the (loose) basic block together with (loose) hashes of its
/// successors and predecessors.
uint16_t NeighborHash{0};
};

class MachineBlockHashInfo : public MachineFunctionPass {
DenseMap<unsigned, uint64_t> MBBHashInfo;

public:
static char ID;
MachineBlockHashInfo();

StringRef getPassName() const override {
return "Basic Block Hash Compute";
}

void getAnalysisUsage(AnalysisUsage &AU) const override;

bool runOnMachineFunction(MachineFunction &F) override;

uint64_t getMBBHash(const MachineBasicBlock &MBB);
};

} // end namespace llvm

#endif // LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H
1 change: 1 addition & 0 deletions llvm/include/llvm/InitializePasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ void initializeMIRCanonicalizerPass(PassRegistry &);
void initializeMIRNamerPass(PassRegistry &);
void initializeMIRPrintingPassPass(PassRegistry &);
void initializeMachineBlockFrequencyInfoWrapperPassPass(PassRegistry &);
void initializeMachineBlockHashInfoPass(PassRegistry&);
void initializeMachineBlockPlacementLegacyPass(PassRegistry &);
void initializeMachineBlockPlacementStatsLegacyPass(PassRegistry &);
void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &);
Expand Down
5 changes: 3 additions & 2 deletions llvm/include/llvm/Object/ELFTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -914,9 +914,10 @@ struct BBAddrMap {
uint32_t Size = 0; // Size of the basic block.
Metadata MD = {false, false, false, false,
false}; // Metdata for this basic block.
uint64_t Hash = 0; // Hash for this basic block.

BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, Metadata MD)
: ID(ID), Offset(Offset), Size(Size), MD(MD){};
BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, Metadata MD, uint64_t Hash)
: ID(ID), Offset(Offset), Size(Size), MD(MD), Hash(Hash){};

bool operator==(const BBEntry &Other) const {
return ID == Other.ID && Offset == Other.Offset && Size == Other.Size &&
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/ObjectYAML/ELFYAML.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ struct BBAddrMapEntry {
llvm::yaml::Hex64 AddressOffset;
llvm::yaml::Hex64 Size;
llvm::yaml::Hex64 Metadata;
llvm::yaml::Hex64 Hash;
};
uint8_t Version;
llvm::yaml::Hex8 Feature;
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockHashInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
Expand Down Expand Up @@ -451,6 +452,7 @@ const MCSection *AsmPrinter::getCurrentSection() const {
void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineBlockHashInfo>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
Expand Down Expand Up @@ -1479,6 +1481,8 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
PrevMBBEndSymbol = MBBSymbol;
}

auto MBHI = &getAnalysis<MachineBlockHashInfo>();

if (!Features.OmitBBEntries) {
// TODO: Remove this check when version 1 is deprecated.
if (BBAddrMapVersion > 1) {
Expand All @@ -1498,6 +1502,8 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol);
// Emit the Metadata.
OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
// Emit the Hash.
OutStreamer->emitULEB128IntValue(MBHI->getMBBHash(MBB));
}

PrevMBBEndSymbol = MBB.getEndSymbol();
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ add_llvm_component_library(LLVMCodeGen
LowerEmuTLS.cpp
MachineBasicBlock.cpp
MachineBlockFrequencyInfo.cpp
MachineBlockHashInfo.cpp
MachineBlockPlacement.cpp
MachineBranchProbabilityInfo.cpp
MachineCFGPrinter.cpp
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/CodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMIRNamerPass(Registry);
initializeMIRProfileLoaderPassPass(Registry);
initializeMachineBlockFrequencyInfoWrapperPassPass(Registry);
initializeMachineBlockHashInfoPass(Registry);
initializeMachineBlockPlacementLegacyPass(Registry);
initializeMachineBlockPlacementStatsLegacyPass(Registry);
initializeMachineCFGPrinterPass(Registry);
Expand Down
106 changes: 106 additions & 0 deletions llvm/lib/CodeGen/MachineBlockHashInfo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#include "llvm/CodeGen/MachineBlockHashInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

using OperandHashFuncTy = function_ref<uint64_t(uint64_t &, const MachineOperand &)>;

uint64_t hashBlock(const MachineBasicBlock &MBB, OperandHashFuncTy OperandHashFunc) {
uint64_t Hash = 0;
for (const MachineInstr &MI : MBB) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you also hash the terminator instructions (branches at the end of the block)? They may be reversed across builds.

if (MI.isMetaInstruction())
continue;
// Ignore unconditional jumps
if (MI.isUnconditionalBranch())
continue;
Hash = hashing::detail::hash_16_bytes(Hash, MI.getOpcode());
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef() && MO.getReg().isVirtual())
continue; // Skip virtual register defs.
Hash = OperandHashFunc(Hash, MO);
}
}
return Hash;
}

/// Hashing a 64-bit integer to a 16-bit one.
uint16_t hash_64_to_16(const uint64_t Hash) {
uint16_t Res = (uint16_t)(Hash & 0xFFFF);
Res ^= (uint16_t)((Hash >> 16) & 0xFFFF);
Res ^= (uint16_t)((Hash >> 32) & 0xFFFF);
Res ^= (uint16_t)((Hash >> 48) & 0xFFFF);
return Res;
}

uint64_t hashInstOperand(uint64_t &Hash, const MachineOperand &Operand) {
return hashing::detail::hash_16_bytes(Hash, hash_value(Operand));
}

INITIALIZE_PASS(MachineBlockHashInfo, "machine-block-hash",
"Machine Block Hash Analysis", true, true)

char MachineBlockHashInfo::ID = 0;

MachineBlockHashInfo::MachineBlockHashInfo() : MachineFunctionPass(ID) {
initializeMachineBlockHashInfoPass(*PassRegistry::getPassRegistry());
}

void MachineBlockHashInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}

bool MachineBlockHashInfo::runOnMachineFunction(MachineFunction &F) {
DenseMap<MachineBasicBlock *, BlendedBlockHash> BlendedHashes;
DenseMap<MachineBasicBlock *, uint64_t> OpcodeHashes;
uint16_t Offset = 0;
// Initialize hash components
for (MachineBasicBlock &MBB : F) {
BlendedBlockHash BlendedHash;
// offset of the machine basic block
BlendedHash.Offset = Offset;
Offset += MBB.size();
// Hashing opcodes
uint64_t OpcodeHash = hashBlock(MBB, [](uint64_t &Hash, const MachineOperand &Op) { return Hash; });
OpcodeHashes[&MBB] = OpcodeHash;
BlendedHash.OpcodeHash = hash_64_to_16(OpcodeHash);
// Hash complete instructions
uint64_t InstrHash = hashBlock(MBB, hashInstOperand);
BlendedHash.InstrHash = hash_64_to_16(InstrHash);
BlendedHashes[&MBB] = BlendedHash;
}

// Initialize neighbor hash
for (MachineBasicBlock &MBB : F) {
uint64_t Hash = OpcodeHashes[&MBB];
// Append hashes of successors
for (MachineBasicBlock *SuccMBB : MBB.successors()) {
uint64_t SuccHash = OpcodeHashes[SuccMBB];
Hash = hashing::detail::hash_16_bytes(Hash, SuccHash);
}
// Append hashes of predecessors
for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
uint64_t PredHash = OpcodeHashes[PredMBB];
Hash = hashing::detail::hash_16_bytes(Hash, PredHash);
}
BlendedHashes[&MBB].NeighborHash = hash_64_to_16(Hash);
}

// Assign hashes
for (MachineBasicBlock &MBB : F) {
if (MBB.getBBID()) {
MBBHashInfo[MBB.getBBID()->BaseID] = BlendedHashes[&MBB].combine();
}
}

return false;
}

uint64_t MachineBlockHashInfo::getMBBHash(const MachineBasicBlock &MBB) {
if (MBB.getBBID()) {
return MBBHashInfo[MBB.getBBID()->BaseID];
}
return 0;
}
3 changes: 2 additions & 1 deletion llvm/lib/Object/ELF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
uint32_t Offset = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
uint32_t Size = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
uint32_t MD = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
uint64_t Hash = readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr);
if (Version >= 1) {
Copy link
Contributor

@rlavaee rlavaee May 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to define a new version number for this to support backward-compatibility.

// Offset is calculated relative to the end of the previous BB.
Offset += PrevBBEndOffset;
Expand All @@ -884,7 +885,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
MetadataDecodeErr = MetadataOrErr.takeError();
break;
}
BBEntries.push_back({ID, Offset, Size, *MetadataOrErr});
BBEntries.push_back({ID, Offset, Size, *MetadataOrErr, Hash});
}
TotalNumBlocks += BBEntries.size();
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/ObjectYAML/ELFEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1502,6 +1502,7 @@ void ELFState<ELFT>::writeSectionContent(
SHeader.sh_size += CBA.writeULEB128(BBE.AddressOffset);
SHeader.sh_size += CBA.writeULEB128(BBE.Size);
SHeader.sh_size += CBA.writeULEB128(BBE.Metadata);
SHeader.sh_size += CBA.writeULEB128(BBE.Hash);
}
}
if (!PGOAnalyses)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/ObjectYAML/ELFYAML.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1882,6 +1882,7 @@ void MappingTraits<ELFYAML::BBAddrMapEntry::BBEntry>::mapping(
IO.mapRequired("AddressOffset", E.AddressOffset);
IO.mapRequired("Size", E.Size);
IO.mapRequired("Metadata", E.Metadata);
IO.mapRequired("Hash", E.Hash);
}

void MappingTraits<ELFYAML::PGOAnalysisMapEntry>::mapping(
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Stack Frame Layout Analysis
; CHECK-NEXT: Unpack machine instruction bundles
; CHECK-NEXT: Basic Block Hash Compute
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: AArch64 Assembly Printer
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Stack Frame Layout Analysis
; CHECK-NEXT: Unpack machine instruction bundles
; CHECK-NEXT: Basic Block Hash Compute
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: AArch64 Assembly Printer
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
; HOTNESS-NEXT: Freeing Pass 'Unpack machine instruction bundles'
; HOTNESS-NEXT: Executing Pass 'Verify generated machine code'
; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code'
; HOTNESS-NEXT: Executing Pass 'Basic Block Hash Compute'
; HOTNESS-NEXT: Executing Pass 'Lazy Machine Block Frequency Analysis'
; HOTNESS-NEXT: Executing Pass 'Machine Optimization Remark Emitter'
; HOTNESS-NEXT: Building MachineBlockFrequencyInfo on the fly
Expand Down Expand Up @@ -107,6 +108,7 @@
; NO_HOTNESS-NEXT: Freeing Pass 'Unpack machine instruction bundles'
; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code'
; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code'
; NO_HOTNESS-NEXT: Executing Pass 'Basic Block Hash Compute'
; NO_HOTNESS-NEXT: Executing Pass 'Lazy Machine Block Frequency Analysis'
; NO_HOTNESS-NEXT: Executing Pass 'Machine Optimization Remark Emitter'
; NO_HOTNESS-NEXT: Executing Pass 'AArch64 Assembly Printer'
Expand Down
5 changes: 5 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@
; GCN-O0-NEXT: Machine Optimization Remark Emitter
; GCN-O0-NEXT: Stack Frame Layout Analysis
; GCN-O0-NEXT: Function register usage analysis
; GCN-O0-NEXT: Basic Block Hash Compute
; GCN-O0-NEXT: AMDGPU Assembly Printer
; GCN-O0-NEXT: Free MachineFunction

Expand Down Expand Up @@ -433,6 +434,7 @@
; GCN-O1-NEXT: Machine Optimization Remark Emitter
; GCN-O1-NEXT: Stack Frame Layout Analysis
; GCN-O1-NEXT: Function register usage analysis
; GCN-O1-NEXT: Basic Block Hash Compute
; GCN-O1-NEXT: AMDGPU Assembly Printer
; GCN-O1-NEXT: Free MachineFunction

Expand Down Expand Up @@ -744,6 +746,7 @@
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
; GCN-O1-OPTS-NEXT: Stack Frame Layout Analysis
; GCN-O1-OPTS-NEXT: Function register usage analysis
; GCN-O1-OPTS-NEXT: Basic Block Hash Compute
; GCN-O1-OPTS-NEXT: AMDGPU Assembly Printer
; GCN-O1-OPTS-NEXT: Free MachineFunction

Expand Down Expand Up @@ -1061,6 +1064,7 @@
; GCN-O2-NEXT: Machine Optimization Remark Emitter
; GCN-O2-NEXT: Stack Frame Layout Analysis
; GCN-O2-NEXT: Function register usage analysis
; GCN-O2-NEXT: Basic Block Hash Compute
; GCN-O2-NEXT: AMDGPU Assembly Printer
; GCN-O2-NEXT: Free MachineFunction

Expand Down Expand Up @@ -1391,6 +1395,7 @@
; GCN-O3-NEXT: Machine Optimization Remark Emitter
; GCN-O3-NEXT: Stack Frame Layout Analysis
; GCN-O3-NEXT: Function register usage analysis
; GCN-O3-NEXT: Basic Block Hash Compute
; GCN-O3-NEXT: AMDGPU Assembly Printer
; GCN-O3-NEXT: Free MachineFunction

Expand Down
Loading