Skip to content

Commit 53005fd

Browse files
authored
Use the Propeller CFG profile in the PGO analysis map if it is available. (#163252)
This PR implements emitting the post-link CFG information in the PGO analysis map, as explained in the [RFC](https://discourse.llvm.org/t/rfc-extending-the-pgo-analysis-map-with-propeller-cfg-frequencies/88617). This is enabled by the flag `pgo-analysis-map-emit-bb-sections-cfg`. This PR bumps the SHT_LLVM_BB_ADDR_MAP version to 5. It also includes some refactoring changes related to storing the CFG in the basic block sections profile reader.
1 parent 7b9168c commit 53005fd

14 files changed

+263
-108
lines changed

llvm/docs/Extensions.rst

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,13 @@ as offsets relative to prior addresses.
416416
The following versioning schemes are currently supported (newer versions support
417417
features of the older versions).
418418

419-
Version 4 (newest): Capable of encoding basic block hashes. This feature is
419+
Version 5 (newest): Capable of encoding Post-Link CFG information, which
420+
provides basic block and edge frequencies obtained from a post-link tool like
421+
Propeller, reflecting the final binary layout. This feature is enabled by the 8th
422+
bit of the feature entry.
423+
The feature entry is two bytes long in this version to accommodate future extensions.
424+
425+
Version 4: Capable of encoding basic block hashes. This feature is
420426
enabled by the 7th bit of the feature byte.
421427

422428
Example:
@@ -526,6 +532,13 @@ those bits are:
526532
defined in ``llvm/Support/BranchProbability.h``. It indicates the probability
527533
that the block is followed by a given successor block during execution.
528534

535+
#. Post-Link CFG - When enabled, the PGO Analysis Map will include CFG
536+
information obtained from a post-link tool, such as Propeller. This feature
537+
is enabled with the ``-pgo-analysis-map-emit-bb-sections-cfg`` flag. When
538+
this option is active, the map will contain basic block and edge frequencies
539+
from the basic block sections profile. This provides more accurate profiling
540+
information that reflects the final binary layout.
541+
529542
This extra data requires version 2 or above. This is necessary since successors
530543
of basic blocks won't know their index but will know their BB ID.
531544

llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h

Lines changed: 50 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -42,22 +42,47 @@ struct BBClusterInfo {
4242
unsigned PositionInCluster;
4343
};
4444

45-
// This represents the raw input profile for one function.
46-
struct FunctionPathAndClusterInfo {
47-
// BB Cluster information specified by `UniqueBBID`s.
48-
SmallVector<BBClusterInfo> ClusterInfo;
49-
// Paths to clone. A path a -> b -> c -> d implies cloning b, c, and d along
50-
// the edge a -> b (a is not cloned). The index of the path in this vector
51-
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
52-
SmallVector<SmallVector<unsigned>> ClonePaths;
45+
// This represents the CFG profile data for a function.
46+
struct CFGProfile {
5347
// Node counts for each basic block.
5448
DenseMap<UniqueBBID, uint64_t> NodeCounts;
5549
// Edge counts for each edge, stored as a nested map.
5650
DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts;
51+
5752
// Hash for each basic block. The hashes are stored for every original block
5853
// (not cloned blocks), hence the map key being unsigned instead of
5954
// UniqueBBID.
6055
DenseMap<unsigned, uint64_t> BBHashes;
56+
57+
// Returns the profile count for the given basic block or zero if it does not
58+
// exist.
59+
uint64_t getBlockCount(const UniqueBBID &BBID) const {
60+
return NodeCounts.lookup(BBID);
61+
}
62+
63+
// Returns the profile count for the edge from `SrcBBID` to `SinkBBID` or
64+
// zero if it does not exist.
65+
uint64_t getEdgeCount(const UniqueBBID &SrcBBID,
66+
const UniqueBBID &SinkBBID) const {
67+
auto It = EdgeCounts.find(SrcBBID);
68+
if (It == EdgeCounts.end())
69+
return 0;
70+
return It->second.lookup(SinkBBID);
71+
}
72+
};
73+
74+
// This struct represents the raw optimization profile for a function,
75+
// including CFG data (block and edge counts) and layout directives (clustering
76+
// and cloning paths).
77+
struct FunctionOptimizationProfile {
78+
// BB Cluster information specified by `UniqueBBID`s.
79+
SmallVector<BBClusterInfo> ClusterInfo;
80+
// Paths to clone. A path a -> b -> c -> d implies cloning b, c, and d along
81+
// the edge a -> b (a is not cloned). The index of the path in this vector
82+
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
83+
SmallVector<SmallVector<unsigned>> ClonePaths;
84+
// CFG profile data (block and edge frequencies).
85+
CFGProfile CFG;
6186
};
6287

6388
class BasicBlockSectionsProfileReader {
@@ -81,14 +106,17 @@ class BasicBlockSectionsProfileReader {
81106
SmallVector<SmallVector<unsigned>>
82107
getClonePathsForFunction(StringRef FuncName) const;
83108

84-
// Returns the profile count for the edge from `SrcBBID` to `SinkBBID` in
85-
// function `FuncName` or zero if it does not exist.
86109
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
87-
const UniqueBBID &SinkBBID) const;
110+
const UniqueBBID &DestBBID) const;
88111

89-
// Return the complete function path and cluster info for the given function.
90-
std::pair<bool, FunctionPathAndClusterInfo>
91-
getFunctionPathAndClusterInfo(StringRef FuncName) const;
112+
// Returns a pointer to the CFGProfile for the function \p FuncName.
113+
// Returns nullptr if no profile data is available for the function.
114+
const CFGProfile *getFunctionCFGProfile(StringRef FuncName) const {
115+
auto It = ProgramOptimizationProfile.find(getAliasName(FuncName));
116+
if (It == ProgramOptimizationProfile.end())
117+
return nullptr;
118+
return &It->second.CFG;
119+
}
92120

93121
private:
94122
StringRef getAliasName(StringRef FuncName) const {
@@ -130,16 +158,14 @@ class BasicBlockSectionsProfileReader {
130158
// empty string if no debug info is available.
131159
StringMap<SmallString<128>> FunctionNameToDIFilename;
132160

133-
// This contains the BB cluster information for the whole program.
134-
//
135-
// For every function name, it contains the cloning and cluster information
136-
// for (all or some of) its basic blocks. The cluster information for every
137-
// basic block includes its cluster ID along with the position of the basic
138-
// block in that cluster.
139-
StringMap<FunctionPathAndClusterInfo> ProgramPathAndClusterInfo;
161+
// This map contains the optimization profile for each function in the
162+
// program. A function's optimization profile consists of CFG data (node and
163+
// edge counts) and layout directives such as basic block clustering and
164+
// cloning paths.
165+
StringMap<FunctionOptimizationProfile> ProgramOptimizationProfile;
140166

141167
// Some functions have alias names. We use this map to find the main alias
142-
// name which appears in ProgramPathAndClusterInfo as a key.
168+
// name which appears in ProgramOptimizationProfile as a key.
143169
StringMap<StringRef> FuncAliasMap;
144170
};
145171

@@ -196,12 +222,11 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
196222
SmallVector<SmallVector<unsigned>>
197223
getClonePathsForFunction(StringRef FuncName) const;
198224

225+
const CFGProfile *getFunctionCFGProfile(StringRef FuncName) const;
226+
199227
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
200228
const UniqueBBID &DestBBID) const;
201229

202-
std::pair<bool, FunctionPathAndClusterInfo>
203-
getFunctionPathAndClusterInfo(StringRef FuncName) const;
204-
205230
// Initializes the FunctionNameToDIFilename map for the current module and
206231
// then reads the profile for the matching functions.
207232
bool doInitialization(Module &M) override;

llvm/include/llvm/MC/MCContext.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ class MCContext {
175175
unsigned GetInstance(unsigned LocalLabelVal);
176176

177177
/// SHT_LLVM_BB_ADDR_MAP version to emit.
178-
uint8_t BBAddrMapVersion = 4;
178+
uint8_t BBAddrMapVersion = 5;
179179

180180
/// The file name of the log file from the environment variable
181181
/// AS_SECURE_LOG_FILE. Which must be set before the .secure_log_unique

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include "WinException.h"
2121
#include "llvm/ADT/APFloat.h"
2222
#include "llvm/ADT/APInt.h"
23-
#include "llvm/ADT/BitmaskEnum.h"
2423
#include "llvm/ADT/DenseMap.h"
2524
#include "llvm/ADT/STLExtras.h"
2625
#include "llvm/ADT/SmallPtrSet.h"
@@ -37,6 +36,7 @@
3736
#include "llvm/BinaryFormat/COFF.h"
3837
#include "llvm/BinaryFormat/Dwarf.h"
3938
#include "llvm/BinaryFormat/ELF.h"
39+
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
4040
#include "llvm/CodeGen/GCMetadata.h"
4141
#include "llvm/CodeGen/GCMetadataPrinter.h"
4242
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
@@ -150,6 +150,7 @@ enum class PGOMapFeaturesEnum {
150150
FuncEntryCount,
151151
BBFreq,
152152
BrProb,
153+
PropellerCFG,
153154
All,
154155
};
155156
static cl::bits<PGOMapFeaturesEnum> PgoAnalysisMapFeatures(
@@ -166,6 +167,12 @@ static cl::bits<PGOMapFeaturesEnum> PgoAnalysisMapFeatures(
166167
"Enable extended information within the SHT_LLVM_BB_ADDR_MAP that is "
167168
"extracted from PGO related analysis."));
168169

170+
static cl::opt<bool> PgoAnalysisMapEmitBBSectionsCfg(
171+
"pgo-analysis-map-emit-bb-sections-cfg",
172+
cl::desc("Enable the post-link cfg information from the basic block "
173+
"sections profile in the PGO analysis map"),
174+
cl::Hidden, cl::init(false));
175+
169176
static cl::opt<bool> BBAddrMapSkipEmitBBEntries(
170177
"basic-block-address-map-skip-bb-entries",
171178
cl::desc("Skip emitting basic block entries in the SHT_LLVM_BB_ADDR_MAP "
@@ -479,6 +486,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
479486
AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
480487
if (EmitBBHash)
481488
AU.addRequired<MachineBlockHashInfo>();
489+
AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
482490
}
483491

484492
bool AsmPrinter::doInitialization(Module &M) {
@@ -1409,7 +1417,7 @@ static uint32_t getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
14091417

14101418
static llvm::object::BBAddrMap::Features
14111419
getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
1412-
bool HasCalls) {
1420+
bool HasCalls, const CFGProfile *FuncCFGProfile) {
14131421
// Ensure that the user has not passed in additional options while also
14141422
// specifying all or none.
14151423
if ((PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::None) ||
@@ -1431,17 +1439,17 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
14311439
bool BrProbEnabled =
14321440
AllFeatures ||
14331441
(!NoFeatures && PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BrProb));
1442+
bool PostLinkCfgEnabled = FuncCFGProfile && PgoAnalysisMapEmitBBSectionsCfg;
14341443

14351444
if ((BBFreqEnabled || BrProbEnabled) && BBAddrMapSkipEmitBBEntries) {
14361445
MF.getFunction().getContext().emitError(
1437-
"BB entries info is required for BBFreq and BrProb "
1438-
"features");
1446+
"BB entries info is required for BBFreq and BrProb features");
14391447
}
14401448
return {FuncEntryCountEnabled, BBFreqEnabled, BrProbEnabled,
14411449
MF.hasBBSections() && NumMBBSectionRanges > 1,
14421450
// Use static_cast to avoid breakage of tests on windows.
14431451
static_cast<bool>(BBAddrMapSkipEmitBBEntries), HasCalls,
1444-
static_cast<bool>(EmitBBHash), false};
1452+
static_cast<bool>(EmitBBHash), PostLinkCfgEnabled};
14451453
}
14461454

14471455
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
@@ -1450,6 +1458,14 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
14501458
assert(BBAddrMapSection && ".llvm_bb_addr_map section is not initialized.");
14511459
bool HasCalls = !CurrentFnCallsiteEndSymbols.empty();
14521460

1461+
const BasicBlockSectionsProfileReader *BBSPR = nullptr;
1462+
if (auto *BBSPRPass =
1463+
getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>())
1464+
BBSPR = &BBSPRPass->getBBSPR();
1465+
const CFGProfile *FuncCFGProfile = nullptr;
1466+
if (BBSPR)
1467+
FuncCFGProfile = BBSPR->getFunctionCFGProfile(MF.getFunction().getName());
1468+
14531469
const MCSymbol *FunctionSymbol = getFunctionBegin();
14541470

14551471
OutStreamer->pushSection();
@@ -1458,8 +1474,9 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
14581474
uint8_t BBAddrMapVersion = OutStreamer->getContext().getBBAddrMapVersion();
14591475
OutStreamer->emitInt8(BBAddrMapVersion);
14601476
OutStreamer->AddComment("feature");
1461-
auto Features = getBBAddrMapFeature(MF, MBBSectionRanges.size(), HasCalls);
1462-
OutStreamer->emitInt8(Features.encode());
1477+
auto Features = getBBAddrMapFeature(MF, MBBSectionRanges.size(), HasCalls,
1478+
FuncCFGProfile);
1479+
OutStreamer->emitInt16(Features.encode());
14631480
// Emit BB Information for each basic block in the function.
14641481
if (Features.MultiBBRange) {
14651482
OutStreamer->AddComment("number of basic block ranges");
@@ -1563,6 +1580,11 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
15631580
OutStreamer->AddComment("basic block frequency");
15641581
OutStreamer->emitULEB128IntValue(
15651582
MBFI->getBlockFreq(&MBB).getFrequency());
1583+
if (Features.PostLinkCfg) {
1584+
OutStreamer->AddComment("basic block frequency (propeller)");
1585+
OutStreamer->emitULEB128IntValue(
1586+
FuncCFGProfile->getBlockCount(*MBB.getBBID()));
1587+
}
15661588
}
15671589
if (Features.BrProb) {
15681590
unsigned SuccCount = MBB.succ_size();
@@ -1574,6 +1596,11 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
15741596
OutStreamer->AddComment("successor branch probability");
15751597
OutStreamer->emitULEB128IntValue(
15761598
MBPI->getEdgeProbability(&MBB, SuccMBB).getNumerator());
1599+
if (Features.PostLinkCfg) {
1600+
OutStreamer->AddComment("successor branch frequency (propeller)");
1601+
OutStreamer->emitULEB128IntValue(FuncCFGProfile->getEdgeCount(
1602+
*MBB.getBBID(), *SuccMBB->getBBID()));
1603+
}
15771604
}
15781605
}
15791606
}

llvm/lib/CodeGen/BasicBlockMatchingAndInference.cpp

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -119,33 +119,32 @@ BasicBlockMatchingAndInference::initWeightInfoByMatching(MachineFunction &MF) {
119119
StaleMatcher Matcher;
120120
Matcher.init(Blocks, Hashes);
121121
BasicBlockMatchingAndInference::WeightInfo MatchWeight;
122-
auto [IsValid, PathAndClusterInfo] =
123-
BSPR->getFunctionPathAndClusterInfo(MF.getName());
124-
if (!IsValid)
122+
const CFGProfile *CFG = BSPR->getFunctionCFGProfile(MF.getName());
123+
if (CFG == nullptr)
125124
return MatchWeight;
126-
for (auto &BlockCount : PathAndClusterInfo.NodeCounts) {
127-
if (PathAndClusterInfo.BBHashes.count(BlockCount.first.BaseID)) {
128-
auto Hash = PathAndClusterInfo.BBHashes[BlockCount.first.BaseID];
125+
for (auto &BlockCount : CFG->NodeCounts) {
126+
if (CFG->BBHashes.count(BlockCount.first.BaseID)) {
127+
auto Hash = CFG->BBHashes.lookup(BlockCount.first.BaseID);
129128
MachineBasicBlock *Block = Matcher.matchBlock(BlendedBlockHash(Hash));
130129
// When a basic block has clone copies, sum their counts.
131130
if (Block != nullptr)
132131
MatchWeight.BlockWeights[Block] += BlockCount.second;
133132
}
134133
}
135-
for (auto &PredItem : PathAndClusterInfo.EdgeCounts) {
134+
for (auto &PredItem : CFG->EdgeCounts) {
136135
auto PredID = PredItem.first.BaseID;
137-
if (!PathAndClusterInfo.BBHashes.count(PredID))
136+
if (!CFG->BBHashes.count(PredID))
138137
continue;
139-
auto PredHash = PathAndClusterInfo.BBHashes[PredID];
138+
auto PredHash = CFG->BBHashes.lookup(PredID);
140139
MachineBasicBlock *PredBlock =
141140
Matcher.matchBlock(BlendedBlockHash(PredHash));
142141
if (PredBlock == nullptr)
143142
continue;
144143
for (auto &SuccItem : PredItem.second) {
145144
auto SuccID = SuccItem.first.BaseID;
146145
auto EdgeWeight = SuccItem.second;
147-
if (PathAndClusterInfo.BBHashes.count(SuccID)) {
148-
auto SuccHash = PathAndClusterInfo.BBHashes[SuccID];
146+
if (CFG->BBHashes.count(SuccID)) {
147+
auto SuccHash = CFG->BBHashes.lookup(SuccID);
149148
MachineBasicBlock *SuccBlock =
150149
Matcher.matchBlock(BlendedBlockHash(SuccHash));
151150
// When an edge has clone copies, sum their counts.

0 commit comments

Comments
 (0)