Add discriminator encoding and two-pass copy_id aggregation for gcov >= 3.

Pass 1 (SymbolMap::AddSourceCount / AddIndirectCallTarget) keys counts by
(line, base discriminator, copy_id). Pass 2 (CollapseCopyIDs, called from
AutoFDOProfileWriter::WriteToFile) strips copy_id and sums the counts.

NOTE(review): this patch was re-flowed from a copy whose line breaks,
indentation and <...> tokens had been lost. Blank context lines were placed
to match the hunk line counts, and the restored tokens (<vector>, <cstdint>,
<uint64_t>, <NameSymbolMap::iterator, bool>) are assumptions. Confirm the
context against the tree before applying.

diff --git a/create_gcov.cc b/create_gcov.cc
index cce528d..0b71425 100644
--- a/create_gcov.cc
+++ b/create_gcov.cc
@@ -41,6 +41,13 @@ int main(int argc, char **argv) {
       absl::GetFlag(FLAGS_gcov_version));
   devtools_crosstool_autofdo::ProfileCreator creator(
       absl::GetFlag(FLAGS_binary));
+
+  // gcov version 3+ turns on discriminator encoding and two-pass aggregation.
+  if (absl::GetFlag(FLAGS_gcov_version) >= 3) {
+    absl::SetFlag(&FLAGS_use_discriminator_encoding, true);
+    absl::SetFlag(&FLAGS_use_two_pass_aggregation, true);
+  }
+
   if (creator.CreateProfile(absl::GetFlag(FLAGS_profile),
                             absl::GetFlag(FLAGS_profiler), &writer,
                             absl::GetFlag(FLAGS_gcov))) {
diff --git a/gcov_discriminator_encoding.h b/gcov_discriminator_encoding.h
new file mode 100644
index 0000000..63b645d
--- /dev/null
+++ b/gcov_discriminator_encoding.h
@@ -0,0 +1,66 @@
+// Discriminator encoding utilities for AutoFDO
+// Similar to LLVM's discriminator encoding scheme
+//
+// Discriminator format: [Base:8][Multiplicity:7][CopyID:11][Unused:6]
+// - Base discriminator (bits 0-7): Distinguishes instructions at same line
+// - Multiplicity (bits 8-14): Duplication factor for unrolling/vectorization
+// - CopyID (bits 15-25): Unique identifier for code copies
+// - Unused (bits 26-31): Reserved
+
+#ifndef AUTOFDO_DISCRIMINATOR_ENCODING_H_
+#define AUTOFDO_DISCRIMINATOR_ENCODING_H_
+
+#include <cstdint>
+
+namespace devtools_crosstool_autofdo {
+
+// Extract base discriminator (bits 0-7)
+inline uint32_t GetBaseDiscriminator(uint32_t discriminator) {
+  return discriminator & 0xFF;
+}
+
+// Extract multiplicity/duplication factor (bits 8-14)
+// Returns 1 if multiplicity bits are 0 (no duplication)
+inline uint32_t GetMultiplicity(uint32_t discriminator) {
+  uint32_t mult = (discriminator >> 8) & 0x7F;
+  return (mult == 0) ? 1 : mult;
+}
+
+// Extract copy ID (bits 15-25)
+inline uint32_t GetCopyID(uint32_t discriminator) {
+  return (discriminator >> 15) & 0x7FF;
+}
+
+// Encode discriminator from components
+inline uint32_t EncodeDiscriminator(uint32_t base, uint32_t multiplicity,
+                                    uint32_t copy_id) {
+  // Out-of-range input: mask so stray bits cannot leak into other fields.
+  if (base > 0xFF || multiplicity > 0x7F || copy_id > 0x7FF) {
+    return base & 0xFF;  // Fallback to just the 8-bit base field
+  }
+  return base | (multiplicity << 8) | (copy_id << 15);
+}
+
+// Check if discriminator has multiplicity encoded
+inline bool HasMultiplicity(uint32_t discriminator) {
+  return GetMultiplicity(discriminator) > 1;
+}
+
+// Check if discriminator has copy ID encoded
+inline bool HasCopyID(uint32_t discriminator) {
+  return GetCopyID(discriminator) != 0;
+}
+
+// Strip only multiplicity bits, keeping base and copy_id
+// Used for MAX aggregation where we want to aggregate per (line, base, copy_id)
+// but not create separate entries for different multiplicities
+inline uint32_t StripMultiplicity(uint32_t discriminator) {
+  uint32_t base = GetBaseDiscriminator(discriminator);
+  uint32_t copy_id = GetCopyID(discriminator);
+  return base | (copy_id << 15);
+}
+
+}  // namespace devtools_crosstool_autofdo
+
+#endif  // AUTOFDO_DISCRIMINATOR_ENCODING_H_
+
diff --git a/profile_writer.cc b/profile_writer.cc
index 875ed06..dc7eec4 100644
--- a/profile_writer.cc
+++ b/profile_writer.cc
@@ -215,6 +215,11 @@ bool AutoFDOProfileWriter::WriteToFile(const std::string &output_filename) {
   if (absl::GetFlag(FLAGS_debug_dump)) Dump();
 
+  // Pass 2: Collapse copy_ids before writing (SUM aggregation)
+  if (absl::GetFlag(FLAGS_use_two_pass_aggregation)) {
+    symbol_map_->CollapseCopyIDs();
+  }
+
   if (!WriteHeader(output_filename)) {
     return false;
   }
 
diff --git a/profile_writer.h b/profile_writer.h
index 2557b08..6ffe917 100644
--- a/profile_writer.h
+++ b/profile_writer.h
@@ -18,22 +18,22 @@ class SymbolMap;
 
 class ProfileWriter {
  public:
-  explicit ProfileWriter(const SymbolMap *symbol_map)
+  explicit ProfileWriter(SymbolMap *symbol_map)
       : symbol_map_(symbol_map) {}
   explicit ProfileWriter() : symbol_map_(nullptr) {}
   virtual ~ProfileWriter() {}
 
   virtual bool WriteToFile(const std::string &output_file) = 0;
-  void setSymbolMap(const SymbolMap *symbol_map) { symbol_map_ = symbol_map; }
+  void setSymbolMap(SymbolMap *symbol_map) { symbol_map_ = symbol_map; }
   void Dump();
 
  protected:
-  const SymbolMap *symbol_map_;
+  SymbolMap *symbol_map_;
 };
 
 class AutoFDOProfileWriter : public ProfileWriter {
  public:
-  explicit AutoFDOProfileWriter(const SymbolMap *symbol_map,
+  explicit AutoFDOProfileWriter(SymbolMap *symbol_map,
                                 uint32_t gcov_version)
       : ProfileWriter(symbol_map), gcov_version_(gcov_version) {}
   explicit AutoFDOProfileWriter(uint32_t gcov_version)
diff --git a/source_info.h b/source_info.h
index a8f4a8f..e431719 100644
--- a/source_info.h
+++ b/source_info.h
@@ -8,9 +8,13 @@
 #include <vector>
 
 #include "base/integral_types.h"
+#include "base/logging.h"
 #include "base/macros.h"
 #if defined(HAVE_LLVM)
 #include "llvm/IR/DebugInfoMetadata.h"
+#else
+// Include discriminator encoding utilities only when LLVM is not available
+#include "gcov_discriminator_encoding.h"
 #endif
 
 namespace devtools_crosstool_autofdo {
@@ -49,7 +53,25 @@ struct SourceInfo {
                    discriminator)
              : discriminator));
 #else
-    return (static_cast<uint64_t>(line - start_line) << 32) | discriminator;
+    // Profile stores only base discriminator (bits 0-7).
+    uint32_t disc = use_discriminator_encoding
+                        ? GetBaseDiscriminator(discriminator)
+                        : discriminator;
+    return (static_cast<uint64_t>(line - start_line) << 32) | disc;
+#endif
+  }
+
+  // Offset that keeps copy_id but strips multiplicity
+  // Used for MAX aggregation where we want to aggregate per (line, base, copy_id)
+  uint64_t OffsetWithCopyID() const {
+#if defined(HAVE_LLVM)
+    // LLVM doesn't use two-pass aggregation, so just return regular offset.
+    // This should never be called in practice (use_two_pass_aggregation=false for LLVM).
+    return Offset(false);  // Use full discriminator
+#else
+    // Strip only multiplicity bits, keep base and copy_id
+    uint32_t disc = StripMultiplicity(discriminator);
+    return (static_cast<uint64_t>(line - start_line) << 32) | disc;
 #endif
   }
 
@@ -59,7 +81,9 @@ struct SourceInfo {
     return llvm::DILocation::getDuplicationFactorFromDiscriminator(
         discriminator);
 #else
-    return 1;
+    // Only extract multiplicity if discriminator is non-zero
+    if (discriminator == 0) return 1;
+    return GetMultiplicity(discriminator);
 #endif
   }
 
diff --git a/symbol_map.cc b/symbol_map.cc
index 2483835..74fefcd 100644
--- a/symbol_map.cc
+++ b/symbol_map.cc
@@ -56,6 +56,10 @@ ABSL_FLAG(bool, demangle_symbol_names, false,
 ABSL_FLAG(bool, use_discriminator_encoding, false,
           "Tell the symbol map that the discriminator encoding is enabled in "
           "the profile.");
+ABSL_FLAG(bool, use_two_pass_aggregation, false,
+          "Enable two-pass aggregation for copy_id: Pass 1 does MAX per "
+          "(line, base, copy_id), Pass 2 does SUM after stripping copy_id. "
+          "Should be enabled only for create_gcov with gcov_version >= 3.");
 ABSL_FLAG(bool, use_discriminator_multiply_factor, true,
           "Tell the symbol map whether to use discriminator multiply factors.");
 #if defined(HAVE_LLVM)
@@ -162,6 +166,43 @@ void Symbol::Merge(const Symbol *other) {
   }
 }
 
+void Symbol::CollapseCopyIDs() {
+  // Pass 2: Strip copy_id from discriminators and SUM counts
+  // Aggregates across different copy_ids to get total execution count
+
+  PositionCountMap new_pos_counts;
+
+  for (const auto &pos_count : pos_counts) {
+    uint64_t old_offset = pos_count.first;
+
+    // Extract line and discriminator using helpers
+    uint32_t line = SourceInfo::GetLineNumberFromOffset(old_offset);
+    uint32_t discriminator = SourceInfo::GetDiscriminatorFromOffset(old_offset);
+
+    // Strip copy_id and multiplicity, keep only base discriminator
+#if defined(HAVE_LLVM)
+    uint32_t base = llvm::DILocation::getBaseDiscriminatorFromDiscriminator(
+        discriminator);
+#else
+    uint32_t base = GetBaseDiscriminator(discriminator);
+#endif
+
+    // Create new offset with only base discriminator
+    uint64_t new_offset = SourceInfo::GenerateOffset(line, base);
+
+    // SUM counts from different copy_ids
+    new_pos_counts[new_offset] += pos_count.second;
+  }
+
+  pos_counts = std::move(new_pos_counts);
+
+  // Recursively collapse copy_ids in all callsites
+  for (auto &callsite_symbol : callsites) {
+    if (callsite_symbol.second) {
+      callsite_symbol.second->CollapseCopyIDs();
+    }
+  }
+}
 void Symbol::EstimateHeadCount() {
   if (head_count != 0) return;
 
@@ -340,6 +381,15 @@ void SymbolMap::ElideSuffixesAndMerge() {
   }
 }
 
+void SymbolMap::CollapseCopyIDs() {
+  // Pass 2: Collapse copy_ids for all symbols
+  for (auto &name_symbol : map_) {
+    if (name_symbol.second) {
+      name_symbol.second->CollapseCopyIDs();
+    }
+  }
+}
+
 void SymbolMap::AddSymbol(absl::string_view name) {
   std::pair<NameSymbolMap::iterator, bool> ret =
       map_.insert(NameSymbolMap::value_type(name, nullptr));
@@ -557,17 +607,33 @@ void SymbolMap::AddSourceCount(absl::string_view symbol_name,
   count *= duplication;
   Symbol *symbol = TraverseInlineStack(symbol_name, src, count, data_source);
   if (!symbol) return;
+
   bool need_conversion = (data_source == PERFDATA || data_source == AFDOPROTO);
   if (need_conversion && src[0].HasInvalidInfo()) return;
-  uint64_t offset = src[0].Offset(use_discriminator_encoding);
-  // If it is to convert perf data or afdoproto to afdo profile, select the
-  // MAX count if there are multiple records mapping to the same offset.
-  // If it is just to read afdo profile, merge those counts.
+
+  // Two-pass aggregation for discriminator encoding:
+  // Pass 1 (here): MAX per (line, base, copy_id) - keep copy_id in offset
+  // Pass 2 (before write): SUM across copy_ids - strip copy_id
+  bool use_two_pass = absl::GetFlag(FLAGS_use_two_pass_aggregation);
+
+  // Offset calculation: keep copy_id only for two-pass aggregation
+  uint64_t offset;
+  if (use_two_pass) {
+    // Pass 1: Keep copy_id for MAX aggregation per copy
+    offset = src[0].OffsetWithCopyID();
+  } else {
+    // Legacy mode or profile reading: strip copy_id
+    offset = src[0].Offset(use_discriminator_encoding);
+  }
+
+  // Aggregation method: MAX for raw data conversion, SUM for profile merging
   if (need_conversion) {
+    // MAX aggregation for raw perf data (handles duplicate samples)
     if (count > symbol->pos_counts[offset].count) {
       symbol->pos_counts[offset].count = count;
     }
   } else {
+    // SUM aggregation for profile merging
     symbol->pos_counts[offset].count += count;
   }
   symbol->pos_counts[offset].num_inst += num_inst;
@@ -579,13 +645,22 @@ bool SymbolMap::AddIndirectCallTarget(absl::string_view symbol_name,
                                       DataSource data_source) {
   bool use_discriminator_encoding =
       absl::GetFlag(FLAGS_use_discriminator_encoding);
+  bool use_two_pass = absl::GetFlag(FLAGS_use_two_pass_aggregation);
   Symbol *symbol = TraverseInlineStack(symbol_name, src, 0, data_source);
   if (!symbol) return false;
   if ((data_source == PERFDATA || data_source == AFDOPROTO) &&
       src[0].HasInvalidInfo())
     return false;
-  symbol->pos_counts[src[0].Offset(use_discriminator_encoding)]
-      .target_map[GetOriginalName(target)] = count;
+
+  // Use same offset calculation as AddSourceCount for consistency
+  uint64_t offset;
+  if (use_two_pass) {
+    offset = src[0].OffsetWithCopyID();  // Keep copy_id during Pass 1
+  } else {
+    offset = src[0].Offset(use_discriminator_encoding);  // Strip copy_id
+  }
+
+  symbol->pos_counts[offset].target_map[GetOriginalName(target)] = count;
   return true;
 }
 
diff --git a/symbol_map.h b/symbol_map.h
index 9e47211..d0b3da8 100644
--- a/symbol_map.h
+++ b/symbol_map.h
@@ -36,6 +36,11 @@
 // Whether to use discriminator encoding.
 ABSL_DECLARE_FLAG(bool, use_discriminator_encoding);
 
+// Whether to use two-pass aggregation for copy_id:
+// Pass 1: MAX per (line, base, copy_id)
+// Pass 2: SUM after stripping copy_id
+ABSL_DECLARE_FLAG(bool, use_two_pass_aggregation);
+
 #if defined(HAVE_LLVM)
 // Whether to use FS discriminator.
 ABSL_DECLARE_FLAG(bool, use_fs_discriminator);
@@ -189,6 +194,10 @@ class Symbol {
   // Merges profile stored in src symbol with this symbol.
   void Merge(const Symbol *src);
 
+  // Pass 2: Strip copy_id and SUM across different copies.
+  // Called before writing profile to collapse discriminators.
+  void CollapseCopyIDs();
+
   // Get an estimation of head count from the starting source or callsite
   // locations.
   void EstimateHeadCount();
@@ -416,6 +425,10 @@ class SymbolMap {
   // profile data.
   void ElideSuffixesAndMerge();
 
+  // Pass 2: Strip copy_id and SUM across all copies.
+  // Called before writing profile to get final aggregated counts.
+  void CollapseCopyIDs();
+
   // Increments symbol's entry count.
   void AddSymbolEntryCount(absl::string_view symbol, uint64_t head_count,
                            uint64_t total_count = 0);