Mitigation for bug that results in reuse of inlinee IDs when functions share names

This commit is contained in:
Mason Remaley
2026-04-11 22:59:52 -07:00
parent 541bd6c369
commit 312ef9558b
2 changed files with 128 additions and 59 deletions
+87 -25
View File
@@ -26,6 +26,10 @@ pub const Module = struct {
symbols: []u8,
subsect_info: []u8,
checksum_offset: ?usize,
/// The inlinee source lines, sorted by inlinee. This saves us from repeatedly doing linear
/// searches over all inlinees. We prefer binary search over a hashmap as LLVM sometimes outputs
/// multiple entries for a single inlinee ID, see `getInlineeSourceLines` for more info.
inlinee_source_lines: []InlineeSourceLine,
pub fn deinit(self: *Module, allocator: Allocator) void {
allocator.free(self.module_name);
@@ -33,6 +37,7 @@ pub const Module = struct {
if (self.populated) {
allocator.free(self.symbols);
allocator.free(self.subsect_info);
allocator.free(self.inlinee_source_lines);
}
}
};
@@ -117,6 +122,7 @@ pub fn parseDbiStream(self: *Pdb) !void {
.symbols = undefined,
.subsect_info = undefined,
.checksum_offset = null,
.inlinee_source_lines = undefined,
});
mod_info_offset += this_record_len;
@@ -657,40 +663,58 @@ pub fn getSymbolName(self: *Pdb, proc_sym: *align(1) const pdb.ProcSym) []const
/// An inlinee source line record together with the signature of the subsection it was read from.
pub const InlineeSourceLine = struct {
    /// Signature of the `inlinee_lines` subsection that contained this record.
    signature: pdb.InlineeSourceLineSignature,
    /// The (possibly unaligned) record itself.
    info: *align(1) const pdb.InlineeSourceLine,

    /// Sort predicate: true when `lhs` has a strictly smaller inlinee ID than `rhs`.
    /// The unused first parameter is the sort context.
    fn lessThan(_: void, lhs: InlineeSourceLine, rhs: InlineeSourceLine) bool {
        return std.math.order(lhs.info.inlinee, rhs.info.inlinee) == .lt;
    }

    /// Search comparator: orders the wanted `inlinee` ID relative to the ID stored in `self`.
    fn compare(inlinee: u32, self: InlineeSourceLine) std.math.Order {
        const candidate = self.info.inlinee;
        return std.math.order(inlinee, candidate);
    }
};
pub fn getInlineeSourceLine(
/// Returns all `InlineeSourceLine`s for a given module with the given inlinee. Ideally there would
/// only be one entry per inlinee, but LLVM appears to assign all functions that share a name the
/// same inlinee ID. This appears to be a bug, so the best the caller can do right now is print all
/// the results.
pub fn getInlineeSourceLines(
self: *Pdb,
mod: *Module,
inlinee: u32,
) ?InlineeSourceLine {
) []const InlineeSourceLine {
_ = self;
var subsects: Io.Reader = .fixed(mod.subsect_info);
while (subsects.takeStructPointer(pdb.DebugSubsectionHeader) catch null) |subsect_hdr| {
var subsect: Io.Reader = .fixed(subsects.take(subsect_hdr.length) catch return null);
if (subsect_hdr.kind == .inlinee_lines) {
const signature = subsect.takeEnum(pdb.InlineeSourceLineSignature, .little) catch return null;
const has_extra_files = switch (signature) {
.normal => false,
.ex => true,
else => continue,
};
while (subsect.takeStructPointer(pdb.InlineeSourceLine) catch null) |inlinee_src_line| {
if (has_extra_files) {
const file_count = subsect.takeInt(u32, .little) catch return null;
const file_bytes = std.math.mul(usize, file_count, @sizeOf(u32)) catch return null;
subsect.discardAll(file_bytes) catch return null;
}
// Binary search to an arbitrary match, if there are other matches they will be adjacent
const any = std.sort.binarySearch(
InlineeSourceLine,
mod.inlinee_source_lines,
inlinee,
InlineeSourceLine.compare,
) orelse return &.{};
if (inlinee_src_line.inlinee == inlinee) return .{
.signature = signature,
.info = inlinee_src_line,
};
}
// Linearly scan to the first match
const begin = b: {
var begin = any;
while (begin > 0) {
const prev = begin - 1;
if (mod.inlinee_source_lines[prev].info.inlinee != inlinee) break;
begin = prev;
}
}
return null;
break :b begin;
};
// Linearly scan to the last match
const end = b: {
var end = any + 1;
while (
end < mod.inlinee_source_lines.len and
mod.inlinee_source_lines[end].info.inlinee == inlinee
) : (end += 1) {}
break :b end;
};
// Return a slice of all the matches
return mod.inlinee_source_lines[begin..end];
}
pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !std.debug.SourceLocation {
@@ -810,7 +834,45 @@ pub fn getModule(self: *Pdb, index: usize) !?*Module {
const gpa = self.allocator;
mod.symbols = try reader.readAlloc(gpa, mod.mod_info.sym_byte_size - 4);
errdefer gpa.free(mod.symbols);
mod.subsect_info = try reader.readAlloc(gpa, mod.mod_info.c13_byte_size);
errdefer gpa.free(mod.subsect_info);
// Gather every inlinee source line record from this module's debug subsections and sort them by
// inlinee ID, so that `getInlineeSourceLines` can binary search instead of rescanning.
mod.inlinee_source_lines = b: {
    var inlinee_source_lines: std.ArrayList(InlineeSourceLine) = .empty;
    defer inlinee_source_lines.deinit(gpa);

    var subsects: Io.Reader = .fixed(mod.subsect_info);
    // Running out of bytes for another header simply ends the scan.
    while (subsects.takeStructPointer(pdb.DebugSubsectionHeader) catch null) |subsect_hdr| {
        // A subsection whose declared length overruns the buffer is malformed. Report it as an
        // error like every other malformed-data path below: a plain `return null` is not an
        // error return, so the `errdefer`s guarding `mod.symbols` and `mod.subsect_info` would
        // not run on that path.
        var subsect: Io.Reader = .fixed(subsects.take(subsect_hdr.length) catch
            return error.InvalidDebugInfo);
        if (subsect_hdr.kind == .inlinee_lines) {
            const inlinee_source_line_signature = subsect.takeEnum(pdb.InlineeSourceLineSignature, .little) catch
                return error.InvalidDebugInfo;
            const has_extra_files = switch (inlinee_source_line_signature) {
                .normal => false,
                .ex => true,
                // Unrecognized signature: skip this subsection entirely.
                else => continue,
            };
            while (subsect.takeStructPointer(pdb.InlineeSourceLine) catch null) |info| {
                if (has_extra_files) {
                    // With the `.ex` signature each record is followed by a `u32` count and that
                    // many `u32` entries; skip over them to reach the next record.
                    const file_count = subsect.takeInt(u32, .little) catch
                        return error.InvalidDebugInfo;
                    const file_bytes = std.math.mul(usize, file_count, @sizeOf(u32)) catch
                        return error.InvalidDebugInfo;
                    subsect.discardAll(file_bytes) catch
                        return error.InvalidDebugInfo;
                }
                try inlinee_source_lines.append(gpa, .{
                    .signature = inlinee_source_line_signature,
                    .info = info,
                });
            }
        }
    }

    // Sorting places entries that share an inlinee ID next to each other.
    std.mem.sort(InlineeSourceLine, inlinee_source_lines.items, {}, InlineeSourceLine.lessThan);
    break :b try inlinee_source_lines.toOwnedSlice(gpa);
};
errdefer gpa.free(mod.inlinee_source_lines);
var sect_offset: usize = 0;
var skip_len: usize = undefined;
+41 -34
View File
@@ -286,43 +286,48 @@ const Module = struct {
var last_inlinee: ?u32 = null;
var iter = pdb.getInlinees(module, proc);
while (iter.next(module)) |inline_site| {
// If our address points into this site, get the source location it
// points at
const inlinee_src_line = pdb.getInlineeSourceLine(
module,
inline_site.inlinee,
) orelse continue;
const maybe_loc = pdb.getInlineSiteSourceLocation(
module,
inline_site,
inlinee_src_line.info,
offset_in_func,
) catch continue;
const loc = maybe_loc orelse continue;
// Filter out duplicate inline sites. Tools like llvm-addr2line output
// duplicate sites in the same cases as us if we elide this check,
// implying that they exist in the underlying data and are not
// indicative of a parser bug. No useful information is lost here since an
// inline site can't actually reference itself.
// implying that they exist in the underlying data and are not indicative
// of a parser bug. No useful information is lost here since an inline site
// can't actually reference itself.
if (inline_site.inlinee == last_inlinee) continue;
last_inlinee = inline_site.inlinee;
// If we're appending this symbol, resolve the name. If we're replacing the
// last symbol, clear the previous symbols and wait to resolve the name
// until we've reached the last symbol to avoid doing work and then
// throwing it out.
const name = b: {
if (resolve_inline_callers) break :b pdb.findInlineeName(inline_site.inlinee);
symbols.items.len = 0;
break :b null;
};
// If our address points into this site, get the source location(s) it
// points at
for (pdb.getInlineeSourceLines(
module,
inline_site.inlinee,
)) |inlinee_src_line| {
const maybe_loc = pdb.getInlineSiteSourceLocation(
module,
inline_site,
inlinee_src_line.info,
offset_in_func,
) catch continue;
const loc = maybe_loc orelse continue;
try symbols.append(gpa, .{
.name = name,
.compile_unit_name = compile_unit_name,
.source_location = loc,
});
// If we aren't trying to resolve inline callers, and we've matched a
// new inline site, we want to overwrite the previous results.
if (!resolve_inline_callers and inline_site.inlinee != last_inlinee) {
symbols.items.len = 0;
}
// Only resolve the name if we're resolving inline callers, otherwise
// wait until we're done to avoid duplicated work.
const name = if (resolve_inline_callers)
pdb.findInlineeName(inline_site.inlinee)
else
null;
try symbols.append(gpa, .{
.name = name,
.compile_unit_name = compile_unit_name,
.source_location = loc,
});
last_inlinee = inline_site.inlinee;
}
}
if (resolve_inline_callers) {
@@ -332,8 +337,10 @@ const Module = struct {
// complexity.
std.mem.reverse(std.debug.Symbol, symbols.items);
} else if (last_inlinee) |inlinee| {
// If we haven't resolved the name yet, resolve it now
symbols.items[symbols.items.len - 1].name = pdb.findInlineeName(inlinee);
// If we aren't resolving inline callers, then all results will have the
// same inline site, and we resolve its name once at the end.
const name = pdb.findInlineeName(inlinee);
for (symbols.items) |*symbol| symbol.name = name;
}
}