Skip to content

Commit 2609ee6

Browse files
committed
Add string cache and use it in cooked index
The cooked index needs to allocate names in some cases -- when canonicalizing or when synthesizing Ada package names. This process currently uses a vector of unique_ptrs to manage the memory. Another series I'm writing adds another spot where this allocation must be done, and examining the result showed that certain names were allocated multiple times. To clean this up, this patch introduces a string cache object and changes the cooked indexer to use it. I considered using bcache here, but bcache doesn't work as nicely with string_view -- because bcache is fundamentally memory-based, a temporary copy of the contents must be made to ensure that bcache can see the trailing \0. Furthermore, writing a custom class lets us avoid another copy when canonicalizing C++ names. Approved-By: Simon Marchi <[email protected]>
1 parent c19c928 commit 2609ee6

File tree

3 files changed

+135
-13
lines changed

3 files changed

+135
-13
lines changed

gdb/dwarf2/cooked-index.c

+4-12
Original file line numberDiff line numberDiff line change
@@ -374,13 +374,11 @@ cooked_index_shard::handle_gnat_encoded_entry
374374
cooked_index_entry *last = (cooked_index_entry *) *slot;
375375
if (last == nullptr || last->per_cu != entry->per_cu)
376376
{
377-
gdb::unique_xmalloc_ptr<char> new_name
378-
= make_unique_xstrndup (name.data (), name.length ());
377+
const char *new_name = m_names.insert (name);
379378
last = create (entry->die_offset, DW_TAG_module,
380-
IS_SYNTHESIZED, language_ada, new_name.get (), parent,
379+
IS_SYNTHESIZED, language_ada, new_name, parent,
381380
entry->per_cu);
382381
last->canonical = last->name;
383-
m_names.push_back (std::move (new_name));
384382
new_entries.push_back (last);
385383
*slot = last;
386384
}
@@ -389,9 +387,7 @@ cooked_index_shard::handle_gnat_encoded_entry
389387
}
390388

391389
entry->set_parent (parent);
392-
auto new_canon = make_unique_xstrndup (tail.data (), tail.length ());
393-
entry->canonical = new_canon.get ();
394-
m_names.push_back (std::move (new_canon));
390+
entry->canonical = m_names.insert (tail);
395391
}
396392

397393
/* See cooked-index.h. */
@@ -509,10 +505,7 @@ cooked_index_shard::finalize (const parent_map_map *parent_maps)
509505
if (canon_name == nullptr)
510506
entry->canonical = entry->name;
511507
else
512-
{
513-
entry->canonical = canon_name.get ();
514-
m_names.push_back (std::move (canon_name));
515-
}
508+
entry->canonical = m_names.insert (std::move (canon_name));
516509
*slot = entry;
517510
}
518511
else
@@ -532,7 +525,6 @@ cooked_index_shard::finalize (const parent_map_map *parent_maps)
532525
m_entries.insert (m_entries.end (), new_gnat_entries.begin (),
533526
new_gnat_entries.end ());
534527

535-
m_names.shrink_to_fit ();
536528
m_entries.shrink_to_fit ();
537529
std::sort (m_entries.begin (), m_entries.end (),
538530
[] (const cooked_index_entry *a, const cooked_index_entry *b)

gdb/dwarf2/cooked-index.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "dwarf2/read.h"
3333
#include "dwarf2/parent-map.h"
3434
#include "gdbsupport/range-chain.h"
35+
#include "gdbsupport/string-set.h"
3536
#include "complaints.h"
3637

3738
#if CXX_STD_THREAD
@@ -381,7 +382,7 @@ class cooked_index_shard
381382
/* The addrmap. This maps address ranges to dwarf2_per_cu objects. */
382383
addrmap_fixed *m_addrmap = nullptr;
383384
/* Storage for canonical names. */
384-
std::vector<gdb::unique_xmalloc_ptr<char>> m_names;
385+
gdb::string_set m_names;
385386
};
386387

387388
using cooked_index_shard_up = std::unique_ptr<cooked_index_shard>;

gdbsupport/string-set.h

+129
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/* String-interning set
2+
3+
Copyright (C) 2025 Free Software Foundation, Inc.
4+
5+
This file is part of GDB.
6+
7+
This program is free software; you can redistribute it and/or modify
8+
it under the terms of the GNU General Public License as published by
9+
the Free Software Foundation; either version 3 of the License, or
10+
(at your option) any later version.
11+
12+
This program is distributed in the hope that it will be useful,
13+
but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
GNU General Public License for more details.
16+
17+
You should have received a copy of the GNU General Public License
18+
along with this program. If not, see <http://www.gnu.org/licenses/>. */
19+
20+
#ifndef GDBSUPPORT_STRING_SET_H
21+
#define GDBSUPPORT_STRING_SET_H
22+
23+
#include "gdbsupport/unordered_set.h"
24+
#include <string_view>
25+
26+
namespace gdb
27+
{
28+
29+
/* This is a string-interning set. It manages storage for strings,
30+
ensuring that just a single copy of a given string is kept. The
31+
underlying C string will remain valid for the lifetime of this
32+
object. */
33+
34+
class string_set
35+
{
36+
public:
37+
38+
/* Insert STR into this set. Returns a pointer to the interned
39+
string. */
40+
const char *insert (const char *str)
41+
{
42+
/* We need to take the length to hash the string anyway, so it's
43+
convenient to just wrap it here. */
44+
return insert (std::string_view (str));
45+
}
46+
47+
/* An overload accepting a string view. */
48+
const char *insert (std::string_view str)
49+
{
50+
return m_set.insert (str).first->get ();
51+
}
52+
53+
/* An overload that takes ownership of the string. */
54+
const char *insert (gdb::unique_xmalloc_ptr<char> str)
55+
{
56+
return m_set.insert (local_string (std::move (str))).first->get ();
57+
}
58+
59+
private:
60+
61+
/* The type of string we store. Note that we do not store
62+
std::string here to avoid the small-string optimization
63+
invalidating a pointer on rehash. */
64+
struct local_string
65+
{
66+
explicit local_string (std::string_view str)
67+
: contents (xstrndup (str.data (), str.size ())),
68+
len (str.size ())
69+
{ }
70+
71+
explicit local_string (gdb::unique_xmalloc_ptr<char> str)
72+
: contents (std::move (str)),
73+
len (strlen (contents.get ()))
74+
{ }
75+
76+
const char *get () const
77+
{ return contents.get (); }
78+
79+
std::string_view as_view () const
80+
{ return std::string_view (contents.get (), len); }
81+
82+
/* \0-terminated string contents. */
83+
gdb::unique_xmalloc_ptr<char> contents;
84+
/* Length of the string. */
85+
size_t len;
86+
};
87+
88+
/* Equality object for the set. */
89+
struct str_eq
90+
{
91+
using is_transparent = void;
92+
93+
bool operator() (std::string_view lhs, const local_string &rhs)
94+
const noexcept
95+
{
96+
return lhs == rhs.as_view ();
97+
}
98+
99+
bool operator() (const local_string &lhs, const local_string &rhs)
100+
const noexcept
101+
{
102+
return (*this) (lhs.as_view (), rhs);
103+
}
104+
};
105+
106+
/* Hash object for the set. */
107+
struct str_hash
108+
{
109+
using is_transparent = void;
110+
using is_avalanching = void;
111+
112+
bool operator() (const local_string &rhs) const noexcept
113+
{
114+
return (*this) (rhs.as_view ());
115+
}
116+
117+
bool operator() (std::string_view rhs) const noexcept
118+
{
119+
return ankerl::unordered_dense::hash<std::string_view> () (rhs);
120+
}
121+
};
122+
123+
/* The strings. */
124+
gdb::unordered_set<local_string, str_hash, str_eq> m_set;
125+
};
126+
127+
}
128+
129+
#endif /* GDBSUPPORT_STRING_SET_H */

0 commit comments

Comments
 (0)