Skip to content

Commit bccdd39

Browse files
committed
gc.c: Fix a race condition in object_id for shareable objects
If an object is shareable and has no capacity left, it isn't safe to store the object ID in fields as it requires an object resize which can't be done unless all field reads are synchronized. So in this case we have to store the ID externally like we used to.
1 parent 7c22330 commit bccdd39

5 files changed

Lines changed: 233 additions & 35 deletions

File tree

gc.c

Lines changed: 133 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1853,6 +1853,61 @@ static const rb_data_type_t id2ref_tbl_type = {
18531853

18541854
#define RUBY_ATOMIC_VALUE_LOAD(x) (VALUE)(RUBY_ATOMIC_PTR_LOAD(x))
18551855

1856+
static VALUE obj_to_id_value = 0;
1857+
static st_table *obj_to_id_tbl = NULL;
1858+
1859+
static void mark_hash_values(st_table *tbl);
1860+
1861+
static void
1862+
obj_to_id_tbl_mark(void *data)
1863+
{
1864+
st_table *table = (st_table *)data;
1865+
if (UNLIKELY(!RB_POSFIXABLE(LAST_OBJECT_ID()))) {
1866+
// It's very unlikely, but if enough object ids were generated, keys may be T_BIGNUM
1867+
mark_hash_values(table);
1868+
}
1869+
// We purposedly don't mark keys, as they are weak references.
1870+
// rb_gc_obj_free_vm_weak_references takes care of cleaning them up.
1871+
}
1872+
1873+
static size_t
1874+
obj_to_id_tbl_memsize(const void *data)
1875+
{
1876+
return rb_st_memsize(data);
1877+
}
1878+
1879+
static void
1880+
obj_to_id_tbl_compact(void *data)
1881+
{
1882+
st_table *table = (st_table *)data;
1883+
if (LIKELY(RB_POSFIXABLE(LAST_OBJECT_ID()))) {
1884+
// We know values are all FIXNUM, so no need to update them.
1885+
gc_ref_update_table_keys_only(table);
1886+
}
1887+
else {
1888+
gc_update_table_refs(table);
1889+
}
1890+
}
1891+
1892+
static void
1893+
obj_to_id_tbl_free(void *data)
1894+
{
1895+
obj_to_id_tbl = NULL; // clear global ref
1896+
st_table *table = (st_table *)data;
1897+
st_free_table(table);
1898+
}
1899+
1900+
static const rb_data_type_t obj_to_id_tbl_type = {
1901+
.wrap_struct_name = "VM/obj_to_id_table",
1902+
.function = {
1903+
.dmark = obj_to_id_tbl_mark,
1904+
.dfree = obj_to_id_tbl_free,
1905+
.dsize = obj_to_id_tbl_memsize,
1906+
.dcompact = obj_to_id_tbl_compact,
1907+
},
1908+
.flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY
1909+
};
1910+
18561911
static VALUE
18571912
class_object_id(VALUE klass)
18581913
{
@@ -1876,11 +1931,23 @@ static inline VALUE
18761931
object_id_get(VALUE obj, shape_id_t shape_id)
18771932
{
18781933
VALUE id;
1879-
if (rb_shape_too_complex_p(shape_id)) {
1880-
id = rb_obj_field_get(obj, ROOT_TOO_COMPLEX_WITH_OBJ_ID);
1881-
}
1882-
else {
1883-
id = rb_obj_field_get(obj, rb_shape_object_id(shape_id));
1934+
1935+
switch (rb_shape_has_object_id(shape_id)) {
1936+
case SHAPE_NO_OBJ_ID:
1937+
rb_bug("Unreacheable");
1938+
break;
1939+
case SHAPE_INLINE_OBJ_ID:
1940+
if (rb_shape_too_complex_p(shape_id)) {
1941+
id = rb_obj_field_get(obj, ROOT_TOO_COMPLEX_WITH_OBJ_ID);
1942+
}
1943+
else {
1944+
id = rb_obj_field_get(obj, rb_shape_object_id_inline(shape_id));
1945+
}
1946+
break;
1947+
case SHAPE_EXTERNAL_OBJ_ID:
1948+
RUBY_ASSERT(obj_to_id_tbl);
1949+
st_lookup(obj_to_id_tbl, obj, &id);
1950+
break;
18841951
}
18851952

18861953
#if RUBY_DEBUG
@@ -1894,7 +1961,7 @@ object_id_get(VALUE obj, shape_id_t shape_id)
18941961
}
18951962

18961963
static VALUE
1897-
object_id0(VALUE obj)
1964+
object_id0(VALUE obj, bool shareable)
18981965
{
18991966
VALUE id = Qfalse;
19001967
shape_id_t shape_id = RBASIC_SHAPE_ID(obj);
@@ -1903,14 +1970,26 @@ object_id0(VALUE obj)
19031970
return object_id_get(obj, shape_id);
19041971
}
19051972

1906-
// rb_shape_object_id_shape may lock if the current shape has
1907-
// multiple children.
1908-
shape_id_t object_id_shape_id = rb_shape_transition_object_id(obj);
1909-
19101973
id = generate_next_object_id();
1911-
rb_obj_field_set(obj, object_id_shape_id, id);
19121974

1913-
RUBY_ASSERT(RBASIC_SHAPE_ID(obj) == object_id_shape_id);
1975+
attr_index_t capacity = RSHAPE_CAPACITY(shape_id);
1976+
attr_index_t free_capacity = capacity - RSHAPE_LEN(shape_id);
1977+
if (shareable && capacity && !free_capacity) {
1978+
// The object is shared and has no free capacity, we can't
1979+
// safely store the object_id inline.
1980+
shape_id_t next_shape_id = rb_shape_transition_object_id_external(obj);
1981+
if (RB_UNLIKELY(!obj_to_id_tbl)) {
1982+
obj_to_id_tbl = st_init_numtable();
1983+
obj_to_id_value = TypedData_Wrap_Struct(0, &obj_to_id_tbl_type, obj_to_id_tbl);
1984+
}
1985+
st_insert(obj_to_id_tbl, obj, id);
1986+
RBASIC_SET_SHAPE_ID(obj, next_shape_id);
1987+
}
1988+
else {
1989+
shape_id_t next_shape_id = rb_shape_transition_object_id_inline(obj);
1990+
rb_obj_field_set(obj, next_shape_id, id);
1991+
}
1992+
19141993
RUBY_ASSERT(rb_shape_obj_has_id(obj));
19151994

19161995
if (RB_UNLIKELY(id2ref_tbl)) {
@@ -1936,14 +2015,14 @@ object_id(VALUE obj)
19362015
break;
19372016
}
19382017

1939-
if (UNLIKELY(rb_gc_multi_ractor_p() && rb_ractor_shareable_p(obj))) {
2018+
if (UNLIKELY(rb_gc_multi_ractor_p() && RB_OBJ_SHAREABLE_P(obj))) {
19402019
unsigned int lock_lev = RB_GC_VM_LOCK();
1941-
VALUE id = object_id0(obj);
2020+
VALUE id = object_id0(obj, true);
19422021
RB_GC_VM_UNLOCK(lock_lev);
19432022
return id;
19442023
}
19452024

1946-
return object_id0(obj);
2025+
return object_id0(obj, false);
19472026
}
19482027

19492028
static void
@@ -2045,8 +2124,18 @@ obj_free_object_id(VALUE obj)
20452124
break;
20462125
default: {
20472126
shape_id_t shape_id = RBASIC_SHAPE_ID(obj);
2048-
if (rb_shape_has_object_id(shape_id)) {
2127+
switch (rb_shape_has_object_id(shape_id)) {
2128+
case SHAPE_NO_OBJ_ID:
2129+
break;
2130+
case SHAPE_INLINE_OBJ_ID:
20492131
obj_id = object_id_get(obj, shape_id);
2132+
break;
2133+
case SHAPE_EXTERNAL_OBJ_ID:
2134+
// During shutdown the `obj_to_id_tbl` may be freed first.
2135+
if (obj_to_id_tbl) {
2136+
st_delete(obj_to_id_tbl, &obj, &obj_id);
2137+
}
2138+
break;
20502139
}
20512140
break;
20522141
}
@@ -2739,6 +2828,22 @@ rb_mark_set(st_table *tbl)
27392828
st_foreach(tbl, mark_key, (st_data_t)rb_gc_get_objspace());
27402829
}
27412830

2831+
static int
2832+
mark_value(st_data_t key, st_data_t value, st_data_t data)
2833+
{
2834+
gc_mark_internal((VALUE)value);
2835+
2836+
return ST_CONTINUE;
2837+
}
2838+
2839+
static void
2840+
mark_hash_values(st_table *tbl)
2841+
{
2842+
if (!tbl) return;
2843+
2844+
st_foreach(tbl, mark_value, 0);
2845+
}
2846+
27422847
static int
27432848
mark_keyvalue(st_data_t key, st_data_t value, st_data_t data)
27442849
{
@@ -4137,6 +4242,17 @@ rb_gc_vm_weak_table_foreach(vm_table_foreach_callback_func callback,
41374242
}
41384243
break;
41394244
}
4245+
case RB_GC_VM_OBJ_TO_ID_TABLE: {
4246+
if (obj_to_id_tbl) {
4247+
st_foreach_with_replace(
4248+
obj_to_id_tbl,
4249+
vm_weak_table_foreach_weak_key,
4250+
vm_weak_table_foreach_update_weak_key,
4251+
(st_data_t)&foreach_data
4252+
);
4253+
}
4254+
break;
4255+
}
41404256
case RB_GC_VM_GENERIC_FIELDS_TABLE: {
41414257
st_table *generic_fields_tbl = rb_generic_fields_tbl_get();
41424258
if (generic_fields_tbl) {
@@ -5543,6 +5659,7 @@ Init_GC(void)
55435659
{
55445660
#undef rb_intern
55455661
rb_gc_register_address(&id2ref_value);
5662+
rb_gc_register_address(&obj_to_id_value);
55465663

55475664
malloc_offset = gc_compute_malloc_offset();
55485665

gc/gc.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ enum rb_gc_vm_weak_tables {
2929
RB_GC_VM_OVERLOADED_CME_TABLE,
3030
RB_GC_VM_GLOBAL_SYMBOLS_TABLE,
3131
RB_GC_VM_ID2REF_TABLE,
32+
RB_GC_VM_OBJ_TO_ID_TABLE,
3233
RB_GC_VM_GENERIC_FIELDS_TABLE,
3334
RB_GC_VM_FROZEN_STRINGS_TABLE,
3435
RB_GC_VM_CC_REFINEMENT_TABLE,
@@ -160,6 +161,34 @@ gc_ref_update_table_values_only(st_table *tbl)
160161
}
161162
}
162163

164+
static int
165+
hash_foreach_replace_key(st_data_t key, st_data_t value, st_data_t argp, int error)
166+
{
167+
if (rb_gc_location((VALUE)key) != (VALUE)key) {
168+
return ST_REPLACE;
169+
}
170+
return ST_CONTINUE;
171+
}
172+
173+
static int
174+
hash_replace_ref_key(st_data_t *key, st_data_t *value, st_data_t argp, int existing)
175+
{
176+
*key = rb_gc_location((VALUE)*key);
177+
178+
return ST_CONTINUE;
179+
}
180+
181+
static void
182+
gc_ref_update_table_keys_only(st_table *tbl)
183+
{
184+
if (!tbl || tbl->num_entries == 0) return;
185+
186+
// FIXME: this certainly isn't correct. If a key moved, we need to re-hash.
187+
if (st_foreach_with_replace(tbl, hash_foreach_replace_key, hash_replace_ref_key, 0)) {
188+
rb_raise(rb_eRuntimeError, "hash modified during iteration");
189+
}
190+
}
191+
163192
static int
164193
gc_mark_tbl_no_pin_i(st_data_t key, st_data_t value, st_data_t data)
165194
{

shape.c

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -709,7 +709,7 @@ remove_shape_recursive(rb_shape_t *shape, ID id, rb_shape_t **removed_shape)
709709
static inline shape_id_t transition_complex(shape_id_t shape_id);
710710

711711
static shape_id_t
712-
shape_transition_object_id(shape_id_t original_shape_id)
712+
shape_transition_object_id_inline(shape_id_t original_shape_id)
713713
{
714714
RUBY_ASSERT(!rb_shape_has_object_id(original_shape_id));
715715

@@ -720,17 +720,25 @@ shape_transition_object_id(shape_id_t original_shape_id)
720720
}
721721

722722
RUBY_ASSERT(shape);
723-
return shape_id(shape, original_shape_id) | SHAPE_ID_FL_HAS_OBJECT_ID;
723+
return shape_id(shape, original_shape_id) | SHAPE_ID_FL_OBJ_ID_INLINE;
724724
}
725725

726726
shape_id_t
727-
rb_shape_transition_object_id(VALUE obj)
727+
rb_shape_transition_object_id_inline(VALUE obj)
728728
{
729-
return shape_transition_object_id(RBASIC_SHAPE_ID(obj));
729+
return shape_transition_object_id_inline(RBASIC_SHAPE_ID(obj));
730730
}
731731

732732
shape_id_t
733-
rb_shape_object_id(shape_id_t original_shape_id)
733+
rb_shape_transition_object_id_external(VALUE obj)
734+
{
735+
shape_id_t original_shape_id = RBASIC_SHAPE_ID(obj);
736+
RUBY_ASSERT(!rb_shape_has_object_id(original_shape_id));
737+
return original_shape_id | SHAPE_ID_FL_OBJ_ID_EXTERNAL;
738+
}
739+
740+
shape_id_t
741+
rb_shape_object_id_inline(shape_id_t original_shape_id)
734742
{
735743
RUBY_ASSERT(rb_shape_has_object_id(original_shape_id));
736744

@@ -742,7 +750,7 @@ rb_shape_object_id(shape_id_t original_shape_id)
742750
shape = RSHAPE(shape->parent_id);
743751
}
744752

745-
return shape_id(shape, original_shape_id) | SHAPE_ID_FL_HAS_OBJECT_ID;
753+
return shape_id(shape, original_shape_id) | SHAPE_ID_FL_OBJ_ID_INLINE;
746754
}
747755

748756
static inline shape_id_t
@@ -754,7 +762,7 @@ transition_complex(shape_id_t shape_id)
754762
if (heap_index) {
755763
next_shape_id = rb_shape_root(heap_index - 1) | SHAPE_ID_FL_TOO_COMPLEX;
756764
if (rb_shape_has_object_id(shape_id)) {
757-
next_shape_id = shape_transition_object_id(next_shape_id);
765+
next_shape_id = shape_transition_object_id_inline(next_shape_id);
758766
}
759767
}
760768
else {
@@ -1104,7 +1112,7 @@ rb_shape_rebuild(shape_id_t initial_shape_id, shape_id_t dest_shape_id)
11041112
return shape_id(next_shape, initial_shape_id);
11051113
}
11061114
else {
1107-
return transition_complex(initial_shape_id | (dest_shape_id & SHAPE_ID_FL_HAS_OBJECT_ID));
1115+
return transition_complex(initial_shape_id | (dest_shape_id & SHAPE_ID_FL_OBJ_ID_INLINE));
11081116
}
11091117
}
11101118

@@ -1223,7 +1231,7 @@ rb_shape_verify_consistency(VALUE obj, shape_id_t shape_id)
12231231
shape = RSHAPE(shape->parent_id);
12241232
}
12251233

1226-
if (rb_shape_has_object_id(shape_id)) {
1234+
if (rb_shape_has_object_id(shape_id) == SHAPE_INLINE_OBJ_ID) {
12271235
if (!has_object_id) {
12281236
rb_p(obj);
12291237
rb_bug("shape_id claim having obj_id but doesn't shape_id=%u, obj=%s", shape_id, rb_obj_info(obj));

0 commit comments

Comments
 (0)