X Tutup
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
687e99f
Add debug_assert to invoke_exact_args, lazy func_version reassignment
youknowone Mar 2, 2026
81d307b
working
youknowone Mar 1, 2026
0176223
Add COMPARE_OP, TO_BOOL, FOR_ITER, LOAD_GLOBAL specialization
youknowone Mar 1, 2026
9bb0c46
Add BINARY_SUBSCR, CONTAINS_OP, UNPACK_SEQUENCE, STORE_ATTR specializ…
youknowone Mar 1, 2026
1c07777
Add STORE_SUBSCR, BinaryOpAddUnicode, ToBoolAlwaysTrue, CallLen, Call…
youknowone Mar 1, 2026
240f3ac
Add BinaryOpSubscrStrInt, CallStr1, CallTuple1 specialization
youknowone Mar 1, 2026
cadb9be
Add BinaryOpInplaceAddUnicode specialization
youknowone Mar 1, 2026
fd098fe
Add LoadAttrModule, CallBuiltinO, CallPyGeneral, CallBoundMethodGener…
youknowone Mar 2, 2026
dd29113
Add LoadAttrNondescriptor*, CallMethodDescriptor* specialization
youknowone Mar 2, 2026
b238a27
Add CallBuiltinFast, CallNonPyGeneral specialization
youknowone Mar 2, 2026
d950035
Add SendGen specialization for generator/coroutine send
youknowone Mar 2, 2026
32376d5
Add LoadAttrSlot, StoreAttrSlot specialization for __slots__ access
youknowone Mar 2, 2026
a7c179c
Add LoadSuperAttrAttr, LoadSuperAttrMethod, CallBuiltinClass, CallBui…
youknowone Mar 2, 2026
e1289f1
Add LoadAttrProperty specialization for property descriptor access
youknowone Mar 2, 2026
2350bc1
Add LoadAttrClass specialization for class attribute access
youknowone Mar 2, 2026
ba9d528
Add BinaryOpSubscrListSlice specialization
youknowone Mar 2, 2026
3c88368
Add CallKwPy, CallKwBoundMethod, CallKwNonPy specialization
youknowone Mar 2, 2026
ab6bbb6
Clean up comments in specialization code
youknowone Mar 2, 2026
48fd5c7
fix check_signals
youknowone Mar 2, 2026
51accdb
fix import
youknowone Mar 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions crates/vm/src/builtins/dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ impl PyDict {
&self.entries
}

/// Monotonically increasing version for mutation tracking.
///
/// Delegates to the inner entries storage, whose counter is bumped on every
/// mutating dict operation; callers use it to cheaply detect "has this dict
/// changed since I last looked" (e.g. validating LOAD_GLOBAL inline caches).
pub(crate) fn version(&self) -> u64 {
    self.entries.version()
}

/// Returns all keys as a Vec, atomically under a single read lock.
/// Thread-safe: prevents "dictionary changed size during iteration" errors.
pub fn keys_vec(&self) -> Vec<PyObjectRef> {
Expand Down
36 changes: 35 additions & 1 deletion crates/vm/src/builtins/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,14 @@ pub struct PyFunction {

/// Counter for allocating unique function versions. Starts at 1 because 0 is
/// reserved as the "invalid / exhausted" sentinel.
static FUNC_VERSION_COUNTER: AtomicU32 = AtomicU32::new(1);

/// Atomically allocate the next function version, returning 0 if exhausted.
/// Once the counter wraps to 0, it stays at 0 permanently (the closure refuses
/// to advance from 0, so `fetch_update` fails and we fall back to 0).
fn next_func_version() -> u32 {
    FUNC_VERSION_COUNTER
        // `then_some` rather than `then(|| ...)`: the successor is trivially
        // cheap, so laziness buys nothing (clippy: unnecessary_lazy_evaluations).
        .fetch_update(Relaxed, Relaxed, |v| (v != 0).then_some(v.wrapping_add(1)))
        .unwrap_or(0)
}

unsafe impl Traverse for PyFunction {
fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) {
self.globals.traverse(tracer_fn);
Expand Down Expand Up @@ -204,7 +212,7 @@ impl PyFunction {
annotate: PyMutex::new(None),
module: PyMutex::new(module),
doc: PyMutex::new(doc),
func_version: AtomicU32::new(FUNC_VERSION_COUNTER.fetch_add(1, Relaxed)),
func_version: AtomicU32::new(next_func_version()),
#[cfg(feature = "jit")]
jitted_code: OnceCell::new(),
};
Expand Down Expand Up @@ -603,6 +611,22 @@ impl Py<PyFunction> {
self.func_version.load(Relaxed)
}

/// Returns the current version, assigning a fresh one if previously invalidated.
/// Returns 0 if the version counter has overflowed.
/// `_PyFunction_GetVersionForCurrentState`
pub fn get_version_for_current_state(&self) -> u32 {
    let current = self.func_version.load(Relaxed);
    if current != 0 {
        current
    } else {
        let fresh = next_func_version();
        if fresh != 0 {
            // Publish the newly assigned version so subsequent calls reuse it.
            self.func_version.store(fresh, Relaxed);
        }
        fresh
    }
}

/// Check if this function is eligible for exact-args call specialization.
/// Returns true if: no VARARGS, no VARKEYWORDS, no kwonly args, not generator/coroutine,
/// and effective_nargs matches co_argcount.
Expand All @@ -627,6 +651,16 @@ impl Py<PyFunction> {
pub fn invoke_exact_args(&self, args: &[PyObjectRef], vm: &VirtualMachine) -> PyResult {
let code: PyRef<PyCode> = (*self.code).to_owned();

debug_assert_eq!(args.len(), code.arg_count as usize);
debug_assert!(code.flags.contains(bytecode::CodeFlags::NEWLOCALS));
debug_assert!(!code.flags.intersects(
bytecode::CodeFlags::VARARGS
| bytecode::CodeFlags::VARKEYWORDS
| bytecode::CodeFlags::GENERATOR
| bytecode::CodeFlags::COROUTINE
));
debug_assert_eq!(code.kwonlyarg_count, 0);

let frame = Frame::new(
code.clone(),
Scope::new(None, self.globals.clone()),
Expand Down
17 changes: 17 additions & 0 deletions crates/vm/src/builtins/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,23 @@ impl PyListIterator {
}
}

impl PyListIterator {
    /// Fast path for FOR_ITER specialization: yields the next element or
    /// `None` on exhaustion (or internal error), skipping the full iterator
    /// protocol.
    pub(crate) fn fast_next(&self) -> Option<PyObjectRef> {
        let stepped = self.internal.lock().next(|list, pos| {
            let item = list.borrow_vec().get(pos).cloned();
            Ok(PyIterReturn::from_result(item.ok_or(None)))
        });
        match stepped {
            Ok(PyIterReturn::Return(obj)) => Some(obj),
            _ => None,
        }
    }
}

impl SelfIter for PyListIterator {}
impl IterNext for PyListIterator {
fn next(zelf: &Py<Self>, _vm: &VirtualMachine) -> PyResult<PyIterReturn> {
Expand Down
2 changes: 1 addition & 1 deletion crates/vm/src/builtins/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ impl PyBaseObject {
}

#[pyslot]
fn slot_setattro(
pub(crate) fn slot_setattro(
obj: &PyObject,
attr_name: &Py<PyStr>,
value: PySetterValue,
Expand Down
4 changes: 4 additions & 0 deletions crates/vm/src/builtins/property.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ impl PyProperty {
self.getter.read().clone()
}

/// Internal accessor for the property's getter slot.
///
/// Mirrors the `fget` getset (both clone the contents of the `getter` lock);
/// exposed as `pub(crate)` so VM internals — presumably the LoadAttrProperty
/// specialization, confirm at call sites — can read it without going through
/// the descriptor machinery.
pub(crate) fn get_fget(&self) -> Option<PyObjectRef> {
    self.getter.read().clone()
}

#[pygetset]
fn fset(&self) -> Option<PyObjectRef> {
self.setter.read().clone()
Expand Down
38 changes: 16 additions & 22 deletions crates/vm/src/builtins/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -613,19 +613,6 @@ pub struct PyRangeIterator {
length: usize,
}

impl PyRangeIterator {
/// Advance and return next value without going through the iterator protocol.
#[inline]
pub(crate) fn next_fast(&self) -> Option<isize> {
let index = self.index.fetch_add(1);
if index < self.length {
Some(self.start + (index as isize) * self.step)
} else {
None
}
}
}

impl PyPayload for PyRangeIterator {
#[inline]
fn class(ctx: &Context) -> &'static Py<PyType> {
Expand Down Expand Up @@ -660,18 +647,25 @@ impl PyRangeIterator {
}
}

impl PyRangeIterator {
    /// Fast path for FOR_ITER specialization. Returns the next isize value
    /// without allocating PyInt or PyIterReturn.
    ///
    /// NOTE: `fetch_add` wraps at `usize::MAX`, so in the pathological case
    /// the iterator can spin back to the beginning (same caveat as the
    /// generic protocol path).
    pub(crate) fn fast_next(&self) -> Option<isize> {
        let index = self.index.fetch_add(1);
        (index < self.length).then(|| self.start + (index as isize) * self.step)
    }
}

impl SelfIter for PyRangeIterator {}
impl IterNext for PyRangeIterator {
fn next(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<PyIterReturn> {
// TODO: In pathological case (index == usize::MAX) this can wrap around
// (since fetch_add wraps). This would result in the iterator spinning again
// from the beginning.
let index = zelf.index.fetch_add(1);
let r = if index < zelf.length {
let value = zelf.start + (index as isize) * zelf.step;
PyIterReturn::Return(vm.ctx.new_int(value).into())
} else {
PyIterReturn::StopIteration(None)
let r = match zelf.fast_next() {
Some(value) => PyIterReturn::Return(vm.ctx.new_int(value).into()),
None => PyIterReturn::StopIteration(None),
};
Ok(r)
}
Expand Down
18 changes: 18 additions & 0 deletions crates/vm/src/builtins/tuple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,24 @@ impl PyTupleIterator {
}
}

impl PyTupleIterator {
    /// Fast path for FOR_ITER specialization: yields the next element or
    /// `None` on exhaustion (or internal error), skipping the full iterator
    /// protocol.
    pub(crate) fn fast_next(&self) -> Option<PyObjectRef> {
        let stepped = self.internal.lock().next(|tuple, pos| {
            Ok(PyIterReturn::from_result(tuple.get(pos).cloned().ok_or(None)))
        });
        if let Ok(PyIterReturn::Return(obj)) = stepped {
            Some(obj)
        } else {
            None
        }
    }
}

impl SelfIter for PyTupleIterator {}
impl IterNext for PyTupleIterator {
fn next(zelf: &Py<Self>, _vm: &VirtualMachine) -> PyResult<PyIterReturn> {
Expand Down
7 changes: 6 additions & 1 deletion crates/vm/src/builtins/type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ pub struct PyType {
pub tp_version_tag: AtomicU32,
}

/// Monotonic counter for type version tags. Once it reaches `u32::MAX`,
/// `assign_version_tag()` returns 0 permanently, disabling new inline-cache
/// entries but not invalidating correctness (cache misses fall back to the
/// generic path).
static NEXT_TYPE_VERSION: AtomicU32 = AtomicU32::new(1);

unsafe impl crate::object::Traverse for PyType {
Expand Down Expand Up @@ -199,7 +203,8 @@ fn is_subtype_with_mro(a_mro: &[PyTypeRef], a: &Py<PyType>, b: &Py<PyType>) -> b
}

impl PyType {
/// Assign a fresh version tag. Returns 0 on overflow (all caches invalidated).
/// Assign a fresh version tag. Returns 0 if the version counter has been
/// exhausted, in which case no new cache entries can be created.
pub fn assign_version_tag(&self) -> u32 {
loop {
let current = NEXT_TYPE_VERSION.load(Ordering::Relaxed);
Expand Down
25 changes: 24 additions & 1 deletion crates/vm/src/dict_inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ use crate::{
object::{Traverse, TraverseFn},
};
use alloc::fmt;
use core::{mem::size_of, ops::ControlFlow};
use core::mem::size_of;
use core::ops::ControlFlow;
use core::sync::atomic::{AtomicU64, Ordering::Relaxed};
use num_traits::ToPrimitive;

// HashIndex is intended to be same size with hash::PyHash
Expand All @@ -34,6 +36,7 @@ type EntryIndex = usize;

pub struct Dict<T = PyObjectRef> {
    inner: PyRwLock<DictInner<T>>,
    /// Monotonically increasing mutation counter (see `version` /
    /// `bump_version`); read lock-free by cache-validation code.
    version: AtomicU64,
}

unsafe impl<T: Traverse> Traverse for Dict<T> {
Expand Down Expand Up @@ -98,6 +101,7 @@ impl<T: Clone> Clone for Dict<T> {
fn clone(&self) -> Self {
    Self {
        inner: PyRwLock::new(self.inner.read().clone()),
        // The clone starts its own version history at 0: the counter tracks
        // mutations of a particular Dict instance, not of its contents.
        version: AtomicU64::new(0),
    }
}
}
Expand All @@ -111,6 +115,7 @@ impl<T> Default for Dict<T> {
indices: vec![IndexEntry::FREE; 8],
entries: Vec::new(),
}),
version: AtomicU64::new(0),
}
}
}
Expand Down Expand Up @@ -254,6 +259,16 @@ impl<T> DictInner<T> {
type PopInnerResult<T> = ControlFlow<Option<DictEntry<T>>>;

impl<T: Clone> Dict<T> {
/// Monotonically increasing version counter for mutation tracking.
///
/// The `Acquire` load pairs with the `Release` bump in `bump_version`, so a
/// reader that observes a bumped value also observes the mutation that
/// preceded the bump (the original `Relaxed`/`Relaxed` pair gave lock-free
/// readers no such guarantee).
pub fn version(&self) -> u64 {
    use core::sync::atomic::Ordering::Acquire;
    self.version.load(Acquire)
}

/// Bump the version counter after any mutation.
///
/// NOTE(review): callers currently bump after releasing the write lock;
/// moving the bump inside the same critical section as the mutation would
/// fully close the window in which a lock-free reader can validate a cache
/// against a stale version.
fn bump_version(&self) {
    use core::sync::atomic::Ordering::Release;
    self.version.fetch_add(1, Release);
}
Comment on lines +263 to +270
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# 1) Show where version bumps happen relative to lock scopes.
rg -n -C4 'bump_version\(' crates/vm/src/dict_inner.rs

Repository: RustPython/RustPython

Length of output: 2663


🏁 Script executed:

# 2) Show consumers of dict version values (to validate whether reads are lock-free).
rg -n -C4 '\.version\(\)' crates/vm/src

Repository: RustPython/RustPython

Length of output: 3008


🏁 Script executed:

# 3) Get the full context around lines 263-270 and surrounding code
head -n 300 crates/vm/src/dict_inner.rs | tail -n +250

Repository: RustPython/RustPython

Length of output: 1941


Use Acquire/Release ordering for version field and perform version bumps under the write lock.

The version() and bump_version() methods use Relaxed ordering. Currently, bump_version() is called after the write lock is released at lines 319, 383, 441, 465, 499, 536, 738, and 756. Meanwhile, version() is read lock-free in frame.rs (lines 4644, 4680, 7076) to validate LOAD_GLOBAL specialization caches. With Relaxed ordering, a reader may see stale version values after mutations complete, causing cache validation to incorrectly accept invalidated caches.

🔧 Suggested direction
-use core::sync::atomic::{AtomicU64, Ordering::Relaxed};
+use core::sync::atomic::{AtomicU64, Ordering::{Acquire, Release}};
...
 pub fn version(&self) -> u64 {
-    self.version.load(Relaxed)
+    self.version.load(Acquire)
 }
...
 fn bump_version(&self) {
-    self.version.fetch_add(1, Relaxed);
+    self.version.fetch_add(1, Release);
 }

Also move each bump_version() call into the same write-lock critical section as the corresponding mutation to ensure version updates are ordered with respect to dict mutations.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/vm/src/dict_inner.rs` around lines 263 - 270, The version() accessor
and bump_version() updater must use proper Acquire/Release ordering and
bump_version must be called while holding the dict write lock: change version()
to load with Acquire and bump_version() to fetch_add with Release (function
names: version and bump_version in dict_inner.rs), and move every call to
bump_version so it executes inside the same write-lock critical section where
the dictionary mutation occurs (the callers in this repo include the mutation
sites referenced in frame.rs that read the version lock-free for LOAD_GLOBAL
caching); ensure the write lock remains held across the mutation and the
bump_version call so the Release store synchronizes with readers that use
Acquire.


fn read(&self) -> PyRwLockReadGuard<'_, DictInner<T>> {
self.inner.read()
}
Expand Down Expand Up @@ -283,6 +298,7 @@ impl<T: Clone> Dict<T> {
};
if entry.index == index_index {
let removed = core::mem::replace(&mut entry.value, value);
self.bump_version();
// defer dec RC
break Some(removed);
} else {
Expand All @@ -298,6 +314,7 @@ impl<T: Clone> Dict<T> {
continue;
}
inner.unchecked_push(index_index, hash, key.to_pyobject(vm), value, entry_index);
self.bump_version();
break None;
}
};
Expand Down Expand Up @@ -361,6 +378,7 @@ impl<T: Clone> Dict<T> {
inner.indices.resize(8, IndexEntry::FREE);
inner.used = 0;
inner.filled = 0;
self.bump_version();
// defer dec rc
core::mem::take(&mut inner.entries)
};
Expand Down Expand Up @@ -439,6 +457,7 @@ impl<T: Clone> Dict<T> {
continue;
}
inner.unchecked_push(index_index, hash, key.to_owned(), value, entry);
self.bump_version();
break None;
}
};
Expand Down Expand Up @@ -475,6 +494,7 @@ impl<T: Clone> Dict<T> {
value.clone(),
index_entry,
);
self.bump_version();
return Ok(value);
}
}
Expand Down Expand Up @@ -511,6 +531,7 @@ impl<T: Clone> Dict<T> {
let key_obj = key.to_pyobject(vm);
let ret = (key_obj.clone(), value.clone());
inner.unchecked_push(index_index, hash, key_obj, value, index_entry);
self.bump_version();
return Ok(ret);
}
}
Expand Down Expand Up @@ -698,6 +719,7 @@ impl<T: Clone> Dict<T> {
} = IndexEntry::DUMMY;
inner.used -= 1;
let removed = slot.take();
self.bump_version();
Ok(ControlFlow::Break(removed))
}

Expand Down Expand Up @@ -727,6 +749,7 @@ impl<T: Clone> Dict<T> {
// entry.index always refers valid index
inner.indices.get_unchecked_mut(entry.index)
} = IndexEntry::DUMMY;
self.bump_version();
Some((entry.key, entry.value))
}

Expand Down
Loading
Loading
X Tutup