PyListObject 定义如下:
typedef struct {
PyObject_VAR_HEAD
/* Vector of pointers to list elements. list[0] is ob_item[0], etc. */
PyObject **ob_item;
/* ob_item contains space for 'allocated' elements. The number
* currently in use is ob_size.
* Invariants:
* 0 <= ob_size <= allocated
* len(list) == ob_size
* ob_item == NULL implies ob_size == allocated == 0
* list.sort() temporarily sets allocated to -1 to detect mutations.
*
* Items must normally not be NULL, except during construction when
* the list is not yet visible outside the function that builds it.
*/
Py_ssize_t allocated;
} PyListObject;代码注释已经说得足够明白了。
PyObject *
PyList_New(Py_ssize_t size)
{
PyListObject *op;
size_t nbytes;
if (size < 0) {
PyErr_BadInternalCall();
return NULL;
}
nbytes = size * sizeof(PyObject *);
/* Check for overflow */
if (nbytes / sizeof(PyObject *) != (size_t)size)
return PyErr_NoMemory();
if (num_free_lists) {
num_free_lists--;
op = free_lists[num_free_lists];
_Py_NewReference((PyObject *)op);
} else {
op = PyObject_GC_New(PyListObject, &PyList_Type);
if (op == NULL)
return NULL;
}
if (size <= 0)
op->ob_item = NULL;
else {
op->ob_item = (PyObject **) PyMem_MALLOC(nbytes);
if (op->ob_item == NULL) {
Py_DECREF(op);
return PyErr_NoMemory();
}
memset(op->ob_item, 0, nbytes);
}
op->ob_size = size;
op->allocated = size;
_PyObject_GC_TRACK(op);
return (PyObject *) op;
}int
PyList_SetItem(register PyObject *op, register Py_ssize_t i,
register PyObject *newitem)
{
register PyObject *olditem;
register PyObject **p;
if (!PyList_Check(op)) {
Py_XDECREF(newitem);
PyErr_BadInternalCall();
return -1;
}
if (i < 0 || i>= ((PyListObject *)op) -> ob_size) {
Py_XDECREF(newitem);
PyErr_SetString(PyExc_IndexError,
"list assignment index out of range");
return -1;
}
p = ((PyListObject *)op) -> ob_item + i;
olditem = *p;
*p = newitem;
Py_XDECREF(olditem);
return 0;
}旧元素的 refcnt - 1
static int
ins1(PyListObject *self, Py_ssize_t where, PyObject *v)
{
Py_ssize_t i, n = self->ob_size;
PyObject **items;
if (v == NULL) {
PyErr_BadInternalCall();
return -1;
}
if (n == PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"cannot add more objects to list");
return -1;
}
if (list_resize(self, n+1) == -1)
return -1;
if (where < 0) {
where += n;
if (where < 0)
where = 0;
}
if (where> n)
where = n;
items = self->ob_item;
for (i = n; --i>= where; )
items[i+1] = items[i];
Py_INCREF(v);
items[where] = v;
return 0;
}
int
PyList_Insert(PyObject *op, Py_ssize_t where, PyObject *newitem)
{
if (!PyList_Check(op)) {
PyErr_BadInternalCall();
return -1;
}
return ins1((PyListObject *)op, where, newitem);
}插入元素的流程:
-
调整 PyListObject 的尺寸,以便容下新元素。
-
从最后一个元素开始,每个元素往后以一步,直至 where(新元素插入的位置)
-
将新元素放到 where。
list_resize 代码如下:
/* Ensure ob_item has room for at least newsize elements, and set
* ob_size to newsize. If newsize > ob_size on entry, the content
* of the new slots at exit is undefined heap trash; it's the caller's
* responsiblity to overwrite them with sane values.
* The number of allocated elements may grow, shrink, or stay the same.
* Failure is impossible if newsize <= self.allocated on entry, although
* that partly relies on an assumption that the system realloc() never
* fails when passed a number of bytes <= the number of bytes last
* allocated (the C standard doesn't guarantee this, but it's hard to
* imagine a realloc implementation where it wouldn't be true).
* Note that self->ob_item may change, and even if newsize is less
* than ob_size on entry.
*/
static int
list_resize(PyListObject *self, Py_ssize_t newsize)
{
PyObject **items;
size_t new_allocated;
Py_ssize_t allocated = self->allocated;
/* Bypass realloc() when a previous overallocation is large enough
to accommodate the newsize. If the newsize falls lower than half
the allocated size, then proceed with the realloc() to shrink the list.
*/
if (allocated>= newsize && newsize >= (allocated >> 1)) {
assert(self->ob_item != NULL || newsize == 0);
self->ob_size = newsize;
return 0;
}
/* This over-allocates proportional to the list size, making room
* for additional growth. The over-allocation is mild, but is
* enough to give linear-time amortized behavior over a long
* sequence of appends() in the presence of a poorly-performing
* system realloc().
* The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
*/
new_allocated = (newsize>> 3) + (newsize < 9 ? 3 : 6) + newsize;
if (newsize == 0)
new_allocated = 0;
items = self->ob_item;
if (new_allocated <= ((~(size_t)0) / sizeof(PyObject *)))
PyMem_RESIZE(items, PyObject *, new_allocated);
else
items = NULL;
if (items == NULL) {
PyErr_NoMemory();
return -1;
}
self->ob_item = items;
self->ob_size = newsize;
self->allocated = new_allocated;
return 0;
}从代码中可以看出,为了避免频繁的调用 realloc,Python 会多分配一些空间。
static int
app1(PyListObject *self, PyObject *v)
{
Py_ssize_t n = PyList_GET_SIZE(self);
assert (v != NULL);
if (n == PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"cannot add more objects to list");
return -1;
}
if (list_resize(self, n+1) == -1)
return -1;
Py_INCREF(v);
PyList_SET_ITEM(self, n, v);
return 0;
}
int
PyList_Append(PyObject *op, PyObject *newitem)
{
if (PyList_Check(op) && (newitem != NULL))
return app1((PyListObject *)op, newitem);
PyErr_BadInternalCall();
return -1;
}static PyObject *
listremove(PyListObject *self, PyObject *v)
{
Py_ssize_t i;
for (i = 0; i < self->ob_size; i++) {
int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ);
if (cmp> 0) {
if (list_ass_slice(self, i, i+1,
(PyObject *)NULL) == 0)
Py_RETURN_NONE;
return NULL;
}
else if (cmp < 0)
return NULL;
}
PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
return NULL;
}关键的是 list_ass_slice 函数,具体看源码,这里就粘贴了。
当销毁 PyListObject 对象时,只释放了 PyListObject 关联的 PyObject* 数组占有的内存,PyListObject 本身的内存没有释放,而是缓存在 free_lists 中。当下次创建 PyListObject 对象时,可以直接利用 free_lists 中的 PyListObject。