static int
list_resize(PyListObject *self, Py_ssize_t newsize)
{
    Py_ssize_t curr_alloc = self->allocated;

    /* Fast path: keep the current buffer when it already holds newsize
     * items and would stay at least half full afterwards.  Shrinking
     * below half the capacity falls through to the realloc() below. */
    if (newsize <= curr_alloc && (curr_alloc >> 1) <= newsize) {
        assert(self->ob_item != NULL || newsize == 0);
        Py_SIZE(self) = newsize;
        return 0;
    }

    /* Over-allocate mildly — roughly 1/8 extra plus a small constant —
     * so a long run of appends gets linear-time amortized behavior even
     * with a poorly-performing system realloc().
     * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
     * This sum cannot overflow: the largest possible value is
     * PY_SSIZE_T_MAX * (9 / 8) + 6, which always fits in a size_t. */
    size_t target = (size_t)newsize + (newsize >> 3) + (newsize < 9 ? 3 : 6);
    if (target > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
        PyErr_NoMemory();
        return -1;
    }
    /* An empty list releases its buffer entirely. */
    if (newsize == 0) {
        target = 0;
    }

    size_t nbytes = target * sizeof(PyObject *);
    PyObject **new_items = (PyObject **)PyMem_Realloc(self->ob_item, nbytes);
    if (new_items == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    self->ob_item = new_items;
    Py_SIZE(self) = newsize;
    self->allocated = target;
    return 0;
}
查看上面列出的与 CPython 列表 append 相关的源代码（list_resize），我发现：只要数组中还有空闲空间，append 的复杂度就是 O(1)；否则 Python 会按比例扩大列表的内存（新容量约为 newsize 加上 newsize/8，再加上固定量 3 或 6），并将现有元素复制到新数组中。由于分配的数组容量按几何级数增长，随着列表变长，重新分配发生的频率会越来越低。因此，例如：
lst = []
for i in range(10):
lst.append(i)
第一次重新分配时复制 1 个元素，第二次复制 4 个元素，第三次复制 10 个元素，依此类推，理论上构成一个几何级数，其总和满足 1 + 4 + 10 + ⋯ ≤ O(n)。据我理解，每次扩容之后，到下一次重新分配之前可执行的 append 次数线性增长，因此 n 次 append 总共只触发 O(log n) 次重新分配。那么，既然如此，为什么 n 次 append 的总复杂度是 O(n)，而不是 O(n log n)？