一个函数意味着一个新的作用域(命名空间),当 Python 虚拟机执行一个函数时,首先创建一个 PyFrameObject,然后执行函数的字节码(PyCodeObject)。
PyFunctionObject 的定义:
typedef struct {
PyObject_HEAD
PyObject *func_code; /* A code object */
PyObject *func_globals; /* A dictionary (other mappings won't do) */
PyObject *func_defaults; /* NULL or a tuple */
PyObject *func_closure; /* NULL or a tuple of cell objects */
PyObject *func_doc; /* The __doc__ attribute, can be anything */
PyObject *func_name; /* The __name__ attribute, a string object */
PyObject *func_dict; /* The __dict__ attribute, a dict or NULL */
PyObject *func_weakreflist; /* List of weak references */
PyObject *func_module; /* The __module__ attribute, can be anything */
/* Invariant:
* func_closure contains the bindings for func_code->co_freevars, so
* PyTuple_Size(func_closure) == PyCode_GetNumFree(func_code)
* (func_closure may be NULL if PyCode_GetNumFree(func_code) == 0).
*/
} PyFunctionObject;
PyCodeObject 是在编译时产生的,包含函数的一些静态信息,如 co_consts, co_names, co_code 等,而 PyFunctionObject 则是执行 def 语句时动态产生的,包含 PyCodeObject 以及其它动态的信息,如 func_globals。
一个函数只有一个 PyCodeObject,但是却可能产生多个 PyFunctionObject:每次执行 def 语句(即 MAKE_FUNCTION 指令)时都会创建一个新的 PyFunctionObject,这些 PyFunctionObject 都关联至同一个 PyCodeObject。而每次调用函数时新创建的是 PyFrameObject,不是 PyFunctionObject。
[func_0.py]
def f():
0 LOAD_CONST 0 (code object f)
3 MAKE_FUNCTION 0
6 STORE_NAME 0 (f)
print "Function"
0 LOAD_CONST 1 ('Function')
3 PRINT_ITEM
4 PRINT_NEWLINE
5 LOAD_CONST 0 (None)
8 RETURN_VALUE
f()
9 LOAD_NAME 0 (f)
12 CALL_FUNCTION 0
15 POP_TOP
16 LOAD_CONST 1 (None)
19 RETURN_VALUE
重点指令:MAKE_FUNCTION,CALL_FUNCTION。
case MAKE_FUNCTION:
v = POP(); /* code object */
x = PyFunction_New(v, f->f_globals);
Py_DECREF(v);
/* XXX Maybe this should be a separate opcode? */
if (x != NULL && oparg> 0) {
// oparg 表示 “具有默认值的参数” 的个数
v = PyTuple_New(oparg);
if (v == NULL) {
Py_DECREF(x);
x = NULL;
break;
}
while (--oparg>= 0) {
w = POP();
PyTuple_SET_ITEM(v, oparg, w);
}
err = PyFunction_SetDefaults(x, v);
Py_DECREF(v);
}
PUSH(x);
break;
case CALL_FUNCTION:
{
PyObject **sp;
PCALL(PCALL_ALL);
sp = stack_pointer;
#ifdef WITH_TSC
x = call_function(&sp, oparg, &intr0, &intr1);
#else
x = call_function(&sp, oparg);
#endif
stack_pointer = sp;
PUSH(x); // 函数返回值
if (x != NULL)
continue;
break;
}
static PyObject *
call_function(PyObject ***pp_stack, int oparg
#ifdef WITH_TSC
, uint64* pintr0, uint64* pintr1
#endif
)
{
// oparg 只用到低 16 位:高 8 位表示 nk(实参中关键字参数个数),低 8 位表示 na(实参中位置参数个数)
int na = oparg & 0xff;
int nk = (oparg>>8) & 0xff;
int n = na + 2 * nk;
PyObject **pfunc = (*pp_stack) - n - 1;
PyObject *func = *pfunc;
PyObject *x, *w;
/* Always dispatch PyCFunction first, because these are
presumed to be the most frequent callable object.
*/
if (PyCFunction_Check(func) && nk == 0) {
int flags = PyCFunction_GET_FLAGS(func);
PyThreadState *tstate = PyThreadState_GET();
PCALL(PCALL_CFUNCTION);
if (flags & (METH_NOARGS | METH_O)) {
PyCFunction meth = PyCFunction_GET_FUNCTION(func);
PyObject *self = PyCFunction_GET_SELF(func);
if (flags & METH_NOARGS && na == 0) {
C_TRACE(x, (*meth)(self,NULL));
}
else if (flags & METH_O && na == 1) {
PyObject *arg = EXT_POP(*pp_stack);
C_TRACE(x, (*meth)(self,arg));
Py_DECREF(arg);
}
else {
err_args(func, flags, na);
x = NULL;
}
}
else {
PyObject *callargs;
callargs = load_args(pp_stack, na);
READ_TIMESTAMP(*pintr0);
C_TRACE(x, PyCFunction_Call(func,callargs,NULL));
READ_TIMESTAMP(*pintr1);
Py_XDECREF(callargs);
}
} else {
if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) {
/* optimize access to bound methods */
PyObject *self = PyMethod_GET_SELF(func);
PCALL(PCALL_METHOD);
PCALL(PCALL_BOUND_METHOD);
Py_INCREF(self);
func = PyMethod_GET_FUNCTION(func);
Py_INCREF(func);
Py_DECREF(*pfunc);
*pfunc = self;
na++;
n++;
} else
Py_INCREF(func);
READ_TIMESTAMP(*pintr0);
if (PyFunction_Check(func))
x = fast_function(func, pp_stack, n, na, nk);
else
x = do_call(func, pp_stack, na, nk);
READ_TIMESTAMP(*pintr1);
Py_DECREF(func);
}
/* Clear the stack of the function object. Also removes
the arguments in case they weren't consumed already
(fast_function() and err_args() leave them on the stack).
*/
while ((*pp_stack) > pfunc) {
w = EXT_POP(*pp_stack);
Py_DECREF(w);
PCALL(PCALL_POP);
}
return x;
}
fast_function 和 PyEval_EvalCodeEx 的代码太长,就不复制粘贴了。源代码(branch: ch11)有一些注释。
几个和函数参数有关的变量:
-
PyCodeObject->co_argcount
形参中参数的个数,不包括 *args 和 **kwargs
-
PyCodeObject->co_nlocals
函数局部变量的个数,包括参数个数(co_argcount + *args + **kwargs)
-
na
实参中位置参数的个数
-
nk(下文示例中写作 nw)
实参中关键字参数的个数
f_localsplus 布局
PyFrameObject:
+------------+-------------------+---------------------------+
|            |                   |                           |
| frame_info |      extras       |        valuestack         |
|            |                   |                           |
+--------------------------------+---------------------------+
             |                                               |
             <-------------+ f_localsplus +----------------->
             |                                               |
extras:
+----------+---------+-------+----------+----------+----------+
|          |         |       |          |          |          |
| pos args | kw args | *args | **kwargs | cellvars | freevars |
|          |         |       |          |          |          |
+----------+---------+-------+----------+----------+----------+
cellvars 一般是外层函数使用,存放着被内层嵌套函数引用的变量
freevars 一般是内层嵌套函数使用,存放着对外层函数中变量的引用
可以使用下面的例子去思考源码中 fast_function 和 PyEval_EvalCodeEx 的执行流程。
-
没有参数
def f():
    print 'hello world'

f()
# co_argcount = 0
# na = 0, nw = 0
符合 fast_function 条件
-
只有位置参数
def f(a, b):
    print 'hello world'

f(1, 2)
# co_argcount = 2
# na = 2, nw = 0

f(a=1, b=2)
# co_argcount = 2
# na = 0, nw = 2
f(1, 2) 符合 fast_function 条件
f(a=1, b=2) 不符合 fast_function 条件
-
位置参数和关键字参数
def f(a, b, c=1, d=2):
    print 'hello world'

f(1, 2)
f(1, 2, 3)
f(1, 2, 3, 4)
f(1, 2, c=3)
f(1, 2, d=3)
-
可变参数
def f(a, b, *args, **kwargs):
    print 'hello world'

f(1, 2)
f(1, 2, 3, 4, c=1, d=2)