メモ書きから。
ソースコードリーディング
於 森ビル19F
#cpython_reading
自己紹介
Cを使ったOOP実装
マルチプラットフォーム対応は大変→Javaっていいですね。
gtags
全体像
Py_Main
ファイル指定がない場合はREPLが起動
run_mod → evalに相当
Modlues/python.c
stdin_is_interactive
RunMainFromImporter(filename);
PyRun_FileExFlags
メモリ管理
Arena 256kb Pool×64
Pool 4kb Block×n
Block 8-256bytes
*詳しくはGC本で! *
→
ステートメント毎に分岐
PyRun_FileExFlags(FILE *fp, const char *filename, int start, PyObject *globals,
PyObject *locals, int closeit, PyCompilerFlags *flags)
ast_for_starred(struct compiling *c, const node *n)
switch (TYPE(n)) {
case expr_stmt:
return ast_for_expr_stmt(c, n);
case del_stmt:
return ast_for_del_stmt(c, n);
case pass_stmt:
return Pass(LINENO(n), n->n_col_offset, c->c_arena);
case flow_stmt:
return ast_for_flow_stmt(c, n);
case import_stmt:
return ast_for_import_stmt(c, n);
case global_stmt:
return ast_for_global_stmt(c, n);
case nonlocal_stmt:
return ast_for_nonlocal_stmt(c, n);
case assert_stmt:
return ast_for_assert_stmt(c, n);
default:
PyErr_Format(PyExc_SystemError,
"unhandled small_stmt: TYPE=%d NCH=%d\n",
TYPE(n), NCH(n));
return NULL;
}
flow_stmt とはなにか?
↓これ。
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
(Grammar/Grammar)
PEP3107 アノテーションの処理はどうなっているか?
Grammar/Grammar
Include/graminit.h
tfpdef
自己紹介続き
Ruby や C で書いたら Java で書き直された(メンテする人がいないので)
=====
CPython 3.2
evalを追う
一般的なインタープリターでのevalの実装
抽象構文木(AST) のノードに対して
現在の環境の下で
より小さなノードがあれば評価
それらを組み合わせて評価
ASTと値と環境の実装がわかればあとはわかる
ceval.c
co は code のこと
ASTと値と環境の実装を探ろう
AST
PyCodeObjectという型がある
値
環境
PyCodeObject
AST Include/code.h
/* Bytecode object */
typedef struct {
PyObject_HEAD
int co_argcount; /* #arguments, except *args */
int co_kwonlyargcount; /* #keyword only arguments */
int co_nlocals; /* #local variables */
int co_stacksize; /* #entries needed for evaluation stack */
int co_flags; /* CO_..., see below */
PyObject *co_code; /* instruction opcodes */
PyObject *co_consts; /* list (constants used) */
PyObject *co_names; /* list of strings (names used) */
PyObject *co_varnames; /* tuple of strings (local variable names) */
PyObject *co_freevars; /* tuple of strings (free variable names) */
PyObject *co_cellvars; /* tuple of strings (cell variable names) */
/* The rest doesn't count for hash or comparisons */
PyObject *co_filename; /* unicode (where it was loaded from) */
PyObject *co_name; /* unicode (name, for reference) */
int co_firstlineno; /* first source line number */
PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) See
Objects/lnotab_notes.txt for details. */
void *co_zombieframe; /* for optimization only (see frameobject.c) */
PyObject *co_weakreflist; /* to support weakrefs to code objects */
} PyCodeObject;
C でどのようにOOPを実現しているか? → 構造体を使って。
#COMとかもお仲間に入るのかしらん。
object.h
/*
Objects are structures allocated on the heap. Special rules apply to
the use of objects to ensure they are properly garbage-collected.
Objects are never allocated statically or on the stack; they must be
accessed through special macros and functions only. (Type objects are
exceptions to the first rule; the standard types are represented by
statically initialized type objects, although work on type/class unification
for Python 2.2 made it possible to have heap-allocated type objects too).
An object has a 'reference count' that is increased or decreased when a
pointer to the object is copied or deleted; when the reference count
reaches zero there are no references to the object left and it can be
removed from the heap.
(略)
Objects are always accessed through pointers of the type 'PyObject *'.
The type 'PyObject' is a structure that only contains the reference count
and the type pointer. The actual memory allocated for an object
contains other data that can only be accessed after casting the pointer
to a pointer to a longer structure type. This longer type must start
with the reference count and type fields; the macro PyObject_HEAD should be
used for this (to accommodate for future changes). The implementation
of a particular object type can cast the object pointer to the proper
type and back.
A standard interface exists for objects that contain an array of items
whose size is determined when the object is allocated.
*/
opcode.h
まとめ
ASTといいつつ詳細を調べるとバイトコードっぽい
型オブジェクト
PyObject object.h
_typeobject.h object.h
#ifdef Py_LIMITED_API
typedef struct _typeobject PyTypeObject; /* opaque */
#else
typedef struct _typeobject {
PyObject_VAR_HEAD
const char *tp_name; /* For printing, in format "<module>.<name>" */
Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */
/* Methods to implement standard operations */
destructor tp_dealloc;
printfunc tp_print;
getattrfunc tp_getattr;
setattrfunc tp_setattr;
void *tp_reserved; /* formerly known as tp_compare */
reprfunc tp_repr;
/* Method suites for standard classes */
PyNumberMethods *tp_as_number;
PySequenceMethods *tp_as_sequence;
PyMappingMethods *tp_as_mapping;
/* More standard operations (here for binary compatibility) */
hashfunc tp_hash;
ternaryfunc tp_call;
reprfunc tp_str;
getattrofunc tp_getattro;
setattrofunc tp_setattro;
/* Functions to access object as input/output buffer */
PyBufferProcs *tp_as_buffer;
/* Flags to define presence of optional/expanded features */
long tp_flags;
const char *tp_doc; /* Documentation string */
/* Assigned meaning in release 2.0 */
/* call function for all accessible objects */
traverseproc tp_traverse;
/* delete references to contained objects */
inquiry tp_clear;
/* Assigned meaning in release 2.1 */
/* rich comparisons */
richcmpfunc tp_richcompare;
/* weak reference enabler */
Py_ssize_t tp_weaklistoffset;
/* Iterators */
getiterfunc tp_iter;
iternextfunc tp_iternext;
/* Attribute descriptor and subclassing stuff */
struct PyMethodDef *tp_methods;
struct PyMemberDef *tp_members;
struct PyGetSetDef *tp_getset;
struct _typeobject *tp_base;
PyObject *tp_dict;
descrgetfunc tp_descr_get;
descrsetfunc tp_descr_set;
Py_ssize_t tp_dictoffset;
initproc tp_init;
allocfunc tp_alloc;
newfunc tp_new;
freefunc tp_free; /* Low-level free-memory routine */
inquiry tp_is_gc; /* For PyObject_IS_GC */
PyObject *tp_bases;
PyObject *tp_mro; /* method resolution order */
PyObject *tp_cache;
PyObject *tp_subclasses;
PyObject *tp_weaklist;
destructor tp_del;
/* Type attribute cache version tag. Added in version 2.6 */
unsigned int tp_version_tag;
#ifdef COUNT_ALLOCS
/* these must be last and never explicitly initialized */
Py_ssize_t tp_allocs;
Py_ssize_t tp_frees;
Py_ssize_t tp_maxalloc;
struct _typeobject *tp_prev;
struct _typeobject *tp_next;
#endif
} PyTypeObject;
#endif
PyLongObject(longobject.h)
2.x までは int (通常の整数) と long (多倍長の長整数)があったが 3.0 で long に統合された
_longobject.h
Include/longintrepr.h:89:struct _longobject {
struct _longobject {
}
longinterp.h
/* Long integer representation.
The absolute value of a number is equal to
SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
Negative numbers are represented with ob_size < 0;
zero is represented by ob_size == 0.
In a normalized number, ob_digit[abs(ob_size)-1] (the most significant
digit) is never zero. Also, in all cases, for all valid i,
0 <= ob_digit[i] <= MASK.
The allocation function takes care of allocating extra memory
so that ob_digit[0] ... ob_digit[abs(ob_size)-1] are actually available.
CAUTION: Generic code manipulating subtypes of PyVarObject has to
aware that longs abuse ob_size's sign bit.
*/
struct _longobject {
PyObject_VAR_HEAD
digit ob_digit[1]; ←実際にはこれは可変長配列になる
};
フレームって?
PyFrameOjbect frameobject.h
/* Frame object interface */
#ifndef Py_LIMITED_API
#ifndef Py_FRAMEOBJECT_H
#define Py_FRAMEOBJECT_H
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
int b_type; /* what kind of block this is */
int b_handler; /* where to jump to find handler */
int b_level; /* value stack level to pop to */
} PyTryBlock;
typedef struct _frame {
PyObject_VAR_HEAD
struct _frame *f_back; /* previous frame, or NULL */
PyCodeObject *f_code; /* code segment */
PyObject *f_builtins; /* builtin symbol table (PyDictObject) */
PyObject *f_globals; /* global symbol table (PyDictObject) */
PyObject *f_locals; /* local symbol table (any mapping) */
PyObject **f_valuestack; /* points after the last local */
/* Next free slot in f_valuestack. Frame creation sets to f_valuestack.
Frame evaluation usually NULLs it, but a frame that yields sets it
to the current stack top. */
PyObject **f_stacktop;
PyObject *f_trace; /* Trace function */
/* In a generator, we need to be able to swap between the exception
state inside the generator and the exception state of the calling
frame (which shouldn't be impacted when the generator "yields"
from an except handler).
These three fields exist exactly for that, and are unused for
non-generator frames. See the SAVE_EXC_STATE and SWAP_EXC_STATE
macros in ceval.c for details of their use. */
PyObject *f_exc_type, *f_exc_value, *f_exc_traceback;
PyThreadState *f_tstate;
int f_lasti; /* Last instruction if called */
/* Call PyFrame_GetLineNumber() instead of reading this field
directly. As of 2.3 f_lineno is only valid when tracing is
active (i.e. when f_trace is set). At other times we use
PyCode_Addr2Line to calculate the line from the current
bytecode index. */
int f_lineno; /* Current line number */
int f_iblock; /* index in f_blockstack */
PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */
PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */
} PyFrameObject;
↑ 一つ前のフレーム、現在のコード、環境などが構造体としてまとまったもの。
frameobjet.c
コードや変数からフレームを作成
おおよそ構造体にコピーしているだけ
フレームオブジェクトを作るときに
ゾンビフレームというのが codee にあればそれを使う
なければフリーリストから取得
そこにもなければ malloc
/* Stack frames are allocated and deallocated at a considerable rate.
In an attempt to improve the speed of function calls, we:
1. Hold a single "zombie" frame on each code object. This retains
the allocated and initialised frame object from an invocation of
the code object. The zombie is reanimated the next time we need a
frame object for that code object. Doing this saves the malloc/
realloc required when using a free_list frame that isn't the
correct size. It also saves some field initialisation.
In zombie mode, no field of PyFrameObject holds a reference, but
the following fields are still valid:
* ob_type, ob_size, f_code, f_valuestack;
* f_locals, f_trace,
f_exc_type, f_exc_value, f_exc_traceback are NULL;
* f_localsplus does not require re-allocation and
the local variables in f_localsplus are NULL.
2. We also maintain a separate free list of stack frames (just like
floats are allocated in a special way -- see floatobject.c). When
a stack frame is on the free list, only the following members have
a meaning:
ob_type == &Frametype
f_back next item on free list, or NULL
f_stacksize size of value stack
ob_size size of localsplus
Note that the value and block stacks are preserved -- this can save
another malloc() call or two (and two free() calls as well!).
Also note that, unlike for integers, each frame object is a
malloc'ed object in its own right -- it is only the actual calls to
malloc() that we are trying to save here, not the administration.
After all, while a typical program may make millions of calls, a
call depth of more than 20 or 30 is probably already exceptional
unless the program contains run-away recursion. I hope.
Later, PyFrame_MAXFREELIST was added to bound the # of frames saved on
free_list. Else programs creating lots of cyclic trash involving
frames could provoke free_list into growing without bound.
*/
static PyFrameObject *free_list = NULL;
static int numfree = 0; /* number of frames currently in free_list */
PyEval_EvalCodeってどこから呼ばれるの? → run_mod() から
ceval.c
PyObject *
PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
{
#ifdef DXPAIRS
int lastopcode = 0;
#endif
register PyObject **stack_pointer; /* Next free slot in value stack */
register unsigned char *next_instr;
register int opcode; /* Current opcode */
register int oparg; /* Current opcode argument, if any */
register enum why_code why; /* Reason for block stack unwind */
register int err; /* Error status -- nonzero if error */
register PyObject *x; /* Result object -- NULL if error */
register PyObject *v; /* Temporary objects popped off stack */
register PyObject *w;
register PyObject *u;
register PyObject *t;
register PyObject **fastlocals, **freevars;
PyObject *retval = NULL; /* Return value */
PyThreadState *tstate = PyThreadState_GET();
PyCodeObject *co;
x だの v だのの一文字変数はナニモノ?
#何かのルールに従っての命名か
大量のマクロが定義されている
高速化のための努力
/* Computed GOTOs, or
the-optimization-commonly-but-improperly-known-as-"threaded code"
using gcc's labels-as-values extension
(http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).
The traditional bytecode evaluation loop uses a "switch" statement, which
decent compilers will optimize as a single indirect branch instruction
combined with a lookup table of jump addresses. However, since the
indirect jump instruction is shared by all opcodes, the CPU will have a
hard time making the right prediction for where to jump next (actually,
it will be always wrong except in the uncommon case of a sequence of
several identical opcodes).
"Threaded code" in contrast, uses an explicit jump table and an explicit
indirect jump instruction at the end of each opcode. Since the jump
instruction is at a different address for each opcode, the CPU will make a
separate prediction for each of these instructions, which is equivalent to
predicting the second opcode of each opcode pair. These predictions have
a much better chance to turn out valid, especially in small bytecode loops.
(略)
NOTE: care must be taken that the compiler doesn't try to "optimize" the
indirect jumps by sharing them between all opcodes. Such optimizations
can be disabled on gcc by using the -fno-gcse flag (or possibly
-fno-crossjumping).
*/
実際のコード
/* Main switch on opcode */
READ_TIMESTAMP(inst0);
switch (opcode) {
/* BEWARE!
It is essential that any operation that fails sets either
x to NULL, err to nonzero, or why to anything but WHY_NOT,
and that no operation that succeeds does this! */
/* case STOP_CODE: this is an error! */
computed goto するために複雑になっている
(→ ダイレクトスレッデッドコードとか調べよう)
NOP
LOAD_CONST
stack machine ぽい?
TARGET(BINARY_ADD を調べるとそれっぽい
スタックマシンなら、スタックトップを見て分岐する命令があるはず
TARGET(BINARY_ADD)
w = POP();
v = TOP();
if (PyUnicode_CheckExact(v) &&
PyUnicode_CheckExact(w)) {
x = unicode_concatenate(v, w, f, next_instr);
/* unicode_concatenate consumed the ref to v */
goto skip_decref_vx;
}
あった
関数呼び出し CALL_FUNCTION
call_function
TARGET(CALL_FUNCTION)
{
PyObject **sp;
PCALL(PCALL_ALL);
sp = stack_pointer;
#ifdef WITH_TSC
x = call_function(&sp, oparg, &intr0, &intr1);
#else
x = call_function(&sp, oparg);
#endif
stack_pointer = sp;
PUSH(x);
if (x != NULL)
DISPATCH();
break;
}
関数呼び出しの本体
static PyObject *
call_function(PyObject ***pp_stack, int oparg
#ifdef WITH_TSC
, uint64* pintr0, uint64* pintr1
#endif
)
{
int na = oparg & 0xff;
int nk = (oparg>>8) & 0xff;
int n = na + 2 * nk;
PyObject **pfunc = (*pp_stack) - n - 1;
PyObject *func = *pfunc;
PyObject *x, *w;
/* Always dispatch PyCFunction first, because these are
presumed to be the most frequent callable object.
*/
optarg の下16ビットは引数の数を表している
na positional 引数
nk キーワード引数 (キーワード、値のペアなので二倍)
int na = oparg & 0xff;
int nk = (oparg>>8) & 0xff;
int n = na + 2 * nk;
---
disモジュールのdis()が便利
"""Disassembler of Python byte code into mnemonics."""
import sys
import types
from opcode import *
from opcode import __all__ as _opcodes_all
slideshare
=======
つづく。かもしれない。