Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Lib/importlib/_bootstrap_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ def _write_atomic(path, data, mode=0o666):
# Python 3.12a1 3510 (FOR_ITER leaves iterator on the stack)
# Python 3.12a1 3511 (Add STOPITERATION_ERROR instruction)
# Python 3.12a1 3512 (Remove all unused consts from code objects)
# Python 3.12a1 3513 (Compress marshalled bytecode)

# Python 3.13 will start with 3550

Expand All @@ -438,7 +439,7 @@ def _write_atomic(path, data, mode=0o666):
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.

MAGIC_NUMBER = (3512).to_bytes(2, 'little') + b'\r\n'
MAGIC_NUMBER = (3513).to_bytes(2, 'little') + b'\r\n'

_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c

Expand Down
2 changes: 1 addition & 1 deletion Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def pseudo_op(name, op, real_ops):
hasfree.append(148)
def_op('COPY_FREE_VARS', 149)
def_op('YIELD_VALUE', 150)
def_op('RESUME', 151) # This must be kept in sync with deepfreeze.py
def_op('RESUME', 151)
def_op('MATCH_CLASS', 152)

def_op('FORMAT_VALUE', 155)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Modify the :mod:`marshal` format to serialize bytecode more efficiently.
70 changes: 32 additions & 38 deletions Programs/test_frozenmain.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

81 changes: 73 additions & 8 deletions Python/marshal.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "pycore_call.h" // _PyObject_CallNoArgs()
#include "pycore_code.h" // _PyCode_New()
#include "pycore_hashtable.h" // _Py_hashtable_t
#include "pycore_opcode.h"
#include "marshal.h" // Py_MARSHAL_VERSION

/*[clinic input]
Expand Down Expand Up @@ -291,6 +292,21 @@ w_float_str(double v, WFILE *p)
PyMem_Free(buf);
}

/* Serialize the instructions of `code` in a compact form.

   The number of code units is written first.  Then, for each instruction,
   the de-specialized opcode (looked up in _PyOpcode_Deopt) is written as a
   single byte, followed by the oparg byte only when HAS_ARG() is true for
   that opcode.  The _PyOpcode_Caches[opcode] code units that follow an
   instruction are skipped and never written. */
static void
w_bytecode(PyCodeObject *code, WFILE *p)
{
    const Py_ssize_t n_units = Py_SIZE(code);
    const _Py_CODEUNIT *units = _PyCode_CODE(code);
    Py_ssize_t pos = 0;

    W_SIZE(n_units, p);
    while (pos < n_units) {
        _Py_CODEUNIT unit = units[pos];
        int base_op = _PyOpcode_Deopt[_Py_OPCODE(unit)];

        w_byte(base_op, p);
        if (HAS_ARG(base_op)) {
            w_byte(_Py_OPARG(unit), p);
        }
        /* Step over this instruction and its inline cache entries. */
        pos += 1 + _PyOpcode_Caches[base_op];
    }
}

static int
w_ref(PyObject *v, char *flag, WFILE *p)
{
Expand Down Expand Up @@ -550,18 +566,13 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
}
else if (PyCode_Check(v)) {
PyCodeObject *co = (PyCodeObject *)v;
PyObject *co_code = _PyCode_GetCode(co);
if (co_code == NULL) {
p->error = WFERR_NOMEMORY;
return;
}
W_TYPE(TYPE_CODE, p);
w_long(co->co_argcount, p);
w_long(co->co_posonlyargcount, p);
w_long(co->co_kwonlyargcount, p);
w_long(co->co_stacksize, p);
w_long(co->co_flags, p);
w_object(co_code, p);
w_bytecode(co, p);
w_object(co->co_consts, p);
w_object(co->co_names, p);
w_object(co->co_localsplusnames, p);
Expand All @@ -572,7 +583,6 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
w_long(co->co_firstlineno, p);
w_object(co->co_linetable, p);
w_object(co->co_exceptiontable, p);
Py_DECREF(co_code);
}
else if (PyObject_CheckBuffer(v)) {
/* Write unknown bytes-like objects as a bytes object */
Expand Down Expand Up @@ -921,6 +931,61 @@ r_float_str(RFILE *p)
return PyOS_string_to_double(buf, NULL, NULL);
}

/* Read bytecode in the compact form produced by w_bytecode() and expand it
   into a bytes object holding complete code units.

   The stream holds the number of code units, then for each instruction an
   opcode byte, followed by an oparg byte only when HAS_ARG(opcode) is true.
   Each instruction is followed in the result by _PyOpcode_Caches[opcode]
   CACHE units with a zero oparg.

   Returns a new reference, or NULL with an exception set:
     - ValueError if the unit count is out of range or does not match the
       instructions actually present in the stream;
     - EOFError if the stream ends in the middle of the bytecode. */
static PyObject *
r_bytecode(RFILE *p)
{
    long size = r_long(p);
    if (PyErr_Occurred()) {
        return NULL;
    }
    /* Validate the unit count *before* scaling it to bytes, so that the
       multiplication below can neither wrap nor turn a negative count
       into a small positive byte count. */
    if (size < 0 || (size_t)size > SIZE32_MAX / sizeof(_Py_CODEUNIT)) {
        const char *e = "bad marshal data (bytecode size out of range)";
        PyErr_SetString(PyExc_ValueError, e);
        return NULL;
    }
    Py_ssize_t nbytes = (Py_ssize_t)size * (Py_ssize_t)sizeof(_Py_CODEUNIT);
    PyObject *bytecode = PyBytes_FromStringAndSize(NULL, nbytes);
    if (bytecode == NULL) {
        return NULL;
    }
    _Py_CODEUNIT *buffer = (_Py_CODEUNIT *)PyBytes_AS_STRING(bytecode);
    long i = 0;
    while (i < size) {
        int opcode = r_byte(p);
        if (opcode == EOF) {
            const char *e = "EOF read where opcode expected";
            PyErr_SetString(PyExc_EOFError, e);
            goto error;
        }
        int oparg = 0;
        if (HAS_ARG(opcode)) {
            oparg = r_byte(p);
            if (oparg == EOF) {
                const char *e = "EOF read where oparg expected";
                PyErr_SetString(PyExc_EOFError, e);
                goto error;
            }
        }
        assert(0x00 <= opcode && opcode < 0x100);
        assert(0x00 <= oparg && oparg < 0x100);
        /* The instruction plus its cache entries must fit in what is left
           of the buffer; otherwise the declared size is wrong and writing
           the cache entries would run past the end of the allocation. */
        long caches = _PyOpcode_Caches[opcode];
        if (caches > size - i - 1) {
            const char *e = "bad marshal data (bytecode size incorrect)";
            PyErr_SetString(PyExc_ValueError, e);
            goto error;
        }
        buffer[i].opcode = opcode;
        buffer[i].oparg = oparg;
        i++;
        for (long j = 0; j < caches; j++) {
            buffer[i].opcode = CACHE;
            buffer[i].oparg = 0;
            i++;
        }
    }
    /* The per-instruction bounds check makes overshooting impossible. */
    assert(i == size);
    return bytecode;

error:
    /* Release the partially filled buffer on every failure path. */
    Py_DECREF(bytecode);
    return NULL;
}

/* allocate the reflist index for a new object. Return -1 on failure */
static Py_ssize_t
r_ref_reserve(int flag, RFILE *p)
Expand Down Expand Up @@ -1378,7 +1443,7 @@ r_object(RFILE *p)
flags = (int)r_long(p);
if (PyErr_Occurred())
goto code_error;
code = r_object(p);
code = r_bytecode(p);
if (code == NULL)
goto code_error;
consts = r_object(p);
Expand Down
4 changes: 2 additions & 2 deletions Tools/build/deepfreeze.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
from typing import Dict, FrozenSet, TextIO, Tuple

import umarshal
import opcode_for_build as opcode
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't a normal import so it shouldn't look like one. Maybe something like opcode = opcode_finder.get_opcodes()

from generate_global_objects import get_identifiers_and_strings

verbose = False
identifiers, strings = get_identifiers_and_strings()

# This must be kept in sync with opcode.py
RESUME = 151
RESUME = opcode.opmap["RESUME"]

def isprintable(b: bytes) -> bool:
return all(0x20 <= c < 0x7f for c in b)
Expand Down
17 changes: 17 additions & 0 deletions Tools/build/opcode_for_build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""
Parts of our build process (looking at you, deepfreeze) need the opcode module
for the Python *being built*, not the Python *doing the building*.

This basically just loads ../../Lib/opcode.py and re-exports everything:

>>> import opcode_for_build as opcode
"""

import os

# Path to Lib/opcode.py, found by going two directories up from the
# directory that contains this file.
_opcode_path = os.path.join(
    os.path.dirname(__file__), os.pardir, os.pardir, "Lib", "opcode.py"
)
with open(_opcode_path, encoding="utf-8") as _opcode_file:
    # Don't try this at home, kids:
    # Run the file's source in this module's own namespace so that every
    # name it defines is re-exported from here.  Compiling with the real
    # path first makes tracebacks and code objects refer to Lib/opcode.py
    # instead of "<string>".
    exec(compile(_opcode_file.read(), _opcode_path, "exec"))
22 changes: 21 additions & 1 deletion Tools/build/umarshal.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Implement marshal.loads() in pure Python

import ast
import opcode_for_build as opcode

from typing import Any, Tuple

Expand Down Expand Up @@ -47,6 +48,8 @@ class Type:
CO_FAST_CELL = 0x40
CO_FAST_FREE = 0x80

CACHE = opcode.opmap["CACHE"]


class Code:
def __init__(self, **kwds: Any):
Expand Down Expand Up @@ -178,6 +181,23 @@ def r_object(self) -> Any:
finally:
self.level = old_level

def r_bytecode(self) -> bytes:
nbytes = self.r_long() * 2
bytecode = bytearray()
while len(bytecode) < nbytes:
opcode_byte = self.r_byte()
if opcode.HAVE_ARGUMENT <= opcode_byte:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks weird. I'm so used to if x >= N that seeing if N <= x just feels wrong.

Suggested change
if opcode.HAVE_ARGUMENT <= opcode_byte:
if opcode_byte >= opcode.HAVE_ARGUMENT:

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const == variable is a C-ism to prevent accidentally writing = instead of ==. No need in Python.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a weird habit where I always use < and <= for comparisons, regardless of the placement of constants, etc. I guess I like the parallel with chained comparisons like lo <= x < hi, which almost always use < and <=.

I'll change it, though.

oparg_byte = self.r_byte()
else:
oparg_byte = 0
assert 0x00 <= opcode_byte < 0x100
assert 0x00 <= oparg_byte < 0x100
bytecode.extend([opcode_byte, oparg_byte])
for _ in range(opcode._inline_cache_entries[opcode_byte]):
bytecode.extend([CACHE, 0])
assert len(bytecode) == nbytes
return bytes(bytecode)

def _r_object(self) -> Any:
code = self.r_byte()
flag = code & FLAG_REF
Expand Down Expand Up @@ -279,7 +299,7 @@ def R_REF(obj: Any) -> Any:
retval.co_kwonlyargcount = self.r_long()
retval.co_stacksize = self.r_long()
retval.co_flags = self.r_long()
retval.co_code = self.r_object()
retval.co_code = self.r_bytecode()
retval.co_consts = self.r_object()
retval.co_names = self.r_object()
retval.co_localsplusnames = self.r_object()
Expand Down