-
-
Notifications
You must be signed in to change notification settings - Fork 34.6k
gh-105699: Use a _Py_hashtable_t for the PyModuleDef Cache #106974
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
3397777
670e632
485d130
77d5e8f
04a19c4
5fde127
818d828
e83b0cc
9e81cd5
430716b
647fbc2
bbe5eda
1d7cbd0
20c8b9d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ | |
|
|
||
| #include "Python.h" | ||
|
|
||
| #include "pycore_hashtable.h" // _Py_hashtable_new_full() | ||
| #include "pycore_import.h" // _PyImport_BootstrapImp() | ||
| #include "pycore_initconfig.h" // _PyStatus_OK() | ||
| #include "pycore_interp.h" // struct _import_runtime_state | ||
|
|
@@ -916,13 +917,28 @@ extensions_lock_release(void) | |
| dictionary, to avoid loading shared libraries twice. | ||
| */ | ||
|
|
||
| static Py_uhash_t | ||
| hashtable_hash_str(const void *key) | ||
| { | ||
| return _Py_HashBytes(key, strlen((const char *)key)); | ||
| } | ||
|
|
||
| static int | ||
| hashtable_compare_str(const void *key1, const void *key2) | ||
| { | ||
| return strcmp((const char *)key1, (const char *)key2) == 0; | ||
| } | ||
|
|
||
| static void | ||
| _extensions_cache_init(void) | ||
| hashtable_destroy_str(void *ptr) | ||
| { | ||
| /* The runtime (i.e. main interpreter) must be initializing, | ||
| so we don't need to worry about the lock. */ | ||
| _PyThreadState_InitDetached(&EXTENSIONS.main_tstate, | ||
| _PyInterpreterState_Main()); | ||
| PyMem_RawFree(ptr); | ||
| } | ||
|
|
||
| static void | ||
| hashtable_destroy_str_table(void *ptr) | ||
| { | ||
| _Py_hashtable_destroy((_Py_hashtable_t *)ptr); | ||
| } | ||
|
|
||
| static PyModuleDef * | ||
|
|
@@ -931,19 +947,24 @@ _extensions_cache_get(PyObject *filename, PyObject *name) | |
| PyModuleDef *def = NULL; | ||
| extensions_lock_acquire(); | ||
|
|
||
| PyObject *key = PyTuple_Pack(2, filename, name); | ||
| if (key == NULL) { | ||
| if (EXTENSIONS.hashtable == NULL) { | ||
| goto finally; | ||
| } | ||
|
|
||
| PyObject *extensions = EXTENSIONS.dict; | ||
| if (extensions == NULL) { | ||
| _Py_hashtable_entry_t *entry; | ||
| entry = _Py_hashtable_get_entry(EXTENSIONS.hashtable, | ||
| PyUnicode_AsUTF8(filename)); | ||
| if (entry == NULL) { | ||
| goto finally; | ||
| } | ||
| entry = _Py_hashtable_get_entry((_Py_hashtable_t *)entry->value, | ||
| PyUnicode_AsUTF8(name)); | ||
| if (entry == NULL) { | ||
| goto finally; | ||
| } | ||
| def = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); | ||
| def = (PyModuleDef *)entry->value; | ||
|
|
||
| finally: | ||
| Py_XDECREF(key); | ||
| extensions_lock_release(); | ||
| return def; | ||
| } | ||
|
|
@@ -952,134 +973,125 @@ static int | |
| _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) | ||
| { | ||
| int res = -1; | ||
| PyThreadState *oldts = NULL; | ||
| extensions_lock_acquire(); | ||
|
|
||
| /* Swap to the main interpreter, if necessary. This matters if | ||
| the dict hasn't been created yet or if the item isn't in the | ||
| dict yet. In both cases we must ensure the relevant objects | ||
| are created using the main interpreter. */ | ||
| PyThreadState *main_tstate = &EXTENSIONS.main_tstate; | ||
| PyInterpreterState *interp = _PyInterpreterState_GET(); | ||
| if (!_Py_IsMainInterpreter(interp)) { | ||
| _PyThreadState_BindDetached(main_tstate); | ||
| oldts = _PyThreadState_Swap(interp->runtime, main_tstate); | ||
| assert(!_Py_IsMainInterpreter(oldts->interp)); | ||
|
|
||
| /* Make sure the name and filename objects are owned | ||
| by the main interpreter. */ | ||
| name = PyUnicode_InternFromString(PyUnicode_AsUTF8(name)); | ||
| assert(name != NULL); | ||
| filename = PyUnicode_InternFromString(PyUnicode_AsUTF8(filename)); | ||
| assert(filename != NULL); | ||
| if (EXTENSIONS.hashtable == NULL) { | ||
| EXTENSIONS.hashtable = _Py_hashtable_new_full( | ||
| hashtable_hash_str, | ||
| hashtable_compare_str, | ||
| hashtable_destroy_str, | ||
| hashtable_destroy_str_table, | ||
| NULL | ||
| ); | ||
| if (EXTENSIONS.hashtable == NULL) { | ||
| goto finally; | ||
| } | ||
| } | ||
|
|
||
| PyObject *key = PyTuple_Pack(2, filename, name); | ||
| if (key == NULL) { | ||
| goto finally; | ||
| _Py_hashtable_t *byname; | ||
| _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( | ||
| EXTENSIONS.hashtable, PyUnicode_AsUTF8(filename)); | ||
| if (entry == NULL) { | ||
| byname = _Py_hashtable_new_full( | ||
| hashtable_hash_str, | ||
| hashtable_compare_str, | ||
| hashtable_destroy_str, | ||
| NULL, | ||
| NULL | ||
| ); | ||
| if (byname == NULL) { | ||
| goto finally; | ||
| } | ||
| char *filenamestr = PyMem_RawMalloc( | ||
| strlen(PyUnicode_AsUTF8(filename)) + 1); | ||
| if (filenamestr == NULL) { | ||
| _Py_hashtable_destroy(byname); | ||
| goto finally; | ||
| } | ||
| strncpy(filenamestr, PyUnicode_AsUTF8(filename), | ||
| strlen(PyUnicode_AsUTF8(filename)) + 1); | ||
| if (_Py_hashtable_set(EXTENSIONS.hashtable, filenamestr, byname) < 0) { | ||
| _Py_hashtable_destroy(byname); | ||
| PyMem_RawFree(filenamestr); | ||
| goto finally; | ||
| } | ||
| } | ||
|
|
||
| PyObject *extensions = EXTENSIONS.dict; | ||
| if (extensions == NULL) { | ||
| extensions = PyDict_New(); | ||
| if (extensions == NULL) { | ||
| else { | ||
| byname = (_Py_hashtable_t *)entry->value; | ||
| _Py_hashtable_entry_t *subentry = _Py_hashtable_get_entry( | ||
| byname, PyUnicode_AsUTF8(name)); | ||
| if (subentry != NULL) { | ||
| if (subentry->value == NULL) { | ||
| subentry->value = Py_NewRef(def); | ||
| } | ||
| else { | ||
| /* We expect it to be static, so it must be the same pointer. */ | ||
| assert((PyModuleDef *)subentry->value == def); | ||
| } | ||
| res = 0; | ||
| goto finally; | ||
| } | ||
| EXTENSIONS.dict = extensions; | ||
| } | ||
|
|
||
| PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); | ||
| if (PyErr_Occurred()) { | ||
| char *namestr = PyMem_RawMalloc(strlen(PyUnicode_AsUTF8(name)) + 1); | ||
| if (namestr == NULL) { | ||
| goto finally; | ||
| } | ||
| else if (actual != NULL) { | ||
| /* We expect it to be static, so it must be the same pointer. */ | ||
| assert(def == actual); | ||
| res = 0; | ||
| goto finally; | ||
| } | ||
|
|
||
| /* This might trigger a resize, which is why we must switch | ||
| to the main interpreter. */ | ||
| res = PyDict_SetItem(extensions, key, (PyObject *)def); | ||
| if (res < 0) { | ||
| res = -1; | ||
| strncpy(namestr, PyUnicode_AsUTF8(name), strlen(PyUnicode_AsUTF8(name)) + 1); | ||
| if (_Py_hashtable_set(byname, namestr, Py_NewRef(def)) < 0) { | ||
| PyMem_RawFree(namestr); | ||
| goto finally; | ||
| } | ||
| res = 0; | ||
|
|
||
| finally: | ||
| Py_XDECREF(key); | ||
| if (oldts != NULL) { | ||
| _PyThreadState_Swap(interp->runtime, oldts); | ||
| _PyThreadState_UnbindDetached(main_tstate); | ||
| Py_DECREF(name); | ||
| Py_DECREF(filename); | ||
| } | ||
| extensions_lock_release(); | ||
| return res; | ||
| } | ||
|
|
||
| static int | ||
| static void | ||
| _extensions_cache_delete(PyObject *filename, PyObject *name) | ||
| { | ||
| int res = -1; | ||
| PyThreadState *oldts = NULL; | ||
| extensions_lock_acquire(); | ||
|
|
||
| PyObject *key = PyTuple_Pack(2, filename, name); | ||
| if (key == NULL) { | ||
| goto finally; | ||
| } | ||
|
|
||
| PyObject *extensions = EXTENSIONS.dict; | ||
| if (extensions == NULL) { | ||
| res = 0; | ||
| if (EXTENSIONS.hashtable == NULL) { | ||
| /* It was never added. */ | ||
| goto finally; | ||
| } | ||
|
|
||
| PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); | ||
| if (PyErr_Occurred()) { | ||
| _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( | ||
| EXTENSIONS.hashtable, PyUnicode_AsUTF8(filename)); | ||
| if (entry == NULL) { | ||
| /* It was never added. */ | ||
| goto finally; | ||
| } | ||
| else if (actual == NULL) { | ||
| /* It was already removed or never added. */ | ||
| res = 0; | ||
| _Py_hashtable_entry_t *subentry = _Py_hashtable_get_entry( | ||
| (_Py_hashtable_t *)entry->value, PyUnicode_AsUTF8(name)); | ||
| if (subentry == NULL) { | ||
| /* It never added. */ | ||
| goto finally; | ||
| } | ||
|
|
||
| /* Swap to the main interpreter, if necessary. */ | ||
| PyThreadState *main_tstate = &EXTENSIONS.main_tstate; | ||
| PyInterpreterState *interp = _PyInterpreterState_GET(); | ||
| if (!_Py_IsMainInterpreter(interp)) { | ||
| _PyThreadState_BindDetached(main_tstate); | ||
| oldts = _PyThreadState_Swap(interp->runtime, main_tstate); | ||
| assert(!_Py_IsMainInterpreter(oldts->interp)); | ||
| } | ||
|
|
||
| if (PyDict_DelItem(extensions, key) < 0) { | ||
| if (subentry->value == NULL) { | ||
| /* It was already removed. */ | ||
| goto finally; | ||
| } | ||
| res = 0; | ||
| /* This decref would be problematic if the module def were | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That brings up a good point: shouldn't the PyModuleDefs placed in the shared cache be made immortal? Otherwise how can they sensibly be shared at all?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That makes sense. We don't use module defs like the usual objects and generally assume they are statically defined, but it would be good to play it safe.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
| dynamically allocated, it were the last ref, and this function | ||
| were called with an interpreter other than the def's owner. */ | ||
| Py_DECREF(subentry->value); | ||
| subentry->value = NULL; | ||
|
|
||
| finally: | ||
| if (oldts != NULL) { | ||
| _PyThreadState_Swap(interp->runtime, oldts); | ||
| _PyThreadState_UnbindDetached(main_tstate); | ||
| } | ||
| Py_XDECREF(key); | ||
| extensions_lock_release(); | ||
| return res; | ||
| } | ||
|
|
||
| static void | ||
| _extensions_cache_clear_all(void) | ||
| { | ||
| /* The runtime (i.e. main interpreter) must be finalizing, | ||
| so we don't need to worry about the lock. */ | ||
| // XXX assert(_Py_IsMainInterpreter(_PyInterpreterState_GET())); | ||
| Py_CLEAR(EXTENSIONS.dict); | ||
| _PyThreadState_ClearDetached(&EXTENSIONS.main_tstate); | ||
| _Py_hashtable_destroy(EXTENSIONS.hashtable); | ||
| EXTENSIONS.hashtable = NULL; | ||
| } | ||
|
|
||
|
|
||
|
|
@@ -1233,6 +1245,8 @@ import_find_extension(PyThreadState *tstate, PyObject *name, | |
| PyObject *m_copy = def->m_base.m_copy; | ||
| /* Module does not support repeated initialization */ | ||
| if (m_copy == NULL) { | ||
| /* It might be a core module (e.g. sys & builtins), | ||
| for which we don't set m_copy. */ | ||
| m_copy = get_core_module_dict(tstate->interp, name, filename); | ||
| if (m_copy == NULL) { | ||
| return NULL; | ||
|
|
@@ -1302,9 +1316,7 @@ clear_singlephase_extension(PyInterpreterState *interp, | |
| } | ||
|
|
||
| /* Clear the cached module def. */ | ||
| if (_extensions_cache_delete(filename, name) < 0) { | ||
| return -1; | ||
| } | ||
| _extensions_cache_delete(filename, name); | ||
|
|
||
| return 0; | ||
| } | ||
|
|
@@ -3053,6 +3065,8 @@ void | |
| _PyImport_Fini(void) | ||
| { | ||
| /* Destroy the database used by _PyImport_{Fixup,Find}Extension */ | ||
| // XXX Should we actually leave them (mostly) intact, since we don't | ||
| // ever dlclose() the module files? | ||
| _extensions_cache_clear_all(); | ||
|
|
||
| /* Use the same memory allocator as _PyImport_Init(). */ | ||
|
|
@@ -3090,10 +3104,6 @@ _PyImport_Fini2(void) | |
| PyStatus | ||
| _PyImport_InitCore(PyThreadState *tstate, PyObject *sysmod, int importlib) | ||
| { | ||
| if (_Py_IsMainInterpreter(tstate->interp)) { | ||
| _extensions_cache_init(); | ||
| } | ||
|
|
||
| // XXX Initialize here: interp->modules and interp->import_func. | ||
| // XXX Initialize here: sys.modules and sys.meta_path. | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.