Skip to content

Commit e5bb5ff

Browse files
authored
Importer fixes (#1431)
1 parent 3247281 commit e5bb5ff

11 files changed

Lines changed: 193 additions & 142 deletions

File tree

product/runtime/docs/sphinx/changelog.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Bugfixes
2525

2626
- .pth files are now processed after `sys.path` is fully initialized. (`#1338
2727
<https://github.com/chaquo/chaquopy/issues/1338>`__)
28-
- The importer now recognizes libraries with an SOABI suffix such as
28+
- The importer now recognizes modules with an SOABI suffix such as
2929
`.cpython-313-aarch64-linux-android.so`. (`#1370
3030
<https://github.com/chaquo/chaquopy/issues/1370>`__)
3131

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improved compatibility with various ways of bundling non-Python libraries in a wheel.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
The importer now matches the standard behavior of preferring an .so file over a .py file of the same name.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improved compatibility with various ways of bundling non-Python libraries in a wheel.

product/runtime/src/main/python/java/android/importer.py

Lines changed: 64 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,12 @@ def hook(path):
8282
assert isinstance(finder, AssetFinder), ("Finder for '{}' is {}"
8383
.format(entry, type(finder).__name__))
8484

85-
# Extract data files from the root directory. This includes .pth files, which will be
86-
# read by addsitedir below.
85+
# Extract necessary files from the root directory. This includes .pth files,
86+
# which will be read by addsitedir below.
8787
finder.extract_dir("", recursive=False)
8888

89-
# Extract data files from top-level directories which aren't Python packages.
89+
# Extract necessary files from top-level directories which aren't Python
90+
# packages or dist-info directories.
9091
for name in finder.listdir(""):
9192
if finder.isdir(name) and \
9293
not is_dist_info(name) and \
@@ -135,14 +136,17 @@ def initialize_ctypes():
135136
def find_library_override(name):
136137
filename = "lib{}.so".format(name)
137138

138-
# First look in the requirements. The return value will probably be passed to
139-
# CDLL_init_override below, but the caller may load the library using another
140-
# API (e.g. soundfile uses ffi.dlopen), so make sure its dependencies are
141-
# extracted and pre-loaded.
139+
# First look in the requirements.
142140
try:
143-
return reqs_finder.extract_lib(filename)
141+
filename = reqs_finder.find_lib(filename)
144142
except FileNotFoundError:
145143
pass
144+
else:
145+
# The return value will probably be passed to CDLL_init_override below, but
146+
# the caller may load the library in another way (e.g. soundfile uses
147+
# ffi.dlopen), so make sure its dependencies are pre-loaded.
148+
load_needed(filename)
149+
return filename
146150

147151
# For system libraries I can't see any easy way of finding the absolute library
148152
# filename, but we can at least support the case where the user passes the return value
@@ -159,15 +163,12 @@ def CDLL_init_override(self, name, *args, **kwargs):
159163
if name: # CDLL(None) is equivalent to dlopen(NULL).
160164
if "/" not in name:
161165
try:
162-
name = reqs_finder.extract_lib(name)
166+
name = reqs_finder.find_lib(name)
163167
except FileNotFoundError:
164168
pass
165-
else:
166-
# Some packages (e.g. llvmlite) use CDLL to load libraries from their own
167-
# directories.
168-
finder = get_importer(dirname(name))
169-
if isinstance(finder, AssetFinder):
170-
name = finder.extract_so(name)
169+
170+
if exists(name):
171+
load_needed(name)
171172

172173
CDLL_init_original(self, name, *args, **kwargs)
173174

@@ -521,16 +522,12 @@ def iter_modules(self, prefix=""):
521522
if mod_base_name and (mod_base_name != "__init__"):
522523
yield prefix + mod_base_name, False
523524

524-
# If this method raises FileNotFoundError, then maybe it's a system library, or one of the
525-
# libraries loaded by AndroidPlatform.loadNativeLibs. If the library is truly missing,
526-
# we'll get an exception when we load the file that needs it.
527-
def extract_lib(self, filename):
528-
return self.extract_so(f"chaquopy/lib/{filename}")
529-
530-
def extract_so(self, path):
531-
path = self.extract_if_changed(self.zip_path(path))
532-
load_needed(self, path)
533-
return path
525+
def find_lib(self, filename):
526+
zip_path = f"chaquopy/lib/{filename}"
527+
if self.exists(zip_path):
528+
return join(self.extract_root, zip_path)
529+
else:
530+
raise FileNotFoundError(zip_path)
534531

535532
def extract_dir(self, zip_dir, recursive=True):
536533
dotted_dir = zip_dir.replace("/", ".")
@@ -542,9 +539,9 @@ def extract_dir(self, zip_dir, recursive=True):
542539
if self.isdir(zip_path):
543540
if recursive:
544541
self.extract_dir(zip_path)
545-
elif (extract_package and filename.endswith(".py")
546-
or not (any(filename.endswith(suffix) for suffix in LOADERS) or
547-
re.search(r"^lib.*\.so\.", filename))): # e.g. libgfortran
542+
# For performance, we don't extract any Python modules unless their
543+
# containing package is listed in extract_packages.
544+
elif extract_package or not filename.endswith(PYTHON_SUFFIXES):
548545
self.extract_if_changed(zip_path)
549546

550547
def extract_if_changed(self, zip_path):
@@ -715,34 +712,46 @@ def get_code(self, fullname):
715712

716713
class ExtensionAssetLoader(AssetLoader, machinery.ExtensionFileLoader):
717714
def create_module(self, spec):
718-
self.finder.extract_so(self.path)
715+
self.finder.extract_if_changed(self.finder.zip_path(self.path))
716+
load_needed(self.path)
719717
return super().create_module(spec)
720718

721719

722720
needed_lock = RLock()
723721
needed_loaded = {}
724722

725-
# CDLL will cause a recursive call back to extract_so, so there's no need for any additional
726-
# recursion here. If we return to executables in the future, we can implement a separate
727-
# recursive extraction on top of get_needed.
728-
def load_needed(finder, path):
723+
# Load any libraries in chaquopy/lib which are needed by the .so file at the given path.
724+
#
725+
# RTLD_GLOBAL and RTLD_LOCAL behave a bit differently on Android compared to Linux:
726+
#
727+
# * Regardless of the mode, dlopening a library is sufficient to make it available to
728+
# other libraries which use its SONAME in a DT_NEEDED entry.
729+
#
730+
# * Regardless of the mode, the library's symbols are NOT implicitly available to other
731+
# libraries which don't list it in DT_NEEDED. But this may change in the future, so
732+
# it's safer for us to use the default of RTLD_LOCAL, which should avoid conflicts
733+
# between multiple libraries defining the same symbol.
734+
#
735+
# * RTLD_GLOBAL makes the library's symbols available to explicit searches of other
736+
# libraries using dlsym, but that probably isn't relevant to us.
737+
#
738+
# Sources:
739+
# * https://github.com/android/ndk/issues/1244#issuecomment-620310397
740+
# * https://android.googlesource.com/platform/bionic/+/master/android-changes-for-ndk-developers.md
741+
def load_needed(path):
729742
with needed_lock:
730743
for soname in get_needed(path):
731744
if soname not in needed_loaded:
732745
try:
733-
# Before API level 23, the only dlopen mode was RTLD_GLOBAL, and
734-
# RTLD_LOCAL was ignored. From API level 23, RTLD_LOCAL is available
735-
# and used by default, just like in Linux
736-
# (https://android.googlesource.com/platform/bionic/+/master/android-changes-for-ndk-developers.md).
737-
#
738-
# We use RTLD_GLOBAL to make the library's symbols available to
739-
# subsequently-loaded libraries, but this may not actually work -
740-
# see #728.
746+
# Whether the library is closed when the CDLL object is garbage
747+
# collected is not documented, so keep a reference for safety.
741748
#
742-
# It doesn't look like the library is closed when the CDLL object is garbage
743-
# collected, but this isn't documented, so keep a reference for safety.
744-
needed_loaded[soname] = ctypes.CDLL(soname, ctypes.RTLD_GLOBAL)
745-
except FileNotFoundError:
749+
# CDLL will recursively call load_needed if necessary.
750+
needed_loaded[soname] = ctypes.CDLL(soname)
751+
except OSError:
752+
# It's not in chaquopy/lib, but maybe it can be found in some other
753+
# way, such as DT_RUNPATH. If it's truly missing, we'll get an
754+
# exception when we load the file that needs it.
746755
needed_loaded[soname] = None
747756

748757

@@ -759,12 +768,20 @@ def get_needed(path):
759768

760769
# Suffixes are in order of preference.
761770
LOADERS = {
771+
# .so files should come first, to match the standard finder. For example, pyzmq
772+
# 27.1.0 depends on this (see zmq/backend/cython/_zmq.py).
773+
**{suffix: ExtensionAssetLoader for suffix in _imp.extension_suffixes()},
774+
762775
# .pyc should be preferred over .py, because it'll load faster.
763776
".pyc": SourcelessAssetLoader,
764777
".py": SourceAssetLoader,
765778
}
766-
for suffix in _imp.extension_suffixes():
767-
LOADERS[suffix] = ExtensionAssetLoader
779+
780+
# If a filename ends with .so, without any .cpython or .abi3 marker, then we can't
781+
# distinguish it from a non-Python library, so we must eagerly extract it.
782+
PYTHON_SUFFIXES = tuple(
783+
suffix for suffix in LOADERS if suffix != ".so"
784+
)
768785

769786

770787
class AssetZipFile(ZipFile):

0 commit comments

Comments
 (0)