Skip to content

Commit b297e0f

Browse files
committed
make hyperlink handle + like an HTML form post by default
1 parent e5cd7e2 commit b297e0f

File tree

3 files changed

+72
-14
lines changed

3 files changed

+72
-14
lines changed

src/hyperlink/_url.py

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -467,9 +467,13 @@ def _encode_userinfo_part(text, maximal=True):
467467
)
468468
# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
469469

470+
NO_QUERY_PLUS_SCHEMES = set()
470471

471-
def register_scheme(text, uses_netloc=True, default_port=None):
472-
# type: (Text, bool, Optional[int]) -> None
472+
473+
def register_scheme(
474+
text, uses_netloc=True, default_port=None, query_plus_is_space=True
475+
):
476+
# type: (Text, bool, Optional[int], bool) -> None
473477
"""Registers new scheme information, resulting in correct port and
474478
slash behavior from the URL object. There are dozens of standard
475479
schemes preregistered, so this function is mostly meant for
@@ -485,6 +489,8 @@ def register_scheme(text, uses_netloc=True, default_port=None):
485489
not. Defaults to True.
486490
default_port: The default port, if any, for
487491
netloc-using schemes.
492+
query_plus_is_space: If true, a "+" in the query string should be
493+
decoded as a space by DecodedURL.
488494
489495
.. _file an issue: https://github.com/mahmoud/hyperlink/issues
490496
"""
@@ -510,6 +516,9 @@ def register_scheme(text, uses_netloc=True, default_port=None):
510516
else:
511517
raise ValueError("uses_netloc expected bool, not: %r" % uses_netloc)
512518

519+
if not query_plus_is_space:
520+
NO_QUERY_PLUS_SCHEMES.add(text)
521+
513522
return
514523

515524

@@ -1998,6 +2007,9 @@ class DecodedURL(object):
19982007
lazy: Set to True to avoid pre-decoding all parts of the URL to check for
19992008
validity.
20002009
Defaults to False.
2010+
query_plus_is_space: + characters in the query string should be treated
2011+
as spaces when decoding. If unspecified, the default is taken from
2012+
the scheme.
20012013
20022014
.. note::
20032015
@@ -2012,18 +2024,21 @@ class DecodedURL(object):
20122024
.. versionadded:: 18.0.0
20132025
"""
20142026

2015-
def __init__(self, url=_EMPTY_URL, lazy=False):
2016-
# type: (URL, bool) -> None
2027+
def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None):
    # type: (URL, bool, Optional[bool]) -> None
    self._url = url
    # An explicit query_plus_is_space wins; otherwise "+" is treated as a
    # space unless the URL's scheme was registered in NO_QUERY_PLUS_SCHEMES.
    self._query_plus_is_space = (
        url.scheme not in NO_QUERY_PLUS_SCHEMES
        if query_plus_is_space is None
        else query_plus_is_space
    )
    if lazy:
        return
    # Eager mode: touch each decoded property once so the values are
    # cached and any decoding problem surfaces at construction time.
    self.host, self.userinfo, self.path, self.query, self.fragment
20232038

20242039
@classmethod
2025-
def from_text(cls, text, lazy=False):
2026-
# type: (Text, bool) -> DecodedURL
2040+
def from_text(cls, text, lazy=False, query_plus_is_space=None):
2041+
# type: (Text, bool, Optional[bool]) -> DecodedURL
20272042
"""\
20282043
Make a `DecodedURL` instance from any text string containing a URL.
20292044
@@ -2034,7 +2049,7 @@ def from_text(cls, text, lazy=False):
20342049
Defaults to True.
20352050
"""
20362051
_url = URL.from_text(text)
2037-
return cls(_url, lazy=lazy)
2052+
return cls(_url, lazy=lazy, query_plus_is_space=query_plus_is_space)
20382053

20392054
@property
20402055
def encoded_url(self):
@@ -2059,22 +2074,34 @@ def to_iri(self):
20592074
"Passthrough to :meth:`~hyperlink.URL.to_iri()`"
20602075
return self._url.to_iri()
20612076

2077+
def _clone(self, url):
    # type: (URL) -> DecodedURL
    # Wrap *url* in a new instance of this object's own class, carrying
    # over this instance's "+"-in-query decoding setting.
    # TODO: propagate laziness?
    factory = self.__class__
    plus_setting = self._query_plus_is_space
    return factory(url, query_plus_is_space=plus_setting)
2084+
20622085
def click(self, href=u""):
    # type: (Union[Text, URL, DecodedURL]) -> DecodedURL
    """Pass *href* to :meth:`~hyperlink.URL.click()` on the wrapped URL
    and return the result wrapped in a new `DecodedURL`.

    A `DecodedURL` *href* is unwrapped to its underlying URL first.
    """
    target = href._url if isinstance(href, DecodedURL) else href
    clicked = self._url.click(href=target)
    return self._clone(clicked)
20702095

20712096
def sibling(self, segment):
    # type: (Text) -> DecodedURL
    """Encode any reserved characters in *segment*, pass it to
    :meth:`~hyperlink.URL.sibling()` on the wrapped URL, and return the
    result wrapped in a new `DecodedURL`.
    """
    encoded_segment = _encode_reserved(segment)
    sibling_url = self._url.sibling(encoded_segment)
    return self._clone(sibling_url)
20782105

20792106
def child(self, *segments):
20802107
# type: (Text) -> DecodedURL
@@ -2085,7 +2112,7 @@ def child(self, *segments):
20852112
if not segments:
20862113
return self
20872114
new_segs = [_encode_reserved(s) for s in segments]
2088-
return self.__class__(self._url.child(*new_segs))
2115+
return self._clone(self._url.child(*new_segs))
20892116

20902117
def normalize(
20912118
self,
@@ -2101,7 +2128,7 @@ def normalize(
21012128
"""Return a new `DecodedURL` wrapping the result of
21022129
:meth:`~hyperlink.URL.normalize()`
21032130
"""
2104-
return self.__class__(
2131+
return self._clone(
21052132
self._url.normalize(
21062133
scheme, host, path, query, fragment, userinfo, percents
21072134
)
@@ -2148,11 +2175,18 @@ def path(self):
21482175
def query(self):
21492176
# type: () -> QueryPairs
21502177
if not hasattr(self, "_query"):
2178+
if self._query_plus_is_space:
2179+
predecode = lambda x: x.replace("+", "%20")
2180+
else:
2181+
predecode = lambda x: x
2182+
21512183
self._query = cast(
21522184
QueryPairs,
21532185
tuple(
21542186
tuple(
2155-
_percent_decode(x, raise_subencoding_exc=True)
2187+
_percent_decode(
2188+
predecode(x), raise_subencoding_exc=True
2189+
)
21562190
if x is not None
21572191
else None
21582192
for x in (k, v)
@@ -2248,7 +2282,7 @@ def replace(
22482282
userinfo=userinfo_text,
22492283
uses_netloc=uses_netloc,
22502284
)
2251-
return self.__class__(url=new_url)
2285+
return self._clone(url=new_url)
22522286

22532287
def get(self, name):
22542288
# type: (Text) -> List[Optional[Text]]

src/hyperlink/test/test_decoded_url.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,3 +210,19 @@ def test_click_decoded_url(self):
210210
assert clicked.host == durl.host
211211
assert clicked.path == durl_dest.path
212212
assert clicked.path == ("tëst",)
213+
214+
def test_decode_plus(self):
    # type: () -> None
    # Default behavior: "+" in the query decodes to a space and "%2B" to a
    # literal "+", while a "+" in the path is left alone.
    decoded = DecodedURL.from_text("/x+y%2B?a=b+c%2B")
    expected_path = ("x+y+",)
    expected_value = "b c+"
    assert decoded.path == expected_path
    assert decoded.get("a") == [expected_value]
    assert decoded.query == (("a", expected_value),)
220+
221+
def test_decode_nonplussed(self):
    # type: () -> None
    # With query_plus_is_space=False a "+" in the query stays a literal "+";
    # "%2B" still decodes to "+", and the path is unaffected.
    text = "/x+y%2B?a=b+c%2B"
    decoded = DecodedURL.from_text(text, query_plus_is_space=False)
    expected_path = ("x+y+",)
    expected_value = "b+c+"
    assert decoded.path == expected_path
    assert decoded.get("a") == [expected_value]
    assert decoded.query == (("a", expected_value),)

src/hyperlink/test/test_scheme_registration.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from .. import _url
77
from .common import HyperlinkTestCase
8-
from .._url import register_scheme, URL
8+
from .._url import register_scheme, URL, DecodedURL
99

1010

1111
class TestSchemeRegistration(HyperlinkTestCase):
@@ -70,3 +70,11 @@ def test_register_invalid_port(self):
7070
# type: () -> None
7171
with self.assertRaises(ValueError):
7272
register_scheme("nope", default_port=cast(bool, object()))
73+
74+
def test_register_no_quote_plus_scheme(self):
    # type: () -> None
    # Registering a scheme with query_plus_is_space=False must change how
    # DecodedURL decodes "+" in the query for that scheme. The original
    # test built the URLs but asserted nothing, so it could only fail on
    # an exception.
    register_scheme("keepplus", query_plus_is_space=False)

    # Scheme-level default: no explicit argument, so the setting comes from
    # the registered scheme and "+" stays literal.
    plus_is_not_space = DecodedURL.from_text("keepplus://heyoo/?q=a+b")
    assert plus_is_not_space.get("q") == ["a+b"]

    # Explicit per-instance override on a scheme that was not registered
    # with query_plus_is_space=False.
    explicit_override = DecodedURL.from_text(
        "https://abc.xyz/x+y%2B?a=b+c%2B", query_plus_is_space=False
    )
    assert explicit_override.get("a") == ["b+c+"]

    # Registering "keepplus" must not affect other schemes: without an
    # override, "+" still decodes to a space.
    plus_is_space = DecodedURL.from_text("https://abc.xyz/?q=a+b")
    assert plus_is_space.get("q") == ["a b"]

0 commit comments

Comments
 (0)