@@ -467,9 +467,13 @@ def _encode_userinfo_part(text, maximal=True):
467467)
468468# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
469469
470+ NO_QUERY_PLUS_SCHEMES = set ()
470471
471- def register_scheme (text , uses_netloc = True , default_port = None ):
472- # type: (Text, bool, Optional[int]) -> None
472+
473+ def register_scheme (
474+ text , uses_netloc = True , default_port = None , query_plus_is_space = True
475+ ):
476+ # type: (Text, bool, Optional[int], bool) -> None
473477 """Registers new scheme information, resulting in correct port and
474478 slash behavior from the URL object. There are dozens of standard
475479 schemes preregistered, so this function is mostly meant for
@@ -485,6 +489,8 @@ def register_scheme(text, uses_netloc=True, default_port=None):
485489 not. Defaults to True.
486490 default_port: The default port, if any, for
487491 netloc-using schemes.
492+ query_plus_is_space: If true, a "+" in the query string is decoded
493+ as a space by DecodedURL. Defaults to True.
488494
489495 .. _file an issue: https://github.com/mahmoud/hyperlink/issues
490496 """
@@ -510,6 +516,9 @@ def register_scheme(text, uses_netloc=True, default_port=None):
510516 else :
511517 raise ValueError ("uses_netloc expected bool, not: %r" % uses_netloc )
512518
519+ if not query_plus_is_space :
520+ NO_QUERY_PLUS_SCHEMES .add (text )
521+
513522 return
514523
515524
@@ -1998,6 +2007,9 @@ class DecodedURL(object):
19982007 lazy: Set to True to avoid pre-decoding all parts of the URL to check for
19992008 validity.
20002009 Defaults to False.
2010+ query_plus_is_space: Whether "+" characters in the query string are treated
2011+ as spaces when decoding. If unspecified (None), the default is taken from
2012+ the URL's scheme, as configured via register_scheme().
20012013
20022014 .. note::
20032015
@@ -2012,18 +2024,21 @@ class DecodedURL(object):
20122024 .. versionadded:: 18.0.0
20132025 """
20142026
2015- def __init__ (self , url = _EMPTY_URL , lazy = False ):
2016- # type: (URL, bool) -> None
2027+ def __init__ (self , url = _EMPTY_URL , lazy = False , query_plus_is_space = None ):
2028+ # type: (URL, bool, Optional[bool] ) -> None
20172029 self ._url = url
2030+ if query_plus_is_space is None :
2031+ query_plus_is_space = url .scheme not in NO_QUERY_PLUS_SCHEMES
2032+ self ._query_plus_is_space = query_plus_is_space
20182033 if not lazy :
20192034 # cache the following, while triggering any decoding
20202035 # issues with decodable fields
20212036 self .host , self .userinfo , self .path , self .query , self .fragment
20222037 return
20232038
20242039 @classmethod
2025- def from_text (cls , text , lazy = False ):
2026- # type: (Text, bool) -> DecodedURL
2040+ def from_text (cls , text , lazy = False , query_plus_is_space = None ):
2041+ # type: (Text, bool, Optional[bool] ) -> DecodedURL
20272042 """\
20282043 Make a `DecodedURL` instance from any text string containing a URL.
20292044
@@ -2034,7 +2049,7 @@ def from_text(cls, text, lazy=False):
20342049 Defaults to True.
20352050 """
20362051 _url = URL .from_text (text )
2037- return cls (_url , lazy = lazy )
2052+ return cls (_url , lazy = lazy , query_plus_is_space = query_plus_is_space )
20382053
20392054 @property
20402055 def encoded_url (self ):
@@ -2059,22 +2074,34 @@ def to_iri(self):
20592074 "Passthrough to :meth:`~hyperlink.URL.to_iri()`"
20602075 return self ._url .to_iri ()
20612076
2077+ def _clone (self , url ):
2078+ # type: (URL) -> DecodedURL
2079+ return self .__class__ (
2080+ url ,
2081+ # TODO: propagate laziness?
2082+ query_plus_is_space = self ._query_plus_is_space ,
2083+ )
2084+
20622085 def click (self , href = u"" ):
20632086 # type: (Union[Text, URL, DecodedURL]) -> DecodedURL
20642087 """Return a new DecodedURL wrapping the result of
20652088 :meth:`~hyperlink.URL.click()`
20662089 """
20672090 if isinstance (href , DecodedURL ):
20682091 href = href ._url
2069- return self .__class__ (self ._url .click (href = href ))
2092+ return self ._clone (
2093+ self ._url .click (href = href ),
2094+ )
20702095
20712096 def sibling (self , segment ):
20722097 # type: (Text) -> DecodedURL
20732098 """Automatically encode any reserved characters in *segment* and
20742099 return a new `DecodedURL` wrapping the result of
20752100 :meth:`~hyperlink.URL.sibling()`
20762101 """
2077- return self .__class__ (self ._url .sibling (_encode_reserved (segment )))
2102+ return self ._clone (
2103+ self ._url .sibling (_encode_reserved (segment )),
2104+ )
20782105
20792106 def child (self , * segments ):
20802107 # type: (Text) -> DecodedURL
@@ -2085,7 +2112,7 @@ def child(self, *segments):
20852112 if not segments :
20862113 return self
20872114 new_segs = [_encode_reserved (s ) for s in segments ]
2088- return self .__class__ (self ._url .child (* new_segs ))
2115+ return self ._clone (self ._url .child (* new_segs ))
20892116
20902117 def normalize (
20912118 self ,
@@ -2101,7 +2128,7 @@ def normalize(
21012128 """Return a new `DecodedURL` wrapping the result of
21022129 :meth:`~hyperlink.URL.normalize()`
21032130 """
2104- return self .__class__ (
2131+ return self ._clone (
21052132 self ._url .normalize (
21062133 scheme , host , path , query , fragment , userinfo , percents
21072134 )
@@ -2148,11 +2175,18 @@ def path(self):
21482175 def query (self ):
21492176 # type: () -> QueryPairs
21502177 if not hasattr (self , "_query" ):
2178+ if self ._query_plus_is_space :
2179+ predecode = lambda x : x .replace ("+" , "%20" )
2180+ else :
2181+ predecode = lambda x : x
2182+
21512183 self ._query = cast (
21522184 QueryPairs ,
21532185 tuple (
21542186 tuple (
2155- _percent_decode (x , raise_subencoding_exc = True )
2187+ _percent_decode (
2188+ predecode (x ), raise_subencoding_exc = True
2189+ )
21562190 if x is not None
21572191 else None
21582192 for x in (k , v )
@@ -2248,7 +2282,7 @@ def replace(
22482282 userinfo = userinfo_text ,
22492283 uses_netloc = uses_netloc ,
22502284 )
2251- return self .__class__ (url = new_url )
2285+ return self ._clone (url = new_url )
22522286
22532287 def get (self , name ):
22542288 # type: (Text) -> List[Optional[Text]]
0 commit comments