-
-
Notifications
You must be signed in to change notification settings - Fork 34.8k
bpo-34482: Add tests for proper handling of non-UTF-8-encodable strin… #8878
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
97037bd
a7e435d
4603f7e
beca2bd
51bd826
f24c13c
faadbb9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -302,6 +302,8 @@ def test_tzname(self): | |
| self.assertEqual('UTC+09:30', timezone(9.5 * HOUR).tzname(None)) | ||
| self.assertEqual('UTC-00:01', timezone(timedelta(minutes=-1)).tzname(None)) | ||
| self.assertEqual('XYZ', timezone(-5 * HOUR, 'XYZ').tzname(None)) | ||
| # bpo-34482: Check that surrogates are handled properly. | ||
| self.assertEqual('\ud800', timezone(ZERO, '\ud800').tzname(None)) | ||
|
|
||
| # Sub-minute offsets: | ||
| self.assertEqual('UTC+01:06:40', timezone(timedelta(0, 4000)).tzname(None)) | ||
|
|
@@ -1307,6 +1309,14 @@ def test_strftime(self): | |
| except ValueError: | ||
| pass | ||
|
|
||
| # bpo-34482: Check that surrogates don't cause a crash. | ||
| # FIXME: The C datetime implementation raises an exception | ||
| # while the pure-Python one succeeds. | ||
| try: | ||
| t.strftime('\ud800') | ||
| except UnicodeEncodeError: | ||
| pass | ||
|
|
||
| #check that this standard extension works | ||
| t.strftime("%f") | ||
|
|
||
|
|
@@ -1746,6 +1756,9 @@ def test_isoformat(self): | |
| self.assertEqual(t.isoformat('T'), "0001-02-03T04:05:01.000123") | ||
| self.assertEqual(t.isoformat(' '), "0001-02-03 04:05:01.000123") | ||
| self.assertEqual(t.isoformat('\x00'), "0001-02-03\x0004:05:01.000123") | ||
| # bpo-34482: Check that surrogates are handled properly. | ||
| self.assertEqual(t.isoformat('\ud800'), | ||
| "0001-02-03\ud80004:05:01.000123") | ||
| self.assertEqual(t.isoformat(timespec='hours'), "0001-02-03T04") | ||
| self.assertEqual(t.isoformat(timespec='minutes'), "0001-02-03T04:05") | ||
| self.assertEqual(t.isoformat(timespec='seconds'), "0001-02-03T04:05:01") | ||
|
|
@@ -1754,6 +1767,8 @@ def test_isoformat(self): | |
| self.assertEqual(t.isoformat(timespec='auto'), "0001-02-03T04:05:01.000123") | ||
| self.assertEqual(t.isoformat(sep=' ', timespec='minutes'), "0001-02-03 04:05") | ||
| self.assertRaises(ValueError, t.isoformat, timespec='foo') | ||
| # bpo-34482: Check that surrogates are handled properly. | ||
| self.assertRaises(ValueError, t.isoformat, timespec='\ud800') | ||
| # str is ISO format with the separator forced to a blank. | ||
| self.assertEqual(str(t), "0001-02-03 04:05:01.000123") | ||
|
|
||
|
|
@@ -2277,13 +2292,21 @@ def test_utcnow(self): | |
| self.assertLessEqual(abs(from_timestamp - from_now), tolerance) | ||
|
|
||
| def test_strptime(self): | ||
| string = '2004-12-01 13:02:47.197' | ||
| format = '%Y-%m-%d %H:%M:%S.%f' | ||
| expected = _strptime._strptime_datetime(self.theclass, string, format) | ||
| got = self.theclass.strptime(string, format) | ||
| self.assertEqual(expected, got) | ||
| self.assertIs(type(expected), self.theclass) | ||
| self.assertIs(type(got), self.theclass) | ||
| inputs = [ | ||
| ('2004-12-01 13:02:47.197', '%Y-%m-%d %H:%M:%S.%f'), | ||
| # bpo-34482: Check that surrogates are handled properly. | ||
| ('2004-12-01\ud80013:02:47.197', '%Y-%m-%d\ud800%H:%M:%S.%f'), | ||
| ('2004\ud80012-01 13:02:47.197', '%Y\ud800%m-%d %H:%M:%S.%f'), | ||
| ('2004-12-01 13:02\ud80047.197', '%Y-%m-%d %H:%M\ud800%S.%f'), | ||
| ] | ||
| for string, format in inputs: | ||
| with self.subTest(string=string, format=format): | ||
| expected = _strptime._strptime_datetime(self.theclass, string, | ||
| format) | ||
| got = self.theclass.strptime(string, format) | ||
| self.assertEqual(expected, got) | ||
| self.assertIs(type(expected), self.theclass) | ||
| self.assertIs(type(got), self.theclass) | ||
|
|
||
| strptime = self.theclass.strptime | ||
| self.assertEqual(strptime("+0002", "%z").utcoffset(), 2 * MINUTE) | ||
|
|
@@ -2869,6 +2892,8 @@ def test_isoformat(self): | |
| self.assertEqual(t.isoformat(timespec='microseconds'), "12:34:56.123456") | ||
| self.assertEqual(t.isoformat(timespec='auto'), "12:34:56.123456") | ||
| self.assertRaises(ValueError, t.isoformat, timespec='monkey') | ||
| # bpo-34482: Check that surrogates are handled properly. | ||
| self.assertRaises(ValueError, t.isoformat, timespec='\ud800') | ||
|
|
||
| t = self.theclass(hour=12, minute=34, second=56, microsecond=999500) | ||
| self.assertEqual(t.isoformat(timespec='milliseconds'), "12:34:56.999") | ||
|
|
@@ -2919,6 +2944,14 @@ def test_strftime(self): | |
| # A naive object replaces %z and %Z with empty strings. | ||
| self.assertEqual(t.strftime("'%z' '%Z'"), "'' ''") | ||
|
|
||
| # bpo-34482: Check that surrogates don't cause a crash. | ||
| # FIXME: The C datetime implementation raises an exception | ||
| # while the pure-Python one succeeds. | ||
| try: | ||
| t.strftime('\ud800') | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems worth of a separate test, or at least a subtest. Also, you may as well assert something about the result of this,
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @pganssle, why assert that? It's not clear to me that this should be something we ensure. IMO the test is fine. I'd just suggest removing the "FIXME" paragraph entirely (here and above).
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are we saying that I think it's reasonable to either pick something and stick with it or formally make this an error on the pure-python side as well.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good to me. Let's go with your fix (in the PR to this PR's branch) and the test you suggest above. @izbyshev, could you update accordingly?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @taleinat OK, but I think it'd cleaner to go with two separate PRs for bpo-34481 and bpo-34482. @pganssle suggests the same.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm.. This seems like a separate bug on Mac, honestly, rather than implementation-dependent behavior. Usually the "implementation-dependent" stuff in strftime is in how different platforms interpret the different formatting directives, not stuff like this. I'd be tempted to move the surrogate assertion portion added into a separate test that you can wrap in As for what to do about it, technically it's probably a bug in the platform's
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It turned out that behavior of This is indeed what happens on my Windows 8.1 with Python 3.7 in a manual test: Moreover: This I can't explain currently. Decoding in timemodule.c uses the locale encoding (with I'll try to understand this tomorrow.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here is the summary of what I've learned.
Given the above, I don't think that we should check the return value of
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @izbyshev Awesome work. I'm thinking the best way forward at this point is to drop the assertion portion and to copy all this information into a separate bug report about the inconsistency in strftime. It might be a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @pganssle After thinking about a separate bug report, I realized that the whole situation is not so different from the usual stuff with file-system-related functions. Some platforms have wide-character APIs, others have only byte-oriented APIs, and Python tries to appease both. |
||
| except UnicodeEncodeError: | ||
| pass | ||
|
|
||
| def test_format(self): | ||
| t = self.theclass(1, 2, 3, 4) | ||
| self.assertEqual(t.__format__(''), str(t)) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| Add tests for proper handling of non-UTF-8-encodable strings in | ||
| :mod:`datetime` classes. Patch by Alexey Izbyshev. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's leave the first "happy path" test as it is.
I suggest separating the surrogate examples, keeping the loop just for them, and removing the type assertions for them, i.e. just checking that the result equals what is expected.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds reasonable, fixed.