@@ -872,16 +872,30 @@ def test_ignore_case(self):
872872 self .assertEqual (re .match (r"((a)\s(abc|a))" , "a a" , re .I ).group (1 ), "a a" )
873873 self .assertEqual (re .match (r"((a)\s(abc|a)*)" , "a aa" , re .I ).group (1 ), "a aa" )
874874
875- assert '\u212a ' .lower () == 'k' # 'K'
875+ # Two different characters have the same lowercase.
876+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
876877 self .assertTrue (re .match (r'K' , '\u212a ' , re .I ))
877878 self .assertTrue (re .match (r'k' , '\u212a ' , re .I ))
878879 self .assertTrue (re .match (r'\u212a' , 'K' , re .I ))
879880 self .assertTrue (re .match (r'\u212a' , 'k' , re .I ))
880- assert '\u017f ' .upper () == 'S' # 'ſ'
881+
882+ # Two different characters have the same uppercase.
883+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
881884 self .assertTrue (re .match (r'S' , '\u017f ' , re .I ))
882885 self .assertTrue (re .match (r's' , '\u017f ' , re .I ))
883886 self .assertTrue (re .match (r'\u017f' , 'S' , re .I ))
884887 self .assertTrue (re .match (r'\u017f' , 's' , re .I ))
888+
889+ # Two different characters have the same uppercase. Unicode 9.0+.
890+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
891+ self .assertTrue (re .match (r'\u0412' , '\u0432 ' , re .I ))
892+ self .assertTrue (re .match (r'\u0412' , '\u1c80 ' , re .I ))
893+ self .assertTrue (re .match (r'\u0432' , '\u0412 ' , re .I ))
894+ self .assertTrue (re .match (r'\u0432' , '\u1c80 ' , re .I ))
895+ self .assertTrue (re .match (r'\u1c80' , '\u0412 ' , re .I ))
896+ self .assertTrue (re .match (r'\u1c80' , '\u0432 ' , re .I ))
897+
898+ # Two different characters have the same multicharacter uppercase.
885899 assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
886900 self .assertTrue (re .match (r'\ufb05' , '\ufb06 ' , re .I ))
887901 self .assertTrue (re .match (r'\ufb06' , '\ufb05 ' , re .I ))
@@ -895,16 +909,31 @@ def test_ignore_case_set(self):
895909 self .assertTrue (re .match (br'[19a]' , b'a' , re .I ))
896910 self .assertTrue (re .match (br'[19a]' , b'A' , re .I ))
897911 self .assertTrue (re .match (br'[19A]' , b'a' , re .I ))
898- assert '\u212a ' .lower () == 'k' # 'K'
912+
913+ # Two different characters have the same lowercase.
914+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
899915 self .assertTrue (re .match (r'[19K]' , '\u212a ' , re .I ))
900916 self .assertTrue (re .match (r'[19k]' , '\u212a ' , re .I ))
901917 self .assertTrue (re .match (r'[19\u212a]' , 'K' , re .I ))
902918 self .assertTrue (re .match (r'[19\u212a]' , 'k' , re .I ))
903- assert '\u017f ' .upper () == 'S' # 'ſ'
919+
920+ # Two different characters have the same uppercase.
921+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
904922 self .assertTrue (re .match (r'[19S]' , '\u017f ' , re .I ))
905923 self .assertTrue (re .match (r'[19s]' , '\u017f ' , re .I ))
906924 self .assertTrue (re .match (r'[19\u017f]' , 'S' , re .I ))
907925 self .assertTrue (re .match (r'[19\u017f]' , 's' , re .I ))
926+
927+ # Two different characters have the same uppercase. Unicode 9.0+.
928+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
929+ self .assertTrue (re .match (r'[19\u0412]' , '\u0432 ' , re .I ))
930+ self .assertTrue (re .match (r'[19\u0412]' , '\u1c80 ' , re .I ))
931+ self .assertTrue (re .match (r'[19\u0432]' , '\u0412 ' , re .I ))
932+ self .assertTrue (re .match (r'[19\u0432]' , '\u1c80 ' , re .I ))
933+ self .assertTrue (re .match (r'[19\u1c80]' , '\u0412 ' , re .I ))
934+ self .assertTrue (re .match (r'[19\u1c80]' , '\u0432 ' , re .I ))
935+
936+ # Two different characters have the same multicharacter uppercase.
908937 assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
909938 self .assertTrue (re .match (r'[19\ufb05]' , '\ufb06 ' , re .I ))
910939 self .assertTrue (re .match (r'[19\ufb06]' , '\ufb05 ' , re .I ))
@@ -928,16 +957,30 @@ def test_ignore_case_range(self):
928957 self .assertTrue (re .match (r'[\U00010400-\U00010427]' , '\U00010428 ' , re .I ))
929958 self .assertTrue (re .match (r'[\U00010400-\U00010427]' , '\U00010400 ' , re .I ))
930959
931- assert '\u212a ' .lower () == 'k' # 'K'
960+ # Two different characters have the same lowercase.
961+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
932962 self .assertTrue (re .match (r'[J-M]' , '\u212a ' , re .I ))
933963 self .assertTrue (re .match (r'[j-m]' , '\u212a ' , re .I ))
934964 self .assertTrue (re .match (r'[\u2129-\u212b]' , 'K' , re .I ))
935965 self .assertTrue (re .match (r'[\u2129-\u212b]' , 'k' , re .I ))
936- assert '\u017f ' .upper () == 'S' # 'ſ'
966+
967+ # Two different characters have the same uppercase.
968+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
937969 self .assertTrue (re .match (r'[R-T]' , '\u017f ' , re .I ))
938970 self .assertTrue (re .match (r'[r-t]' , '\u017f ' , re .I ))
939971 self .assertTrue (re .match (r'[\u017e-\u0180]' , 'S' , re .I ))
940972 self .assertTrue (re .match (r'[\u017e-\u0180]' , 's' , re .I ))
973+
974+ # Two different characters have the same uppercase. Unicode 9.0+.
975+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
976+ self .assertTrue (re .match (r'[\u0411-\u0413]' , '\u0432 ' , re .I ))
977+ self .assertTrue (re .match (r'[\u0411-\u0413]' , '\u1c80 ' , re .I ))
978+ self .assertTrue (re .match (r'[\u0431-\u0433]' , '\u0412 ' , re .I ))
979+ self .assertTrue (re .match (r'[\u0431-\u0433]' , '\u1c80 ' , re .I ))
980+ self .assertTrue (re .match (r'[\u1c80-\u1c82]' , '\u0412 ' , re .I ))
981+ self .assertTrue (re .match (r'[\u1c80-\u1c82]' , '\u0432 ' , re .I ))
982+
983+ # Two different characters have the same multicharacter uppercase.
941984 assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
942985 self .assertTrue (re .match (r'[\ufb04-\ufb05]' , '\ufb06 ' , re .I ))
943986 self .assertTrue (re .match (r'[\ufb06-\ufb07]' , '\ufb05 ' , re .I ))
0 commit comments