From 700a1e2e3572ec2c8766da9acdaf8babce5d8d20 Mon Sep 17 00:00:00 2001 From: Neal Gompa Date: Thu, 15 Oct 2020 10:30:07 -0400 Subject: [PATCH] Cover some subject prefix use cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The subject_prefix handler has many bugs and was mostly not covered for lists with a non-ascii charset. Co-authored-by: Aurélien Bompard Signed-off-by: Neal Gompa --- src/mailman/handlers/subject_prefix.py | 19 +++-- .../handlers/tests/test_subject_prefix.py | 81 +++++++++++++++++-- 2 files changed, 87 insertions(+), 13 deletions(-) diff --git a/src/mailman/handlers/subject_prefix.py b/src/mailman/handlers/subject_prefix.py index 95ec0ec2d..10ad07875 100644 --- a/src/mailman/handlers/subject_prefix.py +++ b/src/mailman/handlers/subject_prefix.py @@ -39,13 +39,12 @@ def ascii_header(mlist, msgdata, subject, prefix, prefix_pattern, ws): if charset not in ASCII_CHARSETS: return None subject_text = EMPTYSTRING.join(str(subject).splitlines()) + subject_text = re.sub(prefix_pattern, '', subject_text) # At this point, the subject may become null if someone posted mail # with "Subject: [subject prefix]". if subject_text.strip() == '': with _.using(mlist.preferred_language.code): subject_text = _('(no subject)') - else: - subject_text = re.sub(prefix_pattern, '', subject_text) msgdata['stripped_subject'] = subject_text rematch = re.match(RE_PATTERN, subject_text, re.I) if rematch: @@ -83,13 +82,12 @@ def all_same_charset(mlist, msgdata, subject, prefix, prefix_pattern, ws): if charset != list_charset: return None subject_text = EMPTYSTRING.join(chunks) + subject_text = re.sub(prefix_pattern, '', subject_text) # At this point, the subject may become null if someone posted mail # with "Subject: [subject prefix]". 
if subject_text.strip() == '': - with _.push(mlist.preferred_language.code): + with _.using(mlist.preferred_language.code): subject_text = _('(no subject)') - else: - subject_text = re.sub(prefix_pattern, '', subject_text) msgdata['stripped_subject'] = subject_text rematch = re.match(RE_PATTERN, subject_text, re.I) if rematch: @@ -114,7 +112,7 @@ def mixed_charsets(mlist, msgdata, subject, prefix, prefix_pattern, ws): list_charset = mlist.preferred_language.charset chunks = decode_header(subject.encode()) if len(chunks) == 0: - with _.push(mlist.preferred_language.code): + with _.using(mlist.preferred_language.code): subject_text = _('(no subject)') chunks = [(prefix, list_charset), (subject_text, list_charset), @@ -134,13 +132,20 @@ def mixed_charsets(mlist, msgdata, subject, prefix, prefix_pattern, ws): chunks.insert(0, ('', 'us-ascii')) first_text = '' first_text = re.sub(prefix_pattern, '', first_text).lstrip() + if not first_text.strip() and len(chunks) <= 1: + with _.using(mlist.preferred_language.code): + subject_text = _('(no subject)') + chunks = [(prefix.strip(), list_charset), + (subject_text, list_charset), + ] + return make_header(chunks, continuation_ws=ws) rematch = re.match(RE_PATTERN, first_text, re.I) if rematch: first_text = 'Re: ' + first_text[rematch.end():] chunks[0] = (first_text, chunk_charset) # The subject text stripped of the prefix, for use in the NNTP gateway. 
msgdata['stripped_subject'] = str(make_header(chunks, continuation_ws=ws)) - chunks.insert(0, (prefix, list_charset)) + chunks.insert(0, (prefix.strip(), list_charset)) return make_header(chunks, continuation_ws=ws) diff --git a/src/mailman/handlers/tests/test_subject_prefix.py b/src/mailman/handlers/tests/test_subject_prefix.py index c2a257035..1d8bc175d 100644 --- a/src/mailman/handlers/tests/test_subject_prefix.py +++ b/src/mailman/handlers/tests/test_subject_prefix.py @@ -33,6 +33,13 @@ class TestSubjectPrefix(unittest.TestCase): def setUp(self): self._mlist = create_list('test@example.com') self._process = config.handlers['subject-prefix'].process + language_manager = getUtility(ILanguageManager) + if 'xx' not in language_manager: + language_manager.add('xx', 'utf-8', 'Freedonia') + + def tearDown(self): + # The LanguageManager may need a 'remove' method. + del getUtility(ILanguageManager)._languages['xx'] def test_isdigest(self): # If the message is destined for the digest, the Subject header does @@ -114,6 +121,14 @@ class TestSubjectPrefix(unittest.TestCase): self._process(self._mlist, msg, {}) self.assertEqual(str(msg['subject']), '[Test] A test message') + def test_multiline_subject_non_ascii_list(self): + # The subject appears on multiple lines on a non-ascii list. + self._mlist.preferred_language = 'xx' + msg = Message() + msg['Subject'] = '\n A test message' + self._process(self._mlist, msg, {}) + self.assertEqual(str(msg['subject']), '[Test] A test message') + def test_i18n_prefix(self): # The Subject header is encoded, but the prefix is still added. msg = Message() @@ -130,7 +145,7 @@ class TestSubjectPrefix(unittest.TestCase): msg['Subject'] = '[Test] ' self._process(self._mlist, msg, {}) subject = msg['subject'] - self.assertEqual(str(subject), '[Test] ') + self.assertEqual(str(subject), '[Test] (no subject)') def test_prefix_only_all_same(self): # Incoming subject is only the prefix. 
@@ -141,7 +156,7 @@ class TestSubjectPrefix(unittest.TestCase): self._process(self._mlist, msg, {}) self._mlist.preferred_language.charset = old_charset subject = msg['subject'] - self.assertEqual(str(subject), '[Test] ') + self.assertEqual(str(subject), '[Test] (no subject)') def test_prefix_only_mixed(self): # Incoming subject is only the prefix. @@ -149,7 +164,7 @@ class TestSubjectPrefix(unittest.TestCase): msg['Subject'] = '=?utf-8?Q?[Test]_?=' self._process(self._mlist, msg, {}) subject = msg['subject'] - self.assertEqual(str(subject), '[Test] ') + self.assertEqual(str(subject), '[Test] (no subject)') def test_re_only(self): # Incoming subject is only Re:. @@ -198,15 +213,13 @@ class TestSubjectPrefix(unittest.TestCase): def test_decode_header_returns_string(self): # Under some circumstances, email.header.decode_header() returns a # string value. Ensure we can handle that. - manager = getUtility(ILanguageManager) - manager.add('xx', 'iso-8859-1', 'Xlandia') self._mlist.preferred_language = 'xx' msg = Message() msg['Subject'] = 'Plain text' self._process(self._mlist, msg, {}) subject = msg['subject'] self.assertEqual(subject.encode(), - '=?iso-8859-1?q?=5BTest=5D_?= Plain text') + '=?utf-8?b?W1Rlc3Rd?= Plain text') def test_unknown_encoded_subject(self): msg = Message() @@ -215,3 +228,59 @@ class TestSubjectPrefix(unittest.TestCase): subject = msg['subject'] self.assertEqual(str(subject), '[Test] Non-ascii subject - fran�ais') + + def test_non_ascii_list(self): + # The mailing list has a non-ascii language + self._mlist.preferred_language = 'xx' + msg = Message() + msg['Subject'] = 'A test message' + self._process(self._mlist, msg, {}) + self.assertEqual(str(msg['subject']), '[Test] A test message') + + def test_no_subject(self): + # The email has no subject + msg = Message() + msg['Subject'] = '' + self._process(self._mlist, msg, {}) + self.assertEqual(str(msg['subject']), '[Test] (no subject)') + + def test_no_subject_non_ascii_list(self): + # The email has 
no subject on a non-ascii list + self._mlist.preferred_language = 'xx' + msg = Message() + msg['Subject'] = '' + self._process(self._mlist, msg, {}) + self.assertEqual(str(msg['subject']), '[Test] (no subject)') + + def test_no_real_subject(self): + # The subject is only the prefix + msg = Message() + msg['Subject'] = '[Test] ' + self._process(self._mlist, msg, {}) + self.assertEqual(str(msg['subject']), '[Test] (no subject)') + + def test_no_real_subject_non_ascii_list(self): + # The subject is only the prefix on a non-ascii list + self._mlist.preferred_language = 'xx' + msg = Message() + msg['Subject'] = '[Test] ' + self._process(self._mlist, msg, {}) + self.assertEqual(str(msg['subject']), '[Test] (no subject)') + + def test_non_ascii_subject_and_list(self): + # The mailing list has a non-ascii language and the subject is + # non-ascii with the same encoding. + self._mlist.preferred_language = 'xx' + msg = Message() + msg['Subject'] = '=?utf-8?q?d=C3=A9sirable?=' + self._process(self._mlist, msg, {}) + self.assertEqual(str(msg['subject']), '[Test] d\xe9sirable') + + def test_non_ascii_empty_subject_and_non_ascii_list(self): + # The mailing list has a non-ascii language and the subject is + # non-ascii with the same encoding, but actually empty. + self._mlist.preferred_language = 'xx' + msg = Message() + msg['Subject'] = '=?utf-8?q?[Test]_?=' + self._process(self._mlist, msg, {}) + self.assertEqual(str(msg['subject']), '[Test] (no subject)') -- 2.28.0