From 700a1e2e3572ec2c8766da9acdaf8babce5d8d20 Mon Sep 17 00:00:00 2001
From: Neal Gompa <ngompa13@gmail.com>
Date: Thu, 15 Oct 2020 10:30:07 -0400
Subject: [PATCH] Cover some subject prefix use cases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The subject_prefix handler has many bugs and was mostly not covered
for lists with a non-ascii charset.

Co-authored-by: Aurélien Bompard <aurelien@bompard.org>
Signed-off-by: Neal Gompa <ngompa13@gmail.com>
---
 src/mailman/handlers/subject_prefix.py        | 19 +++--
 .../handlers/tests/test_subject_prefix.py     | 81 +++++++++++++++++--
 2 files changed, 87 insertions(+), 13 deletions(-)

diff --git a/src/mailman/handlers/subject_prefix.py b/src/mailman/handlers/subject_prefix.py
index 95ec0ec2d..10ad07875 100644
--- a/src/mailman/handlers/subject_prefix.py
+++ b/src/mailman/handlers/subject_prefix.py
@@ -39,13 +39,12 @@ def ascii_header(mlist, msgdata, subject, prefix, prefix_pattern, ws):
         if charset not in ASCII_CHARSETS:
             return None
     subject_text = EMPTYSTRING.join(str(subject).splitlines())
+    subject_text = re.sub(prefix_pattern, '', subject_text)
     # At this point, the subject may become null if someone posted mail
     # with "Subject: [subject prefix]".
     if subject_text.strip() == '':
         with _.using(mlist.preferred_language.code):
             subject_text = _('(no subject)')
-    else:
-        subject_text = re.sub(prefix_pattern, '', subject_text)
     msgdata['stripped_subject'] = subject_text
     rematch = re.match(RE_PATTERN, subject_text, re.I)
     if rematch:
@@ -83,13 +82,12 @@ def all_same_charset(mlist, msgdata, subject, prefix, prefix_pattern, ws):
         if charset != list_charset:
             return None
     subject_text = EMPTYSTRING.join(chunks)
+    subject_text = re.sub(prefix_pattern, '', subject_text)
     # At this point, the subject may become null if someone posted mail
     # with "Subject: [subject prefix]".
     if subject_text.strip() == '':
-        with _.push(mlist.preferred_language.code):
+        with _.using(mlist.preferred_language.code):
             subject_text = _('(no subject)')
-    else:
-        subject_text = re.sub(prefix_pattern, '', subject_text)
     msgdata['stripped_subject'] = subject_text
     rematch = re.match(RE_PATTERN, subject_text, re.I)
     if rematch:
@@ -114,7 +112,7 @@ def mixed_charsets(mlist, msgdata, subject, prefix, prefix_pattern, ws):
     list_charset = mlist.preferred_language.charset
     chunks = decode_header(subject.encode())
     if len(chunks) == 0:
-        with _.push(mlist.preferred_language.code):
+        with _.using(mlist.preferred_language.code):
             subject_text = _('(no subject)')
         chunks = [(prefix, list_charset),
                   (subject_text, list_charset),
@@ -134,13 +132,20 @@ def mixed_charsets(mlist, msgdata, subject, prefix, prefix_pattern, ws):
             chunks.insert(0, ('', 'us-ascii'))
             first_text = ''
     first_text = re.sub(prefix_pattern, '', first_text).lstrip()
+    if not first_text.strip() and len(chunks) <= 1:
+        with _.using(mlist.preferred_language.code):
+            subject_text = _('(no subject)')
+        chunks = [(prefix.strip(), list_charset),
+                  (subject_text, list_charset),
+                  ]
+        return make_header(chunks, continuation_ws=ws)
     rematch = re.match(RE_PATTERN, first_text, re.I)
     if rematch:
         first_text = 'Re: ' + first_text[rematch.end():]
     chunks[0] = (first_text, chunk_charset)
     # The subject text stripped of the prefix, for use in the NNTP gateway.
     msgdata['stripped_subject'] = str(make_header(chunks, continuation_ws=ws))
-    chunks.insert(0, (prefix, list_charset))
+    chunks.insert(0, (prefix.strip(), list_charset))
     return make_header(chunks, continuation_ws=ws)
 
 
diff --git a/src/mailman/handlers/tests/test_subject_prefix.py b/src/mailman/handlers/tests/test_subject_prefix.py
index c2a257035..1d8bc175d 100644
--- a/src/mailman/handlers/tests/test_subject_prefix.py
+++ b/src/mailman/handlers/tests/test_subject_prefix.py
@@ -33,6 +33,13 @@ class TestSubjectPrefix(unittest.TestCase):
     def setUp(self):
         self._mlist = create_list('test@example.com')
         self._process = config.handlers['subject-prefix'].process
+        language_manager = getUtility(ILanguageManager)
+        if 'xx' not in language_manager:
+            language_manager.add('xx', 'utf-8', 'Freedonia')
+
+    def tearDown(self):
+        # The LanguageManager may need a 'remove' method.
+        del getUtility(ILanguageManager)._languages['xx']
 
     def test_isdigest(self):
         # If the message is destined for the digest, the Subject header does
@@ -114,6 +121,14 @@ class TestSubjectPrefix(unittest.TestCase):
         self._process(self._mlist, msg, {})
         self.assertEqual(str(msg['subject']), '[Test]  A test message')
 
+    def test_multiline_subject_non_ascii_list(self):
+        # The subject appears on multiple lines on a non-ascii list.
+        self._mlist.preferred_language = 'xx'
+        msg = Message()
+        msg['Subject'] = '\n A test message'
+        self._process(self._mlist, msg, {})
+        self.assertEqual(str(msg['subject']), '[Test] A test message')
+
     def test_i18n_prefix(self):
         # The Subject header is encoded, but the prefix is still added.
         msg = Message()
@@ -130,7 +145,7 @@ class TestSubjectPrefix(unittest.TestCase):
         msg['Subject'] = '[Test] '
         self._process(self._mlist, msg, {})
         subject = msg['subject']
-        self.assertEqual(str(subject), '[Test] ')
+        self.assertEqual(str(subject), '[Test] (no subject)')
 
     def test_prefix_only_all_same(self):
         # Incoming subject is only the prefix.
@@ -141,7 +156,7 @@ class TestSubjectPrefix(unittest.TestCase):
         self._process(self._mlist, msg, {})
         self._mlist.preferred_language.charset = old_charset
         subject = msg['subject']
-        self.assertEqual(str(subject), '[Test] ')
+        self.assertEqual(str(subject), '[Test] (no subject)')
 
     def test_prefix_only_mixed(self):
         # Incoming subject is only the prefix.
@@ -149,7 +164,7 @@ class TestSubjectPrefix(unittest.TestCase):
         msg['Subject'] = '=?utf-8?Q?[Test]_?='
         self._process(self._mlist, msg, {})
         subject = msg['subject']
-        self.assertEqual(str(subject), '[Test] ')
+        self.assertEqual(str(subject), '[Test] (no subject)')
 
     def test_re_only(self):
         # Incoming subject is only Re:.
@@ -198,15 +213,13 @@ class TestSubjectPrefix(unittest.TestCase):
     def test_decode_header_returns_string(self):
         # Under some circumstances, email.header.decode_header() returns a
         # string value.  Ensure we can handle that.
-        manager = getUtility(ILanguageManager)
-        manager.add('xx', 'iso-8859-1', 'Xlandia')
         self._mlist.preferred_language = 'xx'
         msg = Message()
         msg['Subject'] = 'Plain text'
         self._process(self._mlist, msg, {})
         subject = msg['subject']
         self.assertEqual(subject.encode(),
-                         '=?iso-8859-1?q?=5BTest=5D_?= Plain text')
+                         '=?utf-8?b?W1Rlc3Rd?= Plain text')
 
     def test_unknown_encoded_subject(self):
         msg = Message()
@@ -215,3 +228,59 @@ class TestSubjectPrefix(unittest.TestCase):
         subject = msg['subject']
         self.assertEqual(str(subject),
                          '[Test]  Non-ascii subject - fran�ais')
+
+    def test_non_ascii_list(self):
+        # The mailing list has a non-ascii language
+        self._mlist.preferred_language = 'xx'
+        msg = Message()
+        msg['Subject'] = 'A test message'
+        self._process(self._mlist, msg, {})
+        self.assertEqual(str(msg['subject']), '[Test] A test message')
+
+    def test_no_subject(self):
+        # The email has no subject
+        msg = Message()
+        msg['Subject'] = ''
+        self._process(self._mlist, msg, {})
+        self.assertEqual(str(msg['subject']), '[Test] (no subject)')
+
+    def test_no_subject_non_ascii_list(self):
+        # The email has no subject on a non-ascii list
+        self._mlist.preferred_language = 'xx'
+        msg = Message()
+        msg['Subject'] = ''
+        self._process(self._mlist, msg, {})
+        self.assertEqual(str(msg['subject']), '[Test] (no subject)')
+
+    def test_no_real_subject(self):
+        # The email has no subject
+        msg = Message()
+        msg['Subject'] = '[Test] '
+        self._process(self._mlist, msg, {})
+        self.assertEqual(str(msg['subject']), '[Test] (no subject)')
+
+    def test_no_real_subject_non_ascii_list(self):
+        # The email has no subject on a non-ascii list
+        self._mlist.preferred_language = 'xx'
+        msg = Message()
+        msg['Subject'] = '[Test] '
+        self._process(self._mlist, msg, {})
+        self.assertEqual(str(msg['subject']), '[Test] (no subject)')
+
+    def test_non_ascii_subject_and_list(self):
+        # The mailing list has a non-ascii language and the subject is
+        # non-ascii with the same encoding.
+        self._mlist.preferred_language = 'xx'
+        msg = Message()
+        msg['Subject'] = '=?utf-8?q?d=C3=A9sirable?='
+        self._process(self._mlist, msg, {})
+        self.assertEqual(str(msg['subject']), '[Test] d\xe9sirable')
+
+    def test_non_ascii_empty_subject_and_non_ascii_list(self):
+        # The mailing list has a non-ascii language and the subject is
+        # non-ascii with the same encoding, but actually empty.
+        self._mlist.preferred_language = 'xx'
+        msg = Message()
+        msg['Subject'] = '=?utf-8?q?[Test]_?='
+        self._process(self._mlist, msg, {})
+        self.assertEqual(str(msg['subject']), '[Test] (no subject)')
-- 
2.28.0