Scrubber.py - Cleaned up a few loose ends and incomplete things left after the
previous commit.
- Normalized a couple more 'URL:'.
- Improved handling of None payloads.
- Cleaned up a few charset coercions.
OutgoingRunner.py - Made probe bounce processing and queuing of bounces conditional on having some permanent failure(s).
author: Mark Sapiro <msapiro@value.net> 2007-06-22 10:49:27 -0700
committer: Mark Sapiro <msapiro@value.net> 2007-06-22 10:49:27 -0700
commit: 0e302ec9331aba5a050f5bce45d20701627e6bb5 (patch)
tree: 6907e205dfd24f8b658debeb80a898c6dc5f3c90 /Mailman/Handlers/Scrubber.py
parent: 6da513d8870677223dc4d52ebe1c1db7349230ee (diff)
download: mailman2-0e302ec9331aba5a050f5bce45d20701627e6bb5.tar.gz
mailman2-0e302ec9331aba5a050f5bce45d20701627e6bb5.tar.xz
mailman2-0e302ec9331aba5a050f5bce45d20701627e6bb5.zip
1 files changed, 19 insertions, 17 deletions
diff --git a/Mailman/Handlers/Scrubber.py b/Mailman/Handlers/Scrubber.py
index de02fd45..48660038 100644
--- a/Mailman/Handlers/Scrubber.py
+++ b/Mailman/Handlers/Scrubber.py
@@ -223,7 +223,7 @@ def process(mlist, msg, msgdata=None):
                 replace_payload_by_text(part, _("""\
 An embedded and charset-unspecified text was scrubbed...
 Name: %(filename)s
-Url: %(url)s
+URL: %(url)s
 """), lcset)
         elif ctype == 'text/html' and isinstance(sanitize, IntType):
             if sanitize == 0:
@@ -293,7 +293,7 @@ From: %(who)s
 Subject: %(subject)s
 Date: %(date)s
 Size: %(size)s
-Url: %(url)s
+URL: %(url)s
 """), lcset)
         # If the message isn't a multipart, then we'll strip it out as an
         # attachment that would have to be separately downloaded.  Pipermail
@@ -316,6 +316,7 @@ Url: %(url)s
             finally:
                 os.umask(omask)
             desc = part.get('content-description', _('not available'))
+            desc = Utils.oneline(desc, lcset)
             filename = part.get_filename(_('not available'))
             filename = Utils.oneline(filename, lcset)
             replace_payload_by_text(part, _("""\
@@ -324,7 +325,7 @@ Name: %(filename)s
 Type: %(ctype)s
 Size: %(size)d bytes
 Desc: %(desc)s
-Url : %(url)s
+URL: %(url)s
 """), lcset)
         outer = False
     # We still have to sanitize multipart messages to flat text because
@@ -356,14 +357,14 @@ Url : %(url)s
                 text.append(_('Skipped content of type %(partctype)s\n'))
                 continue
             try:
-                t = part.get_payload(decode=True)
+                t = part.get_payload(decode=True) or ''
             # MAS: TypeError exception can occur if payload is None. This
             # was observed with a message that contained an attached
             # message/delivery-status part. Because of the special parsing
             # of this type, this resulted in a text/plain sub-part with a
             # null body. See bug 1430236.
             except (binascii.Error, TypeError):
-                t = part.get_payload()
+                t = part.get_payload() or ''
             # TK: get_content_charset() returns 'iso-2022-jp' for internally
             # crafted (scrubbed) 'euc-jp' text part. So, first try
             # get_charset(), then get_content_charset() for the parts
@@ -373,21 +374,20 @@ Url : %(url)s
                 partcharset = str(partcharset)
             else:
                 partcharset = part.get_content_charset()
-            # If the part is Content-Type: message/delivery-status, payload is
-            # None so test here.
-            if t and partcharset and partcharset <> charset:
+            if partcharset and partcharset <> charset:
                 try:
                     t = unicode(t, partcharset, 'replace')
-                except (UnicodeError, LookupError, ValueError, AssertionError):
-                    # Replace funny characters.  We use errors='replace' for
-                    # both calls since the first replace will leave U+FFFD,
-                    # which isn't ASCII encodeable.
-                    u = unicode(t, 'ascii', 'replace')
-                    t = u.encode('ascii', 'replace')
+                except (UnicodeError, LookupError, ValueError,
+                        AssertionError):
+                    # We can get here if partcharset is bogus in some way.
+                    # Replace funny characters, using errors='replace'.
+                    t = unicode(t, 'ascii', 'replace')
                 try:
                     # Should use HTML-Escape, or try generalizing to UTF-8
                     t = t.encode(charset, 'replace')
-                except (UnicodeError, LookupError, ValueError, AssertionError):
+                except (UnicodeError, LookupError, ValueError,
+                        AssertionError):
+                    # if the message charset is bogus, use the list's.
                     t = t.encode(lcset, 'replace')
             # Separation is useful
             if isinstance(t, StringType):
@@ -401,7 +401,8 @@ Url : %(url)s
         try:
             s = unicode(sep, lcset, 'replace')
             sep = s.encode(charset, 'replace')
-        except (UnicodeError, LookupError, ValueError):
+        except (UnicodeError, LookupError, ValueError,
+                AssertionError):
             pass
         replace_payload_by_text(msg, sep.join(text), charset)
         if format:
@@ -479,7 +480,8 @@ def save_attachment(mlist, msg, dir, filter_html=True):
             # which one should we go with?  For now, let's go with the one we
             # guessed so attachments can't lie about their type.  Also, if the
             # filename /has/ no extension, then tack on the one we guessed.
-            filebase, ignore = os.path.splitext(filename)
+            # The extension was removed from the name above.
+            filebase = filename
         # Now we're looking for a unique name for this file on the file
         # system.  If msgdir/filebase.ext isn't unique, we'll add a counter
         # after filebase, e.g. msgdir/filebase-cnt.ext
author	Mark Sapiro <msapiro@value.net>	2007-06-22 10:49:27 -0700
committer	Mark Sapiro <msapiro@value.net>	2007-06-22 10:49:27 -0700
commit	0e302ec9331aba5a050f5bce45d20701627e6bb5 (patch)
tree	6907e205dfd24f8b658debeb80a898c6dc5f3c90 /Mailman/Handlers/Scrubber.py
parent	6da513d8870677223dc4d52ebe1c1db7349230ee (diff)
download	mailman2-0e302ec9331aba5a050f5bce45d20701627e6bb5.tar.gz mailman2-0e302ec9331aba5a050f5bce45d20701627e6bb5.tar.xz mailman2-0e302ec9331aba5a050f5bce45d20701627e6bb5.zip