From 92c372e5ea503c863970472f5c53d9f129af69a7 Mon Sep 17 00:00:00 2001 From: tkikuchi <> Date: Mon, 26 Dec 2005 07:19:35 +0000 Subject: Python interpreter has evolved to be strict on ascii charset range. Subject manipulation should be done in unicode string mode. --- Mailman/Handlers/CookHeaders.py | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/Mailman/Handlers/CookHeaders.py b/Mailman/Handlers/CookHeaders.py index 0bd58ec1..eee5991e 100644 --- a/Mailman/Handlers/CookHeaders.py +++ b/Mailman/Handlers/CookHeaders.py @@ -253,6 +253,11 @@ def prefix_subject(mlist, msg, msgdata): # subject is mime-encoded and cset is set as us-ascii. See detail # for ch_oneline() (CookHeaders one line function). subject, cset = ch_oneline(subject) + # TK: Python interpreter has evolved to be strict on ascii charset + # code range. It is safe to use unicode string when manipulating + # header contents with re module. It would be best to return unicode + # in ch_oneline() but here is a temporary solution. + subject = unicode(subject, cset) # If the subject_prefix contains '%d', it is replaced with the # mailing list sequential number. Sequential number format allows # '%d' or '%05d' like pattern. 
@@ -279,6 +284,7 @@ def prefix_subject(mlist, msg, msgdata): if subject.strip() == '': subject = _('(no subject)') cset = Utils.GetCharSet(mlist.preferred_language) + subject = unicode(subject, cset) # and substitute %d in prefix with post_id try: prefix = prefix % mlist.post_id @@ -289,21 +295,15 @@ def prefix_subject(mlist, msg, msgdata): if cset == 'us-ascii': try: if old_style: - h = ' '.join([recolon, prefix, subject]) + h = u' '.join([recolon, prefix, subject]) else: - h = ' '.join([prefix, recolon, subject]) - if type(h) == UnicodeType: - h = h.encode('us-ascii') - else: - h = unicode(h, 'us-ascii').encode('us-ascii') + h = u' '.join([prefix, recolon, subject]) + h = h.encode('us-ascii') h = uheader(mlist, h, 'Subject', continuation_ws=ws) del msg['subject'] msg['Subject'] = h - ss = ' '.join([recolon, subject]) - if _isunicode(ss): - ss = ss.encode('us-ascii') - else: - ss = unicode(ss, 'us-ascii').encode('us-ascii') + ss = u' '.join([recolon, subject]) + ss = ss.encode('us-ascii') ss = uheader(mlist, ss, 'Subject', continuation_ws=ws) msgdata['stripped_subject'] = ss return @@ -316,15 +316,8 @@ def prefix_subject(mlist, msg, msgdata): else: h = uheader(mlist, prefix, 'Subject', continuation_ws=ws) h.append(recolon) - # in seq version, subject header is already concatnated - if not _isunicode(subject): - try: - subject = unicode(subject, cset, 'replace') - except (LookupError, TypeError): - # unknown codec - cset = Utils.GetCharSet(mlist.preferred_language) - subject = unicode(subject, cset, 'replace') - subject = subject.encode(cset,'replace') + # TK: Subject is already concatenated and is a unicode string. + subject = subject.encode(cset, 'replace') h.append(subject, cset) del msg['subject'] msg['Subject'] = h -- cgit v1.2.3